Compare commits

...

7393 Commits

Author SHA1 Message Date
f244439b4f Revert "Format"
This reverts commit 30fd546c12.
2025-07-10 16:43:45 +02:00
30fd546c12 Format 2025-07-10 16:43:10 +02:00
a930977460 Fix test 2025-07-10 09:37:58 +02:00
a3b8c2b71f Gate behind multimodal experimental feature 2025-07-09 18:21:52 +02:00
39f808714d Implement a documentTemplate filter 2025-07-09 18:03:32 +02:00
8adf6141e0 Fix old test 2025-07-08 16:55:43 +02:00
df3f282e4d Merge branch 'request-fragments-test' into fragment-filters 2025-07-08 16:35:14 +02:00
d81855015b Add test 2025-07-08 16:23:45 +02:00
feb53104e5 Grammar 2025-07-08 16:19:55 +02:00
881c37393f Add telemetry 2025-07-08 16:06:27 +02:00
9e98a25e45 Fix clippy 2025-07-08 15:56:09 +02:00
0a4f2ef891 Leak mock servers 2025-07-08 15:27:35 +02:00
3cc5d86598 Format 2025-07-08 13:57:17 +02:00
1ae47bec77 Improve composite test 2025-07-08 13:57:07 +02:00
2f1be0ff86 Ignore faulty test (see #5746) 2025-07-08 13:55:07 +02:00
fb73b83abe Fix performance 2025-07-08 12:14:34 +02:00
29b74424ad Clean code 2025-07-08 12:03:32 +02:00
b4cafec8b3 Add tests for operators along vector filter 2025-07-08 11:56:19 +02:00
d43cd40807 Split tests 2025-07-08 11:48:23 +02:00
0301d8f239 Improve error handling 2025-07-08 11:39:10 +02:00
2d45124d9b Fix parsing 2025-07-08 10:01:50 +02:00
40e7284d70 Add tests 2025-07-08 10:01:35 +02:00
4d8d34cc93 Merge branch 'request-fragments-test' into fragment-filters 2025-07-07 18:45:34 +02:00
5cced0af02 Prevent having both a fragment name and userProvided 2025-07-07 18:41:03 +02:00
9c60e9689f Support not specifying an embedder in the vector filter 2025-07-07 18:34:24 +02:00
3261aadcf2 Add composite test 2025-07-07 16:50:39 +02:00
073e9f2967 Disable similarity check on composite embedders using fragments 2025-07-07 16:46:16 +02:00
2052537681 Implement core filter logic 2025-07-07 15:28:35 +02:00
a9bb64c55a Unrelated minor fixes 2025-07-07 15:28:10 +02:00
132065afda Minor improvements 2025-07-07 13:10:16 +02:00
51c298662b Merge branch 'main' into request-fragments-test 2025-07-07 13:00:21 +02:00
ef4c87accf Merge pull request #5732 from meilisearch/chat-route-support-metrics
Add chat-related metrics on the prometheus route
2025-07-07 08:33:31 +00:00
ced7ea4a5c Merge pull request #5731 from meilisearch/chat-route-support-dumps
Export and import chat completions workspace settings in dumps
2025-07-07 08:31:41 +00:00
fa3990daf9 Format 2025-07-04 13:33:49 +02:00
c5993196b3 Add test 2025-07-04 13:32:55 +02:00
16234e1313 Add fragment swapping test 2025-07-04 13:25:42 +02:00
be9f4f96df Add experimental feature test 2025-07-04 13:15:15 +02:00
b274106ad3 Add test 2025-07-04 13:05:52 +02:00
48527761e7 Add test 2025-07-04 12:01:15 +02:00
6792d048b8 Test both fragments and document template 2025-07-04 11:47:38 +02:00
8dfded2993 Update tests 2025-07-04 10:49:03 +02:00
3714f16696 Fix bug 2025-07-04 10:40:50 +02:00
d0cd3cacec Add a way to reproduce the bug 2025-07-03 18:18:04 +02:00
fef089c7b6 Merge pull request #5596 from meilisearch/request-fragments
Request fragments
2025-07-03 15:01:44 +00:00
d47e1e15de Merge pull request #5730 from meilisearch/update-version-v1.16.0
Update version for the next release (v1.16.0) in Cargo.toml
2025-07-03 14:45:43 +00:00
caccb51814 Add a complex value test 2025-07-03 16:10:23 +02:00
a76a3e8f11 Change the metric name for the search to use a label 2025-07-03 16:01:31 +02:00
32dede35c7 Update snapshots 2025-07-03 15:59:14 +02:00
6397ef12a0 Use three metrics for the three different tokens 2025-07-03 15:56:56 +02:00
cf9b311f71 Format 2025-07-03 15:53:09 +02:00
7423243be0 Add test with multiple embedders 2025-07-03 15:52:18 +02:00
b5e41f0e46 Fix the Mistral incompatibility with the usage of OpenAI 2025-07-03 15:21:40 +02:00
5690700601 Add fragment addition test 2025-07-03 15:19:31 +02:00
2faad504c6 Add test 2025-07-03 15:12:47 +02:00
2bcd69750f Add fragment modification test 2025-07-03 15:08:27 +02:00
9f0d33ec99 Expose the number of tokens on the chat completions routes 2025-07-03 15:05:15 +02:00
de24e75be8 Update test 2025-07-03 15:00:11 +02:00
a3af9fe057 new extractor bugfixes:
- fix old_has_fragments
- new_is_user_provided is always false when generating fragments,
  even if no fragment ever matches
2025-07-03 14:44:34 +02:00
90683d0e4e add snapshot of get settings 2025-07-03 14:43:06 +02:00
5c79273748 Add TODOs 2025-07-03 14:42:49 +02:00
90e6b6416f new extractor bugfixes:
- fix old_has_fragments
- new_is_user_provided is always false when generating fragments,
  even if no fragment ever matches
2025-07-03 14:35:02 +02:00
2b75072b09 Expose the number of internal chat searches on the /metrics route 2025-07-03 14:04:27 +02:00
6e6fd077d4 Ignore nonexistent chat completions settings folder 2025-07-03 13:37:38 +02:00
b45eea0d3e Add test for fragment deletion 2025-07-03 13:26:44 +02:00
a051ab3d9a Support importing chat completions settings 2025-07-03 12:04:40 +02:00
0b89ef1fd7 Make tests use a shared index 2025-07-03 11:32:49 +02:00
65ba7b47af Test search fragments 2025-07-03 11:32:49 +02:00
8af76a65bf Add test_fragment_indexing 2025-07-03 11:32:49 +02:00
6b94033c97 Correctly export the chat completions settings in dumps 2025-07-03 11:30:24 +02:00
dfe0c8664e Add a version of prompt::Context that has no fields 2025-07-03 11:08:31 +02:00
0ca652de28 Extract vector points: remove the { 2025-07-03 10:52:30 +02:00
87f105747f Add documentation to Extractor trait 2025-07-03 10:41:20 +02:00
735634e998 Send owned metadata and clear inputs in case of error 2025-07-03 10:32:57 +02:00
3740755d9c Compare to RawValue::NULL constant rather than explicit "null" 2025-07-03 10:11:07 +02:00
bbcabc47bd Update version for the next release (v1.16.0) in Cargo.toml 2025-07-03 08:06:38 +00:00
a06cb1bfd6 Remove Embed::process_embeddings and have it be an inherent function of the type that uses it 2025-07-03 10:02:16 +02:00
549dc985b8 Old dump import indexer: fix the case where going from Generated to Generated 2025-07-03 09:58:41 +02:00
428463e45c Check indexing fragments as well as search fragments 2025-07-02 16:17:22 +02:00
7113fcf63a New error 2025-07-02 16:17:12 +02:00
aa6855cd4f Vector settings: don't assume which kind of request is asked when looking at a settings update without fragments 2025-07-02 16:12:23 +02:00
895db76a51 Fix snaps 2025-07-02 16:10:05 +02:00
a88146d59e Merge pull request #5728 from meilisearch/bump-minidashboard-v0.2.20
Bump the mini-dashboard to v0.2.20
2025-07-02 11:03:00 +00:00
91e77abf4f Bump the mini-dashboard to v0.2.20 2025-07-02 12:15:11 +02:00
82a796aea7 vector settings: fix bug where removed fragments were returned as new 2025-07-02 11:36:50 +02:00
f6287602e9 Improve error message when request contains the wrong type of placeholder 2025-07-02 11:36:50 +02:00
ede456c5b0 New error: rest inconsistent fragments 2025-07-02 11:36:50 +02:00
3f5b5df139 Check consistency of fragments 2025-07-02 11:36:50 +02:00
d72e5f5f69 Hide documentTemplate and documentTemplateMaxBytes when indexing_fragment is defined 2025-07-02 11:29:50 +02:00
aa366d593d Merge pull request #5726 from meilisearch/dependabot/github_actions/Swatinem/rust-cache-2.8.0
Bump Swatinem/rust-cache from 2.7.8 to 2.8.0
2025-07-02 08:09:11 +00:00
205430854d Merge pull request #5727 from meilisearch/dependabot/github_actions/svenstaro/upload-release-action-2.11.1
Bump svenstaro/upload-release-action from 2.7.0 to 2.11.1
2025-07-02 08:05:07 +00:00
be64006211 Fix process export 2025-07-02 09:12:18 +02:00
eda309d562 make sure fragments are ordered 2025-07-02 00:05:13 +02:00
119d618a76 Do not "upgrade" regenerate fragments to regenerate prompt 2025-07-02 00:05:13 +02:00
2b2e6c0b3a Settings changes 2025-07-02 00:05:13 +02:00
e6329e77e1 settings fragment_diffs 2025-07-02 00:05:13 +02:00
b086c51a23 new settings indexer 2025-07-02 00:05:13 +02:00
9ce5598fef parsed vectors: embeddings is None when it is null when read from DB 2025-07-02 00:05:13 +02:00
e30c24b5bf Prompt: relax lifetime constraints 2025-07-02 00:05:13 +02:00
c1a132fa06 multimodal experimental feature 2025-07-02 00:05:13 +02:00
e54fc59248 Fix snaps 2025-07-02 00:05:13 +02:00
11e7c0d75f Fix tests 2025-07-02 00:05:13 +02:00
c593fbe648 Analytics 2025-07-02 00:05:12 +02:00
2b3327ea74 Use media to determine search kind 2025-07-02 00:05:12 +02:00
d14184f4da Add media to search 2025-07-02 00:05:12 +02:00
46bceb91f1 New search errors 2025-07-02 00:05:12 +02:00
cab5e35ff7 Implement in old settings indexer and old dump import indexer 2025-07-02 00:05:12 +02:00
f8232976ed Implement in new document indexer 2025-07-02 00:05:12 +02:00
22d363c05a Clear DB on clear documents 2025-07-02 00:05:12 +02:00
41620d5325 Support indexingFragments and searchFragments in settings 2025-07-02 00:05:12 +02:00
f3d5c74c02 Vector settings to add indexingFragments and searchFragments 2025-07-02 00:05:12 +02:00
d48baece51 New error when too many fragments in settings 2025-07-02 00:05:12 +02:00
c45ede44a8 Add new parameters to openai and rest embedders 2025-07-02 00:05:11 +02:00
4235a82dcf REST embedder supports fragments 2025-07-02 00:05:11 +02:00
e7b9b8f002 Change embedder API 2025-07-02 00:05:11 +02:00
5716ab70f3 EmbeddingConfigs -> RuntimeEmbedders 2025-07-02 00:05:11 +02:00
422a786ffd RuntimeEmbedder and RuntimeFragments 2025-07-02 00:05:11 +02:00
836ae19bec ArroyWrapper changes 2025-07-02 00:05:11 +02:00
0b5bc41b79 Add new vector errors 2025-07-02 00:05:11 +02:00
b45059e8f2 Add vector::session module 2025-07-02 00:05:11 +02:00
c16c60b599 Add vector::extractor module 2025-07-02 00:05:11 +02:00
0114796d2a Index uses the vector::db stuff 2025-07-02 00:05:10 +02:00
17a94c40dc Add vector::db module 2025-07-02 00:05:10 +02:00
76ca44b214 Expand json_template module 2025-07-02 00:05:10 +02:00
d2e4d6dd8a prompt: Publishes some types 2025-07-02 00:04:04 +02:00
879cf85037 Bump svenstaro/upload-release-action from 2.7.0 to 2.11.1
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.7.0 to 2.11.1.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/2.7.0...2.11.1)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-version: 2.11.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-07-01 17:23:13 +00:00
c2d5b20a42 Bump Swatinem/rust-cache from 2.7.8 to 2.8.0
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.8 to 2.8.0.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.7.8...v2.8.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-version: 2.8.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-07-01 17:23:08 +00:00
b93ca3945e Merge pull request #5723 from meilisearch/fix-flaky-embedder-test
Fix flaky last_error test
2025-07-01 15:14:28 +00:00
8fef48f8ca Merge pull request #5670 from meilisearch/export-and-transfer-route
Introduce a new route to export indexes
2025-07-01 14:37:02 +00:00
d2776efb11 Fix flaky last_error test 2025-07-01 15:14:56 +02:00
9211e94c4f Format 2025-07-01 15:03:20 +02:00
b7bebe9bbb Fix export when index already exists 2025-07-01 15:03:04 +02:00
37a692f942 Keep IndexUidPattern 2025-07-01 14:47:43 +02:00
25c19a306b Rename variable
Co-authored-by: Kero <clement@meilisearch.com>
2025-07-01 14:42:44 +02:00
c078efd730 Remove experimental todo 2025-07-01 14:40:59 +02:00
9dac91efe0 Fix utoipa response 2025-07-01 14:40:39 +02:00
074d509d92 Fix expect message 2025-07-01 14:39:52 +02:00
d439a3cb9d Fix progress names 2025-07-01 14:39:24 +02:00
259fc067d3 Count exported documents by index name, not pattern 2025-07-01 11:14:59 +02:00
e8b2bb3ea6 Merge pull request #5709 from meilisearch/analytics-chat-completions
Add analytics to the chat completions
2025-07-01 09:14:47 +00:00
7dfb2071b5 Merge pull request #5683 from meilisearch/fix-recoverable-file-store-error
Make sure to recover from missing update file
2025-07-01 09:08:55 +00:00
9cfbef478e Add override settings to analytics 2025-07-01 11:04:59 +02:00
efd5fd96cc Add the overrideSettings parameter 2025-07-01 11:02:42 +02:00
0ef52941c7 Merge pull request #5687 from meilisearch/settings-indexer-edition-2024
Settings indexer edition 2024
2025-07-01 07:35:21 +00:00
0d85f8fcee Make sure to recover from missing update file 2025-06-30 19:09:30 +02:00
f4bb6cbca8 Better behavior when null indexes 2025-06-30 18:59:16 +02:00
ad03c86c44 Display an accurate number of uploaded documents 2025-06-30 18:46:47 +02:00
85037352b9 Fix most of the easy issues 2025-06-30 18:31:32 +02:00
1b54c866e1 Link experimental feature discussion 2025-06-30 14:47:39 +02:00
e414284335 Clippy too many arguments 2025-06-30 14:25:28 +02:00
7a204609fe Move document context and identifiers in document.rs 2025-06-30 14:21:46 +02:00
6b2b8ed676 Transform experimental_no_edition_2024_for_settings into a config 2025-06-30 11:49:03 +02:00
6db5939f84 Re-integrate embedder stats 2025-06-30 09:52:06 +02:00
d35b2d8d33 minor fixes 2025-06-30 09:52:06 +02:00
0687cf058a Avoid rewriting documents that don't change
Ensure being on a reindex action before getting embedder_category_id

Fix document skip function
2025-06-30 09:52:06 +02:00
7219299436 Better handle task abortion 2025-06-27 12:33:32 +02:00
657bbf5d1e Fix more tests 2025-06-27 10:14:26 +02:00
7fa1c41190 Fix some api key errors 2025-06-26 18:25:49 +02:00
77802dabf6 rename DocumentChangeContext into DocumentContext 2025-06-26 18:14:48 +02:00
a685eeafeb weird snapshot update 2025-06-26 18:14:48 +02:00
f16e6f7c37 Update snapshots 2025-06-26 18:14:48 +02:00
900be0ccad Extract or regenerate vectors related to settings changes 2025-06-26 18:14:48 +02:00
51a087b764 Write back user provided vectors from deleted embedders 2025-06-26 18:14:48 +02:00
31142b3663 Introduce extractor for setting changes 2025-06-26 18:14:48 +02:00
e60b855a54 Delete embedders from arroy 2025-06-26 18:14:48 +02:00
510a4b91be Introduce DatabaseDocument type 2025-06-26 18:14:48 +02:00
e704f4d1ec Reimplement reindexing shell 2025-06-26 18:14:48 +02:00
82fe80b360 Replace the legacy Settings::execute by the new one 2025-06-26 18:14:14 +02:00
0f1dd3614c Update tasks tests 2025-06-26 18:11:12 +02:00
3aa6c3c750 Merge pull request #5707 from Mubelotix/last_embedder_message
Add last embedder error in batches
2025-06-26 15:21:17 +00:00
b956918c11 Fix clippy and more utoipa issues 2025-06-26 16:31:38 +02:00
e3003c1609 Improve OpenAPI schema 2025-06-26 16:05:12 +02:00
bf13268649 Better compute aggregates 2025-06-26 16:03:13 +02:00
0bb7866f1e Remove the skip embeddings boolean in the settings 2025-06-26 15:48:21 +02:00
e6e9a033aa Introduce new analytics to the export route 2025-06-26 15:45:24 +02:00
63031219c5 Add the payload size to the parameters 2025-06-26 13:57:32 +02:00
44d6430bae Rename fields 2025-06-26 12:30:08 +02:00
4d26e9c6f2 Remove my comments 2025-06-26 12:21:34 +02:00
2ff382c023 Remove useless clone 2025-06-26 12:15:09 +02:00
0f6dd133b2 Turn to references 2025-06-26 12:15:09 +02:00
29f6eeff8f Remove lots of Arcs 2025-06-26 12:15:08 +02:00
ef007d547d Remove panics 2025-06-26 12:15:08 +02:00
3fc16c627d Comment the delay 2025-06-26 12:15:08 +02:00
9422b6d654 Update crates/meilisearch/src/lib.rs
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2025-06-26 10:58:27 +02:00
ddba52414a Merge pull request #5702 from Nymuxyzo/fix/5688-reset-typo_tolerance-settings
Fix disableOnNumbers reset
2025-06-26 07:58:47 +00:00
a743da3061 Gzip-compress the content 2025-06-25 15:27:10 +02:00
c6216517c7 Parallelize document upload 2025-06-25 15:27:10 +02:00
2d4f7c635e Make tests happy 2025-06-25 15:27:10 +02:00
ee812b31c4 Support JSON value as filters 2025-06-25 15:27:09 +02:00
3329248a84 Support no pattern when exporting 2025-06-25 15:27:09 +02:00
bc08cd0deb Make clippy happy again 2025-06-25 15:27:09 +02:00
3e2f468213 Support task cancelation 2025-06-25 15:27:09 +02:00
7c448bcc00 Make clippy happy 2025-06-25 15:27:09 +02:00
acb7c0a449 Implement a retry strategy 2025-06-25 15:27:08 +02:00
e8795d2608 Export embeddings 2025-06-25 15:26:47 +02:00
e023ee4b6b Working first implementation 2025-06-25 15:26:47 +02:00
e74c3b692a Introduce a new route to export documents and enqueue the export task 2025-06-25 15:26:46 +02:00
1d3b18f774 Update test to be more reproducible 2025-06-25 14:58:21 +02:00
00bc86e74b Merge pull request #5705 from meilisearch/fix-max-total-size-limit-env-var
Fix the environment variable name of the experimental limit batched tasks total size feature
2025-06-25 12:49:30 +00:00
adc9976615 Simplify the analytics chat completions aggregator 2025-06-25 11:50:26 +02:00
ae8c1461e1 Merge pull request #5708 from meilisearch/unsupport-gemini
Remove Gemini from the LLM-providers list
2025-06-25 06:44:37 +00:00
5f62274f21 Add disableOnNumbers to settings reset 2025-06-24 23:32:50 +02:00
5f50fc9464 Add new analytics to the chat completions route 2025-06-24 17:05:49 +02:00
89498a2bea Remove Gemini from the LLM-providers list 2025-06-24 15:58:39 +02:00
211c1b753f Fix the env variable name 2025-06-24 15:27:39 +02:00
d08e89ea3d Remove options 2025-06-24 15:10:15 +02:00
695877043a Fix warnings 2025-06-24 14:53:39 +02:00
bc4d1530ee Fix tests 2025-06-24 14:50:23 +02:00
d7721fe607 Format 2025-06-24 12:20:22 +02:00
4a179fb3c0 Improve code quality 2025-06-24 11:38:11 +02:00
59a1c5d9a7 Make test more reproducible 2025-06-24 11:08:06 +02:00
2f82d94502 Fix the test and simplify types 2025-06-23 18:55:23 +02:00
bd2bd0f33b Merge pull request #5697 from martin-g/documents-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task() in documents::
2025-06-23 16:33:21 +00:00
e02733df4a Merge pull request #5698 from martin-g/index-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task() in index::
2025-06-23 16:31:40 +00:00
f373ecc96a Merge pull request #5699 from martin-g/settings-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task() in settings::
2025-06-23 16:30:49 +00:00
748a327271 Merge pull request #5700 from martin-g/search-use-server-wait-task
tests: Use Server::wait_task() instead of Index::wait_task() in search::
2025-06-23 16:29:53 +00:00
4925b30196 Move embedder stats out of progress 2025-06-23 15:24:14 +02:00
43c4a229b7 Merge pull request #5692 from diksipav/5684-gemini-chat-completions-fix
Fix Gemini base_url when used with OpenAI clients
2025-06-23 09:03:34 +00:00
ca112a8b95 tests: Use Server::wait_task() instead of Index::wait_task() in index::
The code is mostly duplicated. Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-22 14:59:29 +03:00
855fa555a3 tests: Use Server::wait_task() instead of Index::wait_task() in search::
The code is mostly duplicated. Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-22 14:54:49 +03:00
a237c0797a tests: Use Server::wait_task() instead of Index::wait_task() in settings::
The code is mostly duplicated. Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-22 14:32:45 +03:00
5c46dc702a tests: Use Server::wait_task() instead of Index::wait_task()
The code is mostly duplicated.
Server::wait_task() has better handling for errors and more retries.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-22 14:22:59 +03:00
4cadc8113b Add embedder stats in batches 2025-06-20 12:42:22 +02:00
c17031d3de Fix Gemini base_url when used with OpenAI clients 2025-06-19 15:11:37 +02:00
fc6cc80705 Merge pull request #5689 from Mubelotix/main
Remove old dependencies
2025-06-19 08:11:55 +00:00
138d20b277 Remove old dependencies 2025-06-18 16:46:20 +02:00
7c1a9113f9 Merge pull request #5686 from meilisearch/upgrade-dependencies-again
Upgrade dependencies
2025-06-18 09:22:18 +00:00
07ae297ffd Merge pull request #5681 from martin-g/faster-settings-prefix_search_settings-it-tests
tests: Faster settings::prefix_search_settings IT tests
2025-06-18 09:20:56 +00:00
4069dbcfca Upgrade incompatible dependencies 2025-06-17 22:23:37 +02:00
03eb50fbac Upgrade dependencies 2025-06-17 22:03:06 +02:00
2616d776f2 Merge pull request #5677 from martin-g/faster-documents-errors-it-tests
tests: Faster document::errors IT tests
2025-06-17 15:53:35 +00:00
3004db95af Merge pull request #5680 from martin-g/faster-similar-mod-it-tests
tests: Faster similar::mod IT tests
2025-06-17 15:51:38 +00:00
9a729bf31d Merge pull request #5682 from martin-g/faster-documents-update_documents-it-tests
tests: Faster documents::update_documents IT tests
2025-06-17 14:36:09 +00:00
8bfa6a7f54 tests: Faster documents::update_documents IT tests
Use a shared server + unique index

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 23:48:59 +03:00
056f18bd02 tests: Faster settings::prefix_search_settings IT tests
Use shared server + unique indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 23:20:11 +03:00
fe9866aca8 tests: Faster similar::mod IT tests
Use shared server + unique indexes

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 22:51:07 +03:00
60f105a4a3 tests: Faster document::errors IT tests
* Add a call to .failed() for an awaited task
* Use Server::wait_task() instead of Index::wait_task() - it has better
  error checking

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 16:25:15 +03:00
abb399b802 Merge pull request #5674 from meilisearch/release-v1.15.2
Bring back v1.15.2 to main
2025-06-16 11:36:07 +00:00
aeaac7270e Merge pull request #5603 from martin-g/faster-search-multi-it-tests
tests: Faster search::multi IT tests
2025-06-16 09:43:24 +00:00
f45770a3ce Merge pull request #5672 from martin-g/reuse-bench-data
docs: Recommend using a custom path for the benches' data
2025-06-16 09:35:57 +00:00
0e10ff1aa3 docs: Recommend using a custom path for the benches' data
This reduces the build time of the `benchmarks` crate from ~220secs to
45secs (according to `cargo build --timings`) on my dev machine

Additionally I've introduced a parent folder for the Meili related cache
paths - ~/.cache/meili

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-16 09:21:47 +03:00
6ee608c2d1 Remove debug leftovers
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-14 15:45:04 +03:00
95e8a9bef1 Use a unique name for an index in a shared server
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-14 15:10:48 +03:00
0598320252 Try to debug the problem with the existing "test" index in a shared server
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-14 14:07:57 +03:00
2269104337 Use unique_index_with_prefix() instead of composing the index names manually with Uuid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-14 13:35:03 +03:00
6b4d69996c Merge pull request #5663 from meilisearch/update-version-v1.15.2
Update version for the next release (v1.15.2) in Cargo.toml
2025-06-12 16:41:47 +00:00
df4e3c2e43 Fix the version everywhere 2025-06-12 16:57:59 +02:00
e2b549c5ee Merge pull request #5668 from meilisearch/fix-must-regenerate
Various fixes to embedding regeneration
2025-06-12 14:48:38 +00:00
8390006ebf Merge pull request #5665 from meilisearch/fix-chat-route
Fix chat route missing base URL and Mistral error handling
2025-06-12 14:11:39 +00:00
7200437246 Comment the cases 2025-06-12 15:55:52 +02:00
68e7bfb37f Don't fail if you cannot render previous version 2025-06-12 15:55:33 +02:00
209c4bfc18 Switch the versions of the documents for rendering :/ 2025-06-12 15:47:47 +02:00
396d76046d Regenerate embeddings more often:
- When `regenerate` was previously `false` and became `true`
- When rendering the old version of the docs failed
2025-06-12 15:41:53 +02:00
9ae73e3c05 Better support for Mistral errors 2025-06-12 15:18:37 +02:00
933e319364 Merge pull request #5660 from meilisearch/reproduce-5650
Searchable fields aren't indexed when I add and remove them out of filterableAttributes
2025-06-12 14:46:21 +02:00
596617dd31 Make sure Mistral base url is well defined 2025-06-12 13:45:05 +02:00
f3dd6834c6 Update version for the next release (v1.15.2) in Cargo.toml 2025-06-12 10:51:09 +00:00
e8774ad079 Extract shared indices for movies and batman documents
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-12 13:46:17 +03:00
5d191c479e Skip indexing on settings update when possible,
when removing a field from the filterable settings,
this will trigger a reindexing of the negative version of the document,
which removes the document from the searchable as well because the field was considered removed.
2025-06-12 12:37:27 +02:00
c3368e6859 Merge pull request #5659 from meilisearch/tmp-release-v1.15.1
Bring back v1.15.0 and v1.15.1 changes
2025-06-12 09:16:56 +00:00
40776ed4cd add test reproducing #5650 2025-06-12 11:09:31 +02:00
9bda9a9a64 Merge remote-tracking branch 'origin/main' into tmp-release-v1.15.1 2025-06-12 10:21:07 +02:00
aefebdeb8b Merge pull request #5617 from workbackai/workback/patch/5594/FB6ED899-E821-4C88-AA79-8BB975E1937A
fix(milli/search): Cyrillic has different typo tolerance due to byte counting bug
2025-06-12 07:39:19 +00:00
646e44ddf9 Re-use the shared_index_with_score_documents since the settings are as the default
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-12 08:59:19 +03:00
9275ce1503 Merge pull request #5655 from meilisearch/update-version-v1.15.1
Update version for the next release (v1.15.1) in Cargo.toml
2025-06-11 14:54:01 +00:00
48d2d3a5cd Fix more tests 2025-06-11 14:53:34 +02:00
7ec0c9aa83 Merge pull request #5556 from meilisearch/chat-route
Chat route
2025-06-11 12:09:30 +00:00
484fdd9ce2 Fix the insta snapshots 2025-06-11 10:59:14 +02:00
7533a11143 Make sure to send the tool response before the error message 2025-06-11 10:49:21 +02:00
19d077a4b1 Update version for the next release (v1.15.1) in Cargo.toml 2025-06-11 08:35:24 +00:00
b8845d1015 Sort the imports
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 11:29:33 +03:00
620867d611 Use unique indices for the searches in non-existing indices
By using hardcoded there is a chance that the index could exist

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 11:01:05 +03:00
77cc3678b5 Make sure template errors are reported to the LLM and front-end without panicking 2025-06-11 09:27:14 +02:00
a73d3c03e9 Make the dynamic assertion for facetsByIndex JSON key broader
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 09:10:10 +03:00
824f5b12ce Formatting
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 08:54:58 +03:00
bb4baf7fae Remove useless dynamic redactions. They are covered by their .**.xyz counterparts
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 08:52:28 +03:00
0263eb0aec More assertion fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 08:42:35 +03:00
8a916a4e42 More assertion fixes
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-11 07:54:04 +03:00
506ee40dc5 Improve errors and other stuff 2025-06-10 17:52:35 +02:00
952fabf8a0 Better document function names 2025-06-10 17:01:00 +02:00
7ea2e4ec7b Better document why we duplicate structs 2025-06-10 16:51:39 +02:00
a0a4ac66ec Better document the done streamed event 2025-06-10 16:48:28 +02:00
b037e416d3 Make an unreachable case, unreachable 2025-06-10 16:43:20 +02:00
e9d547556d Better error reporting when multi choices is used 2025-06-10 16:41:02 +02:00
ab0eba2f72 Remove useless double check 2025-06-10 16:31:58 +02:00
5ceb3c6a10 Report an error when the document template max bytes is zero 2025-06-10 16:27:18 +02:00
34d572e3e5 Remove useless commented code 2025-06-10 16:17:41 +02:00
28e6adc435 Remove the SearchQuery Default impl and change the From impl 2025-06-10 16:16:11 +02:00
6a683975bf More fixes of the tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 16:58:48 +03:00
c60d11fb42 Clean up the prompts 2025-06-10 14:56:13 +02:00
32207f9f19 Rename the error code about ranking score threshold 2025-06-10 14:07:53 +02:00
7c1b15fd06 Remove useless liquid dependency for Meilisearch 2025-06-10 14:05:35 +02:00
4352a924d7 Remove useless filters parameter 2025-06-10 14:05:02 +02:00
bbe802c656 Remove the write txn method from the index scheduler 2025-06-10 14:03:05 +02:00
b32e30ad27 Make the chat setting db name a const 2025-06-10 14:02:43 +02:00
ae115cee78 Make clippy happy 2025-06-10 13:51:04 +02:00
1824fbd1b5 Introduce Index::unique_index_with_prefix(&str)
It could be used when we want to see the index name in the assertions,
e.g. `movies-[uuid]`

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:49:18 +03:00
34d8a54c4b Fix typos in comments and update assertions
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:48:59 +03:00
8fa6e8670a tests: Faster search::multi IT tests
Use shared server + unique indices where possible

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-10 14:10:43 +03:00
c640856cc1 Improve code comments 2025-06-10 11:13:32 +02:00
1a1317ab0f Make clippy happy 2025-06-10 11:12:27 +02:00
9cab754942 Update insta snapshots 2025-06-10 11:11:34 +02:00
4a0ec15ad2 Make cargo fmt happy 2025-06-10 11:00:14 +02:00
985b892b7a Add a basic chat setting validation 2025-06-10 10:57:43 +02:00
605dea4f85 Do not leak the chat "workspace" term 2025-06-10 10:34:30 +02:00
95d4775d4a Remove the preQuery chat setting 2025-06-10 10:32:58 +02:00
416fcf47f1 Use the same units 2025-06-10 10:28:06 +02:00
6433e49882 Remove useless code 2025-06-10 10:27:22 +02:00
85939ae8ad Add support for missing sources 2025-06-10 10:25:22 +02:00
e654eddf56 Improve the chat workspace REST endpoints 2025-06-10 10:21:34 +02:00
170ad87e44 Merge pull request #5622 from martin-g/faster-search-filters-it-tests
tests: Faster search::filters IT tests
2025-06-10 08:17:52 +00:00
bc56087a17 Fix the chatCompletions key 2025-06-10 10:08:01 +02:00
29d82ade56 Rename base_api into base_url 2025-06-10 09:24:07 +02:00
a7f5d3bb7a Redact the API Key when patching chat workspace settings 2025-06-10 09:21:45 +02:00
48e8356a16 Mark the non-streaming chat completions route unimplemented 2025-06-10 09:18:36 +02:00
1fda05c2fd Delete chat.rs 2025-06-09 15:26:13 +02:00
8f96724adf Set max_attempts to 400 for Server::wait_task()
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-09 14:03:49 +03:00
01e5b0effa Merge pull request #5611 from martin-g/faster-stats-mod-it-tests
tests: Faster stats::mod IT tests
2025-06-09 11:02:12 +00:00
2ec9664878 chore: Fix English grammar in SearchQueue's comments
No functional changes!

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-09 12:05:36 +02:00
7f5a0c0013 Merge pull request #5646 from meilisearch/revert-5635-prompt-for-email 2025-06-09 12:03:11 +02:00
f5c3dad3ed Revert "Prompt for Email" 2025-06-09 10:47:21 +02:00
10028515ac Use a unique server for the summarized dump creation test
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:52:05 +03:00
63ccd19ab1 Use Server::wait_task() instead of Index::wait_task() for tasks IT tests
Revert the debugging helper that dumped the thread stack traces.
Try with 400 max attempts for the task success/failure (200 secs)

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:16:50 +03:00
1b4d344e18 Increase the wait time in the tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:32 +03:00
89c0cf9b12 temporary: Dump the threads stack traces when .wait_task() times out
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:32 +03:00
3770e70581 Optimize the imports
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:31 +03:00
e497008161 Add cattos to the shared_index_with_nested_documents() as a filterable attribute
This allows to make some more search::filters IT tests using shared
server + unique/shared indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:31 +03:00
a15ebb283f Remove unused import
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:30 +03:00
3f256a7959 Use the shared index with DOCUMENTS where possible
Remove useless assertion that is covered by the earlier call of
.succeeded()

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:30 +03:00
b41af0d0f6 Formatting
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:30 +03:00
3ebff65ef3 tests: Faster search::filters IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-06 14:13:29 +03:00
717a026fdd Make sure to use the system prompt 2025-06-06 12:32:40 +02:00
70670c3be4 Introduce the support of Azure, Gemini, vLLM 2025-06-06 12:08:37 +02:00
62e2a5a324 Merge pull request #5635 from meilisearch/prompt-for-email
Prompt for Email
2025-06-05 19:18:23 +00:00
90d96ee415 Make clippy happy 2025-06-05 18:21:55 +02:00
38b317857d Improve the wording again 2025-06-05 18:19:19 +02:00
765e76857f store the email file in the global config directory instead of the local data.ms so it's shared between all instances 2025-06-05 16:01:30 +02:00
204cf423b2 Fix Docker Image 2025-06-05 15:02:09 +02:00
e575b5af74 Improve the contact email flag to make it friendly to disable prompt 2025-06-05 14:49:08 +02:00
4fc24cb691 Improve prompting again 2025-06-05 14:45:05 +02:00
8bc8484e95 Skip the prompt when the email was once provided 2025-06-05 14:43:09 +02:00
7b49c30d8c Change the email prompting 2025-06-05 12:02:30 +02:00
239851046d Send requests to Hubspot 2025-06-05 12:00:23 +02:00
60796dfb14 Disable it by default in our Docker image 2025-06-05 11:02:30 +02:00
c7cb72a77a Make sure we skip empty prompted emails 2025-06-05 10:59:06 +02:00
4d819ea636 Initial working version for a prompt for email 2025-06-05 10:54:46 +02:00
4dfb89168b Add a test for the chat route 2025-06-04 15:41:33 +02:00
258e6a115b Fix some other tests 2025-06-04 15:29:55 +02:00
666680bd87 test(meilisearch/search/locales.rs): updates snapshot
Used `cargo insta test`
Reviewed with `cargo insta review`
2025-06-04 14:18:20 +01:00
27527849bb test(meilisearch/search/locales.rs): updates snapshot
Used `cargo insta test`
Reviewed with `cargo insta review`
2025-06-04 14:17:10 +01:00
cf2bc03bed Fix the API key issue by reordering the default keys 2025-06-04 14:50:20 +02:00
1d02efeab9 Merge pull request #5615 from martin-g/faster-tasks-mod-it-tests
tests: Faster tasks::mod IT tests
2025-06-04 12:38:39 +00:00
53fc98d3b0 Merge pull request #5632 from martin-g/db-change-label
ci: Use `GITHUB_TOKEN` secret for the `db change check` workflow
2025-06-04 12:23:01 +00:00
263300b3a3 style(milli): linting 2025-06-04 12:19:00 +01:00
ab3d92d163 chore(parse_query): delete println and move test inside tests module 2025-06-04 12:19:00 +01:00
ef9fc6c854 fix(parse_query): cyrillic bug 2025-06-04 12:19:00 +01:00
61b0f50d4d Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:37:42 +03:00
0557a4dd2f Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:08:13 +03:00
930d5a09a8 Use unique server + its own index for #stats() test
Using a shared server will make this test fragile

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:08:13 +03:00
8b0c4291ae tests: Fater stats::mod IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:08:13 +03:00
c9efdf8c88 Render details.dumpUid as [dump_uid] in Value's Display
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-04 13:00:47 +03:00
72736c0ea9 Merge pull request #5627 from meilisearch/skip_remote_test
ignore flaky test
2025-06-04 08:28:24 +00:00
92d0d36ff6 Fix a bunch of snapshot tests 2025-06-04 10:25:35 +02:00
352ac759b5 Update dependencies 2025-06-04 09:35:43 +02:00
28dc7b836b Fix the chat completions feature gate 2025-06-03 17:10:53 +02:00
c4e1407e77 Fix the chat, chats, and chatsSettings actions 2025-06-03 16:11:54 +02:00
49317bbee4 Merge pull request #5625 from martin-g/faster-search-hybrid-it-tests
tests: Faster search::hybrid IT tests
2025-06-03 13:54:38 +00:00
82313a4444 Cargo fmt 2025-06-03 15:39:26 +02:00
8fdcdee0cc Do a first clippy pass 2025-06-03 15:39:26 +02:00
3c218cc3a0 Update the default chat completions prompt
Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com>
2025-06-03 15:39:26 +02:00
7d574433b6 Clean up chat completions modules a bit 2025-06-03 15:39:26 +02:00
201a808fe2 Better report errors happening with the underlying LLM 2025-06-03 15:39:26 +02:00
f827c2442c Mark tool calls to be implemented later for non-streaming 2025-06-03 15:36:35 +02:00
87d2e213f3 Update chat keys 2025-06-03 15:36:35 +02:00
3b931e75d9 Make the chats settings and chat completions route experimental 2025-06-03 15:36:35 +02:00
ae135d1d46 Implement a first version of a streamed chat API 2025-06-03 15:36:35 +02:00
0efb72fe66 Introduce the first version of the /chat route that mimics the OpenAI API 2025-06-03 15:36:35 +02:00
bed442528f Update charabia v0.9.4 2025-06-03 15:31:28 +02:00
496685fa26 Implement deserr on ChatCompletions settings structs 2025-06-03 15:31:28 +02:00
02cbcea3db Better chat completions settings management 2025-06-03 15:31:28 +02:00
0f7f5fa104 Introduce listing/getting/deleting/updating chat workspace settings 2025-06-03 15:31:28 +02:00
50fafbbc8b Implement useful conversion strategies and clean up the code 2025-06-03 15:31:28 +02:00
2821163b95 Clean up the code a bit 2025-06-03 15:31:27 +02:00
2da64e835e Factorize the code a bit more and support reporting errors 2025-06-03 15:31:27 +02:00
420c6e1932 Report the sources 2025-06-03 15:31:27 +02:00
2a067d3327 Fix compilation error in test 2025-06-03 15:31:27 +02:00
564cad1163 Call specific tools to show progression and results. 2025-06-03 15:31:27 +02:00
33dfd422db Introduce a lot of search parameters and make Deserr happy 2025-06-03 15:31:27 +02:00
036a9d5dbc Expose a well defined set of sources 2025-06-03 15:31:26 +02:00
7b74810b03 Add the index descriptions to the function description 2025-06-03 15:31:26 +02:00
3e53527bff redact the chat settings API key 2025-06-03 15:31:26 +02:00
7929872091 Better chat settings management 2025-06-03 15:31:26 +02:00
afb43d266e Correctly list the chat settings key actions 2025-06-03 15:31:26 +02:00
05828ff2c7 Always use the frequency matching strategy 2025-06-03 15:31:26 +02:00
75c3f33478 Correctly support document templates on the chat API 2025-06-03 15:31:25 +02:00
c6930c8819 Introduce the new index chat settings 2025-06-03 15:31:25 +02:00
439146289e Make sure errorneous calls are handled and forwarded to the LLM 2025-06-03 15:31:25 +02:00
6bf214bb14 Catch invalid argument calls to search function 2025-06-03 15:31:25 +02:00
fcf694026d Support multiple indexes and not only main 2025-06-03 15:31:25 +02:00
0b675bd530 Limit the number of internal loop calls and change the function name 2025-06-03 15:31:25 +02:00
7636365a65 Correctly support tenant tokens and filters 2025-06-03 15:31:24 +02:00
46680585ae Stream errors 2025-06-03 15:31:24 +02:00
bcec8d8984 Stop the stream when the connexion stops and chnage the events 2025-06-03 15:31:24 +02:00
56c1bd3afe Generate a new default chat API key 2025-06-03 15:31:24 +02:00
1a84f00fbf Change the /chat route to /chat/completions to be OpenAI-compatible 2025-06-03 15:31:24 +02:00
39320a6fce Better stop the stream 2025-06-03 15:31:24 +02:00
1d2dbcb51f Update the streaming detection to work with Mistral 2025-06-03 15:31:23 +02:00
341183cd57 Make it compatible with the Mistral API 2025-06-03 15:31:23 +02:00
b9716ec346 Support base_api in the settings 2025-06-03 15:31:03 +02:00
564f85280c Make clippy happy 2025-06-03 15:31:03 +02:00
7fa74b4931 Display pre-query prompt in search tool response 2025-06-03 15:31:03 +02:00
7d8415448c Commit when putting stuff in LMDB 2025-06-03 15:31:03 +02:00
c7839b5a84 Remove useless function 2025-06-03 15:31:03 +02:00
a52b513023 Expose new chat settings routes 2025-06-03 15:31:02 +02:00
77e03e3f8c Factorise a bit the code 2025-06-03 15:31:02 +02:00
148816a3da Display the different tool calls we need to do 2025-06-03 15:31:02 +02:00
511eef87bf Send an event with the content of the tool calling 2025-06-03 15:31:02 +02:00
aef8448fc6 Streaming supports tool calling 2025-06-03 15:31:02 +02:00
5fab2aee51 Nearly support tools on the streaming route 2025-06-03 15:31:02 +02:00
1235523918 Return the right message format 2025-06-03 15:31:01 +02:00
d4a16f2349 Aggregate tool calls and display the calls to make. 2025-06-03 15:31:01 +02:00
0f05c0eb6f Implement a first version of a streamed chat API 2025-06-03 15:31:01 +02:00
2cd85c732a Make it work by retrieving content from the index 2025-06-03 15:30:48 +02:00
82fa70da83 Support overwriten prompts of the search query 2025-06-03 15:30:48 +02:00
951be67060 Support querying the index named main 2025-06-03 15:30:48 +02:00
5400f3941a Introduce the first version of the /chat route that mimics the OpenAI API 2025-06-03 15:30:48 +02:00
af54c8381e Use ${{ github.repository }} instead of hardcoding the repo/owner
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:46:16 +03:00
693fcd5752 Try with GITHUB_TOKEN
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:40:40 +03:00
733175359a Update the new test case to use the new signature of index_with_documents_user_provided()
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:29:45 +03:00
7c6162f0bf Fix clippy error
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:26:21 +03:00
d6ae39bf0f tests: Faster search::hybrid IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 15:26:21 +03:00
e416bbc1de Merge pull request #5623 from martin-g/faster-search-geo-it-tests
tests: Faster search::geo IT tests
2025-06-03 12:25:48 +00:00
5d0d12dfbd Merge pull request #5630 from meilisearch/fix-test_meilisearch_1714
Adapt tests to the Chinese word segmenter changes
2025-06-03 12:20:08 +00:00
2cfd363dc6 Merge pull request #5619 from martin-g/faster-documents-delete_documents-it-tests
tests: Faster documents::delete_documents IT tests
2025-06-03 12:06:07 +00:00
70aa78a2c2 Remove unused import
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 14:04:15 +03:00
96c81762ed Apply suggestions from code review
Do not redactions for the snapshot assertions

Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-03 14:00:38 +03:00
0b1f634afa Remove useless code
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 13:52:55 +03:00
d3d5015854 Use the cancelled task uid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 13:50:04 +03:00
f95f29c492 Use unique server+index for list_tasks_type_filtered() test case
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-03 13:45:46 +03:00
a50b69b868 Use unique server+index for list_tasks_status_filtered() test case
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-03 13:45:17 +03:00
3668f5f021 Use unique server+index for list_tasks() test case
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-06-03 13:44:38 +03:00
54fdf379bb Use shared_does_not_exists_index() index for delete_one_document_unexisting_index() test case
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 13:41:13 +03:00
41b1cd5a73 Extract GEO_DOCUMENTS static variable and shared index with these docs
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 13:08:12 +03:00
5c14a25d5a Merge pull request #5624 from martin-g/faster-documents-get_documents-it-tests
tests: Faster documents::get_documents IT tests
2025-06-03 09:37:07 +00:00
fda2843135 Merge pull request #5621 from martin-g/faster-similar-errors-it-tests
tests: Faster similar::errors IT tests
2025-06-03 09:27:27 +00:00
9347330f3a Merge pull request #5620 from martin-g/faster-search-distinct-it-tests
tests: Faster search::distinct IT tests
2025-06-03 09:24:39 +00:00
56c9190dab Merge pull request #5618 from martin-g/faster-vector-binary_quantized-it-tests
tests: Faster vector::binary_quantized IT tests
2025-06-03 09:20:08 +00:00
6b986dceaf Merge pull request #5607 from martin-g/faster-settings-get_settings-it-tests
tests: Faster settings::get_settings IT tests
2025-06-03 08:53:17 +00:00
cb7bb36080 update charabia v0.9.6 2025-06-03 10:48:41 +02:00
161cb736ea Adapt tests to the Chinese word segmenter changes
The new Chinese segmenter is splitting words in smaller parts.
The words `小化妆包` was previously seegmented as `小 / 化妆包` and is now segmented as `小 / 化妆 / 包`,
which changes the tests results.
2025-06-03 10:37:29 +02:00
ea6bb4df1d Merge pull request #5614 from meilisearch/fix-hybrid-distinct
Fix distinct for hybrid search
2025-06-03 07:20:55 +00:00
a3d2f64725 tests: Faster search::distinct IT tests
Use shared server + unique indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-03 08:23:26 +03:00
d5526cffff Merge pull request #5527 from nnethercott/all-cpus-in-import-dump
Use all CPUs during an import dump
2025-06-02 15:24:59 +00:00
5cb75d1f2a ignore flaky test 2025-06-02 17:06:53 +02:00
921e3c4ffe tests: Faster documents::get_documents IT tests
Use shared server + unique index

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 15:36:08 +03:00
52591761af tests: Faster search::geo IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 15:32:32 +03:00
f80182f0a9 tests: Faster similar::errors IT tests
Use shared server + unique indices

Related to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 15:20:17 +03:00
3b30b6a57a tests: Faster documents::delete_documents IT tests
Use shared server + unique indices
Assert .succeeded()/.failed() for the waited tasks

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 15:04:48 +03:00
5efc78db55 tests: Faster vector::binary_quantized IT tests
Use shared server + unique indices where possible

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 14:47:18 +03:00
cffbe3fcb6 Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 14:17:19 +03:00
8d8fcb9846 Revert to unique server + named index for some tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 11:44:21 +03:00
20049669c9 Merge pull request #5600 from martin-g/faster-search-facet_search-it-tests
tests: Faster search::facet_search IT tests
2025-06-02 08:39:30 +00:00
db28d13cb1 Remove useless assertion.
.succeeded() does the same

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 10:59:46 +03:00
5a7cfc57fd tests: Faster tasks::mode IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 10:56:43 +03:00
790621dc29 Remove useless assert
Co-authored-by: Many the fish <many@meilisearch.com>
2025-06-02 10:55:28 +03:00
1d577ae98b Merge pull request #5610 from martin-g/faster-settings-tokenizer_customization-it-tests
tests: Faster settings::tokenizer_customization IT tests
2025-06-02 07:09:41 +00:00
88e9a55d44 Merge pull request #5609 from martin-g/faster-settings-proximity_settings-it-tests
tests: Faster settings::proximity_settings IT tests
2025-06-02 07:09:06 +00:00
dbe551cf99 Merge pull request #5606 from martin-g/faster-settings-distinct-it-tests
tests: Faster settings::distinct IT tests
2025-06-02 07:07:23 +00:00
a299fbd33b Merge pull request #5605 from martin-g/faster-search-restricted_searchable-it-tests
tests: Faster search::restricted_searchable IT tests
2025-06-02 07:06:50 +00:00
193119acb9 Merge pull request #5604 from martin-g/search-pagination-it-tests
tests: search::pagination IT tests
2025-06-02 07:05:52 +00:00
4c71118699 Merge pull request #5602 from martin-g/faster-search-matching_strategy-it-tests
tests: Faster search::matching_strategy IT tests
2025-06-02 07:04:43 +00:00
5fe2943d3c Merge pull request #5601 from martin-g/faster-search-locales-it-tests
tests: Faster search::locales IT tests
2025-06-02 07:02:28 +00:00
86ff502327 Merge pull request #5599 from martin-g/faster-index-search-errors-tests
tests: Faster search::errors IT tests
2025-06-02 06:54:32 +00:00
6b1a345dce tests: Faster settings::tokenizer_customization IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 08:23:09 +03:00
b54ece690b tests: Faster settings::proximity_settings IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-06-02 08:20:05 +03:00
3ea167bade tests: Faster settings::get_settings IT tests
Use shared server + unique indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-30 16:33:27 +03:00
1158d6689f tests: Faster settings::distinct IT tests
Use shared server + unique indices

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-30 15:41:31 +03:00
d9b0463a0b tests: Faster search::restricted_searchable IT tests
Use shared server + unique indices

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-30 15:37:27 +03:00
ae9899f179 tests: search::pagination IT tests
Minor cleanup.

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-30 15:26:55 +03:00
308fd7128e Fix clippy errors
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 11:36:56 +03:00
27e7c00622 Add dynamic redactions for taskUid and enqueuedAt properties
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 11:33:10 +03:00
58207da934 Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 10:56:33 +03:00
fb8b832192 Trigger build
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 10:54:31 +03:00
17207b5405 tests: Faster search::matching_strategy IT tests
Use shared server + unique indices for all tests

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 09:09:02 +03:00
bd95503eba tests: Faster search::locales IT tests
Use a shared server + unique indices where possible

Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 09:03:23 +03:00
8b8b0d802c tests: Faster search::facet_search IT tests
Use shared server + unique indices where possible.
Assert .succeeded() for the waited tasks.
Drop usage of dbg!() in the assertions. It caused noise in the logs

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 08:53:10 +03:00
d329e86250 tests: Use shared server + unique server where possible
Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-29 08:42:10 +03:00
d416b3b390 Merge pull request #5592 from nnethercott/extract-geo-facets-seperately
Decouple geo facet extraction from rest of document
2025-05-28 16:22:10 +00:00
54f5e74744 Support distinct in hybrid search 2025-05-28 17:58:58 +02:00
fd4b192a39 Add distinct_fid function and expose distinct_single_docid 2025-05-28 17:58:58 +02:00
3c13feebf7 Test that distinct is applied for hybrid search 2025-05-28 17:58:58 +02:00
1811168b96 remove duplicated check on geo field changes 2025-05-28 15:45:13 +02:00
b06cc1e0a2 Update crates/milli/src/update/new/extract/faceted/extract_facets.rs
Co-authored-by: Many the fish <many@meilisearch.com>
2025-05-28 15:38:23 +02:00
44f812c36d Update crates/milli/src/update/new/extract/faceted/extract_facets.rs
Co-authored-by: Many the fish <many@meilisearch.com>
2025-05-28 15:38:12 +02:00
c8e77b5f25 Merge pull request #5574 from martin-g/faster-add_documents-it-tests
perf: Faster integration tests for add_documents.rs
2025-05-28 13:13:38 +00:00
283f516e15 Merge pull request #5579 from martin-g/faster-index-update_index-it-tests
perf: Faster index::update_index IT tests
2025-05-28 13:11:56 +00:00
b4ca0a8c98 Update the tests related to updating indices
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:02:41 +03:00
b658e38acd Fix formatting
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:02:41 +03:00
f87e46cc16 Ignore the result from #wait_task()
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:02:41 +03:00
65354b414a Update crates/meilisearch/tests/index/update_index.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-05-28 15:02:40 +03:00
025df397c0 Update crates/meilisearch/tests/index/update_index.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-05-28 15:02:40 +03:00
f77abc9dc8 Update crates/meilisearch/tests/index/update_index.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-05-28 15:02:40 +03:00
7e9909ee45 perf: Faster index::update_index IT tests
Use a shared server where possible.
Assert succeeded/failed task waits.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:02:40 +03:00
43ec97fe45 format the code
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 15:01:04 +03:00
02929e241b Update the status code
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:36:13 +03:00
c13efde042 uuid is a production dependency of meili-snap
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:35:50 +03:00
36f0a1492c Apply suggestions from code review
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-05-28 14:22:04 +03:00
ce65ad213b Add dynamic redactions for uid, batchUid and taskUid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:22:04 +03:00
3e0de6cb83 Wait for the batched tasks bu their real uid.
Some of them succeed, others fail.

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:22:04 +03:00
f3d691667d Use a Regex in insta dynamic redaction to replace Uuids with [uuid]
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:22:01 +03:00
ce9c930d10 Fix clippy and fmt
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:21:25 +03:00
fc88b003b4 Use shared server and unique indices for add_documents IT tests
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:20:07 +03:00
cf5d26124a Call .succeeded() or .failed() on the waited task
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:18:34 +03:00
38b1c57fa8 Faster IT tests for add_documents.rs
Use Shared server where possible

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-28 14:18:33 +03:00
25c525b057 Merge pull request #5589 from mcmah309/typo_fix
Typo fix
2025-05-28 11:02:22 +00:00
83cd28b60b Merge pull request #5584 from martin-g/faster-index-search-mod-tests
tests: Faster index::search::mod IT tests
2025-05-28 08:40:37 +00:00
48cad4132a Fix clippy - ignore code variable
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-27 16:44:57 +03:00
4897ad99d0 Wait for the add_documents task
Format the code

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-27 14:26:29 +03:00
5b67de0367 Merge pull request #5593 from meilisearch/remove-template-checker
Remove TemplateChecker
2025-05-27 09:11:51 +00:00
46ff78b4ec Update the regex to replace all occurrences of uuids in the redaction
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-27 11:47:02 +03:00
5810fb239f Reference PR in comments 2025-05-27 10:24:04 +02:00
b007ed6be9 Remove TemplateChecker 2025-05-27 10:04:14 +02:00
9ad43b6841 rename has_changed to has_changed_for_facets 2025-05-26 18:37:20 +02:00
c9ec502ed9 refactor for readability 2025-05-26 18:32:59 +02:00
18aed75d3b fix logic 2025-05-26 18:20:55 +02:00
6738a4f6ee feat: mettre a jour the insta snapshots 2025-05-26 16:36:36 +02:00
a1ff41cabb Merge pull request #5541 from meilisearch/deactivate-numbers-in-typos-enhancements
Minor fixes: Deactivate numbers in typos
2025-05-26 14:36:21 +00:00
d2948adea3 Migrate more tests to assert with "[uuid]" instead of real Uuid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-26 14:31:58 +03:00
f54b57e5be Use a Regex in insta dynamic redaction to replace Uuids with [uuid]
(cherry picked from commit f8b8c6ab71a28052cf9b271ca8aa5d4175f9e8f9)
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-26 14:03:48 +03:00
95821d0bde refactor: update macro 2025-05-26 10:07:13 +02:00
f690fa0686 feat: add macro_rules to factorize 2025-05-26 09:46:14 +02:00
24e94b28c1 feat: uncouple geo extraction from full doc 2025-05-26 09:22:20 +02:00
34d58f35c8 Print [uuid] instead of the Uuid index name for MeilisearchHttpError::Milli errors
This way the tests' assertions/snapshots for unique indices would be stable

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-25 15:48:55 +03:00
1d5265caf4 Fix typo in method name 2025-05-22 14:25:04 +00:00
97aeb6db4d Merge pull request #5548 from lblack00/attributes-to-search-on-nested-fields
Added support for nested wildcards to attributes_to_search_on
2025-05-22 13:58:23 +00:00
ff64c64abe Merge pull request #5587 from meilisearch/fix-derivations-again
Fix another derivation-related panic in the search
2025-05-22 13:39:04 +00:00
ee326a1ecc Merge pull request #5588 from meilisearch/rename-batch-stopped-reason
Rename batch creation complete
2025-05-22 12:39:34 +00:00
c204a7bb12 Update snapshots 2025-05-22 12:39:37 +02:00
cf4798bd2b Change batch stop reason messages to match the new batch_strategy API name 2025-05-22 12:20:17 +02:00
4d761d3444 Rename batch_creation_complete to batch_strategy 2025-05-22 12:19:54 +02:00
c9b78970c9 Remove lambdas from the find_*_derivations
Make sure their number of insert in the interner are bounded
2025-05-22 11:06:14 +02:00
ae3c4e27c4 Merge pull request #5557 from meilisearch/update-charabia-v0.9.4
Update charabia v0.9.5
2025-05-21 10:56:41 +00:00
1b718afd11 Update charabia removing a lot of dependencies 2025-05-21 11:52:19 +02:00
01ef055f40 Update charabia v0.9.4 2025-05-21 11:52:19 +02:00
f888f87635 Updated formatting using RustFmt 2025-05-21 02:07:25 -07:00
293a425183 Apply suggestions from code review
Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com>
2025-05-21 10:49:43 +02:00
699ec18de8 Fix warnings 2025-05-21 10:49:43 +02:00
73e4206b3c Pass a progress callback to recompute_word_fst_from_word_docids_database
fixes https://github.com/meilisearch/meilisearch/pull/5494#discussion_r2069377991
2025-05-21 10:49:43 +02:00
a964251cee Remove useless reset
fixes https://github.com/meilisearch/meilisearch/pull/5494#discussion_r2069373494
2025-05-21 10:49:43 +02:00
8c8d98eeaa Use shared server and unique indices for all tests where possible
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-21 10:48:20 +03:00
c5ae43cac6 Updated all additional test cases 2025-05-20 09:03:26 -07:00
57eecd6197 Remove an empty line
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-20 14:37:45 +03:00
2fe5c78cb6 tests: Faster index::search::mod IT tests
* Use shared index where possible.
* Call .succeeded/.failed when waiting for a task.
* Use newer format_args syntax
* Do not use fully qualified name for meili_snap:: functions. The
  functions are already imported in scope

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-20 14:26:26 +03:00
8068337b07 Merge pull request #5573 from CodeMan62/fix-5555
Only intern in case of typo when looking for one or two typoes
2025-05-20 09:17:35 +00:00
8047cfe438 Merge pull request #5580 from martin-g/better-assertions-index-delete_index-it-tests
tests: Assert succeeded/failed for the index::delete_index IT tests
2025-05-20 08:49:24 +00:00
f26826f115 fix issue 5555 2025-05-20 10:41:32 +02:00
5717e5c1af Merge pull request #5578 from martin-g/faster-index-get_index-it-tests
perf: Faster index::get_index IT tests
2025-05-20 08:41:11 +00:00
bb07038c31 tests: Assert succeeded/failed for the index::delete_index IT tests
Related-to: https://github.com/meilisearch/meilisearch/issues/4840

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 16:57:53 +03:00
d1a088ea0b Format the code
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 16:52:43 +03:00
b68e22c0e6 Revert the improvements for get_and_paginate_indexes()
Because they won't work in multi-threaded execution of the tests

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 16:36:45 +03:00
03a36f116e 1. Use a unique Server for no_index_return_empty_list test
... because a Shared one could see indices created by other tests

2. List at least 1000 indices to make sure we get the newly created ones
   in list_multiple_indexes()

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 16:20:16 +03:00
8a0bf24ed5 Merge pull request #5572 from martin-g/faster-stats-it-tests
perf: Faster IT tests - stats.rs
2025-05-19 12:44:08 +00:00
e2763471e5 Faster index::get_index IT tests
Use shared server for all tests in get_index.rs

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 15:36:25 +03:00
b2f2c5d69f Remove an assertion of a task uid.
It differs for every run of the IT test suite.

Format the imports

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-19 14:44:08 +03:00
e547bfb428 Merge pull request #5577 from meilisearch/comment-out-swarmia
Comment out swarmia deployment for now
2025-05-19 10:13:41 +00:00
1594c54e23 Provide more information about resulting documents on test case 2025-05-19 02:37:23 -07:00
768cfb6c2d Comment out swarmia deployment for now 2025-05-19 11:34:21 +02:00
13b607bd68 Removed matches_wildcard_pattern() and integrated match_pattern() into attributes_to_search_on(), updated test cases 2025-05-18 20:24:52 -07:00
3d130d31c8 Do not hard code the non-exiting index name/uid
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-16 15:49:50 +03:00
4cda584b0c Fix the build of stats.rs
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
2025-05-16 15:45:25 +03:00
248c90bad5 removing .await 2025-05-16 15:29:24 +03:00
0e9040e605 remove warnings 2025-05-16 15:29:23 +03:00
3e3c00f44c fix for test failure 2025-05-16 15:29:23 +03:00
d986a3bbaf Changes to index and expected_response as per feedback 2025-05-16 15:29:22 +03:00
c2ceb8e41b Improve Integration tests in the file stats.rs 2025-05-16 15:29:18 +03:00
a25eb9c136 Merge pull request #5566 from meilisearch/bad-max-total-hits
Forbid 0 in maxTotalHits
2025-05-15 15:01:22 +00:00
cc2011a27f Merge pull request #5565 from meilisearch/fix-0-batched-task
Fix 0 batched task
2025-05-15 12:41:48 +00:00
604e156c2b add the snapshots 2025-05-15 11:35:31 +02:00
1d6777ee68 Forbid 0 in maxTotalHits 2025-05-15 11:32:08 +02:00
79db2e67fb refactor: prefer helper over explicit pool construction
Co-authored-by: Many the fish <many@meilisearch.com>
2025-05-15 11:24:34 +02:00
0940f0e4f4 add a test 2025-05-15 11:10:08 +02:00
d40290aaaf Merge pull request #5560 from meilisearch/experimental-no-snapshot-compression
Add an experimental cli flag to disable snapshot compaction
2025-05-15 07:51:06 +00:00
865f24cfef refactor: helper methods for pool and max threads 2025-05-14 23:45:24 +02:00
fd2de7c668 Merge pull request #5564 from meilisearch/dont-intern-without-typo-v15
Port to v1.15: Only intern in case of single-typo when looking for single typos
2025-05-14 16:30:57 +00:00
448564b674 Merge pull request #5563 from meilisearch/fix-swarmia-deploy
Fix swarmia deployment
2025-05-14 16:12:05 +00:00
c5dd8e7d6f Add test 2025-05-14 17:36:09 +02:00
c9b4c1fb81 Only intern in case of single-typo when looking for single typos 2025-05-14 17:36:03 +02:00
0f10ec96af Fix swarmia deployment 2025-05-14 17:35:47 +02:00
8608d10fa2 Don't process any tasks if the max number of batched tasks is set to 0 2025-05-14 17:09:10 +02:00
83e71cd7b9 Add an experimental cli flag to disable snapshot compaction 2025-05-14 15:59:35 +02:00
3fbe1df770 Updated nested_search_all_details_with_deep_wildcard() to test deeply nested attributes 2025-05-14 00:18:30 -07:00
150d1db86b Implemented integration tests for restrict_searchable.rs on nested wildcard attributes 2025-05-13 21:44:24 -07:00
806e983aa5 fix: lazy computation in thread default
Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com>
2025-05-13 14:14:48 +02:00
e96c1d4b0f style: change fmt from empty str to "unlimited" 2025-05-13 12:16:34 +02:00
15cdc6924b refactor: remove runtime cfg!(test) check
Won't work in integration tests and consequently all threads would be
used. To remedy this we make explicit `max_threads=Some(1)` in the
IndexerConfig::default
2025-05-13 09:18:19 +02:00
677e8b122c Merge pull request #5551 from meilisearch/dont-intern-without-typo
Only intern in case of single-typo when looking for single typos
2025-05-12 20:23:39 +00:00
75a7e40a27 Merge branch 'main' into all-cpus-in-import-dump 2025-05-12 21:48:12 +02:00
d9a527854a Merge pull request #5546 from meilisearch/curquiza-patch-1
Add set in GitHub action to notify deployment to Swarmia
2025-05-12 13:36:13 +00:00
e4f05326be Merge pull request #5552 from meilisearch/v1-15-dumpless-upgrade
Add v1.15 in index-scheduler upgrade
2025-05-12 12:59:47 +00:00
d99419acfb Add a NoOp operation in index update 2025-05-12 14:19:15 +02:00
f349630e78 Add v1.15 in index-scheduler upgrade 2025-05-12 13:53:23 +02:00
c8939944c6 Add test 2025-05-12 12:40:55 +02:00
4e6252fb03 Only intern in case of single-typo when looking for single typos 2025-05-12 11:59:21 +02:00
2d1412afce Merge pull request #5549 from meilisearch/update-version-v1.15.0
Update version for the next release (v1.15.0) in Cargo.toml
2025-05-12 09:21:43 +00:00
0f4536df2d Adapt dumpless upgrade tests 2025-05-12 10:43:12 +02:00
3531efb169 Update version for the next release (v1.15.0) in Cargo.toml 2025-05-12 08:04:18 +00:00
8bd8e744f3 Attributes to search on supports nested wildcards 2025-05-09 02:42:48 -07:00
6ec430b633 Update .github/workflows/publish-docker-images.yml 2025-05-08 20:08:34 +02:00
4041978402 Add set in GitHub action to notify deployment to Swarmia 2025-05-08 20:07:36 +02:00
53f32a7dd7 refactor: change thread_pool from Option<ThreadPoolNoAbort> to
ThreadPoolNoAbort
2025-05-07 17:00:08 +02:00
47a7ed93d3 feat: Make MaxThreads None by default 2025-05-06 09:11:55 +02:00
71ab11f1fe Merge pull request #5523 from meilisearch/rollback-updates
Allow rolling back updates
2025-05-05 09:53:56 +00:00
436776cdbf Merge pull request #5535 from meilisearch/filter-comparison-string
Allow lexicographic filtering of strings
2025-05-05 09:53:19 +00:00
96bc519f9e Merge pull request #5494 from meilisearch/deactivate-numbers-in-typos
Deactivate numbers in typos
2025-05-05 09:19:53 +00:00
2ac826edca Apply suggested changes
Co-authored-by: Clément Renault <renault.cle@gmail.com>

Update crates/meilisearch/src/lib.rs

Co-authored-by: Clément Renault <renault.cle@gmail.com>
2025-05-01 16:12:06 +02:00
8b23eddc10 Dumpless upgrade 2025-04-30 18:03:50 +02:00
185f2b8f74 Fix test now that lexicographic string comparisons are allowed 2025-04-30 17:28:59 +02:00
c0e987979a Allow lexicographic string comparisons 2025-04-30 17:28:49 +02:00
89aff2081c Fix clippy warnings 2025-04-30 14:17:32 +02:00
032c67662d Merge pull request #5533 from ZeroZ-lab/fix-readme
Fix links and formatting in README.md for clarity and consistency
2025-04-29 22:11:36 +00:00
03f59786c2 Fix links and formatting in README.md for clarity and consistency 2025-04-30 00:10:41 +08:00
f7c1f19dd8 rust fmt 2025-04-29 16:10:43 +02:00
1542ff30ae Roll back index scheduler version first 2025-04-29 16:05:43 +02:00
20d0aa499a Apply suggestions from code review
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-04-29 16:03:30 +02:00
0cb2bf34a5 Fix test 2025-04-29 14:47:30 +02:00
de03b7e437 Merge pull request #5530 from meilisearch/rename-batcher-stopped-because
Rename `batcherStoppedBecause` to `batchCreationComplete`
2025-04-29 10:35:57 +00:00
a315726f96 Update snapshots 2025-04-29 11:50:32 +02:00
91d2a07499 Rename batcherStoppedBecause to batchCreationComplete 2025-04-29 10:40:12 +02:00
3b773b3416 Revert thread_pool type back to Option in config 2025-04-28 11:56:37 +02:00
648b2876f6 Create temp threadpool with all CPUs in dump 2025-04-27 00:52:10 +02:00
c5360bcdbf When canceling an upgrade task, execute the rollback code 2025-04-24 16:59:03 +02:00
1bdc08a73a tick: always refuse to batch tasks when the version in the index-scheduler is wrong 2025-04-24 16:54:43 +02:00
63b5e21ae1 tick: check tasks to cancel before checking for upgrade tasks 2025-04-24 16:52:28 +02:00
eb0b5239cb process rollback 2025-04-24 16:52:28 +02:00
121c1ac1dd Upgrade supports cancelling 2025-04-24 16:08:10 +02:00
b82dda2d0d Allow rolling back indexes in the mapper 2025-04-24 16:08:10 +02:00
ea9330e9c9 Add new errors when there is a version mismatch between the bin and index or index-scheduler 2025-04-24 16:08:10 +02:00
b6a9d8d2ac Add Error::RollbackFailed 2025-04-24 16:06:19 +02:00
a03eef6511 Support rollback 2025-04-24 16:06:19 +02:00
42fae9994d Move tests out of index.rs 2025-04-24 16:06:19 +02:00
e1aa534389 Wait 10 seconds in case of irrecoverable error 2025-04-24 16:06:19 +02:00
49add50cb3 Make version constants u32 2025-04-24 16:06:19 +02:00
29b947ee43 make Index::get_version public 2025-04-24 16:05:52 +02:00
3f683c4238 Merge pull request #5525 from meilisearch/arroy-call-tracking
Display the time spent querying the vector store
2025-04-23 20:37:26 +00:00
294ccb6f44 Add test 2025-04-23 16:57:50 +02:00
63a4dfa2a8 Add disableOnNumber setting 2025-04-23 16:57:50 +02:00
3b8965bc76 Display and sum the time spent in arroy 2025-04-22 18:10:42 +02:00
9fd9fcb03e Merge pull request #5512 from DanasFi/task_queue_metrics
Task queue metrics
2025-04-17 09:38:25 +00:00
30805bbed5 Merge pull request #5520 from meilisearch/remove-ph-banner
Remove ProductHunt banner
2025-04-17 09:29:36 +00:00
2984be880f Add task queue metrics to grafana dashboard 2025-04-17 10:49:04 +02:00
fd0623c085 Fix typo in function to get size until task queue stops 2025-04-17 10:48:56 +02:00
eeb33b913c Corrected metric for task queue total size 2025-04-17 10:46:26 +02:00
3d93efc6aa Added metric to check task queue size until stop 2025-04-17 10:46:25 +02:00
425ef1b205 Added task queue used size metric 2025-04-17 10:45:02 +02:00
f607449cb7 Added metric for task queue total size. 2025-04-17 10:45:02 +02:00
e9b4794f2b Merge pull request #5488 from meilisearch/try-batch-end-reason
add "batcher stopped because" field to batch objects
2025-04-17 08:26:31 +00:00
c413855156 Merge pull request #5519 from meilisearch/fix-ruleset-workflow
Fix ruleset workflow
2025-04-17 07:08:07 +00:00
7cdb4aa473 Remove ProductHunt banner 2025-04-16 18:45:37 +02:00
bfe4968d7e Debug and change the method to get the env content 2025-04-16 18:15:36 +02:00
7372083a5a Do not trigger ruleset workflow when closing a milestone 2025-04-16 18:14:12 +02:00
8cecc6989a Merge pull request #5513 from meilisearch/bump-prometheus-protobuf
Bump prometheus protobuf
2025-04-16 09:15:29 +00:00
1f1edd6e25 Fix prometheus function signature to use strings instead of strs 2025-04-16 10:30:55 +02:00
bc5efa9a76 Bump prometheus to v0.14.0 2025-04-16 10:30:25 +02:00
3ec5b9d488 Merge pull request #5487 from HDT3213/bugfix/geosort
fix ranking rules after _geo do not work
2025-04-15 13:29:07 +00:00
b61eb19601 Fix snapshots 2025-04-15 15:13:53 +02:00
231a027c7d Use TaskKindCannotBeBatched for task deletion, upgrade database and snapshot creation 2025-04-15 15:13:53 +02:00
f8ff91ed30 Add BatchReason::TaskKindCannotBeBatched 2025-04-15 15:13:53 +02:00
b73660fa8e Update crates/index-scheduler/src/scheduler/test_document_addition.rs
fix comment in test

Co-authored-by: Tamo <tamo@meilisearch.com>
2025-04-15 14:48:11 +02:00
55adbac2dd Apply suggestions from code review 2025-04-15 14:43:07 +02:00
fd7fbfa9eb Refactor geo_max_bucket_size injection 2025-04-15 20:24:04 +08:00
3a93f88ba6 Merge pull request #5498 from meilisearch/snapshot-no-compaction
Stop compacting the snapshot
2025-04-15 08:30:40 +00:00
7c1c4f9c26 fix test_geo_sort_reached_max_bucket_size 2025-04-15 08:19:22 +08:00
1f5412003d optimize test suite 2025-04-15 07:17:47 +08:00
5da92a3d53 test geo sort reached max_bucket_size 2025-04-14 23:14:17 +08:00
c4a8b84dc0 code style 2025-04-14 23:04:17 +08:00
ffe3faeca7 cargo fmt 2025-04-14 23:04:17 +08:00
0f07cfed14 GeoSort support max_bucket_size and distance_error_margin configuration 2025-04-14 23:04:17 +08:00
326a728434 fix code style 2025-04-14 23:04:17 +08:00
e4733dcd42 fix ranking rules after _geo do not work 2025-04-14 23:04:17 +08:00
a500fa053c Merge pull request #5509 from meilisearch/release-v1.14.0-tmp
Bring back changes from v1.14.0 to main
2025-04-14 13:59:23 +00:00
61db56f785 remove duplicated test 2025-04-14 14:55:57 +02:00
235556d699 Merge pull request #5485 from meilisearch/dependabot/github_actions/actions/checkout-3
Bump actions/checkout from 1 to 3
2025-04-14 11:40:37 +00:00
a3a1065c16 Merge pull request #5497 from meilisearch/dependabot/cargo/tokio-1.43.1
Bump tokio from 1.42.0 to 1.43.1
2025-04-14 11:40:13 +00:00
b025f1bcf1 Merge branch 'main' into release-v1.14.0-tmp 2025-04-14 12:35:47 +02:00
707d106a24 Merge pull request #5482 from meilisearch/dependabot/github_actions/actions/github-script-7
Bump actions/github-script from 6 to 7
2025-04-14 09:53:41 +00:00
97d6726291 Merge pull request #5483 from meilisearch/dependabot/github_actions/Swatinem/rust-cache-2.7.8
Bump Swatinem/rust-cache from 2.7.7 to 2.7.8
2025-04-14 09:53:32 +00:00
82fa571ef7 Merge pull request #5503 from meilisearch/dependabot/cargo/crossbeam-channel-0.5.15
Bump crossbeam-channel from 0.5.14 to 0.5.15
2025-04-14 09:53:03 +00:00
5d453e6049 Bump crossbeam-channel from 0.5.14 to 0.5.15
Bumps [crossbeam-channel](https://github.com/crossbeam-rs/crossbeam) from 0.5.14 to 0.5.15.
- [Release notes](https://github.com/crossbeam-rs/crossbeam/releases)
- [Changelog](https://github.com/crossbeam-rs/crossbeam/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crossbeam-rs/crossbeam/compare/crossbeam-channel-0.5.14...crossbeam-channel-0.5.15)

---
updated-dependencies:
- dependency-name: crossbeam-channel
  dependency-version: 0.5.15
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-10 14:44:12 +00:00
9e7d7beb4a stop compacting the snapshot 2025-04-08 14:53:58 +02:00
a225ab2637 Bump tokio from 1.42.0 to 1.43.1
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.42.0 to 1.43.1.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.42.0...tokio-1.43.1)

---
updated-dependencies:
- dependency-name: tokio
  dependency-version: 1.43.1
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-08 02:13:40 +00:00
94b43001db Merge pull request #5492 from meilisearch/accept-cancelation-tasks-when-disk-full
make meilisearch accept cancelation tasks even when the disk is full
2025-04-03 15:46:46 +00:00
796a325972 Fix typos
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-04-03 15:53:42 +02:00
1db550ec7f make meilisearch accept cancelation tasks even when the disk is full 2025-04-03 15:47:56 +02:00
c3c5a928e4 Merge pull request #5486 from CodeMan62/fix-network-url-validation-error-msg
Update network URL validation error message format to match expected
2025-04-03 10:42:33 +00:00
c4787760d3 add test 2025-04-03 11:57:43 +02:00
7ca2a8eb6f Use url::Url::parse to check the url 2025-04-03 11:57:36 +02:00
c1c065079f Fix snapshots again 2025-04-03 10:51:57 +02:00
1cca4abf5a Replace batch stop reason when deleting index 2025-04-03 10:33:59 +02:00
bd172bf68a Fix more snapshots 2025-04-03 10:30:03 +02:00
70ed6ba798 fix test + change name 2025-04-02 17:56:34 +02:00
f3ab940776 Make it compile 2025-04-02 17:14:40 +02:00
87547550f5 patch reasons 2025-04-02 16:10:11 +02:00
e067d796b3 Improve the primary key stop reasons error messages 2025-04-02 15:56:56 +02:00
c2ff4dd3b2 Apply cargo fmt changes 2025-04-02 19:08:46 +05:30
31bda976f2 WIP 2025-04-02 15:29:47 +02:00
fce0fa9c57 Update network URL validation error message format to match expected pattern 2025-04-02 00:19:50 +05:30
a10efedd2f Bump actions/checkout from 1 to 3
Bumps [actions/checkout](https://github.com/actions/checkout) from 1 to 3.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v1...v3)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '3'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-01 17:31:28 +00:00
55ec96d31a Bump Swatinem/rust-cache from 2.7.7 to 2.7.8
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.7 to 2.7.8.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.7.7...v2.7.8)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-version: 2.7.8
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-01 17:31:18 +00:00
4249630791 Bump actions/github-script from 6 to 7
Bumps [actions/github-script](https://github.com/actions/github-script) from 6 to 7.
- [Release notes](https://github.com/actions/github-script/releases)
- [Commits](https://github.com/actions/github-script/compare/v6...v7)

---
updated-dependencies:
- dependency-name: actions/github-script
  dependency-version: '7'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-01 17:31:14 +00:00
418fa47963 Merge pull request #5313 from barloes/fixRankingScoreThresholdRankingIssue
fix for rankingScoreThreshold changes the results' ranking
2025-04-01 13:10:55 +00:00
0656a0d515 Optimize roaring operation
Co-authored-by: Many the fish <many@meilisearch.com>
2025-04-01 14:25:27 +02:00
19f4c1ac98 Merge pull request #5480 from meilisearch/bump-rustc-version
Bump Rust version to 1.85.1
2025-04-01 11:51:36 +00:00
a0bfcf8872 Make cargo fmt happy 2025-04-01 11:27:41 +02:00
64477aac60 Box the large GeoError error variant 2025-04-01 11:26:34 +02:00
4d90e3d2ec Make Cargo and Clippy happy 2025-04-01 11:26:34 +02:00
4ab547c6fa Merge pull request #5471 from HDT3213/feat/ecPrivateKey
Support EC private key
2025-04-01 08:55:29 +00:00
e36a8c50b9 Merge pull request #5478 from meilisearch/enforce-embedding-dimensions
Enforce embedding dimensions
2025-03-31 15:31:29 +00:00
249da5846c Bump version in Dockerfile 2025-03-31 16:46:12 +02:00
ee15d4fe77 Bump version in the CIs 2025-03-31 16:45:08 +02:00
f0f6c3000f Bump version in the rust-toolchain TOML 2025-03-31 16:43:36 +02:00
08ff135ad6 Fix test 2025-03-31 15:27:49 +02:00
f729864466 Check dimension mismatch at insertion time 2025-03-31 15:27:49 +02:00
94ea263bef Add new error for dimensions mismatch during indexing 2025-03-31 15:27:49 +02:00
85efa6f493 Use ref instead of clone in option.rs 2025-03-31 20:31:26 +08:00
0e475cb5e6 fix warn and show what meilisearch understood of the vectors in the cursed test 2025-03-31 13:49:22 +02:00
62de70b73c Document problematic case in test and acknowledge PR comment 2025-03-31 13:49:22 +02:00
7707fb18dd add embedding with dimension mismatch test case 2025-03-31 13:49:22 +02:00
ba6d755120 Support EC private key 2025-03-27 21:30:08 +08:00
5607802fe1 Merge pull request #5449 from vuthanhtung2412/fix-dim-mismatch
Display more detailed error message instead of panic on embeddings dimension mismatch
2025-03-27 10:52:23 +00:00
bb2e9419d3 Merge pull request #5468 from meilisearch/more-precise-post-processing
More Precise Post Processing
2025-03-27 10:07:09 +00:00
a8afd5dbcb fix warn and show what meilisearch understood of the vectors in the cursed test 2025-03-27 11:07:01 +01:00
cf68713145 Merge pull request #5465 from meilisearch/improve-stats-perf
Improve documents stats performances
2025-03-27 09:20:14 +00:00
55f620a986 Merge pull request #5425 from CodeMan62/enhance-filterable-error-messages
Enhance filterable error messages
2025-03-27 09:18:37 +00:00
811143cbe9 Add more progress precision when doing post processing 2025-03-27 10:17:28 +01:00
c670e9a39b Make sure the snaps are happy 2025-03-26 20:03:35 +01:00
be6abb952d Merge pull request #5466 from meilisearch/update-charabia-v0.9.3
Update charabia v0.9.3
2025-03-26 18:23:31 +00:00
2f07afa97e Update Charabia v0.9.3 2025-03-26 17:43:19 +01:00
65f1b13475 Merge pull request #5464 from meilisearch/camel-case-database-sizes
Prefer camelCase for internal database sizes db name
2025-03-26 16:40:39 +00:00
db7ce03763 Improve the performances of computing the size of the documents database 2025-03-26 17:40:12 +01:00
7ed9adde29 Prefer camelCase for internal database sizes db name 2025-03-26 16:45:52 +01:00
bf3a29b60d Document problematic case in test and acknowledge PR comment 2025-03-26 12:57:25 +01:00
9ce7ccfbe7 Merge pull request #5457 from meilisearch/show-database-sizes-changes
Show database sizes batches
2025-03-26 10:19:40 +00:00
3deb1ef78f Fix the snapshots again 2025-03-26 10:38:49 +01:00
5820d822c8 Add more details about the finalizing progress step 2025-03-26 09:49:43 +01:00
637bea0370 Compute and store the database sizes 2025-03-26 09:49:42 +01:00
3acf036526 fix: improve error messages for filterable attributes and fix formatting 2025-03-25 21:44:39 +05:30
fd079c6757 Add an index method to get the database sizes 2025-03-25 16:30:51 +01:00
182e5d5632 Add database sizes stats to the batches 2025-03-25 16:30:15 +01:00
eefefc482b Merge pull request #5446 from shaokeyibb/main
Fix _matchesPosition length calculation
2025-03-25 14:16:38 +00:00
43c8a206b4 detail comments 2025-03-25 13:07:17 +01:00
a8c407fa36 fix failing tests 2025-03-25 13:06:11 +01:00
18bc56f1fa update cargo insta 2025-03-25 12:54:49 +01:00
38b3e03dde add embedding with dimension mismatch test case 2025-03-25 12:51:36 +01:00
82aee6a9af Merge pull request #5415 from meilisearch/isolate-word-fst-usage
Isolate word fst usage
2025-03-25 11:43:37 +00:00
6b1c262b74 fix all tests 2025-03-25 12:43:15 +01:00
0f654e45c9 Merge pull request #5458 from meilisearch/update-again-ph-link
Fix the PH link on the README
2025-03-25 11:27:31 +00:00
d71c6f3483 allow multiple embeddings per document per embedder to pass 2025-03-25 12:04:25 +01:00
8b4166410c Fix the PH link on the README 2025-03-25 11:45:47 +01:00
9d3037aa1a Fix clippy error 2025-03-25 18:12:36 +08:00
5414887bff Merge pull request #5455 from meilisearch/update-readme-ph-link
Fix the Product Hunt link
2025-03-25 09:44:09 +00:00
03a0550b63 Fix the Product Hunt link to link to meilisearch-ai 2025-03-25 10:00:24 +01:00
fca947219f Merge pull request #5402 from meilisearch/do-not-reindex-searchable-order-change
Avoid reindexing searchable order changes
2025-03-25 07:03:14 +00:00
fb7ae9f97f Merge pull request #5454 from meilisearch/update-charabia-v0.9.3
Update Charabia v0.9.3
2025-03-24 22:34:51 +00:00
cd421fea1e Merge pull request #5456 from meilisearch/fix-CI
Fix CI to work with merge queues
2025-03-25 09:55:59 +00:00
1ad4235beb Remove the bors file 2025-03-25 10:05:41 +01:00
de6c7e551e Remove bors references from the repository 2025-03-25 10:04:38 +01:00
c0fe70c5f0 Make the CI work with merge queue grouping 2025-03-25 10:04:24 +01:00
2800e42243 Separate calc_byte_length function 2025-03-25 00:47:17 +08:00
a09d08c7b6 Avoid reindexing searchable order changes
Update settings.rs

Update settings.rs
2025-03-24 16:26:52 +01:00
2e6aa63efc Update Charabia v0.9.3 2025-03-24 14:32:21 +01:00
5759afac41 Merge pull request #5424 from shu-kitamura/split-tasks-test
Split unit test in tasks.rs
2025-03-24 09:55:50 +00:00
868c902935 fix meilisearch integration vector tests 2025-03-24 00:24:50 +01:00
e019ad7692 Display more detailed error message instead of panic 2025-03-21 15:41:31 +01:00
1f67f373d1 fixed all the tests failing with "cargo insta test --accept" 2025-03-20 22:51:56 +05:30
2c0bd35923 Merge pull request #5447 from meilisearch/clean-up-bors
Remove bors references from the repository
2025-03-20 16:11:11 +00:00
b3aaa64de5 Remove the bors file 2025-03-20 16:28:08 +01:00
7b3072ad28 Remove bors references from the repository 2025-03-20 15:57:05 +01:00
db26c1e5bf Merge pull request #5395 from meilisearch/update-process-for-dumpless-upgrade
Update process for dumpless upgrade
2025-03-20 13:42:50 +00:00
9aee12c906 fixed the failing tests from snapshots 2025-03-20 17:55:12 +05:30
debd2b21b8 Merge branch 'meilisearch:main' into main 2025-03-20 20:10:00 +08:00
39aca661dd Make _matchesPosition length byte based instead of char based 2025-03-20 20:02:51 +08:00
5b51e8a083 simplify the sprint issue to only tell you to add a label on your PR 2025-03-20 12:41:34 +01:00
3928fb36b3 Introduce a second GitHub action that posts the right message when we declare there are db changes 2025-03-20 12:41:34 +01:00
2ddc1d2258 update the CI to enforce the db change label on PR 2025-03-20 12:41:34 +01:00
7c267a8a0e update the issue template for the sprint issue 2025-03-20 12:41:34 +01:00
d39d915a7e Merge pull request #5445 from meilisearch/support-merge-grouping
Make the CI work with merge queue grouping
2025-03-20 12:30:52 +01:00
3160ddf9df Make the CI work with merge queue grouping 2025-03-20 12:29:08 +01:00
d286e63f15 Merge pull request #5444 from meilisearch/setup-ci-with-rulesets
Setup the Milestone CI to update the Ruleset
2025-03-20 12:12:57 +01:00
9ee6254eec Setup the Milestone CI to update the Ruleset 2025-03-20 11:28:03 +01:00
e2c824a7cd fixed all test fails in the run 2025-03-20 15:21:47 +05:30
0dd65caffe test: update test snapshots to match new error message format 2025-03-20 10:59:21 +05:30
4397b7d170 chore: revert Cargo.lock changes 2025-03-20 10:54:14 +05:30
15db203b7d refactor: update error message format for filterable attributes 2025-03-20 00:08:37 +05:30
041f635214 Fix: Add #[allow(dead_code)] to format_invalid_filter_distribution function 2025-03-19 20:13:28 +05:30
f9807ba32e Fix logic when results are below the threshold 2025-03-19 11:34:53 +01:00
8c8cc59a6c remove new line added by accident 2025-03-19 11:34:53 +01:00
f540a69ac3 add 1 to index so it points to correct position 2025-03-19 11:34:52 +01:00
537bf27e7c Update crates/meilisearch/src/routes/tasks_test.rs
Co-authored-by: Many the fish <many@meilisearch.com>
2025-03-19 19:11:04 +09:00
7df2bdfb15 Merge #5436
5436: Update mini-dashboard to v0.2.19 version r=Kerollmops a=curquiza

Fixes mini dashboard to prevent the panel from popping up every time

Fixed by `@mdubus` 👍 

Co-authored-by: curquiza <clementine@meilisearch.com>
2025-03-18 16:24:31 +00:00
71f7456748 Update mini-dashboard to v0.2.19 version 2025-03-18 12:48:38 +01:00
cf31a65a88 Merge pull request #5431 from meilisearch/add-ph-readme-banner
Display the ProductHunt banner on the README
2025-03-18 11:26:45 +01:00
0f7d71041f Display the ProductHunt banner on the README 2025-03-18 11:21:07 +01:00
c98b313d03 Merge #5426
5426: Bump zip from 2.2.2 to 2.3.0 r=Kerollmops a=dependabot[bot]

Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/zip-rs/zip2/releases">zip's releases</a>.</em></p>
<blockquote>
<h2>v2.3.0</h2>
<h3><!-- raw HTML omitted -->🚀 Features</h3>
<ul>
<li>Add support for NTFS extra field (<a href="https://redirect.github.com/zip-rs/zip2/pull/279">#279</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->🐛 Bug Fixes</h3>
<ul>
<li><em>(test)</em> Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/pull/308">#308</a>)</li>
<li>fix failing tests, remove symlink loop check</li>
<li>Canonicalize output path to avoid false negatives</li>
<li>Symlink handling in stream extraction</li>
<li>Canonicalize output paths and symlink targets, and ensure they descend from the destination</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/pull/310">#310</a>)</li>
</ul>
<h2>v2.2.3</h2>
<h3><!-- raw HTML omitted -->🚜 Refactor</h3>
<ul>
<li>Change the inner structure of <code>DateTime</code> (<a href="https://redirect.github.com/zip-rs/zip2/issues/267">#267</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>cargo fix --edition</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md">zip's changelog</a>.</em></p>
<blockquote>
<h2><a href="https://github.com/zip-rs/zip2/compare/v2.2.3...v2.3.0">2.3.0</a> - 2025-03-16</h2>
<h3><!-- raw HTML omitted -->🚀 Features</h3>
<ul>
<li>Add support for NTFS extra field (<a href="https://redirect.github.com/zip-rs/zip2/pull/279">#279</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->🐛 Bug Fixes</h3>
<ul>
<li><em>(test)</em> Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/pull/308">#308</a>)</li>
<li>fix failing tests, remove symlink loop check</li>
<li>Canonicalize output path to avoid false negatives</li>
<li>Symlink handling in stream extraction</li>
<li>Canonicalize output paths and symlink targets, and ensure they descend from the destination</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/pull/310">#310</a>)</li>
</ul>
<h2><a href="https://github.com/zip-rs/zip2/compare/v2.2.2...v2.2.3">2.2.3</a> - 2025-02-26</h2>
<h3><!-- raw HTML omitted -->🚜 Refactor</h3>
<ul>
<li>Change the inner structure of <code>DateTime</code> (<a href="https://redirect.github.com/zip-rs/zip2/issues/267">#267</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>cargo fix --edition</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="6eab5f5cc6"><code>6eab5f5</code></a> chore: release v2.3.0 (<a href="https://redirect.github.com/zip-rs/zip2/issues/300">#300</a>)</li>
<li><a href="e4aee2050f"><code>e4aee20</code></a> implement <code>ZipFile::options</code> + refactor options normalization (<a href="https://redirect.github.com/zip-rs/zip2/issues/305">#305</a>)</li>
<li><a href="ea8a7bba24"><code>ea8a7bb</code></a> fix(test): Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/issues/308">#308</a>)</li>
<li><a href="365c81a39f"><code>365c81a</code></a> Use <code>xz2</code> crate instead of a custom implementation (<a href="https://redirect.github.com/zip-rs/zip2/issues/306">#306</a>)</li>
<li><a href="ae94b3452b"><code>ae94b34</code></a> chore: Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/issues/310">#310</a>)</li>
<li><a href="a2e062f370"><code>a2e062f</code></a> Merge commit from fork</li>
<li><a href="0199ac2cb8"><code>0199ac2</code></a> Simplify handling for symlink targets</li>
<li><a href="977bb9479d"><code>977bb94</code></a> fix failing tests, remove symlink loop check</li>
<li><a href="3cb29e70d1"><code>3cb29e7</code></a> Partial fix for tests</li>
<li><a href="2182b07686"><code>2182b07</code></a> Refactor</li>
<li>Additional commits viewable in <a href="https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=zip&package-manager=cargo&previous-version=2.2.2&new-version=2.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-18 08:57:11 +00:00
69678ed8e1 Bump zip from 2.2.2 to 2.3.0
Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
- [Release notes](https://github.com/zip-rs/zip2/releases)
- [Changelog](https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md)
- [Commits](https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0)

---
updated-dependencies:
- dependency-name: zip
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-18 00:19:49 +00:00
91d221ebe7 revert: Remove unintended Cargo.lock changes 2025-03-17 22:13:59 +05:30
9162e8ba04 Enhance error messages for filterable attributes and improve error handling 2025-03-17 22:04:18 +05:30
2118cc092e rm db.snapshot 2025-03-17 23:04:13 +09:00
c7564d500f Split unit test in tasks.rs 2025-03-17 22:55:23 +09:00
bf144a94d8 No longer use the FST to find a word without any typo 2025-03-17 14:20:10 +01:00
b0b1888ef9 Add test 2025-03-17 14:20:10 +01:00
6ec1d2b712 Merge #5423
5423: Bump ring to v0.17.14 to compile on old aarch64 r=irevoire a=Kerollmops

This PR will fix [this CI issue](https://github.com/meilisearch/meilisearch/actions/runs/13896085925/job/38876941154) where ring v0.17.13 breaks the compilation on old aarch64 machines by bumping its version to v0.17.14.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:53:02 +00:00
cbdf80893d Merge #5422
5422: Add more progress levels to measure merging r=Kerollmops a=Kerollmops

I found out that Meilisearch was not correctly reporting the long indexing times in the progress and that a lot of time was spent on extracting words with all documents already extracted. The reason was that there was no step to report merging the cache and sending the entries to write to the writer thread. This PR adds these entries to the progress.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:02:46 +00:00
e2156ddfc7 Simplify the IndexingStep progress enum 2025-03-17 11:40:50 +01:00
49dd50dab2 Bump ring to v0.17.14 to compile on old aarch64 2025-03-17 11:29:17 +01:00
13a88d6131 Merge #5407
5407: Geo update bug r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #5380
Fixes #5399



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-17 10:24:33 +00:00
d9875b782d Merge #5421
5421: Accept total batch size in human size r=irevoire a=Kerollmops

This PR fixes the new `experimental-limit-batched-tasks-total-size` to accept human-defined sizes in bytes.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 09:41:22 +00:00
cb16baab18 Add more progress levels to measure merging 2025-03-17 10:13:29 +01:00
2500e3c067 Merge #5414
5414: Update version for the next release (v1.14.0) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Fixes https://github.com/meilisearch/meilisearch/issues/5268.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-14 13:35:54 +00:00
d3e4b2dfe7 Accept total batch size in human size 2025-03-14 13:07:51 +01:00
2a46624e19 Merge #5420
5420: Add support for the progress API of arroy r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5419

## What does this PR do?
- Convert the arroy progress to the meilisearch progress
- Use the new arroy closure to support the progress of arroy


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-13 18:03:08 +00:00
009c36a4d0 Add support for the progress API of arroy 2025-03-13 19:00:43 +01:00
2a47e25e6d Update the upgrade path snap 2025-03-13 18:35:06 +01:00
82912e191b Merge #5418
5418: Cache embeddings in search r=Kerollmops a=dureuill

# Pull Request

## Related issue
TBD

## What does this PR do?
- Adds a cache for embeddings produced in search
- The cache is disabled by default, and can be enabled following the instructions [here](https://github.com/orgs/meilisearch/discussions/818).
- Had to accommodate the `timeout` test for openai that uses a mock that simulates a timeout on subsequent responses: since the test was reusing the same query, the cache would kick-in and no request would be made to the mock, meaning no timeout any longer and so a failing test 😅 
- `Embedder::embed_search` now accepts a reference instead of an owned `String`.

## Manual testing

- I created 4 indexes on a fresh DB with the same settings (one embedder from openai)
- I sent 1/4 of movies.json to each index
- I sent a federated search request against all 4 indexes, with the same query for each index, using the embedder of each index.

Results:

- The first call took 400ms to 1s. Before this change, it took in the 3s range.
- Any repeated call with the same query took in the range of 25ms.
- Looking at the details at trace log level, I can see that the first index that needs the embedding is taking most of the 400ms in `embed_one`. The other indexes report that the query text is found in the cache and they each take a few µs.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-13 16:37:15 +00:00
e2d372823a Disable the cache by default and make it experimental 2025-03-13 17:22:51 +01:00
1876132172 Mutex-based implementation 2025-03-13 17:22:50 +01:00
d0b0b90d17 fixup tests, in particular foil the cache for the timeout test 2025-03-13 17:22:50 +01:00
b08544e86d Add embedding cache 2025-03-13 17:22:50 +01:00
d9111fe8ce Add lru crate to milli again 2025-03-13 17:22:50 +01:00
41d8161017 Update the versions 2025-03-13 17:22:32 +01:00
7df5715d39 Merge pull request #5406 from meilisearch/bump-heed
Bump heed to v0.22 and arroy to v0.6
2025-03-13 16:52:45 +01:00
5fe02ab5e0 Move to heed 0.22 and arroy 0.6 2025-03-13 15:48:18 +01:00
5ef7767429 Let arroy use all the memory available instead of 50% of the 70% 2025-03-13 15:06:03 +01:00
3fad48167b remove arroy dependency in the index-scheduler 2025-03-13 14:57:56 +01:00
a92a48b9b9 Do not recompute stats on dumpless upgrade
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-13 13:58:58 +01:00
d53225bf64 uses a random seed instead of 42 2025-03-13 12:43:31 +01:00
20896500c2 Bump arroy to the latest version 2025-03-13 12:37:10 +01:00
1af520077c Call the underlying Env::copy_to_path method 2025-03-13 11:49:25 +01:00
7e07cb9de1 Make meilitool prefer WithoutTls Env 2025-03-13 11:47:19 +01:00
a12b06d99d Merge #5369
5369: exhaustive facet search r=ManyTheFish a=ManyTheFish

Fixes #5403

This PR adds an `exhaustiveFacetCount` field to the `/facet-search` API allowing the end-user to have a better facet count when having a distinct attribute set in the index settings.

 # Usage

`POST /index/:index_uid/facet-search`
**Body:**
```json
{
  "facetQuery": "blob",
  "facetName": "genres",
  "q": "",
  "exhaustiveFacetCount": true
}
```

# Prototype Docker images

```sh
$ docker pull getmeili/meilisearch:prototype-exhaustive-facet-search-00
```

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-13 10:36:04 +00:00
331dc3d241 Add a comment to explain why we keep debug assertions 2025-03-13 11:29:00 +01:00
ef9d9f8481 set the memory in arroy 2025-03-13 11:29:00 +01:00
d3d22d8ed4 Prefer waiting for the task before getting the indexes 2025-03-13 11:29:00 +01:00
5e6abcf50c Prefer using WithoutTls for the auth env 2025-03-13 11:29:00 +01:00
a4aaf932ba Fix some test (invalid anyway) 2025-03-13 11:29:00 +01:00
16c962eb30 Enable debug assertions of heed 2025-03-13 11:07:49 +01:00
55ca2c4481 Avoid opening the Auth environment multiple times 2025-03-13 11:07:49 +01:00
fedb444e66 Fix the upgrade arroy calls 2025-03-13 11:07:49 +01:00
bef5954741 Use a WithoutTls env 2025-03-13 11:07:49 +01:00
ff8cf38d6b Move to the latest version of arroy 2025-03-13 11:07:48 +01:00
f8ac575ec5 Move to the latest version of arroy 2025-03-13 11:07:48 +01:00
566b4efb06 Dumpless upgrade from v1.13 to v1.14 2025-03-13 11:07:44 +01:00
1d499ed9b2 Use the new arroy upgrade method to move from 0.4 to 0.5 2025-03-13 11:07:44 +01:00
3bc62f0549 WIP: Still need to introduce a Env::copy_to_path method 2025-03-13 11:07:39 +01:00
21bbbdec76 Specify WithoutTls everywhere 2025-03-13 11:07:38 +01:00
78ebd8dba2 Fix the error variants 2025-03-13 11:07:38 +01:00
34df44a002 Open Env without TLS 2025-03-13 11:07:38 +01:00
48a27f669e Bump heed and other dependencies 2025-03-13 11:07:37 +01:00
e2d0ce52ba Merge #5384
5384: Get multiple documents by ids r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5345 

## What does this PR do?
- Implements [public usage](https://www.notion.so/meilisearch/Get-documents-by-ID-1994b06b651f805ba273e1c6b75ce4d8)
- Slightly refactor error messages for the `/similar` route

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-12 17:26:49 +00:00
995f8962bd Merge #5398
5398: Bump ring from 0.17.8 to 0.17.13 r=Kerollmops a=dependabot[bot]

Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/briansmith/ring/blob/main/RELEASES.md">ring's changelog</a>.</em></p>
<blockquote>
<h1>Version 0.17.13 (2025-03-06)</h1>
<p>Increased MSRV to 1.66.0 to avoid bugs in earlier versions so that we can
safely use <code>core::arch::x86_64::__cpuid</code> and <code>core::arch::x86::__cpuid</code> from
Rust in future releases.</p>
<p>AVX2-based VAES-CLMUL implementation. This will be a notable performance
improvement for most newish x86-64 systems. This will likely raise the minimum
binutils version supported for very old Linux distros.</p>
<h1>Version 0.17.12 (2025-03-05)</h1>
<p>Bug fix: <a href="https://redirect.github.com/briansmith/ring/pull/2447">briansmith/ring#2447</a> for denial of service (DoS).</p>
<ul>
<li>
<p>Fixes a panic in <code>ring::aead::quic::HeaderProtectionKey::new_mask()</code> when
integer overflow checking is enabled. In the QUIC protocol, an attacker can
induce this panic by sending a specially-crafted packet. Even unintentionally
it is likely to occur in 1 out of every 2**32 packets sent and/or received.</p>
</li>
<li>
<p>Fixes a panic on 64-bit targets in <code>ring::aead::{AES_128_GCM, AES_256_GCM}</code>
when overflow checking is enabled, when encrypting/decrypting approximately
68,719,476,700 bytes (about 64 gigabytes) of data in a single chunk. Protocols
like TLS and SSH are not affected by this because those protocols break large
amounts of data into small chunks. Similarly, most applications will not
attempt to encrypt/decrypt 64GB of data in one chunk.</p>
</li>
</ul>
<p>Overflow checking is not enabled in release mode by default, but
<code>RUSTFLAGS=&quot;-C overflow-checks&quot;</code> or <code>overflow-checks = true</code> in the Cargo.toml
profile can override this. Overflow checking is usually enabled by default in
debug mode.</p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/briansmith/ring/commits">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=ring&package-manager=cargo&previous-version=0.17.8&new-version=0.17.13)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-12 13:20:30 +00:00
1cd00f37c0 Merge #5413
5413: Make sure to delete useless prefixes r=ManyTheFish a=Kerollmops

We discovered a bug where the new indexer was still writing empty roaring bitmaps instead of deleting the prefix entry from the prefix database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-12 10:54:04 +00:00
1aa3375e12 Update version for the next release (v1.14.0) in Cargo.toml 2025-03-12 10:51:04 +00:00
60ff1b19a8 Searching for a document that does not exist no longer raises an error 2025-03-12 11:50:39 +01:00
7df5e3f059 Fix error message
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-12 11:48:40 +01:00
0197dc87e0 Make sure to delete useless prefixes 2025-03-12 11:24:13 +01:00
7a172b82ca Add test 2025-03-12 11:22:59 +01:00
eb3ff325d1 Add an exhaustiveFacetCount field to the facet-search API 2025-03-12 11:22:59 +01:00
d3cd5ea689 Check if the geo fields changed in addition to the other faceted fields when reindexing facets 2025-03-12 11:20:10 +01:00
3ed43f9097 add a failing test reproducing the bug 2025-03-12 11:20:10 +01:00
a2a86ef4e2 Merge #5254
5254: Granular Filterable attribute settings r=ManyTheFish a=ManyTheFish

# Related
**Issue:** https://github.com/meilisearch/meilisearch/issues/5163
**PRD:** https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-filterableAttributes-1764b06b651f80aba8bdf359b2df3ca8

# Summary
Change the `filterableAttributes` settings to let users choose which facet features they want to activate.
Deactivating a feature will avoid some database computation in the indexing process and save time and disk size.

# Example

`PATCH /indexes/:index_uid/settings`

```json
{
  "filterableAttributes": [
    {
      "patterns": [
        "cattos",
        "doggos.age"
      ],
      "features": {
        "facetSearch": false,
        "filter": {
          "equality": true,
          "comparison": false
        }
      }
    }
  ]
}
```

# Impact on the codebase
- Settings API:
  - `/settings`
  - `/settings/filterable-attributes`
  - OpenAPI 
  - may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- Database:
  - Filterable attributes format changed
  - Faceted field_ids are no longer stored in the database
  - FieldIdsMap no longer contains nonexistent fields
- Search:
  - Search using filters
  - Facet search
  - `Attributes` ranking rule
  - Distinct attribute
  - Facet distribution
- Settings reindexing:
  - searchable
  - facet
  - vector
  - geo
- Document indexing:
  - searchable
  - facet
  - vector
  - geo
- Dump import

# Note for the reviewers
The changes are huge and have been split into separate commits, each with a dedicated explanation; I suggest reviewing the commits one by one.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-12 09:00:43 +00:00
d500c7f625 Add default deserialize value 2025-03-11 17:55:49 +01:00
ea7e299663 Update has_changed_for_fields documentation 2025-03-11 16:48:55 +01:00
a370b467fe Merge MetadataBuilder::_new into MetadataBuilder::new 2025-03-11 15:31:57 +01:00
8790880589 Fix clippy 2025-03-11 15:22:39 +01:00
7072fe9780 Fix typos in comments and messages 2025-03-11 15:22:00 +01:00
d0dda78f3d Merge #5401
5401: Make composite embedders an experimental feature r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Introduce new `compositeEmbedders` experimental feature
- Guard `source = "composite"` and `searchEmbedder`, `indexingEmbedder` behind enabling the feature.
- Update tests accordingly

## Dumpless upgrade

- Adding an experimental feature is never a breaking change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-11 14:20:36 +00:00
fa8afc5cfd Style change after review
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-11 13:25:35 +01:00
6d52c6e711 Merge branch 'main' into granular-filterable-attributes 2025-03-11 10:05:58 +01:00
dfb8411647 Revert "Remove filter pre-check"
This reverts commit b12ffd1356.
2025-03-11 09:48:30 +01:00
6269f757ff Revert document creation in tests 2025-03-10 18:35:10 +01:00
40c5f911fd Revert metadata creation when computing the facet-distribution 2025-03-10 17:05:41 +01:00
abef655849 Revert metadata creation when computing facet search and distinct 2025-03-10 15:45:59 +01:00
b12ffd1356 Remove filter pre-check 2025-03-10 14:29:45 +01:00
c9a4c6ed96 Revert metadata creation when computing filters at search time 2025-03-10 14:29:44 +01:00
aa32b719c7 Add tests about experimentalness of the feature and fix existing 2025-03-10 14:23:22 +01:00
41d2b1e52b Analytics 2025-03-10 14:23:07 +01:00
54ee81bb09 Make composite embedders experimental 2025-03-10 14:22:47 +01:00
689e69d6d2 Take into account PR messages 2025-03-10 13:46:33 +01:00
9d9e0d4c54 Add analytics 2025-03-10 11:33:15 +01:00
19c9caed39 Fix tests 2025-03-10 11:11:48 +01:00
21c3b3957e tests: Change get_document_by_filter to fetch_documents 2025-03-10 11:11:48 +01:00
f292fc9ac0 Add ids parameter to GET documents and POST documents/fetch 2025-03-10 11:11:48 +01:00
1d3c4642a6 Don't use Deserr for ExternalDocumentId, instead convert to error afterward 2025-03-10 11:11:48 +01:00
9a282be0a2 Merge #5393
Some checks failed
Test suite / Tests on ubuntu-22.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 11s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Clippy (push) Successful in 15m20s
Test suite / Run Rustfmt (push) Successful in 2m40s
Run the indexing fuzzer / Setup the action (push) Failing after 1h10m55s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5393: Bring back changes from v1.13.3 into main r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Strift <lau.cazanove@gmail.com>
2025-03-10 07:57:02 +00:00
bea28968a0 Bump ring from 0.17.8 to 0.17.13
Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13.
- [Changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md)
- [Commits](https://github.com/briansmith/ring/commits)

---
updated-dependencies:
- dependency-name: ring
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-07 17:04:57 +00:00
ed1dcbe0f7 Fix behavior change in the Attributes criterion 2025-03-06 14:18:25 +01:00
5ceddbda84 Add the max_weight of the weight map if it's lacking 2025-03-06 13:58:28 +01:00
ca41ce3bbd Old indexer document addition now checks if facet search is globally activated 2025-03-06 11:43:42 +01:00
8ec0c322ea Apply PR requests related to Refactor the FieldIdMapWithMetadata 2025-03-06 11:42:53 +01:00
b88aa9cc76 Rely on FieldIdMapWithMetadata in facet search and filters 2025-03-05 18:22:12 +01:00
3fd86e8d76 Merge #5371
Some checks failed
Test suite / Tests on ubuntu-22.04 (push) Failing after 12s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 8s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Successful in 7m1s
Test suite / Run Rustfmt (push) Successful in 2m44s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m40s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5371: Composite embedders r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Implement [public usage](https://www.notion.so/meilisearch/Composite-embedder-usage-14a4b06b651f81859dc3df21e8cd02a0)
- Refactor the way we check if a parameter is mandatory/allowed/disallowed for a given source
- Take the "nesting context" into account when computing whether a parameter is mandatory/allowed/disallowed
- Add tests checking all parameters with all sources, and made sure the results didn't change compared with v1.13

## Dumpless Upgrade

- This adds a new value for an existing parameter => compatible without change
- This adds new optional parameters => compatible without change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-05 17:18:11 +00:00
67f7470c83 Apply PR requests related to Refactor search and facet-search 2025-03-05 18:17:42 +01:00
4fab72cbea Rename SettingsDiff::diff to SettingsDiff::apply_and_diff 2025-03-05 18:16:57 +01:00
afb4b9677f Remove Embedder:embed 2025-03-05 18:16:57 +01:00
73d2dbd60f Error handling 2025-03-05 18:16:57 +01:00
57a6beee30 Test composite embedders 2025-03-05 18:16:57 +01:00
b190b612a3 Add test on all parameters 2025-03-05 18:16:57 +01:00
111e77eff2 Bump mini-dashboard to v0.2.18 2025-03-05 15:24:53 +01:00
ba30747de3 Bump v1.13.2 to v1.13.3 in the TOMLs and snaps 2025-03-05 15:24:53 +01:00
25f0536f5a Update version for the next release (v1.13.3) in Cargo.toml 2025-03-05 15:24:52 +01:00
c8c0951c43 Update the snapshots 2025-03-05 15:24:21 +01:00
63e753bde0 Apply PR requests related to settings API 2025-03-05 12:05:40 +01:00
5fa4b5c50a Add a test on filterable attributes rules priority
**Changes:**
- Add a new test playing with filterable attributes rules priority
- Optimize the faceted field selector to avoid matching false positives
2025-03-05 09:44:52 +01:00
a7a62e5e4c Add some documentation in modules 2025-03-05 08:49:18 +01:00
683a2ac685 Merge #5379
Some checks failed
Publish binaries to GitHub release / Check the version validity (push) Failing after 57s
Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on ubuntu-22.04 (push) Failing after 6s
Test suite / Tests almost all features (push) Failing after 6s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 5s
Test suite / Run tests in debug (push) Failing after 6s
Test suite / Run Clippy (push) Failing after 6s
Test suite / Run Rustfmt (push) Failing after 6s
SDKs tests / define-docker-image (push) Failing after 15s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
Publish images to Docker Hub / docker (push) Has been cancelled
5379: Bring back the changes from v1.13.2 into main r=dureuill a=Kerollmops



Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-04 13:24:25 +00:00
e751342dfb Merge #5370
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Test with Ollama (push) Failing after 6m46s
Test suite / Run Clippy (push) Successful in 6m23s
Test suite / Run Rustfmt (push) Failing after 16s
Test suite / Tests on ubuntu-22.04 (push) Failing after 7m19s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m28s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5370: Introduce a CI to check milestones and branches r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 15:51:52 +00:00
17bf82235d Merge #5381
5381: Bump actions/checkout from 1 to 3 r=Kerollmops a=dependabot[bot]

Bumps [actions/checkout](https://github.com/actions/checkout) from 1 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/actions/checkout/releases">actions/checkout's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Updated to the node16 runtime by default
<ul>
<li>This requires a minimum <a href="https://github.com/actions/runner/releases/tag/v2.285.0">Actions Runner</a> version of v2.285.0 to run, which is by default available in GHES 3.4 or later.</li>
</ul>
</li>
</ul>
<h2>v2.7.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Add new public key for known_hosts (<a href="https://redirect.github.com/actions/checkout/issues/1237">#1237</a>) by <a href="https://github.com/TingluoHuang"><code>@​TingluoHuang</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1238">actions/checkout#1238</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2.6.0...v2.7.0">https://github.com/actions/checkout/compare/v2.6.0...v2.7.0</a></p>
<h2>v2.6.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Add backports to v2 branch by <a href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1040">actions/checkout#1040</a>
<ul>
<li>Includes backports from the following changes: <a href="https://redirect.github.com/actions/checkout/pull/964">actions/checkout#964</a>, <a href="https://redirect.github.com/actions/checkout/pull/1002">actions/checkout#1002</a>, <a href="https://redirect.github.com/actions/checkout/pull/1029">actions/checkout#1029</a></li>
<li>Upgraded the licensed version to match what is used in v3.</li>
</ul>
</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2.5.0...v2.6.0">https://github.com/actions/checkout/compare/v2.5.0...v2.6.0</a></p>
<h2>v2.5.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Update <code>@​actions/core</code> to 1.10.0 by <a href="https://github.com/rentziass"><code>@​rentziass</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/962">actions/checkout#962</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2...v2.5.0">https://github.com/actions/checkout/compare/v2...v2.5.0</a></p>
<h2>v2.4.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Add set-safe-directory input to allow customers to take control. (<a href="https://redirect.github.com/actions/checkout/issues/770">#770</a>) by <a href="https://github.com/TingluoHuang"><code>@​TingluoHuang</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/776">actions/checkout#776</a></li>
<li>Prepare changelog for v2.4.2. by <a href="https://github.com/TingluoHuang"><code>@​TingluoHuang</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/778">actions/checkout#778</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2...v2.4.2">https://github.com/actions/checkout/compare/v2...v2.4.2</a></p>
<h2>v2.4.1</h2>
<ul>
<li>Fixed an issue where checkout failed to run in container jobs due to the new git setting <code>safe.directory</code></li>
</ul>
<h2>v2.4.0</h2>
<ul>
<li>Convert SSH URLs like <code>org-&lt;ORG_ID&gt;@github.com:</code> to <code>https://github.com/</code> - <a href="https://redirect.github.com/actions/checkout/pull/621">pr</a></li>
</ul>
<h2>v2.3.5</h2>
<p>Update dependencies</p>
<h2>v2.3.4</h2>
<ul>
<li><a href="https://redirect.github.com/actions/checkout/pull/379">Add missing <code>await</code>s</a></li>
<li><a href="https://redirect.github.com/actions/checkout/pull/360">Swap to Environment Files</a></li>
</ul>
<h2>v2.3.3</h2>
<ul>
<li><a href="https://redirect.github.com/actions/checkout/pull/345">Remove Unneeded commit information from build logs</a></li>
<li><a href="https://redirect.github.com/actions/checkout/pull/326">Add Licensed to verify third party dependencies</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/actions/checkout/blob/main/CHANGELOG.md">actions/checkout's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>v4.2.2</h2>
<ul>
<li><code>url-helper.ts</code> now leverages well-known environment variables by <a href="https://github.com/jww3"><code>@​jww3</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1941">actions/checkout#1941</a></li>
<li>Expand unit test coverage for <code>isGhes</code> by <a href="https://github.com/jww3"><code>@​jww3</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1946">actions/checkout#1946</a></li>
</ul>
<h2>v4.2.1</h2>
<ul>
<li>Check out other refs/* by commit if provided, fall back to ref by <a href="https://github.com/orhantoy"><code>@​orhantoy</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1924">actions/checkout#1924</a></li>
</ul>
<h2>v4.2.0</h2>
<ul>
<li>Add Ref and Commit outputs by <a href="https://github.com/lucacome"><code>@​lucacome</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1180">actions/checkout#1180</a></li>
<li>Dependency updates by <a href="https://github.com/dependabot"><code>@​dependabot</code></a>- <a href="https://redirect.github.com/actions/checkout/pull/1777">actions/checkout#1777</a>, <a href="https://redirect.github.com/actions/checkout/pull/1872">actions/checkout#1872</a></li>
</ul>
<h2>v4.1.7</h2>
<ul>
<li>Bump the minor-npm-dependencies group across 1 directory with 4 updates by <a href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1739">actions/checkout#1739</a></li>
<li>Bump actions/checkout from 3 to 4 by <a href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1697">actions/checkout#1697</a></li>
<li>Check out other refs/* by commit by <a href="https://github.com/orhantoy"><code>@​orhantoy</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1774">actions/checkout#1774</a></li>
<li>Pin actions/checkout's own workflows to a known, good, stable version. by <a href="https://github.com/jww3"><code>@​jww3</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1776">actions/checkout#1776</a></li>
</ul>
<h2>v4.1.6</h2>
<ul>
<li>Check platform to set archive extension appropriately by <a href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1732">actions/checkout#1732</a></li>
</ul>
<h2>v4.1.5</h2>
<ul>
<li>Update NPM dependencies by <a href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1703">actions/checkout#1703</a></li>
<li>Bump github/codeql-action from 2 to 3 by <a href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1694">actions/checkout#1694</a></li>
<li>Bump actions/setup-node from 1 to 4 by <a href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1696">actions/checkout#1696</a></li>
<li>Bump actions/upload-artifact from 2 to 4 by <a href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1695">actions/checkout#1695</a></li>
<li>README: Suggest <code>user.email</code> to be <code>41898282+github-actions[bot]@users.noreply.github.com</code> by <a href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1707">actions/checkout#1707</a></li>
</ul>
<h2>v4.1.4</h2>
<ul>
<li>Disable <code>extensions.worktreeConfig</code> when disabling <code>sparse-checkout</code> by <a href="https://github.com/jww3"><code>@​jww3</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1692">actions/checkout#1692</a></li>
<li>Add dependabot config by <a href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1688">actions/checkout#1688</a></li>
<li>Bump the minor-actions-dependencies group with 2 updates by <a href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1693">actions/checkout#1693</a></li>
<li>Bump word-wrap from 1.2.3 to 1.2.5 by <a href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1643">actions/checkout#1643</a></li>
</ul>
<h2>v4.1.3</h2>
<ul>
<li>Check git version before attempting to disable <code>sparse-checkout</code> by <a href="https://github.com/jww3"><code>@​jww3</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1656">actions/checkout#1656</a></li>
<li>Add SSH user parameter by <a href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1685">actions/checkout#1685</a></li>
<li>Update <code>actions/checkout</code> version in <code>update-main-version.yml</code> by <a href="https://github.com/jww3"><code>@​jww3</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1650">actions/checkout#1650</a></li>
</ul>
<h2>v4.1.2</h2>
<ul>
<li>Fix: Disable sparse checkout whenever <code>sparse-checkout</code> option is not present <a href="https://github.com/dscho"><code>@​dscho</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1598">actions/checkout#1598</a></li>
</ul>
<h2>v4.1.1</h2>
<ul>
<li>Correct link to GitHub Docs by <a href="https://github.com/peterbe"><code>@​peterbe</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1511">actions/checkout#1511</a></li>
<li>Link to release page from what's new section by <a href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in <a href="https://redirect.github.com/actions/checkout/pull/1514">actions/checkout#1514</a></li>
</ul>
<h2>v4.1.0</h2>
<ul>
<li><a href="https://redirect.github.com/actions/checkout/pull/1396">Add support for partial checkout filters</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="f43a0e5ff2"><code>f43a0e5</code></a> Release 3.6.0 (<a href="https://redirect.github.com/actions/checkout/issues/1437">#1437</a>)</li>
<li><a href="7739b9ba2e"><code>7739b9b</code></a> Add option to fetch tags even if fetch-depth &gt; 0 (<a href="https://redirect.github.com/actions/checkout/issues/579">#579</a>)</li>
<li><a href="96f53100ba"><code>96f5310</code></a> Mark test scripts with Bash'isms to be run via Bash (<a href="https://redirect.github.com/actions/checkout/issues/1377">#1377</a>)</li>
<li><a href="c85c95e3d7"><code>c85c95e</code></a> Release v3.5.3 (<a href="https://redirect.github.com/actions/checkout/issues/1376">#1376</a>)</li>
<li><a href="d106d4669b"><code>d106d46</code></a> Add support for sparse checkouts (<a href="https://redirect.github.com/actions/checkout/issues/1369">#1369</a>)</li>
<li><a href="f095bcc56b"><code>f095bcc</code></a> Fix typos found by codespell (<a href="https://redirect.github.com/actions/checkout/issues/1287">#1287</a>)</li>
<li><a href="47fbe2df0a"><code>47fbe2d</code></a> Fix: Checkout fail in self-hosted runners when faulty submodule are checked-i...</li>
<li><a href="8e5e7e5ab8"><code>8e5e7e5</code></a> Release v3.5.2 (<a href="https://redirect.github.com/actions/checkout/issues/1291">#1291</a>)</li>
<li><a href="eb35239ec2"><code>eb35239</code></a> Fix: convert baseUrl to serverApiUrl 'formatted' (<a href="https://redirect.github.com/actions/checkout/issues/1289">#1289</a>)</li>
<li><a href="83b7061638"><code>83b7061</code></a> Release v3.5.1 (<a href="https://redirect.github.com/actions/checkout/issues/1284">#1284</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/actions/checkout/compare/v1...v3">compare view</a></li>
</ul>
</details>
<br />

<details>
<summary>Most Recent Ignore Conditions Applied to This Pull Request</summary>

| Dependency Name | Ignore Conditions |
| --- | --- |
| actions/checkout | [>= 4.a, < 5] |
</details>


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=1&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 15:14:00 +00:00
0401c4e511 Add a settings API test 2025-03-03 16:08:21 +01:00
4798c35c50 Merge #5383
5383: Skip a snapshot test on Windows r=dureuill a=Kerollmops

This PR skips the `perform_on_demand_snapshot` test on Windows, which is very flaky on this platform. Note that we keep running it on macOS and Ubuntu.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 13:23:24 +00:00
9585950e0e Merge #5365
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Failing after 5s
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 9s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 10s
Test suite / Tests on ubuntu-22.04 (push) Failing after 17s
Test suite / Tests almost all features (push) Failing after 11s
Test suite / Test with Ollama (push) Failing after 17s
Test suite / Test disabled tokenization (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Rustfmt (push) Successful in 3m43s
Test suite / Run Clippy (push) Successful in 9m23s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5365: Mention openAPI in CONTRIBUTING.md r=Kerollmops a=irevoire

I only referred to other documents to be sure the process is written only once and won’t get out of sync.

Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-03 11:23:51 +00:00
b8c6eb5453 Improve bors toml 2025-03-03 12:22:33 +01:00
02586e727e Introduce a CI to check milestones and branches 2025-03-03 12:22:24 +01:00
0cfc9261ba Skip a snapshot test on Windows 2025-03-03 10:44:28 +01:00
035674d56e Bump actions/checkout from 1 to 4 2025-03-03 10:37:28 +01:00
d35470e29b Update dumps
**Impact:**
- dump import
2025-03-03 10:33:39 +01:00
23e07f1a93 Attribute positions changed in snapshots
**Reason:**
Only the existing fields are registered in the fieldid_map
2025-03-03 10:33:39 +01:00
f2a28a4dd7 Add and enhance tests
**Changes:**
Introduce a test_settings_documents_indexing_swapping_and_search function that runs the test twice:
1) by indexing the settings before the documents then running the test
2) by indexing the documents before the settings then running the test

This helps to ensure that there is no bug coming from one or the other indexer.
2025-03-03 10:33:39 +01:00
1994494155 Update snapshot using the new filterableAttributes type 2025-03-03 10:33:39 +01:00
6dbec91d2b Index document in filterable attributes tests
**Reason:**
Because the filterable attributes are patterns now,
the fieldIdMap will only register the fields that exist in at least one document.
If a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields.
2025-03-03 10:33:39 +01:00
9a75dc6ab3 Update tests using filterable attributes rules
**Changes:**
Replace the BTreeSet<String> by Vec<FilterableAttributesRule> without changing the test results

**Impact:**
- None
2025-03-03 10:33:34 +01:00
ae8d453868 Refactor Document indexing process (searchables)
**Changes:**
The searchable database extraction is now relying on the AttributePatterns and FieldIdMapWithMetadata to match the field to extract.
Remove the SearchableExtractor trait to make the code less complex.

**Impact:**
- Document Addition/modification searchable indexing
- Document deletion searchable indexing
2025-03-03 10:32:42 +01:00
95bccaf5f5 Refactor Document indexing process (Facets)
**Changes:**
The Documents changes now take a selector closure instead of a list of fields to match the field to extract.
The seek_leaf_values_in_object function now uses a selector closure instead of a list of fields to match the field to extract.
The facet database extraction is now relying on the FilterableAttributesRule to match the field to extract.
The facet-search database extraction is now relying on the FieldIdMapWithMetadata to select the field to index.
The facet level database extraction is now relying on the FieldIdMapWithMetadata to select the field to index.

**Important:**
Because the filterable attributes are patterns now,
the fieldIdMap will only register the fields that exist in at least one document.
If a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields.

**Impact:**
- Document Addition/modification facet indexing
- Document deletion facet indexing
2025-03-03 10:32:03 +01:00
659855c88e Refactor Settings Indexing process
**Changes:**
The transform structure is now relying on FieldIdMapWithMetadata and AttributePatterns to prepare
the obkv documents during a settings reindexing.
The InnerIndexSettingsDiff and InnerIndexSettings structs are now relying on FieldIdMapWithMetadata, FilterableAttributesRule and AttributePatterns to define the field and the databases that should be reindexed.
The faceted_fields_ids, localized_searchable_fields_ids and localized_faceted_fields_ids have been removed in favor of the FieldIdMapWithMetadata.
We are now relying on the FieldIdMapWithMetadata to retain vectors_fids from the facets and the searchables.

The searchable database computing is now relying on the FieldIdMapWithMetadata to know if a field is searchable and retrieve the locales.

The facet database computing is now relying on the FieldIdMapWithMetadata to compute the facet databases, the facet-search and retrieve the locales.

The facet level database computing is now relying on the FieldIdMapWithMetadata and the facet level database are cleared depending on the settings differences (clear_facet_levels_based_on_settings_diff).

The vector point extraction uses the FieldIdMapWithMetadata instead of FieldsIdsMapWithMetadata.

**Impact:**
- Dump import
- Settings update
2025-03-03 10:32:02 +01:00
286d310287 Fix inconsistency in attribute ranking rule computation
**Changes:**
The building of the Attributes ranking rule graph was comparing fieldids with weights,
which doesn't make sense and may be bug-prone; we are now comparing fieldids with fieldids.

**Impact:**
- search: Attribute ranking rule
2025-03-03 10:29:34 +01:00
4f7ece2411 Refactor the FieldIdMapWithMetadata
**Changes:**
The FieldIdMapWithMetadata structure now stores more information about fields.
The metadata_for_field function computes all the needed information relying on the user provided data instead of the enriched data (searchable/sortable)
which may solve an indexing bug on sortable attributes that was not matching the nested fields.

The FieldIdMapWithMetadata structure was duplicated in the embeddings as FieldsIdsMapWithMetadata,
so the FieldsIdsMapWithMetadata has been removed in favor of FieldIdMapWithMetadata.

The Facet distribution is now relying on the FieldIdMapWithMetadata to check if a field can be faceted.

**Impact:**
- searchable attributes matching
- searchable attributes weight computation
- sortable attributes matching
- faceted fields matching
- prompt computing
- facet distribution
2025-03-03 10:29:33 +01:00
967033579d Refactor search and facet-search
**Changes:**
The search filters are now using the FilterableAttributesFeatures from the FilterableAttributesRules to know if a field is filterable.
Moreover, the FilterableAttributesFeatures is more precise and an error will be returned if an operator is used on a field that doesn't have the related feature.
The facet-search is now checking if the feature is allowed in the FilterableAttributesFeatures and an error will be returned if the field doesn't have the related feature.

**Impact:**
- facet-search is now relying on AttributePatterns to match the locales
- search using filters is now relying on FilterableAttributesFeatures
- distinct attribute is now relying on FilterableAttributesRules
2025-03-03 10:25:32 +01:00
0200c65ebf Change the filterableAttributes setting API
**Changes:**
The filterableAttributes type has been changed from a `BTreeSet<String>` to a `Vec<FilterableAttributesRule>`,
which is a list of rules defining patterns to match the documents' fields and a set of features to apply on the matching fields.
The rule order given by the user is now important information: the features applied on a filterable field will be chosen based on the rule order, as we do for the LocalizedAttributesRules.
This means that the list will not be reordered anymore and will keep the user-defined order;
moreover, if there are any duplicates, they will not be de-duplicated anymore.

**Impact:**
- Settings API
- the database format of the filterable attributes changed
- may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- OpenAPI generator
2025-03-03 10:22:02 +01:00
c63c25a9a2 Merge #5355
Some checks failed
Look for flaky tests / flaky (push) Failing after 1s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
Test suite / Tests almost all features (push) Failing after 13s
Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 10s
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Rustfmt (push) Failing after 16s
Test suite / Run Clippy (push) Successful in 9m39s
SDKs tests / define-docker-image (push) Failing after 5s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5355: Support fetching the pooling method from the model configuration r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5354 

## What does this PR do?
- Fetches the pooling configuration from the model repository
- Use a pooling method that depends on the pooling configuration of that model.
- Allow overriding the pooling method with a new huggingFace embedder parameter `pooling`
  - for backward-compatibility with Meilisearch v1.13
  - for compatibility with embedders that exhibit the same behavior as Meilisearch v1.13
- Handle the default value of that new parameter
   - for compatibility, when importing a db/a dump, it should be set to `forceMean`
   - when (re)set from the settings for an embedder, it should be set to `useModel`


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-27 14:55:13 +00:00
046bbea864 Keep old stat format to make sure the number of documents is available during dumpless upgrade 2025-02-27 15:17:23 +01:00
c5cb7d2f2c Forbid opening a db of v1.13.x from v1.13.y 2025-02-27 15:17:23 +01:00
5e7f226ac9 Support dumpless upgrade for all v1.13 patches 2025-02-27 15:17:23 +01:00
754f254a00 Update snapshots following version bump 2025-02-27 15:17:23 +01:00
39b5ad3c86 Update version for the next release (v1.13.2) in Cargo.toml 2025-02-27 15:17:22 +01:00
80adbb1bdc Merge #5338
Some checks are pending
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m23s
5338: Bump Ubuntu in the CI from 20.04 to 22.04 r=dureuill a=Kerollmops

This PR bumps the Ubuntu version we use in the CI from version 20.04 to version 22.04. This also means we are [using GLIBC version 2.35 and not version 2.28](https://gist.github.com/zchrissirhcz/ee13f604996bbbe312ba1d105954d2ed).

Note, the indentation fix is done by my IDE (Zed), sorry about that 🤦 

Fixes https://github.com/meilisearch/meilisearch/issues/5374

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-27 08:14:12 +00:00
4b6fa1cf41 Merge #5372
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Run the indexing fuzzer / Setup the action (push) Failing after 11s
Test suite / Test with Ollama (push) Failing after 10s
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Run Rustfmt (push) Failing after 32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5372: Bring back changes from v1.13.1 to main r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Strift <lau.cazanove@gmail.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2025-02-26 17:24:51 +00:00
dc78d8e9c4 Fix the dumpless upgrade log 2025-02-26 17:02:46 +01:00
d4063c9dcd Fix fmt 2025-02-26 17:02:45 +01:00
abebc574f6 Update crates/milli/src/index.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-26 17:02:45 +01:00
f32ab67819 Update crates/milli/src/index.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-26 17:02:44 +01:00
d25953f322 fix clippy 2025-02-26 17:02:43 +01:00
405bbd04c1 Dumpless upgrade 2025-02-26 17:01:38 +01:00
5d421abdc4 Update Snapshots 2025-02-26 17:01:37 +01:00
9f3663e768 Implement Incremental document database stats computing 2025-02-26 17:01:35 +01:00
d9642ec916 Use checked_div in average computation 2025-02-26 17:01:34 +01:00
818e8b0237 Fix zero division 2025-02-26 17:01:31 +01:00
4f77a7fba5 fix clippy 2025-02-26 17:01:29 +01:00
058f08dff5 fix snapshots 2025-02-26 17:01:26 +01:00
9a6c1730aa Add document database stats 2025-02-26 17:01:25 +01:00
91a8a97045 Bump 2025-02-26 17:01:24 +01:00
15788773af Check the exact_word database when computing zero typo query 2025-02-26 17:01:22 +01:00
025b9b79bb Update the snapshots 2025-02-26 17:01:21 +01:00
1c60b17a37 Update version for the next release (v1.13.1) in Cargo.toml 2025-02-26 17:01:19 +01:00
3b2cd54b9d tests: add a check to know if a Value has an uid 2025-02-25 17:24:45 +01:00
0833cb7d34 Mention openAPI in CONTRIBUTING.md 2025-02-25 12:01:26 +01:00
b0d4f9590f Merge #5364
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 15s
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Rustfmt (push) Successful in 2m20s
Test suite / Run Clippy (push) Failing after 6m46s
Run the indexing fuzzer / Setup the action (push) Failing after 7m0s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5364: Rename `callTrace` into `progressTrace` r=Kerollmops a=Kerollmops

Rename the `callTrace` field into a `progressTrace`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-25 09:34:13 +00:00
dfce20be21 Rename callTrace into progressTrace 2025-02-25 10:09:03 +01:00
24fe6cd205 Fix multiple embeddings in hf 2025-02-24 16:24:04 +01:00
e374b095a2 Fix tests 2025-02-24 14:11:26 +01:00
9f3e4801b1 Refactor settings validation and introduce SubEmbedderSettings 2025-02-24 13:58:26 +01:00
b85180fedb Error types 2025-02-24 13:58:26 +01:00
3cdcc54a9e analytics 2025-02-24 13:58:26 +01:00
294cf39cad Integrate composite embedder 2025-02-24 13:58:26 +01:00
4a2643daa2 Rename embed_one to embed_search and embed_chunks* to embed_index* 2025-02-24 13:58:26 +01:00
8d2d9066ba Add composite embedder 2025-02-24 13:58:26 +01:00
526476e168 Move settings test to its own file 2025-02-24 13:58:26 +01:00
ea7bae9a71 Merge #5356
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Test with Ollama (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 19s
Test suite / Run Rustfmt (push) Failing after 17s
Test suite / Run Clippy (push) Successful in 9m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h8m47s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5356: Display the internal indexing steps with timings on the `/batches` route r=irevoire a=Kerollmops

This PR computes the durations of each step, stores them in a map, and prints them (for now).

```
"callTrace": {
    "processing tasks > retrieving config": "185.38µs",
    "processing tasks > computing document changes > preparing update file > payload": "23.11ms",
    "processing tasks > computing document changes > preparing update file": "23.26ms",
    "processing tasks > computing document changes": "24.06ms",
    "processing tasks > indexing > extracting documents > document": "15.13ms",
    "processing tasks > indexing > extracting documents": "15.13ms",
    "processing tasks > indexing > extracting facets > document": "5.70ms",
    "processing tasks > indexing > extracting facets": "5.72ms",
    "processing tasks > indexing > extracting words > document": "597.24ms",
    "processing tasks > indexing > extracting words": "597.25ms",
    "processing tasks > indexing > extracting word proximity > document": "1.14s",
    "processing tasks > indexing > extracting word proximity": "1.15s",
    "processing tasks > indexing > tail writing to database": "430.91ms",
    "processing tasks > indexing > waiting for extractors": "52.54µs",
    "processing tasks > indexing > writing embeddings to database": "47.79µs",
    "processing tasks > indexing > post-processing facets": "476.04µs",
    "processing tasks > indexing > post-processing words": "97.82ms",
    "processing tasks > indexing > finalizing": "67.41ms",
    "processing tasks > indexing": "2.40s",
    "processing tasks": "2.43s",
    "writing tasks to disk > task": "37.71µs",
    "writing tasks to disk": "67.13µs"
},
"writeChannelCongestion": {
    "attempts": 2608482,
    "blocking_attempts": 0,
    "blocking_ratio": 0.0
}
```

## To Do
- [x] Update the batches PRD + delivery + tracking issue.
- [x] Store that in the batches to be visible from the `/batches` route.
- [x] Display the writer's congestion.
- [x] Display the info back in the logs too.
- [ ] (optional) Compute the size of each database by [using LMDB](https://docs.rs/heed/latest/heed/struct.DatabaseStat.html).
- [x] Push them in reverse order so that "processing task" is after the other sub-steps.


Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-20 17:38:50 +00:00
76fd5d92d7 Clarify the tail writing to database 2025-02-20 17:35:23 +01:00
245a55722a Remove commented code 2025-02-20 16:48:18 +01:00
434fad5327 Fix insta tests again 2025-02-20 16:41:48 +01:00
243a5fa6a8 Log the call trace and congestion 2025-02-20 14:17:34 +01:00
9d314ace09 Fix the insta tests 2025-02-20 11:51:58 +01:00
1b1172ad16 Fix dump tests 2025-02-20 10:44:53 +01:00
1d99c8465c Hide the batch stats to make insta pass 2025-02-20 10:16:54 +01:00
05cc8c650c Expose the write channel congestion in the batches 2025-02-19 15:47:54 +01:00
14e1459bf5 Document settings 2025-02-19 15:06:22 +01:00
589bf30ec6 make clippy happy 2025-02-19 11:38:07 +01:00
b367c71ad2 fixup test 2025-02-19 11:31:17 +01:00
3ff1de0a21 Expose the call trace in the batch stats 2025-02-19 11:24:11 +01:00
1005a60fb8 Fixup dump settings 2025-02-19 11:03:48 +01:00
e9add14189 Reorder steps 2025-02-18 19:26:41 +01:00
4a058a080e Simplify the name generation 2025-02-18 18:48:44 +01:00
11a11fc870 Accumulate step durations from the progress system 2025-02-18 18:33:19 +01:00
cd0dfa3f1b Fix test cases 2025-02-18 17:21:52 +01:00
7b4ce468a6 Allow overriding pooling method 2025-02-18 17:12:23 +01:00
11759c4be4 Support pooling 2025-02-18 16:10:51 +01:00
0f1aeb8eaa Merge #5351
Some checks failed
Look for flaky tests / flaky (push) Failing after 19s
SDKs tests / define-docker-image (push) Failing after 5s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 2s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 12s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Failing after 17s
Test suite / Run Rustfmt (push) Successful in 1m51s
Test suite / Tests almost all features (push) Failing after 7m7s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops

This PR brings back the changes made in v1.13 into the main branch.

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clémentine <clementine@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-18 08:05:02 +00:00
5e7803632d Merge #5342
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 14s
Test suite / Run Rustfmt (push) Successful in 1m54s
Test suite / Run Clippy (push) Failing after 6m49s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5342: Fix workload sha r=dureuill a=ManyTheFish

The dataset shasum was wrong for some workloads, which made the `/bench workloads/*.json` command crash.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 16:27:09 +00:00
885710a07b Merge #5341
5341: Embeddings stats r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #5321

## What does this PR do?
- Add embedding stats
- force dumpless upgrade to recompute stats
- add tests


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 15:46:37 +00:00
c55fdad2c3 Fix dumpless upgrade target version 2025-02-12 16:35:05 +01:00
1caad4c4b0 Add multiple embeddings for the same embedder in tests 2025-02-12 16:13:34 +01:00
8419ed52a1 fix clippy 2025-02-12 14:38:51 +01:00
a65c52cc97 Convert dump test into snapshots 2025-02-12 14:14:10 +01:00
49e9655c24 Update snapshots 2025-02-12 14:05:32 +01:00
fa763ca5dc Merge #5339
5339: Add back timeout from v1.11.3 r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5337

## What does this PR do?
- Fix regression compared with v1.11 by reintroducing the 30s timeout on all REST API calls.

Thanks to `@migueltarga` for reporting the issue


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-12 12:50:27 +00:00
c7aeb554b2 Add tests 2025-02-12 13:37:41 +01:00
88d9d47928 Fix benchmark sha 2025-02-12 13:27:15 +01:00
8e0d8d31f9 Add back timeout from v1.11.3 2025-02-12 11:53:00 +01:00
81a38099ec Merge #5336
5336: Meilitool Hair Dryer r=dureuill a=Kerollmops

This pull request introduces a new subcommand to hair dry a specific part of specific indexes. It is useful when [the memory-mapped pages are not hot in the cache](https://arc.net/l/quote/ixhcdwcq) and must be. Hair drying those interesting pages makes the search requests using the vector store much faster.

The previous technique used the "cat method," which consists of reading the whole LMDB data file and piping it into the null file descriptor. By doing that, the whole LMDB data file becomes hot in the cache. However, when the database is large, at least 30% of it is free and unused pages, and many other pages don't need to be hot, e.g., raw JSON documents or uninteresting parts of the inverted index.

This new subcommand reads all the Arroy pages of a given index to make them hot, and only those. More coming...

The current algorithm is single-threaded and takes a lot of time. I am in the process of multithreading it. This is the time it takes to hair dry a 305GiB database with a single thread.

```
real    21m51.054s
user    0m3.155s
sys     0m19.393s
```

## To Do
- [ ] (optional) Do the reads in parallel.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-12 10:45:16 +00:00
bd27fe7d02 force dumpless upgrade to recompute stats 2025-02-12 11:45:02 +01:00
41203f0931 Add embedders stats 2025-02-12 11:37:47 +01:00
803a699b15 Remove unsafes 2025-02-12 10:46:45 +01:00
246ad3b06e Display a progress percentage 2025-02-12 09:56:05 +01:00
a21c440274 Bump Ubuntu from 20.04 to 22.04 2025-02-12 09:49:50 +01:00
c01d26ffd7 Merge #5324
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 5s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 0s
Test suite / Run Rustfmt (push) Failing after 7s
Test suite / Run Clippy (push) Successful in 7m29s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m25s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5324: Mention utoipa in sprint issues r=curquiza a=irevoire

Update the sprint-issue template to mention the openAPI file and utoipa.

Let me know if something is not clear or missing

Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-11 20:46:26 +00:00
225af069a9 Merge #5149
5149: Ensure the settings routes are now configured when a new field is added to the Settings struct r=curquiza a=MichaScant

# Pull Request
## Related issue
Fixes #5126 

## What does this PR do?
Ensures the settings routes are properly configured before a new field is added to the settings structure. Changes were made based on what was proposed in the original issue: any new field for the settings struct is added in the [make_settings_route! macro list](6298db5bea/crates/meilisearch/src/routes/indexes/settings.rs (L182-L403))

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: michascant <89426143+MichaScant@users.noreply.github.com>
2025-02-11 20:10:29 +00:00
70305b9f71 Merge #5332
5332: Fix geo update r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5331

## What does this PR do?
- use the merged version that contains all fields instead of the updated version that contains only updated fields
- add test that detects the problem
- As it is the second time that `changes.updated` is causing a bug, I'm changing its name to `only_changed_fields`, hopefully better communicating that old fields are not there


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-11 18:51:33 +00:00
5dab435d13 Add more logs about read txns 2025-02-11 18:14:48 +01:00
c83c1a3c51 Introduce the Hair Dryer meilitool subcommand 2025-02-11 18:01:53 +01:00
afc6c10a2a add more info on utoipa 2025-02-11 17:45:17 +01:00
b83275c9c5 Change the updated* functions to only_new functions, hopefully better communicating what they do 2025-02-11 15:27:10 +01:00
d7f35ee3ba Use merged document instead of updated 2025-02-11 15:27:10 +01:00
1dce341bfb Add test 2025-02-11 15:27:10 +01:00
4876c1c8eb Merge #5310
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 18s
Test suite / Run tests in debug (push) Failing after 17s
Test suite / Run Rustfmt (push) Successful in 2m42s
Test suite / Run Clippy (push) Failing after 7m17s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5310: Fix batch export/import dump r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5304
Fixes https://github.com/meilisearch/meilisearch/issues/5247

## What does this PR do?
- Add the batches to the dump
- Update the tests
- Create a new dump test containing batches and an enqueued task with a document addition


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-11 10:21:34 +00:00
43c8d54501 fix test after rebase 2025-02-11 11:19:13 +01:00
84e2a1f836 rename the atomic to something more meaningful 2025-02-11 11:14:49 +01:00
00eb47d42e use serde_json::to_writer instead of serializing + writing 2025-02-11 11:14:49 +01:00
9293e7f2c1 fix tests after rebase 2025-02-11 11:14:49 +01:00
80198aa855 add a dump test with batches and enqueued tasks 2025-02-11 11:14:49 +01:00
fa00b42c93 fix the missing batch in the dumps in meilisearch and meilitools 2025-02-11 11:14:49 +01:00
6c9409edf8 Merge #5326
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 15s
Test suite / Run Clippy (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 34s
Test suite / Run Rustfmt (push) Successful in 1m32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5326: Expose a route to get the file content associated with a task r=Kerollmops a=Kerollmops

This PR exposes a new `/tasks/{taskUid}/documents` route, exposing the update file associated with a task.

## To Do
- [x] (optional) Change the route to `/tasks/{taskUid}/documents` `@dureuill.`
- [x] Update Open API example.
- [x] Create [an Experimental Feature Discussion](https://github.com/orgs/meilisearch/discussions/808).
- [x] Make this route experimental and enable it via the experimental route.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-10 16:50:13 +00:00
acb06cb3e6 Improve the error message when missing documents
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-10 16:53:50 +01:00
7d0d8f4445 Make the feature experimental 2025-02-10 16:11:32 +01:00
491d115c3c Change the route to get the task documents 2025-02-10 14:55:07 +01:00
55fa2dda00 Update the Open API example 2025-02-10 14:52:48 +01:00
c71eea8023 Improve error message when update file has been processed 2025-02-10 14:33:01 +01:00
df40533741 Expose a route to get the update file content of a task 2025-02-10 14:05:32 +01:00
4e819a6187 mention utoipa in sprint issues 2025-02-10 13:35:15 +01:00
0c3e7fe963 Merge #5316
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 2s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 16s
Test suite / Run Clippy (push) Failing after 12s
Test suite / Run Rustfmt (push) Failing after 32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5316: Fix the dumpless upgrade corruption r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5280

## What does this PR do?
- Add a test that ensures we write the version in the index-scheduler even if we have a bug while writing the VERSION file
- Do what was described in the issue


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-10 09:53:57 +00:00
45f843ccb9 fmt 2025-02-10 10:46:42 +01:00
35b6bca598 remove the failing test 2025-02-10 10:20:14 +01:00
7f82d33597 update the version file atomically 2025-02-06 18:23:28 +01:00
f2185438ee Merge #5308
Some checks failed
Look for flaky tests / flaky (push) Failing after 13s
SDKs tests / define-docker-image (push) Failing after 7m9s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Publish binaries to GitHub release / Check the version validity (push) Successful in 12s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 16s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 19s
Test suite / Tests almost all features (push) Failing after 1s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Tests on ubuntu-20.04 (push) Failing after 15s
Test suite / Test disabled tokenization (push) Failing after 9s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Rustfmt (push) Failing after 9s
Test suite / Run Clippy (push) Failing after 18s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
5308: Ollama Integration Tests r=dureuill a=Kerollmops

This PR improves test coverage of #4757 by providing a new CI to test the Ollama setup with Ollama.

## To Do
- [x] Clean up the commits
- [x] Feature gate the Ollama tests and run them only in the CI

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 17:21:51 +00:00
8c5856007c flush+sync the version file just in case 2025-02-06 18:04:43 +01:00
ae1d7f4d9b Improve the test and disable it on windows and linux since they don't work on the CI 2025-02-06 17:54:12 +01:00
792be63567 Merge #5323
5323: exclude network time from processingMs r=Kerollmops a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-06 16:35:44 +00:00
ca1ad51564 Put the Ollama tests under a feature 2025-02-06 17:27:47 +01:00
70aac71c63 exclude network time from processingMs 2025-02-06 17:18:36 +01:00
a1d1e7c82a Setup dedicated CI to run the Ollama tests 2025-02-06 17:12:17 +01:00
56438bdea4 Introduce an Ollama integration test 2025-02-06 17:12:17 +01:00
a562d6abc1 Merge #5322
5322: Make sure arroy is using the rayon thread-pool r=dureuill a=Kerollmops

This PR fixes #5249 by ensuring arroy uses the rayon thread pool.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 15:28:47 +00:00
33b67b82e1 fixed rustfmt errors 2025-02-06 09:57:39 -05:00
b7fdd9516c Merge #4970
4970: Create a new export documents meilitool subcommand r=dureuill a=Kerollmops

This subcommand can be useful for extracting documents from an existing database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 14:48:27 +00:00
5f2a1a4fd1 Skip the documents before fetching them 2025-02-06 15:40:22 +01:00
2b0e17ede0 Make sure arroy is using the rayon thread-pool 2025-02-06 15:28:10 +01:00
37092adc71 Show a bit of progress 2025-02-06 10:37:05 +01:00
86fcad788e Introduce a parameter to skip the first documents 2025-02-06 10:32:50 +01:00
2ea5c57871 Create a new export documents meilitool subcommand based on v1.12 2025-02-06 10:32:39 +01:00
7b4f2aa593 updated code 2025-02-05 22:07:32 -05:00
1fb96d3edb made changes to ensure it's not allowing everything through 2025-02-05 20:37:07 -05:00
b63c64395d add a test ensuring the index-scheduler version is set when we cannot write the version file 2025-02-05 18:08:50 +01:00
628119e31e fix the dumpless upgrade potential corruption when upgrading from the v1.12 2025-02-05 18:08:50 +01:00
78867b6852 Merge #5299
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Tests on ubuntu-20.04 (push) Failing after 17s
Test suite / Tests on windows-2022 (push) Failing after 25s
Test suite / Run Rustfmt (push) Failing after 1m6s
Test suite / Run Clippy (push) Successful in 8m46s
Test suite / Tests on macos-13 (push) Has been cancelled
5299: Remote federated search r=dureuill a=dureuill

Fixes #4980 

- Usage: https://www.notion.so/meilisearch/API-usage-Remote-search-request-f64fae093abf409e9434c9b9c8fab6f3?pvs=25#1894b06b651f809a9f3dcc6b7189646e

- Changes database format:
  - Adds a new database key: the code is resilient to the case where the key is missing
  - Adds a new experimental feature: the code for experimental features is resilient to this case

Changes:

- Add experimental feature `proxySearch`
- Add network routes
- Dump support for network
- Add proxy search
- Add various tests

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-05 16:08:48 +00:00
b21b8e8f30 Remote search tests 2025-02-05 15:03:33 +01:00
4a9e5ae215 mv multi.rs -> multi/mod.rs 2025-02-05 15:03:33 +01:00
6e1865b75b network integration tests 2025-02-05 15:03:32 +01:00
64409a1de7 Test server: clear_api_key 2025-02-05 15:03:32 +01:00
1b81cab782 Add more analytics 2025-02-05 15:03:32 +01:00
88190b5602 Fix tests 2025-02-05 15:03:32 +01:00
0b27aa5138 Multi search reads header to know if it is being proxied 2025-02-05 15:03:32 +01:00
35160788d7 Proxy search requests 2025-02-05 15:03:32 +01:00
c3e5c3ba36 Allow rebuilding a SearchQueryWithIndex from its components 2025-02-05 15:03:16 +01:00
04ac0af54b Add WeightedScoreValues to be able to compare remote scores 2025-02-05 15:03:16 +01:00
9996533364 Make search types serialize and deserialize so that reading from a proxy is possible 2025-02-05 15:03:16 +01:00
3f6b334fc5 Route network 2025-02-05 15:03:16 +01:00
b30e5a7a35 Add new permissions 2025-02-05 15:03:16 +01:00
6d79cb23ba New error codes 2025-02-05 15:03:16 +01:00
e34afca6d7 Support network in dumps 2025-02-05 15:03:16 +01:00
4918b9ffb6 Network stored in DB 2025-02-05 15:03:15 +01:00
73474e7af0 Network types 2025-02-05 15:03:15 +01:00
7ae6dda03f Add new experimental feature 2025-02-05 15:01:04 +01:00
00e764b0d3 Merge #5314
5314: Activate used database size r=irevoire a=ManyTheFish

# Pull Request

make the `/stats` route return the `usedDatabaseSize` corresponding to the size used to store the "real" data in the database and not the disk size used by LMDB


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-05 12:51:57 +00:00
4abf0db0b4 Activate used database size 2025-02-05 13:45:47 +01:00
acc885fd0a Merge #5312
5312: Send the OSS analytics once per day instead of once per hour r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5311

## What does this PR do?
- If the instance is OSS => we send the analytics once every day
- If the instance is on the meilisearch cloud => we send the analytics every hour


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-05 11:15:34 +00:00
61e8cfd4bc Send the OSS analytics once per day instead of once per hour 2025-02-04 15:39:00 +01:00
796acd1aee Merge #5288
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Tests on windows-2022 (push) Failing after 48s
Test suite / Run Rustfmt (push) Successful in 1m28s
Test suite / Tests on macos-13 (push) Has been cancelled
5288: Improve AI logging r=dureuill a=Kerollmops

This PR fixes #5285 and brings the changes from #5233 to simplify debugging indexation and search performance issues related to AI. The following texts can be found in the logs to debug and understand performance issues:

 - `embed_one: search` represents the time we spent waiting for the embedding generation, i.e., OpenAI, local HuggingFace, Ollama.
 - `filtered_universe: search::universe` the time spent filtering the documents.
 - ~`next_bucket: search::vector_sort` is the time spent finding the nearest neighbors (ANNs) in the vector store (arroy), locally~ was being triggered too many times.
 - `indexing::vectors` is the time arroy spends indexing the new vectors for a batch.
 - `documents::extract vectors` and `documents::merge vectors` to see the time spent generating and writing the embeddings.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-04 10:20:45 +00:00
cc8df5e11f Move back the search-side logging to tracing 2025-02-04 11:16:17 +01:00
ede74ccc42 Merge #5306
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 2s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 24s
Test suite / Run Rustfmt (push) Successful in 1m33s
Test suite / Run Clippy (push) Successful in 6m20s
Test suite / Tests on macos-13 (push) Has been cancelled
5306: Fix internal error when passing `documentTemplateMaxBytes` to a source that doesn't support it r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #5305 

## What does this PR do?
- add `DOCUMENT_TEMPLATE_MAX_BYTES` to `allowed_sources_for_field` and `allowed_fields_for_source` to prevent a panic


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-04 08:46:13 +00:00
e93a5719ef Merge #5293
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 14s
Test suite / Tests on windows-2022 (push) Failing after 24s
Test suite / Run Clippy (push) Failing after 31s
Test suite / Run Rustfmt (push) Successful in 1m45s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m3s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
5293: Support merging update and replacement operations r=irevoire a=Kerollmops

This PR fixes #5286 by modifying the auto-batcher and how we merge documents when preparing them for the new indexer.

## To do
- [x] Make sure we can auto-batch different operation types.
- [x] Make sure the indexer correctly understands and mixes the different kinds.
- [x] Create a test to see if it mixes the documents correctly.
- [x] Modify the auto-batcher tests for the new behavior.


Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-03 11:28:41 +00:00
d34f0b606c Update crates/milli/src/update/new/document_change.rs 2025-02-03 12:08:52 +01:00
6425451bbc Merge #5303
5303: Bring back changes from v1.12.8 into v1.13.0 r=Kerollmops a=Kerollmops

Fixes #5087 and other problems that you can find in the original PR #5294.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-03 10:49:26 +00:00
acc400face Support merging update and replacement operations 2025-02-03 11:47:17 +01:00
fe46855462 Merge #5235
5235: Introduce a compaction subcommand in meilitool r=dureuill a=Kerollmops

This PR proposes a change to the meilitool helper, introducing the `compact-index` subcommand to reduce the size of the indexes.

While working on this tool, I discovered that the current heed `Env::copy_to_file` API is not very temp file friendly and [could be improved](https://github.com/meilisearch/heed/issues/306).

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-03 10:11:01 +00:00
8e7d2d25f2 Only open indexes, do not create them 2025-02-03 10:50:38 +01:00
a436534515 Fix test 2025-02-03 10:36:34 +01:00
aa2327591e Add more mixing updates and replacements tests 2025-02-03 10:34:07 +01:00
a6f9e0ddf0 Fix auto batching related tests 2025-02-03 10:34:07 +01:00
60470bb647 Fix the tests to use the new replace/update documents 2025-02-03 10:34:07 +01:00
294e1ba16d Fix functions calls to use the new mixed system 2025-02-03 10:34:06 +01:00
8e6893ddbe Make sure we correctly mix different document operations 2025-02-03 10:34:06 +01:00
d018346f18 Make the auto-batcher batch replacements with updates 2025-02-03 10:34:05 +01:00
2385842537 Fix the imports 2025-02-03 10:29:09 +01:00
6a70c0ec92 Add a link to the experimental feature GitHub discussion 2025-02-03 10:24:53 +01:00
7a9382b115 Better document the rayon limitation condition 2025-02-03 10:24:53 +01:00
62dabeba5f Do not create too many rayon tasks when processing the settings 2025-02-03 10:24:52 +01:00
48812229a9 Remove a log that would log too much 2025-02-03 10:24:52 +01:00
915cc377fb Refine the env variable and the max readers 2025-02-03 10:24:52 +01:00
96544bfa43 add DOCUMENT_TEMPLATE_MAX_BYTES to allowed_sources_for_field and allowed_fields_for_source 2025-02-03 09:59:17 +01:00
09d474da63 Merge #5140
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 21s
Test suite / Tests on windows-2022 (push) Failing after 26s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Run Rustfmt (push) Successful in 4m7s
Test suite / Tests on ubuntu-20.04 (push) Failing after 14m22s
Test suite / Tests on macos-13 (push) Has been cancelled
5140: Fix workload inversion r=dureuill a=ManyTheFish

The used assets were inverted between `workloads/hackernews-modify-facet-numbers.json`
and `workloads/hackernews-modify-facet-strings.json`, now fixed.


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-03 08:22:22 +00:00
aaefbfae1f Do not create too many rayon tasks 2025-01-30 16:36:12 +01:00
97e17f52a1 Add more logs to see calls to the embedders 2025-01-30 16:36:12 +01:00
62ced0e3f1 Make cargo fmt happy 2025-01-30 11:09:54 +01:00
71bb24f17e Throw an error when the index is not found
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-30 11:07:43 +01:00
c72f114b33 Fix english in the comments
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-30 11:07:09 +01:00
8ed39f5de0 Merge #5300
5300: Improve unexpected panic message r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5273

## What does this PR do?
- When an unexpected panic happens in the index-scheduler we catch it and rebuild an error message from the join_error
- Same when the upgrade index-scheduler fails


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-30 09:23:17 +00:00
424c5bde40 Move the embedding computation and extraction log to debug 2025-01-29 16:40:36 +01:00
bdd3005d10 Log the progress when a batch fails 2025-01-29 16:36:23 +01:00
4224edea28 Merge #5177
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 0s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 23s
Test suite / Run Rustfmt (push) Successful in 2m17s
Test suite / Run Clippy (push) Successful in 5m55s
Test suite / Tests on macos-13 (push) Has been cancelled
5177: Debug log the channel congestion r=Kerollmops a=Kerollmops

This PR displays the congestion of the BBQueue channel and the allocated memory for the channel and the extraction. This information can be beneficial for debugging and noticing slow disks. We show three pieces of information in debug:
- The direct attempts: the number of tries to send something in the BBQueue channel,
- The blocked attempts: the number of unsuccessful attempts that must be retried,
- The congestion: The percentage of blocking attempts. The higher, the slower the receiver and, therefore, the disk.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-29 15:35:31 +00:00
cb1b7513af Log the memory metrics only once 2025-01-29 15:21:52 +01:00
2f89b8209f Merge #5291
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 12s
Test suite / Run Clippy (push) Failing after 21s
Test suite / Run Rustfmt (push) Successful in 1m43s
Test suite / Tests on windows-2022 (push) Failing after 5m39s
Test suite / Tests on macos-13 (push) Has been cancelled
5291: Fix Dotnet tests in sdks-tests.yml r=irevoire a=curquiza



Co-authored-by: Clémentine <clementine@meilisearch.com>
2025-01-29 14:18:48 +00:00
a9d0f4a002 Improve english comments
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-29 15:16:40 +01:00
db032079d8 Show indexation allocated memory 2025-01-29 14:21:02 +01:00
a00796c46a Improve the naming in the log message 2025-01-29 14:21:02 +01:00
6112bd8caa Display the channel congestion 2025-01-29 14:21:02 +01:00
cec88cfc29 Measure the bbqueue congestion 2025-01-29 14:21:02 +01:00
8439aeb7cf improve error message in case of unexpected panic while processing tasks 2025-01-29 11:51:06 +01:00
42257eec53 Merge #5272
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 0s
Test suite / Tests on windows-2022 (push) Failing after 14s
Test suite / Run Rustfmt (push) Successful in 1m59s
Test suite / Run Clippy (push) Successful in 5m48s
Test suite / Tests on macos-13 (push) Has been cancelled
5272: Fix Batches Deletion and flaky tests r=irevoire a=Kerollmops

- This PR fixes #5263 by removing the batches from the date and time databases.
- It also introduces a new `enqueued_at` field in the batch object to quickly retrieve them in the `batches.enqueued_at` database
- Finally, it probably fixes all the flaky tests of the batches: https://github.com/meilisearch/meilisearch/issues/5256

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-28 16:14:11 +00:00
1beda3b9af fix another flaky test 2025-01-28 16:53:50 +01:00
8676e94f5c fix the flaky tests 2025-01-28 16:53:50 +01:00
ef47a0d820 apply review comment 2025-01-28 16:53:50 +01:00
e0f0da57e2 make sure the batches we snapshot actually all contain an enqueued_at 2025-01-28 16:53:50 +01:00
485e3127c7 use the remove_n_tasks_datetime_earlier_than function when updating batches 2025-01-28 16:53:50 +01:00
58f90b70c7 store the enqueued_at to ease the batch deletion 2025-01-28 16:53:50 +01:00
508db9020d update the snapshots 2025-01-28 16:53:50 +01:00
6ff37c6fc4 Fix the insta snapshots 2025-01-28 16:53:50 +01:00
f21ae1f5d1 Remove the batch id from the date time databases 2025-01-28 16:53:50 +01:00
483c52f07b Merge #5289
5289: Fix workload files after removing the vectorStore experimental feature r=Kerollmops a=dureuill

Running the bench [currently fails](https://github.com/meilisearch/meilisearch/actions/runs/12990029453) on embedding-related workloads, due to the call to `/experimental-features` that is used to enable the vector store:

In v1.13, `vectorStore` is no longer an experimental feature, so trying to enable it causes a 400

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-28 10:28:21 +00:00
f0d7ab81ad Fix Dotnet tests in sdks-tests.yml 2025-01-27 15:37:32 +01:00
f88f415a00 Fix workload files after removing the vectorStore experimental feature 2025-01-27 14:39:28 +01:00
19bc885b07 Fix the milli logo 2025-01-27 14:30:59 +01:00
47f70e3d79 Debug the first vector sort fill buffer 2025-01-27 14:22:29 +01:00
0f8eb3b506 Improve the logs of the search with AI 2025-01-27 14:22:22 +01:00
4a5923a55e log the time arroy took to insert embeddings 2025-01-27 14:22:17 +01:00
de98656ed1 Merge #5210
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Run the indexing fuzzer / Setup the action (push) Failing after 6s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 14s
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Run Rustfmt (push) Failing after 8s
Test suite / Tests on windows-2022 (push) Failing after 20s
Test suite / Run Clippy (push) Successful in 5m48s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
5210: Improve test performance of get_index.rs  r=irevoire a=DerTimonius

# Pull Request

## Related issue
related to #4840

## What does this PR do?
This PR aims to improve the performance of the tests in `get_index.rs`.

There is a small issue though: 
the `list_multiple_indexes` test works great when run alone, but when run with other tests it fails with a `corrupted task queue` error. I guess this has something to do with using a shared server, but I was not really able to pinpoint the issue.

Also, the `no_index_return_empty_list` test does not work with a shared server (as there will now always be at least one index on the server), and I was not really sure whether rebuilding the whole suite for `get_and_paginate_indexes` would be viable. While waiting for feedback on the issue mentioned above, I'll try to change the `get_and_paginate_indexes` test so that it can use the shared server.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Timon Jurschitsch <timon.jurschitsch@gmail.com>
Co-authored-by: Timon Jurschitsch <103483059+DerTimonius@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-27 10:04:08 +00:00
da7469be38 removed unrelated files 2025-01-27 10:35:34 +01:00
df9d10ac44 Merge #5284
5284: Fix [5281] Removed CouldNotUpgrade from error file  r=irevoire a=manojks1999

# Pull Request

## Related issue
Fixes #5281

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: manojks1999 <9743manoj@gmail.com>
2025-01-27 09:26:39 +00:00
528d9d6d8b Removed CouldNotUpgrade from error file 2025-01-26 21:04:57 +05:30
4fb5c39b92 resolve merge conflicts 2025-01-24 14:35:54 +01:00
022205af90 Merge #5279
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 0s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 22s
Test suite / Run Rustfmt (push) Successful in 2m14s
Test suite / Run Clippy (push) Successful in 5m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h4m54s
Test suite / Tests on macos-13 (push) Has been cancelled
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5279: Bring back changes from v1.12.7 into main r=dureuill a=Kerollmops

This PR brings back v1.12.7 into main.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-24 11:48:46 +00:00
50280bf02b Support offline upgrade up to v1.12.7 2025-01-24 12:25:33 +01:00
9b579069df Comment the max grant of the bbqueue
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-24 12:18:32 +01:00
f5a4a1c8b2 Give more RAM to bbqueue.
- bbqueue buffers used to have (5% * 2%) / num_threads
- they now have 5% / num_threads
2025-01-24 12:18:32 +01:00
5ab4cdb1f3 Reduce the maximum grant possible we can store in the BBQueue 2025-01-24 12:18:32 +01:00
1f54f07f72 Merge #5264
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 21s
Test suite / Run Rustfmt (push) Failing after 8s
Test suite / Run Clippy (push) Successful in 6m30s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m21s
Test suite / Tests on macos-13 (push) Has been cancelled
5264: Dumpless upgrade r=dureuill a=irevoire

# Pull Request
Usage: https://meilisearch.notion.site/Dumpless-upgrade-fff4b06b651f81f1acafe24d4687b3f7?pvs=74

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5162

## What does this PR do?
- Implement the dumpless upgrade with multiple hooks:
  - In meilisearch directly before the task queue has been opened
  - In the index-scheduler while processing the task
  - In milli while upgrading the indexes
- There is no hook at search/query time to handle the old version of a database. That's left to the next person upgrading a database
- A new special type of task (`upgradeDatabase`) that can be retried has been introduced
- A new experimental cli flag has been introduced
- The version has been upgraded to the v1.13.0 in this PR otherwise it was a lot of useless work to test the dumpless upgrade
- Multiple tests have been introduced

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Update the issue template we use for features, mentioning what we should do in case of a database upgrade
- [ ] The experimental feature discussion should be opened and updated in the PR
- [ ] Update the PRD
    - [ ] Add the new error codes
    - [ ] Add the task details
    - [ ] Add the telemetry

## Notes

The new tests introduced are not _that_ slow
![image](https://github.com/user-attachments/assets/c5884540-482f-41eb-97ef-fc995c62d666)



Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-23 16:22:37 +00:00
73d8a4eace Remove db.snapshot 2025-01-23 17:21:42 +01:00
c1e5897076 Do not assume v1.12 when there is no index-scheduler version 2025-01-23 17:16:53 +01:00
718a98fbbf remove : char from filenames 2025-01-23 17:08:35 +01:00
86bf231d29 Change to meilitool after rebase 2025-01-23 16:59:32 +01:00
182c3f4b80 Write assumed version to the index-scheduler version db when it is missing 2025-01-23 16:51:25 +01:00
c1eba66443 introduce a corruption in the v1.12 data.ms field distribution 2025-01-23 16:51:24 +01:00
7197ced673 fix the bad index version on opening 2025-01-23 16:51:24 +01:00
4f21ee6c66 update the data.ms snapshot 2025-01-23 16:51:24 +01:00
787472453d write the version of the index while upgrading it 2025-01-23 16:51:24 +01:00
8f65f35de9 rewrite part of the index-scheduler upgrade test 2025-01-23 16:51:23 +01:00
c27c923439 introduce a trait to upgrade the indexes 2025-01-23 16:51:23 +01:00
fd5649091d add the upgradeTo field in the details 2025-01-23 16:51:23 +01:00
9a57736773 fix the early exit when rewriting a batch 2025-01-23 16:51:23 +01:00
7740997ea8 reintroduce the unrecoverable error and use it where it's supposed to be used 2025-01-23 16:51:22 +01:00
7eb23f73ba add the version to the index-scheduler snapshots + fix a bug when opening an index scheduler for the first time 2025-01-23 16:51:22 +01:00
b9e9fc376a add the version in the index-scheduler 2025-01-23 16:51:22 +01:00
27bf2f1298 remove the empty progress made for the upgrade database 2025-01-23 16:51:22 +01:00
d4d82fbd0c commit the index wtxn before the index-scheduler wtxn 2025-01-23 16:51:21 +01:00
eda09a54da improve the index-scheduler tests 2025-01-23 16:51:21 +01:00
b132d70413 fix the details in all cases 2025-01-23 16:51:21 +01:00
e41ebd3047 expose the number of databases in the index-scheduler and rewrite the lib.rs to use the value provided in the options instead of a magic number 2025-01-23 16:51:21 +01:00
705d31e8bd apply all the comments changes 2025-01-23 16:51:21 +01:00
7d95950ce6 fix warning 2025-01-23 16:51:21 +01:00
c6b4c21c23 update the snapshots after the rebase 2025-01-23 16:51:20 +01:00
bf96fdb858 update the cli url 2025-01-23 16:51:20 +01:00
41eeffd88d fmt 2025-01-23 16:51:20 +01:00
1eb9fe8562 remove warnings 2025-01-23 16:51:20 +01:00
bac7a1623a fix the upgrade test 2025-01-23 16:51:19 +01:00
5458850d21 write a test ensuring the index-scheduler is effectively down when the upgrade task fails and tries to process it when it restarts. There is a bug when deleting this task 2025-01-23 16:51:19 +01:00
20ac59c946 fix the field distribution when upgrading from the v1_12 2025-01-23 16:51:19 +01:00
cfc1e193b6 update the test with the stats 2025-01-23 16:51:19 +01:00
0cc25c7e4c add a large test importing a data.ms from the v1.12.0 2025-01-23 16:51:18 +01:00
102681e384 starts adding tests and fix the starts of meilisearch 2025-01-23 16:51:18 +01:00
3ef7a478cd move the version check to the task queue 2025-01-23 16:48:32 +01:00
e70ac35e02 fix bugs after rebase 2025-01-23 16:48:32 +01:00
d3654906bf Add the new tasks with most of the job done 2025-01-23 16:48:32 +01:00
e6295c9c5f Introduce a meilitool subcommand to compact an index 2025-01-22 16:37:00 +01:00
b15de68831 Merge #5257
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 0s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Failing after 11s
Test suite / Tests on windows-2022 (push) Failing after 43s
Test suite / Run Rustfmt (push) Successful in 2m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m6s
Test suite / Tests on macos-13 (push) Has been cancelled
5257: Fix ollama r=Kerollmops a=dureuill

Fix oversight in ollama embedder 

WIP Integration tests are on branch `ollama-integration-test` and will be added as a future PR.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-22 15:36:15 +00:00
6723700fb9 Merge #5262
5262: Bring back changes from v1.12.4, v1.12.5, and v1.12.6 into main r=dureuill a=Kerollmops

This PR follows [this guideline to bring back changes after we worked on v1.12.4, v1.12.5, and v1.12.6](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md#after-the-release-bring-back-changes-to-main).

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2025-01-22 14:55:02 +00:00
2c099b7c23 Update Cargo.lock again 2025-01-22 15:53:52 +01:00
50fca8fc70 Create update files in new format 2025-01-22 15:51:21 +01:00
b9d92c481b Update version for the next release (v1.12.6) in Cargo.toml 2025-01-22 15:51:20 +01:00
d142c5e432 Do not panic when the facet string is not found 2025-01-22 15:50:43 +01:00
4d4683adb6 Add a test to check the facet casing is good 2025-01-22 15:50:42 +01:00
d6063079af Unify facet strings by their normalized value 2025-01-22 15:50:42 +01:00
2e04ab4737 Replace guards by OR patterns
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-22 15:50:42 +01:00
d95384a636 Remove batch ids on export 2025-01-22 15:50:42 +01:00
c0690f5b9e Make offline upgrade more flexible 2025-01-22 15:50:42 +01:00
909d84447d meilitool dumps old-style dump for older DBs, otherwise new-style 2025-01-22 15:50:42 +01:00
2cf57d584e Handle empty payloads 2025-01-22 15:50:42 +01:00
59242b9c4f Fix warnings 2025-01-22 15:50:42 +01:00
6a6212d4e1 Fix warnings 2025-01-22 15:50:42 +01:00
a8006a3750 Change format of update file when importing dump 2025-01-22 15:50:41 +01:00
0e0e462f5b Also fix dump import from meilitool 2025-01-22 15:50:41 +01:00
805531c90d Do not explode on missing content file if the task has no docs 2025-01-22 15:50:41 +01:00
a6470a0c37 Improve error log 2025-01-22 15:50:41 +01:00
8a54f14b8e Demote panic to error log 2025-01-22 15:49:24 +01:00
be5e521cb0 Merge #5271
5271: Update version for the next release (v1.13.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2025-01-22 13:04:20 +00:00
60f20119a2 Update version for the next release (v1.13.0) in Cargo.toml 2025-01-22 10:52:47 +00:00
2f257fdc3d fix clippy error 2025-01-21 17:11:29 +01:00
0991cb0de4 change list_multiple_indexes test to single server 2025-01-21 17:01:45 +01:00
4709c638ed Swap implementations of ollama 2025-01-20 22:22:22 +01:00
0776217801 Merge #5234
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Failing after 15s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 12m54s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Look for flaky tests / flaky (push) Failing after 7s
Publish binaries to GitHub release / Check the version validity (push) Successful in 11s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 1s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 15s
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Failing after 24s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests almost all features (push) Failing after 1s
Test suite / Test disabled tokenization (push) Failing after 2s
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Tests on windows-2022 (push) Failing after 26s
Test suite / Run Clippy (push) Failing after 21s
Test suite / Run Rustfmt (push) Successful in 1m37s
Test suite / Tests on macos-13 (push) Has been cancelled
5234: Parse ollama URL to adapt configuration depending on the endpoint r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5002 

## What does this PR do?
- Parses `url` parameter of `ollama` to recognize supported endpoint and adapt the REST configuration to the recognized endpoint
- Throws a new error if no endpoint is recognized
- Add a test for the various recognized endpoints


Thanks to `@Guikingone` for the original report and PR

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-20 09:51:42 +00:00
9eae36ce3e update snapshot 2025-01-16 17:17:06 +01:00
3f501c9b85 Update crates/index-scheduler/src/scheduler/test.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-01-16 16:13:14 +01:00
c85146524b Merge #5232
Some checks failed
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m2s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Look for flaky tests / flaky (push) Failing after 1s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
Test suite / Tests almost all features (push) Failing after 2s
Test suite / Test disabled tokenization (push) Failing after 0s
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Tests on ubuntu-20.04 (push) Failing after 21s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 24s
Test suite / Run Rustfmt (push) Failing after 17s
Test suite / Run Clippy (push) Failing after 6m47s
Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
5232: Stabilize vector store feature r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #4733 

## What does this PR do?
- `vectorStore` feature can no longer be set or get from `/experimental-features`
- That feature has been removed, and there is no longer any check for its activation
- Always display `embedders` in the settings, even if empty
- Always hide `_vectors` in documents, unless `retrieveVectors: true`
- Make error codes consistent with the usual nomenclature
- Update tests as needed


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-16 11:50:21 +00:00
79d192fb3f implement suggestions 2025-01-16 11:42:12 +01:00
a4ed36f0cc Merge branch 'main' of github.com:meilisearch/meilisearch into chore/update-get-index-test 2025-01-16 11:17:17 +01:00
dddb51a9ca removed trailing whitespace so cargo fmt passes 2025-01-15 13:30:10 -05:00
8f006eeaf3 Merge #5239
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 27s
Test suite / Tests on ubuntu-20.04 (push) Failing after 20s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 26s
Test suite / Run Clippy (push) Successful in 7m55s
Test suite / Run Rustfmt (push) Successful in 2m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h6m18s
5239: Fix corrupted task queue errors on index creation r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5238

## What does this PR do?
- Add a test that reproduces the issue and ensure we never introduce the bug again
- Fix the bug by storing the stats of the index upon creation instead of waiting for the update index task to do it


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-15 12:45:25 +00:00
445e5aff02 fix the corruption 2025-01-15 12:38:40 +01:00
234d0c360f Add a test reproducing the issue 2025-01-15 12:29:56 +01:00
cd181b36c3 all test cases now passing 2025-01-14 17:50:31 -05:00
4cfe0dbdd8 Merge #5237
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 26s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Successful in 7m58s
Test suite / Run Rustfmt (push) Successful in 2m44s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m43s
5237: Bring back v1.12.3 changes into main r=irevoire a=dureuill

This brings back the (already reviewed) changes of v1.12.3 into main:

1. fix the field distribution issue
2. improve the error message when trying to delete a non-existing key

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-01-14 14:04:48 +00:00
89a4ac92eb fix after rebase 2025-01-14 14:08:56 +01:00
deb90ff573 Fix tests 2025-01-14 13:55:34 +01:00
0c10063a87 PATCH experimental-features also returns the route type rather than internal type 2025-01-14 13:55:34 +01:00
87ea080c10 Fully remove vector store feature 2025-01-14 13:55:34 +01:00
6d62fa061b Fix tests 2025-01-14 13:55:34 +01:00
de6cd3ac01 Consistent error codes 2025-01-14 13:55:34 +01:00
cb8f033130 Fix tests 2025-01-14 13:55:34 +01:00
03097e65e8 Always display embedders setting 2025-01-14 13:55:34 +01:00
c32bec338f Fix tests 2025-01-14 13:55:33 +01:00
73d3d286d9 Serialize features as camelCase 2025-01-14 13:53:53 +01:00
29eeb84ce3 Add --experimental-disable-vector-store CLI flag 2025-01-14 13:53:53 +01:00
d78951feb7 vectorStore stabilization
- `vectorStore` feature is always enabled
- `vectorStore` can no longer be set in the `/experimental-features` PATCH route
- `vectorStore` status is no longer returned in the `/experimental-features` GET route
2025-01-14 13:53:53 +01:00
63c8cbae5b Improve the panic message when deleting an unknown entry 2025-01-14 10:31:44 +01:00
72ded27e98 Update after review 2025-01-14 10:24:50 +01:00
c25781f720 Skip rebuilding field distribution if not coming from v1.12 2025-01-14 10:24:28 +01:00
c3b18fede9 write stats after rebuilding facet distribution 2025-01-14 10:24:27 +01:00
4070895a21 Add support to upgrade to v1.12.3 in meilitool 2025-01-14 10:24:27 +01:00
a21711f473 Fix test 2025-01-14 10:23:59 +01:00
f0ec8cbffe Add currently failing test 2025-01-14 10:23:15 +01:00
e568dbbabb Merge #5182
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 13s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 6m11s
Test suite / Run Rustfmt (push) Successful in 2m38s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m8s
5182: Remove hard coded task ids to prevent flaky tests r=irevoire a=mhmoudr

# Pull Request

## Related issue
Partially fixes #4840

## What does this PR do?
- Mainly scans the test code for any hard-coded task ID and replaces it with the task ID returned once the action or task is performed on an index.
- PS: _PR is not split by files as it has one theme applied to all tests which make it easy to review_ 


## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Mahmoud Rawas <mhmoudr@gmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-13 15:18:55 +00:00
8ff15b3dfb fix the tests 2025-01-13 16:17:50 +01:00
247eaed872 Merge #5221
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 1m4s
Test suite / Tests on ubuntu-20.04 (push) Failing after 27s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 20s
Test suite / Run Clippy (push) Successful in 8m45s
Test suite / Run Rustfmt (push) Successful in 2m31s
5221: Merge bitmaps by using `Extend::extend` r=Kerollmops a=Kerollmops

This PR tries to speed up the merging of bitmaps by using [the new `Extend::extend` implementation](https://github.com/RoaringBitmap/roaring-rs/pull/306).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-13 13:43:28 +00:00
8b1fcfd7f8 Parse ollama URL to adapt configuration depending on the endpoint 2025-01-13 14:34:11 +01:00
45f289488d Add test for url checks on ollama embedders 2025-01-13 14:33:30 +01:00
b0ef7701ae Merge #5231
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h6m28s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Publish binaries to GitHub release / Check the version validity (push) Successful in 8s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 8s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 27s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 19s
Test suite / Tests on ubuntu-20.04 (push) Failing after 26s
Test suite / Tests almost all features (push) Failing after 25s
Test suite / Test disabled tokenization (push) Failing after 26s
Test suite / Run tests in debug (push) Failing after 47s
Test suite / Run Rustfmt (push) Successful in 4m40s
Test suite / Run Clippy (push) Successful in 11m19s
5231: Improve openapi r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/open-api/issues/17
Fixes https://github.com/meilisearch/open-api/issues/13
Fixes https://github.com/meilisearch/open-api/issues/14
Fixes https://github.com/meilisearch/open-api/issues/16


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-13 13:04:09 +00:00
c9fb6c48b8 Update crates/meilisearch/src/routes/features.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-13 13:43:38 +01:00
0de34aa8fa avoid generating the same operationId 2025-01-13 12:36:22 +01:00
6bfcad4b05 Add the server property 2025-01-13 12:13:39 +01:00
67a0c9fff8 remove trailing slash in path 2025-01-13 11:55:59 +01:00
cc4aca78c4 Merge #5220
5220: Merge back changes of v1.12.2 in main r=dureuill a=dureuill



Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: dureuill <dureuill@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-13 10:54:36 +00:00
5c7fa9b924 fix the examples of the experimental-feature route 2025-01-13 11:40:57 +01:00
9837de271d fixed majority of errors 2025-01-10 15:31:45 -05:00
fd251c37bb Merge #5225
Some checks failed
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 53s
Test suite / Run Clippy (push) Successful in 6m26s
Test suite / Run Rustfmt (push) Successful in 1m38s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m31s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5225: Update license for 2025 r=curquiza a=meili-bot

_This PR is auto-generated._


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2025-01-10 13:28:49 +00:00
adb6bca950 Update LICENSE 2025-01-10 14:19:54 +01:00
42854c0bca Merge #5223
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 6m16s
Test suite / Run Rustfmt (push) Successful in 1m39s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m11s
5223: Limit batched tasks total size r=curquiza a=Kerollmops

Introduce a new engine parameter (env and config, too) to limit the maximum payload size processed by the engine in batches. You can [review the Discussion and usage on GitHub](https://github.com/orgs/meilisearch/discussions/801).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-09 16:13:17 +00:00
d0bdff7b7b Make the batched tasks size limit effectively work 2025-01-09 12:06:28 +01:00
8650ee66c1 Introduce the new experimental-limit-batched-tasks-total-size argument 2025-01-09 12:06:28 +01:00
377fa09cb7 Merge pull request #5218 from meilisearch/upgrade-dependencies
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m17s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Look for flaky tests / flaky (push) Failing after 9s
Publish binaries to GitHub release / Check the version validity (push) Successful in 8s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 8s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 9s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 28s
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 13s
Test suite / Tests almost all features (push) Failing after 8s
Test suite / Test disabled tokenization (push) Failing after 7s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Rustfmt (push) Successful in 2m26s
Test suite / Run Clippy (push) Successful in 5m40s
Upgrade dependencies
2025-01-09 11:46:44 +01:00
00a03742ff Prefer using extend when merging bitmaps than unions (less allocations) 2025-01-09 10:42:38 +01:00
d11e359244 When spilling on the next fid, no longer ignore children 2025-01-09 10:36:38 +01:00
09d45439c7 Check valid_facet_value as part of a filter of the iterator 2025-01-09 10:36:38 +01:00
5d92da0c73 No longer ignore the first child without parent 2025-01-09 10:36:38 +01:00
677bb39e73 Modernize valid_lmdb_key 2025-01-09 10:36:38 +01:00
85ea77de0b Switch to an iterative algorithm for find_changed_parents 2025-01-09 10:36:38 +01:00
03317be0bd Update after review 2025-01-09 10:36:38 +01:00
4aa7c8f7b1 Remove unused FacetFieldIdOperation 2025-01-09 10:36:37 +01:00
ce57a342a3 center groups 2025-01-09 10:36:37 +01:00
1cc6cd78e0 Fix uselessly deep stack trace 2025-01-09 10:36:37 +01:00
c204afdc79 Update snapshot 2025-01-09 10:36:37 +01:00
c14967eeac Use new incremental facet indexing and enable sanity checks in debug 2025-01-09 10:36:35 +01:00
f38db86120 Add new incremental facet indexing 2025-01-09 10:24:36 +01:00
50b155fa2d add valid_facet_value utility function 2025-01-09 10:24:36 +01:00
a533c8e041 Add sanity checks for facet values 2025-01-09 10:24:36 +01:00
e5595a05df Update version for the next release (v1.12.2) in Cargo.toml 2025-01-09 10:24:36 +01:00
908adee6fc Fix the addition of empty payload 2025-01-09 10:24:36 +01:00
7b3353252f update the test to ensure it works when specifying the primary key or not: it doesn't work 2025-01-09 10:24:35 +01:00
647a10bf18 stop skipping empty tasks when adding documents 2025-01-09 10:24:34 +01:00
f2141a894a Bump roaring to v0.10.10 2025-01-09 10:21:05 +01:00
08c332980b add a test reproducing the bug 2025-01-09 10:12:12 +01:00
7b57a44b5a Update version for the next release (v1.12.1) in Cargo.toml 2025-01-09 10:12:12 +01:00
fe2c0cc3d5 Bump rust version to v1.81 2025-01-09 09:47:08 +01:00
eecf4c53e7 updated changes 2025-01-08 15:10:09 -05:00
cf4c3c287b Make rustfmt happy 2025-01-08 18:24:39 +01:00
71e5605daa Make clippy happy 2025-01-08 18:24:39 +01:00
890a5c64dd Merge #5216
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Failing after 9s
Test suite / Test disabled tokenization (push) Failing after 9s
Test suite / Run tests in debug (push) Failing after 9s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m27s
Test suite / Run Rustfmt (push) Successful in 1m37s
Test suite / Run Clippy (push) Successful in 5m58s
5216: Add support for GITHUB_TOKEN authentication in installation script r=curquiza a=Sherlouk

# Pull Request

## What does this PR do?
This tweaks the install script to support detection of a "GITHUB_TOKEN" variable. This is well documented [here](https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication) but is useful for GitHub Actions workflows, reducing the need for users to maintain a separate PAT token. This should be more reliable.

Note: these changes have been tested on the Swift project: https://github.com/meilisearch/meilisearch-swift/pull/464.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: James Sherlock <15193942+Sherlouk@users.noreply.github.com>
2025-01-08 17:15:10 +00:00
0ee4671a91 Fix after upgrading candle 2025-01-08 15:59:56 +01:00
68333424c6 Remove a useless script test 2025-01-08 15:59:43 +01:00
d4529d8c83 Fix after upgrading sysinfo 2025-01-08 15:59:30 +01:00
5e8144b0e1 Remove fuzzing feature 2025-01-08 15:59:03 +01:00
3e3695445f Fix after upgrading thiserror 2025-01-08 15:58:32 +01:00
091f989b72 Upgrade incompatible dependencies 2025-01-08 15:58:03 +01:00
dd28a3fd5a Bump the minimal version to 1.81 as we use std LazyLock 2025-01-08 15:31:24 +01:00
6f24b438e0 Ignore benchmarks folder 2025-01-08 15:31:24 +01:00
48a9ad4c17 Fix insta to 1.39 2025-01-08 15:18:08 +01:00
b997039a91 Upgrade compatible dependencies 2025-01-08 13:52:14 +01:00
0e6b6bd130 Merge #4867
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 48s
Test suite / Run Clippy (push) Successful in 7m0s
Test suite / Run Rustfmt (push) Successful in 1m55s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m59s
4867: Autogenerate the openAPI spec r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5073

## What does this PR do?
- Introduce utoipa and the auto-generation of the openAPI file
- Introduce the scalar swagger when the `swagger` feature flag is enabled.

Generating the openAPI file takes between 15 and 20ms at startup time on my computer. That could be an issue if we plan to stabilize the feature.

Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-08 09:45:50 +00:00
b1b0b0b67c Merge #5168
5168: Refactor indexer r=ManyTheFish a=dureuill

# Pull Request

Split the indexer mod into multiple submodules. 

This restores the ability of rustfmt to format the file 🎉

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-08 09:12:49 +00:00
27155f845c adding back a missing task wait. 2025-01-08 15:36:10 +11:00
c6f14279d7 remove unused imports. 2025-01-08 15:11:34 +11:00
fa15356209 Add support for GITHUB_TOKEN authentication 2025-01-07 20:21:00 +00:00
99f5e09a79 fix the tests 2025-01-07 16:42:53 +01:00
a8ef6f08e0 Update crates/meilisearch-types/src/settings.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-07 16:31:39 +01:00
ae5a04e85c apply review comments 2025-01-07 16:30:14 +01:00
8ebfc9fa92 Update crates/meilisearch-types/src/settings.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-07 16:30:14 +01:00
21026f0ca8 move the swagger behind a feature flag 2025-01-07 16:30:14 +01:00
e579554c84 fmt 2025-01-07 16:30:12 +01:00
ff49250c1a remove useless doc 2025-01-07 16:29:09 +01:00
8b95c6ae56 improve the description of all the settings route 2025-01-07 16:29:09 +01:00
28162759a4 fix imports after rebase 2025-01-07 16:29:08 +01:00
dd128656cb fix all the tests 2025-01-07 16:28:12 +01:00
4456df5a46 fix some tests 2025-01-07 16:28:11 +01:00
0b104b3efa fix the list indexes 2025-01-07 16:26:06 +01:00
ac944f0960 review all the return type 2025-01-07 16:26:06 +01:00
5f55e88484 review all the parameters and tags 2025-01-07 16:26:06 +01:00
aab6ffec30 fix and review all the documents route 2025-01-07 16:26:06 +01:00
1dd33af8a3 add the batches 2025-01-07 16:26:06 +01:00
8a2a1e4d27 add the experimental features route 2025-01-07 16:26:06 +01:00
e2686c0fce add the swap indexes 2025-01-07 16:26:06 +01:00
9473a2a6ca add the multi-search 2025-01-07 16:26:06 +01:00
11ce3b9636 fix the settings 2025-01-07 16:26:06 +01:00
0bf4157a75 try my best to make the sub-settings routes works, it doesn't 2025-01-07 16:26:06 +01:00
4eaa626bca add the similar route 2025-01-07 16:26:06 +01:00
668b26b641 add the facet search 2025-01-07 16:26:06 +01:00
04e4586fb3 add the searches route and fix a few broken things 2025-01-07 16:26:06 +01:00
78f6f22a80 implement all the /indexes/documents route 2025-01-07 16:26:06 +01:00
13afdaf393 finish rebase and update utoipa to the latest version 2025-01-07 16:26:06 +01:00
742d0ee531 Implements the get and delete tasks route 2025-01-07 16:26:04 +01:00
4275833bab Rename compute.rs to post_process.rs 2025-01-07 15:31:20 +01:00
de7f8c4406 refactor indexer mod 2025-01-07 15:29:02 +01:00
f00a285a6d Merge #5199
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 24s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 33s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 6m27s
Test suite / Run Rustfmt (push) Successful in 1m44s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m26s
5199: Refactorize the index-scheduler r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5115

## What does this PR do?
- Extract all the « task/batch queue » part of the `lib.rs` to a `queue` module containing:
  - The batches, and their tests in another file
  - The tasks, and their tests in another file
- Extract all the « scheduler » stuff to another module 
  - One file for the batch creation
  - One file for the autobatcher
  - One file for the batch process
  - The tests are a bit messier and are made by features (i.e.: All the embedder tests in one file)
- The average size of the files is around 500 loc now and R-A is way faster


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-07 14:05:21 +00:00
43bb02e7b4 split the autobatcher in two 2025-01-07 15:02:03 +01:00
56fd4ee9bd Merge #5211
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 46s
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 6m53s
Test suite / Run Rustfmt (push) Successful in 1m40s
Run the indexing fuzzer / Setup the action (push) Successful in 1h6m8s
5211: Update README.md with AI integrations (langchain & MCP) r=ManyTheFish a=tpayet

With AI integrations 🤖

# Pull Request

## Related issue
None

## What does this PR do?
- Update the README with AI integrations

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Thomas Payet <thomas@meilisearch.com>
2025-01-07 08:39:09 +00:00
0625d08e4e adding a function to extract batch_uid from json and modifying the get_batch interface for easier call - did not work, so falling back to hard coded batch id for now. 2025-01-07 12:07:33 +11:00
9269086fda fixing a rebase issue 2025-01-07 11:48:09 +11:00
98e3ecb86b Format fixes after running: cargo +nightly fmt 2025-01-07 11:16:37 +11:00
9af9e73c45 Update README.md
With AI integrations 🤖
2025-01-06 18:02:30 +01:00
4b107b17cb test: improve performance of get_index.rs 2025-01-06 17:38:44 +01:00
cb82b0798a Split the index-scheduler in ~500 loc modules 2025-01-06 14:08:26 +01:00
7f1071943e Merge #5198
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 24s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 58s
Test suite / Run Clippy (push) Successful in 6m41s
Test suite / Run Rustfmt (push) Successful in 1m42s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m19s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5198: Bump Swatinem/rust-cache from 2.7.5 to 2.7.7 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.5 to 2.7.7.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.7.7</h2>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.7.6...v2.7.7">https://github.com/Swatinem/rust-cache/compare/v2.7.6...v2.7.7</a></p>
<h2>v2.7.6</h2>
<h2>What's Changed</h2>
<ul>
<li>Updated artifact upload action to v4 by <a href="https://github.com/guylamar2006"><code>@​guylamar2006</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/212">Swatinem/rust-cache#212</a></li>
<li>Adds an option to do lookup-only of the cache by <a href="https://github.com/danlec"><code>@​danlec</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/217">Swatinem/rust-cache#217</a></li>
<li>add runner OS in cache key by <a href="https://github.com/rnbguy"><code>@​rnbguy</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/220">Swatinem/rust-cache#220</a></li>
<li>Allow opting out of caching $CARGO_HOME/bin. by <a href="https://github.com/benjyw"><code>@​benjyw</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/216">Swatinem/rust-cache#216</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/guylamar2006"><code>@​guylamar2006</code></a> made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/212">Swatinem/rust-cache#212</a></li>
<li><a href="https://github.com/danlec"><code>@​danlec</code></a> made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/217">Swatinem/rust-cache#217</a></li>
<li><a href="https://github.com/rnbguy"><code>@​rnbguy</code></a> made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/220">Swatinem/rust-cache#220</a></li>
<li><a href="https://github.com/benjyw"><code>@​benjyw</code></a> made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/216">Swatinem/rust-cache#216</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.7.5...v2.7.6">https://github.com/Swatinem/rust-cache/compare/v2.7.5...v2.7.6</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="f0deed1e0e"><code>f0deed1</code></a> 2.7.7</li>
<li><a href="008623fb83"><code>008623f</code></a> also cache <code>cargo install</code> metadata</li>
<li><a href="720f7e45cc"><code>720f7e4</code></a> 2.7.6</li>
<li><a href="4b1f006ad2"><code>4b1f006</code></a> update dependencies, in particular <code>@actions/cache</code></li>
<li><a href="e8e63cdbf2"><code>e8e63cd</code></a> Allow opting out of caching $CARGO_HOME/bin. (<a href="https://redirect.github.com/swatinem/rust-cache/issues/216">#216</a>)</li>
<li><a href="9a2e0d3212"><code>9a2e0d3</code></a> add runner OS in cache key (<a href="https://redirect.github.com/swatinem/rust-cache/issues/220">#220</a>)</li>
<li><a href="c00f3025ca"><code>c00f302</code></a> Adds an option to do lookup-only of the cache (<a href="https://redirect.github.com/swatinem/rust-cache/issues/217">#217</a>)</li>
<li><a href="68b3cb7503"><code>68b3cb7</code></a> Updated artifact upload action to v4 (<a href="https://redirect.github.com/swatinem/rust-cache/issues/212">#212</a>)</li>
<li>See full diff in <a href="https://github.com/swatinem/rust-cache/compare/v2.7.5...v2.7.7">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.7.5&new-version=2.7.7)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-02 09:18:13 +00:00
3c1e7c7428 Bump Swatinem/rust-cache from 2.7.5 to 2.7.7
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.5 to 2.7.7.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.7.5...v2.7.7)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-01 17:44:53 +00:00
baeefa4817 Merge #5166
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 2s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 53s
Test suite / Run Clippy (push) Successful in 6m35s
Test suite / Run Rustfmt (push) Successful in 1m41s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m27s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5166: fix list indexes r=dureuill a=irevoire

# Pull Request

### Smol benchmark on a meilisearch with 1009 indexes:

**Before** this PR on my computer, it was taking 5.5s to call the `GET /indexes` route on a cold computer where all the indexes were closed.
**After** this PR it takes 0.009s to call the route on the first 20 indexes, and 0.176 for the last 20 indexes (retrieving the first or last indexes on main has no impact on performances).

If my computations are right, that's between 61111.1% and 3125% faster on this test 😂 

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4694

## What does this PR do?
- Add the primary key to the cache we already have in the index-mapper
- Provide a new route to retrieve the paginated indexes straight from the cache without opening them
- Fix a bug where the cache was not computed when loading a dump and was forcing us to open the indexes to compute their stats on the fly

## Is it breaking?

Since the field I added is an `Option` I think we should consider it as non-breaking and let it update itself automatically on the next operation of this index.
I also tested running my patch over a DB generated on release-v1.12.0 and it works. Importing a dump also works.

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-31 10:55:22 +00:00
e8ba7833ec Update crates/meilisearch/src/routes/indexes/mod.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-31 10:43:22 +01:00
db676aee73 Update crates/meilisearch/src/routes/indexes/mod.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-31 10:43:12 +01:00
1a0d8810e5 Merge #5178
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 52s
Test suite / Run Clippy (push) Successful in 6m45s
Test suite / Run Rustfmt (push) Successful in 1m43s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m31s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5178: Add Prometheus metrics to measure task queue latency r=irevoire a=takaebato

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5046

## What does this PR do?

- Added Prometheus metrics to measure task queue latency

(Confirmed locally that latency is measured during parallel task execution in the benchmark.)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Takahiro Ebato <takahiro.ebato@gmail.com>
2024-12-30 14:47:15 +00:00
4615d86748 Merge #5169
5169: Replace hardcoded string with constants r=irevoire a=Gnosnay

# Pull Request

## Related issue
Fixes #5136

## What does this PR do?
- Replace all hardcoded "_geo" strings with a single constant.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Gnosnay <iamgnosnay@gmail.com>
2024-12-30 14:12:05 +00:00
525e67ba93 Fix the format and linter error 2024-12-28 20:35:55 +08:00
44eb153619 Replace hardcoded string with constants 2024-12-28 20:35:55 +08:00
195785c47f Add a task queue latency panel to the grafana dashboard 2024-12-27 23:26:20 +09:00
4eae92f411 fix list indexes 2024-12-26 18:48:25 +01:00
d7cb319217 #4840 - Partial fix - Confirm task success after waiting for it - continued, few missing cases - batch 2 2024-12-24 23:07:43 +11:00
15062e7dba #4840 - Partial fix - Confirm task success after waiting for it - continued, few missing cases. 2024-12-24 23:06:07 +11:00
bf19f86e38 #4840 - Partial fix - Confirm task success after waiting for it. 2024-12-24 23:06:07 +11:00
91c7ef8723 #4840 - Partial fix - Remove hard coded task ids to prevent flaky tests.
# Conflicts:
#	crates/meilisearch/tests/documents/add_documents.rs
#	crates/meilisearch/tests/search/facet_search.rs
#	crates/meilisearch/tests/settings/get_settings.rs
#	crates/meilisearch/tests/snapshot/mod.rs
2024-12-24 23:05:59 +11:00
fc23a0ee52 Merge #5135
Some checks failed
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 24s
Test suite / Tests on ubuntu-20.04 (push) Failing after 16s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Clippy (push) Successful in 57m37s
Test suite / Run Rustfmt (push) Successful in 7m19s
Run the indexing fuzzer / Setup the action (push) Failing after 1h41m18s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5135: Check all search filter attributes are filterable upfront r=curquiza a=jameshiew

# Pull Request

## Related issue
Fixes #5069

## What does this PR do?
- checks all `fid`s in the `Filter` tree are filterable before evaluating search query
- returns AttributeNotFilterable error if any are not

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!

Co-authored-by: James Hiew <james@hiew.net>
2024-12-24 10:09:35 +00:00
d3491851bc Merge #5187
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 20s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 16s
Test suite / Run Clippy (push) Successful in 33m58s
Test suite / Run Rustfmt (push) Successful in 11m45s
Run the indexing fuzzer / Setup the action (push) Successful in 1h10m33s
5187: Bring back v1.12.0 of pre-release changes into `main` r=irevoire a=curquiza



Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-12-23 10:59:33 +00:00
886404cc4d Merge #5184
5184: Fix typo in a comment r=curquiza a=eltociear



# Pull Request


## What does this PR do?
- formating -> formatting

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ikko Eltociear Ashimine <eltociear@gmail.com>
2024-12-23 09:55:52 +00:00
75a7f0e26c chore: update mod.rs
formating -> formatting
2024-12-21 22:09:15 +09:00
47827ca5c1 Add Prometheus metrics to measure task queue latency 2024-12-21 18:29:30 +09:00
f75d74a967 removed formatting issue 2024-12-20 16:28:30 -05:00
42648919c7 updated settings to pass cargo fmt check 2024-12-19 10:24:15 -05:00
6987cac1ba Merge #5174
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 22s
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 29s
Test suite / Run Rustfmt (push) Successful in 1h14m6s
Test suite / Run Clippy (push) Failing after 2h47m59s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m44s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5174: Split tests for option crate meilisearch in a separate test file r=irevoire a=K-Kumar-01

# Pull Request
Splits the tests for meilisearch option crate in a separate testfile.

## Related issue
Partially solves #5116

## What does this PR do?
- Splits the test for `/src/option.rs` into a separate file `/src/option_test.rs` in meilisearch crate


## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-19 09:39:25 +00:00
082237863e Merge #5175
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 18s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 8s
Test suite / Run Clippy (push) Successful in 6m42s
Test suite / Run Rustfmt (push) Successful in 1m42s
Run the indexing fuzzer / Setup the action (push) Failing after 20m27s
5175: fix the flaky batches test r=dureuill a=irevoire

## What does this PR do?
I finally reproduced the flaky test in the CI here: https://github.com/meilisearch/meilisearch/actions/runs/12390709982/job/34586313125

I cannot reproduce it locally even with `cargo flaky --iter 2000` so I'm not 100% sure my fix will work.
But what I did was definitely part of the flakiness of the tests: we were querying a batch that could in some cases not be started.
That worked well for the tasks since an enqueued task is already written on disk, but since batches do not exist if they're not processing, they were just missing.

---

I also changed what we were doing because there is no point in doing an indexing process for this test

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-18 12:47:50 +00:00
4bcdd7a9f9 fix the flaky batches test 2024-12-18 11:51:12 +01:00
fc4b7ccb70 Merge #5173
5173: Remove obsolete test code r=irevoire a=K-Kumar-01

# Pull Request
Removes the test from the meilisearch/search/mod.rs. The tests were already split in the PR #5171 


## What does this PR do?
- Removes the obsolete tests

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-18 10:26:02 +00:00
df9ac07922 tests: split tests option crate in separate test file
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-18 02:55:02 +05:30
ba27a09efe refactor: fmt
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-18 02:28:02 +05:30
bc51d3a918 refactor: remove obsolete test code
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-18 02:18:57 +05:30
b39d4e9b50 removed unused import 2024-12-17 12:01:06 -05:00
b18cd9075d Merge #5171
Some checks failed
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 13s
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Clippy (push) Successful in 6m34s
Test suite / Run Rustfmt (push) Successful in 1m50s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m21s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5171: tests: split tests in separate file r=irevoire a=K-Kumar-01

# Pull Request
Splits the tests for meilisearch search crate in a separate testfile.

## Related issue
Partially solves #5116.

## Related Pull Requests
https://github.com/meilisearch/meilisearch/pull/5134

## What does this PR do?
- Splits the test for `/search/mod.rs` into a separate file `search/mod_test.rs` in meilisearch crate

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Kushal Kumar <kushalkumargupta4@gmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-17 10:42:16 +00:00
36b897858a fmt 2024-12-17 11:40:28 +01:00
a7b2f461cf fixed the cargo errors that were occurring 2024-12-16 18:01:27 -05:00
fce132a21b tests: split tests in separate file
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-17 03:04:50 +05:30
9c857ff48f handling error where multiple attributes aren't allowed to be checked, only checking single now since this is being executed in make_setting_route 2024-12-16 16:08:22 -05:00
f27b33dabe undid changes from the pull 1.12.0 branch 2024-12-16 13:27:57 -05:00
9eb4b84abd now checking to ensure that all the settings in the struct are listed in this macro. 2024-12-16 13:23:24 -05:00
71834787ec Merge #5134
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 16s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Clippy (push) Successful in 6m16s
Test suite / Run Rustfmt (push) Successful in 1m55s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m33s
5134: Split Meilisearch Crate Tests in separate file r=irevoire a=K-Kumar-01

# Pull Request
Splits the tests for meilisearch crate in a separate file.

## Related issue
Partially solves #5116 

## What does this PR do?
- Splits the test for `/indexes/search.rs` into a separate file `indexes/search_test.rs` in meilisearch crate

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Kushal Kumar <kushalkumargupta4@gmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-16 15:27:14 +00:00
b004db37c7 fmt 2024-12-16 15:59:26 +01:00
0c04cd1d9f make clippy happy 2024-12-16 15:52:47 +01:00
63ea405b3e Merge branch 'release-v1.12.0' of https://github.com/meilisearch/meilisearch into configure_setting_routes_when_new_field_is_added 2024-12-13 13:08:45 -05:00
ba11121cfc Merge #5159
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Run Rustfmt (push) Successful in 1m18s
Test suite / Run Clippy (push) Successful in 5m30s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5159: Fix the New Indexer Spilling r=irevoire a=Kerollmops

Fix two bugs in the merging of the spilled caches. Thanks to `@ManyTheFish` and `@irevoire` 👏

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-12 17:16:53 +00:00
acdd5aa6ea Use the thread source id instead of the destination id
when filtering on the cache to merge
2024-12-12 18:12:00 +01:00
2f3cc8cdd2 Fix the merge_caches_sorted function 2024-12-12 16:15:37 +01:00
7a95fed23f Merge #5158
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 46s
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Rustfmt (push) Successful in 9m49s
Test suite / Run Clippy (push) Successful in 46m15s
5158: Indexer edition 2024 fix facet fst r=Kerollmops a=ManyTheFish

# Pull Request
Fix a regression in the new indexer; when several filterable attributes containing strings were set, all the field IDs were shifted, and the last one was overwriting the previous FST.

## What does this PR do?
- Add a test reproducing the bug
- fix the bug

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-12 14:14:44 +00:00
961de4d34e Fix facet fst 2024-12-12 15:12:28 +01:00
18ce95dcbf Add test reproducing the bug 2024-12-12 14:56:45 +01:00
c177210b1b Merge #5152
5152: Make xtasks be able to use the specified binary r=dureuill a=Kerollmops

Makes it possible to specify the binary to run. It is useful to run PGO optimized binaries.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-12 12:28:16 +00:00
1fc90fbacb Merge #5147
5147: Batch progress r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5068

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-12 09:15:54 +00:00
6c72559457 Update the binary-path description
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-12 09:39:39 +01:00
1fdfa3f208 Change the exit code to 130 when Ctrl-Ced 2024-12-12 09:26:14 +01:00
1a01196a80 removed the method outside of macro rules, no longer needed 2024-12-11 13:06:19 -05:00
0d0c18f519 rename the Step::name into Step::current_step 2024-12-11 18:41:03 +01:00
d12364c1e0 fix the tests 2024-12-11 18:30:48 +01:00
8cd3a1aa57 fmt 2024-12-11 18:18:40 +01:00
08fd026ebd fix warning 2024-12-11 18:18:13 +01:00
75d5cea624 use a with_capacity while allocating the progress view 2024-12-11 18:17:33 +01:00
ab9213fa94 ensure we never write the progress to the db 2024-12-11 18:16:20 +01:00
45d5d4bf40 make the progressview public 2024-12-11 18:15:33 +01:00
fa885e75b4 rename the send_progress in progress 2024-12-11 18:13:12 +01:00
29fc77ee5b remove useless print 2024-12-11 18:11:19 +01:00
ad4dc70720 rename the ComputingTheChanges to ComputingDocumentChanges in the edit document progress 2024-12-11 18:09:54 +01:00
5d682b4700 rename the ComputingTheChanges to ComputingDocumentChanges 2024-12-11 18:08:45 +01:00
f1beb60204 make the progress use payload instead of documents 2024-12-11 18:07:45 +01:00
85577e70cd reuse the enqueued 2024-12-11 18:05:34 +01:00
c5536c37b5 rename the atomic::name to unit_name 2024-12-11 18:03:06 +01:00
9245c89cfe move the macros to milli 2024-12-11 18:00:46 +01:00
f4ff722247 simplified the method in the macro 2024-12-11 12:00:39 -05:00
262b429a4c updated to fix macro error by creating one method to ensure all routes corresponding to fields and another to ensure each field provided in settings has a corresponding route 2024-12-11 10:43:13 -05:00
eaabc1af2f Merge #5144
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 47s
Test suite / Run Rustfmt (push) Successful in 1m17s
Test suite / Run Clippy (push) Successful in 5m30s
5144: Exactly 512 bytes docid fails r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5050 

## What does this PR do?
- Return a user error rather than an internal one for docids of exactly 512 bytes
- Fix up error message to indicate that exactly 512 bytes long docids are not supported.
- Fix up error message to reflect that index uids are actually limited to 400 bytes in length

## Impact

- Impacts docs: 
    - update [this paragraph](https://www.meilisearch.com/docs/learn/resources/known_limitations#length-of-primary-key-values) to say 511 bytes instead of 512 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 15:41:05 +00:00
04a24a9239 Kill Meilisearch with a TERM signal 2024-12-11 16:27:07 +01:00
1f54dfa883 update the macro to look more like an enum 2024-12-11 16:26:09 +01:00
786b0fabea implement the progress for almost all the tasks 2024-12-11 16:26:08 +01:00
26733c705d add progress for the task deletion and task cancelation 2024-12-11 16:25:02 +01:00
ab75f53efd update all snapshots 2024-12-11 16:25:02 +01:00
867e6a8f1d rename the send_progress field to progress since it's not sending anything 2024-12-11 16:25:01 +01:00
6f4823fc97 make the number of documents in the document tasks more incremental 2024-12-11 16:25:01 +01:00
df9b68f8ed initial implementation of the progress 2024-12-11 16:25:01 +01:00
0a0a5f84bf added attribute name such that each verify_field_exists generated by the macro is unique 2024-12-11 10:05:08 -05:00
5bc6391700 Merge #5153
5153: Return docid in case of errors while rendering the document template r=Kerollmops a=dureuill

Improves error message:

Before: 

```
ERROR index_scheduler: Batch failed Index `mieli`: user error: missing field in document: liquid: Unknown index
  with:
    variable=doc
    requested index=title
    available indexes=by, id, kids, parent, text, time, type
```

After:

```
ERROR index_scheduler: Batch failed Index `mieli`: user error: missing field in document `11345147`: liquid: Unknown index
  with:
    variable=doc
    requested index=title
    available indexes=by, id, kids, parent, text, time, type
```

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 15:01:40 +00:00
eaa897d983 Avoid compiling when unnecessary 2024-12-11 15:57:16 +01:00
c06f386ac3 specifying generic structure now for verifiy_field_exists 2024-12-11 09:36:36 -05:00
bfca54cc2c Return docid in case of errors while rendering the document template 2024-12-11 15:26:18 +01:00
04a62d2b97 Compile Meilisearch or run the dedicated binary file 2024-12-11 14:57:07 +01:00
8c19cb0a0b Merge #5146
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 19s
Test suite / Run tests in debug (push) Failing after 14s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 23s
Test suite / Run Rustfmt (push) Successful in 4m52s
Test suite / Run Clippy (push) Failing after 8m9s
5146: Offline upgrade v1.12 r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #4978 

## What does this PR do?
- add v1_11_to_v1_12 function to upgrade Meilisearch from v1.11 to v1.12
- Convert the update files from OBKV to ndjson format


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-12-11 13:39:14 +00:00
5c492031d9 Update crates/meilitool/src/upgrade/v1_12.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 14:34:18 +01:00
fb1caa4724 Merge #5148
5148: Do not duplicate NDJson data when unnecessary r=dureuill a=Kerollmops

This PR improves the NDJSON support. Usually, we save all of the user's document content into a temporary file, validate its content, and then convert everything into NDJSON in the file store (update files in the tasks).

It is a waste of time when users are already sending NDJSON. So, this PR removes the last copy and directly stores the user content in the file store, validating it from the file store. If an issue arises, the file will not persist and will be dropped/deleted instead.

Related to #5078.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-11 13:00:50 +00:00
5622b9607d Wrap the read NDJSON pass into a tokio blocking 2024-12-11 12:18:36 +01:00
01bcc601be Use a nonrandom hasher when decoding JSON 2024-12-11 12:04:29 +01:00
93fbdc06d3 Use a nonrandom hasher when decoding NDJSON 2024-12-11 12:03:09 +01:00
69c931334f Fix the error messages categorization with invalid NDJson 2024-12-11 12:02:48 +01:00
d683f5980c Do not duplicate NDJson when unnecessary 2024-12-11 12:02:48 +01:00
f8ba112f66 Merge #5150
5150: Reintroduce the Document Addition Logs r=dureuill a=Kerollmops

This PR reintroduces lost tracing logs showing some information about the number of indexed documents.

Related to #5078. Resolves [this comment](https://github.com/meilisearch/meilisearch/pull/4900/files?show-deleted-files=true&show-viewed-files=true&file-filters%5B%5D=#r1852158338) and [this other one](https://github.com/meilisearch/meilisearch/pull/4900/files?show-deleted-files=true&show-viewed-files=true&file-filters%5B%5D=#r1852159073).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-11 10:48:48 +00:00
c614d0dd35 Add context when returning an error 2024-12-11 10:55:39 +01:00
479607e5dd Convert update files from OBKV to ndjson 2024-12-11 10:55:39 +01:00
bb00e70087 Reintroduce the document addition logs 2024-12-11 10:39:04 +01:00
2a04ecccc4 first commit 2024-12-11 01:43:37 -05:00
e974be9518 Merge #5145
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Run Rustfmt (push) Successful in 1m18s
Test suite / Run Clippy (push) Successful in 5m32s
5145: Use bumparaw-collections in Meilisearch/milli r=dureuill a=Kerollmops

This PR is related to #5078. It uses the now published bumparaw-collections and (soon) makes the `RawMap` hasher nonrandom.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-10 15:51:01 +00:00
aeb6b74725 Make sure we use an FxHashBuilder on the Value 2024-12-10 15:52:22 +01:00
a751972c57 Prefer using a stable than a random hash builder 2024-12-10 14:25:53 +01:00
6b269795d2 Update bumparaw-collections to 0.1.2 2024-12-10 14:25:13 +01:00
d075be798a Fix tests 2024-12-10 13:39:07 +01:00
89637bcaaf Use bumparaw-collections in Meilisearch/milli 2024-12-10 11:52:20 +01:00
866ac91be3 Fix error messages 2024-12-10 11:06:58 +01:00
e610af36aa User failure for documents with docid of ==512 bytes 2024-12-10 11:06:24 +01:00
7cf6707ed3 Extend test to add the ==512 bytes case 2024-12-10 11:05:42 +01:00
34254b42b6 refactor: use test configuration on import
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-10 00:00:43 +05:30
1995040846 Merge #5142
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Run Rustfmt (push) Successful in 1m19s
Test suite / Run Clippy (push) Successful in 5m49s
5142: Try merge optimisation r=dureuill a=ManyTheFish

![Capture_decran_2024-12-09_a_11 59 42](https://github.com/user-attachments/assets/0dfc7e30-a603-4546-98d2-791990bdfcce)

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-09 14:48:26 +00:00
07f42e8057 Do not index a field count when no word is counted 2024-12-09 15:45:12 +01:00
71f59749dc Reduce union impact in merging 2024-12-09 15:44:06 +01:00
3b0b9967f6 Merge #5141
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 16s
Test suite / Run tests in debug (push) Failing after 14s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 44s
Test suite / Run Rustfmt (push) Successful in 9m52s
Test suite / Run Clippy (push) Successful in 1h2m24s
5141: Use the right amount of max memory and not impact the settings r=curquiza a=Kerollmops

Fixes #5132. Related to #5125.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-09 10:40:46 +00:00
123b54a178 Merge #5056
5056: Attach index name in error message r=irevoire a=airycanon

# Pull Request

## Related issue
Fixes #4392 

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: airycanon <airycanon@airycanon.me>
2024-12-09 09:59:12 +00:00
f5dd8dfc3e Rollback max memory usage changes 2024-12-09 10:26:30 +01:00
6768e4ef75 Fix workload inversion 2024-12-09 10:20:49 +01:00
bcfed70888 Revert "Merge #5125"
This reverts commit 9a9383643f, reversing
changes made to cac355bfa7.
2024-12-09 10:08:02 +01:00
503ef3bbc9 Merge #5138
5138: Allow xtask bench to proceed without a commit message r=Kerollmops a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-09 09:00:12 +00:00
08f2c696b0 Allow xtask bench to proceed without a commit message 2024-12-09 09:36:59 +01:00
54e34beac6 Check attributes are filterable before evaluating search query 2024-12-07 21:13:13 +00:00
c0aa018c87 tests: split test in separate file
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-08 00:32:32 +05:30
b75f1f4c17 fix tests
# Conflicts:
#	crates/index-scheduler/src/batch.rs
#	crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap
#	crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap
#	crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap
#	crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap
#	crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap
#	crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap
#	crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap
#	crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap
#	crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap

# Conflicts:
#	crates/index-scheduler/src/batch.rs
#	crates/meilisearch/src/search/mod.rs
#	crates/meilisearch/tests/vector/mod.rs

# Conflicts:
#	crates/index-scheduler/src/batch.rs
2024-12-06 02:03:02 +08:00
95ed079761 attach index name in errors
# Conflicts:
#	crates/index-scheduler/src/batch.rs

# Conflicts:
#	crates/index-scheduler/src/batch.rs
#	crates/meilisearch/src/search/mod.rs
2024-12-06 01:12:13 +08:00
4a082683df Merge #5131
Some checks failed
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 21s
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Rustfmt (push) Successful in 1m25s
Test suite / Run Clippy (push) Successful in 5m54s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5131: Ignore documents whose selected fields didn't change r=dureuill a=dureuill

Attempts to improve the new indexer performance by ignoring documents whose selected fields didn't change:

- Add `Update::has_changed_for_fields` function
- Ignore documents whose searchable attributes didn't change for word docids and word pair proximity extraction
- Ignore documents whose faceted attributes didn't change for facet extraction

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-05 16:04:16 +00:00
26be5e0733 Merge #5123
5123: Fix batch details r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5079
Fixes https://github.com/meilisearch/meilisearch/issues/5112

## What does this PR do?
- Make the processing tasks actually processing in the stats of the batch instead of enqueued
- Stop counting one extra task for all non-prioritized batches in the stats
- Add a test

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-05 15:21:55 +00:00
bd5110a2fe Fix clippy warnings 2024-12-05 16:13:07 +01:00
fa8b9acdf6 Ignore documents that didn't change in facets 2024-12-05 16:12:52 +01:00
2b74d1824b Ignore documents that didn't change any field in word pair proximity 2024-12-05 15:56:22 +01:00
c77b00d3ac Don't extract word docids when no searchable changed 2024-12-05 15:51:58 +01:00
c77073efcc Update::has_changed_for_fields 2024-12-05 15:50:12 +01:00
1537323eb9 Merge #5119
5119: Settings opt out error msg r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
PRD: https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-fff4b06b651f8108ade3f858aeb16b14?pvs=4
## What does this PR do?

Add a new error code and message when the user tries a facet search on an index where the facet search is disabled:
```json
{
  "message": "The facet search is disabled for this index",
  "code": "facet_search_disabled",
  "type": "invalid_request",
  "link": "https://docs.meilisearch.com/errors#invalid_facet_search_disabled"
}
 ```


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-05 13:51:11 +00:00
a0a3b55700 Change error code 2024-12-05 14:48:29 +01:00
214b51de87 try to fix the snapshot on demand flaky test 2024-12-05 14:45:54 +01:00
95975944d7 fix the dumps missing the empty swap index tasks 2024-12-05 14:23:38 +01:00
9a9383643f Merge #5125
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 37s
Test suite / Tests on ubuntu-20.04 (push) Failing after 15s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 12s
Test suite / Run Rustfmt (push) Successful in 2m14s
Test suite / Run Clippy (push) Successful in 12m4s
5125: Change the default max memory usage to 5% of the total memory r=ManyTheFish a=Kerollmops

After thorough testing, we found that giving 5% of the total available memory to allocate resident memory (caches and channels) is the best approach.

The main reason is that the new indexer is highly memory-map oriented, with LMDB, and reads the database while performing the indexation. So, by allowing the maximum amount of memory available to LMDB and the OS, it will perform the key-value store reads and all other indexation operations faster by keeping more pages hot in the cache. In #5124, we also sorted the entries to merge to improve the read speed of LMDB.

This is common in database management systems: Reading stuff on the disk is much faster when done in lexicographic order (the default sorted order of key values). The entries have a great chance of already being in the OS memory cache, as they were loaded in a previous read, and reading stuff on the disk is very slow compared to reading memory.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-05 10:11:25 +00:00
cac355bfa7 Merge #5124
5124: Optimize Prefixes and Merges r=ManyTheFish a=Kerollmops

In this PR, we plan to optimize the LMDB reads so that the entries are read in lexicographic order and make better use of the OS memory-mapping cache:

 - Optimize the prefix generation for word position docids (`@manythefish`)
 - Optimize the parallel merging of the caches to sort entries before merging the caches (`@kerollmops`)
 
## Benchmarks on 1cpu 2gb gpo3 (5k IOps)
 
Before on the tag meilisearch-v1.12.0-rc.3.

```
word_position_docids:merge_and_send_docids: 988s
compute_word_fst: 23.3s
word_pair_proximity_docids:merge_and_send_docids: 428s
compute_word_prefix_fid_docids:recompute_modified_prefixes: 76.3s
compute_word_prefix_position_docids:recompute_modified_prefixes:from_prefixes: 429s
```

After sorting the whole `HashMap`s in a `Vec` on this branch.

```
word_position_docids:merge_and_send_docids: 202s
compute_word_fst: 20.4s
word_pair_proximity_docids:merge_and_send_docids: 427s
compute_word_prefix_fid_docids:recompute_modified_prefixes: 65.5s
compute_word_prefix_position_docids:recompute_modified_prefixes:from_prefixes: 62.5s
```

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-05 09:35:52 +00:00
9020a50df8 Change the default max memory usage to 5% of the total memory 2024-12-05 10:14:46 +01:00
52843123d4 Clean up and remove the non-sorted merge_caches function 2024-12-05 10:03:05 +01:00
6298db5bea Merge #5113
5113: Fix the Minimum BBQueue channel threshold r=Kerollmops a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-05 09:01:02 +00:00
a003a0934a Merge #5121
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 24s
Test suite / Run Rustfmt (push) Successful in 1m19s
Test suite / Run Clippy (push) Successful in 5m32s
5121: Make the tasks pulling timeout configurable r=dureuill a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-04 17:04:14 +00:00
3a11e39c01 Force max_memory to a min of 100MiB 2024-12-04 17:53:30 +01:00
5f896b1050 Fix geo when spilling 2024-12-04 17:51:12 +01:00
d0c4e6da6b Make clippy happy 2024-12-04 17:39:10 +01:00
2da5584bb5 Make the tasks pulling timeout configurable 2024-12-04 17:39:07 +01:00
b7eb802ae6 Merge #5120
5120: Add cross tasks r=Kerollmops a=ManyTheFish

Add 4 xtask bench workloads:
- `hackernews-add-new-documents`: adds new documents on a db already containing documents
- `hackernews-modify-facet-numbers`: modify filterable fields containing numbers of documents on a db already containing documents
- `hackernews-modify-facet-strings`: modify filterable fields containing strings of documents on a db already containing documents
- `hackernews-modify-searchables`: modify searchable fields of documents on a db already containing documents

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-04 16:16:57 +00:00
2e32d0474c Lexicographically sort all the map to merge 2024-12-04 17:05:11 +01:00
cb99ac6f7e Consume vec instead of draining 2024-12-04 17:00:22 +01:00
be411435f5 Use the merge_caches_alt function in the docids merging 2024-12-04 16:37:29 +01:00
29ef164530 Introduce a new semi ordered merge function 2024-12-04 16:33:35 +01:00
739c52a3cd Replace HashSets by BTreeSets for the prefixes 2024-12-04 16:16:48 +01:00
7a2af06b1e update the impacted snapshots 2024-12-04 15:52:24 +01:00
cb0c3a5aad stop adding one enqueued tasks to all unprioritized batches 2024-12-04 15:48:28 +01:00
8388698993 Fix dat hash 2024-12-04 15:09:10 +01:00
cbcf6c9ba3 make the processing tasks as processing in a batch 2024-12-04 14:48:48 +01:00
bf742d81cf add a test 2024-12-04 14:47:02 +01:00
7458f0386c fix asset name 2024-12-04 14:44:57 +01:00
fc1df5793c fix tests 2024-12-04 14:35:20 +01:00
3ded069042 Merge #5122
5122: Yield the BBQueue writing loop r=ManyTheFish a=Kerollmops

We prefer yielding to let the writing thread do its job instead of spin looping.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-04 13:33:51 +00:00
261d2ceb06 Yield the BBQueue writer instead of spin looping 2024-12-04 14:16:40 +01:00
1a17e2e572 fix formatting 2024-12-04 13:57:06 +01:00
5b8cd68abe Merge #5110
5110: Increase margin on deletion of task r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5077

## What does this PR do?
- Increase the margin we keep to enqueue task deletion

The issue was that we did not have enough space in the reserved memory to write both the batch and the deletion task we just enqueued.
We could fix it only for this test as it’s not an issue in production where we have 10GiB of margin, but I thought it wasn’t a bad idea either to increase our margin a bit since we’re effectively writing more to lmdb.


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-04 12:54:48 +00:00
5ce9acb0b9 Add workloads 2024-12-04 12:19:19 +01:00
953a82ca04 Add new error message 2024-12-04 11:15:29 +01:00
54341c2e80 Merge #5118
5118: Change the reserve and grant function to accept a closure r=ManyTheFish a=Kerollmops

This simplifies the usage of the grant and commits it at the right time, just after having written in it.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-04 10:12:39 +00:00
96831ed9bb Send the WakeUp message if necessary in the reserve function 2024-12-04 11:03:01 +01:00
0459b1a242 Change the reserve and grant function to accept a closure 2024-12-04 10:32:25 +01:00
8ecb726683 Fix the minimum BBQueue channel threshold 2024-12-03 15:49:11 +01:00
297e72e262 Merge #5111
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 43s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 7m18s
Test suite / Run Rustfmt (push) Successful in 1m32s
5111: Update BBQueue repo to point to the Meilisearch org r=curquiza a=Kerollmops

This PR updates the milli dependencies to make BBQueue point to the Meilisearch org repo.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-03 14:27:04 +00:00
0ad2f57a92 Update bbqueue repo to point to the meilisearch org 2024-12-03 12:00:04 +01:00
b21d7aedf9 Merge #5029
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m5s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Create issue to upgrade dependencies / create-issue (push) Failing after 13s
Look for flaky tests / flaky (push) Failing after 9s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Failing after 8s
Test suite / Test disabled tokenization (push) Failing after 7s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Rustfmt (push) Successful in 1m53s
Test suite / Run Clippy (push) Successful in 6m7s
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 9s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 21s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 10s
5029: Guide people to create custom reports on the benchboard r=Kerollmops a=Kerollmops



Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-03 10:18:11 +00:00
71d53f413f increase the margin allowed to delete task 2024-12-03 11:07:03 +01:00
054622bd16 Merge #5094
5094: Implement a bbqueue channel between the extractors and the writer r=dureuill a=Kerollmops

This PR switches from a bounded crossbeam channel only with allocated entries for the communication between the extractors and the writer to a [BBQueue](https://github.com/jamesmunns/bbqueue)-based system with a Single Producer Single Consumer kind of Circular/Ring Buffers channel.

 - [x] Implement the BBQueue channel system...
 - [x] with a crossbeam channel to wake up the receiver.
 - [x] Manage the BBQueue allocated memory dynamically.
 - [x] Support content that doesn't fit in the bbqueues.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-03 08:00:55 +00:00
e905a72d73 remove mimalloc on Windows 2024-12-02 18:13:56 +01:00
2e879c1df8 Merge #5109
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 24s
Test suite / Run Rustfmt (push) Successful in 1m22s
Test suite / Run Clippy (push) Successful in 6m29s
5109: Fix autobatch r=dureuill a=dureuill

Fixes most SDK tests and flaky failures

Changes:

- Make sure that the settings are not autobatched with document operations, as the new indexer no longer supports this operating mode

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-02 16:30:51 +00:00
d040aff101 Stop allocating 1GiB for documents 2024-12-02 16:30:14 +01:00
5e30731cad Merge #5107
5107: While spamming the batches route we could see a processing batch becoming missing and then finished, this commit ensures the batches goes from processing to finished directly r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes the failed tests from this PR: https://github.com/meilisearch/meilisearch-js/pull/1775
See [this message](https://meilisearch.slack.com/archives/CD7Q2UKGB/p1732784680450749) [private link] for more context

## What does this PR do?
- Ensure we never enter a state where a processing batch (only existing in RAM) becomes « Not found » by removing the processing batches AFTER writing them to disk
- This should also theoretically avoid an issue where a task could go from processing to enqueued and then finished


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-02 14:36:29 +00:00
beeb31ce41 Update crates/index-scheduler/src/lib.rs 2024-12-02 15:32:16 +01:00
057143214d Fix warnings 2024-12-02 14:42:31 +01:00
6a1d26a60c Update autobatching tests 2024-12-02 14:15:15 +01:00
d78f4666a0 Fix autobatching of documents and settings 2024-12-02 12:25:01 +01:00
a439fa3e1a While spamming the batches route we could see a processing batch becoming missing and then finished, this commit ensures the batches goes from processing to finished directly 2024-12-02 12:02:16 +01:00
767259be7e Prefer returning an abort indexation rather than throwing a panic 2024-12-02 11:53:42 +01:00
e9f34fb4b1 Make the frame consumer pulling fair 2024-12-02 11:49:01 +01:00
d5c07ef7b3 Manage key length conversion error correctly 2024-12-02 11:03:00 +01:00
5e218f3f4d Remove a sync_all (mark my words) 2024-12-02 11:03:00 +01:00
bcab61ab1d Do spurious wake ups on the receiver side 2024-12-02 11:03:00 +01:00
263c5a348e Move the spin looping for BBQueue frames into a dedicated function 2024-12-02 10:33:49 +01:00
2f1a9105b9 Merge #5104
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 23s
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 12s
Test suite / Run Clippy (push) Successful in 9m3s
Test suite / Run Rustfmt (push) Successful in 2m44s
Run the indexing fuzzer / Setup the action (push) Successful in 1h6m48s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5104: Bump xt0rted/pull-request-comment-branch from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [xt0rted/pull-request-comment-branch](https://github.com/xt0rted/pull-request-comment-branch) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/xt0rted/pull-request-comment-branch/releases">xt0rted/pull-request-comment-branch's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Updated node runtime from 16 to 20</li>
<li>Bumped <code>@actions/core</code> from 1.10.0 to 1.11.1</li>
<li>Bumped <code>@actions/github</code> from 5.1.1 to 6.0.0</li>
<li>Bumped <code>undici</code> from 5.28.3 to 5.28.4</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/xt0rted/pull-request-comment-branch/blob/main/CHANGELOG.md">xt0rted/pull-request-comment-branch's changelog</a>.</em></p>
<blockquote>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v2.0.0...v3.0.0">3.0.0</a> - 2024-11-19</h2>
<ul>
<li>Updated node runtime from 16 to 20</li>
<li>Bumped <code>@actions/core</code> from 1.10.0 to 1.11.1</li>
<li>Bumped <code>@actions/github</code> from 5.1.1 to 6.0.0</li>
<li>Bumped <code>undici</code> from 5.28.3 to 5.28.4</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.4.0...v2.0.0">2.0.0</a> - 2023-03-29</h2>
<ul>
<li>Updated node runtime from 12 to 16</li>
<li>Removed deprecated <code>ref</code> and <code>sha</code> outputs. If you're using these then you should switch to <code>head_ref</code> and <code>head_sha</code> respectively.</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.3.0...v1.4.0">1.4.0</a> - 2022-10-23</h2>
<ul>
<li>Bumped <code>@actions/core</code> from 1.2.7 to 1.10.0</li>
<li>Bumped <code>@actions/github</code> from 4.0.0 to 5.1.1</li>
<li>Bumped <code>node-fetch</code> from 2.6.1 to 2.6.7</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.2.0...v1.3.0">1.3.0</a> - 2021-05-09</h2>
<ul>
<li>Bumped <code>@actions/core</code> from 1.2.5 to 1.2.7</li>
<li>Updated the <code>repo_token</code> input so it defaults to <code>GITHUB_TOKEN</code>. If you're already using this value you can remove this setting from your workflow.</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.1.0...v1.2.0">1.2.0</a> - 2020-09-09</h2>
<ul>
<li>Deprecated <code>ref</code> and <code>sha</code> outputs in favor of <code>head_ref</code> and <code>head_sha</code>.</li>
<li>Added <code>base_ref</code> and <code>base_sha</code> outputs</li>
<li>Bumped <code>@actions/core</code> from 1.2.2 to 1.2.5</li>
<li>Bumped <code>@actions/github</code> from 2.1.1 to 4.0.0</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.0.0...v1.1.0">1.1.0</a> - 2020-02-21</h2>
<ul>
<li>Bumped <code>@actions/github</code> from 2.1.0 to 2.1.1</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/releases/tag/v1.0.0">1.0.0</a> - 2020-02-09</h2>
<ul>
<li>Initial release</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="e8b8daa837"><code>e8b8daa</code></a> Release v3.0.0</li>
<li><a href="bdedca277b"><code>bdedca2</code></a> v3.0.0</li>
<li><a href="4bff54f5df"><code>4bff54f</code></a> Merge pull request <a href="https://redirect.github.com/xt0rted/pull-request-comment-branch/issues/437">#437</a> from xt0rted/dependabot/npm_and_yarn/undici-5.28.4</li>
<li><a href="e0ea3daa0d"><code>e0ea3da</code></a> Update CHANGELOG.md</li>
<li><a href="3096af14cd"><code>3096af1</code></a> Bump undici from 5.28.3 to 5.28.4</li>
<li><a href="b7ffabdc5d"><code>b7ffabd</code></a> Merge pull request <a href="https://redirect.github.com/xt0rted/pull-request-comment-branch/issues/461">#461</a> from xt0rted/dependabot/npm_and_yarn/actions-e659d6d3f1</li>
<li><a href="6fc3c73d82"><code>6fc3c73</code></a> Update CHANGELOG.md</li>
<li><a href="20807fbbbc"><code>20807fb</code></a> Bump <code>@actions/core</code> from 1.10.1 to 1.11.1 in the actions group</li>
<li><a href="8d51fb5346"><code>8d51fb5</code></a> Merge pull request <a href="https://redirect.github.com/xt0rted/pull-request-comment-branch/issues/463">#463</a> from xt0rted/dependabot/npm_and_yarn/typescript-5.6.3</li>
<li><a href="37c7636fab"><code>37c7636</code></a> Merge pull request <a href="https://redirect.github.com/xt0rted/pull-request-comment-branch/issues/462">#462</a> from xt0rted/dependabot/npm_and_yarn/vercel/ncc-0.38.3</li>
<li>Additional commits viewable in <a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xt0rted/pull-request-comment-branch&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-12-02 09:28:38 +00:00
be7d2fbe63 Move the EntryHeader up in the file and document the safety related to the size 2024-12-02 10:19:11 +01:00
f7f9a131e4 Improve copying bytes into aligned memory area 2024-12-02 10:15:58 +01:00
5df5eb2db2 Clarify a method name 2024-12-02 10:10:48 +01:00
30eb0e5b5b Rename recv and read methods to recv_action and recv_frame 2024-12-02 10:08:01 +01:00
5b860cb989 Fix english in the doc 2024-12-02 10:06:35 +01:00
76d0623b11 Reduce the number of unwraps 2024-12-02 10:05:06 +01:00
db4eaf4d2d Rename serialize_into into serialize_into_writer 2024-12-02 10:03:27 +01:00
13f21206a6 Call the serialize_into_writer method from the serialize_into one 2024-12-02 10:03:01 +01:00
27bb591331 Bump xt0rted/pull-request-comment-branch from 2 to 3
Bumps [xt0rted/pull-request-comment-branch](https://github.com/xt0rted/pull-request-comment-branch) from 2 to 3.
- [Release notes](https://github.com/xt0rted/pull-request-comment-branch/releases)
- [Changelog](https://github.com/xt0rted/pull-request-comment-branch/blob/main/CHANGELOG.md)
- [Commits](https://github.com/xt0rted/pull-request-comment-branch/compare/v2...v3)

---
updated-dependencies:
- dependency-name: xt0rted/pull-request-comment-branch
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-12-01 17:52:21 +00:00
14ee7aa84c Make sure the BBQueue is at least 50 MiB 2024-11-28 18:02:48 +01:00
8a35cd1743 Adjust the BBQueue buffers to use 2% instead of 10% 2024-11-28 16:00:15 +01:00
8d33af1dff Merge #5102
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 24s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 28s
Test suite / Run tests in debug (push) Failing after 28s
Test suite / Run Rustfmt (push) Successful in 3m52s
Test suite / Run Clippy (push) Successful in 9m8s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5102: Update mini-dashboard to v0.2.16 version r=curquiza a=curquiza

Fixes https://github.com/meilisearch/meilisearch/issues/5093

Fixes this bug: https://github.com/meilisearch/mini-dashboard/issues/563

Co-authored-by: curquiza <clementine@meilisearch.com>
2024-11-28 14:57:27 +00:00
3c7ac093d3 Take the BBQueue capacity into account in the max memory 2024-11-28 15:43:14 +01:00
d49d127863 Merge #5101
5101: Fix index settings opt out r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #5099 

## What does this PR do?
- Refactor the settings implementation ensuring the routes are configured
- Add a test checking if all the routes are tested
- Refactor the tests to ease the modifications


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-11-28 14:23:33 +00:00
b57dd5c58e Remove the Vector variant and use the Vectors 2024-11-28 15:20:43 +01:00
90b428a8c3 Apply change requests 2024-11-28 15:16:13 +01:00
096a28656e Fix a bug around deleting all the vectors of a doc 2024-11-28 15:15:06 +01:00
3dc87f5baa Update mini-dashboard to v0.2.16 version 2024-11-28 14:33:05 +01:00
cc4bd54669 Correctly construct the Embeddings struct 2024-11-28 13:53:25 +01:00
5383f41bba Polish test_setting_routes! 2024-11-28 12:04:21 +01:00
58eab9a018 Send large payload through crossbeam 2024-11-28 12:01:06 +01:00
9f36ffcbdb Polish make_setting_routes! 2024-11-28 11:44:09 +01:00
68c4717e21 Change the settings tests and macros to avoid oversights 2024-11-28 11:34:35 +01:00
5c488e20cc Send the geo rtree through crossbeam channel 2024-11-27 18:03:45 +01:00
da650f834e Plug the NoPanicThreadPool in the tests and benchmarks 2024-11-27 17:04:49 +01:00
e83534a430 Fix the indexer::index to correctly use the rayon::ThreadPool 2024-11-27 16:27:43 +01:00
98d4a2909e Fix the way we spawn the rayon threadpool 2024-11-27 16:05:44 +01:00
a514ce472a Make clippy happy 2024-11-27 14:59:04 +01:00
cc63802115 Modify and return the IndexEmbeddings to write them later 2024-11-27 14:58:03 +01:00
acec45ad7c Send a WakeUp when writing data in the BBQueue buffers 2024-11-27 14:33:23 +01:00
08d6413365 Fix result types 2024-11-27 14:32:42 +01:00
70802eb7c7 Fix most issues with the lifetimes 2024-11-27 14:32:42 +01:00
6ac5b3b136 Finish most of the channels types 2024-11-27 14:32:26 +01:00
e1e76f39d0 Clean up dependencies 2024-11-27 14:30:34 +01:00
2094ce8a9a Move the arroy building after the writing loop 2024-11-27 14:30:33 +01:00
8442db8101 Implement mostly all senders 2024-11-27 14:16:35 +01:00
79671c9faa Implement a first version of the bbqueue channels 2024-11-27 14:15:00 +01:00
a2f64f6552 Merge #5095
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Run tests in debug (push) Failing after 12s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 40s
Test suite / Run Rustfmt (push) Successful in 1m46s
Test suite / Run Clippy (push) Successful in 9m55s
5095: Span to measure the part of db writes that is after the merge/extraction r=curquiza a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-27 11:10:00 +00:00
fde2e0691c Merge #5098
5098: Update charabia v0.9.2 r=dureuill a=ManyTheFish

# Pull Request

## Related issue
Fixes #5097

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-11-27 10:28:04 +00:00
18a9af353c Update Charabia version to v0.9.2 2024-11-27 11:12:08 +01:00
aae0dc715d Merge #5063
5063: Fix pagination when embedding fails r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5045

## What does this PR do?
- Use `return_keyword_results` function when embedding fails


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-27 09:13:28 +00:00
d0b2c0a523 Merge #5091
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 39s
Test suite / Run Rustfmt (push) Successful in 1m38s
Test suite / Run Clippy (push) Successful in 23m11s
5091: Settings opt out r=Kerollmops a=ManyTheFish

# Pull Request

Related PRD: https://www.notion.so/meilisearch/API-usage-Settings-to-opt-out-indexing-features-fff4b06b651f8108ade3f858aeb16b14?pvs=4

## Related issue
Fixes #4979 

- [x] Add setting opt-out
- [x] Add analytics
- [x] Add tests


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-11-26 15:50:28 +00:00
2e896f30a5 Fix PR comments 2024-11-26 16:06:33 +01:00
8f57b4fdf4 Span to measure the part of db writes that is after the merge/extraction 2024-11-26 14:46:36 +01:00
f014e78684 Update crates/milli/src/index.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-11-26 14:46:01 +01:00
9008ecda3d Update crates/meilisearch-types/src/settings.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-11-26 14:44:24 +01:00
d7bcfb2d19 fix clippy 2024-11-26 14:04:16 +01:00
fb66fec398 Merge #5092
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 23s
Test suite / Run Rustfmt (push) Successful in 1m41s
Test suite / Run Clippy (push) Successful in 5m36s
5092: Precise spans for new indexer r=dureuill a=dureuill

- Separate extract and merge spans
- Add span around commit

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-26 10:59:40 +00:00
fa15be5bc4 Add span around commit 2024-11-26 09:45:48 +01:00
aa460819a7 Add more precise spans 2024-11-26 09:45:36 +01:00
e241f91285 Merge #5062
5062: Fix bugs for v1.12 r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #4984
Fixes https://github.com/meilisearch/meilisearch/issues/4974
Fixes [SDK test](https://github.com/meilisearch/meilisearch/actions/runs/11886701996/job/33118278794)
## What does this PR do?
- add 3 tests
- fix bugs

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-11-26 08:10:50 +00:00
d66dc363ed Test and implement settings opt-out 2024-11-25 18:23:22 +01:00
5560452ef9 Merge #5089
5089: Improve error handling when writing into LMDB r=dureuill a=Kerollmops

This PR exposes two new internal error variants: `StoreDelete` and `StorePut`. So that the error messages are better when we fail at writing into LMDB.

Related to #5078

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-11-25 16:19:41 +00:00
d9df7e00e1 Merge #5090
5090: Use the published crates versions r=dureuill a=Kerollmops

This PR uses the published versions of the obkv, grenad, and roaring crates in milli and Meilisearch.

Related to #5078.


Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-11-25 15:33:55 +00:00
b4fb2dabd4 Use the grenad rayon feature 2024-11-25 16:31:21 +01:00
5606679c53 Use the obkv and grenad crates.io versions 2024-11-25 16:24:59 +01:00
a3103f347e Fix the facet f64 database name 2024-11-25 16:05:31 +01:00
25aac45fc7 Expose better error messages 2024-11-25 15:54:43 +01:00
dd76eaaaec Merge #5076
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 24s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Clippy (push) Successful in 6m35s
Test suite / Run Rustfmt (push) Successful in 1m52s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m8s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5076: Update version for the next release (v1.12.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-11-21 17:51:32 +00:00
98a785b0d7 Merge #5080
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m43s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Look for flaky tests / flaky (push) Failing after 21s
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Tests almost all features (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 7s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Rustfmt (push) Successful in 1m24s
Test suite / Run Clippy (push) Successful in 6m14s
Publish binaries to GitHub release / Check the version validity (push) Successful in 7s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 9s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 19s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 9s
5080: Fix getting a single batch through the GET route r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes a bug where getting a single batch does not work

Related to #5070 


fix by `@Kerollmops` 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-21 17:08:46 +00:00
ba7500998e Fix getting a single batch through the GET route 2024-11-21 17:59:31 +01:00
19e6f675b3 Merge #4900
4900: Indexer edition 2024 r=Kerollmops a=dureuill

This PR is implementing the indexer edition 2024, largely inspired by [the ideas from this blog post](https://blog.kerollmops.com/meilisearch-is-too-slow).

Fixes https://github.com/meilisearch/meilisearch/issues/4985

## Features
- Stream-first approach to reading documents.
- Minimum disk write operations.
- RAM usage-first approach to avoid modifying common bitmaps on disk but in memory.
- Reduced LMDB fragmentation by writing entries only once...
- ...computing the final version of the entries in parallel...
- ...and storing them in write-optimized data structures before sending them to the BTree (LMDB).
- Indexing in multiple transactions to improve large dataset support (dumps).


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-21 16:19:10 +00:00
323ecbb885 Add span on document operation 2024-11-21 17:01:10 +01:00
ffb60cb885 Add comment explaining why we fixed the version of insta 2024-11-21 16:56:56 +01:00
dcc3caef0d Remove TopLevelMap 2024-11-21 16:56:46 +01:00
221e547e86 Slight changes 2024-11-21 16:47:44 +01:00
61d0615253 Document the geo point extractor 2024-11-21 16:47:08 +01:00
5727e00374 Remove useless geo skipped 2024-11-21 16:47:08 +01:00
9b60843831 Remove commented lines 2024-11-21 16:47:07 +01:00
36962b943b First batch of PR comment 2024-11-21 16:38:11 +01:00
32bcacefd5 Changes Document::len to Document::top_level_fields_count 2024-11-21 15:01:07 +01:00
4ed195426c remove unused stuff in global.rs 2024-11-21 15:01:07 +01:00
ff38f29981 Update crates/index-scheduler/src/batch.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-21 14:18:39 +01:00
5899861ff0 Update version for the next release (v1.12.0) in Cargo.toml 2024-11-21 11:21:18 +00:00
94b260fd25 Remove orphan span 2024-11-21 12:12:07 +01:00
03ab6b39e7 Revert the change in run count for movies workload 2024-11-21 11:17:34 +01:00
ab2c83f868 Use the disk less when computing prefixes 2024-11-21 10:45:37 +01:00
9a08757a70 Merge #5070
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Clippy (push) Successful in 6m18s
Test suite / Run Rustfmt (push) Successful in 1m34s
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h4m33s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5070: Improve the details and stats of the current batch processing r=Kerollmops a=irevoire

Small improvement we missed over https://github.com/meilisearch/meilisearch/pull/5060

The current batch processing had empty details and stats.

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-11-20 16:56:01 +00:00
1f9692cd04 Increase map size for tests 2024-11-20 17:52:21 +01:00
1e694ae432 improve the count of the number of tasks in a batch 2024-11-20 17:48:26 +01:00
71807cac6d makes clippy happy 2024-11-20 17:40:58 +01:00
21a2264782 improve the details and stats of the current batch processing 2024-11-20 17:25:55 +01:00
bda2b41d11 update snaps after merge 2024-11-20 17:08:30 +01:00
6e6acfcf1b Merge branch 'main' into indexer-edition-2024 2024-11-20 16:59:58 +01:00
e0864f1b21 Separate side effect and debug asserts 2024-11-20 16:25:17 +01:00
a38344acb3 Replace eprintlns by tracing 2024-11-20 15:29:51 +01:00
4d616f8794 Parse every attribute and filter before tokenization 2024-11-20 15:15:25 +01:00
ff9c92c409 rename documents -> substep 2024-11-20 15:12:02 +01:00
8380ddbdcd Fix progress of into_changes 2024-11-20 15:10:09 +01:00
d4d8becfa7 Merge #5060
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Publish binaries to GitHub release / Check the version validity (push) Successful in 11s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 21s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 12s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 10s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m1s
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Failing after 9s
Test suite / Test disabled tokenization (push) Failing after 8s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 40s
Test suite / Run Rustfmt (push) Successful in 1m28s
Test suite / Run Clippy (push) Successful in 5m29s
5060: Batch route r=Kerollmops a=irevoire

# Pull Request

See [usage](https://www.notion.so/meilisearch/Enhance-visibility-on-batched-tasks-1194b06b651f810b8fe0fab5d72846a8).

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4977

## What does this PR do?
- For more detailed information, see the PRD.
- Added a `batchUid` to the tasks (that's the cause of all the updates of the dumps):
  - For all enqueued tasks, it's set to `None`
  - For every other task, it must be set to something
  - ⚠️ For all the tasks imported in a dump, the `batchUid` will be set to `None` as well.
- Add two new routes:
  - `GET /batches/:uid` - to query a batch by its id
  - `GET /batches` - to retrieve a list of batches. It accepts all the same query parameters that are available on the `GET /tasks` route
- Adds new databases to query the batches directly:
  - When doing a query against the batches, the rule of thumb is that we want to return a batch if and only if **at least one** task in it matches the provided filter.
  - We don't need a `canceledBy` batch specific database because we can just retrieve the task and if it's a `taskCancelation` retrieve its `batchUid`
- The task cancelation has been updated and simplified a bit:
  - Instead of updating the matching tasks on disk while processing the cancelation task, we instead retrieve the task and let the `tick` function do the work afterward.
  - In the `tick` function, we now have to take care of not missing any tasks
- All the tests applied to the tasks were duplicated and updated to work with the new batches routes
- The deletion of batches doesn't contain any tests because it's already tested in the deletion of tasks (and especially highlighted in the snapshots)


Currently, one part of the PRD is not implemented: it's the progress.

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-11-20 14:07:48 +00:00
867138f166 Add SP to into_changes 2024-11-20 15:07:05 +01:00
567bd4538b Fix the into_changes stop processing 2024-11-20 14:58:25 +01:00
84600a10d1 Add MSP to document_update.into_changes() 2024-11-20 14:53:37 +01:00
35bbe1c2a2 Add failing test on settings changes 2024-11-20 14:48:12 +01:00
7d64e8dbd3 Fix Windows compilation 2024-11-20 14:40:38 +01:00
ec06879d28 apply review changes 2024-11-20 14:40:36 +01:00
83d1f858c1 Update crates/index-scheduler/src/lib.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-11-20 14:36:05 +01:00
cae8c89467 "fix" last warnings 2024-11-20 14:03:52 +01:00
a7ac590e9e implements the reverse query parameter for the batches 2024-11-20 13:29:52 +01:00
7cb8732b45 Introduce a new bincode internal error 2024-11-20 13:23:11 +01:00
8ad68dd708 stop leaking the update files of the canceled tasks 2024-11-20 13:17:54 +01:00
fe5d50969a Fix field selector in extractors 2024-11-20 13:16:44 +01:00
56c7c5d5f0 Fix comments 2024-11-20 13:16:44 +01:00
4cdfdddd6d Fix one more 2024-11-20 13:16:43 +01:00
2afa33011a Fix tokenize_document 2024-11-20 13:16:43 +01:00
61feca1f41 More tests pass 2024-11-20 13:16:43 +01:00
f893b5153e Don't mark [""] as empty facet 2024-11-20 13:16:42 +01:00
ca779c21f9 facets: Handle boolean and skip empty strings 2024-11-20 13:16:42 +01:00
477077bdc2 Remove _vectors from fid map when there are no vectors in sight 2024-11-20 13:16:42 +01:00
b1f8aec348 Fix index_documents_check_exists_database 2024-11-20 13:16:41 +01:00
ba7f091db3 Use tokenizer on numbers and booleans 2024-11-20 13:16:41 +01:00
8049df125b Add depth to facet extraction so that null inside an array doesn't mark the entire field as null 2024-11-20 13:16:40 +01:00
50d1bd01df We no longer index geo lat and lng 2024-11-20 13:16:40 +01:00
a28d4f5d0c Fix setup_search_index_with_criteria 2024-11-20 13:16:40 +01:00
fc14f4bc66 Attempt to fix setup_search_index_with_criteria 2024-11-20 13:16:39 +01:00
5f8a82d6f5 Improve test 2024-11-20 13:16:39 +01:00
fe04e51a49 One more 2024-11-20 13:16:38 +01:00
01b27e40ad Fix a bit of the placeholder search tests 2024-11-20 13:16:38 +01:00
8076d98544 Fix stats_should_not_return_deleted_documents 2024-11-20 13:16:37 +01:00
9e951baad5 One more test passing 2024-11-20 13:16:37 +01:00
52f2fc4c46 Fail in case of user error in tests 2024-11-20 13:16:37 +01:00
3957917e0b Correctly count indexed documents 2024-11-20 13:16:36 +01:00
651c30899e Allow fetching embedders from inside tests 2024-11-20 13:16:36 +01:00
2c7a7fe4e8 Count the number of documents correctly 2024-11-20 13:16:35 +01:00
23f0c2c29b Generate internal ids only when needed 2024-11-20 13:16:35 +01:00
6641c3f59b Remove all autogenerated tests 2024-11-20 13:16:34 +01:00
07a72824b7 Subfields of _vectors are no longer part of the fid map 2024-11-20 13:16:34 +01:00
000eb55c4e fix one 2024-11-20 13:16:34 +01:00
b4bf7ce9b0 Increase the number of readers as the indexer uses readers too 2024-11-20 13:16:33 +01:00
1aef0e4037 documents! macro accepts a single object again 2024-11-20 13:16:33 +01:00
32d0e50a75 Fix all the benchmark compilation errors 2024-11-20 13:16:32 +01:00
df5884b0c1 Fix settings test 2024-11-20 13:16:32 +01:00
9e0eb5ebb0 Removed some warnings 2024-11-20 13:16:32 +01:00
3cf1352ae1 Fix the benchmark tests 2024-11-20 13:16:31 +01:00
aba8a0e9e0 Fix some tests but not all of them 2024-11-20 13:16:31 +01:00
670aff5553 Remove useless Transform methods 2024-11-20 13:16:08 +01:00
7e379b3d14 remove useless prints 2024-11-20 12:27:12 +01:00
56eacd221f update the tests after the rebase 2024-11-20 10:54:38 +01:00
bdb51a85fe now that the task cancelation shares their started at with all the tasks of their batch we don't need the trick of retrieving the previous batch anymore 2024-11-20 10:51:07 +01:00
b24a34830d fix the dump test -> the only change is that we now have a null batch_uid in all the tasks 2024-11-20 10:51:06 +01:00
e145d71a62 implements the two last TODOs 2024-11-20 10:51:06 +01:00
d9a4e69990 push a missing snapshot 2024-11-20 10:51:06 +01:00
b906e3ed70 improve the way we access the mutex 2024-11-20 10:51:06 +01:00
4abcd9c04e add some stats on the batches 2024-11-20 10:51:06 +01:00
229fa0f902 implements the batch details 2024-11-20 10:51:06 +01:00
5d10c2312b remove unused file 2024-11-20 10:51:06 +01:00
f1d38581e5 add the front end tests on the batches routes 2024-11-20 10:51:06 +01:00
62646af7b9 implements the automatic batch deletion 2024-11-20 10:51:06 +01:00
1fcb9526f5 fix the task cancelation 2024-11-20 10:51:06 +01:00
15eefa4fcc fixes a lot of small issues, the test about the cancellation is still failing 2024-11-20 10:51:05 +01:00
ad9763ffcd copy multiple task query tests to batches. Currently, they fail 2024-11-20 10:49:25 +01:00
d489f5635f add the mapping between the task and batches 2024-11-20 10:49:23 +01:00
a1251c3c83 Implements the get all batches route with filters working 2024-11-20 10:42:55 +01:00
6062914654 add the batch_id to the tasks 2024-11-20 10:42:54 +01:00
057fcb3993 Add indices field to _matchesPosition to specify where in an array a match comes from (#5005)
Some checks are pending
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h4m31s
* Remove unreachable code

* Add `indices` field to `MatchBounds`

For matches inside arrays, this field holds the indices of the array
elements that matched. For example, searching for `cat` inside
`{ "a": ["dog", "cat", "fox"] }` would return `indices: [1]`. For nested
arrays, this contains multiple indices, starting with the one for the
top-most array. For matches in fields without arrays, `indices` is not
serialized (does not exist) to save space.
2024-11-20 01:00:43 +01:00
41dbdd2d18 Fix filtered_placeholder_search_should_not_return_deleted_documents and word_scale_set_and_reset 2024-11-19 16:08:25 +01:00
bfefaf71c2 Progress displayed in logs 2024-11-19 09:32:52 +01:00
c782c09208 Move step to a dedicated mod and replace it with an enum 2024-11-18 18:22:13 +01:00
75943a5a9b Add TODO to remember replacing steps with an enum 2024-11-18 17:40:51 +01:00
c1d8ee2a8d Merge #5048
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 14s
Test suite / Run tests in debug (push) Failing after 24s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 57s
Test suite / Run Rustfmt (push) Successful in 1m36s
Test suite / Run Clippy (push) Successful in 6m8s
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h4m23s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5048: Reverse the order of the task queue r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5047

## What does this PR do?
- Provide a new parameter to reverse the order of the task queue
- Add tests
- Remove some unrelated tests that were duplicated in tests/tasks/mod.rs and tests/tasks/error.rs


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-11-18 16:24:12 +00:00
04c38220ca Move MostlySend, ThreadLocal, FullySend to their own commit 2024-11-18 16:43:05 +01:00
5f93651cef fixes 2024-11-18 16:23:11 +01:00
510ca99996 Fixes #4974 2024-11-18 16:08:55 +01:00
8924d486db Add a test reproducing the bug 2024-11-18 16:08:55 +01:00
e0c3f3d560 Fix #4984 2024-11-18 16:08:53 +01:00
0a21d9bfb3 Fix double borrow of new fields id map 2024-11-18 15:56:01 +01:00
1f8b01a598 Fix snap since _vectors is no longer part of the field distributions 2024-11-18 12:50:59 +01:00
e736a74729 Remove infinite loop in import_vectors 2024-11-18 12:50:56 +01:00
e9d17136b2 Add deadline of 3 seconds to embedding requests made in the context of hybrid search 2024-11-18 12:15:11 +01:00
a05e448cf8 Add test 2024-11-18 12:15:11 +01:00
cd796b0f4b Fix SDK test 2024-11-18 11:46:00 +01:00
6570da3bcb Retry in case where the JSON deserialization fails 2024-11-18 11:33:09 +01:00
5b4c06c24c Plug the grenad max memory parameter 2024-11-18 11:28:04 +01:00
3a8051866a Use return_keyword_results function instead of returning raw keyword results when the embedder is broken 2024-11-18 11:17:15 +01:00
9150c8f052 Accept changes to vector format 2024-11-18 11:04:57 +01:00
c202f3dbe2 fix tests and revert change in behavior when primary_key_from_op != primary_key_from_db && index.is_empty() 2024-11-18 10:59:05 +01:00
677d7293f5 Fix a lot of primary key related tests 2024-11-18 10:59:05 +01:00
bd31ea2174 Check for at least one valid task after setting their statuses 2024-11-18 10:59:05 +01:00
83865d2ebd Expose intermediate errors when processing batches 2024-11-18 10:59:05 +01:00
72ba353498 reproduce sdk fail 2024-11-18 10:03:23 +01:00
4ff2b3c2ee Fix test on locales 2024-11-14 15:45:04 +01:00
91c58cfa38 Fix positional databases 2024-11-14 11:40:12 +01:00
9e8367f1e6 Move the rayon thread pool outside the extract method 2024-11-14 10:40:32 +01:00
0dd321afc7 reproduce #4984 2024-11-14 10:02:51 +01:00
0e3c5d91ab Document deletion test passes 2024-11-14 08:42:56 +01:00
695c2c6b99 Cosmetic fix 2024-11-14 08:42:39 +01:00
40dd25d6b2 Fix issue with Replace document method when adding and deleting a document in the same batch 2024-11-13 22:10:00 +01:00
8e5b1a3ec1 Compute the field distribution and convert _geo into an f64s 2024-11-13 17:44:05 +01:00
e627e182ce Fix facet strings 2024-11-13 17:43:02 +01:00
51b6293738 Add linear facet databases 2024-11-13 17:43:02 +01:00
b17896d899 Finalize the GeoExtractor 2024-11-13 17:43:02 +01:00
94fb55bb6f Merge #5049
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 59s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 7m4s
Test suite / Run Clippy (push) Successful in 10m58s
Test suite / Run Rustfmt (push) Successful in 2m34s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m58s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5049: Fix the path used in the flaky tests CI r=irevoire a=Kerollmops

This PR fixes [the flaky tests CI](https://github.com/meilisearch/meilisearch/actions/runs/11741717787) path used.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-11-13 10:26:50 +00:00
a01bc7b454 Fix error_document_field_limit_reached_in_one_document test 2024-11-13 10:34:54 +01:00
7accfea624 Don't short circuit when we encounter a semantic error while extracting fields and external docid 2024-11-13 10:33:59 +01:00
009709eace Fix the path used in the flaky tests CI 2024-11-13 09:52:10 +01:00
82dcaba6ca Fix test: somehow on main vectors were displayed even though retrieveVectors: false 2024-11-12 23:58:25 +01:00
cb1d6613dd Adjust snapshots 2024-11-12 23:26:30 +01:00
3b0cb5b487 Fix vector error messages 2024-11-12 23:26:16 +01:00
bfdcd1cf33 Space changes 2024-11-12 22:52:45 +01:00
1d13e804f7 Adjust test snapshots 2024-11-12 22:52:41 +01:00
c4e9f761e9 Emit better error messages when parsing vectors 2024-11-12 22:49:22 +01:00
8a6e61c77f InvalidVectorsEmbedderConf error takes a String rather than a deserr error 2024-11-12 22:47:57 +01:00
68bbf674c9 Make REST mock thread independent 2024-11-12 16:31:31 +01:00
980921e078 Vector fixes 2024-11-12 16:31:22 +01:00
1fcd5f091e Remove progress from task 2024-11-12 12:23:13 +01:00
6094bb299a Fix user_provided vectors 2024-11-12 10:15:55 +01:00
bef8fc6cf1 Fix hf embedder 2024-11-08 13:10:17 +01:00
e32677999f Adapt some snapshots 2024-11-08 00:06:33 +01:00
5185aa21b8 Know if your vectors are implicit when writing them back in documents + don't write empty _vectors 2024-11-08 00:05:36 +01:00
8a314ab81d Fix primary key fid order 2024-11-08 00:05:12 +01:00
4706a0eb49 Fix vector parsing 2024-11-07 23:26:20 +01:00
d97af4d8e6 fix field order of JSON documents 2024-11-07 22:36:52 +01:00
2eb1801e85 reverse the order of the task queue 2024-11-07 19:19:44 +01:00
a5d7ae23bd Merge #5044
5044: Adds new metrics to prometheus r=irevoire a=PedroTurik

Not 100% confident in this solution, especially because I couldn't make the "Search Queue searches waiting" metric give me any value other than 0 with my local testing 😆. But I believe it solves the issue.

# Pull Request

## Related issue
Fixes #4998 

## What does this PR do?
### Adds new metrics to prometheus;
- SearchQueue size, 
- SearchQueue searches running, 
- and Search Queue searches waiting.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Pedro Turik Firmino <pedroturik@gmail.com>
2024-11-07 17:05:43 +00:00
1f5d801271 Fix crashes in facet search indexing 2024-11-07 17:22:30 +01:00
7864530589 Make the word prefix integer multi-threaded 2024-11-07 16:39:14 +01:00
03886d0012 Applies optimizations to formatted integration tests (#5043) 2024-11-07 15:58:55 +01:00
700757c01f Adding a new step 2024-11-07 15:32:04 +01:00
01f8f30a7a Fix indentation 2024-11-07 15:08:56 +01:00
0e4e9e866a Move the RefCellExt trait in a dedicated module 2024-11-07 11:36:09 +01:00
1477b81d38 Support cancelation in merge and send 2024-11-07 11:23:49 +01:00
c9f478bc45 Fix bbbul merger 2024-11-07 10:53:46 +01:00
b427b9e88f Merge #5025
5025: test: improve performance of get_documents.rs r=irevoire a=PedroTurik

# Pull Request

## Related issue
Fixes one item from #4840 

## What does this PR do?
- Applies the changes recommended on the issue for `meilisearch/tests/documents/get_documents.rs`

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Pedro Turik Firmino <pedroturik@gmail.com>
2024-11-07 09:46:34 +00:00
39366a67c4 Top level fields don't return vector fields 2024-11-07 10:39:58 +01:00
e2138170ad some warning fix 2024-11-07 10:06:07 +01:00
03650e3217 Reverse order of computation 2024-11-07 09:39:46 +01:00
8b95f5ccc6 Adds new metrics to prometheus: SearchQueue size, SearchQueue searches running, and Search Queue searches waiting. 2024-11-06 15:37:16 -03:00
10f49f0d75 Post processing of the merge 2024-11-06 17:50:12 +01:00
ee03743355 Merge branch 'indexer-edition-2024' into indexer-edition-2024-doc-chunks 2024-11-06 15:50:53 +01:00
10feeb88f2 Merge branch 'main' into indexer-edition-2024 2024-11-06 15:19:18 +01:00
a9ecbf0b64 Use the Bbbul crate in the cache to better control memory 2024-11-06 14:40:14 +01:00
6b67f9fc4c Merge #5030
5030: Bump Swatinem/rust-cache from 2.7.1 to 2.7.5 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.1 to 2.7.5.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.7.5</h2>
<h2>What's Changed</h2>
<ul>
<li>Upgrade checkout action from version 3 to 4 by <a href="https://github.com/carsten-wenderdel"><code>`@​carsten-wenderdel</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/190">Swatinem/rust-cache#190</a></li>
<li>fix: usage of <code>deprecated</code> version of <code>node</code> by <a href="https://github.com/hamirmahal"><code>`@​hamirmahal</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/197">Swatinem/rust-cache#197</a></li>
<li>Only run macOsWorkaround() on macOS by <a href="https://github.com/heksesang"><code>`@​heksesang</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/206">Swatinem/rust-cache#206</a></li>
<li>Support Cargo.lock format cargo-lock v4 by <a href="https://github.com/NobodyXu"><code>`@​NobodyXu</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/211">Swatinem/rust-cache#211</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/carsten-wenderdel"><code>`@​carsten-wenderdel</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/190">Swatinem/rust-cache#190</a></li>
<li><a href="https://github.com/hamirmahal"><code>`@​hamirmahal</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/197">Swatinem/rust-cache#197</a></li>
<li><a href="https://github.com/heksesang"><code>`@​heksesang</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/206">Swatinem/rust-cache#206</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.7.3...v2.7.5">https://github.com/Swatinem/rust-cache/compare/v2.7.3...v2.7.5</a></p>
<h2>v2.7.3</h2>
<ul>
<li>Work around upstream problem that causes cache saving to hang for minutes.</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.7.2...v2.7.3">https://github.com/Swatinem/rust-cache/compare/v2.7.2...v2.7.3</a></p>
<h2>v2.7.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Update action runtime to <code>node20</code> by <a href="https://github.com/rhysd"><code>`@​rhysd</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/175">Swatinem/rust-cache#175</a></li>
<li>Only key by <code>Cargo.toml</code> and <code>Cargo.lock</code> files of workspace members by <a href="https://github.com/max-heller"><code>`@​max-heller</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/180">Swatinem/rust-cache#180</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/rhysd"><code>`@​rhysd</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/175">Swatinem/rust-cache#175</a></li>
<li><a href="https://github.com/max-heller"><code>`@​max-heller</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/180">Swatinem/rust-cache#180</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.7.1...v2.7.2">https://github.com/Swatinem/rust-cache/compare/v2.7.1...v2.7.2</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>2.7.3</h2>
<ul>
<li>Work around upstream problem that causes cache saving to hang for minutes.</li>
</ul>
<h2>2.7.2</h2>
<ul>
<li>Only key by <code>Cargo.toml</code> and <code>Cargo.lock</code> files of workspace members.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="82a92a6e8f"><code>82a92a6</code></a> 2.7.5</li>
<li><a href="598fe25fa1"><code>598fe25</code></a> update dependencies, rebuild</li>
<li><a href="8f842c2d45"><code>8f842c2</code></a> Support Cargo.lock format cargo-lock v4 (<a href="https://redirect.github.com/swatinem/rust-cache/issues/211">#211</a>)</li>
<li><a href="96a8d65dba"><code>96a8d65</code></a> Only run macOsWorkaround() on macOS (<a href="https://redirect.github.com/swatinem/rust-cache/issues/206">#206</a>)</li>
<li><a href="9bdad043e8"><code>9bdad04</code></a> fix: usage of <code>deprecated</code> version of <code>node</code> (<a href="https://redirect.github.com/swatinem/rust-cache/issues/197">#197</a>)</li>
<li><a href="f7a52f6914"><code>f7a52f6</code></a> &quot;add jsonpath test&quot;</li>
<li><a href="2bceda3912"><code>2bceda3</code></a> &quot;update dependencies&quot;</li>
<li><a href="640a22190e"><code>640a221</code></a> Upgrade checkout action from version 3 to 4 (<a href="https://redirect.github.com/swatinem/rust-cache/issues/190">#190</a>)</li>
<li><a href="1582741630"><code>1582741</code></a> update dependencies</li>
<li><a href="23bce251a8"><code>23bce25</code></a> 2.7.3</li>
<li>Additional commits viewable in <a href="https://github.com/swatinem/rust-cache/compare/v2.7.1...v2.7.5">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.7.1&new-version=2.7.5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-11-06 12:59:36 +00:00
2e4d4b398d Bump Swatinem/rust-cache from 2.7.1 to 2.7.5
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.1 to 2.7.5.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.7.1...v2.7.5)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-11-06 12:57:04 +00:00
da59a043ba Fixes formatting issues 2024-11-06 09:55:48 -03:00
da4d47b5d0 Fixes formatting issues 2024-11-06 09:54:20 -03:00
0507f5d99b Merge #4928
4928: Make matches consider phrases as a single `Match` r=ManyTheFish a=flevi29

# Pull Request

## Related issue
Fixes #4732

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: F. Levi <55688616+flevi29@users.noreply.github.com>
2024-11-06 08:22:01 +00:00
8b260de5a0 Reimplement facet search and facet level and put them in dedicated functions 2024-11-05 16:46:43 +01:00
be2a7c70f2 Merge #5037
5037: Fix the benchmarks r=Kerollmops a=irevoire

# Pull Request

## Related issue
https://github.com/meilisearch/meilisearch/pull/5016 broke all benchmarks. This PR fixes the benchmarks


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-11-05 15:37:55 +00:00
33b1f54b41 Progress, in the task queue 2024-11-05 16:23:02 +01:00
ede086bc30 Merge #5034
5034: Upgrade from v1 10 to v1 11 r=irevoire a=irevoire

# Pull Request

## Related issue
Parts of https://github.com/meilisearch/meilisearch/issues/4978

## What does this PR do?
- Move the code around the offline upgrade to its own module with a file per version
- Fix the upgrade from v1.9 to v1.10 because I couldn’t make it work anymore. It now uses a specified format instead of relying on cargo to get the right set of features
- ☝️ must be checked against docker
- Provide an update path from v1.10 to v1.11. Most of the code is boilerplate in meilitool, the real code is located here: 053807bf38/src/lib.rs (L161-L269)


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-11-05 14:49:56 +00:00
7415ef7ff5 Update crates/meilitool/src/upgrade/v1_11.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-05 15:37:59 +01:00
a5d138ac34 use a tag while importing arroy instead of a loose branch or rev 2024-11-05 15:24:02 +01:00
0f74a93346 Update crates/meilitool/src/upgrade/v1_11.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-05 15:14:02 +01:00
e4993aa705 Update crates/meilitool/src/upgrade/mod.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-05 15:13:50 +01:00
66b7e0824e Update crates/meilitool/src/upgrade/mod.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-05 15:13:40 +01:00
f193c3a67c Update crates/meilitool/src/main.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-11-05 15:13:32 +01:00
9799812b27 fix the benchmarks 2024-11-05 15:08:01 +01:00
db55638714 Do not forget to recompute common prefixes 2024-11-05 11:26:46 +01:00
ad52c950ba Only run word pair proximity docids extraction if proximity_precision enables it 2024-11-05 11:08:47 +01:00
48ab898ca2 fix the datetime of v1.9 2024-11-05 10:30:53 +01:00
a5dc783ffa Merge with main branch 2024-11-05 10:56:17 +02:00
1b49b60486 Merge #5026
5026: test: improve performance of update_documents.rs  r=dureuill a=PedroTurik

# Pull Request

## Related issue
Fixes one item from #4840 

## What does this PR do?
- Applies the changes recommended on the issue for `meilisearch/tests/documents/update_documents.rs`

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Pedro Turik Firmino <pedroturik@gmail.com>
2024-11-05 08:37:44 +00:00
d0b1ba20cb Improves usage of shared indexes 2024-11-04 17:26:50 -03:00
a1f228f662 remove the unneeded files after the rebase 2024-11-04 18:19:36 +01:00
99a9fde37f push back the removed files 2024-11-04 17:55:55 +01:00
106cc7fe3a fmt 2024-11-04 17:51:40 +01:00
4eef0cd332 fix the update from v1_9 to v1_10 by providing a custom datetime formatter myself 2024-11-04 17:47:10 +01:00
5f57306858 update the arroy version in meilitool 2024-11-04 17:47:10 +01:00
690eb42fc0 update the version of arroy 2024-11-04 17:47:10 +01:00
a9b61c8434 fix the version parsing and improve error handling 2024-11-04 17:47:10 +01:00
ddd03e9b37 implement the upgrade from v1.10 to v1.11 in meilitool 2024-11-04 17:47:10 +01:00
362836efb7 make an upgrade module where we'll be able to shove each version instead of putting everything in the same file 2024-11-04 17:47:10 +01:00
22229d3046 Merge #5022
5022: Bringing changes from v1.11.0 back to main r=irevoire a=Kerollmops

Fixes https://github.com/meilisearch/meilisearch/issues/5035

...and fixing merge conflicts.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: curquiza <clementine@meilisearch.com>
2024-11-04 15:34:19 +00:00
186326fe40 update the macos version 2024-11-04 16:33:04 +01:00
cf6ad1ae5e Merge branch 'main' into tmp-release-v1.11.0 2024-11-04 16:14:44 +01:00
3658f57f93 Add progress 2024-11-04 15:10:40 +01:00
c79ca9679b Changes variable name to re-run CI 2024-11-02 18:25:33 -03:00
94a1f5a8ea First draft just for the commands 2024-10-31 16:30:05 +01:00
a77d5ea8c1 Pass embedders to documents 2024-10-30 14:03:29 +01:00
c9082130c8 support vectors or array of vectors 2024-10-30 13:50:51 +01:00
df5bc3c9fd Reintroduce vector errors 2024-10-30 10:55:57 +01:00
0f6a1dbce7 habemus field distribution 2024-10-30 10:06:46 +01:00
4ebedf4dc8 clippy fixes 2024-10-30 10:06:38 +01:00
b02a72c0c0 Applies optimizations to some integration tests 2024-10-29 19:30:11 -03:00
a934b0ac6a Applies optimizations to some integration tests 2024-10-29 18:49:06 -03:00
1075dd34bb Vectors 2024-10-29 17:43:36 +01:00
28274292d8 Merge #5021
5021: Update benchmarks to match the new crates subfolder r=dureuill a=Kerollmops



Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-10-29 08:06:35 +00:00
7058959a46 Write into documents 2024-10-28 16:18:48 +01:00
9cbb2b066a WIP vector extraction 2024-10-28 14:23:54 +01:00
5efd70c251 Allow random access to fields in documents 2024-10-28 14:23:38 +01:00
65470e26e0 Document trait changes 2024-10-28 14:23:20 +01:00
bbb67ae0a8 todo channel 2024-10-28 14:23:02 +01:00
af9f96e2af Update older embedding 2024-10-28 14:22:45 +01:00
1960003805 Remove some warnings 2024-10-28 14:22:19 +01:00
2a91849660 Remove primary key from top id map 2024-10-28 14:21:50 +01:00
663deac236 Slight changes index scheduler 2024-10-28 14:21:39 +01:00
c8189e975c Add rendering based on document trait 2024-10-28 14:10:55 +01:00
9e7c455a01 GlobalFieldIdMap manages metadata 2024-10-28 14:09:48 +01:00
c22dc55694 Add embed_chunks_ref 2024-10-28 14:08:54 +01:00
50de3fba7b Update raw-collections 2024-10-28 14:07:23 +01:00
ee72f622c7 Update benchmarks to match the new crates subfolder 2024-10-28 14:06:46 +01:00
b0da626506 Merge #5016
5016: Hide code complexity into a subfolder r=Kerollmops a=Kerollmops

This PR moves the complexity and main code into a subfolder to make the main repository page more welcoming by reducing the number of visible files and showing the README earlier.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-10-28 09:43:14 +00:00
3d29226a7f Merge pull request #5019 from meilisearch/indexer-edition-2024-bumpalo-in-extractors
Implement facet search extraction
2024-10-23 10:42:38 +02:00
f372ee505f Merge #5017
5017: Rollback the Meilisearch Kawaii logo r=curquiza a=Kerollmops

This PR reverts #4778 and brings back the official one. It's no longer the time to JOKE, OK !?

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-10-22 08:14:18 +00:00
3753f87fd8 Merge #5011
5011: Revamp analytics r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5009

## What does this PR do?
- Force every analytics to go through a trait that forces you to handle aggregation correctly
- Put the code to retrieve the `user-agent`, `timestamp` and `requests.total_received` in common between all aggregates, so there is no mistake
- Get rid of all the different channels for each kind of event in favor of an any map
- Ensure that we never [send empty event ever again](https://github.com/meilisearch/meilisearch/pull/5001)
- Merge all the sub-settings route into a global « Settings Updated » event.
- Fix: When using one of the three following features, we were not sending any analytics IF they were set from the global route
  - /non-separator-tokens
  - /separator-tokens
  - /dictionary

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-21 15:08:49 +00:00
89243f7df0 WIP vector extraction 2024-10-21 10:39:40 +02:00
9fe5122176 Fixup imports 2024-10-21 10:39:31 +02:00
aff8ca4397 Add raw versions of parsed vectors 2024-10-21 10:39:05 +02:00
1a3f4e719d Vector document trait 2024-10-21 10:38:21 +02:00
c278024709 Add vectors field and geo field to document trait 2024-10-21 10:37:40 +02:00
73e29ee155 EmbeddingSender stub 2024-10-21 10:35:56 +02:00
124b5c3df8 Update raw collections 2024-10-21 10:35:44 +02:00
60cc09abec Implement facet search extraction 2024-10-21 09:28:49 +02:00
8ef8035bf2 Fix CI 2024-10-21 08:28:33 +02:00
3353bcd82d Revert "Change the Meilisearch logo to the kawaii version"
This reverts commit 13d1d78a2d.
2024-10-21 08:21:56 +02:00
9c1e54a2c8 Move crates under a sub folder to clean up the code 2024-10-21 08:18:43 +02:00
5675585fe8 move all the searches structures to new modules 2024-10-20 17:54:43 +02:00
af589c85ec reverse all the settings to keep the last one received instead of the first one received in case we receive the same setting multiple times 2024-10-20 17:40:31 +02:00
ac919df37d simplify the trait a bit more by getting rid of the downcast_aggregate method 2024-10-20 17:36:29 +02:00
73b5722896 rename the other parameter of the aggregate method to new to avoid confusion 2024-10-20 17:31:35 +02:00
c94679bde6 apply review comments 2024-10-20 17:24:12 +02:00
e51e6f902a Highlight partially cropped matches too 2024-10-19 13:42:02 +03:00
6c226a4580 Merge branch 'main' into change-matches-position-phrase-search 2024-10-17 21:25:42 +03:00
89e2d2b2b9 fix the doctest 2024-10-17 13:55:49 +02:00
3a7a20c716 remove the segment feature and always import segment 2024-10-17 11:21:14 +02:00
cd378e5bd2 Add chunking 2024-10-17 10:18:00 +02:00
fa1db6b721 fix the tests 2024-10-17 09:55:30 +02:00
1ab6fec903 send all experimental features in the info event including the runtime one 2024-10-17 09:49:21 +02:00
c1fcb2ebc6 add some warning 2024-10-17 09:43:11 +02:00
18ac4032aa Remove the experimental feature seen 2024-10-17 09:35:11 +02:00
d9115b74f0 move the analytics settings code to a dedicated file 2024-10-17 09:32:54 +02:00
0749633618 Don't sort in parallel in sorters of the new indexer 2024-10-17 09:30:18 +02:00
0fde49640a make clippy happy 2024-10-17 09:18:25 +02:00
4ee65d870e remove a lot of unused code 2024-10-17 09:14:34 +02:00
ef77c7699b add the required shared values between all the events and fix the timestamp 2024-10-17 09:06:23 +02:00
7382fb21e4 fix the main 2024-10-17 08:38:11 +02:00
e4ace98004 fix all the routes + move to a better version of mopa 2024-10-17 01:04:25 +02:00
aa7a34ffe8 make the aggregate method send 2024-10-17 00:43:34 +02:00
6728cfbfac fix the analytics 2024-10-17 00:38:18 +02:00
ea6883189e finish the analytics in all the routes 2024-10-16 21:17:06 +02:00
0647f75e6b Add borrow_mut_or_yield extension method 2024-10-16 17:36:41 +02:00
fdeb47fb54 implements all routes 2024-10-16 17:16:33 +02:00
e66fccc3f2 get rid of the analytics closure 2024-10-16 15:51:48 +02:00
73e87c152a rewrite most of the analytics especially the settings 2024-10-16 15:43:27 +02:00
86a0097311 Use bumpalo in word docids 2024-10-16 14:04:44 +02:00
c75de1f391 Remove TODO 2024-10-16 11:18:59 +02:00
198238687f Guess and retrieve primary key correctly in batch 2024-10-16 09:27:18 +02:00
f9a6c624a7 Put primary key, and use provided key in operation 2024-10-16 09:27:00 +02:00
017757004e Add PrimaryKey::new_or_insert 2024-10-16 09:26:18 +02:00
75b2f22add Merge #5008
5008: Display vectors when no custom vectors were ever provided r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes the issue reported on [Discord](https://discord.com/channels/1006923006964154428/1294653031958446080/1295336784896589967).

## What does this PR do?
- Normal behavior of Meilisearch is to hide `_vectors` even when `retrieveVectors: true` when there is an explicit list of displayed attributes that does not contain vectors
- However, this relied on the field id for the `_vectors` field to exist, which wasn't the case when no `_vectors` was manually provided to documents. This would often be the case for people using autoembedders such as the OpenAI integration.
- This PR fixes the behavior by looking for the `_vectors` string in the `displayedAttributes` when there is no `_vectors` fid.
- This PR also adds a test for this specific situation, that would fail before the PR, and pass after the PR


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-10-15 13:08:47 +00:00
152683083b Change document operation to use method in primary key 2024-10-15 14:08:37 +02:00
c283c95f6a Support nested primary keys 2024-10-15 14:08:37 +02:00
9a0e1dc375 Fix the prefix deletion 2024-10-15 11:20:09 +02:00
1e81d72b5f Use the fixed version of the Rhai crate 2024-10-14 18:18:59 +02:00
52b95c4e59 Make sure we edit the task statuses 2024-10-14 16:48:15 +02:00
7e1dc8439b Introduce the new update by function 2024-10-14 16:32:50 +02:00
5a74d4729c Add test failing before this PR, OK now 2024-10-14 16:23:28 +02:00
e44e7b5e81 Fix retrieveVectors when explicitly passed in displayed attributes without any document containing _vectors 2024-10-14 16:17:19 +02:00
a0b3887709 Merge #5006
5006: Bring back changes from v1.10.3 r=Kerollmops a=irevoire

# Pull Request

## Related issue
Port the following PR to the latest version: https://github.com/meilisearch/meilisearch/pull/5000
See its description for more information

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-14 14:06:35 +00:00
96658ec775 Make de public 2024-10-14 15:41:58 +02:00
c01ee7b732 external changes 2024-10-14 15:41:58 +02:00
6ad3f57bc1 Changes to de 2024-10-14 15:41:58 +02:00
28d92c521a External docids to &'bump str 2024-10-14 15:41:58 +02:00
7df20d8282 Changes to primary key 2024-10-14 15:41:57 +02:00
b4102741e6 Fix duplicated fields when a document is modified 2024-10-14 14:59:40 +02:00
4b4a6c7863 Update meilisearch/src/option.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-10-14 14:39:34 +02:00
3085092e04 Update meilisearch/src/option.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-10-14 14:39:34 +02:00
c4efd1df4e Update meilisearch/src/option.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-10-14 14:39:34 +02:00
c32282acb1 improve doc 2024-10-14 14:39:34 +02:00
92070a3578 Implement the experimental drop search after and nb search per core 2024-10-14 14:39:33 +02:00
a525598ad6 Fix facet string indexing 2024-10-14 11:12:10 +02:00
4e97e38177 Serialize docids bitmap one time 2024-10-14 11:12:10 +02:00
d675e73af1 Finish prefix databases 2024-10-14 11:12:10 +02:00
a2fbf2ea21 set updated at at the end of the indexing 2024-10-14 11:05:25 +02:00
132916f62c Only run word pair proximity docids extraction if proximity_precision enables it 2024-10-14 11:05:25 +02:00
8371819114 Some clippy related fixes 2024-10-14 10:58:37 +02:00
a90563df3f Merge #5001
5001: Do not send empty edit document by function r=Kerollmops a=irevoire

# Pull Request

We realized that we had a huge usage of the feature from users who didn’t enable the feature at all. That shouldn’t be possible.
After a big investigation with `@gmourier` 
![image](https://github.com/user-attachments/assets/eae3e851-dc5b-4616-80ee-7237a4871522)
We found the issue, it was in the engine

## What does this PR do?
- Do not send the edit by function event to segment if no event was received during this batch

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-11 08:27:16 +00:00
466604725e Do not send empty edit document by function 2024-10-10 23:47:15 +02:00
6028d6ba43 Remove some warnings 2024-10-10 22:42:37 +02:00
68a2502388 Introduce indexer level bumpalo 2024-10-10 22:23:05 +02:00
995394a516 Merge #4993
4993: Update mini-dashboard r=ManyTheFish a=curquiza

Remove the forced capitalized attribute name

Co-authored-by: curquiza <clementine@meilisearch.com>
2024-10-10 05:57:45 +00:00
6e37ae8619 Update mini-dashboard 2024-10-09 19:13:14 +02:00
657c645603 Merge #4992
4992: fix the bad experimental search queue size r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes #4991 

## What does this PR do?
- Set the right default value for the experimental search queue size in the config file


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-09 10:45:48 +00:00
7f5d0837c3 fix the bad experimental search queue size 2024-10-09 11:46:57 +02:00
39b27e42be Plug the deletion pipeline 2024-10-08 16:04:19 +02:00
470c2272dd Show much more stats about the LRU caches 2024-10-08 15:29:24 +02:00
30f3c30389 Merge #4962
4962: test: improve performance of create_index.rs r=irevoire a=DerTimonius

# Pull Request

## Related issue
related to #4840 

## What does this PR do?
This PR follows the instructions in #4840 and improves the performance of `meilisearch/tests/index/create_index.rs`. The tests run locally, if they fail in the CI I'll try to fix them

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Timon Jurschitsch <timon.jurschitsch@gmail.com>
2024-10-08 13:00:56 +00:00
d907d1b22d Merge #4990
4990: Add image source label to dockerfiles r=curquiza a=wuast94

To get changelogs shown with Renovate a docker container has to add the source label described in the OCI Image Format Specification.

For reference: https://github.com/renovatebot/renovate/blob/main/lib/modules/datasource/docker/readme.md

Co-authored-by: Marc <github@wuast24.de>
Co-authored-by: Clémentine <clementine@meilisearch.com>
2024-10-08 12:19:38 +00:00
ed267fa063 Apply suggestions from code review 2024-10-08 14:14:16 +02:00
6af55b1a80 Update Dockerfile 2024-10-08 11:59:43 +02:00
2230674c0a Merge branch 'fix-append-only-vec' into indexer-edition-2024 2024-10-08 10:32:45 +02:00
5b04189f7a remove flaky assert 2024-10-07 16:50:57 +02:00
eb09dfed04 Avoid reallocation with the ThreadLocal pool 2024-10-07 16:41:17 +02:00
83c09d0db0 Remove the now, useless AppendOnlyVec library 2024-10-07 16:38:45 +02:00
c0912aa685 add missing shared servers 2024-10-07 16:29:47 +02:00
af38f46621 Merge branch 'main' of https://github.com/meilisearch/meilisearch into test/improve-create-index 2024-10-07 16:27:57 +02:00
c11b7e5c0f Reduce number of cache created by using thread_local 2024-10-07 15:58:16 +02:00
03579aba13 Adjust test 2024-10-04 11:38:47 +03:00
c3de3a9ab7 Refactor 2024-10-04 11:30:31 +03:00
386ca86297 Merge #4963
4963: test: improve performance of delete_index.rs r=curquiza a=DerTimonius

# Pull Request

## Related issue
related to #4840

## What does this PR do?
This PR follows the instructions in #4840 and improves the performance of `meilisearch/tests/index/delete_index.rs`. The tests run locally, if they fail in the CI I'll try to fix them

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Timon Jurschitsch <timon.jurschitsch@gmail.com>
2024-10-03 15:40:07 +00:00
dff2d54784 Merge pull request #4976 from meilisearch/fix-append-only-vec
Fix append only `Vec` by using a `LinkedList`
2024-10-03 17:26:00 +02:00
58d96fbea3 Rename Node parent to next 2024-10-03 16:15:05 +02:00
4665bfcb19 Move the parent assignation before the exchange operation 2024-10-03 16:14:23 +02:00
a7a01646cf Remove the useless Manually drop 2024-10-03 15:57:31 +02:00
0409a26cd8 Replace the concurrent vec by a linked list 2024-10-03 15:15:29 +02:00
8221c94e7f Split into multiple files, refactor 2024-10-03 15:37:51 +03:00
35f78b5423 TO REMOVE: useful debug prints 2024-10-03 11:13:01 +02:00
14261f8f04 Integrate facet level bulk update
Only the facet bulk update has been added so far, the incremental must be completely rewritten

Factorize facet merging

Fix facet level extraction
2024-10-03 11:13:00 +02:00
774ed28539 Fix Prefix FST when a document is modified 2024-10-03 11:12:26 +02:00
d79f75f630 Compute and Write external-documents-ids database 2024-10-03 11:11:56 +02:00
c427d9e2ad Merge branch 'main' into change-matches-position-phrase-search 2024-10-03 10:42:34 +03:00
40336ce87d Fix and refactor crop_bounds 2024-10-03 10:40:14 +03:00
2a18917af3 add delete_index_fail function 2024-10-02 16:23:21 +02:00
ccf01c2471 Merge pull request #4969 from meilisearch/indexer-edition-2024-try-map
Indexer edition 2024 try map
2024-10-02 11:25:05 +02:00
0566f2549d Merge #4972
4972: Add binary quantized to error messages r=irevoire a=dureuill

was missing in error messages

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-10-02 09:23:55 +00:00
0c2661ea90 Fix tests 2024-10-02 11:20:29 +02:00
62dfbd6255 Add binary quantized to allowed fields for source adds its sources 2024-10-02 11:20:02 +02:00
cc669f90d5 Merge #4971
4971: update arroy r=dureuill a=irevoire

# Pull Request

Fix part of https://github.com/meilisearch/meilisearch/issues/3715


## What does this PR do?
- Update arroy to the latest version, most changes are maintenance changes
- The performances of adding vectors to arroy should slightly improve
- Forward the build cancellation function to arroy so it can stop building trees when we have to stop an indexing process


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-10-02 05:53:51 +00:00
37a9d64c44 Fix failing test, refactor 2024-10-01 22:52:01 +03:00
b1dc10e771 uses the new cancellation method in arroy 2024-10-01 17:45:49 +02:00
4b598fa648 update arroy 2024-10-01 17:31:12 +02:00
17571805b4 use shared servers 2024-10-01 17:27:27 +02:00
2654ce6e6c use shared servers 2024-10-01 17:01:47 +02:00
d9e4db9983 Refactor 2024-10-01 17:50:59 +03:00
6d16230f17 Refactor 2024-10-01 17:19:15 +03:00
b7a5ba100e Move the ParallelIteratorExt into the parallel_iterator_ext module 2024-10-01 11:11:52 +02:00
dead7a56a3 Keep the caches in the AppendOnlyVec 2024-10-01 11:11:39 +02:00
0a8cb471df Introduce the AppendOnlyVec struct for the parallel computing 2024-10-01 11:11:25 +02:00
00e045b249 Rename and use the try_arc_for_each_try_init method 2024-10-01 11:11:25 +02:00
d83c9a4074 Introduce the try_for_each_try_init method to be used with Arced Errors 2024-10-01 11:11:25 +02:00
f3356ddaa4 Fix the errors when using the try_map_try_init method 2024-10-01 11:11:10 +02:00
31de5c747e WIP using try_map_try_init 2024-10-01 11:10:53 +02:00
3843240940 Prefer using Ars instead of Options 2024-10-01 11:10:53 +02:00
8cb5e7437d try using try_map_try_init 2024-10-01 11:10:53 +02:00
5b776556fe Add ParallelIteratorExt 2024-10-01 11:10:53 +02:00
bb7a503e5d Compute prefix databases
We are now computing the prefix FST and a prefix delta in the Merger thread,
after all the databases are written, the main thread will recompute the prefix databases based on the prefix delta without needing any grenad temporary file anymore
2024-10-01 09:57:06 +02:00
eabc14c268 Refactor, handle more cases for phrases 2024-09-30 21:24:41 +03:00
e78da35287 Merge #4930
4930: Return `UserError::InvalidDocumentId` for primary keys with a length greater than 512 bytes r=curquiza a=flevi29

# Pull Request

## Related issue
Fixes #4843

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: F. Levi <55688616+flevi29@users.noreply.github.com>
2024-09-30 15:55:05 +00:00
64589278ac Appease *some* of clippy warnings 2024-09-30 16:08:29 +02:00
8df6daf308 Remove fid_wordcount_docids.rs 2024-09-30 11:52:31 +02:00
5b552caf42 Fix position in insertions 2024-09-30 11:46:32 +02:00
2b51a63418 Remove dead code 2024-09-30 11:42:36 +02:00
3d8024fb2b write the weighted fields ids map 2024-09-30 11:35:03 +02:00
4b0da0ff24 Fix inversion of field_id and position 2024-09-30 11:34:50 +02:00
079f2b5de0 Format error messages consistently 2024-09-30 11:34:31 +02:00
84b4219a4f test: improve delete_index.rs 2024-09-29 10:16:31 +02:00
5539a1904a test: improve performance of create_index.rs 2024-09-28 11:05:52 +02:00
00ccf53ffa Merge branch 'main' into change-matches-position-phrase-search 2024-09-27 15:52:05 +03:00
d20a39b959 Refactor find_best_match_interval 2024-09-27 15:44:30 +03:00
71b364286b Merge #4957
4957: Update charabia feature flags r=dureuill a=ManyTheFish

# Pull Request

Add charabia's `turkish` feature flag into Meilisearch default tokenization flag



[All tests pipeline](https://github.com/meilisearch/meilisearch/actions/runs/11030036031)

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-26 20:19:21 +00:00
86183e0807 Merge #4960
4960: Update rhai r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4956

A fix has been implemented in https://github.com/rhaiscript/rhai/issues/916

## What does this PR do?
- Use the latest version of rhai containing the fix

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-09-26 15:03:01 +00:00
78a4b7949d update rhai to a version that shouldn’t panic 2024-09-26 15:04:03 +02:00
960060ebdf Fix fst builder when there is no previous FST 2024-09-25 16:53:00 +02:00
3d244451df Reduce the lru key size from 8 to 12 bytes 2024-09-25 16:14:13 +02:00
5f53935c8a Fix a bug in the Lru 2024-09-25 16:09:34 +02:00
29a7623c3f Fix some logs 2024-09-25 15:57:50 +02:00
e97041f7d0 Replace the Lru free list by a simple increment 2024-09-25 15:55:52 +02:00
52d7f3ed1c Reduce the lru key size from 20 to 8 bytes 2024-09-25 15:37:13 +02:00
86d5e6d9ff Use the new Lru 2024-09-25 14:54:56 +02:00
759b9b1546 Introduce a new custom Lru 2024-09-25 14:49:12 +02:00
3f7a500f3b Build prefix fst 2024-09-25 14:36:06 +02:00
dc2cb58cf1 use charabia default for all-tokenization 2024-09-25 11:12:30 +02:00
e9580fe619 Add turkish normalization 2024-09-25 11:03:17 +02:00
8205254f4c Merge #4955
4955: Upgrade "batch failed" log to error level r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4916 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-25 08:18:44 +00:00
974272f2e9 Merge branch 'main' into indexer-edition-2024 2024-09-25 07:41:16 +02:00
7ad037841f Move the tracing info to eprintln 2024-09-24 18:21:58 +02:00
e0c7067355 Expose an IndexedParallelIterator to the index function 2024-09-24 17:24:59 +02:00
efdc5739d7 Merge #4953
4953: Move the multi arroy index logic to the arroy wrapper r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4948

## What does this PR do?
- Make the `ArroyWrapper` we introduced in the last PR handle all the embedded for a specific docid itself.


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-09-24 15:02:24 +00:00
b31e9bea26 while retrieving the readers on an arroywrapper, stops at the first empty reader 2024-09-24 16:33:17 +02:00
6e87332410 Change the way the FST is built 2024-09-24 16:28:31 +02:00
2d1caf27df Use eprintln to log 2024-09-24 15:59:50 +02:00
92678383d6 Update charabia 2024-09-24 15:37:56 +02:00
7f148c127c Measure the SmallVec efficacy 2024-09-24 15:32:15 +02:00
7f048b9732 early exit in the clear and contains 2024-09-24 15:02:38 +02:00
8b4e2c7b17 Remove now unused method 2024-09-24 15:00:25 +02:00
645a55317a merge the build and quantize method 2024-09-24 14:54:24 +02:00
8caf97db86 Merge #4954
4954: Fix bench by adding embedder r=ManyTheFish a=dureuill

Fix benchmark workloads following breaking change on embedders

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-24 12:53:34 +00:00
b8a74e0464 fix comments 2024-09-24 10:59:15 +02:00
fd8447c521 fix the del items thing 2024-09-24 10:52:05 +02:00
f2d187ba3e rename the index method to embedder_index 2024-09-24 10:39:40 +02:00
79d8a7a51a rename the embedder index for clarity 2024-09-24 10:36:28 +02:00
86da0e83fe Upgrade "batch failed" log to ERROR level 2024-09-24 10:02:53 +02:00
0704fb71e9 Fix bench by adding embedder 2024-09-24 09:56:47 +02:00
4ce5d3d66d Do not check before pushing in bitmaps 2024-09-24 09:43:16 +02:00
1e4d4e69c4 finish the arroywrapper 2024-09-23 18:56:15 +02:00
ff931edb55 Update roaring to inline max calls 2024-09-23 16:53:42 +02:00
42b093687d Introduce the new PushOptimizedBitmap 2024-09-23 16:38:21 +02:00
835c5f98f9 Remove the debug symbols 2024-09-23 15:49:24 +02:00
6ba4baecbf first ugly step 2024-09-23 15:15:26 +02:00
f00664247d Add more stats about the channel message sent 2024-09-23 15:13:52 +02:00
3c63d4a1e5 Fix charabia Zho 2024-09-23 14:50:17 +02:00
4551abf6d4 Update roaring to the latest version 2024-09-23 14:35:33 +02:00
193d7f5d34 Add the mutualized charabia normalization 2024-09-23 14:24:25 +02:00
013acb3d93 Measure merger writer channel contention 2024-09-23 11:07:59 +02:00
7f20c13f3f Merge #4943
4943: Correct broken links in README r=curquiza a=iornstein

# Pull Request

## Related issue
Fixes #4942

## What does this PR do?
- Corrects some broken links in the README. My suspicion is that some of these documentation articles were moved around without someone updating links in the README.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? _(well the contributing guidelines led me to create an issue first)_
- [x] Have you read the contributing guidelines? _yes_
- [x] Have you made sure that the title is accurate and descriptive of the changes? _yes_

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ian Ornstein <ian.ornstein@gmail.com>
2024-09-19 19:22:04 +00:00
462a2329f1 Merge #4941
4941: Implement the binary quantization in meilisearch r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4873

## What does this PR do?
- Add a setting for the binary quantization
- Once enabled, the bq cannot be disabled

TODO:
- [ ] Missing a bunch of tests

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-09-19 15:50:24 +00:00
afa3ae0cbd WIP 2024-09-19 17:42:52 +02:00
f6483cf15d apply review comment 2024-09-19 16:47:06 +02:00
bd34ed01d9 Merge #4945
4945: Add swedish in default pipelines r=dureuill a=ManyTheFish

# Summary
## Fix Swedish support

In Swedish, the characters `å`/`ä`/`ö` are completely different from `a` or `o` and should not be normalized as the same character.
Because the Swedish specialized pipeline was not activated by default, these characters were normalized even with the settings:
```json
{
  "localizedAttributes": [ { "locales": ["swe"], "attributePatterns": ["*"] } ]
}
```

## Update Charabia adding German support

German segmentation will now be activated using the setting:
```json
{
  "localizedAttributes": [ { "locales": ["deu"], "attributePatterns": ["*"] } ]
}
```

# TODO

- [x] Activate Swedish Pipeline
- [x] Add a test to avoid future regressions
- [x] Update Charabia


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-19 14:42:03 +00:00
74199f328d Make clippy happy 2024-09-19 16:27:34 +02:00
1113c42de0 fix broken comments 2024-09-19 16:18:36 +02:00
465afe01b2 Add test for German 2024-09-19 16:09:01 +02:00
7d6768e4c4 Add german tokenization pipeline 2024-09-19 16:09:01 +02:00
f77661ec44 Update Charabia v0.9.1 2024-09-19 16:08:59 +02:00
b8fd85a46d Get rid of useless collect before an iteration on the readers 2024-09-19 15:57:38 +02:00
fd43c6c404 Improve the error message explaining you can't un-bq an embedder 2024-09-19 15:51:29 +02:00
2564ec1496 Update milli/src/index.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 15:41:44 +02:00
b6b73fe41c Update milli/src/update/settings.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 15:41:14 +02:00
6dde41cc46 stop using a local version of arroy and instead point to the git repo with the rev 2024-09-19 15:25:38 +02:00
163f8023a1 remove debug println 2024-09-19 12:13:25 +02:00
2b120b89e4 update the test now that the embedder must be specified 2024-09-19 12:08:59 +02:00
84f842233d snapshots the embedder settings in the dump import with vector test 2024-09-19 12:00:58 +02:00
633537ccd7 fix updating documents without updating the settings 2024-09-19 12:00:58 +02:00
e8d7c00d30 add a test on the settings value 2024-09-19 12:00:58 +02:00
3f6301dbc9 fix the missing embedder name in the error message when trying to disable the binary quantization 2024-09-19 12:00:58 +02:00
ca71b63ed1 adds integration tests 2024-09-19 12:00:58 +02:00
2b6952eda1 rename the ArroyReader to an ArroyWrapper since it can read and write 2024-09-19 12:00:58 +02:00
79f29eed3c fix the tests and the arroy_readers method 2024-09-19 12:00:58 +02:00
cc45e264ca implement the binary quantization in meilisearch 2024-09-19 12:00:56 +02:00
5f474a640d Merge #4938
4938: Remove default embedder r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #4738 

## What does this PR do?

[See public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#1044b06b651f80edb9d4ef6dc367bad0)

- Remove `hybrid.embedder` boolean from analytics because embedder is now mandatory and so the boolean would always be `true`
- Rework search kind so that a search without query but with vector is a vector search regardless of (non-zero) semantic ratio


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-19 09:17:14 +00:00
bbaee3dbc6 Add Swedish pipeline in all-tokenization feature 2024-09-19 08:34:51 +02:00
877717cb26 Add a test using Swedish documents 2024-09-19 08:34:04 +02:00
0ffeea5a52 Remove wrong comments 2024-09-19 09:06:40 +03:00
716817122a Correct broken links in README 2024-09-18 16:30:29 -05:00
ff523a2357 Merge #4939
4939: Introduce the `STARTS WITH` filter operator r=irevoire a=Kerollmops

This PR fixes #4872 by introducing the `STARTS WITH` filter operator and gating it under the _contains filter_ experimental feature along with the `CONTAINS` one. I also updated [the experimental feature discussion page](https://github.com/orgs/meilisearch/discussions/763).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-09-18 10:19:48 +00:00
29c3aca72a Merge #4929
4929: Add facets support to federated r=Kerollmops a=dureuill

# Pull Request

## Related issue 

- Fixes #4932 (sprint issue)
- Fixes  #4913 (user-opened issue)

## What does this PR do?

See [public usage](https://meilisearch.notion.site/v1-11-Federated-search-59b30e03383c40729d7541a3dffb0069)

> [!CAUTION]
> This PR introduces a 🚨**breaking change**🚨: `queries.facets` when `federation` is present and non-`null` is now **an error**

### Implementation standpoint:

- Facet distribution: fix issue where truncated facet distribution would have a wrong order
- facet distribution: implement Display for OrderBy


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-18 09:47:20 +00:00
00f8d03f43 Use f32::min and f32::max 2024-09-18 11:46:10 +02:00
50981ea778 Update the error messages 2024-09-18 11:44:29 +02:00
c2caff1716 Remove obsolete enum 2024-09-18 11:26:43 +02:00
30aa1f6dea Merge with main 2024-09-18 11:03:33 +03:00
83113998f9 Add more test assertions 2024-09-18 10:35:23 +03:00
4c355bede7 Merge #4937
4937: Support iso 639 1 r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #4827

## What does this PR do?
- Add iso-639-1 variants to the Locales enum
- Convert iso-639-1 into iso-639-3


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-18 05:29:32 +00:00
174d69ff72 Don't override max value in indexes 2024-09-17 18:16:14 +02:00
52a52f97cf Update tests 2024-09-17 17:49:12 +02:00
5de4b48552 Fixup error messages 2024-09-17 17:49:00 +02:00
df648ce7a6 Update tests 2024-09-17 17:40:14 +02:00
af8edab21d Remove mention of sort order and recommend changing index settings on inconsistent order error 2024-09-17 17:39:51 +02:00
c42746c4cd Update tests 2024-09-17 17:22:14 +02:00
98b77aec66 Remove runtime sortFacetValuesBy 2024-09-17 17:22:03 +02:00
54d3ba3357 Fix tests that check error message content 2024-09-17 17:14:39 +02:00
6e058709f2 Rustfmt 2024-09-17 17:02:06 +02:00
0fbf9ea5b1 Factorize using macro 2024-09-17 17:00:03 +02:00
9f1fb4b425 Introduce the STARTS WITH filter operator gated under an experimental feature 2024-09-17 16:44:11 +02:00
f7337affd6 Adjust tests to changes 2024-09-17 17:31:09 +03:00
1120a5296c Update tests 2024-09-17 16:30:43 +02:00
a35a339c3d Touchup error message 2024-09-17 16:30:43 +02:00
cac5836f6f Remove hybrid.embedder boolean from analytics because embedder is now mandatory 2024-09-17 16:30:43 +02:00
5239ae0297 Rework search kind so that a search without query but with vector is a vector search regardless of semantic ratio 2024-09-17 16:30:43 +02:00
2fdb1d8018 SearchQueryGet can fail 2024-09-17 16:30:43 +02:00
3c5e363554 Remove default embedders 2024-09-17 16:30:43 +02:00
da0dd6febf Make embedder mandatory 2024-09-17 16:30:43 +02:00
a197d63ab6 simplify tests 2024-09-17 15:30:12 +02:00
390eadb733 Support iso-639-1 2024-09-17 15:01:01 +02:00
93f0317b94 Merge #4936
4936: Update version for the next release (v1.11.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-09-17 11:47:08 +00:00
29ff02f3ff Update version for the next release (v1.11.0) in Cargo.toml 2024-09-17 11:45:48 +00:00
d9e0df74ea update test 2024-09-17 10:39:48 +02:00
dc8a662209 federated queries: adjust error message 2024-09-17 10:39:48 +02:00
6732dd95d7 Update tests 2024-09-17 10:39:48 +02:00
95da428dc8 Use route in federated 2024-09-17 10:39:48 +02:00
38c4be1c8e compute_facets accepts Route argument to fixup error code 2024-09-17 10:39:48 +02:00
91dfab317f New error 2024-09-17 10:39:48 +02:00
47e3c4b5c3 Add new tests 2024-09-17 10:39:48 +02:00
533f1d4345 Federated search: support facets 2024-09-17 10:39:48 +02:00
7b55462610 BREAKING CHANGE: errors if queries.facets in federated search 2024-09-17 10:39:48 +02:00
f6114a1ff2 Introduce ComputedFacets and compute_facet_distribution_stats 2024-09-17 10:39:48 +02:00
7c084b1286 SearchQueriesWithIndex changes 2024-09-17 10:39:47 +02:00
57f9517a98 Required changes to IndexUid 2024-09-17 10:39:47 +02:00
72cc573e0a Add new error types 2024-09-17 10:39:47 +02:00
a48b1d5a79 Update existing tests following error message changes 2024-09-17 10:39:47 +02:00
a94a87ee54 Slightly changes existing error messages 2024-09-17 10:39:47 +02:00
e098cc8320 Make comparison simpler, add IndexUid error details similarly 2024-09-17 00:16:15 +03:00
ec815fa368 Format 2024-09-16 23:59:48 +03:00
4a922a176f Add test for > 512 byte ID 2024-09-16 23:53:34 +03:00
51bc7b3173 Update tests 2024-09-16 22:22:24 +03:00
f4ab1f168e Prefer using Rc<str> than String when cloning a lot 2024-09-16 15:41:29 +02:00
4b55ba68bc Merge #4911
4911: Bump quinn-proto from 0.11.3 to 0.11.8 r=Kerollmops a=dependabot[bot]

Bumps [quinn-proto](https://github.com/quinn-rs/quinn) from 0.11.3 to 0.11.8.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/quinn-rs/quinn/releases">quinn-proto's releases</a>.</em></p>
<blockquote>
<h2>quinn-proto 0.11.5</h2>
<h2>What's Changed</h2>
<ul>
<li>No workspace lints by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1955">quinn-rs/quinn#1955</a></li>
</ul>
<h2>quinn-proto 0.11.4</h2>
<h2>What's Changed</h2>
<ul>
<li>Fix panic in example due to unset default crypto provider by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1882">quinn-rs/quinn#1882</a></li>
<li>Fix zero-length connection IDs by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1883">quinn-rs/quinn#1883</a></li>
<li>Add support for NetBSD, fix OpenBSD by <a href="https://github.com/flub"><code>`@​flub</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1884">quinn-rs/quinn#1884</a></li>
<li>docs(udp): replace AsRawFd and AsRawSocket with AsFd and AsSocket by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1890">quinn-rs/quinn#1890</a></li>
<li>Resolve stopped/received_reset futures on lost connections by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1886">quinn-rs/quinn#1886</a></li>
<li>Bump version numbers (quinn 0.11.2, -proto 0.11.3) by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1891">quinn-rs/quinn#1891</a></li>
<li>udp: bump version to 0.5.2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1892">quinn-rs/quinn#1892</a></li>
<li>docs(quinn): Clarify effects of setting AckFrequencyConfig by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1894">quinn-rs/quinn#1894</a></li>
<li>Apply clippy suggestions from Rust 1.79 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1895">quinn-rs/quinn#1895</a></li>
<li>Only send MAX_STREAMS when &gt;1/8 of flow control window is consumed  by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1898">quinn-rs/quinn#1898</a></li>
<li>fix: remove unused dependency tracing-attributes by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1903">quinn-rs/quinn#1903</a></li>
<li>proto: make initial destination cid configurable  by <a href="https://github.com/thynson"><code>`@​thynson</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1897">quinn-rs/quinn#1897</a></li>
<li>Allow configuring rng seed through <code>EndpointConfig</code> by <a href="https://github.com/aochagavia"><code>`@​aochagavia</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1901">quinn-rs/quinn#1901</a></li>
<li>quinn: introduce waking helpers by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1908">quinn-rs/quinn#1908</a></li>
<li>Wake blocked streams on 0-RTT rejection by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1905">quinn-rs/quinn#1905</a></li>
<li>Upgrade to rustc-hash 2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1909">quinn-rs/quinn#1909</a></li>
<li>Fix unnecessary Incoming warning on Endpoint drop by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1907">quinn-rs/quinn#1907</a></li>
<li>Revise and add additional 0-rtt doc comments by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1826">quinn-rs/quinn#1826</a></li>
<li>docs: remove reference to sendmmsg by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1914">quinn-rs/quinn#1914</a></li>
<li>Fix debug assert with reordered ACKs by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1893">quinn-rs/quinn#1893</a></li>
<li>quinn: Make <code>Endpoint::client</code> dual-stack V6 by default by <a href="https://github.com/gretchenfrage"><code>`@​gretchenfrage</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1913">quinn-rs/quinn#1913</a></li>
<li>bench(udp): measure non-GSO &amp; GSO on localhost by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1915">quinn-rs/quinn#1915</a></li>
<li>proto: avoid overflow in handshake done statistic by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1918">quinn-rs/quinn#1918</a></li>
<li>Use workspace dependencies for all external dependencies by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1919">quinn-rs/quinn#1919</a></li>
<li>Fix lack of reexport of ConnectionStats and ResetError by <a href="https://github.com/TirushOne"><code>`@​TirushOne</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1920">quinn-rs/quinn#1920</a></li>
<li>[non-breaking] deps(udp): make tracing optional and add optional log by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1923">quinn-rs/quinn#1923</a></li>
<li>fix(udp): feature flag tracing in windows.rs by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1932">quinn-rs/quinn#1932</a></li>
<li>Bump MSRV to 1.70 following tokio 1.39 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1939">quinn-rs/quinn#1939</a></li>
<li>Raise default idle timeout to 30 seconds by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1938">quinn-rs/quinn#1938</a></li>
<li>Discard pre-handshake packets after the handshake by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1937">quinn-rs/quinn#1937</a></li>
<li>Apply suggestions from Clippy 1.80 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1941">quinn-rs/quinn#1941</a></li>
<li>chore(quinn): feature flag socket2 imports by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1933">quinn-rs/quinn#1933</a></li>
<li>refactor: move rust-version to workspace Cargo.toml by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1940">quinn-rs/quinn#1940</a></li>
<li>chore: move common package data to workspace Cargo.toml by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1943">quinn-rs/quinn#1943</a></li>
<li>Endpoint stats interface by <a href="https://github.com/ryleung-solana"><code>`@​ryleung-solana</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1900">quinn-rs/quinn#1900</a></li>
<li>Expose the Handshake Confirmed state by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1944">quinn-rs/quinn#1944</a></li>
<li>Exclude metrics with freestanding getters from EndpointStats by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1945">quinn-rs/quinn#1945</a></li>
<li>Fix incorrect initial DCID indexing on retried connections by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1946">quinn-rs/quinn#1946</a></li>
<li>Add expect message to unwrap in PacketBuilder by <a href="https://github.com/casey"><code>`@​casey</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1951">quinn-rs/quinn#1951</a></li>
<li>Revert &quot;proto: yield transport error for Initial packets with no CRYPTO&quot; by <a href="https://github.com/Ralith"><code>`@​Ralith</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1952">quinn-rs/quinn#1952</a></li>
<li>refactor(udp): introduce log facade by <a href="https://github.com/mxinden"><code>`@​mxinden</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1935">quinn-rs/quinn#1935</a></li>
<li>Update cargo-deny-action to v2 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/quinn-rs/quinn/pull/1953">quinn-rs/quinn#1953</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="7c09b02073"><code>7c09b02</code></a> proto: bump version to 0.11.8 for release (<a href="https://redirect.github.com/quinn-rs/quinn/issues/1981">#1981</a>)</li>
<li><a href="59bccd2e7e"><code>59bccd2</code></a> Version bump <code>quinn</code> to enforce patched <code>quinn-proto</code></li>
<li><a href="a8ec510fd1"><code>a8ec510</code></a> proto: avoid panicking on rustls server config errors</li>
<li><a href="c26e8cd2f7"><code>c26e8cd</code></a> Bump versions</li>
<li><a href="e01609ccd8"><code>e01609c</code></a> Merge commit from fork</li>
<li><a href="c292a3c6a6"><code>c292a3c</code></a> Fix and test validation of IDCID length</li>
<li><a href="bb02a12a84"><code>bb02a12</code></a> fix(.github/android): use API level 26</li>
<li><a href="5e5cc93645"><code>5e5cc93</code></a> fix(.github/android): pass matrix.target and increase api to v26</li>
<li><a href="cef42cccef"><code>cef42cc</code></a> fix(udp): typo in sendmsg error log</li>
<li><a href="edf16a6f10"><code>edf16a6</code></a> ci(rust.yml): add workflow testing feature permutations</li>
<li>Additional commits viewable in <a href="https://github.com/quinn-rs/quinn/compare/quinn-proto-0.11.3...quinn-proto-0.11.8">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=quinn-proto&package-manager=cargo&previous-version=0.11.3&new-version=0.11.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-09-16 13:32:32 +00:00
1a0e962299 Replace hashmap by vectors in wpp 2024-09-16 15:01:20 +02:00
f13e076b8a Use hashmap instead of Btree in wpp extractor 2024-09-16 14:40:40 +02:00
7ba49b849e Extract and write facet databases 2024-09-16 09:35:16 +02:00
993408d3ba Change closure to fn 2024-09-15 16:15:09 +03:00
dcb61f8b3a Return error for primary keys with a length greater than 512 bytes 2024-09-14 11:34:13 +03:00
51085206cc Misc adjustments 2024-09-14 10:14:07 +03:00
a2a16bf846 Move MatchPosition impl to Match, adjust counting score for phrases 2024-09-13 21:20:06 +03:00
cab63abc84 Improve MatchesPosition enum with an impl 2024-09-13 14:35:28 +03:00
65e3d61a95 Make use of helper function in one more place 2024-09-13 13:35:58 +03:00
cc6a2aec06 Improve changes to Matcher 2024-09-13 13:31:07 +03:00
f7652186e1 WIP geo fields 2024-09-12 18:01:02 +02:00
23e14138bb facet distribution: implement Display for OrderBy 2024-09-12 17:43:50 +02:00
e44325683a Facet distribution: fix issue where truncated facet distribution would have a wrong order 2024-09-12 17:43:49 +02:00
e7af499314 Improve changes to Matcher 2024-09-12 16:58:13 +03:00
b2f4e67c9a Do not store useless updates 2024-09-12 15:38:31 +02:00
ff5d3b59f5 Move the document id extraction to the primary key code 2024-09-12 12:01:42 +02:00
aa69308e45 Use a bufWriter to build word FSTs 2024-09-12 11:48:00 +02:00
eb9a20ff0b Fix fid_word_docids extraction 2024-09-12 11:08:18 +02:00
edcb4c60ba Change Matcher so that phrases are counted as one instead of word by word 2024-09-12 09:46:08 +03:00
0d868f36d7 Make sure we always use a BufWriter to write the update files 2024-09-11 18:38:04 +02:00
e7d9db078f Use the right key name when converting from CSV to NDJSON 2024-09-11 18:27:00 +02:00
3e9198ebaa Support guessing primary key again 2024-09-11 17:25:40 +02:00
2a0ad0982f Fix the document counter 2024-09-11 15:59:36 +02:00
2b317c681b Build mergers in parallel 2024-09-11 11:49:26 +02:00
39b5990f64 Mutualize tokenization 2024-09-11 10:22:38 +02:00
3848adf5a2 Improve error management and simplify JSON read 2024-09-11 10:10:51 +02:00
b4de06259e Better CSV support 2024-09-11 10:02:00 +02:00
02c2b660f8 Merge #4920
4920: Change OpenAI default model r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4856

See also [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#b4685a48c4784262a149ec307ec58671)

## What does this PR do?
- make the `text-embedding-3-small` the default model for OpenAI instead of `text-embedding-ada-002`. Existing embedders are not impacted


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-11 07:08:39 +00:00
8287c2644f Support CSV again 2024-09-10 21:10:28 +01:00
c1c44a0b81 Impl serialize on TopLevelMap 2024-09-10 19:32:03 +01:00
04596f3616 Move the TopLevelMap into a dedicated module 2024-09-10 18:01:17 +01:00
24cb5839ad Move the document changes sorting logic to a new trait 2024-09-10 17:37:52 +01:00
8d97b7b28c Support JSON payloads again (not perfectly though) 2024-09-10 17:09:49 +01:00
f69688e8f7 Fix several warnings in extractors and remove unreachable macros 2024-09-09 14:52:50 +02:00
f18e9cb7b3 Change openai default model 2024-09-09 13:09:35 +02:00
8fd0afaaaa Make sure we iterate over the payload documents in order 2024-09-06 08:09:08 +02:00
72c6a21a30 Use raw JSON to read the payloads 2024-09-05 20:08:23 +02:00
8412be4a7d Cleanup CowStr and TopLevelMap struct 2024-09-05 18:32:55 +02:00
10f09c531f add some commented code to read from json with raw values 2024-09-05 18:22:16 +02:00
8fd99b111b Add tracing timers logs 2024-09-05 18:00:22 +02:00
f6b3d1f9a5 Increase some channel sizes 2024-09-05 15:12:07 +02:00
db0cf3b2ed Merge #4912
4912: Allow Meilitool to dumplessly, offline upgrade v1.9 -> v1.10 in some conditions r=Kerollmops a=dureuill

- bail early if the DB contains at least 1 REST embedder, providing the list of detected REST embedders, and without modifying the DB
- Might depend on the feature set that meilitool was compiled with and the featureset that the Meilisearch that created the DB was compiled with 💀. In case of runtime error, try again with a different feature set (passing or not passing `-p meilitool` when building after a `cargo clean`)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-05 09:11:23 +00:00
73ce67862d Use the word pair proximity and fid word count docids extractors
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-05 10:56:22 +02:00
f6abf01d2c Check REST embedders before touching the DB 2024-09-05 10:49:59 +02:00
0fc02f7351 Move the facet extraction to dedicated modules 2024-09-05 10:32:27 +02:00
34f11e3380 Implement word count and word pair proximity extractors 2024-09-05 10:30:39 +02:00
28da759f11 meilitool: Support dumpless upgrade from v1.9 to v1.10 when there are no REST embedders 2024-09-05 10:08:38 +02:00
ea96d19525 Change versioning in meili 2024-09-05 10:08:06 +02:00
d352b1ee83 Add serde to meilitool 2024-09-05 10:07:33 +02:00
27308eaab1 Import the facet extractors 2024-09-04 17:58:15 +02:00
b33ec9ba3f Introduce the FieldIdFacetIsNullDocidsExtractor 2024-09-04 17:50:08 +02:00
9c0a1cd9fd Introduce the FieldIdFacetExistsDocidsExtractor 2024-09-04 17:48:49 +02:00
0b061f1e70 Introduce the FieldIdFacetIsEmptyDocidsExtractor 2024-09-04 17:40:24 +02:00
19d937ab21 Introduce the facet extractors 2024-09-04 17:03:54 +02:00
1d59c19cd2 Send the WordsFst by using an Mmap 2024-09-04 14:30:09 +02:00
98e48371c3 Factorize some stuff 2024-09-04 12:17:13 +02:00
6d74fb0229 Introduce the WordFidWordDocids database 2024-09-04 11:40:55 +02:00
1eb75a1040 remove milli/src/update/new/extract/tokenize_document.rs 2024-09-04 11:40:26 +02:00
3b82d8b5b9 Fix the cache to serialize entries correctly 2024-09-04 10:55:36 +02:00
781a186f75 remove milli/src/update/new/extract/extract_word_docids.rs 2024-09-04 10:28:31 +02:00
6a399556b5 Implement more searchable extractor 2024-09-04 10:20:18 +02:00
27b4cab857 Extract and write the documents and words fst in the database 2024-09-04 09:59:19 +02:00
3f3cebf5f9 Bump quinn-proto from 0.11.3 to 0.11.8
Bumps [quinn-proto](https://github.com/quinn-rs/quinn) from 0.11.3 to 0.11.8.
- [Release notes](https://github.com/quinn-rs/quinn/releases)
- [Commits](https://github.com/quinn-rs/quinn/compare/quinn-proto-0.11.3...quinn-proto-0.11.8)

---
updated-dependencies:
- dependency-name: quinn-proto
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-03 20:50:30 +00:00
b278815617 Merge #4908
4908: Bring back changes from release v1.10.1 to main r=dureuill a=irevoire

# Pull Request

Following the [latest release](https://github.com/meilisearch/meilisearch/releases/tag/v1.10.1), this PR brings back the changes to main.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: irevoire <irevoire@users.noreply.github.com>
2024-09-03 14:28:12 +00:00
52d32b4ee9 Move the channel sender in the closure to stop the merger thread 2024-09-03 16:08:33 +02:00
da61408e52 Remove unimplemented from document changes 2024-09-03 15:14:16 +02:00
fe69385bd7 Fix tokenizer test 2024-09-03 14:24:37 +02:00
40e13ceef3 Merge #4892
4892:  Add a documentTemplateMaxBytes parameter to limit the max length of document templates r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #4885 

See [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#a3d63628129e40adba943ae7b8ec06c2)



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-03 11:50:07 +00:00
18a2c13e4e add analytics 2024-09-03 12:07:59 +02:00
ed19b7c3c3 Only reindex if the size increased 2024-09-03 12:07:59 +02:00
66bda2ce8a fix tests 2024-09-03 12:07:58 +02:00
1ac008926b Add maxBytes parameter 2024-09-03 12:07:15 +02:00
c49d892c82 Changes to prompt 2024-09-03 12:07:10 +02:00
de962a26f3 New error type when maxBytes is null 2024-09-03 12:01:04 +02:00
c1557734dc Use the GlobalFieldsIdsMap everywhere and write it to disk
Co-authored-by: Dureuill <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-09-03 12:01:01 +02:00
005204e9e5 make the code of init_web_app in common between most tests 2024-09-03 11:40:05 +02:00
1040e5e2b4 spawn on search queue per test 2024-09-03 11:20:25 +02:00
c50d3edc4a Integrate first searchable extractor 2024-09-03 11:02:39 +02:00
80408c92dc Merge #4906
4906: Add searchable fields to template r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4886 

See [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#1dd6f0eee5a1422888e1c5d48e107cd1)

## What does this PR do?
- `Prompt::render` now requires and uses metadata to indicate if the fields are searchable or not
- Changes default template
- Updated tests
- Correctly reindex vectors when the list of searchable fields changes in a settings update.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-09-03 07:14:58 +00:00
5369bf4a62 Change some lifetimes 2024-09-02 19:51:22 +02:00
bcb1aa3d22 Find a temporary solution to par into iter on an HashMap
Spoiler: Do not use an HashMap but drain it into a Vec
2024-09-02 19:39:48 +02:00
fa1a0beb0c fix conflicts after rebase 2024-09-02 18:15:42 +02:00
5aefe7cd17 add the snapshots 2024-09-02 16:27:51 +02:00
e6dd66e4a0 Do not fail the whole batch when a single document deletion by filter fails 2024-09-02 16:27:51 +02:00
6e3839d8b6 autobatch document deletion by filter 2024-09-02 16:27:51 +02:00
cd271b8762 stop trying to process searches after one minute 2024-09-02 16:27:51 +02:00
3ce8500d4c ensure we never early exit when we have a permit and remove the warning when we implicitly drop a permit 2024-09-02 16:27:51 +02:00
588000d398 add a warning to help us find when we forget to explicitly drop a permit 2024-09-02 16:27:51 +02:00
92b151607c explicitly drop the search permit 2024-09-02 16:27:51 +02:00
42e7499260 Update version for the next release (v1.10.1) in Cargo.toml 2024-09-02 16:27:51 +02:00
41aa1e1424 Only spawn one search queue in actix-web 2024-09-02 16:27:50 +02:00
9b7858fb90 Expose the new indexer 2024-09-02 15:21:59 +02:00
ab01679a8f Remove the useless option from the document changes 2024-09-02 15:21:00 +02:00
521775f788 I push for Many 2024-09-02 15:10:21 +02:00
72e7b7846e Renaming the indexers 2024-09-02 14:42:27 +02:00
6526ce1208 Fix the merging of documents 2024-09-02 14:41:20 +02:00
24ace5c381 Add reindexing test 2024-09-02 13:37:01 +02:00
21296190a3 Reindex embedders 2024-09-02 13:00:53 +02:00
03fda78901 update other tests 2024-09-02 11:31:31 +02:00
30a143f149 Test new facilities 2024-09-02 11:31:23 +02:00
4464d319af Change default template to use the new facility 2024-09-02 11:30:59 +02:00
580ea2f450 Pass the fields <-> ids map with metadata to render 2024-09-02 11:30:10 +02:00
915cf4bae5 Add field.is_searchable property to fields 2024-09-02 11:28:53 +02:00
e639ec79d1 Move the indexers into their own modules 2024-09-02 10:42:19 +02:00
bb885a5810 Fix the merge for roaring bitmap 2024-09-01 23:20:19 +02:00
b625d31c7d Introduce the PartialDumpIndexer indexer that generates document ids in parallel 2024-08-30 15:07:21 +02:00
6487a67f2b Introduce the ConcurrentAvailableIds struct and rename the other to AvailableIds 2024-08-30 15:06:50 +02:00
271ce91b3b Add the rayon Threadpool to the index function parameter 2024-08-30 14:34:24 +02:00
54f2eb4507 Remove duplication of grenad merger 2024-08-30 14:34:05 +02:00
794ebcd582 Replace grenad with the new grenad various-improvement branch 2024-08-30 11:53:59 +02:00
b7c77c7a39 Use the latest version of the obkv crate 2024-08-30 11:53:59 +02:00
0c57cf7565 Replace obkv with the temporary new version of it 2024-08-30 11:53:58 +02:00
27df9e6c73 Introduce the indexer::index function that runs the indexation 2024-08-30 11:53:58 +02:00
45c060831e Introduce typed channels and the merger loop 2024-08-30 11:53:58 +02:00
874c1ac538 First channels types 2024-08-30 11:53:58 +02:00
e6ffa4d454 Implement the document merge function for the replace method 2024-08-30 11:53:58 +02:00
637a9c8bdd Implement the document merge function for the update method 2024-08-30 11:53:58 +02:00
c683fa98e6 WIP
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-08-30 11:53:57 +02:00
9a756cf2c5 Merge #4888
4888: bring back v1.10.0 into main r=Kerollmops a=ManyTheFish



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-08-27 14:02:08 +00:00
36d8684dc8 Merge #4881
4881: Infer locales from index settings r=curquiza a=ManyTheFish

# Pull Request

## Related issue
Fixes #4828
Fixes #4816
## What does this PR do?
- Add some test using `AttributesToSearchOn`
- Make the search infer the language based on the index settings when the `locales` field is not specified


CI is now working:
https://github.com/meilisearch/meilisearch/actions/runs/10490050545/job/29055955667



Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-08-21 14:18:16 +00:00
b12e997c8a Add pinyin flag 2024-08-21 14:38:04 +02:00
8bf89ec394 Infer locales from index settings 2024-08-21 10:47:40 +02:00
ee62d9ce30 Merge #4845
4845: Fix perf regression facet strings r=ManyTheFish a=dureuill

Benchmarks between v1.9 and v1.10 show a performance regression of about x2 (+3dB regression) for most indexing workloads (+44s for hackernews).

[Benchmark interpretation in the engine weekly meeting](https://www.notion.so/meilisearch/Engine-weekly-4d49560d374c4a87b4e3d126a261d4a0?pvs=4#98a709683276450295fcfe1f8ea5cef3).

- Initial investigation pointed to #4819 as the origin of the regression.
- Further investigation points towards the hypernormalization of each facet value in `extract_facet_string_docids`
- Most of the slowdown is in `normalize_facet_strings`, and precisely in `detection.language()`.

This PR improves the situation (-10s compared with `main` for hackernews, so only +34s regression compared with `v1.9`) by skipping normalization when it can be skipped.

I'm not sure how to fix the root cause though. Should we skip facet locale normalization for now? Cc `@ManyTheFish` 

---

Tentative resolution options:

1. remove locale normalization from facet. I'm not sure why this is required, I believe we weren't doing this before, so maybe we can stop doing that again.
2. don't do language detection when it can be avoided: won't help with the regressions in benchmark, but maybe we can skip language detection when the locales contain only one language?
3. use a faster language detection library: `@Kerollmops` told me about https://github.com/quickwit-oss/whichlang which boasts x10 to x100 throughput compared with whatlang. Should we consider replacing whatlang with whichlang? Now I understand whichlang supports fewer languages than whatlang, so I also suggest:
4. use whichlang when the list of locales is empty (autodetection), or when it only contains locales that whichlang can detect. If the list of locales contains locales that whichlang *cannot* detect, **then** use whatlang instead.

---

> [!CAUTION]
> this PR contains a commit that adds detailed spans, that were used to detect which part of `extract_facet_string_docids` was taking too much time. As this commit adds spans that are called too often and adds 7s overhead, it should be removed before landing.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-08-19 06:29:48 +00:00
0f965d3574 Remove hotloop's spans 2024-08-14 14:33:36 +02:00
ade54493ab Only detect language for a facet if several locales have been specified by the user in the settings 2024-08-14 12:03:52 +02:00
07c8ed0459 Merge #4864
4864: Don't remove facet value when multiple original values map to the same normalized value r=ManyTheFish a=dureuill

# Pull Request

## Related issue

Fixes #4860 

> [!WARNING]  
> This PR contains a fix to the immediate issue, but it looks like the underlying data model is faulty: there is only one possible "original" value for each normalized value in a facet of a document, while because of array values (or manually written nested fields, if you're evil), it is technically possible to have multiple, distinct original values mapping to the same normalized value.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-08-13 14:04:17 +00:00
c3cdc407ec Avoid unnecessary clone() 2024-08-08 14:57:02 +02:00
2f10273d14 Group by normalized values, make sure you don't remove a value where there still remains at least one value that normalizes towards it 2024-08-08 14:02:53 +02:00
321639364f Merge #4861
4861: Make sure the index scheduler never stops running r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4748

## What does this PR do?
- Whatever happens, we always try to process tasks once every minute (if no tasks are enqueued that's practically free)

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-08-07 16:21:54 +00:00
442d06dce7 ensure the run function doesn't panic even if the tick function does 2024-08-07 17:50:32 +02:00
8f6a98df07 make sure the index scheduler never stops running 2024-08-07 17:06:43 +02:00
b44e17c4c3 Merge #4858
4858: also intersect the universe for searchOnAttributes r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4857 

## What does this PR do?
- intersect with the universe (which does not contain the filtered out ids) when looking up documents for words, even when using `searchOnAttributes`


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-08-07 13:15:26 +00:00
e3ef0ae19e also intersect the universe for searchOnAttributes 2024-08-06 14:06:56 +02:00
57f7af77c7 Merge #4846
4846: Add OpenAI tests r=dureuill a=dureuill

# Pull Request

## Related issue
Part of fixing #4757 

## What does this PR do?
- OpenAI embedder: don't pass apiKey when it is empty (slightly improves error messages)
- rest embedder and rest-based embedders: specialize the authorization denied error message depending on the configuration source
- fix existing tests
- Adds assets containing prerecorded texts to embed and the embeddings obtained from OpenAI
- Adds an asset containing a tokenized long document and the embedding obtained from OpenAI for this token
- Uses the wiremock crate to mock the OpenAI API: parse the openai request, lookup the response in assets, craft an openai response


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-08-05 10:49:28 +00:00
2d16d0aea1 Merge #4839
4839: In prometheus metrics return the route pattern instead of the real route when returning the HTTP requests total r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4825

## What does this PR do?
- return the route pattern instead of the real route when returning the HTTP requests total


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-08-05 10:14:51 +00:00
c817718e07 Merge #4853
4853: Fix rhai deletion r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4849 

## What does this PR do?
- insert inside of the bitmap instead of pushing into it.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-08-01 16:34:31 +00:00
e64d0e0ca8 use insert instead of push for bitmaps 2024-08-01 18:32:45 +02:00
21aa430b5e Fix openai tests 2024-07-31 17:57:55 +02:00
8535dc0be2 Fix existing tests 2024-07-31 17:57:32 +02:00
72b9005344 Redact uid for Value 2024-07-31 17:57:13 +02:00
420c33132c Merge #4850
4850: Use a fixed date format regardless of features r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4844 

## What does this PR do?

Given the following script: 
```
cargo run -- --db-path meili.ms
sleep 3
curl -s -X POST http://127.0.0.1:7700/indexes -H 'Content-Type: application/json' --data-binary '{"uid": "movies", "primaryKey": "id"}'
sleep 3
cargo run  -p meilisearch --db-path meili.ms
sleep 3
curl -s -X POST http://127.0.0.1:7700/indexes/movies/search -H 'Content-Type: application/json' --data-binary '{}'
```

- Before this PR, the final search returns a decoding error.
- After this PR, the search completes successfully

### Technical standpoint

This PR fixes two locations where the formatting of dates was dependent on the feature set of the `time` crate.

1. The `IndexStats` had two fields without the serialization format specified
2. More subtly, the index dates (`createdAt`, `updatedAt`) were using value remapping in the main DB to `SerdeJson<OffsetDateTime>`, which was using whatever default format was available. This was fixed by creating a local `OffsetDateTime` wrapper that would specify the serialization format 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-31 15:32:26 +00:00
9ef710cad4 Use wrapper that forces the desired date format 2024-07-31 17:12:19 +02:00
48f7329a83 Specify index_mapper on IndexStats 2024-07-31 17:11:28 +02:00
ab1ec9ca21 Add tokenized test 2024-07-31 15:03:45 +02:00
9d6efd92d2 new assets for tokenized test 2024-07-31 15:03:45 +02:00
abdb337fd6 Add openai tests 2024-07-31 15:03:45 +02:00
1c755c8899 Add openai responses 2024-07-31 15:03:45 +02:00
3a42c3134e update tests after changing authorized error message 2024-07-31 15:03:45 +02:00
5aa6cb3600 Specialize authorized error message depending on config source 2024-07-31 15:03:44 +02:00
9b7764575b openai: don't pass apiKey when it is empty 2024-07-31 15:03:44 +02:00
0e68718027 Add detailed spans 2024-07-31 13:05:47 +02:00
7c3fc8c655 Split settings and document facet string extractions 2024-07-31 10:57:46 +02:00
8acd3f50bb skip normalization when the locales and values are the same 2024-07-31 09:53:00 +02:00
25791e3f46 Merge #4836
4836: Attach declared localized-attributes subroutes r=dureuill a=dureuill

RC.0 unexpectedly doesn't contain the `GET /indexes/{indexUid}/localized-attributes` and `PUT /indexes/{indexUid}/localized-attributes` subroutes.

This PR makes them available.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-07-30 19:01:54 +00:00
866922ecc3 Merge #4808
4808: Make the tests run faster r=irevoire a=irevoire

## Index-Scheduler

### Only check the consistency of the index-scheduler on snapshots when running in release mode

This saves 12s on the tests, and since the tests run in release mode in the CI, we don't lose any information.
From 28s to 16s

### We were snapshotting the index for no reason in `advance_till`, I removed this call

This saved an additional 8s on the tests, going from 16s to 8s.

----

After these two optimizations, the test suite as a whole executes 14% quicker

## Meilisearch integration tests

While profiling this test suite, nothing stands out. The only noticeable thing is that we're losing most of our time creating and dropping threads.
I made the theory that by sharing a single common instance between all integrations tests I would gain some time again.

In 355a7acd1c I saved another 15s by only testing this theory on the module that tests the error messages. 
But we can do it on many more tests. **We must take care of not making any test flaky, though**.

## Use two indexing threads

By moving from one to two indexing threads, we gain an additional 30% in performance.

# Conclusion

## Before

The execution of the test suite was taking around:
- 4m40s on my computer
- 15 minutes on the debug CI with cache
- 29 minutes on the Windows CI with cache

## After

The execution of the test suite is taking around:
- 2m20 on my computer
- 8 minutes on the debug CI with cache
- 29 minutes on the Windows CI with cache

## This means the test suite should now run ~50% faster on your computer; the CI may report errors twice as fast, but we'll still wait for ~the same amount of time to merge a PR


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-07-30 15:11:30 +00:00
f05ea04879 In prometheus metrics return the route pattern instead of the real route when returning the HTTP requests total 2024-07-30 16:24:49 +02:00
b1b3a1a98b add a get, set and put test for the localized attributes setting 2024-07-30 15:51:02 +02:00
143d6cde10 Merge #4835
4835: Log error from main using tracing r=irevoire a=dureuill

Engine follow-up to https://github.com/meilisearch/meilisearch-support/issues/252#issuecomment-2251288276 (private link)

> `@meilisearch/engine-team` we need to open a PR to tracing::error! when an error occurs in the Meilisearch main. It would be nice to have it included in the second RC

<img width="1349" alt="Error logged when launching Meilisearch to import dump on path where the dump doesn't exist" src="https://github.com/user-attachments/assets/e5d2ae6e-f810-4029-9787-3b6ea9d47cfd">

---

<img width="1349" alt="Error logged when launching Meilisearch with a db path that is not writeable" src="https://github.com/user-attachments/assets/f672d78d-04b0-4d02-9402-259eaa6e2b62">



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-30 13:43:50 +00:00
c457069367 ensure a test is 100% not flaky 2024-07-30 15:41:51 +02:00
bb1283222e make clippy happy 2024-07-30 15:10:56 +02:00
7a5a38f870 fix a sync issue on empty indexes 2024-07-30 15:09:12 +02:00
ded3cd0dd6 an additional 30% of perf for the tests 2024-07-30 15:03:54 +02:00
68f885f1c4 fix two snapshots 2024-07-30 14:45:59 +02:00
9372c34dab prepare the tests to share indexes with api key 2024-07-30 14:34:11 +02:00
6666c57880 reduce the number of thread spawned by milli 2024-07-30 14:34:10 +02:00
b53a019b07 fix the initialization problem over the shared indexes with documents 2024-07-30 14:24:57 +02:00
d262b1df32 craft an API over the Shared Server and Shared index to avoid hard to debug mistakes 2024-07-30 14:24:57 +02:00
ed795bc837 fmt 2024-07-30 14:24:57 +02:00
993264227d reuse an index with already indexed documents instead of reindexing from scratch 2024-07-30 14:24:57 +02:00
953d3a44bd make the new_shared function synchronous and stop indexing documents when it's not required 2024-07-30 14:24:57 +02:00
e5345fb0eb shave off 15s by providing a shared instance to the integration tests 2024-07-30 14:24:55 +02:00
2d9a055fb9 stops snapshotting in advance_till when we don't need to 2024-07-30 13:57:12 +02:00
110dc01f40 only check the consistency of the index-scheduler on snapshots when running in release mode 2024-07-30 13:57:12 +02:00
9719dec443 Attach declared attributes-localized subroutes 2024-07-29 16:19:35 +02:00
fa77a949aa Log error from main using tracing 2024-07-29 14:58:39 +02:00
abe128476f Merge #4830
4830: Use the dtolnay's Rust Toolchain r=dureuill a=Kerollmops

Fixes the CI by using another rust-toolchain GitHub repo.

Note: the [helix-editor/rust-toolchain repository](https://github.com/helix-editor/rust-toolchain) has been deleted so we moved to the [dtolnay/rust-toolchain](https://github.com/dtolnay/rust-toolchain) one. However, the dtolnay's one doesn't support `rust-toolchain.toml` and the version is directly in the rust-toolchain@version. We keep the `rust-toolchain.toml` for local builds only.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-29 08:33:59 +00:00
a663e408ad Move to the right rust toolchain version 2024-07-29 10:06:34 +02:00
986991277f Use the dtolnay rust toolchain 2024-07-29 10:00:40 +02:00
c2c1ba39ee Merge #4826
4826: Update Charabia v0.9.0 r=dureuill a=ManyTheFish

# Pull Request

## Related Changelog
https://github.com/meilisearch/charabia/releases/tag/v0.9.0

## Notable Change for Meilisearch
Adds all math symbols from https://www.compart.com/en/unicode/category/Sm to the default separator list.



Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-07-25 14:08:38 +00:00
35567b2137 Update Charabia v0.9.0 2024-07-25 16:02:14 +02:00
00c97c7152 Merge #4818
4818: Custom headers and QoL improvements r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #4734 
Depends on #4815 

## What does this PR do?
- Adds custom headers for rest embedders ([public usage](https://meilisearch.notion.site/v1-10-AI-search-changes-737c9d7d010d4dd685582bf5dab579e2#41354652885242c899def07e36a66d49))
- Quality of life: allow specifying `dimensions` for `ollama` embedders ([public usage](https://meilisearch.notion.site/v1-10-AI-search-changes-737c9d7d010d4dd685582bf5dab579e2#37218531431343dab3d2d3a9a1937e9d)). As for `rest` embedders, specifying `dimensions` disables the "test" embedding when the embedder is spawned.
- Improve error message again when indexing documents that don't have a vector for a user-provided vector
  1. Remove the contents of the document
  2. Display the docid of the first document that triggered the error
  3. Indicate how many documents in that chunk suffered from the same issue for that embedder


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-25 13:33:11 +00:00
d4ea7cc2a9 fix clippy 👉👈 2024-07-25 12:10:32 +02:00
8532fe8afc Fix tests 2024-07-25 12:10:32 +02:00
2413592bbf Display docid when there are documents without manual embeddings for a manual embedder 2024-07-25 12:10:32 +02:00
553440632e Introduce Setting::some_or_not_set 2024-07-25 12:01:52 +02:00
7a347966da Allow explicit dimensions for ollama 2024-07-25 12:01:51 +02:00
6c598fa06d test custom headers 2024-07-25 12:01:51 +02:00
8338df0dbe Fix tests 2024-07-25 12:01:51 +02:00
4654d51e05 Add custom headers for REST embedder 2024-07-25 12:01:51 +02:00
22ef2d877f Ensure test server has a single indexing thread 2024-07-25 12:01:51 +02:00
76bc2c18e8 Merge #4819
4819: Language settings r=dureuill a=ManyTheFish

# Pull Request

## Related issue
Fixes #4749 

## What does this PR do?
- [Implement localized search](c0c6955c0d)
- [Implement localized attributes settings](bde827b055)

## Related PRD

- [PRD](https://www.notion.so/meilisearch/Define-language-settings-to-impact-relevancy-bee62e18b7584c4f87d18a7654855329)
- [Public usage](https://www.notion.so/meilisearch/v1-10-Language-settings-usage-26c5d98b553349d9abacbe7aff698e4e)


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-25 09:00:33 +00:00
59115fd058 Fix tests 2024-07-25 10:52:57 +02:00
a918561ac1 Fix PR comments 2024-07-25 10:52:56 +02:00
70d71581ee fix clippy 2024-07-25 10:52:56 +02:00
4fbe048cbf Update Cargo.lock 2024-07-25 10:52:56 +02:00
e06fbcc607 Update snapshots 2024-07-25 10:52:56 +02:00
04fa44e7eb Implement localized attributes settings 2024-07-25 10:51:27 +02:00
90c0a6db7d Implement localized search 2024-07-25 10:51:27 +02:00
d82f8fd904 Add tests 2024-07-25 10:51:27 +02:00
cc02920f2b Update charabia 2024-07-25 10:51:27 +02:00
c26bd68de5 Merge #4815
4815: Rest embedder api mk2 r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4756

- [x] [REST API parameter names and behavior are unclear](https://github.com/meilisearch/documentation/pull/2824#issuecomment-2124073720)
  - unclear names are removed. There remain only two parameters: `request`, a template of what Meilisearch's request to the embedding server should be, and `response`, a template of what the embedding server's response to Meilisearch should look like
- [x] [Bad error message or bad default value when we don't specify the `query` parameter](85d8455c11/meilisearch/tests/vector/rest.rs (L105-L140))
  - The replacement for `query`, which is `request`, is now a mandatory parameter. Omitting it will result in the following error message : "`.embedders.rest`: Missing field `request` (note: this field is mandatory for source rest)", which is clear
- [x] [Bad error message when both `pathToEmbeddings` and `embeddingObject` are missing](2141cb3b69/meilisearch/tests/vector/rest.rs (L142-L178))
  - These parameters no longer exist. Now, the point of extraction is given directly by the location of an `{{embedding}}` placeholder in the `response` parameter.
- [x] [Unexpected error when we don't specify both `pathToEmbeddings` and `embeddingObject` (only one should be required)](2141cb3b69/meilisearch/tests/vector/rest.rs (L180-L260))
  - These parameters no longer exist. Now, the point of extraction is given directly by the location of an `{{embedding}}` placeholder in the `response` parameter.
- [x] [Should not panic when the dimensions specified do not work with the model](2141cb3b69/meilisearch/tests/vector/rest.rs (L262-L299))
  - This no longer panics, instead returns "While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`"
- [x] [Be more flexible on the type of data that is accepted](https://github.com/meilisearch/meilisearch/issues/4757#issuecomment-2201948531)
  - [x] Always accept arrays of embeddings even if `inputType` is set to `text`
    - This is controlled by the repeat placeholder `"{..}"`, an array of embeddings can be configured even if the input is not in an array.
  - [x] Accept arrays of result at the root level and texts/array of text at the root level.
    -  doable with `request: "{{text}}"` and `response: "{{embedding}}"` or `response: ["{{embedding}}"]` (see test `vector::rest::server_raw`)

## What does this PR do?
- [See public usage](https://meilisearch.notion.site/v1-10-AI-search-changes-737c9d7d010d4dd685582bf5dab579e2#8de842673ffa4a139210094a89c1ec3e)
- Add new `milli::vector::json_template` module to parse JSON templates with an injection placeholder and a repeat placeholder
- Change rest embedder to use two JSON templates
- Change ollama and openai embedders to use the new rest embedder
- Update settings
- Update and add tests

## Breaking change

> [!CAUTION]
> This PR is a breaking change to the REST embedder.
> Importing a dump containing a REST embedder configuration will fail in v1.10 with an error: "Error: unknown field `query`, expected one of `source`, `model`, `revision`, `apiKey`, `dimensions`, `documentTemplate`, `url`, `request`, `response`, `distribution` at line 1 column 752".

Upgrade procedure:

1. Remove any embedder with source "rest"
2. Create a dump
3. Import that dump in a v1.10
4. Re-add any removed embedder, using the new settings.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-07-24 16:32:52 +00:00
80fdea9afc Merge pull request #4823 from meilisearch/explicit-check-bench
Explicitly check permissions when receiving a slash command
2024-07-24 17:34:07 +02:00
e3faacd160 Explicitly check permissions when receiving a slash command 2024-07-24 17:09:25 +02:00
988552e178 add tests on the rest embedder 2024-07-24 14:34:17 +02:00
0d8199f3b7 Change parameters in milli settings 2024-07-24 14:34:17 +02:00
4b74803dae Change parameters in vector settings 2024-07-24 14:34:17 +02:00
d731fa661b ollama and openai use new EmbedderOptions 2024-07-24 14:34:17 +02:00
a1beddd5d9 rest embedder: use json_template 2024-07-24 14:34:17 +02:00
4109182ca4 Add json_template module 2024-07-24 14:34:12 +02:00
1a297c048e Error changes 2024-07-24 14:34:12 +02:00
ecee0c922f Merge #4822
4822: HuggingFace: Clearer error message when a model is not supported r=Kerollmops a=dureuill

# Pull Request

## Related issue
Context: <https://github.com/meilisearch/meilisearch/discussions/4820>

## What does this PR do?
- Improve error message when a model configuration cannot be loaded and its "architectures" field does not contain "BertModel"

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-23 14:09:47 +00:00
303e601b87 HuggingFace: Clearer error message when a model is not supported 2024-07-23 15:13:22 +02:00
f6d2c59bca Merge #4817
4817: Update version for the next release (v1.10.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-07-22 15:51:20 +00:00
50b7093f8e Update version for the next release (v1.10.0) in Cargo.toml 2024-07-22 13:54:38 +00:00
48bc797dce Merge #4812
4812: Allow `MEILI_NO_VERGEN` env var to skip vergen r=irevoire a=dureuill

- vergen checks the state of the `.git` directory to embed commit information into the `meilisearch` binary and the `cargo xtask bench` invocations.
- This check unfortunately results in too many recompilations of the `meilisearch` binary.
- This PR allows skipping vergen when the `MEILI_NO_VERGEN` variable is present in the environment

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-18 16:16:01 +00:00
c6b33fd407 Allow MEILI_NO_VERGEN env var to skip vergen 2024-07-18 17:28:01 +02:00
6e9d0de8b7 Merge #4806
4806: Update rustls as much as possible r=Kerollmops a=irevoire

# Pull Request

## Related issue
Part of https://github.com/meilisearch/meilisearch/issues/4753

## What does this PR do?
- Update rustls as much as possible

## What is missing

In rustls-0.22.0 two structures we were using have been removed with no explanation or workaround
<img width="518" alt="image" src="https://github.com/user-attachments/assets/fa112db1-3400-4163-8819-7913f22d6b87">



Co-authored-by: Tamo <tamo@meilisearch.com>
2024-07-17 17:00:01 +00:00
1bfb16386c Update rustls as much as possible 2024-07-17 18:21:26 +02:00
ea73615abf Merge #4804
4804: Implements the experimental contains filter operator r=irevoire a=irevoire

# Pull Request
Related PRD: (private link) https://www.notion.so/meilisearch/Contains-Like-Filter-Operator-0d8ad53c6761466f913432eb1d843f1e
Public usage page: https://meilisearch.notion.site/Contains-filter-operator-usage-3e7421b0aacf45f48ab09abe259a1de6

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3613

## What does this PR do?
- Extract the contains operator from this PR: https://github.com/meilisearch/meilisearch/pull/3751
- Gate it behind a feature flag
- Add tests


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-07-17 15:47:11 +00:00
02c61eabfa fix the range reported when the experimental feature has not been set 2024-07-17 16:54:33 +02:00
56b60ec7a0 apply review comment 2024-07-17 16:13:40 +02:00
8f416e8f34 Merge #4805
4805: Log the time to index a batch of task r=Kerollmops a=irevoire

This was proposed by `@qdequele` in a private conversation and I think it’s a nice addition.

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-07-17 11:45:39 +00:00
cf760cbfb1 Log the time to index a batch of task 2024-07-17 11:56:57 +02:00
2af9481804 Implements the experimental contains filter operator 2024-07-17 11:13:37 +02:00
7a292b572a Merge #4801
4801: AI quality-of-life improvements r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4802 

## What does this PR do?
This PR implements several quality-of-life improvements described in the [public usage](https://meilisearch.notion.site/v1-10-AI-search-changes-737c9d7d010d4dd685582bf5dab579e2#ece824a1814e47a0a986d786baff1be9)


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-17 09:00:47 +00:00
8d6ac261ae Add tests on various failure modes for embedders 2024-07-16 13:39:02 +02:00
b4c8b01c88 Update existing snapshots 2024-07-16 13:39:01 +02:00
24240934f9 Improve errors when indexing documents with a user provided embedder 2024-07-16 13:39:01 +02:00
f4c94ac57f manual embedders: limit max size of errors to 250 2024-07-16 13:39:01 +02:00
4087a88dbe rest|ollama|openai: increase tries to 10 + randomize retry duration 2024-07-16 13:39:00 +02:00
5adacf2f45 OpenAI: embed only the first MAX_TOKENS tokens 2024-07-16 13:39:00 +02:00
65d0c32aa7 Allow overriding OpenAI's url 2024-07-16 13:39:00 +02:00
82647bcded When retrieveVectors is true, retrieve _vectors.embedder even if there are no vector for that embedder 2024-07-16 13:39:00 +02:00
1582c7e788 Merge #4769
4769: Federated search r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #4747 

[Usage](https://meilisearch.notion.site/v1-10-federated-search-698dfe36ab6b4668b044f735fb40f0b2)

## What does this PR do?
- multi-search now allows a top-level federation object. When not `null`, the results of multi-search are modified to be a single list of results rather than a list of a list of results
- changed lifetimes around tokenizer et al. to be able to make hits one by one rather than using a vector
- adds `roaring` to Meilisearch itself. As the federated search happens at the Meilisearch level (reuses the search functions declared at the Meilisearch level + merge happens after the hits were created), `RoaringBitmap`s are needed to track the candidates: hits that were seen,  all candidates.
- Refactor `make_hits` to allow for an individual, optimized `make_hit` 
- Score details comparisons no longer fail when sorting on different field names or target points (for geo)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-16 08:14:46 +00:00
20094eba06 Apply review comments 2024-07-15 12:43:29 +02:00
c35904d6e8 search::federated::ranking_rules -> search::ranking_rules 2024-07-15 08:43:22 +02:00
2cacc448b6 Rename src/search.rs -> src/search/mod.rs 2024-07-15 08:43:21 +02:00
a61b852695 Add tests 2024-07-15 08:43:21 +02:00
3167411e98 Analytics 2024-07-15 08:43:21 +02:00
83d71662aa Changes to multi_search route 2024-07-15 08:43:21 +02:00
5c323cecc7 search: introduce federated search 2024-07-15 08:43:21 +02:00
77b9347fff Merge #4783
4783: Update minimal ubuntu version used from 18.04 to 20.04 r=curquiza a=curquiza

Fixes #4782 

Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-07-11 16:44:30 +00:00
c85dd9f635 install a default stable toolchain before cargo build tries to install cross 2024-07-11 18:43:47 +02:00
7da95d62e2 Add DEBIAN_FRONTEND to avoid interaction with tzdata 2024-07-11 18:43:47 +02:00
2cda1360ee Remove ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION in CI 2024-07-11 18:43:47 +02:00
5f9c05b944 Update minimal ubuntu version used from 18.04 to 20.04 2024-07-11 18:43:47 +02:00
d3a6d2a6fa search: introduce hitmaker 2024-07-11 16:35:59 +02:00
2123d76089 search: introduce "search_from_kind" 2024-07-11 16:35:11 +02:00
edab4e75b0 Make SearchKind cloneable 2024-07-11 16:33:24 +02:00
b9982587d4 Add new errors to meilisearch 2024-07-11 16:31:44 +02:00
e83da00446 Milli changes to match to allow for more flexible lifetimes 2024-07-11 16:29:35 +02:00
7fb3e378ff Do not fail sort comparisons when the field name or target point are different 2024-07-11 16:28:14 +02:00
12a7a45930 Add roaring to meilisearch 2024-07-11 16:27:50 +02:00
677ed6bbf6 Merge #4787
4787: Add index exists function in index_scheduler which stops opening indexes to only check if they exist. r=Kerollmops a=Karribalu

# Pull Request

## Related issue
Fixes #4784

## What does this PR do?
- Added index_exists function in the index_scheduler.
- Resolved opening indexes to only check if they exist.
- Made changes to existing tests to test this function.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: karribalu <karri.balu123456@gmail.com>
2024-07-11 13:05:20 +00:00
29b44e5541 Merge #4626
4626: Edit Documents with Rhai r=ManyTheFish a=Kerollmops

This PR introduces a first version of [the _Update Documents with Function_ (internal)](https://www.notion.so/meilisearch/Update-Documents-by-Function-45f87b13e61c4435b73943768a490808). It uses [the Rhai programming language](https://rhai.rs/) to let users express the modifications they want to apply.

You can read more about the way to use this function on [the Usage PRD Page](https://meilisearch.notion.site/Edit-Documents-with-Rhai-0cff8fea7655436592e7c8a6de932062?pvs=25). The [prototype is available](https://github.com/meilisearch/meilisearch/actions/runs/9038384483) through Docker by using the following command:

```
docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-edit-documents-with-rhai-3
```

## TODO
 - [x] Support the `DocumentEdition` task in dumps.
 - [x] Remove the unwraps and panics.
 - [x] Improve error codes for the `function` parameter.
 - [x] [Update Rhai to v1.19.0](https://github.com/rhaiscript/rhai/releases/tag/v1.19.0) 🚀
 - [x] Make it an experimental feature (only restrict the HTTP calls).
 - [x] It must be possible not to send a context.
 - [x] Rebase on main.
 - [x] Check that the script cannot do any io.
 - [x] ~Introduce a `Documents.edit` action or~ require the `Documents.all` action.
 - [x] Change the `editionCode` to the clearer `function` field name in the tasks.
 - [x] Support a user provided context and maybe more (but keep function execution isolated for reproducibility).
 - [x] Support deleting documents when the `doc` is `()` (nil, null).
 - [x] Support canceling document edition.
 - [x] Multithread document edition by using rayon (and [rayon-par-bridge](https://docs.rs/rayon-par-bridge/latest/rayon_par_bridge/)).
 - [x] Limit the number of instructions per function execution.
 - [ ] ~Expose the limit of instructions in the settings.~ Not sure, in fact.
 - [x] Ignore unmodified documents in the tasks count.
 - [x] Make the `filter` field optional (not forced to be `null`).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-11 09:02:55 +00:00
6e80364c50 Apply review comments 2024-07-11 11:00:27 +02:00
603676cb3b Address PR review changes 2024-07-10 19:42:16 +01:00
23e102ca71 Address PR review changes 2024-07-10 19:33:16 +01:00
f36f34c2f7 Merge #4717
4717: Implement intersection at end on the search pipeline r=Kerollmops a=Kerollmops

This PR is akin to #4713 and #4682 because it uses the new RoaringBitmap method to do the intersections directly on the serialized bytes for the bytes LMDB/heed returns. More work related to this issue can be done, and I listed that in #4780.

Running the following command shows where we use bitand/intersection operations and where we can potentially apply this optimization.
```sh
rg --type rust --vimgrep '\s&[=\s]' milli/src/search
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-10 15:01:33 +00:00
3bac22fd87 We do not do intersections with the universe when it is related to cache 2024-07-10 16:49:36 +02:00
ce61cb7fe6 Simplify and speedup an intersection pass 2024-07-10 16:49:36 +02:00
1693d1a311 Simplify the check to decide to stop a loop 2024-07-10 16:49:36 +02:00
febea735ca Remove the unused universe parameter from resolve_negative_phrases 2024-07-10 16:49:36 +02:00
93ba051094 Remove the invalid get_phrases_docids universe parameter 2024-07-10 16:49:35 +02:00
cd7a20fa32 Make it work by avoid storing invalid stuff in the cache 2024-07-10 16:49:35 +02:00
41f51adbec Do less useless intersections 2024-07-10 16:49:35 +02:00
0ca1a4e805 Always do the intersections with the universe 2024-07-10 16:49:34 +02:00
50a7393c55 Modify the compute_query_term_subset_docids function to accept the universe 2024-07-10 16:49:34 +02:00
837274f853 Restrict even more the Rhai engine 2024-07-10 16:30:18 +02:00
487997f6ad Support the new editDocumentsByFunction experimental feature 2024-07-10 16:29:18 +02:00
94809090a3 Support not specifying a context 2024-07-10 16:29:18 +02:00
01144b2c74 Make the edit documents by function route experimental 2024-07-10 16:29:18 +02:00
e97600eead Improve the analytics for the document edition by function 2024-07-10 16:29:18 +02:00
767553519d Create errors for the HTTP route issues 2024-07-10 16:29:18 +02:00
aace587dd1 Create errors for the internal processing ones 2024-07-10 16:29:18 +02:00
e706023969 Fix some analytics issues 2024-07-10 16:29:17 +02:00
bcd0c5f5a4 Support DocumentEdition in dumps 2024-07-10 16:29:17 +02:00
f35d6710f3 Update rhai to v1.19.0 2024-07-10 16:29:17 +02:00
b7b8f564c3 delete-me: Simply support generating dump 2024-07-10 16:29:05 +02:00
862d49e4af Editing documents requires the documents.all action (add, get, and del) 2024-07-10 16:29:05 +02:00
81ec0abad1 Use the new rayon-par-bridge library 2024-07-10 16:29:04 +02:00
b67d385cf0 Parallelize the edition functions 2024-07-10 16:28:54 +02:00
dfecb25814 Disable the time package 2024-07-10 16:28:37 +02:00
2eae2015d7 Support aborting documents edition by function 2024-07-10 16:28:15 +02:00
33fa17bf12 Support deleting documents with functions 2024-07-10 16:28:15 +02:00
400e6b93ce Support user-provided context for documents edition 2024-07-10 16:28:15 +02:00
f32e6c32fc Rename editionCode to function 2024-07-10 16:28:15 +02:00
f4add93043 Limit the number of script operations 2024-07-10 16:28:14 +02:00
f07256971a Fix tests 2024-07-10 16:28:14 +02:00
2fae96ac14 Show the actual number of actually edited documents 2024-07-10 16:28:14 +02:00
246f0e7130 Make the filter field really optional 2024-07-10 16:28:14 +02:00
45af18ae9c Check the Rhai syntax before accepting the script 2024-07-10 16:28:13 +02:00
2d97164d9f It works perfectly with some Rhai 2024-07-10 16:28:13 +02:00
efc156a4a4 Executing Lua works correctly 2024-07-10 16:27:36 +02:00
ba85959642 Support filtering the documents to edit with lua 2024-07-10 16:23:21 +02:00
1702b5cf44 Prepare for processing documents edition 2024-07-10 16:23:21 +02:00
2099b4f0dd Merge #4786
4786: Update dependencies r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes #4753

## What does this PR do?
- Update all dependencies except rustls
- [x] Release charabia
- [x] Update charabia
- [x] Double check that the docker build works after updating charabia



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-10 13:23:54 +00:00
0d5bc4578e Update CONTRIBUTING.md
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-10 15:21:43 +02:00
8f60ad0a23 apply review comments 2024-07-10 14:38:19 +02:00
9570139eeb update contributing.md with the new lindera update 2024-07-10 14:28:43 +02:00
9d6885793e Upgrade dependencies 2024-07-10 13:46:24 +02:00
98cd6a865c Update dependencies after removing useless ones 2024-07-10 13:37:24 +02:00
5f4530ce57 Remove more unused dependencies 2024-07-10 13:36:34 +02:00
0ecaf861fa fix ci 2024-07-10 10:06:59 +02:00
4d5005b01a make clippy happy 2024-07-10 10:06:59 +02:00
952e742321 update charabia 2024-07-09 23:41:29 +02:00
ee9aa63044 update rust version 2024-07-09 23:41:29 +02:00
43db4f4242 update fxprof_processed_profile 2024-07-09 23:41:29 +02:00
9feba5028d update byte-unit 2024-07-09 23:41:29 +02:00
0a40a98bb6 Make milli use edition 2021 (#4770)
* Make milli use edition 2021

* Add lifetime annotations to milli.

* Run cargo fmt
2024-07-09 17:25:39 +02:00
aac15f6719 Merge #4781
4781: Correct apk usages in Dockerfile r=curquiza a=PeterDaveHello


# Pull Request

## Related issue

No issue was created because this is very trivial.

## What does this PR do?

Correct apk usages in Dockerfile

There is no need to use apk with `update` or `--update-cache` when `--no-cache` is used, which will make sure the index is the latest, and leave no temporary files behind.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Peter Dave Hello <hsu@peterdavehello.org>
2024-07-09 08:51:29 +00:00
ea21b948b1 Address PR review changes 2024-07-09 09:18:57 +01:00
53a359286c Merge #4785
4785: Bump zerovec from 0.10.1 to 0.10.4 r=dureuill a=dependabot[bot]

Bumps [zerovec](https://github.com/unicode-org/icu4x) from 0.10.1 to 0.10.4.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/unicode-org/icu4x/blob/main/CHANGELOG.md">zerovec's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>icu4x 1.5.x</h2>
<ul>
<li><code>icu_calendar</code>
<ul>
<li>(1.5.1) Fix Japanese calendar Gregorian era year 0 (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4968">unicode-org/icu4x#4968</a>)</li>
<li>(1.5.2) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
</ul>
</li>
<li><code>icu_datetime</code>
<ul>
<li>(1.5.1) Fix incorrect assertion in week-of-year formatting (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4977">unicode-org/icu4x#4977</a>)</li>
</ul>
</li>
<li><code>icu_casemap</code>
<ul>
<li>(1.5.1) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
</ul>
</li>
<li><code>icu_capi</code>
<ul>
<li>(1.5.1) Fix situations in which <code>libc_alloc</code> is specified as a dependency (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5119">unicode-org/icu4x#5119</a>)</li>
</ul>
</li>
<li><code>icu_properties</code>
<ul>
<li>(1.5.1) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
</ul>
</li>
<li><code>zerovec</code>
<ul>
<li>(0.10.3) Fix size regression by making <code>twox-hash</code> dep <code>no_std</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5007">unicode-org/icu4x#5007</a>)</li>
<li>(0.10.3) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
<li>(0.10.4) Enforce C,packed on OptionVarULE (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5143">unicode-org/icu4x#5143</a>)</li>
</ul>
</li>
<li><code>zerovec_derive</code>
<ul>
<li>(0.10.3) Enforce C,packed, not just packed, on ULE types, fixing for incoming changes to <code>repr(Rust)</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/5049">unicode-org/icu4x#5049</a>)</li>
</ul>
</li>
</ul>
<h2>icu4x 1.5 (May 28, 2024)</h2>
<ul>
<li>Components
<ul>
<li>General
<ul>
<li>Compiled data updated to CLDR 45 and ICU 75 (unicode-org#4782)</li>
</ul>
</li>
<li><code>icu_calendar</code>
<ul>
<li>Fix duration offsetting and negative-year bugs in several calendars including Chinese, Islamic, Coptic, Ethiopian, and Hebrew (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4904">#4904</a>)</li>
<li>Improved approximation for Persian calendrical calculations (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4713">unicode-org/icu4x#4713</a>)</li>
<li>Fix weekday calculations in negative ISO years (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4894">unicode-org/icu4x#4894</a>)</li>
<li>New <code>DateTime::local_unix_epoch()</code> convenience constructor (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4479">unicode-org/icu4x#4479</a>)</li>
<li>Add caching for all islamic calendars (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4785">unicode-org/icu4x#4785</a>)</li>
<li>Add caching for chinese based calendars (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4411">unicode-org/icu4x#4411</a>, <a href="https://redirect.github.com/unicode-org/icu4x/pull/4468">unicode-org/icu4x#4468</a>)</li>
<li>Switch Hebrew to faster keviyah/Four Gates calculations (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4504">unicode-org/icu4x#4504</a>)</li>
<li>Replace 2820-year with 33-year cycle in Persian calendar, with override table (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4770">unicode-org/icu4x#4770</a>, <a href="https://redirect.github.com/unicode-org/icu4x/pull/4775">unicode-org/icu4x#4775</a>, <a href="https://redirect.github.com/unicode-org/icu4x/pull/4796">unicode-org/icu4x#4796</a>)</li>
<li>Fix bugs in several calendars with new continuity test (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4904">unicode-org/icu4x#4904</a>)</li>
<li>Fix year 2319 in the Chinese calendar (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4929">unicode-org/icu4x#4929</a>)</li>
<li>Fix ISO weekday calculations in negative years (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4894">unicode-org/icu4x#4894</a>)</li>
</ul>
</li>
<li><code>icu_collections</code>
<ul>
<li>Switch from <code>wasmer</code> to <code>wasmi</code> in <code>icu_codepointtrie_builder</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4621">unicode-org/icu4x#4621</a>)</li>
</ul>
</li>
<li><code>icu_normalizer</code>
<ul>
<li>Make UTS 46 normalization non-experimental (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4712">#4712</a>)</li>
</ul>
</li>
<li><code>icu_datetime</code>
<ul>
<li>Experimental &quot;neo&quot; datetime formatter with support for semantic skeleta and fine-grained data slicing (<a href="https://redirect.github.com/unicode-org/icu4x/issues/1317">unicode-org/icu4x#1317</a>, <a href="https://redirect.github.com/unicode-org/icu4x/issues/3347">unicode-org/icu4x#3347</a>)</li>
<li><code>Writeable</code> and <code>Display</code> implementations now don't return <code>fmt::Error</code>s that don't originate from the <code>fmt::Write</code> anymore (<a href="https://redirect.github.com/unicode-org/icu4x/issues/4732">#4732</a>, <a href="https://redirect.github.com/unicode-org/icu4x/issues/4851">#4851</a>, <a href="https://redirect.github.com/unicode-org/icu4x/issues/4863">#4863</a>)</li>
<li>Make <code>CldrCalendar</code> trait sealed except with experimental feature (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4392">unicode-org/icu4x#4392</a>)</li>
<li><code>FormattedDateTime</code> and <code>FormattedZonedDateTime</code> now implement <code>Clone</code> and <code>Copy</code> (<a href="https://redirect.github.com/unicode-org/icu4x/pull/4476">unicode-org/icu4x#4476</a>)</li>
</ul>
</li>
<li><code>icu_experimental</code></li>
</ul>
</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/unicode-org/icu4x/commits/ind/zerovec@0.10.4">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=zerovec&package-manager=cargo&previous-version=0.10.1&new-version=0.10.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-09 08:02:04 +00:00
47e526f5ea Add index exists function in index_scheduler 2024-07-08 22:27:10 +01:00
4aa7d386d8 remove http and uses actix_web::http instead 2024-07-08 21:17:10 +02:00
84fabb9314 Bump zerovec from 0.10.1 to 0.10.4
Bumps [zerovec](https://github.com/unicode-org/icu4x) from 0.10.1 to 0.10.4.
- [Release notes](https://github.com/unicode-org/icu4x/releases)
- [Changelog](https://github.com/unicode-org/icu4x/blob/main/CHANGELOG.md)
- [Commits](https://github.com/unicode-org/icu4x/commits/ind/zerovec@0.10.4)

---
updated-dependencies:
- dependency-name: zerovec
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-07-08 18:38:44 +00:00
cd46ebd6b5 remove insta deprecating 2024-07-08 18:38:05 +02:00
ef8d9a20f8 update actix-web 2024-07-08 18:36:32 +02:00
6afa578688 update most incompatible dependencies 2024-07-08 18:31:15 +02:00
300bdfc2a7 update most dependencies 2024-07-08 18:09:12 +02:00
e7e74c0099 Correct apk usages in Dockerfile
There is no need to use apk with `update` or `--update-cache` when `--no-cache` is used, which will make sure the index is the latest, and leave no temporary files behind.
2024-07-08 21:53:58 +08:00
05cc2d1fac Merge #4779
4779: CI: Add workaround to keep using Ubuntu 18.04 r=Kerollmops a=dureuill

Uses `ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true`

Refs: https://github.com/actions/checkout/issues/1590#issuecomment-2207052044

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-08 09:58:28 +00:00
22b9c277d0 CI: Add ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION workaround to keep using Ubuntu 18.04 2024-07-08 11:04:11 +02:00
16bde973aa Merge pull request #4778 from meilisearch/meilisearch-kawaii-logo
Change the Meilisearch logo to the kawaii version
2024-07-07 18:18:32 +02:00
13d1d78a2d Change the Meilisearch logo to the kawaii version 2024-07-07 18:14:02 +02:00
b2b7a633a6 Merge #4774
4774: Rename the sortable into the filterable movies workload r=dureuill a=Kerollmops

Fixes the workload name of one movie searchable.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-04 10:07:01 +00:00
7be109cafe Rename the sortable into the filterable movies workload 2024-07-04 11:53:18 +02:00
6ebefd1067 Merge #4773
4773: New workload to ignore the initial compression phase r=dureuill a=Kerollmops

This PR introduces a new workload to ignore the time spent initially compressing the documents.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-07-04 09:02:02 +00:00
d25ae36e22 Introduce a new workload to ignore the initial compression phase 2024-07-04 10:58:16 +02:00
b64b4ab6ca Merge #4762
4762: Add search benchmarks r=Kerollmops a=dureuill

# Pull Request

## What does this PR do?
- [x] Modifies `xtask bench` so that workloads support an optional `target` argument. `target` defaults to `indexing::=trace`
- [x] Refactor the spans in the search to offer finer profiling granularity
- [x] Add search workloads  
- [x] Updates documentation in `BENCHMARKS.md`


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-03 08:39:29 +00:00
427861b323 Update documentation in BENCHMARKS.md 2024-07-02 16:13:54 +02:00
d29cb75061 Add search workloads 2024-07-02 16:13:54 +02:00
128e6c7502 Search: spans with a finer granularity 2024-07-02 16:13:53 +02:00
3129f96603 xtask bench: Add support for overriding the profiling target 2024-07-02 16:12:50 +02:00
c701d89fdc Merge #4754
4754: bring back v1.9.0 changes to main r=irevoire a=ManyTheFish



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-07-02 13:30:50 +00:00
3d9befd64f fix warning 2024-07-02 15:30:16 +02:00
ee14d5196c fix the tests 2024-07-02 15:18:30 +02:00
d96372b9c4 Merge branch 'main' into tmp-release-v1.9.0 2024-07-02 14:48:50 +02:00
ea67816a21 Merge #4758
4758: Bump docker/build-push-action from 5 to 6 r=curquiza a=dependabot[bot]

Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 5 to 6.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/build-push-action/releases">docker/build-push-action's releases</a>.</em></p>
<blockquote>
<h2>v6.0.0</h2>
<ul>
<li>Export build record and generate <a href="https://docs.docker.com/build/ci/github-actions/build-summary/">build summary</a> by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/1120">docker/build-push-action#1120</a></li>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.24.0 to 0.26.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/1132">docker/build-push-action#1132</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1136">docker/build-push-action#1136</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1138">docker/build-push-action#1138</a></li>
<li>Bump braces from 3.0.2 to 3.0.3 in <a href="https://redirect.github.com/docker/build-push-action/pull/1137">docker/build-push-action#1137</a></li>
</ul>
<blockquote>
<p>[!NOTE]
This major release adds support for generating <a href="https://docs.docker.com/build/ci/github-actions/build-summary/">Build summary</a> and exporting build record for your build. You can disable this feature by setting <a href="https://docs.docker.com/build/ci/github-actions/build-summary/#disable-job-summary"> <code>DOCKER_BUILD_NO_SUMMARY: true</code> environment variable in your workflow</a>.</p>
</blockquote>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.4.0...v6.0.0">https://github.com/docker/build-push-action/compare/v5.4.0...v6.0.0</a></p>
<h2>v5.4.0</h2>
<ul>
<li>Show builder information before building by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/1128">docker/build-push-action#1128</a></li>
<li>Handle attestations correctly with provenance and sbom inputs by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/1086">docker/build-push-action#1086</a></li>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.19.0 to 0.24.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/1088">docker/build-push-action#1088</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1105">docker/build-push-action#1105</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1121">docker/build-push-action#1121</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1127">docker/build-push-action#1127</a></li>
<li>Bump undici from 5.28.3 to 5.28.4 in <a href="https://redirect.github.com/docker/build-push-action/pull/1090">docker/build-push-action#1090</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.3.0...v5.4.0">https://github.com/docker/build-push-action/compare/v5.3.0...v5.4.0</a></p>
<h2>v5.3.0</h2>
<ul>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.18.0 to 0.19.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/1080">docker/build-push-action#1080</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.2.0...v5.3.0">https://github.com/docker/build-push-action/compare/v5.2.0...v5.3.0</a></p>
<h2>v5.2.0</h2>
<ul>
<li>Disable quotes detection for <code>outputs</code> input by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/1074">docker/build-push-action#1074</a></li>
<li>Warn about ignored inputs by <a href="https://github.com/favonia"><code>`@​favonia</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/1019">docker/build-push-action#1019</a></li>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.14.0 to 0.18.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/1070">docker/build-push-action#1070</a></li>
<li>Bump undici from 5.26.3 to 5.28.3 in <a href="https://redirect.github.com/docker/build-push-action/pull/1057">docker/build-push-action#1057</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.1.0...v5.2.0">https://github.com/docker/build-push-action/compare/v5.1.0...v5.2.0</a></p>
<h2>v5.1.0</h2>
<ul>
<li>Add <code>annotations</code> input by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/992">docker/build-push-action#992</a></li>
<li>Add <code>secret-envs</code> input by <a href="https://github.com/elias-lundgren"><code>`@​elias-lundgren</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/980">docker/build-push-action#980</a></li>
<li>Bump <code>`@​babel/traverse</code>` from 7.17.3 to 7.23.2 in <a href="https://redirect.github.com/docker/build-push-action/pull/991">docker/build-push-action#991</a></li>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.13.0-rc.1 to 0.14.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/990">docker/build-push-action#990</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1006">docker/build-push-action#1006</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.0.0...v5.1.0">https://github.com/docker/build-push-action/compare/v5.0.0...v5.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="15560696de"><code>1556069</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/1158">#1158</a> from docker/dependabot/npm_and_yarn/docker/actions-t...</li>
<li><a href="57e1d34ac3"><code>57e1d34</code></a> chore: update generated content</li>
<li><a href="309982ebc9"><code>309982e</code></a> chore(deps): Bump <code>`@​docker/actions-toolkit</code>` from 0.27.0 to 0.28.0</li>
<li><a href="9476c25b2a"><code>9476c25</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/1153">#1153</a> from crazy-max/export-retention</li>
<li><a href="97be5a4928"><code>97be5a4</code></a> chore: update generated content</li>
<li><a href="9cac6c8ea0"><code>9cac6c8</code></a> use default retention days for build export artifact</li>
<li><a href="31159d49c0"><code>31159d4</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/1149">#1149</a> from docker/dependabot/npm_and_yarn/docker/actions-t...</li>
<li><a href="07e1c3e148"><code>07e1c3e</code></a> chore: update generated content</li>
<li><a href="f7febd621d"><code>f7febd6</code></a> chore(deps): Bump <code>`@​docker/actions-toolkit</code>` from 0.26.2 to 0.27.0</li>
<li><a href="f6010ea701"><code>f6010ea</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/1147">#1147</a> from docker/dependabot/npm_and_yarn/docker/actions-t...</li>
<li>Additional commits viewable in <a href="https://github.com/docker/build-push-action/compare/v5...v6">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/build-push-action&package-manager=github_actions&previous-version=5&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-02 12:36:19 +00:00
c885fcebcc Bump docker/build-push-action from 5 to 6
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 5 to 6.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](https://github.com/docker/build-push-action/compare/v5...v6)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-07-02 12:28:28 +00:00
b6e1a1f2f5 Merge #4761
4761: Add vX Docker tag when publishing Docker image r=Kerollmops a=curquiza

Following this: https://github.com/meilisearch/meilisearch/discussions/4759

Co-authored-by: Clémentine <clementine@meilisearch.com>
2024-07-02 11:11:39 +00:00
277f4883f6 Add vX Docker tag when publishing Docker image 2024-07-02 12:11:44 +02:00
015d90a962 merge main 2024-07-01 11:50:36 +02:00
0df84bbba7 Merge #4746
4746: Fix hybrid search limit offset r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4745

## What does this PR do?
- Apply offset and limit to the keyword search results when they are returned early.
- Add a test that is initially failing, and then passes


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-27 12:47:08 +00:00
e53de15b8e Fix behavior of limit and offset for hybrid search when keyword results are returned early
The test is fixed
2024-06-27 14:25:33 +02:00
8c4921b9dd Add failing test on limit+offset for hybrid search 2024-06-27 14:21:34 +02:00
f6a00f4a90 Merge #4740
4740: Make `embeddings` optional and improve error message for `regenerate` r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4741

## What does this PR do?
- Make the `embeddings` parameter optional when manually specifying embeddings for an embedder
- Adds a lot of tests around malformed `_vectors.embedder` objects
- Use `deserr` to deserialize the `_vectors.embedder` field, improving error messages


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-27 10:06:28 +00:00
ce08dc509b add more tests and improve the location of the error 2024-06-27 11:51:45 +02:00
1daaed163a Make _vectors.:embedding.regenerate mandatory + tests + error messages 2024-06-27 11:04:58 +02:00
809e742253 Merge #4731
4731: Fix the missing geo distance when one or both of the lat / lng are string r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4193

## What does this PR do?
- Properly extract the lat / lng when one or both of them are string
- Add a test 


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-27 07:33:22 +00:00
decdfe03bc Merge #4724
4724: Improve tenant token error messages r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes  #4727

## What does this PR do?
- Introduce a bunch of new error messages around tenant tokens
- Ignore the error messages in most tests that were doing for loop over multiple kinds of errors
- Introduce new tests that specifically test these error messages


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-27 06:47:40 +00:00
aae5c324d7 Merge #4703
4703: Update yaup r=ManyTheFish a=irevoire

There was a bug in `yaup` where serializing a structure with an array would give you a wrong query parameter.

Now, yaup is also in charge of sending the initial `?` before the query parameters.

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-27 06:10:15 +00:00
a108d8f6f3 update yaup 2024-06-26 16:03:51 +02:00
34cf576339 Merge #4706
4706: specify the rust toolchain r=irevoire a=irevoire

The action we were using was not working with the `rust-toolchain.toml` file.
But the repository is not maintained anymore.
While looking for a solution, I found out that [helix](https://github.com/helix-editor/rust-toolchain) solved the issue on their side by forking the repo and adding a few fixes. That's what I use currently, but I don't know if it's a sustainable solution in the long term

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-26 12:56:18 +00:00
eb292a7a62 Fix the missing geo distance when one or both of the lat / lng are string 2024-06-26 14:50:15 +02:00
e28332a904 set the rust toolchain to the v1.75.0 2024-06-26 14:01:28 +02:00
a1dcde6b9a Update meilisearch/src/extractors/authentication/mod.rs
Co-authored-by: Many the fish <many@meilisearch.com>
2024-06-26 14:00:21 +02:00
544e98ca99 use the current version for clippy 2024-06-26 13:58:25 +02:00
1e4699b82c Merge #4716
4716: Fix bad http status and error message on wrong payload  r=irevoire a=Karribalu

# Pull Request

## Related issue
Fixes #4698

## What does this PR do?
- Fixes bad http status when bad payload with gzip Content-Encoding

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: karribalu <karri.balu123456@gmail.com>
2024-06-26 08:00:51 +00:00
2c09c324f7 Merge #4730
4730: fix a possibly flaky test r=irevoire a=irevoire

On slow CI, it was possible for a document addition to _not_ be processed and then get autobatched with an index deletion, which changed their task summary details in the end.
Now, I wait for the task to finish, and the result will always be the same

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-26 07:32:51 +00:00
3d6b61d8d2 fix flakiness for real 2024-06-26 09:24:09 +02:00
1374b661d1 fix a possibly flaky test 2024-06-26 09:14:59 +02:00
7e3c306c54 Merge #4725
4725: Store primary key as String when Number exceeds i64 range r=irevoire a=JWSong

# Pull Request

## Related issue
Fixes #4696 

## What does this PR do?
- When a Number value exceeding the range of i64 is received as a primary key, it will be stored as a String.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: JWSong <thdwjddn123@gmail.com>
2024-06-26 07:06:04 +00:00
2608a596a0 Update error message and add tests for incomplete compressed document 2024-06-25 18:36:29 +01:00
e16edb2c35 use the helix action since the official one doesn't support the rust-toolchain file 2024-06-25 17:00:50 +02:00
5c758438fc Update the CI to take the rust-toolchain file into account 2024-06-25 16:59:23 +02:00
ab6cac2321 specify the rust toolchain 2024-06-25 16:59:23 +02:00
6fb36ed30e get rid of the redundant info in document_addition_with_huge_int_primary_key 2024-06-25 23:54:27 +09:00
dcdc83946f accept large number as string 2024-06-25 21:41:47 +09:00
3c4c46377b Merge #4665
4665: Add missing Korean support r=ManyTheFish a=junhochoi

Some configurations are missing the `korean` feature; this PR adds them and adds a test case in `milli/src/search/mod.rs`.

# Pull Request

## Related issue

#3443 #3882 

## What does this PR do?
- Improvement on enabling Korean support

Inspired by the work in #3882, I tried to enable the Korean features but found some missing configurations.
This PR adds those missing configs (mostly in Cargo.toml) and adds one test case.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Junho Choi <jh.choi@catenoid.net>
2024-06-25 11:51:21 +00:00
7da21bb601 introduce as many custom error message as possible 2024-06-25 12:40:51 +02:00
13161fd7d0 Merge #4722
4722: Grow by 1TB instead of 1MB r=dureuill a=dureuill

When an index reaches 1TB, increase its size by 1TB rather than 1MB

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-25 10:17:58 +00:00
b81e2951a9 Merge #4723
4723: Fixes for Rust v1.79 r=ManyTheFish a=dureuill

cherry-picked from the `release-v1.9.0` branch

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-25 09:21:29 +00:00
d75e0098c7 Fixes for Rust v1.79 2024-06-25 11:16:06 +02:00
27496354e2 Grow by 1TB instead of 1MB 2024-06-25 09:01:11 +02:00
2e0ff56f3f Add missing Korean support
Some configuration is missing `korean` features and
add a test case in `milli/src/search/mod.rs`.
2024-06-25 12:45:21 +09:00
a74fb87d1e start introducing new error messages 2024-06-24 19:00:53 +02:00
558b66e535 make most tests work with variable error messages 2024-06-24 19:00:44 +02:00
cade18bd47 Update README.md (#4721) 2024-06-24 15:47:10 +02:00
298c7b0c93 Merge #4715
4715: Build all arroy indexes that need to be built r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4588

## What does this PR do?
- Update arroy
- Ensure we always rebuild the arroy indexes that need to be built


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-24 09:32:04 +00:00
606e108420 fix all the flaky snapshots 2024-06-24 11:13:45 +02:00
7be17b7e4c add the missing snapshots 2024-06-24 10:52:57 +02:00
1693332cab Update arroy and always build the trees that need to be built 2024-06-24 10:14:03 +02:00
ddd564665b Merge #4713
4713: Speed up facet distribution r=ManyTheFish a=Kerollmops

This PR is akin to #4682, but this time, the same logic is applied to the facets. Bitmaps are not decoded, and we do an intersection on the bytes with the search candidates instead of materializing the RoaringBitmap to destroy it just after the operation.

A prospect raised some slow requests when performing facet searches, and I found out that the disk optimization intersection wasn't performed on the facets.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-06-24 05:23:46 +00:00
2a38f5c757 Run Rustfmt 2024-06-21 00:14:26 +01:00
133d33d72c Merge remote-tracking branch 'origin/main' 2024-06-20 23:55:17 +01:00
fb683fe88b Fix bad http status and error message on wrong payload 2024-06-20 23:55:09 +01:00
4ae11bfd31 Merge #4710
4710: Only spawn thread pool once (v1.9) r=irevoire a=dureuill

# Pull Request

See #4707 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-20 11:45:32 +00:00
9736e16a88 Make clippy happy 2024-06-20 13:02:44 +02:00
6fa4da8ae7 Improve facet distribution speed in count mode 2024-06-20 12:58:51 +02:00
19d7cdc20d Improve facet distribution speed in lexico mode 2024-06-20 12:57:08 +02:00
c229200820 Merge #4712
4712: Update mini-dashboard 2.14 r=irevoire a=curquiza

Fixes #4668

Co-authored-by: curquiza <clementine@meilisearch.com>
2024-06-20 08:47:22 +00:00
bad28cc9e2 Update mini-dashboard 2.14 2024-06-20 10:01:36 +02:00
534f696b29 Update the README to link more demos (#4711)
This Pull Request adds two new interesting demos to a brand new list, which replaces the short _Try it_ text just below the Where2Watch showcase image hoping people will notice them.
2024-06-20 09:53:06 +02:00
a04041c8f2 Only spawn the pool once 2024-06-19 16:25:33 +02:00
b347b66619 Revert "Add june 11th webinar banner" (#4705) 2024-06-18 18:45:50 +02:00
e580d6b98f Merge #4693
4693: Introduce distinct attributes at search time r=irevoire a=Kerollmops

This PR fixes #4611.

### To Do
- [x] Remove the `distinguishableAttributes` settings (not even a commit about that).
- [x] Use the `filterableAttributes` to be able to use the `distinct` parameter at search.
- [x] Work on the errors and make tests.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-18 07:45:03 +00:00
8ba65e333b add snapshot files 2024-06-17 16:50:26 +02:00
43875e6758 fix bug around nested fields 2024-06-17 15:59:30 +02:00
d7844a6e45 add a bunch of tests on the errors of the distinct at search time 2024-06-17 15:37:32 +02:00
e9bf4c43a4 Merge #4649
4649: Don't store the vectors in the documents database r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4607

## What does this PR do?
- Ensure that anything falling under `_vectors` is NOT searchable, filterable or sortable
- [x] per embedder, add a roaring bitmap of documents that provide "userProvided" embeddings
- [x] in the indexing process in extract_vector_points, set the bit corresponding to the document depending on the "userProvided" subfield in the _vectors field.
- [x] in the document DB in typed chunks, when writing the _vectors field, remove all keys corresponding to an embedder

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-17 12:32:03 +00:00
a8a0854421 Update meilisearch/src/analytics/segment_analytics.rs 2024-06-17 14:30:50 +02:00
0a8f50695e Fixes for Rust v1.79 2024-06-13 17:47:44 +02:00
09d9b63e1c - test case where all vectors were generated
- update tests following changes in behavior from previous commit
2024-06-13 17:16:41 +02:00
b9b938c902 Change retrieveVectors behavior:
- when the feature is disabled, documents are never modified
- when the feature is enabled and `retrieveVectors` is disabled, `_vectors` is removed from documents
- when the feature is enabled and `retrieveVectors` is enabled, vectors from the vectors DB are merged with `_vectors` in documents

Additionally `_vectors` is never displayed when the `displayedAttributes` list does not contain either `*` or `_vectors`

- fixed an issue where `_vectors` was not injected when all vectors in the dataset were always generated
2024-06-13 17:13:36 +02:00
6bf07d969e add failing test 2024-06-13 15:49:42 +02:00
e35ef31738 Small changes following review 2024-06-13 14:20:48 +02:00
3f212a8202 Update tests 2024-06-12 18:13:34 +02:00
bc547dad6f Update dump file 2024-06-12 18:12:56 +02:00
3bc8f81abc user_provided => regenerate 2024-06-12 18:12:20 +02:00
a89eea233b Fix vectors injection 2024-06-12 17:10:19 +02:00
34fabed214 Add test for vector writeback 2024-06-12 17:09:34 +02:00
fca9fe39b3 Update test snapshots 2024-06-12 14:50:55 +02:00
f5cf01e7d1 Rework extraction to use EmbedderAction 2024-06-12 14:50:55 +02:00
d1dd7e5d09 In transform for removed embedders, write back their user provided vectors in documents, and clear the writers 2024-06-12 14:50:55 +02:00
d18c1f77d7 Update embedder configs with a finer granularity
- no longer clear vector DB between any two embedder changes
2024-06-12 14:50:55 +02:00
d0b05ae691 Add EmbedderAction to settings 2024-06-12 14:50:54 +02:00
e9bf4eb100 Reformulate ParsedVectorsDiff in terms of VectorState 2024-06-12 14:11:44 +02:00
b368105272 Add EmbedderConfigs::into_inner 2024-06-12 14:11:44 +02:00
e0eff08095 Merge #4685
4685: Fix ci tests r=dureuill a=ManyTheFish

# Pull Request
Make all of the following CI jobs succeed:
https://github.com/meilisearch/meilisearch/actions/runs/9477183091

## Related issue
Fixes #4629

## What does this PR do?
- Change the test behavior for `swedish-recomposition` feature flag
- Remove the `-v` parameter from grep

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-06-12 07:58:33 +00:00
304a9df52d Remove -v parameter 2024-06-12 07:22:24 +02:00
39f60abd7d Add and modify distinct tests 2024-06-11 17:53:53 -04:00
1991bd03da Distinct at search erases the distinct in the settings 2024-06-11 17:02:39 -04:00
ee39309aae Improve errors and introduce a new InvalidSearchDistinct error code 2024-06-11 16:03:39 -04:00
0d31be1494 Make the distinct work at search 2024-06-11 11:39:35 -04:00
3493093c4f add a batch of tests 2024-06-11 16:03:54 +02:00
7cef2299cf Fix behavior when removing a document 2024-06-11 09:45:08 +02:00
a838f39fce Merge #4682
4682: Speed Up Filter ANDs operations r=Kerollmops a=Kerollmops

This PR fixes #4659 and improves the way we do AND operations by using the latest [RoaringBitmap feature to do intersections with serialized bitmaps](https://github.com/RoaringBitmap/roaring-rs/pull/281). Doing so drastically reduces the time spent reading, copying bytes in memory to use and keep a subset of the containers in the bitmap.

### Some Example Results

With a 45M documents dataset running on a good NVMe. This example filter was taking 77ms and with this PR only 13ms (6x speedup):

```sql
artist = 'The Beatles' AND (duration 150 TO 500 OR duration NOT EXISTS) AND genres IN [Rock, 'Rock and Roll'] AND rating > 4 AND released_year 1960 TO 1990
```

By reordering the filter AND clauses we can reach a constant 8ms execution time. However, note that it is a manual operation. On the other side the previous filter pipeline is still at a constant 45ms execution time with this filter. (6x speedup)

```sql
artist = 'The Beatles' AND genres IN [Rock, 'Rock and Roll'] AND released_year 1960 TO 1990 AND (duration 150 TO 500 OR duration NOT EXISTS)
```

### To Do
- [x] Rebase on `release-v1.9.0`.
- [ ] ~Skip branches of the facet/filter tree when nothing is in common with the universe~ slower this way.
- [x] When the universe is required use the universe given in parameter if possible.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-06-11 02:51:17 +00:00
600e97d9dc gate the retrieveVectors parameter behind the vectors feature flag 2024-06-10 18:26:12 +02:00
d1962b2b0f Merge #4691
4691: Add june 11th webinar banner r=curquiza a=Strift

# Pull Request

This PR adds a banner in the README to promote tomorrow's webinar event.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Strift <laurent@meilisearch.com>
2024-06-10 16:17:21 +00:00
8b450b84f8 Add june 11th webinar banner 2024-06-10 17:45:14 +02:00
7add7d053c Merge #4689
4689: Bring back changes from v1.8.2 into v1.9.0 r=curquiza a=dureuill



Co-authored-by: dureuill <dureuill@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2024-06-10 14:03:55 +00:00
7559dfc814 Merge tag 'v1.8.2' into release-v1.9.0 2024-06-10 15:07:34 +02:00
6c6c4732a1 Merge #4681
4681: Fix concurrency issue r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4654 

## What does this PR do?
- Asynchronously drop permits


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-10 09:36:08 +00:00
0502b17501 log the state of the index-scheduler in all failed tests 2024-06-10 10:52:49 +02:00
3976fe660e Merge #4688
4688: Update version for the next release (v1.8.2) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-06-10 08:28:34 +00:00
50f8218a5d Asynchronously drop permits 2024-06-10 10:19:57 +02:00
19585f1a4f Update version for the next release (v1.8.2) in Cargo.toml 2024-06-10 07:59:36 +00:00
8ec6e175e5 Replace roaring patch to the v0.10.5 2024-06-07 22:11:26 -04:00
57d066595b fix Tests almost all features 2024-06-06 17:24:50 +02:00
75b2e02cd2 Log more stuff around filtering 2024-06-06 11:00:07 -04:00
40f05fe156 Bump roaring to the latest commit 2024-06-06 10:59:55 -04:00
734d1c53ad fix a panic in yaup 2024-06-06 16:31:07 +02:00
52d0d35b39 Revert "Reduce the universe while exploring the facet tree" because it's slower this way
This reverts commit 14026115f21409535772ede0ee4273f37848dd61.
2024-06-06 09:17:51 -04:00
5432776132 Reduce the universe while exploring the facet tree 2024-06-06 09:17:51 -04:00
66470b27e6 Use the MultiOps trait for IN operations 2024-06-06 09:17:51 -04:00
0a9bd398c7 Improve the NOT operator to use the universe when possible 2024-06-06 09:17:51 -04:00
7967e93c16 Skip evaluating when a universe is empty, nothing can be found 2024-06-06 09:17:51 -04:00
a6f3a01c6a Expose the universe to do efficient intersections on deserialization 2024-06-06 09:17:51 -04:00
4ca4a3f954 Make the CboRoaringBitmapCodec support intersection on deserialization 2024-06-06 09:17:51 -04:00
e4a69c5ac3 Introduce the FacetGroupLazyValue type 2024-06-06 09:17:50 -04:00
ff2e498267 Patch roaring to use the version supporting intersection on deserialization 2024-06-06 09:17:50 -04:00
531e3d7d6a MultiOps trait for OR operations 2024-06-06 09:17:50 -04:00
63dded3961 implements the new analytics for the get documents routes 2024-06-06 11:39:29 +02:00
2cdcb703d9 fix the deletion of vectors and add a test 2024-06-06 11:39:29 +02:00
6607875f49 add the retrieveVectors parameter to the get and fetch documents route 2024-06-06 11:39:29 +02:00
ea61e5cbec makes clippy happy x2 2024-06-06 11:39:29 +02:00
31a793d226 fix the regeneration of the embeddings in the search 2024-06-06 11:39:29 +02:00
d85ab23b82 rename all occurrences of user_defined to user_provided for consistency 2024-06-06 11:39:29 +02:00
b7349910d9 implements more review comments 2024-06-06 11:39:29 +02:00
49fa41ce65 apply first round of review comments 2024-06-06 11:39:29 +02:00
400cf3eb92 add api error test on the new retrieveVectors parameter 2024-06-06 11:39:29 +02:00
376b3a19a7 makes clippy and fmt happy 2024-06-06 11:39:29 +02:00
d92c173fdc update the new similar tests 2024-06-06 11:39:29 +02:00
b867829ef1 remove useless dbg 2024-06-06 11:39:29 +02:00
6b29676e7e update snapshots 2024-06-06 11:39:29 +02:00
caad40964a implements the analytics 2024-06-06 11:39:29 +02:00
cc5dca8321 fix two bugs and add a dump test 2024-06-06 11:39:29 +02:00
5d50850e12 always push the user defined vectors in arroy 2024-06-06 11:39:29 +02:00
a73ccc78a6 forward the embedding config to the extractors 2024-06-06 11:39:28 +02:00
9eb6f522ea wraps the index embedding config in a struct 2024-06-06 11:37:30 +02:00
04f6523f3c expose a new parameter to retrieve the embedders at search time 2024-06-06 11:36:11 +02:00
30d66abf8d fix the test 2024-06-06 11:36:11 +02:00
84e498299b Remove the vectors from the documents database 2024-06-06 11:36:11 +02:00
7a84697570 never store the _vectors as searchable or faceted fields 2024-06-06 11:36:11 +02:00
4148fbbe85 provide a method to get all the nested fields ids from a name 2024-06-06 11:36:11 +02:00
cb765ad249 Merge #4684
4684: Update Charabia v0.8.11 r=irevoire a=ManyTheFish

# Update Charabia v0.8.11

### Adds a new normalizer to normalize œ to oe and æ to ae
Now search words containing `œ` or `æ` will be retrieved using `oe` or `ae`, like `Daemon` <=> `Dæmon`

### Fix: make `chinese-normalization-pinyin` feature flag compile
Fixes #4629



Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-06-06 08:59:49 +00:00
2e50c6ec81 Update Charabia 2024-06-06 10:18:43 +02:00
40b2345394 Merge #4680
4680: Speedup additional searchables r=Kerollmops a=ManyTheFish

Fixes #4492.

## To Do
 - [x] Do not call the `InnerSettingsDiff::only_additional_fields` function too many times

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-06-05 15:39:28 +00:00
30293883e0 Fix condition mistake 2024-06-05 17:30:07 +02:00
b833be46b9 Avoid running proximity when only the exact attributes changes 2024-06-05 17:30:07 +02:00
0a4118329e Put only_additional_fields to None if the difference gives an empty result. 2024-06-05 17:30:07 +02:00
261e92d7e6 Skip iterating over documents when the faceted field list doesn't change 2024-06-05 17:30:07 +02:00
5cd08979b1 iterate over the faceted fields instead of over the whole document 2024-06-05 17:30:07 +02:00
2af7e4dbe9 Rename the embeddings workloads 2024-06-05 17:30:07 +02:00
a998b881f6 Cache a lot of operations to know if a field must be indexed 2024-06-05 17:30:07 +02:00
b81953a65d Add a span for the prepare_for_documents_reindexing 2024-06-05 17:30:07 +02:00
091bb157f1 Add a span for the settings diff creation 2024-06-05 17:30:07 +02:00
1b639ce44b Reduce the number of complex calls to settings diff functions 2024-06-05 17:30:07 +02:00
87cf8a3c94 Introduce a new way to determine the operations to perform on the fields 2024-06-05 17:30:07 +02:00
0f578348f1 Introduce a dedicated function to write proximity entries in database 2024-06-05 17:30:07 +02:00
fad4675abe Give the settings diff to the write_typed_chunk_into_index function 2024-06-05 17:30:07 +02:00
1ab03c4ede Fix an issue with settings diff and * in the searchable attributes 2024-06-05 17:30:07 +02:00
0c6e4b2f00 Introducing a new into_del_add_obkv_conditional_operation function 2024-06-05 17:30:07 +02:00
42b3f52ef9 Introduce the SettingDiff only_additional_fields method 2024-06-05 17:30:07 +02:00
93f5defedc Merge #4656
4656: Adding a new `searchableAttribute` no longer re-index all the attributes r=ManyTheFish a=Kerollmops

Fixes #4492.

## To Do
 - [x] Do not call the `InnerSettingsDiff::only_additional_fields` function too many times
 - [ ] Add tests

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-06-05 14:51:14 +00:00
33241a6b12 Fix condition mistake 2024-06-05 16:00:24 +02:00
ff87b4db26 Avoid running proximity when only the exact attributes change 2024-06-05 12:48:44 +02:00
ba9fadc8f1 Put only_additional_fields to None if the difference gives an empty result. 2024-06-05 10:51:16 +02:00
98e062a714 Merge #4675
4675: Update actix-web 4.5.1 -> 4.6.0 r=dureuill a=dureuill

# Pull Request

- actix-web 4.5.1 -> 4.6.0
- actix-http 3.6.0 -> 3.7.0
- actix-web-static-files (commit 2d3b6160) -> 4.0.1
- tracing-actix-web 0.7.9 -> 0.7.10
- brotli 3.4.0 -> 6.0.0

## Related issue
Fixes #4625 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-05 07:40:35 +00:00
d29d4f88da Skip iterating over documents when the faceted field list doesn't change 2024-06-04 15:31:24 +02:00
17c5ceeb9d iterate over the faceted fields instead of over the whole document 2024-06-04 14:04:20 +02:00
8412665957 Update actix-web 4.5.1 -> 4.6.0 2024-06-04 09:54:30 +02:00
fc584f1db3 Merge #4666
4666: Add a score threshold search parameter r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4609

## What does this PR do?
- See [usage](https://meilisearch.notion.site/Filter-by-score-usage-224a183ce7b24ca99b6a9a8da755668a?pvs=25#95b76ded400342ba9ab3d67c734836f0) and [the known limitation](https://meilisearch.notion.site/Filter-by-score-usage-224a183ce7b24ca99b6a9a8da755668a?pvs=25#e4e32195bf0e4195b5daecdbb7a97a17)


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-03 08:42:44 +00:00
2b6db6541e Changes after review 2024-06-03 10:30:00 +02:00
d6bd88ce4f Merge #4667
4667: Frequency matching strategy r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #3773

## What does this PR do?
- add test for matching strategy
- implement frequency matching strategy

See the [PRD for more details](https://www.notion.so/meilisearch/Frequency-Matching-Strategy-0f3ba08833a442a39590a53a1505ab00).

[Public API](https://www.notion.so/meilisearch/frequency-matching-strategy-89868fb7fc584026bc56e378eb854a7f).


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-05-30 14:53:31 +00:00
c32d746069 Rename the embeddings workloads 2024-05-30 16:46:57 +02:00
b9a0ff0dd6 Cache a lot of operations to know if a field must be indexed 2024-05-30 16:18:23 +02:00
75496af985 Add a span for the prepare_for_documents_reindexing 2024-05-30 12:14:22 +02:00
0e9eb9eedb Add a span for the settings diff creation 2024-05-30 12:08:27 +02:00
c2fb7afe59 fmt 2024-05-30 12:06:46 +02:00
3f1a510069 Add tests and fix matching strategy 2024-05-30 12:02:42 +02:00
3a78e988da Reduce the number of complex calls to settings diff functions 2024-05-30 11:23:07 +02:00
d9e5074189 Introduce a new way to determine the operations to perform on the fields 2024-05-30 11:23:07 +02:00
bc210bdc00 Introduce a dedicated function to write proximity entries in database 2024-05-30 11:23:06 +02:00
4bf83f701c Give the settings diff to the write_typed_chunk_into_index function 2024-05-30 11:23:06 +02:00
db3887929f Fix an issue with settings diff and * in the searchable attributes 2024-05-30 11:22:50 +02:00
9af103a88e Introducing a new into_del_add_obkv_conditional_operation function 2024-05-30 11:22:49 +02:00
99211eb375 Introduce the SettingDiff only_additional_fields method 2024-05-30 11:22:49 +02:00
41976b82b1 Tests for ranking_score_threshold 2024-05-30 11:22:26 +02:00
c36410fcbf Analytics for ranking score threshold 2024-05-30 11:22:12 +02:00
7ce2691374 Add ranking score threshold to similar API 2024-05-30 11:21:31 +02:00
4f03b0cf5b Add ranking score threshold to similar 2024-05-30 11:20:50 +02:00
c26db7878c Expose rankingScoreThreshold in API 2024-05-30 10:32:35 +02:00
06a9803544 Merge #4664
4664: Update README.md r=curquiza a=tpayet

Add hybrid & semantic as a feature

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Thomas Payet <thomas@meilisearch.com>
2024-05-29 16:55:20 +00:00
b2588d8101 Update README.md
Add hybrid & semantic as a feature
2024-05-29 17:48:48 +02:00
62d27172f4 Merge #4663
4663: Bring back release v1.8.1 into main r=ManyTheFish a=ManyTheFish



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: ManyTheFish <ManyTheFish@users.noreply.github.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-05-29 14:47:38 +00:00
1ab88e10b9 Merge branch 'main' into merge-release-v1.8.1-in-main 2024-05-29 16:24:00 +02:00
6a4b2516aa WIP 2024-05-29 16:21:24 +02:00
aac1d769a7 Add ranking_score_threshold to milli 2024-05-29 14:17:09 +02:00
abdc4afcca Implement Frequency matching strategy 2024-05-29 13:59:08 +02:00
75d5c0ae1f Merge #4647
4647: Feature: get similar documents r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4610 

## What does this PR do?
[Usage](https://meilisearch.notion.site/Get-similar-documents-usage-540919ca755c4da0b7cdee273db3f290)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-05-29 11:42:23 +00:00
a88554216a Merge #4657
4657: Update version for the next release (v1.9.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-05-29 11:14:19 +00:00
2cf3e1c80a Temporarily ignore perform snapshot test under Windows 2024-05-29 12:42:47 +02:00
e1fbfde6c4 Merge branch 'main' into merge-release-v1.8.1-in-main 2024-05-29 11:31:03 +02:00
27b75ec648 merge main into v1.8.1 2024-05-29 11:26:07 +02:00
07fdb081a4 Update version for the next release (v1.9.0) in Cargo.toml 2024-05-28 14:19:40 +00:00
ca006e38ec Basic tests 2024-05-28 15:28:19 +02:00
e26bd87780 Error tests for similar routes 2024-05-28 15:28:19 +02:00
c01e498a63 Test server can call similar 2024-05-28 15:28:19 +02:00
ca6cc4654b Add similar route 2024-05-28 15:28:19 +02:00
3bd9d2478c Add error codes 2024-05-28 15:27:43 +02:00
54b15059a0 Analytics changes 2024-05-28 15:27:43 +02:00
d35278320e Add support functions for accessing arroy writers and readers 2024-05-28 15:27:43 +02:00
e172e938e7 add search rules directly takes the filter rather than the searchquery 2024-05-28 15:22:25 +02:00
02b3d82c60 filtered_universe accepts index and txn instead of SearchContext 2024-05-28 15:22:12 +02:00
fd2c95999d Change validate_document_id to public and remove extra layer of result 2024-05-28 15:21:19 +02:00
e248d2a1e6 Merge #4655
4655: Remove `exportPuffinReport` experimental feature r=Kerollmops a=Kerollmops

This PR fixes #4605 by removing every trace of Puffin. Puffin is a great tool, but we use a better approach to measuring performance.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-05-28 07:01:16 +00:00
487431a035 Fix tests 2024-05-27 16:12:20 +02:00
b6d450d484 Remove puffin experimental feature 2024-05-27 15:59:28 +02:00
dc949ab46a Remove puffin usage 2024-05-27 15:59:14 +02:00
7f3e51349e Remove puffin for the dependencies 2024-05-27 15:53:06 +02:00
19acc65ad2 Merge #4646
4646: Reduce `Transform`'s disk usage r=Kerollmops a=Kerollmops

This PR implements what is described in #4485. It reduces the number of disk writes and disk usage.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-05-23 16:06:50 +00:00
3a3ab17714 Merge #4651
4651: Allow to comment with the results of benchmark invocation r=Kerollmops a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-05-23 15:32:09 +00:00
eaf57056ca comment with the results of benchmarks 2024-05-23 15:34:39 +02:00
e340705634 Change benchmark outputs
- logs to stderr instead of stdout
- prints links to the dashboard when there is a dashboard
2024-05-23 15:29:06 +02:00
fe17c0f52e Construct the minimal OBKVs according to the settings diff 2024-05-23 11:23:57 +02:00
14bc80e3df Merge #4633
4633: Allow to mark vectors as "userProvided" r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #4606 

## What does this PR do?

[See usage in PRD](https://meilisearch.notion.site/v1-9-AI-search-changes-e90d6803eca8417aa70a1ac5d0225697#deb96fb0595947bda7d4a371100326eb)

- Extends the shape of the special `_vectors` field in documents.
    - previously, the `_vectors` field had to be an object, with each field the name of a configured embedder, and each value either `null`, an embedding (array of numbers), or an array of embeddings.
    - In this PR, the value of an embedder in the `_vectors` field can additionally be an object. The object has two fields:
      1. `embeddings`: `null`, an embedding (array of numbers), or an array of embeddings.
      2. `userProvided`: a boolean indicating if the vector was provided by the user.
    - The previous form `embedder_or_array_of_embedders` is semantically equivalent to:
    ```json
    {
        "embeddings": embedder_or_array_of_embedders,
        "userProvided": true
    }
    ```
- During the indexing step, the subfields and values of the `_vectors` field that have `userProvided` set to **false** are added in the vector DB, but not in the documents DB: that means that future modifications of the documents will trigger a regeneration of that particular vector using the document template.
- This allows **importing** embeddings as a one-shot process, while still retaining the ability to regenerate embeddings on document change.
- The dump process now uses this ability: it enriches the `_vectors` fields of documents with the embeddings that were autogenerated, marking them as not `userProvided`. This allows importing the vectors from a dump without regenerating them.

### Tests

This PR adds the following tests

- Long-needed hybrid search tests of a simple hf embedder
- Dump test that imports vectors. Due to the difficulty of actually importing a dump in tests, we just read the dump and check it contains the expected content.
- Tests in the index-scheduler: this tests that documents containing the same kind of instructions as in the dump index as expected


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-05-23 08:17:54 +00:00
bc5663e673 FieldIdsMap no longer useful thanks to #4631 2024-05-22 16:06:15 +02:00
8a941c0241 Smaller review changes 2024-05-22 14:44:42 +02:00
3412e7fbcf "[]" is deserialized as 0 embedding rather than 1 embedding of dim 0 2024-05-22 12:25:21 +02:00
16037e2169 Don't remove embedders that are not in the config from the document DB 2024-05-22 12:24:51 +02:00
8f7c8ca7f0 Remove now unused error variant 2024-05-22 12:23:43 +02:00
ba75d23bfe Merge #4648
4648: Update version for the next release (v1.8.1) in Cargo.toml r=ManyTheFish a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: ManyTheFish <ManyTheFish@users.noreply.github.com>
2024-05-21 16:38:36 +00:00
7fbb3bf8e8 Update version for the next release (v1.8.1) in Cargo.toml 2024-05-21 15:13:03 +00:00
500ddc76b5 Make the flattened sorter optional 2024-05-21 16:16:36 +02:00
9066a446a3 Merge #4642
4642: Index the _geo fields when changing the setting while there are already documents in the DB r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4640
Fixes https://github.com/meilisearch/meilisearch/issues/4628

## What does this PR do?
- Add an integration test that first indexes the document and then changes the settings
- Fix `extract_geo_point` by detecting if the `_geo` field has been faceted in this setting change and index all documents

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-05-21 13:16:11 +00:00
eccbcf5130 Increase index-scheduler test timeouts 2024-05-21 14:59:08 +02:00
943f8dba0c Make clippy happy 2024-05-21 14:58:41 +02:00
1aa8ed9ef7 Make the original sorter optional 2024-05-21 14:53:26 +02:00
f762307838 Fix clippy 2024-05-21 13:44:20 +02:00
3e94a90722 Fixes 2024-05-21 13:39:46 +02:00
abe29772db Merge #4644
4644: Revert "Stream documents" and keep heed+arroy to the latest version r=Kerollmops a=irevoire

Reverts meilisearch/meilisearch#4544

Fixes https://github.com/meilisearch/meilisearch/issues/4641

I didn’t realize that some http clients were not handling chunked http requests like you would expect (if you ask the body, it gives you the body), which made the previous PR breaking.

There is no way to provide a good fix to the issue we initially wanted to fix without breaking meilisearch and that’s not planned for now.

Co-authored-by: Tamo <irevoire@protonmail.ch>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-05-21 10:21:47 +00:00
c9ac7f2e7e update heed to latest version 2024-05-20 15:19:00 +02:00
7e251b43d4 Revert "Stream documents" 2024-05-20 15:09:45 +02:00
9969f7a638 Add test on index-scheduler 2024-05-20 14:44:10 +02:00
b17cb56dee Test array of vectors 2024-05-20 14:44:10 +02:00
afcd7b9f0c Test hybrid search with hf embedder 2024-05-20 14:44:10 +02:00
fc7e817221 Index geo points based on the settings differences 2024-05-20 12:27:26 +02:00
0f78703b85 add a test reproducing the bug 2024-05-20 10:58:08 +02:00
30cf972987 Add test with a dump 2024-05-20 10:36:18 +02:00
d05d49ffd8 Fix tests 2024-05-20 10:36:18 +02:00
0462ebbe58 Don't write an empty _vectors field 2024-05-20 10:36:18 +02:00
2f7a8a4efb Don't write vectors that weren't autogenerated in document DB 2024-05-20 10:36:18 +02:00
02714ef5ed Add vectors from vector DB in dump 2024-05-20 10:36:18 +02:00
52d9cb6e5a Refactor vector indexing
- use the parsed_vectors module
- only parse `_vectors` once per document, instead of once per embedder per document
2024-05-20 10:36:17 +02:00
261de888b7 Add function to get the embeddings of a document in an index 2024-05-20 10:36:17 +02:00
98c811247e Add parsed vectors module 2024-05-20 10:25:59 +02:00
59ecf1cea7 Merge #4544
4544: Stream documents r=curquiza a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4383


### Perf
2M hackernews:

main:
Time to retrieve: 7s
RAM consumption: 2+GiB

stream:
Time to retrieve: 4.7s
RAM consumption: Too small

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-05-17 14:49:08 +00:00
273c6e8c5c uses the latest version of heed to get rid of unsafe code 2024-05-16 18:31:32 +02:00
897d25780e update milli to latest version 2024-05-16 18:31:32 +02:00
c85d1752dd keep the same rtxn to compute the filters on the documents and to stream the documents later on 2024-05-16 18:31:32 +02:00
8e6ffbfc6f stream documents 2024-05-16 18:31:32 +02:00
7c19c072fa Merge #4631
4631: Split the field id map from the weight of each fields r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4484

## What does this PR do?
- Make the (internal) searchable fields database always contain the searchable fields (instead of None when the user-defined searchable fields were not defined)
- Introduce a new « fieldids_weights_map » that does the mapping between a fieldId and its Weight
- Ensure that when two searchable fields are swapped, the field ID map doesn't change anymore (and thus, doesn't re-index)
- Uses the weight instead of the order of the searchable fields in the attribute ranking rule at search time
- When no searchable attributes are defined, make all their weights equal to zero
- When a field is declared as searchable and contains nested fields, all its subfields share the same weight

## Impact on relevancy

### When no searchable attributes are declared

When no searchable attributes are declared, all the fields have the same importance instead of randomly giving more importance to the field we've encountered « the most early » in the life of the index.

This means that before this PR, sending the following JSON:
```json
[
  { "id": 0, "name": "kefir", "color": "white" },
  { "id": 1, "name": "white", "last name": "spirit" }
]
```

Would make the field `name` more important than the field `color` or `last name`.
This means that searching for `white` would make the document `1` automatically higher ranked than the document `0`.

After this PR, all the fields have the same weight, and none are considered more important than others.

### When a nested field is made searchable

The second behavior change that happened with this PR is in the case you're sending this document, for example:

```json
{
  "id": 0,
  "name": "tamo",
  "doggo": {
    "name": "kefir",
    "surname": "le kef"
  },
  "catto": "gromez"
}
```

Previously, defining the searchable attributes as: `["tamo", "doggo", "catto"]` was actually defining the « real » searchable attributes in the engine as: `["tamo", "doggo", "catto", "doggo.name", "doggo.surname"]`, which means that `doggo.name` and `doggo.surname` were _NOT_ where the user expected them and had completely different weights than `doggo`.
In this PR all the weights have been unified, and the « real » searchable fields look like this:
```json
[ "tamo", "doggo", "doggo.name", "doggo.surname", "catto"]
   ^^^^    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^    ^^^^^
Weight 0                 Weight 1                  Weight 2
```

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-05-16 09:59:24 +00:00
673b6e1dc0 fix a flaky test 2024-05-16 11:28:14 +02:00
f2d0a59f1d when no searchable attributes are defined, make all the weights equal to zero 2024-05-16 01:06:33 +02:00
c78a2fa4f5 rename method and variable around the attributes to search on feature 2024-05-15 18:04:42 +02:00
5542f1d9f1 get back to what we were doing before in the DB cache and with the restricted field id 2024-05-15 18:00:39 +02:00
ad4d8502b3 stops storing the whole fieldids weights map when no searchable are defined 2024-05-15 17:16:10 +02:00
7ec4e2a3fb apply all style review comments 2024-05-15 15:02:26 +02:00
9fffb8e83d make clippy happy 2024-05-14 17:36:32 +02:00
caa6a7149a make the attribute ranking rule use the weights and fix the tests 2024-05-14 17:36:32 +02:00
a0082c4df9 add a failing test on the attribute ranking rule 2024-05-14 17:00:02 +02:00
b0afe0972e stop updating the fields ids map when fields are only swapped 2024-05-14 17:00:02 +02:00
9ecde41853 add a test on the current behaviour 2024-05-14 17:00:02 +02:00
685f452fb2 Fix the indexing of the searchable 2024-05-14 17:00:02 +02:00
4e4a1ddff7 gate a test behind the required feature 2024-05-14 17:00:02 +02:00
c22460045c Stops returning an option in the internal searchable fields 2024-05-14 17:00:02 +02:00
76bb6d565c Merge #4624
4624: Add "precommands" to benchmark r=dureuill a=dureuill

# Pull Request

## Related issue
Helps for https://github.com/meilisearch/meilisearch/issues/4493

## What does this PR do?
- Add support for precommands for cargo xtask bench
- update benchmark docs
- update workload files


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-05-13 08:27:56 +00:00
9d3ff11b21 Modify existing workload files to use precommands 2024-05-07 14:03:14 +02:00
43763eb98a Document precommands 2024-05-07 12:26:22 +02:00
2a0ece814c Add precommands to workloads 2024-05-07 12:23:36 +02:00
95fcd17373 Merge #4622
4622: Bump Rustls to non-vulnerable versions r=Kerollmops a=Kerollmops

This PR fixes #4599 by bumping the Rustls dependency to v0.21.12 and [ureq to v2.9.7](https://github.com/algesten/ureq/blob/main/CHANGELOG.md#297) (which bumps rustls to v0.22.4).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-05-07 09:47:30 +00:00
ac4bc143c4 Bump ureq to v2.9.7 2024-05-07 10:39:38 +02:00
f33a1282f8 Bump Rustls to v0.21.12 2024-05-07 10:31:39 +02:00
4d5971f343 Merge #4621
4621: Bring back changes from v1.8.0 into main r=curquiza a=curquiza



Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-05-06 13:46:39 +00:00
ecb5c506b3 Merge #4619
4619: Use http path pattern instead of full path in metrics r=irevoire a=gh2k

# Pull Request

## Related issue

Fixes #3983 

## What does this PR do?

- This records only the HTTP pattern in metrics instead of the full path

An alternative solution was proposed in #4145, but this doesn't really fix the root cause of the issue. The problem I'm experiencing at my end is that by using the full path, the number of labels is far too high to be useful. It is normal practice to use the path with variable placeholders, instead of the fully-expanded path.

The example given in the ticket was endpoints under `/tasks`, but this can also be a very significant problem under `/indexes/{index-uid}/documents`. e.g.:
<img width="1510" alt="Screenshot 2024-05-03 at 12 14 36" src="https://github.com/meilisearch/meilisearch/assets/6530014/1df2ec19-5f69-4164-90d2-f65c59f9b544">

This patch replaces the fully-expanded path with the matched pattern.

The linked PR also mentions paths under other routes, e.g. `/static`, but this feels like a separate concern and these can be stripped out at the Prometheus end by filters if they are unwanted. The most important thing is to make the paths usable so that we can still get stats on e.g. the number of document deletes we see.

## PR checklist

Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Simon Detheridge <s@sd.ai>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-05-06 09:37:32 +00:00
3698aef66b fix warning 2024-05-06 11:36:37 +02:00
7f5ab3cef5 Use http path pattern instead of full path in metrics 2024-05-03 12:29:31 +01:00
c668043c4f Merge #4617
4617: Destructure `EmbedderOptions` so we don't miss some options r=dureuill a=dureuill

# Pull Request

## Related issue
#4595 was caused by the code not destructuring the embedder options.


## What does this PR do?
This PR adds the missing `url` parameter for ollama, and makes sure a similar issue cannot happen in the future



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-05-02 14:55:32 +00:00
5a305bfdea Remove unused struct 2024-05-02 16:14:37 +02:00
f4dd73ec8c Destructure EmbedderOptions so we don't miss some options 2024-05-02 15:39:36 +02:00
66dce4600d Merge #4603
4603: Update charabia v0.8.10 r=Kerollmops a=ManyTheFish

- Update Charabia v0.8.10
- Add `swedish-recomposition` as an optional feature flag

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-04-30 13:04:02 +00:00
fe51ceca6d Update lock file 2024-04-30 14:33:37 +02:00
88174b8ae4 Update charabia v0.8.10 2024-04-30 14:30:23 +02:00
ebca29f3de Merge #4597
4597: Fix embeddings settings update r=ManyTheFish a=ManyTheFish

# Pull Request
- add some conditions reducing the work done when changing the settings
- add some benchmarks on embedders

## Related issue
Fixes #4585


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-04-25 16:37:28 +00:00
c793b6ef6d Merge #4600
4600: Fix embedders api r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #4594
Fixes #4595


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-04-25 13:16:33 +00:00
cbbfff3594 Remove debugging prints 2024-04-25 10:37:18 +02:00
dbcf50589b Fix clippy 2024-04-25 10:36:10 +02:00
3e5cd027a5 Merge #4593
4593: Stop crashing when panic occurs in thread pool r=ManyTheFish a=Kerollmops

This PR fixes #4362 by introducing a new boolean to catch panics in the rayon thread pool. The boolean is read after performing the operations in rayon, and the indexation process is stopped. This first version doesn't expose the panic message but marks the task as failed.

The current implementation exposes a `ThreadPoolNoAbort` wrapper. The `rayon::ThreadPool` has been wrapped to check that nothing went wrong after running the `ThreadPool::install` function. An atomic boolean and some `store/load` logic make the system work efficiently.

Before, Meilisearch was completely crashing...

<img width="1563" alt="Capture d’écran 2024-04-22 à 15 49 02" src="https://github.com/meilisearch/meilisearch/assets/3610253/ce114917-a881-4fbb-85df-c195fcf0c7cb">

Now, it handles the panics correctly and marks the task as failed.

<img width="1558" alt="Capture d’écran 2024-04-22 à 15 42 14" src="https://github.com/meilisearch/meilisearch/assets/3610253/8bd031ef-5e8f-4a12-a91e-c823597a2344">


Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-04-24 16:27:08 +00:00
7468c1cf8d Introduce WildcardSetting that are serialized as wildcards by default 2024-04-24 18:15:03 +02:00
d4aeff92d0 Introduce the ThreadPoolNoAbort wrapper 2024-04-24 16:40:12 +02:00
e87cb373de Avoid intermediate serializing when displaying settings 2024-04-24 12:33:07 +02:00
9b76501875 Display set API key for Ollama embedder 2024-04-24 12:33:07 +02:00
6247e95dc3 Add benchmark for embeddings 2024-04-23 17:42:20 +02:00
b3173d0423 Remove useless dots in the error messages 2024-04-22 18:09:33 +02:00
96cc5319c8 Introduce a new internal error type to categorize panics 2024-04-22 18:09:33 +02:00
0c7003c5df Introduce an atomic to catch panics in thread pools 2024-04-22 18:09:33 +02:00
a1aa999026 Add conditions reducing work 2024-04-22 14:18:35 +02:00
aa0bbbb246 Merge #4578
4578: Remove useless analytics r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes #4577

## What does this PR do?
Remove the following analytics:
- `Health Seen`
- `Stats Seen`
- `Task Seen`
- `Version Seen`


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-04-18 13:30:42 +00:00
a04012c33e Merge #4583
4583: Update charabia v0.8.9 r=irevoire a=ManyTheFish

# Pull Request
- Update Charabia v0.8.9
- Add the optional feature flag activating pinyin normalization

## Related issue
Fixes  #4574


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-04-18 09:42:42 +00:00
c71b5d09ff Update charabia v0.8.9 2024-04-18 11:38:26 +02:00
248e22005a Merge #4582
4582: Fix some typos in comments r=curquiza a=writegr

# Pull Request

## Related issue

No

## What does this PR do?

 fix some typos in comments

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: writegr <wellweek@outlook.com>
2024-04-18 07:07:33 +00:00
ab43a8a949 chore: fix some typos in comments
Signed-off-by: writegr <wellweek@outlook.com>
2024-04-18 14:12:52 +08:00
4a8459b799 Merge #4576
4576: increase the default search time budget from 150ms to 1.5s r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes #4575

## What does this PR do?
- increase the default search time budget from 150ms to 1.5s


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-04-17 16:04:47 +00:00
442de982a9 Merge #4581
4581: Always show facet numbers in alpha order in the facet distribution r=ManyTheFish a=Kerollmops

This PR fixes #4559 by making sure that the number facets (facets that come from numbers from the documents) are always displayed in alpha order, even when there is a small amount to display.

The issue was due to some algorithms executed when the number of facet values to display was small. We can see that now, facet values are always displayed correctly.

```json
"facetDistribution": {
    "release_year": {
        "2010": 1,
        "2011": 1,
        "2012": 1,
        "2013": 1,
        "2014": 1,
        "2015": 1,
        "2016": 1,
        "2017": 1,
        "2018": 1,
        "2019": 19,
        "2020": 1,
        "2021": 1,
        "2022": 1,
        "2023": 1,
        "2024": 1,
        "2025": 1
    }
}
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-04-17 15:18:58 +00:00
c923adf222 Fix facet distribution for alpha on facet numbers 2024-04-17 16:31:16 +02:00
2dfee2fad5 Merge #4580
4580: Update the search logs r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4579

## What does this PR do?
- Update the debug implementation of the search query and search results so it’s way smaller and doesn’t display useless information


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-04-17 14:25:43 +00:00
4a68e9f6ae reorganize the debug implementation of the search results and only display the meaningful information 2024-04-17 13:42:10 +02:00
206887c7a2 update the SearchQuery Debug implementation so it’s smaller and gives the most important information first 2024-04-17 12:57:19 +02:00
2f170fe2d5 Merge #4504
4504: Avoid clearing db in transform r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #4478



Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-04-17 10:41:00 +00:00
df29ba709a Make some cleaning in Arcs 2024-04-17 12:33:25 +02:00
2dd9dd6d0a remove the Health Seen analytic 2024-04-17 11:43:40 +02:00
3acfab2eb7 Fix PR comments 2024-04-17 10:55:51 +02:00
e1f27de51a remove the Stats Seen analytic 2024-04-16 18:49:41 +02:00
abae31aee0 remove the Task Seen analytic 2024-04-16 18:48:10 +02:00
70ce0095ea remove the Version Seen analytic 2024-04-16 18:48:03 +02:00
19137be0ea increase the default search time budget from 150ms to 1.5s 2024-04-16 18:09:49 +02:00
a1ea224da9 Fix tests 2024-04-16 17:29:34 +02:00
87a93ba47d fix clippy 2024-04-16 14:39:30 +02:00
eaf113ef34 Fix word pair proximity error when nothing has to be extracted 2024-04-16 14:39:30 +02:00
5ab901dd30 Fix tests 2024-04-16 14:39:30 +02:00
e5ae337aae Comeback to sorters in extract_word_docids
using buffers and merge the keys manually is less efficient
2024-04-16 14:39:30 +02:00
bad46f88d6 Fix embedder test 2024-04-16 14:39:30 +02:00
a489b406b4 fix test 2024-04-16 14:39:06 +02:00
02c3d6b265 finish work 2024-04-16 14:39:06 +02:00
b5e4a55af6 refactor faceted and searchable pipeline 2024-04-16 14:39:06 +02:00
a7e368aaa6 Create InnerIndexSettingsDiffs struct and populate it 2024-04-16 14:39:06 +02:00
893200ab87 Avoid clearing documents in transform 2024-04-16 14:39:06 +02:00
aabce52b1b Fix test 2024-04-16 14:39:06 +02:00
64079fc894 Do more iterations on the settings benchmarks 2024-04-16 14:39:06 +02:00
8fff5fc281 update tests 2024-04-16 14:39:06 +02:00
4089dd04a5 Merge #4568
4568: Fix some typos in comments r=curquiza a=yudrywet

# Pull Request

## Related issue
No

## What does this PR do?
fix some typos in comments

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: yudrywet <yudeyao@yeah.net>
2024-04-15 08:12:43 +00:00
cf864a1c2e chore: fix some typos in comments
Signed-off-by: yudrywet <yudeyao@yeah.net>
2024-04-14 20:11:34 +08:00
0661c86f16 Merge #4566
4566: Bring back changes from v1.7.6 to main r=irevoire a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-04-11 19:32:29 +00:00
a6c02f7684 Update version for the next release (v1.7.6) in Cargo.toml 2024-04-11 21:08:57 +02:00
89e72fab32 Update grenad to fix rare DB corruption 2024-04-11 21:06:59 +02:00
171b41be24 Merge #4560
4560: Bring back change from v1.7.5 to main r=curquiza a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: irevoire <irevoire@users.noreply.github.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2024-04-09 16:58:30 +00:00
c26d356a35 Merge branch 'main' into release-v1.7.5-tmp 2024-04-09 14:46:15 +02:00
d6b6cd322c Update sprint_issue.md (#4556) 2024-04-05 18:40:28 +02:00
217fbc777f Merge #4554
4554: Update version for the next release (v1.7.5) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: irevoire <irevoire@users.noreply.github.com>
2024-04-04 18:03:04 +00:00
c2c73c1f25 Merge #4553
4553: update h2 r=curquiza a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4551


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-04-04 17:23:00 +00:00
7a49a056fa Update version for the next release (v1.7.5) in Cargo.toml 2024-04-04 16:33:45 +00:00
fd4be26718 update h2 2024-04-04 18:27:16 +02:00
b1844b0c27 Merge #4548
4548: v1.8 hybrid search changes r=dureuill a=dureuill

Implements the search changes from the [usage page](https://meilisearch.notion.site/v1-8-AI-search-API-usage-135552d6e85a4a52bc7109be82aeca42#40f24df3da694428a39cc8043c9cfc64)

### ⚠️ Breaking changes in an experimental feature:

- Removed the `_semanticScore`. Use the `_rankingScore` instead.
- Removed `vector` in the response of the search (output was too big).
- Removed all the vectors from the `vectorSort` ranking score details
  - target vector appearing in the name of the rule
  - matched vector appearing in the details of the rule

### Other user-facing changes

- Added `semanticHitCount`, indicating how many hits were returned from the semantic search. This is especially useful in the hybrid search.
- Embed lazily: Meilisearch no longer generates an embedding when the keyword results are "good enough".
- Graceful embedding failure in hybrid search: when doing hybrid search (`semanticRatio in ]0.0, 1.0[`), an embedding failure no longer causes the search request to fail. Instead, only the keyword search is performed. When doing a full vector search (`semanticRatio==1.0`), a failure to embed will still result in failing that search.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-04-04 16:00:20 +00:00
a9013ed683 Fix comment mistake
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-04-04 17:21:47 +02:00
ca499a0302 Fix test after rebase 2024-04-04 16:04:07 +02:00
355e5282b2 Remove _semanticScore 2024-04-04 16:04:07 +02:00
7c27417a5d Add tests 2024-04-04 16:04:07 +02:00
1ff2a2d6fb Add semanticHitCount 2024-04-04 16:04:06 +02:00
3c6e9851a4 Correct error formatting 2024-04-04 15:58:19 +02:00
4564a38ae7 Bail earlier when the experimental feature is not enabled 2024-04-04 15:58:19 +02:00
466d718a05 Fix test 2024-04-04 15:58:19 +02:00
6ebb6b55a6 Lazily embed, don't fail hybrid search on embedding failure 2024-04-04 15:58:17 +02:00
fabc9cf14a milli: add Embedder::embed_one 2024-04-04 15:57:29 +02:00
00c4ed3bc2 milli: refactor getting embedder and embedder name 2024-04-04 15:57:29 +02:00
190933f6e1 Breaking: Remove vector from SearchResult 2024-04-04 15:57:29 +02:00
928e6e4c05 Breaking change: remove vector for score details 2024-04-04 15:57:29 +02:00
339a5e3431 Merge #4549
4549: Hugging Face embedder improvements r=dureuill a=dureuill

Architectural changes/Internal improvements

### 1. Prefer safetensors weights over pytorch weights when available

safetensors weights are memory mapped, which reduces memory usage of supported models.

### 2. Update candle

Updates candle to `0.4.1`, now targeting crates.io and the tokenizers to `v0.15.2` (still on github).

This might fix https://github.com/meilisearch/meilisearch/issues/4399 thanks to the now included https://github.com/huggingface/candle/issues/1454

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-04-04 13:47:18 +00:00
5509bafff8 Merge #4535
4535: Support Negative Keywords r=ManyTheFish a=Kerollmops

This PR fixes #4422 by supporting `-` before any word in the query.

The minus symbol `-`, from the ASCII table, is not the only character that can be considered the negative operator. You can see the two other matching characters under the `Based on "-" (U+002D)` section on [this unicode reference website](https://www.compart.com/en/unicode/U+002D).

It's important to notice the strange behavior when a query includes and excludes the same word; only the derivatives (synonyms and splits) will be kept:
 - If you input `progamer -progamer`, the engine will still search for `pro gamer`.
 - If you have the synonym `like = love` and you input `like -like`, it will still search for `love`.

## TODO
 - [x] Add analytics
 - [x] Add support to the `-` operator
 - [x] Make sure to support spaces around `-` well
 - [x] Support phrase negation
 - [x] Add tests


Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-04-04 13:10:27 +00:00
90e812fc0b Add some tests 2024-04-04 15:08:37 +02:00
58cafcc824 Update candle 2024-04-03 13:11:56 +02:00
56bf8503db Merge #4537
4537: Expose distribution shift in settings r=ManyTheFish a=dureuill

See [usage page](https://meilisearch.notion.site/v1-8-AI-search-API-usage-135552d6e85a4a52bc7109be82aeca42#d652adc0890445658aaf36352dbc8802)

# Changes

- Distribution shift added to all embedders.
- Exposed in settings
- Changed the reindexing logic to not trigger a reindex operation when only the distribution shift or API key change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-04-03 09:08:58 +00:00
a1eccc762a Prefer safetensors to pytorch when both are available 2024-04-03 11:05:59 +02:00
75f81a0bab Merge #4547
4547: Fix milli/Cargo.toml for usage as dependency via git r=dureuill a=Toromyx

# Pull Request

## Related issues/discussions
This enables the usage of `milli` [via git repository](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#specifying-dependencies-from-git-repositories) as mentioned in <https://github.com/meilisearch/meilisearch/issues/3367#issuecomment-1422613815>, <https://github.com/meilisearch/meilisearch/discussions/1523#discussioncomment-1039338>, and <https://github.com/meilisearch/meilisearch/discussions/1981#discussioncomment-1771568>

## What does this PR do?
Trying to depend on `milli` like

```
[dependencies.milli]
git = "https://github.com/meilisearch/meilisearch.git"
tag = "v1.7.4"
```

leads to the following error:

```
error: failed to select a version for the requirement `candle-core = "^0.3.1"`
candidate versions found which didn't match: 0.4.2
location searched: Git repository https://github.com/huggingface/candle.git
required by package `milli v1.7.4 (https://github.com/meilisearch/meilisearch.git?tag=v1.7.4#0259ad60)`
```

because the default branch of <https://github.com/huggingface/candle> does not contain the correct version.

To fix this, I added a `rev="..."` entry in the relevant dependencies, specifying the commit already present in the `Cargo.lock` file.
I also updated the version to the one in the Cargo.lock. This also updated `candle-kernels` sub-dependency from 0.3.1 to 0.3.3 which is probably correct?

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Thomas Gauges <thomas.gauges@gmail.com>
2024-04-03 07:31:36 +00:00
d55d496250 Fix milli/Cargo.toml for usage as dependency via git 2024-04-02 15:19:30 +02:00
5080bef0d6 Merge #4546
4546: Fix some typos in conments r=curquiza a=redistay

# Pull Request



## What does this PR do?
- fix some typos in comments

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: redistay <wujunjing@outlook.com>
2024-04-02 12:07:09 +00:00
182cb42953 chore: fix some typos in conments
Signed-off-by: redistay <wujunjing@outlook.com>
2024-04-02 19:37:55 +08:00
92a049c2dd Merge #4543
4543: Bring back changes from v1.7.4 into main r=Kerollmops a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-03-28 16:53:51 +00:00
78668584cd Merge #4533
4533: Hide api key in settings and task queue r=dureuill a=dureuill

# Pull Request

See [Usage page](https://meilisearch.notion.site/v1-8-AI-search-API-usage-135552d6e85a4a52bc7109be82aeca42#117f5ff7b19f4d95bb3ae0005f6c6633)

## Motivation

See [slack discussion (internal link)](https://meilisearch.slack.com/archives/C06GQP7FQ6P/p1709804022298749)


## Changes

- The value of the `apiKey` parameter is now hidden in the settings and the details of the task queue.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-28 16:02:53 +00:00
fa9748cc99 Merge #4536
4536: Limit concurrent search requests r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4489

## What does this PR do?
- Adds a « search queue » that limits the number of search requests we can process at the same time and stores search requests to be processed
- Process only one search request per core/thread (we use available_parallelism)
- When the search queue is full, new search requests replace old ones **randomly**. The reason is that:
  - If we serve the oldest one first, like Typesense, we give the worst performances to everyone
  - If we serve the latest one, it gets too easy to DoS us (you just need to fill the queue with as many search requests as we can process simultaneously to ensure no other request will ever be processed)
  - By picking the search request randomly, we give a chance to recent search requests to be processed while ensuring that we can't be owned unless they fill our queue entirely and we start returning errors 5xx
- Adds an experimental parameter to control the size of the queue
- Adds a bunch of tests to ensure the search queue works correctly
- Ensure the loop consuming the search queue is running in the health route and crashes if it’s not the case

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-28 15:01:52 +00:00
877f4b1045 Support negative phrases 2024-03-28 15:51:43 +01:00
781e2d7750 Merge #4532
4532: Add `url` and `api_key` to ollama r=ManyTheFish a=dureuill

See [Usage page](https://meilisearch.notion.site/v1-8-AI-search-API-usage-135552d6e85a4a52bc7109be82aeca42#5c77ef49e78e43388c1d3d5429151357)

### Motivation

- Before this PR, the url for ollama is only read from the environment. This is a needless restriction that will be troublesome in settings where passing an environment variable is complex or impossible (e.g., the Cloud)
- Before this PR, ollama did not support an api_key. While ollama does not natively support API keys, [a common practice](https://github.com/ollama/ollama/issues/849) is to put a publicly accessible ollama server behind a proxy to support authentication.

### Skip changelog

ollama embedder was added to v1.8

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-28 12:35:19 +00:00
796213af9a Merge branch 'main' into tmp-release-v1.7.4 2024-03-28 10:51:49 +01:00
69f8b2730d Fix the tests 2024-03-28 10:47:04 +01:00
7385067c42 Merge #4542
4542: fixes typos r=irevoire a=brunoocasali

Just fix a typo 😬 

Co-authored-by: Bruno Casali <brunoocasali@gmail.com>
2024-03-27 18:21:48 +00:00
d1021c0f0d Merge #4520
4520: Add automation to create openAPI issue r=dureuill a=curquiza

Create automatically an issue to remind us to update open-api file when opening a milestone

Co-authored-by: curquiza <clementine@meilisearch.com>
2024-03-27 17:33:22 +00:00
8f2606d79d fixes typos 2024-03-27 14:26:47 -03:00
0259ad6082 Merge #4541
4541: Update version for the next release (v1.7.4) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2024-03-27 16:49:40 +00:00
06a11b5b21 Improve error message 2024-03-27 17:34:49 +01:00
b50f518764 Update version for the next release (v1.7.4) in Cargo.toml 2024-03-27 16:12:54 +00:00
94b7afcc55 Merge #4539
4539: Don't optimize reindexing when fields contain dots r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4525

## What does this PR do?
- Don't try to optimize the amount of reindexing operation when nested fields are used anywhere in:
    - the field distribution (e.g. a key actually contains a `.`)
    - the old faceted fields
    - the new faceted fields

This is because the facet distribution is not reporting on existing nested fields.



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-27 16:07:49 +00:00
ee8cbea810 Don't optimize reindexing when fields contain dots 2024-03-27 17:04:45 +01:00
b7c582e4f3 connect the search queue with the health route 2024-03-27 15:49:43 +01:00
03c886ac1b adds a bit of documentation 2024-03-27 15:38:36 +01:00
cde7ce4f44 Add test 2024-03-27 14:02:09 +01:00
92224f109a Fix tests 2024-03-27 12:19:10 +01:00
0d27d50740 Merge #4516
4516: Update sprint_issue.md r=Kerollmops a=curquiza

Following decision made about specification

Also
- removed useless parts of the template
- add automatic labels -> better to forget to remove them rather than forgetting to add them (some mistakes happened in the past)

Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2024-03-27 11:04:06 +00:00
572fb3a51d Finer granularity for embedder needs reindex 2024-03-27 12:01:34 +01:00
4ff0255783 remove unused function 2024-03-27 11:51:14 +01:00
a25456120d Expose distribution in settings 2024-03-27 11:51:04 +01:00
168ded3b9d Deserr for distribution 2024-03-27 11:50:33 +01:00
afd1da5642 Add distribution to all embedders 2024-03-27 11:50:22 +01:00
087a96d22e fix flaky test 2024-03-27 11:05:37 +01:00
34dfea72cc Merge #4509
4509: Rest embedder r=ManyTheFish a=dureuill

Fixes #4531 

See [Usage page](https://meilisearch.notion.site/v1-8-AI-search-API-usage-135552d6e85a4a52bc7109be82aeca42?pvs=25#e6f58c3b742c4effb4ddc625ce12ee16)

### Implementation changes

- Remove tokio, futures, reqwests
- Add a new `milli::vector::rest::Embedder` embedder
- Update OpenAI and Ollama embedders to use the REST embedder internally
- Make Embedder::embed a sync method
- Add the new embedder source as described in the usage


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-27 09:27:46 +00:00
3a1f458139 fix a flaky test 2024-03-26 21:06:55 +01:00
55df9daaa0 adds a comment about the safety of an operation 2024-03-26 19:34:55 +01:00
2e36f069c2 fmt imports 2024-03-26 19:23:55 +01:00
8f5d9f501a update the discussion link 2024-03-26 19:18:32 +01:00
8127c9a115 handle the case of a queue of zero elements 2024-03-26 19:04:39 +01:00
e7704f1fc1 add a test to ensure we effectively return a retry-after when the search queue is full 2024-03-26 18:08:59 +01:00
34262c7a0d Add analytics for the negative operator 2024-03-26 18:01:27 +01:00
e2a1bbae37 simplify and improve the http error 2024-03-26 17:53:37 +01:00
1da9e0f246 Better support space around the negative operator (-) 2024-03-26 17:47:13 +01:00
e4a3e603b3 Expose a first working version of the negative keyword 2024-03-26 17:47:13 +01:00
e433fd53e6 rename the method to get a permit and use it in all search requests 2024-03-26 17:28:03 +01:00
3f23fbb46d create the experimental CLI argument 2024-03-26 16:43:40 +01:00
c41e1274dc push and test the search queue datastructure 2024-03-26 15:56:43 +01:00
9a95ed619d Add tests 2024-03-26 10:36:56 +01:00
f82d056072 Hide secrets in settings and task queue 2024-03-26 10:36:24 +01:00
5ea017b922 Merge #4530
4530: fix: set the histogram bucket boundaries to follow the otel spec r=curquiza a=rohankmr414

# Pull Request

## What does this PR do?
- Fixes the http request duration histogram bucket boundaries to follow the opentelemetry spec, currently the bucket boundaries are too granular and only track latencies below 1s.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Rohan Kumar <rohankmr414@gmail.com>
2024-03-25 12:23:31 +00:00
817ccc089a also allow api_key 2024-03-25 11:50:00 +01:00
2ddd872ce6 Merge #4373
4373: feat: add status code label to prometheus http request counter r=irevoire a=rohankmr414

# Pull Request

## What does this PR do?
- This PR adds the `status` label (the value is http status code) to the `meilisearch_http_requests_total` metric.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Rohan Kumar <rohankmr414@gmail.com>
2024-03-25 10:40:50 +00:00
4136630ea5 Use constants instead of raw strings in set_*set() 2024-03-25 11:39:33 +01:00
58972f35cb Allow url parameter for ollama embedder 2024-03-25 11:32:55 +01:00
dfa5e41ea6 Check validity of the URL setting 2024-03-25 11:23:16 +01:00
a1db342f01 Expose REST embedder to the API 2024-03-25 11:23:15 +01:00
f87747f4d3 Remove unwraps 2024-03-25 11:23:04 +01:00
b6b4b6bab7 Remove the tokio and the reqwests 2024-03-25 11:23:03 +01:00
f649f58013 embed no longer async 2024-03-25 11:23:03 +01:00
ac52c857e8 Update ollama and openai impls to use the rest embedder internally 2024-03-25 11:23:03 +01:00
8708cbef25 Add RestEmbedder 2024-03-25 11:23:03 +01:00
c3d02f092d OpenAI sync 2024-03-25 11:23:03 +01:00
bc58e8a310 Documentation for the vector module 2024-03-25 11:23:03 +01:00
ec81c2bf1a Merge #4511
4511: Bump charabia to 0.8.8 r=ManyTheFish a=6543

... and update lock file

this will add the fix (https://github.com/meilisearch/charabia/pull/275) to support markdown formatted codeblocks

Co-authored-by: 6543 <6543@obermui.de>
2024-03-25 09:26:11 +00:00
13a84ae557 fix: set the histogram bucket boundaries to follow the otel spec 2024-03-25 11:20:30 +05:30
325435ad43 feat: add request rate and error rate panels to grafana dashboard 2024-03-25 10:49:40 +05:30
5833070358 feat: add status code label to prometheus http request counter 2024-03-25 10:49:40 +05:30
ae3c31a82c Merge #4526
4526: chore: remove repetitive word r=curquiza a=availhang

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: availhang <mayangang@outlook.com>
2024-03-22 16:06:54 +00:00
9865c58046 chore: remove repetitive words
Signed-off-by: availhang <mayangang@outlook.com>
2024-03-22 15:23:13 +08:00
bf95438ea8 Merge #4522
4522: Brings back change to main r=curquiza a=irevoire

# Pull Request

Bring back changes to main

Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: irevoire <irevoire@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-03-21 15:57:50 +00:00
48d012c3e2 Merge branch 'main' into tmp-release-v1.7.3 2024-03-21 16:39:38 +01:00
8394be9484 Add automation to create openAPI issue 2024-03-21 15:52:11 +01:00
414fc14426 Merge #4519
4519: Update version for the next release (v1.7.3) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2024-03-21 11:21:56 +00:00
3b8e8b7f1a Update version for the next release (v1.7.3) in Cargo.toml 2024-03-21 11:20:30 +00:00
c67f04c746 Update sprint_issue.md 2024-03-20 18:45:56 +01:00
fc1c3f4a29 Merge #4466
4466: Implements the search cutoff r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4488

## What does this PR do?
- Adds a cutoff to the bucket sort after 150ms has been spent
- Adds a new setting to customize the default value of 150ms
- When the time is exceeded, we exit early with what we had the time to sort
- If the cutoff has been reached, the search details are updated with a new `Skip` ranking details for the ranking rules that were skipped
- Adds analytics to measure the total number of degraded search requests
- Adds the number of degraded search requests to the Prometheus metrics and Grafana dashboard
- The cutoff **must not** skip the filters; otherwise, we would leak documents to people who don’t have the right to see them


Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-20 13:06:53 +00:00
f2f1367ec3 add a timeout to the webhook 2024-03-20 13:59:43 +01:00
18f17ed728 Update version for the next release (v1.7.2) in Cargo.toml 2024-03-20 13:59:42 +01:00
4628b7b7bd bump charabia to 0.8.8
and update lock file
2024-03-20 13:39:00 +01:00
d49250358d Merge #4513
4513: Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1" r=Kerollmops a=irevoire

This reverts commit bd74cce86a, reversing changes made to d2f77e88bd.

This commit wasn’t supposed to be merged on the `release-v1.7.1` branch


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-20 09:57:24 +00:00
5046ffdf54 Merge #4512
4512: Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" r=Kerollmops a=irevoire

Reverts meilisearch/meilisearch#4510

This PR was supposed to be merged on `release-v1.7.1` not main 🤦 

Co-authored-by: Tamo <irevoire@protonmail.ch>
2024-03-20 09:14:43 +00:00
c5322df519 Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00
6079141ea6 snapshot the scores side by side with the score details 2024-03-19 18:30:14 +01:00
2c3af8e513 query the detailed score detail in the test 2024-03-19 18:09:02 +01:00
098ab594eb A score of 0.0 is now lesser than a sort result
handles the niche case 🐩 in the hybrid search where:
1. a sort ranking rule is the first rule.
2. the keyword search is skipped at the first rule.
3. the semantic search is not skipped at the first rule.

Previously, we would have the skipped search winning, whereas we want the non skipped one winning.
2024-03-19 17:32:32 +01:00
c495c8eb33 Merge #4510
4510: Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1" r=Kerollmops a=irevoire

In https://github.com/meilisearch/meilisearch/pull/4502 we merged main into release-v1.7.1 instead of a temporary branch thus we now need to revert this merge commit.

This reverts commit bd74cce86a, reversing changes made to d2f77e88bd.


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-19 16:02:24 +00:00
567194b925 Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"
This reverts commit bd74cce86a, reversing
changes made to d2f77e88bd.
2024-03-19 16:56:21 +01:00
d8fe4fe49d return the order in the score details 2024-03-19 15:45:04 +01:00
7b9e0d2944 forward the degraded parameter to the hybrid search 2024-03-19 15:11:21 +01:00
0ae39644f7 fix the facet search 2024-03-19 15:07:06 +01:00
bfec9468d4 Update milli/src/search/mod.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-19 14:49:15 +01:00
5233534dc0 Merge #4477
4477: Add documentation for benchmarks r=dureuill a=dureuill

See [CONTRIBUTING.md](https://github.com/meilisearch/meilisearch/blob/benchmark-docs/CONTRIBUTING.md#logging)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-19 13:23:48 +00:00
fced2ff9ab Merge #4502
4502: Release v1.7.1 r=dureuill a=Kerollmops

Bring the v1.7.1 changes back to main.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2024-03-19 12:41:28 +00:00
bd74cce86a Merge remote-tracking branch 'origin/main' into release-v1.7.1 2024-03-19 13:39:17 +01:00
f85c80d059 Merge #4503
4503: Add settings diff indexing benchmarks r=dureuill a=ManyTheFish

Add several benchmarks targeting settings diff-indexing enhancements

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-03-19 10:35:46 +00:00
2a92c04100 Adding new assets 2024-03-19 11:31:32 +01:00
4369e9e97c add an error code test on the setting 2024-03-19 11:14:28 +01:00
e8516f00c4 move settings workload in root workload directory 2024-03-19 10:41:30 +01:00
7bd881b9bc adds the degraded searches to the prometheus dashboard 2024-03-19 10:35:47 +01:00
6a0c399c2f rename the search_cutoff parameter to search_cutoff_ms 2024-03-19 10:35:47 +01:00
038c26c118 stop returning the degraded boolean when a search was cutoff 2024-03-19 10:35:47 +01:00
ad9192fbbf reduce the size of an integration test 2024-03-19 10:35:47 +01:00
b8cda6c300 fix the search cutoff and add a test 2024-03-19 10:35:47 +01:00
b72495eb58 fix the settings tests 2024-03-19 10:28:23 +01:00
d1db495119 add a settings for the search cutoff 2024-03-19 10:28:23 +01:00
4a467739cd implements a first version of the cutoff without settings 2024-03-19 10:28:21 +01:00
29e71eedc7 Add benchmarks 2024-03-18 18:31:28 +01:00
10d053cd2f Merge #4500
4500: Don't display dimensions as 0 when it is not set r=ManyTheFish a=dureuill

Fixes regression in embedders where `dimensions: 0` was displayed when it hadn't been set for the `openAi` source.

Was breaking a PHP SDK integration test: cbaecb8c55/tests/Settings/EmbeddersTest.php (L28)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-18 15:21:24 +00:00
a302e258bd Don't display dimensions as 0 when it is not set 2024-03-18 16:10:12 +01:00
29840473b4 Merge #4499
4499: Fix milli link in contributing doc r=curquiza a=mohsen-alizadeh

# Pull Request

## Related issue
Fixes #4498

## What does this PR do?
 The milli link in CONTRIBUTING.md targeted the archived milli repository. It has to be changed to target the milli crate in the main repo.

## PR checklist
Please check if your PR fulfills the following requirements:
- [X] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [X] Have you read the contributing guidelines?
- [X] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Mohsen Alizadeh <mohsen@alizadeh.us>
Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2024-03-18 14:39:26 +00:00
f4037c1a95 Update CONTRIBUTING.md
Co-authored-by: Clément Renault <renault.cle@gmail.com>
2024-03-18 15:39:01 +01:00
13cc62728b Fix milli link in contributing doc 2024-03-17 19:29:42 -07:00
f84bcb09e1 Merge #4491
4491: chore: remove repetitive words r=curquiza a=shuangcui

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: shuangcui <fliter@qq.com>
2024-03-14 17:44:01 +00:00
5c95b5c933 chore: remove repetitive words
Signed-off-by: shuangcui <fliter@qq.com>
2024-03-14 21:28:55 +08:00
0b7bebeeb6 Merge #4483
4483: Workflows: Fix reason param when benches are triggered from a comment. r=irevoire a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-13 17:05:30 +00:00
d2f77e88bd Merge #4479
4479: Skip reindexing when modifying unknown faceted fields r=dureuill a=Kerollmops

This PR improves Meilisearch's decision to reindex when a faceted field is added to the settings, but not a single document contains this field. It is effectively a waste of time to reindex documents when the engine does not know the field.

This is related to a conversation [we have with our biggest customer (internal link)](https://discord.com/channels/1006923006964154428/1101213808627830794/1217112918857089187). They have 170 million documents, so reindexing this amount would be problematic.

---

The image is available by using the following Docker command. You can see the advancement of the image's build [on the GitHub CI page](https://github.com/meilisearch/meilisearch/actions/runs/8251688778).

```
docker pull getmeili/meilisearch:prototype-no-reindex-unknown-fields-0
```

Here is the hand-made test that shows that when modifying unknown filterable attributes, here `lol`, it doesn't reindex. However, when modifying the known `genre` field, it does reindex. You can see all that by looking at the time spent processing the update.

```json
{
  "uid": 3,
  "indexUid": "movies",
  "status": "succeeded",
  "type": "settingsUpdate",
  "canceledBy": null,
  "details": {
    "filterableAttributes": [
      "genres"
    ]
  },
  "error": null,
  "duration": "PT9.237703S",
  "enqueuedAt": "2024-03-12T15:34:26.836083Z",
  "startedAt": "2024-03-12T15:34:26.836374Z",
  "finishedAt": "2024-03-12T15:34:36.074077Z"
},
{
  "uid": 2,
  "indexUid": "movies",
  "status": "succeeded",
  "type": "settingsUpdate",
  "canceledBy": null,
  "details": {
    "filterableAttributes": [
      "lol"
    ]
  },
  "error": null,
  "duration": "PT0.000751S",
  "enqueuedAt": "2024-03-12T15:33:53.563923Z",
  "startedAt": "2024-03-12T15:33:53.565259Z",
  "finishedAt": "2024-03-12T15:33:53.56601Z"
},
{
  "uid": 0,
  "indexUid": "movies",
  "status": "succeeded",
  "type": "documentAdditionOrUpdate",
  "canceledBy": null,
  "details": {
    "receivedDocuments": 31944,
    "indexedDocuments": 31944
  },
  "error": null,
  "duration": "PT3.120723S",
  "enqueuedAt": "2024-02-17T10:35:55.042864Z",
  "startedAt": "2024-02-17T10:35:55.043505Z",
  "finishedAt": "2024-02-17T10:35:58.164228Z"
}
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-03-13 16:23:32 +00:00
1d8c13f595 Merge #4487
4487: Update version for the next release (v1.7.1) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2024-03-13 15:41:10 +00:00
7f3c495f5c Update version for the next release (v1.7.1) in Cargo.toml 2024-03-13 14:49:21 +00:00
abd954755d Merge #4476
4476: Make the `/facet-search` route use the `sortFacetValuesBy` setting r=irevoire a=Kerollmops

This PR fixes #4423 by ensuring that the `/facet-search` route uses the `sortFacetValuesBy` setting.

Note for the documentation team (to be moved in the tracking issue): Using the new `sortFacetValuesBy` setting can slow down the facet-search requests as Meilisearch iterates over the whole list of facet values and computes the count of documents on every entry. That is hard, or even impossible, to optimize correctly.

### TODO
 - [x] Create a custom HashMap wrapper for the facet `OrderBy` settings.
         This wrapper will return the `OrderBy` setting of the facet, if not defined will use the default `*` one, and if not there either (strange) will fall back on the lexicographic one.
- [x] Create a `ValuesCollection` wrapper that implements the logic for the lexicographic and count order by.
  - [x] Use it when there is no search query.
  - [x] Use it when there is a search query with and without allowed typos.
  - [x] Do not change the original logic, only use a wrapper.
- [x] Add tests

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-03-13 14:36:14 +00:00
f3fc2bd01f Address some issues with preallocations 2024-03-13 15:22:14 +01:00
6fa3872268 Workflows: Fix reason param when benches are triggered from a comment. 2024-03-13 13:46:43 +01:00
6c9823d7bb Add tests to sortFacetValuesBy count 2024-03-13 11:59:39 +01:00
e0dac5a22f Simplify the algorithm by using the new facet values collection wrapper 2024-03-13 11:31:34 +01:00
b918b55c6b Introduce a new facet value collection wrapper to simplify the usage 2024-03-13 11:31:34 +01:00
07b1d0edaf Merge #4475
4475: Allow running benchmarks without sending results to the dashboard r=irevoire a=dureuill

Adds a `--no-dashboard` option to avoid sending results to the dashboard.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-13 09:59:52 +00:00
306b25ad3a Move the searchForFacetValues struct into a dedicated module 2024-03-13 10:24:21 +01:00
9f7a4fbfeb Return the facets of a placeholder facet-search sorted by count 2024-03-13 10:09:01 +01:00
5ed7b6a0b2 Merge #4456
4456: Add Ollama as an embeddings provider r=dureuill a=jakobklemm

# Pull Request

## Related issue
[Related Discord Thread](https://discord.com/channels/1006923006964154428/1211977150316683305)

## What does this PR do?
- Adds Ollama as a provider of Embeddings besides HuggingFace and OpenAI under the name `ollama`
- Adds the environment variable `MEILI_OLLAMA_URL` to set the embeddings URL of an Ollama instance with a default value of `http://localhost:11434/api/embeddings` if no variable is set
- Changes some of the structs and functions in `openai.rs` to be public so that they can be shared.
- Added more error variants for Ollama specific errors
- It uses the model `nomic-embed-text` by default, but any string value is allowed; however, it won't automatically check if the model actually exists or is an embedding model

Tested against Ollama version `v0.1.27` and the `nomic-embed-text` model.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Jakob Klemm <jakob@jeykey.net>
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2024-03-13 08:48:47 +00:00
ae67d5eef0 Update milli/src/vector/error.rs
Fix Meilisearch capitalization
2024-03-13 09:45:04 +01:00
88bc9556a9 Add Ollama dimension inference and add clearer errors
Instead of the user manually specifying the model dimensions it will now automatically get determined
Just like with hf.rs the word "test" gets embedded to determine the dimensions of the output
Add a dedicated error type for if the model doesn't exist (don't automatically pull it though) and set the fault of that error to be the user
2024-03-12 19:59:11 +01:00
ca4876fd10 Do not reindex when modifying unknown faceted field 2024-03-12 16:18:58 +01:00
d3a95ea2f6 Introduce a new OrderByMap struct to simplify the sort by usage 2024-03-12 13:56:56 +01:00
88d27949cd Add documentation for benchmarks 2024-03-12 10:56:16 +01:00
69c118ef76 Extract the facet order before extracting the facets values 2024-03-12 10:35:39 +01:00
d44e20aa89 Merge #4474
4474: Update cargo version r=irevoire a=curquiza

Fixes #4417

Co-authored-by: curquiza <clementine@meilisearch.com>
2024-03-12 09:27:22 +00:00
7b670a4afa Allow dry runs for benchmarks where reports are generated but not sent to the dashboard 2024-03-12 10:26:13 +01:00
fde209b7b6 Update cargo version 2024-03-12 10:20:07 +01:00
904b82a61d Merge #4473
4473: Bring back changes from v1.7.0 to main r=curquiza a=curquiza



Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2024-03-11 15:02:47 +00:00
8ec3e30d2b Merge branch 'main' into tmp-release-v1.7.0 2024-03-11 15:39:51 +01:00
0a59cb9734 Merge #4463
4463: Add tests when the field limit is reached r=Kerollmops a=irevoire

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/discussions/4429#discussioncomment-8689101

This user found out that the error message we’re supposed to return when the maximum number of attributes is reached is _not_ returned in some cases

## What does this PR do?
- This PR adds four tests around the maximum number of attributes:
  1. Add a document with u16::MAX + 1 fields - Meilisearch panics
  2. Add two documents which together add up to u16::MAX + 1 fields - Meilisearch returns the expected error
  3. Add a document with u16::MAX + 1 **nested fields** - No error message but the document isn’t indexed
  4. Add two documents which together add up to u16::MAX + 1 nested fields - Meilisearch doesn’t return any error but doesn’t index the document

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-07 10:36:54 +00:00
f053c280e1 add tests when the field limit is reached 2024-03-06 18:42:41 +01:00
ee3076d5ba Merge #4462
4462: Divide threshold by ten r=dureuill a=ManyTheFish

Change the facet incremental vs bulk indexing threshold to better fit our user needs, it might be changed in the future if we have more insights


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-03-06 13:05:38 +00:00
ab1224bfa7 Merge #4458
4458: Replace logging timer by spans r=Kerollmops a=dureuill

- Remove logging timer dependency.
- Replace last uses in search by spans

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-05 16:43:23 +00:00
eefc1c421e Merge #4459
4459: Put a bound on OpenAI timeout r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4460 

## What does this PR do?
- Makes sure that the timeout of the openai embedder is limited to max 1min, rather than the prior 15min+



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-05 15:18:51 +00:00
4d42a7af7c Merge #4445
4445: Add subcommand to run benchmarks r=irevoire a=dureuill

# Pull Request

## Related issue
Not user-facing, no issue

## What does this PR do?
- Adds a new `cargo xtask bench` subcommand that can run one or multiple workload files and report the results to a server
- A workload file is a JSON file with a specific schema
- Refactor our use of the `vergen` crate:
  - update to the beta `vergen-git2` crate
  - VERGEN_GIT_SEMVER_LIGHTWEIGHT => VERGEN_GIT_DESCRIBE
  - factor logic in a single `build-info` crate that is used both by meilisearch and xtask (prevents vergen variables from overriding themselves)
  - checked that defining the variables by hand when no git repo is available (docker build case) still works.
- Add CI to run `cargo xtask bench`

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-05 14:03:57 +00:00
7408db2a46 Meilisearch: fix date formatting 2024-03-05 14:56:48 +01:00
663629a9d6 Remove unused build dependency from xtask
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-05 14:45:06 +01:00
15c38dca78 Output RFC 3339 dates where we can
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-05 14:44:48 +01:00
7ee20b0895 Refactor xtask bench 2024-03-05 14:42:06 +01:00
0c216048b5 Cap timeout duration 2024-03-05 12:19:25 +01:00
36d17110d8 openai: Handle BAD_GATEWAY, be more resilient to failure 2024-03-05 12:18:54 +01:00
bdd428c22e Merge #4450
4450: Add the content type in the webhook + improve the test r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4436

## What does this PR do?
- Specify the content type of the webhook
- Ensure it’s the case in the test

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-05 10:36:53 +00:00
b130917933 add the content type in the webhook + improve the test 2024-03-05 11:22:29 +01:00
25f64ce7df Replace logging timer by spans 2024-03-05 11:05:42 +01:00
adcd848809 CI: Add bench workflows 2024-03-05 11:02:05 +01:00
84ae0cd456 Merge #4457
4457: Bump mio from 0.8.9 to 0.8.11 r=Kerollmops a=dependabot[bot]

Bumps [mio](https://github.com/tokio-rs/mio) from 0.8.9 to 0.8.11.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/tokio-rs/mio/blob/master/CHANGELOG.md">mio's changelog</a>.</em></p>
<blockquote>
<h1>0.8.11</h1>
<ul>
<li>Fix receiving IOCP events after deregistering a Windows named pipe
(<a href="https://redirect.github.com/tokio-rs/mio/pull/1760">tokio-rs/mio#1760</a>, backport pr:
<a href="https://redirect.github.com/tokio-rs/mio/pull/1761">tokio-rs/mio#1761</a>).</li>
</ul>
<h1>0.8.10</h1>
<h2>Added</h2>
<ul>
<li>Solaris support
(<a href="https://redirect.github.com/tokio-rs/mio/pull/1724">tokio-rs/mio#1724</a>).</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="0328bdef90"><code>0328bde</code></a> Release v0.8.11</li>
<li><a href="7084498512"><code>7084498</code></a> Fix warnings</li>
<li><a href="90d4fe00df"><code>90d4fe0</code></a> named-pipes: fix receiving IOCP events after deregister</li>
<li><a href="c710a307f8"><code>c710a30</code></a> Add v0.8.x to the CI</li>
<li><a href="c29e21c244"><code>c29e21c</code></a> Release v0.8.10</li>
<li><a href="f6a20da1c8"><code>f6a20da</code></a> Add Solaris operating system support (<a href="https://redirect.github.com/tokio-rs/mio/issues/1724">#1724</a>)</li>
<li>See full diff in <a href="https://github.com/tokio-rs/mio/compare/v0.8.9...v0.8.11">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=mio&package-manager=cargo&previous-version=0.8.9&new-version=0.8.11)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-03-05 09:35:17 +00:00
eee46b7537 Add first workloads 2024-03-05 10:13:11 +01:00
55f60a3638 Update .gitignore
- Ignore `/bench` directory for git purposes
- Ignore benchmark DB
2024-03-05 10:12:52 +01:00
c608b3f9b5 Factor vergen stuff to a build-info crate 2024-03-05 10:11:43 +01:00
86ce843f3d Add cargo xtask bench 2024-03-05 10:11:43 +01:00
b11df7ec34 Meilisearch: fix some wrong spans 2024-03-05 10:11:43 +01:00
6862caef64 Span Stats compute self-time 2024-03-05 10:11:43 +01:00
f75c7ac979 Compile xtask in --release 2024-03-05 10:11:43 +01:00
f07069094b Bump mio from 0.8.9 to 0.8.11
Bumps [mio](https://github.com/tokio-rs/mio) from 0.8.9 to 0.8.11.
- [Release notes](https://github.com/tokio-rs/mio/releases)
- [Changelog](https://github.com/tokio-rs/mio/blob/master/CHANGELOG.md)
- [Commits](https://github.com/tokio-rs/mio/compare/v0.8.9...v0.8.11)

---
updated-dependencies:
- dependency-name: mio
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-04 22:03:25 +00:00
eada6de261 Divide threshold by ten 2024-03-04 18:02:54 +01:00
d3004d8040 Implemented Ollama as an embeddings provider
Initial prototype of Ollama embeddings actually working, error handling / retries still missing.

Allow model to be any String and require dimensions parameter

Fixed rustfmt formatting issues

There were some formatting issues in the initial PR and this should now make the changes comply with the Rust style guidelines

Because I accidentally didn't follow the style guide for commits in my commit messages I squashed them into one to comply
2024-03-04 15:09:43 +01:00
f4a6261dea Merge #4453
4453: Don't test on nightly r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4441 better 😅 

## What does this PR do?
- No longer run tests on nightly

The motivation for this change is that we are now updating Rust at fixed points in time, and so no longer need nightly runs to ensure that a change won't get into stable and break our build at the worst possible moment.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-29 14:41:59 +00:00
9806a3e5f6 Don't test on nightly 2024-02-29 14:24:50 +01:00
a96b45dda7 Merge #4451
4451: Fix nightly build r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4441 

## What does this PR do?
- Change imports following https://github.com/rust-lang/rust/pull/117772

## Note

This one is going to be annoying a bit until the lint stabilizes:

- We only get the warning on nightly, so we will discover them when it runs in the CI that uses the nightly compiler (not on regular PRs)
- There's the case of `TryInto`/`TryFrom` traits. They have been added to the prelude in Rust edition 2021, so it means that `use`ing them is a warning on nightly for 2021 edition crates (most crates), but not `use`ing them is an error anywhere for 2018 Rust edition crates, such as `milli`

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-29 07:20:22 +00:00
452a343a2b Fix imports 2024-02-28 18:09:40 +01:00
b87485e80d Merge #4433
4433: Enhance facet incremental r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #4367
Fixes #4409

## What does this PR do?

- Add a test reproducing #4409
- Fix #4409 by removing a document from a level only if it is no more present in all the linked sub-level nodes
- Optimize facet Incremental indexing by creating or deleting a complete level once per field id instead of for each facet value
- Optimize facet Incremental indexing by doing the additions and the deletions in the same process instead of doing them separately


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-02-28 15:28:46 +00:00
147a67dc82 Merge #4446
4446: Do not omit vectors when importing a dump r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4447 

## What does this PR do?
- Correctly populate the maps of embedders before starting the indexing operations, while importing a dump


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-27 09:11:00 +00:00
716ffc07ee Build the embedders when importing a dump 2024-02-26 22:15:57 +01:00
b005eb3289 Merge #4435
4435: Make update file deletion atomic r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4432
Fixes https://github.com/meilisearch/meilisearch/issues/4438 by adding the logs the user asked for

## What does this PR do?
- Adds a bunch of logs to help debug this kind of issue in the future
- Delete the update files AFTER committing the update in the `index-scheduler` (thus, if a restart happens, we are able to re-process the batch successfully)
- Multi-thread the deletion of all update files.


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-02-26 17:54:40 +00:00
9e664d87eb Merge #4443
4443: Add GPU analytics r=dureuill a=dureuill

# Pull Request

## Related issue

Adds analytics indicating whether Meilisearch  was compiled with the `milli/cuda` feature.

Cc `@macraig` 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-26 17:13:45 +00:00
6dcb5219a0 Merge #4442
4442: Send custom task r=ManyTheFish a=irevoire

This PR has already been merged on main but was supposed to be merged on `release-v1.7.0` thus we need to merge it a second time; sorry 😓 

### This PR implements the necessary parameters for the High Availability

Introduce a new CLI flag called `--experimental-replication-parameters` that changes a few behaviors in the engine:
- [The auto-deletion of tasks is disabled](https://specs.meilisearch.com/specifications/text/0060-tasks-api.html#_2-technical-details)
- Upon registering a task, you can choose its task ID by sending a new header: `TaskId: 456645`. It must be a valid number, which must be superior to the last task id ever seen.
- Add the ability to « dry-register » a task. That means meilisearch will answer to you with a valid task ID like everything went well, but won’t actually write anything in the database. To do that, you need to use the `DryRun: true` header.
- Specification’s here: https://github.com/meilisearch/specifications/pull/266

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-02-26 15:20:16 +00:00
5e83bac448 Fix PR comments 2024-02-26 15:40:15 +01:00
0562818c2a fix and remove the file-store hack of /dev/null 2024-02-26 13:59:41 +01:00
a478392b7a create a test with the dry-run parameter enabled 2024-02-26 13:59:41 +01:00
bbf3fb88ca rename the cli parameter 2024-02-26 13:59:40 +01:00
60510e037b update the discussion link 2024-02-26 13:58:04 +01:00
36c27a18a1 implement the dry run ha parameter 2024-02-26 13:58:04 +01:00
1eb1c043b5 disable the auto deletion of tasks when the ha mode is enabled 2024-02-26 13:58:04 +01:00
507739bd98 add an experimental cli parameter to allow specifying your task id 2024-02-26 13:58:03 +01:00
eb25b07390 let you specify your task id 2024-02-26 13:56:31 +01:00
938149f814 Merge #4042
4042: Implements the new replication parameters r=ManyTheFish a=irevoire

### This PR implements the necessary parameters for the High Availability

- [ ] Update the spec

Introduce a new CLI flag called `--experimental-replication-parameters` that changes a few behaviors in the engine:
- [The auto-deletion of tasks is disabled](https://specs.meilisearch.com/specifications/text/0060-tasks-api.html#_2-technical-details)
- Upon registering a task, you can choose its task ID by sending a new header: `TaskId: 456645`. It must be a valid number, which must be superior to the last task id ever seen.
- Add the ability to « dry-register » a task. That means meilisearch will answer to you with a valid task ID like everything went well, but won’t actually write anything in the database. To do that, you need to use the `DryRun: true` header.

----

Old prototype `prototype-custom-task-id-0`:
-  Adds the capability to specify your own task ID via the `TaskId` http header
- Make the task IDs a u64 instead of a u32


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-02-26 11:37:34 +00:00
066a7a3cde takes only one read transaction per thread 2024-02-26 10:43:04 +01:00
55796406c5 Add GPU analytics 2024-02-26 10:41:47 +01:00
eb90f0b4fb fix and remove the file-store hack of /dev/null 2024-02-26 10:19:07 +01:00
c2e2003a80 create a test with the dry-run parameter enabled 2024-02-22 15:51:47 +01:00
91cdd502f8 When processing tasks, make the update file deletion atomic 2024-02-22 14:56:22 +01:00
a493a50825 Fix clippy 2024-02-22 14:53:33 +01:00
9d1f489a37 Fix facet incremental indexing 2024-02-21 18:42:16 +01:00
693ba8dd15 rename the cli parameter 2024-02-21 14:33:40 +01:00
e1a3eed1eb update the discussion link 2024-02-21 12:30:28 +01:00
05ae291989 implement the dry run ha parameter 2024-02-21 11:21:26 +01:00
6ba9994916 disable the auto deletion of tasks when the ha mode is enabled 2024-02-20 12:23:39 +01:00
01ae46dd80 add an experimental cli parameter to allow specifying your task id 2024-02-20 11:24:44 +01:00
12f5389ba7 Merge #4416
4416: Create automation when creating Milestone to create update-version issue r=curquiza a=curquiza

Following our discussion `@irevoire` -> we are missing a reminder to update the cargo version BEFORE rc0

Issue template [here](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-version-issue.md)

Co-authored-by: curquiza <clementine@meilisearch.com>
2024-02-20 08:47:29 +00:00
9ee4f55e6c let you specify your task id 2024-02-19 14:29:33 +01:00
865b415b3f Add test reproducing bug 2024-02-15 16:00:48 +01:00
5ee6aaddc4 Merge #4418
4418: Output logs to stderr r=dureuill a=irevoire

Output the logs to `stderr` instead of `stdout`. This was introduced in the `v1.7.0-rc.0` and is a bug; logs should always be outputted to stderr.

Fix #4419

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-02-15 14:31:37 +00:00
4148d391b8 move logs to stderr 2024-02-15 15:24:16 +01:00
88c6165e20 Merge #4410
4410: Implement the experimental log mode cli flag and log level updates at runtime r=dureuill a=irevoire

# Pull Request
This PR fixes two issues at once because they’re highly correlated in the codebase.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4415
Fixes https://github.com/meilisearch/meilisearch/issues/4413

## What does this PR do?
- It makes the fmt logger configurable to output json or human-readable logs (like we already do today)
- It moves the fmt logger under a `reload` layer so we can update its targets at runtime
- Add the possibility to stream logs in the json mode
- Adds an analytics for the new CLI flag

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-02-15 10:01:06 +00:00
d097431113 Update meilisearch/src/option.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-15 10:58:43 +01:00
1f8af81ba9 update the log mode discussion link 2024-02-15 10:32:48 +01:00
5d3bad4120 Update meilisearch/src/option.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-15 10:31:23 +01:00
d34692e30b Merge #4365
4365: Update charabia r=dureuill a=ManyTheFish

Update Charabia v0.8.7,

- Add Vietnamese Normalization (Ð and Đ into d)

Fixes #4357

Charabia versions:
- https://github.com/meilisearch/charabia/releases/tag/v0.8.6
- https://github.com/meilisearch/charabia/releases/tag/v0.8.7

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-02-14 16:57:25 +00:00
024de0dcf8 Create automation when creating Milestone to create update-version issue 2024-02-14 17:36:47 +01:00
a081da0d90 add support for the json format in the stream route 2024-02-14 15:34:39 +01:00
78e04520fc Update charabia version 2024-02-14 15:16:16 +01:00
72c1674a31 Merge #4350
4350: Make several indexing optimizations r=Kerollmops a=ManyTheFish

# Summary

Implement several enhancements to reduce the indexing time.

# Steps

- Compute the indexing chunk size dynamically based on the available threads and the data size
- Remove the merging step before the writing step and merge at the writing time
- Remove append function
- Make Facet search indexing incremental

# Running Indexing process

## `main`
Each type of data is written after a merging phase:
![Capture d’écran 2024-01-23 à 10 18 08](https://github.com/meilisearch/meilisearch/assets/6482087/6203c3ce-407c-46b4-8b83-04282da1bb16)

> Highlighted parts are the writings

## `remove-merging-phase-from-indexing`
When the extraction of a chunk is finished, the data is written:
![Capture d’écran 2024-01-23 à 10 18 18](https://github.com/meilisearch/meilisearch/assets/6482087/ab1307b4-d0a9-42ac-abbb-fdeb27ddf0d4)

> Highlighted parts are the writings

## Related

This PR removes the appending writes on several indexing parts, which may fix https://github.com/meilisearch/meilisearch/issues/4300. However, not all of the appending writes are removed. There are 2 remaining calls that could trigger this bug:
- When [putting embedders in the settings](b6fc181993/milli/src/update/settings.rs (L996))
- when [bulk indexing the facets](b6fc181993/milli/src/update/facet/bulk.rs (L150))


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-02-14 14:12:48 +00:00
03bb6372af Change is_batchable_with by mergeable_with 2024-02-14 11:50:22 +01:00
3beda8833d Fix and add logs 2024-02-14 11:46:30 +01:00
3b6544db6d Implement the experimental log mode cli flag 2024-02-13 18:09:15 +01:00
55e942cd45 buggy 2024-02-13 15:26:30 +01:00
48026aa75c fix PR comments 2024-02-13 15:19:01 +01:00
e5e811e2c9 Update milli/src/update/index_documents/extract/mod.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-02-13 14:22:21 +01:00
55de96f74e Update milli/src/update/facet/mod.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-02-13 14:22:10 +01:00
82b43e9a7f Merge #4400
4400: Upgrade rustls to 0.21.10 and ring to 0.17 r=curquiza a=hack3ric

# Pull Request

## What does this PR do?
- Upgrade dependencies that uses ring 0.16 so that they rely on ring 0.17 instead
- Use rustls 0.21 for actix-{http,tls}, since newer versions of rustls uses ring 0.17
- Fix some trivial breaking API changes caused by above

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Eric Long <i@hack3r.moe>
2024-02-12 13:17:40 +00:00
15dafde21d Merge #4401
4401: Update version for the next release (v1.7.0) in Cargo.toml r=irevoire a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: irevoire <irevoire@users.noreply.github.com>
2024-02-12 10:17:10 +00:00
290f6d15e7 Update version for the next release (v1.7.0) in Cargo.toml 2024-02-12 10:15:00 +00:00
39c83cb3d9 fix clippy 2024-02-12 09:12:54 +01:00
7efb1cae11 yield in loop when the channel is not disconnected 2024-02-12 09:12:54 +01:00
7877788510 fix logs 2024-02-12 09:12:54 +01:00
c02d585f5b Upgrade rustls to 0.21.10 and ring to 0.17 2024-02-12 14:32:29 +08:00
be1b054b05 Compute chunk size based on the input data size and the number of indexing threads 2024-02-08 17:28:37 +01:00
023c2d755f Merge #4391
4391: Tracing r=dureuill a=irevoire

# Pull Request

- [ ] Hide the parameters of the process batch
- [x] Make actix-web trace every call on every route
- [x] Remove all `env_logger`/`logs` dependencies
- [x] Be able to enable or disable the memory measurement using the `/logs` route parameters

See the following product discussion: https://github.com/orgs/meilisearch/discussions/721

Supersedes https://github.com/meilisearch/meilisearch/pull/4338

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4317

## What does this PR do?

Update the format of the logs from:
```
[2024-02-06T14:54:11Z INFO  actix_server::builder] starting 10 workers
```

to

```
2024-02-06T13:58:14.710803Z  INFO actix_server::builder: 200: starting 10 workers
```

First, run meilisearch with the route enabled via the feature flag:
- `cargo run --experimental-enable-logs-route`
- Or at runtime by sending the following payload:
```
curl \
  -X PATCH 'http://localhost:7700/experimental-features/' \
  -H 'Content-Type: application/json'  \
--data-binary '{
    "logsRoute": true
  }'
```

Then gather data from meilisearch by calling for example:
```
curl \
	-X POST http://localhost:7700/logs \
	-H 'Content-Type: application/json' \
	--data-binary '{
	    "mode": "fmt",
            "target": "milli=trace"
    }'
```

Once your operation is over, tell meilisearch to stop the route:
```
curl \
	-X DELETE http://localhost:7700/logs
```

----

In case you’re profiling code, you will be interested in the next command, which converts the output of the route to a format that the Firefox profiler can understand.

```bash
cargo run --release --bin trace-to-firefox -- 2024-01-17_17:07:55-indexing-trace.json
```

Then go to https://profiler.firefox.com and load it.
Note that we can also share the profiles using the https://share.firefox.dev website.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-02-08 14:16:56 +00:00
407ad753ed rust fmt 2024-02-08 15:11:42 +01:00
285aa15d2f make the mode camelCase instead of lowercase 2024-02-08 15:04:06 +01:00
bf43a3f60a fix typo 2024-02-08 15:04:06 +01:00
2c88131bb1 rename the fmt mode to human 2024-02-08 15:04:06 +01:00
35aa9d5904 fix an error message 2024-02-08 15:04:06 +01:00
cfb3e6b51f update the actix-web trace 2024-02-08 15:04:06 +01:00
1502382316 use debug instead of debug_span 2024-02-08 15:04:06 +01:00
ef994d84d0 Change error messages and fix tests 2024-02-08 15:04:06 +01:00
1b74010e9e Remove "with_line_numbers" 2024-02-08 15:04:06 +01:00
08af0e690c Structures a bunch of logs 2024-02-08 15:04:06 +01:00
d71b77f18b Add panic hook to log panics 2024-02-08 15:04:06 +01:00
c443ed7e3f delete inner .gitignore 2024-02-08 15:04:06 +01:00
db722d201a Write entries into database downgraded to trace level 2024-02-08 15:04:05 +01:00
91eb67e981 logs route: make memory profiling toggling usable 2024-02-08 15:04:05 +01:00
902d700a24 Tracing trace: toggle the profiling of memory at runtime 2024-02-08 15:04:05 +01:00
f70a615ed9 update the github discussion links 2024-02-08 15:04:05 +01:00
7ff722b72e get rid of the log dependencies everywhere 2024-02-08 15:04:05 +01:00
bcf7909bba add a profile_memory parameter disabled by default 2024-02-08 15:04:05 +01:00
ceb211c515 move the /logs route to the /logs/stream route 2024-02-08 15:04:05 +01:00
f3c34d5b8c Simplify MemoryStats fetching 2024-02-08 15:04:05 +01:00
4de2db6786 add back the actix-web logs 2024-02-08 15:04:05 +01:00
661baa716b logs route profile mode: don't barf bytes if the buffer is not empty 2024-02-08 15:04:05 +01:00
02dcaf07db Replace the procfs by libproc 2024-02-08 15:04:05 +01:00
d78ada07b5 spanstats: change field names 2024-02-08 15:04:05 +01:00
bc097d90cb tracing-trace: Spanstats deserializable + public fields 2024-02-08 15:04:05 +01:00
b393823f36 Replace stats_alloc with procfs 2024-02-08 15:04:05 +01:00
e773dfa9ba get rid of log in milli and add logs for the bucket sort 2024-02-08 15:04:05 +01:00
f158e96fe7 fix the auth 2024-02-08 15:04:05 +01:00
e23ec4886d fix the tests and add tests on the experimental features 2024-02-08 15:04:03 +01:00
7793ba67a4 hide the route logs behind a feature flag 2024-02-08 15:03:33 +01:00
80774148fd handle and test errors 2024-02-08 15:03:33 +01:00
bf5cea8b10 add a test 2024-02-08 15:03:33 +01:00
38e1c40f38 meilisearch: logs route disconnects in profile mode 2024-02-08 15:03:33 +01:00
afc0585c1c meilisearch: don't spawn a report every time Meilisearch starts 2024-02-08 15:03:33 +01:00
0e7a411d4d tracing-trace: introduce TraceWriter, trace now only exposes the channel 2024-02-08 15:03:33 +01:00
0f327f2821 tracing-trace: implement Error on Error 2024-02-08 15:03:33 +01:00
77254765e8 get rid of env logger and fix the tests 2024-02-08 15:03:33 +01:00
ce6e6ec2c5 stops profiling in a file by default 2024-02-08 15:03:32 +01:00
91a8f74763 Add cancel log route 2024-02-08 15:03:32 +01:00
abaa72e2bf start handling reloads with profiling 2024-02-08 15:03:32 +01:00
3c3a258a22 start exposing the profiling layer 2024-02-08 15:03:32 +01:00
73e66d5a97 Add dummy log when calling tasks 2024-02-08 15:03:32 +01:00
b8da117b9c Simplify stream implementation 2024-02-08 15:03:32 +01:00
5e52107474 better than before??? 2024-02-08 15:03:32 +01:00
bcf1c4dae5 make it compile and runtime error 2024-02-08 15:03:32 +01:00
50f84d43f5 init commit 2024-02-08 15:03:32 +01:00
f76cc0806e WIP: first draft at introducing a new log route 2024-02-08 15:03:32 +01:00
2f1abd2c03 nelson is not used anymore 2024-02-08 15:03:32 +01:00
dedc91e2cf use json lines 2024-02-08 15:03:32 +01:00
a61d8c59ff Add span stats processor 2024-02-08 15:03:32 +01:00
6e23040464 Use with tokio channel in Meilisearch 2024-02-08 15:03:32 +01:00
8febbf64ce Switch to tokio channel 2024-02-08 15:03:32 +01:00
b141c82a04 Support Events in trace layer 2024-02-08 15:03:32 +01:00
cc79cd0b04 Switch to a single view indicating current usage 2024-02-08 15:03:32 +01:00
256538ccb9 Refactor memory handling and add markers 2024-02-08 15:03:31 +01:00
ca8990394e Remove the stats_alloc from the default features 2024-02-08 15:03:31 +01:00
83fb2949c3 Give the allocator to the tracer when necessary 2024-02-08 15:03:31 +01:00
6cf703387d Format the bytes as human readable bytes
Uses the same `byte_unit` version as `meilisearch`
2024-02-08 15:03:31 +01:00
771861599b Logging the memory usage over time 2024-02-08 15:03:31 +01:00
7e47cea0c4 Add tracing to Meilisearch 2024-02-08 15:03:31 +01:00
5d7061682e Add tracing to milli 2024-02-08 15:03:31 +01:00
02e6c8a440 Add tracing to index-scheduler 2024-02-08 15:03:31 +01:00
89401d097b Add tracing-trace 2024-02-08 15:03:30 +01:00
72ebac1fbb Merge #4388
4388: Cap the maximum memory of the grenad sorters r=curquiza a=Kerollmops

This PR clamps the memory usage of the grenad sorters to a reasonable maximum. Grenad sorters are opened on multiple threads at a time. This can result in higher memory usage than expected, even though it shouldn't consume more than the memory available.

Fixes #4152.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-02-08 13:19:28 +00:00
a616a1d37b Merge #4389
4389: Stabilize scoreDetails r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4359

## What does this PR do?

### User standpoint

- Users no longer need to enable the `scoreDetails` experimental feature to use `showRankingScoreDetails` in search queries.
- ⚠️ **Breaking change**: sending an object containing the key `"scoreDetails"` to the `/experimental-features` route is now an error. However, importing a dump of a database where that feature was enabled completes successfully.

### Implementation standpoint
- remove `scoreDetails` from the experimental features
- remove check on the experimental feature `scoreDetails` before accepting `showRankingScoreDetails`
- remove `scoreDetails` from the accepted fields in the `/experimental-features` route
- fix tests accordingly

## Manual tests

1. exported a dump with the `scoreDetails` feature enabled on `main`
    - tried to import the dump after the changes in this PR
    - the dump imported successfully
2. tried to make a search with `showRankingScoreDetails: true`
    - the ranking score details are displayed
    - an automated test case also exists and passes
3. tried to enable the `scoreDetails` in `/experimental-features`
    - get error message 
      ```
       Unknown field `scoreDetails`: expected one of `vectorStore`, `metrics`, `exportPuffinReports`
      ```

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-08 10:40:00 +00:00
3e120619fa Merge #4375
4375: Feat: add new OpenAI models and ability to override dimensions r=dureuill a=Gosti

# Pull Request

Fixes #4394 

## Related discussion
https://github.com/orgs/meilisearch/discussions/677#discussioncomment-8306384

## What does this PR do?
- Add text-embedding-3-small
- Add text-embedding-3-large
- Add optional dimensions parameter for both new models


## Note
As the dimensions option is not available for text-embedding-ada-002, I've added a manual check to prevent its use, but I feel it could be implemented in more idiomatic Rust.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Gosti <gostitsog@gmail.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-07 16:20:15 +00:00
a1caac9bfb Correct distribution shifts for new models 2024-02-07 15:09:16 +01:00
88d03c56ab Don't accept dimensions of 0 (ever) or dimensions greater than the default dimensions of the model 2024-02-07 11:52:09 +01:00
32ee05ccef Fix default dimensions for models 2024-02-07 11:52:09 +01:00
74c180267e pass dimensions only when defined 2024-02-07 11:52:08 +01:00
517f5332d6 Allow actually passing dimensions for OpenAI source
-> make sure the settings change is rejected or the settings task fails when the specified model doesn't support
overriding `dimensions` and the passed `dimensions` differs from the model's default dimensions.
2024-02-07 11:51:44 +01:00
9ac5750096 Retrieve the overridden dimensions from the configuration when fetching settings 2024-02-07 11:51:44 +01:00
7ae4013478 Make sure the overridden dimensions are always used when embedding 2024-02-07 11:51:44 +01:00
fb705116a6 feat: add new models and ability to override dimensions 2024-02-07 11:51:42 +01:00
053306c0e7 Try with 500MiB 2024-02-07 11:24:43 +01:00
84235a63df Merge #4360
4360: fix readme broken links r=curquiza a=Elliot67

# Pull Request

## What does this PR do?
- fix some links in the readme

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Elliot Lintz <45725915+Elliot67@users.noreply.github.com>
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2024-02-06 16:00:16 +00:00
29f8300ac7 Update README.md 2024-02-06 16:49:29 +01:00
05edd85d75 Stabilize scoreDetails 2024-02-06 11:15:19 +01:00
9eeb75d501 Clamp the max memory of the grenad sorters to a reasonable maximum 2024-02-06 10:47:04 +01:00
4792651462 Merge #4384
4384: Bump peter-evans/repository-dispatch from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [peter-evans/repository-dispatch](https://github.com/peter-evans/repository-dispatch) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/peter-evans/repository-dispatch/releases">peter-evans/repository-dispatch's releases</a>.</em></p>
<blockquote>
<h2>Repository Dispatch v3.0.0</h2>
<p>⚙️  Updated runtime to Node.js 20</p>
<ul>
<li>The action now requires a minimum version of <a href="https://github.com/actions/runner/releases/tag/v2.308.0">v2.308.0</a> for the Actions runner. Update self-hosted runners to v2.308.0 or later to ensure compatibility.</li>
</ul>
<h2>What's Changed</h2>
<ul>
<li>Bump prettier to fix deps by <a href="https://github.com/peter-evans"><code>`@​peter-evans</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/255">peter-evans/repository-dispatch#255</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.17.12 to 18.17.14 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/257">peter-evans/repository-dispatch#257</a></li>
<li>build(deps-dev): bump <code>`@​vercel/ncc</code>` from 0.36.1 to 0.38.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/258">peter-evans/repository-dispatch#258</a></li>
<li>build(deps): bump actions/checkout from 3 to 4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/259">peter-evans/repository-dispatch#259</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.17.14 to 18.17.16 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/261">peter-evans/repository-dispatch#261</a></li>
<li>build(deps): bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/262">peter-evans/repository-dispatch#262</a></li>
<li>build(deps-dev): bump jest-circus from 29.6.4 to 29.7.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/263">peter-evans/repository-dispatch#263</a></li>
<li>build(deps-dev): bump eslint from 8.48.0 to 8.49.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/264">peter-evans/repository-dispatch#264</a></li>
<li>Update distribution by <a href="https://github.com/actions-bot"><code>`@​actions-bot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/265">peter-evans/repository-dispatch#265</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.17.16 to 18.17.18 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/266">peter-evans/repository-dispatch#266</a></li>
<li>build(deps-dev): bump eslint-plugin-github from 4.10.0 to 4.10.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/267">peter-evans/repository-dispatch#267</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.17.18 to 18.18.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/268">peter-evans/repository-dispatch#268</a></li>
<li>build(deps-dev): bump eslint from 8.49.0 to 8.50.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/269">peter-evans/repository-dispatch#269</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.18.0 to 18.18.3 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/271">peter-evans/repository-dispatch#271</a></li>
<li>build(deps-dev): bump eslint-plugin-prettier from 5.0.0 to 5.0.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/275">peter-evans/repository-dispatch#275</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.18.3 to 18.18.5 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/274">peter-evans/repository-dispatch#274</a></li>
<li>build(deps-dev): bump eslint from 8.50.0 to 8.51.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/276">peter-evans/repository-dispatch#276</a></li>
<li>build(deps-dev): bump <code>`@​babel/traverse</code>` from 7.16.3 to 7.23.2 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/278">peter-evans/repository-dispatch#278</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.18.5 to 18.18.6 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/279">peter-evans/repository-dispatch#279</a></li>
<li>build(deps-dev): bump <code>`@​vercel/ncc</code>` from 0.38.0 to 0.38.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/280">peter-evans/repository-dispatch#280</a></li>
<li>build(deps-dev): bump eslint from 8.51.0 to 8.52.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/281">peter-evans/repository-dispatch#281</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.18.6 to 18.18.7 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/282">peter-evans/repository-dispatch#282</a></li>
<li>build(deps): bump actions/setup-node from 3 to 4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/283">peter-evans/repository-dispatch#283</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.18.7 to 18.18.8 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/284">peter-evans/repository-dispatch#284</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.18.8 to 18.18.9 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/285">peter-evans/repository-dispatch#285</a></li>
<li>build(deps-dev): bump eslint from 8.52.0 to 8.53.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/286">peter-evans/repository-dispatch#286</a></li>
<li>build(deps-dev): bump prettier from 3.0.3 to 3.1.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/287">peter-evans/repository-dispatch#287</a></li>
<li>build(deps-dev): bump eslint from 8.53.0 to 8.54.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/289">peter-evans/repository-dispatch#289</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.18.9 to 18.18.13 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/290">peter-evans/repository-dispatch#290</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.18.13 to 18.19.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/291">peter-evans/repository-dispatch#291</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.19.0 to 18.19.3 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/292">peter-evans/repository-dispatch#292</a></li>
<li>build(deps-dev): bump eslint from 8.54.0 to 8.55.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/293">peter-evans/repository-dispatch#293</a></li>
<li>build(deps-dev): bump prettier from 3.1.0 to 3.1.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/296">peter-evans/repository-dispatch#296</a></li>
<li>build(deps): bump actions/upload-artifact from 3 to 4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/295">peter-evans/repository-dispatch#295</a></li>
<li>build(deps-dev): bump eslint from 8.55.0 to 8.56.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/297">peter-evans/repository-dispatch#297</a></li>
<li>build(deps-dev): bump eslint-plugin-prettier from 5.0.1 to 5.1.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/298">peter-evans/repository-dispatch#298</a></li>
<li>build(deps-dev): bump eslint-plugin-prettier from 5.1.1 to 5.1.2 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/299">peter-evans/repository-dispatch#299</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.19.3 to 18.19.4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/300">peter-evans/repository-dispatch#300</a></li>
<li>build(deps-dev): bump eslint-plugin-prettier from 5.1.2 to 5.1.3 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/301">peter-evans/repository-dispatch#301</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.19.4 to 18.19.6 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/302">peter-evans/repository-dispatch#302</a></li>
<li>build(deps-dev): bump prettier from 3.1.1 to 3.2.4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/303">peter-evans/repository-dispatch#303</a></li>
<li>build(deps-dev): bump <code>`@​types/node</code>` from 18.19.6 to 18.19.8 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/304">peter-evans/repository-dispatch#304</a></li>
<li>feat: update runtime to node 20 by <a href="https://github.com/peter-evans"><code>`@​peter-evans</code></a>` in <a href="https://redirect.github.com/peter-evans/repository-dispatch/pull/305">peter-evans/repository-dispatch#305</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="ff45666b94"><code>ff45666</code></a> feat: update runtime to node 20 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/305">#305</a>)</li>
<li><a href="a4a90276d0"><code>a4a9027</code></a> build(deps-dev): bump <code>`@​types/node</code>` from 18.19.6 to 18.19.8 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/304">#304</a>)</li>
<li><a href="2605253283"><code>2605253</code></a> build(deps-dev): bump prettier from 3.1.1 to 3.2.4 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/303">#303</a>)</li>
<li><a href="ab3258eeef"><code>ab3258e</code></a> build(deps-dev): bump <code>`@​types/node</code>` from 18.19.4 to 18.19.6 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/302">#302</a>)</li>
<li><a href="240bc73193"><code>240bc73</code></a> build(deps-dev): bump eslint-plugin-prettier from 5.1.2 to 5.1.3 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/301">#301</a>)</li>
<li><a href="8aa15c54a0"><code>8aa15c5</code></a> build(deps-dev): bump <code>`@​types/node</code>` from 18.19.3 to 18.19.4 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/300">#300</a>)</li>
<li><a href="22aa07cf23"><code>22aa07c</code></a> build(deps-dev): bump eslint-plugin-prettier from 5.1.1 to 5.1.2 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/299">#299</a>)</li>
<li><a href="ba0298574b"><code>ba02985</code></a> build(deps-dev): bump eslint-plugin-prettier from 5.0.1 to 5.1.1 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/298">#298</a>)</li>
<li><a href="accfd7b5bf"><code>accfd7b</code></a> build(deps-dev): bump eslint from 8.55.0 to 8.56.0 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/297">#297</a>)</li>
<li><a href="3c7d964ae9"><code>3c7d964</code></a> build(deps): bump actions/upload-artifact from 3 to 4 (<a href="https://redirect.github.com/peter-evans/repository-dispatch/issues/295">#295</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/peter-evans/repository-dispatch/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=peter-evans/repository-dispatch&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-05 14:13:35 +00:00
58c3501b54 Bump peter-evans/repository-dispatch from 2 to 3
Bumps [peter-evans/repository-dispatch](https://github.com/peter-evans/repository-dispatch) from 2 to 3.
- [Release notes](https://github.com/peter-evans/repository-dispatch/releases)
- [Commits](https://github.com/peter-evans/repository-dispatch/compare/v2...v3)

---
updated-dependencies:
- dependency-name: peter-evans/repository-dispatch
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-01 17:05:50 +00:00
ff76d8f21a Merge #4382
4382: Bring back changes from `release-v1.6.1` into `main` r=curquiza a=dureuill

Bring back changes from release-v1.6.1 into main

Supersedes https://github.com/meilisearch/meilisearch/pull/4380 and #4381 

Third time's the charm

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
2024-02-01 11:16:31 +00:00
698ea5139d Update Cargo.lock 2024-02-01 10:40:23 +01:00
880e790bff Update Cargo.toml 2024-02-01 10:33:27 +01:00
fbf5f2a392 Don't use a runtime in extract_embedder, use it only for OpenAI 2024-02-01 10:33:27 +01:00
1555870088 Truncate HuggingFace vectors that are too long 2024-02-01 10:33:27 +01:00
9f8f3105d5 make clippy happy 2024-02-01 10:33:27 +01:00
318843aacd add a bunch of tests and fix the error message when adding the geosearch as filterable/sortable while there are malformed documents in the DB 2024-02-01 10:33:27 +01:00
6d111139b5 Add test 2024-02-01 10:33:27 +01:00
dff2707471 Use MatchingWords from keyword search instead of the one from vector search 2024-02-01 10:33:27 +01:00
c57f7f7379 Update version for the next release (v1.6.1) in Cargo.toml 2024-02-01 10:33:26 +01:00
b968616a99 Merge #4364
4364: Revert "Remove panic on the geosearch" r=curquiza a=irevoire

After more thought about it, we want to fix this bug in a patch release instead of `main`.
I am reverting this PR for now, but the fix will still land on `main` once we bring the `v1.6.1` changes back into `main`.

Reverts meilisearch/meilisearch#4337

Co-authored-by: Tamo <irevoire@protonmail.ch>
2024-01-25 18:01:08 +00:00
c1bf33a112 Revert "Remove panic on the geosearch" 2024-01-25 18:51:19 +01:00
ddc2b7129a fix readme broken links 2024-01-24 22:50:18 +01:00
b6fc181993 Merge #4304
4304: Add CUDA GPU support for Hugging Face embedders r=Kerollmops a=dureuill

Adds a "cuda" feature to `milli`.

Compiling with this feature requires that the CUDA support library be installed (see "with CUDA support" paragraph in https://huggingface.github.io/candle/guide/installation.html), and adds CUDA support to the `huggingFace` embedder.

To enable GPU support, users will need to:

1. Have a compatible NVidia GPU under Linux
2. Follow [the guide](https://huggingface.github.io/candle/guide/installation.html) to install the CUDA dependencies
3. Compile Meilisearch with the `cuda` feature: `cargo build --release --features cuda`

# Impact

Enabling the CUDA feature allows using an available GPU to compute embeddings with a `huggingFace` embedder. 
On an AWS Graviton 2, this yields a x3 - x5 improvement on indexing time.

# Technical details

- I had to change the CI so that the cuda feature is not included in the `Tests all features` workflow
- To achieve that, I had to add a binary following the `cargo xtask` design pattern, to list all features except the cuda one.
- I then changed the workflow accordingly (renamed to "Tests almost all features" 😉)
- A test run of the new feature was done on a temporary version of this PR that had it enabled for PRs: [See the results here](https://github.com/meilisearch/meilisearch/actions/runs/7461331929/job/20301216732)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-01-22 13:55:04 +00:00
388fce9e46 Merge #4345
4345: Bump h2 from 0.3.20 to 0.3.24 r=curquiza a=dependabot[bot]

Bumps [h2](https://github.com/hyperium/h2) from 0.3.20 to 0.3.24.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/hyperium/h2/releases">h2's releases</a>.</em></p>
<blockquote>
<h2>v0.3.24</h2>
<h2>Fixed</h2>
<ul>
<li>Limit error resets for misbehaving connections.</li>
</ul>
<h2>v0.3.23</h2>
<h2>What's Changed</h2>
<ul>
<li>cherry-pick fix: streams awaiting capacity lockout in <a href="https://redirect.github.com/hyperium/h2/pull/734">hyperium/h2#734</a></li>
</ul>
<h2>v0.3.22</h2>
<h2>What's Changed</h2>
<ul>
<li>Add <code>header_table_size(usize)</code> option to client and server builders.</li>
<li>Improve throughput when vectored IO is not available.</li>
<li>Update indexmap to 2.</li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/tottoto"><code>`@​tottoto</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/714">hyperium/h2#714</a></li>
<li><a href="https://github.com/xiaoyawei"><code>`@​xiaoyawei</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/712">hyperium/h2#712</a></li>
<li><a href="https://github.com/Protryon"><code>`@​Protryon</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/719">hyperium/h2#719</a></li>
<li><a href="https://github.com/4JX"><code>`@​4JX</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/638">hyperium/h2#638</a></li>
<li><a href="https://github.com/vuittont60"><code>`@​vuittont60</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/724">hyperium/h2#724</a></li>
</ul>
<h2>v0.3.21</h2>
<h2>What's Changed</h2>
<ul>
<li>Fix opening of new streams over peer's max concurrent limit.</li>
<li>Fix <code>RecvStream</code> to return data even if it has received a <code>CANCEL</code> stream error.</li>
<li>Update MSRV to 1.63.</li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/DDtKey"><code>`@​DDtKey</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/703">hyperium/h2#703</a></li>
<li><a href="https://github.com/jwilm"><code>`@​jwilm</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/707">hyperium/h2#707</a></li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/hyperium/h2/blob/v0.3.24/CHANGELOG.md">h2's changelog</a>.</em></p>
<blockquote>
<h1>0.3.24 (January 17, 2024)</h1>
<ul>
<li>Limit error resets for misbehaving connections.</li>
</ul>
<h1>0.3.23 (January 10, 2024)</h1>
<ul>
<li>Backport fix from 0.4.1 for stream capacity assignment.</li>
</ul>
<h1>0.3.22 (November 15, 2023)</h1>
<ul>
<li>Add <code>header_table_size(usize)</code> option to client and server builders.</li>
<li>Improve throughput when vectored IO is not available.</li>
<li>Update indexmap to 2.</li>
</ul>
<h1>0.3.21 (August 21, 2023)</h1>
<ul>
<li>Fix opening of new streams over peer's max concurrent limit.</li>
<li>Fix <code>RecvStream</code> to return data even if it has received a <code>CANCEL</code> stream error.</li>
<li>Update MSRV to 1.63.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="7243ab5854"><code>7243ab5</code></a> Prepare v0.3.24</li>
<li><a href="d919cd6fd8"><code>d919cd6</code></a> streams: limit error resets for misbehaving connections</li>
<li><a href="a7eb14a487"><code>a7eb14a</code></a> v0.3.23</li>
<li><a href="b668c7fbe2"><code>b668c7f</code></a> fix: streams awaiting capacity lockout (<a href="https://redirect.github.com/hyperium/h2/issues/730">#730</a>) (<a href="https://redirect.github.com/hyperium/h2/issues/734">#734</a>)</li>
<li><a href="0f412d8b9c"><code>0f412d8</code></a> v0.3.22</li>
<li><a href="c7ca62f69b"><code>c7ca62f</code></a> docs: fix typos (<a href="https://redirect.github.com/hyperium/h2/issues/724">#724</a>)</li>
<li><a href="ef743ecb22"><code>ef743ec</code></a> Add a setter for header_table_size (<a href="https://redirect.github.com/hyperium/h2/issues/638">#638</a>)</li>
<li><a href="56651e6e51"><code>56651e6</code></a> fix lint about unused import</li>
<li><a href="4aa7b16342"><code>4aa7b16</code></a> Fix documentation for max_send_buffer_size (<a href="https://redirect.github.com/hyperium/h2/issues/718">#718</a>)</li>
<li><a href="d03c54a80d"><code>d03c54a</code></a> chore(dependencies): update tracing minimal version to 0.1.35</li>
<li>Additional commits viewable in <a href="https://github.com/hyperium/h2/compare/v0.3.20...v0.3.24">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=h2&package-manager=cargo&previous-version=0.3.20&new-version=0.3.24)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-01-22 11:53:51 +00:00
d35fe43fd5 Update lock file 2024-01-22 10:49:17 +01:00
f692021bfc Implement PR comments 2024-01-22 10:25:56 +01:00
1b90778bf5 Change CI 2024-01-22 10:25:56 +01:00
66ae81a909 Make it so binary can be used with cargo xtask 2024-01-22 10:25:56 +01:00
4aa4a15dc9 Add to Cargo.lock 2024-01-22 10:25:54 +01:00
4b4e8ea2a4 Add binary to list features 2024-01-22 10:25:16 +01:00
84f49d76cd Add cuda feature 2024-01-22 10:25:16 +01:00
afb0e8eab9 Merge #4325
4325: Add Setting API reminder in issue template r=ManyTheFish a=ManyTheFish

When adding a new setting, several important points can be easily forgotten.
This PR adds a small reminder list of some of these points in the issue template.


Co-authored-by: Many the fish <many@meilisearch.com>
2024-01-22 09:02:27 +00:00
b5b2333a05 Bump h2 from 0.3.20 to 0.3.24
Bumps [h2](https://github.com/hyperium/h2) from 0.3.20 to 0.3.24.
- [Release notes](https://github.com/hyperium/h2/releases)
- [Changelog](https://github.com/hyperium/h2/blob/v0.3.24/CHANGELOG.md)
- [Commits](https://github.com/hyperium/h2/compare/v0.3.20...v0.3.24)

---
updated-dependencies:
- dependency-name: h2
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-01-19 16:20:22 +00:00
40fa0b4df6 Update .github/ISSUE_TEMPLATE/sprint_issue.md 2024-01-18 11:17:29 +01:00
ab4d614599 Update .github/ISSUE_TEMPLATE/sprint_issue.md
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-01-18 10:28:30 +01:00
262b20fdba Merge #4330
4330: Add job variable to grafana dashboard r=irevoire a=capJavert

# Pull Request

## Related issue
Fixes https://github.com/orgs/meilisearch/discussions/625#discussioncomment-8143282

## What does this PR do?

"meilisearch" as [job_name](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#job_name) was hardcoded in the dashboard config so if user sets anything but "meilisearch" as job_name on prometheus side the dashboard does not work.

With this change, the dashboard will auto-load the values from the data source (much like the instance variable) and show the correct data. This now also adds support for multiple meilisearch jobs in a single dashboard.

See: https://github.com/orgs/meilisearch/discussions/625#discussioncomment-8143282

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: capJavert <ante@kickass.website>
2024-01-17 15:48:24 +00:00
9020606c45 Merge #4337
4337: Remove panic on the geosearch r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes  #4333

## What does this PR do?
- Add tests for the enrich pipeline on malformed documents with `null` value
- Reproduce the issue when updating the settings while there are malformed documents in the DB
- Fix the bug


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-01-17 15:09:46 +00:00
0887186ecf make clippy happy 2024-01-17 16:07:10 +01:00
7d190d8078 add a bunch of tests and fix the error message when adding the geosearch as filterable/sortable while there is malformed documents in the DB 2024-01-17 15:51:52 +01:00
3b8a9597e2 Merge #4332
4332: Update the dependencies r=irevoire a=Kerollmops

This PR upgrades the dependencies and fixes #4287.

 - ~We keep arroy at the current commit. We will release and use the latest version published when possible~
 - We also updated arroy to 0.2.0.
 - I rolled back the version of rustls because the newer version has too many breaking changes.
 - I had to keep HTTP to 0.2.11 due to actix-cors.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-01-17 13:42:02 +00:00
f275554982 Make sure we override the default Rust version 2024-01-16 18:10:30 +01:00
d997ea1f01 Make Clippy happy 2024-01-16 17:10:48 +01:00
50e1d34c66 Rollback http to 0.2.11 2024-01-16 16:57:33 +01:00
406531c991 Fix sysinfo 2024-01-16 16:49:51 +01:00
01e2c3d6bb Bump arroy to v0.2.0 2024-01-16 16:45:55 +01:00
cfaa522d68 Bump the Rust version to 1.75.0 2024-01-16 16:36:54 +01:00
0c8d1644a6 Rollback rustls to 0.20.9 2024-01-16 15:55:16 +01:00
5e0268d40e Fix the sysinfo errors 2024-01-16 15:43:03 +01:00
9f9ad4cc05 Fix Clippy warnings 2024-01-16 15:27:24 +01:00
3ee7682fa7 Fix some integer comparisons 2024-01-16 15:22:23 +01:00
7f125bfb12 Update incompatible dependencies 2024-01-16 15:15:54 +01:00
5869ca7716 Upgrade all compatible dependencies 2024-01-16 15:05:03 +01:00
7a89abd2a0 Merge #4263
4263: Bump rustls-webpki from 0.101.3 to 0.101.7 r=irevoire a=dependabot[bot]

Bumps [rustls-webpki](https://github.com/rustls/webpki) from 0.101.3 to 0.101.7.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/rustls/webpki/releases">rustls-webpki's releases</a>.</em></p>
<blockquote>
<h2>0.101.7</h2>
<ul>
<li>Upgrades <code>*ring*</code> to 0.17, and <code>untrusted</code> to 0.9. Note: since <code>untrusted</code> appears in the <code>Error</code> API this may be a breaking change for applications using two <code>untrusted</code> versions.</li>
</ul>
<h2>What's Changed</h2>
<ul>
<li>Simplify tests for DER errors by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/rustls/webpki/pull/193">rustls/webpki#193</a></li>
<li>Upgrade to ring 0.17, untrusted 0.9 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/rustls/webpki/pull/193">rustls/webpki#193</a></li>
<li>Bump MSRV to 1.61 by <a href="https://github.com/djc"><code>`@​djc</code></a>` in <a href="https://redirect.github.com/rustls/webpki/pull/193">rustls/webpki#193</a></li>
<li>Upgrade to rcgen 0.11.3 by <a href="https://github.com/cpu"><code>`@​cpu</code></a>` in <a href="https://redirect.github.com/rustls/webpki/pull/189">rustls/webpki#189</a>, <a href="https://redirect.github.com/rustls/webpki/pull/195">rustls/webpki#195</a></li>
<li>v0.101.7 preparation by <a href="https://github.com/cpu"><code>`@​cpu</code></a>` in <a href="https://redirect.github.com/rustls/webpki/pull/199">rustls/webpki#199</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/rustls/webpki/compare/v/0.101.6...v/0.101.7">https://github.com/rustls/webpki/compare/v/0.101.6...v/0.101.7</a></p>
<h2>0.101.6</h2>
<ul>
<li>The <code>CertificateRevocationList</code> trait's <code>verify_signature</code> <code>Budget</code> argument was removed. This was a semver incompatible change mistakenly introduced in v0.101.5.</li>
</ul>
<h2>What's Changed</h2>
<ul>
<li>crl: rm Budget from verify_signature fn by <a href="https://github.com/cpu"><code>`@​cpu</code></a>` in <a href="https://redirect.github.com/rustls/webpki/pull/187">rustls/webpki#187</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/rustls/webpki/compare/v/0.101.5...v/0.101.6">https://github.com/rustls/webpki/compare/v/0.101.5...v/0.101.6</a></p>
<h2>0.101.5</h2>
<ul>
<li>Path building complexity is now limited to a maximum budget of path finding operations, avoiding exponential processing time when encountering certificate chains containing many certificates with the same subject/issuer distinguished name but different subject public key information.</li>
<li>Name constraints evaluation is now limited to a maximum number of comparison operations, avoiding exponential processing time when encountering certificate chains containing many name constraints and subject alternate names.</li>
<li>Subject common names are no longer parsed for name iteration, or applying name constraints. Webpki only uses Subject Alternate Names when validating certificates, and the common name handling was buggy, producing <code>Error::BadDer</code> when iterating certificates with printable string subject common names, or omitted common names encoded as an empty sequence.</li>
</ul>
<h2>What's Changed</h2>
<p>The following PRs were backported to the rel-0.101 branch in <a href="https://redirect.github.com/rustls/webpki/issues/170">#170</a>:</p>
<ul>
<li>Further limits on expensive path building (<a href="https://redirect.github.com/rustls/webpki/issues/163">#163</a>)</li>
<li>Budget tweaks (<a href="https://redirect.github.com/rustls/webpki/issues/164">#164</a>)</li>
<li>Bound name constraint comparisons (<a href="https://redirect.github.com/rustls/webpki/issues/165">#165</a>)</li>
<li>Remove subject common name parsing (<a href="https://redirect.github.com/rustls/webpki/issues/169">#169</a>, thanks to <a href="https://github.com/hawkw"><code>`@​hawkw</code></a>)</li>`
<li>Correct handling of fatal errors (<a href="https://redirect.github.com/rustls/webpki/issues/168">#168</a>)</li>
</ul>
<p>Thanks to all who have contributed, on behalf of the rustls team (<a href="https://github.com/ctz"><code>`@​ctz</code></a>,` <a href="https://github.com/cpu"><code>`@​cpu</code></a>` and <a href="https://github.com/djc"><code>`@​djc</code></a>)!</p>`
<h2>0.101.4</h2>
<h2>Release notes</h2>
<ul>
<li>certificate path building and verification is now capped at 100 signature validation operations to avoid the risk of CPU usage denial-of-service attack when validating crafted certificate chains producing quadratic runtime. This risk affected both clients, as well as servers that verified client certificates.</li>
</ul>
<h2>What's Changed</h2>
<ul>
<li>v0.101.4 prep by <a href="https://github.com/cpu"><code>`@​cpu</code></a>` in <a href="https://redirect.github.com/rustls/webpki/pull/153">rustls/webpki#153</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/rustls/webpki/compare/v/0.101.3...v/0.101.4">https://github.com/rustls/webpki/compare/v/0.101.3...v/0.101.4</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="ee5aab1dff"><code>ee5aab1</code></a> Cargo: v0.101.6 -&gt; v0.101.7</li>
<li><a href="4f721a901f"><code>4f721a9</code></a> Upgrade to rcgen 0.11.3</li>
<li><a href="3be3625584"><code>3be3625</code></a> Bump MSRV to 1.61</li>
<li><a href="bb7c7f47ab"><code>bb7c7f4</code></a> Upgrade to ring 0.17, untrusted 0.9</li>
<li><a href="2eeb2920cf"><code>2eeb292</code></a> Simplify tests for DER errors</li>
<li><a href="7956538ee7"><code>7956538</code></a> Cargo: v0.101.5 -&gt; v0.101.6</li>
<li><a href="7f8208ec06"><code>7f8208e</code></a> crl: rm <code>Budget</code> from <code>verify_signature</code> fn</li>
<li><a href="7cb6c646a0"><code>7cb6c64</code></a> Cargo: bump version 0.101.4 -&gt; 0.101.5</li>
<li><a href="2dd2a06016"><code>2dd2a06</code></a> verify_cert: use enum for build chain error</li>
<li><a href="c255d61a6a"><code>c255d61</code></a> verify_cert: correct handling of fatal errors</li>
<li>Additional commits viewable in <a href="https://github.com/rustls/webpki/compare/v/0.101.3...v/0.101.7">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rustls-webpki&package-manager=cargo&previous-version=0.101.3&new-version=0.101.7)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-01-16 13:55:49 +00:00
d9d0419845 Update the dependencies 2024-01-16 14:38:48 +01:00
5dc8d9e9bf feat: add job variable to dashboard
meilisearch job_name was hardcoded in the dashboard config

so if user sets anything but meilisearch as job_name on

prometheus side the dashboard does not work

see: https://github.com/orgs/meilisearch/discussions/625#discussioncomment-8143282
2024-01-16 12:44:37 +01:00
9e12a91afb Update .github/ISSUE_TEMPLATE/sprint_issue.md 2024-01-16 11:04:50 +01:00
8e016fbfeb Merge #4319
4319: Update README r=curquiza a=codesmith-emmy

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: emmanuel <154705254+codesmith-emmy@users.noreply.github.com>
2024-01-15 18:41:14 +00:00
1ccde9bf0b Merge #4316
4316: Autobatch the task deletions r=curquiza a=irevoire

# Pull Request

## Related issue
Fix part of https://github.com/meilisearch/meilisearch-support/issues/69
Fix #4315 

## What does this PR do?
- Autobatch the task deletions

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-01-15 17:54:50 +00:00
34e814f400 Merge #4327
4327: Bring back changes from `release-v1.6.0` to `main` r=dureuill a=curquiza



Co-authored-by: Paul Sanders <psanders1@gmail.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
2024-01-15 16:52:05 +00:00
857cd09285 Add Setting API reminder in issue template
When adding a new setting, there are several important points that can be easily forgotten.
This PR adds a small reminder list of some of these points.
2024-01-15 11:19:13 +01:00
a6fa0b97ec Merge #4318
4318: Hide embedders r=ManyTheFish a=dureuill

Hides `embedders` when it is an empty dictionary.

Manual tests:

- getting settings with empty embedders: not displayed
- getting settings with non-empty embedders: displayed like before
- dump with empty embedders: can be imported
- dump with non-empty embedders: can be imported

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-01-15 09:37:31 +00:00
552127021f Update 2024-01-12 16:03:23 +01:00
38abfec611 Fix tests 2024-01-11 21:35:30 +01:00
84a5c304fc Don't display the embedders setting when it is an empty dict 2024-01-11 21:35:06 +01:00
e93d36d5b9 Merge #4313
4313: Fix document formatting performances r=Kerollmops a=ManyTheFish

reduce the formatted option list to the attributes that should be formatted,
instead of all the attributes to display.
The time to compute the `format` list scales with the number of fields to format;
combined with `map_leaf_values`, which iterates over all the nested fields, it gives a quadratic complexity:
`d*f` where `d` is the total number of fields to display and `f` is the total number of fields to format.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-01-11 14:19:44 +00:00
95f8e21533 fix typos 2024-01-11 15:07:08 +01:00
b4d7d80ad9 autobatch the task deletions 2024-01-11 14:58:07 +01:00
68f197624e Merge #4314
4314: Fix proximity precision telemetry r=Kerollmops a=ManyTheFish

The proximity precision telemetry was partially missing in the global setting route.
This PR adds the missing field and returns the default value when the value is not set.


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-01-11 13:50:03 +00:00
b79b03d4e2 Fix proximity precision telemetry 2024-01-11 13:24:26 +01:00
86270e6878 Transform fields contained into _format into strings 2024-01-11 12:44:56 +01:00
81b6128b29 Update tests 2024-01-11 12:28:32 +01:00
5f5a486895 Reduce formatting time 2024-01-11 11:36:41 +01:00
5f4fc6c955 Add timer logs 2024-01-11 09:44:16 +01:00
1f5e8fc072 Merge #4311
4311: Limit the number of values returned by the facet search r=dureuill a=Kerollmops

This PR fixes a bug where the number of values per facet returned by the `indexes/{index}/facet-search` route was not taking the `faceting.maxValuePerFacet` setting into account. It also adds a test.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-01-10 16:04:06 +00:00
3f3462ab62 Limit the number of values returned by the facet search 2024-01-10 16:54:08 +01:00
93363b0201 Merge #4308
4308: Fix hang on `/indexes` and `/stats` routes r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #4218 

## Context

- A previous fix added a field to the `IndexScheduler` to memorize the `currently_updating_index`, so that accessing it through the search would return the handle without trying to open it. This resolved a hang on the search, but #4218 reported further hangs on the `/indexes` and `/stats` routes
- These routes were shunting the `IndexScheduler` and using internal `IndexMapper` logic to access the indexes, again trying to reopen the updating index.

## What does this PR do?

- Moves the logic relative to the `currently_updating_index` from the `IndexScheduler` to the `IndexMapper`, so that any index request to the `IndexMapper` can benefit from it.

## Test

1. Follow reproducer from #4218 
2. Before this PR, notice a hang on `/stats` and `/indexes`, but not on `/indexes/<updating_index>/search`
3. After this PR, notice no hang on either of `/stats`, `/indexes` or `/indexes/<updating_index>/search`



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-01-10 10:46:20 +00:00
97bb1ff9e2 Move currently_updating_index to IndexMapper 2024-01-09 15:37:27 +01:00
5ee1378856 Merge #4303
4303: Display default value when proximityPrecision is not set r=dureuill a=ManyTheFish

# Pull Request

## Related
Issue: #4187
Spec change requests: https://github.com/meilisearch/specifications/pull/261#discussion_r1441725272

## What does this PR do?
- Display default value when proximityPrecision is not set instead of Null


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-01-08 14:29:57 +00:00
e27b850b09 move the default display strategy on setting getter function 2024-01-08 14:03:47 +01:00
f75f22e026 Display default value when proximityPrecision is not set 2024-01-08 11:09:37 +01:00
6203f4acef Merge #4296
4296: Fix single element search r=irevoire a=dureuill

# Pull Request

Before this PR, indexing a single vector in a single document would result in the vector not being found by the vector search.

This PR adds a test case for this condition, and resolves it by bumping arroy to a version containing the fix.

# Test case

Output of the test before and after this PR:

```diff
diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs
index 2cd4b83e7..79819cab2 100644
--- a/meilisearch/tests/search/hybrid.rs on release-v1.6.0
+++ b/meilisearch/tests/search/hybrid.rs on fix-single-element-search
@@ -171,5 +171,5 @@ async fn single_document() {
     .await;

     snapshot!(code, @"200 OK");
-    snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.0}"###);
+    snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
 }
```



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-01-03 15:01:43 +00:00
12edc2c20a Update arroy to a fixed version 2024-01-03 15:59:37 +01:00
94b9f3b310 Add test 2024-01-03 15:56:20 +01:00
5204c0b60b Merge #4297
4297: Update license for 2024 r=curquiza a=meili-bot

_This PR is auto-generated._


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2024-01-03 13:54:19 +00:00
e73cd692db Update LICENSE 2024-01-03 14:32:41 +01:00
29b453346b Merge #4293
4293: Update SDK test dependencies r=curquiza a=curquiza

Replace dependabot updates

The changes have little impact on the engine team's velocity because this is about a CI
- that does not run during release deployment
- that does not run to merge a PR

It's only a weekly scheduled CI to check the breaking changes we introduced in the integrations.

I updated the dependencies based on what we do on the integration CIs
For example for dart, I looked at what we have in the [Dart CI](63fd758882/.github/workflows/tests.yml (L16-L54)) and I updated our CI in this repo accordingly. I did the same for each repository. This ensures we test the same things.


Co-authored-by: curquiza <clementine@meilisearch.com>
2024-01-03 13:26:50 +00:00
c4bb435374 Merge #4295
4295: fix compilation warnings on main r=curquiza a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4292

## What does this PR do?
- Removed unused imports

#4294 fixes the issue for the release v1.6

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-01-02 15:33:06 +00:00
da99a04eb3 Merge #4294
4294: fix compilation warnings for release v1.6 r=curquiza a=irevoire

# Pull Request

## Related issue
Fixes #4292

## What does this PR do?
- Removed unused imports

#4295 fixes the issue on main

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-01-02 15:00:40 +00:00
54ae6951eb fix warning 2024-01-02 15:19:30 +01:00
2bcff2ea46 fix warning 2024-01-02 15:19:00 +01:00
1275e72e0b Update SDK test dependencies 2024-01-02 09:59:46 +01:00
658ec6e0a4 Merge #4279
4279: Check experimental feature on setting update query rather than in the task. r=ManyTheFish a=dureuill

Improve the UX by checking for the vector store feature and returning an error synchronously when sending a setting update, rather than in the indexing task.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-12-22 11:36:12 +00:00
43e822e802 Merge #4238
4238: Task queue webhook r=dureuill a=irevoire

# Prototype `prototype-task-queue-webhook-1`

The prototype is available through Docker by using the following command:

```bash
docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-task-queue-webhook-1
```

# Pull Request

Implements the task queue webhook.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4236

## What does this PR do?
- Provide a new cli and env var for the webhook, respectively called `--task-webhook-url` and `MEILI_TASK_WEBHOOK_URL`
- Also supports sending the requests with a custom `Authorization` header by specifying the optional `--task-webhook-authorization-header` CLI parameter or `MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER` env variable.
- Throw an error if the specified URL is invalid
- Every time a batch is processed, send all the finished tasks into the webhook with our public `TaskView` type as a JSON Line GZIPed body.
- Add one test.

## PR checklist

### Before becoming ready to review
- [x] Add a test
- [x] Compress the data we send
- [x] Chunk and stream the data we send
- [x] Remove the unwrap in the index-scheduler when sending the data fails
- [x] The analytics are missing

### Before merging
- [x] Release a prototype



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-12-21 14:43:46 +00:00
ee54d3171e Check experimental feature at query time 2023-12-21 15:26:12 +01:00
a0e713c4e7 Merge #4277
4277: Update mini-dashboard to v0.2.12 r=curquiza a=mdubus

# Pull Request

## Related issue
Fixes #4276

## What does this PR do?
Upgrade mini-dashboard to version 0.2.12 ([see changes](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.12))

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
2023-12-21 11:03:46 +00:00
d4cb0a885b Merge #4275
4275: Flatten settings r=dureuill a=dureuill

# Pull Request

## Related issue
Initial internal feedback seems to indicate that the current shape of the `embedders` setting is undesirable: it has too much depth.

This PR changes this by flattening the structure of the embedders to the following:

```json5
// NEW structure
"embedders": {
  // still starts with the embedder name
  "default": {
    "source": "huggingFace", // now a string
    // properties of the source are all at the same level as the source
    "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd",
    "documentTemplate": "A product titled '{{doc.title}}'" // now a string
  }
}
```

By comparison, the old structure was:

```json5
// PREVIOUS version, no longer working with this PR
"embedders": {
  // still starts with the embedder name
  "default": {
    "source": {
      "huggingFace": {
        "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd"
      }
    },
    "documentTemplate": { 
      "template": "A product titled '{{doc.title}}'" // now a string
    }
  }
}
```

The fields accepted in the new version of the `embedders` setting depend on the value of the `source` field:

```json5
// huggingFace
"embedders": {
   "default": {
    "source": "huggingFace",
    "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "revision": "a9c555277f9bcf24f28fa5e092e665fc6f7c49cd",
    "documentTemplate": "A product titled '{{doc.title}}'"
  }
}

// openAi
"embedders": {
   "default": {
    "source": "openAi",
    "model": "text-embedding-ada-002",
    "apiKey": "open_ai_api_key",
    "documentTemplate": "A product titled '{{doc.title}}'"
  }
}

// userProvided
"embedders": {
   "default": {
    "source": "userProvided",
    "dimensions": 42, // mandatory
  }
}
```

## What does this PR do?
- Flatten the settings structure
- Validate the prompt earlier to return a synchronous error on setting change rather than in the failing task
- Make it an error to pass a field for the wrong source (see above for allowed fields for each source)
- Not changed: It is still an error not to pass `dimensions` to the `userProvided` embedder
- If `source` was specified in the settings, validate the setting early to return a synchronous error in case of a missing mandatory field for the userProvided source (dimensions) or a forbidden field for the specified source.
- If `source` was not specified in the settings, still validate the setting, but only at indexing time, by using the source stored in the DB.
- Resets all values if the source changes, even if the user did not reset them explicitly.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Change the public facing guide for using the API
- [ ] Change examples of use in the changelog


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-12-21 09:58:01 +00:00
f52dee2b3b Update Cargo.toml
Update mini-dashboard with v0.2.12
2023-12-21 09:53:13 +01:00
0bf879fb88 Fix warning on rust stable 2023-12-20 17:48:09 +01:00
6ff81de401 Fix tests 2023-12-20 17:16:46 +01:00
2e4c9651df Validate settings in route 2023-12-20 17:16:46 +01:00
ec9649c922 Add function to validate settings in Meilisearch, to be used in the routes 2023-12-20 17:16:46 +01:00
9123370e90 Validate fused settings in settings task after fusing with existing setting 2023-12-20 17:16:46 +01:00
14b396d302 Add new errors 2023-12-20 17:16:45 +01:00
393216bf30 Flatten embedders settings 2023-12-20 17:16:43 +01:00
e249e4db7b Change Setting::apply function signature 2023-12-20 17:15:24 +01:00
de2ca7006e Merge #4272
4272: Don't pass default revision when the model is explicitly set in config r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #4271 

## What does this PR do?

- When the `model` is explicitly set in the `embedders` setting, we reset the `revision` to `None`, such that if the user doesn't specify a revision, the head of the model repository is chosen. 
- Not changed: If the user specifies a revision, it applies, like previously. 
- Not changed: If the user doesn't specify a model, the default model with the default revision applies, like previously.

## Manual testing on a fresh DB

1. Enable experimental feature:
```sh
curl \
  -X PATCH 'http://localhost:7700/experimental-features/' \
  -H 'Content-Type: application/json' -H 'Authorization: Bearer foo' \
--data-binary '{ "vectorStore": true
  }'
```
2. Send settings with a specified model but no specified revision:
```sh
curl \
-X PATCH 'http://localhost:7700/indexes/products/settings' \
-H 'Content-Type: application/json' --data-binary \
'{ "embedders": { "default": { "source": { "huggingFace": { "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" } }, "documentTemplate": { "template": "A product titled '{{doc.title}}'"} } } }'
```
3. Check that the task was successful:
```sh
curl 'http://localhost:7700/tasks/0'

{"uid":0,"indexUid":"products","status":"succeeded","type":"settingsUpdate","canceledBy":null,"details":{"embedders":{"default":{"source":{"huggingFace":{"model":"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"}},"documentTemplate":{"template":"A product titled {{doc.title}}"}}}},"error":null,"duration":"PT0.001892S","enqueuedAt":"2023-12-20T09:17:01.73789Z","startedAt":"2023-12-20T09:17:01.73854Z","finishedAt":"2023-12-20T09:17:01.740432Z"}
```
4. Send documents to index:
```sh
curl 'https://localhost:7700/indexes/products/documents' -H 'Content-Type: application/json' --data-binary '{"id": 0, "title": "Best product"}'
```

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-12-20 14:27:51 +00:00
333ce12eb2 Fixed issue where the default revision is always the one we picked for the default model 2023-12-20 10:17:49 +01:00
fb9db1eba6 Merge #4269
4269: Remove dependency that requires libstdc++ r=dureuill a=dureuill

Removes the dependency that caused the additional runtime dependency on libstdc++ by disabling the default features of the hf tokenizer.

## Discussion

- This removes a feature that is using a C++ dependency and is supposed to accelerate the tokenizer. As the tokenizer is likely to be a significant bottleneck for embedding texts using a HF model, this is an issue.
- We should at least rerun the movies vector indexing and check that it still works correctly and that it has a runtime in the ballpark of what it used to be.

Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
2023-12-19 12:26:48 +00:00
fa2b96b9a5 Add an Authorization Header along with the webhook calls 2023-12-19 12:18:45 +01:00
19736cefe8 add the analytics 2023-12-19 10:36:04 +01:00
4fb25b8782 fix clippy 2023-12-19 10:35:51 +01:00
c83a33017e stream and chunk the data 2023-12-19 10:35:51 +01:00
be72326c0a gzip the tasks 2023-12-19 10:35:51 +01:00
547379abb0 parse the url correctly 2023-12-19 10:35:51 +01:00
0b2fff27f2 update and fix the test 2023-12-19 10:35:51 +01:00
3adbc2b942 return a task view instead of a task 2023-12-19 10:35:51 +01:00
fbea721378 add a first working test with actixweb 2023-12-19 10:35:51 +01:00
391eb72137 start writing a test with actix but it doesn't work 2023-12-19 10:35:50 +01:00
d78ad51082 Implement the webhook 2023-12-19 10:35:50 +01:00
1956045a06 add the option 2023-12-19 10:23:56 +01:00
b2193e612f Revert "Add libstdc++ in Dockerfile" as it is no longer needed
This reverts commit 9df8cfc013.
2023-12-18 22:17:29 +01:00
942d49314c Remove dependency that requires libstdc++ 2023-12-18 22:17:18 +01:00
9a846e82bc Merge #4268
4268: Add libstdc++ in Dockerfile r=curquiza a=sanders41

# Pull Request

## Related issue
Fixes #4267

## What does this PR do?
- Add libstdc++ in the Dockerfile

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Paul Sanders <psanders1@gmail.com>
2023-12-18 18:35:53 +00:00
9df8cfc013 Add libstdc++ in Dockerfile 2023-12-18 13:05:46 -05:00
d868131bb7 Bump rustls-webpki from 0.101.3 to 0.101.7
Bumps [rustls-webpki](https://github.com/rustls/webpki) from 0.101.3 to 0.101.7.
- [Release notes](https://github.com/rustls/webpki/releases)
- [Commits](https://github.com/rustls/webpki/compare/v/0.101.3...v/0.101.7)

---
updated-dependencies:
- dependency-name: rustls-webpki
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-12-18 14:57:38 +00:00
248aaa6d45 Merge #4262
4262: Update version for the next release (v1.6.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-12-18 14:00:19 +00:00
50d6317ec0 Update version for the next release (v1.6.0) in Cargo.toml 2023-12-18 13:57:46 +00:00
b734bd9891 Merge #4261
4261: Set rust toolchain to 1.71.1 in dockerfile r=curquiza a=dureuill

Fixes docker [CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-12-18 12:32:26 +00:00
9800d5a103 Set rust toolchain to 1.71.1 in dockerfile 2023-12-18 10:59:25 +01:00
7c4ed07617 Merge #4257
4257: Change proximity precision settings r=dureuill a=ManyTheFish

- [x] Add proximity_precision value into the analytics
- [x] Change the naming of `attributeScale` and `wordScale` into `byAttribute` and `byWord`
- [x] Remove proximityPrecision from the experimental feature

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2023-12-18 09:07:28 +00:00
3a99a555a2 Fix experimental features snapshots in tests 2023-12-18 10:05:51 +01:00
9e1b458010 Merge branch 'main' into change-proximity-precision-settings 2023-12-18 09:08:47 +01:00
2aede03bc2 Merge #4226
4226: Hybrid search r=dureuill a=dureuill

Allows performing hybrid search requests that combine the results of semantic and keyword search and automatically generate embeddings.

## How to use

See [feature description](https://meilisearch.notion.site/v1-6-Hybrid-Search-Embedders-ea42c82f90cc4bc0be1eeb917c1118c8)

## Changes

- work is based on #4213 
- milli::new search now takes an input universe directly, rather than computing it from a filter. This adds flexibility to require results on a subset of documents
- vector search is now a regular ranking rule (akin to sort and geosort) and reports its score as a ScoreDetail
- separate keyword search and vector search functions, vector search now respects (geo)sort ranking rules
- add automatic embedding
- add hybrid search

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-12-14 16:24:56 +00:00
e741bc1c62 Add proximity_precision value into the analytics 2023-12-14 16:48:06 +01:00
6425996e36 Change the naming of attributeScale and wordScale into byAttribute and byWord 2023-12-14 16:31:00 +01:00
eb5cb91da2 Switch default from hf to openai 2023-12-14 16:19:46 +01:00
87bba98bd8 Various changes
- fixed seed for arroy
- check vector dimensions as soon as it is provided to search
- don't embed whitespace
2023-12-14 16:08:42 +01:00
217105b7da hybrid search uses semantic ratio, error handling 2023-12-14 16:08:42 +01:00
1b7c164a55 Pass the semantic ratio to milli 2023-12-14 16:08:42 +01:00
f3f3944469 Fix error checking 2023-12-14 16:08:42 +01:00
93dcbf598d Deserialize semantic ratio 2023-12-14 16:08:42 +01:00
ac68f33194 Add simple test 2023-12-14 16:08:42 +01:00
9991152bbe Add TODOs 2023-12-14 16:08:42 +01:00
a4536b1381 Small adjustments to respect the spec 2023-12-14 16:08:42 +01:00
5b51cb04af Remove some settings 2023-12-14 16:08:42 +01:00
3c1a14f1cd Add settings routes 2023-12-14 16:08:42 +01:00
b8e4709dfa Remove prompt strategy and fallback 2023-12-14 16:08:41 +01:00
806e5b6899 Tests pass 2023-12-14 16:08:41 +01:00
61bd2fb7a9 Update arroy 2023-12-14 16:08:41 +01:00
e0cc775dc4 Various changes
- DistributionShift in Search object (to be set from model in embed?)
- Fix issue where embedder index wasn't computed at search time
- Accept as default embedder either the "default" one, or the only embedder when there is only one
2023-12-14 16:08:41 +01:00
12940d79a9 WIP
- manual embedder
- multi embedders OK
- clippy + tests OK
2023-12-14 16:08:41 +01:00
922a640188 WIP multi embedders
fixed template bugs
2023-12-14 16:08:41 +01:00
abbe131084 Cosmetic change 2023-12-14 16:08:41 +01:00
d4715e0c4d Fix same vector sort bug 2023-12-14 16:08:41 +01:00
11e2a2c1aa Fix geosort bug 2023-12-14 16:08:41 +01:00
65e49b7092 Remove stuff, add distribution shift (WIP) 2023-12-14 16:08:38 +01:00
e56f160032 Actually pass embedders on reindex 2023-12-14 16:07:49 +01:00
687d92f217 prompt bifluor+ 2023-12-14 16:07:49 +01:00
fb539f61fe WIP 2023-12-14 16:07:49 +01:00
cb4ebe163e WIP 2023-12-14 16:07:49 +01:00
dde3a04679 WIP arroy integration 2023-12-14 16:07:49 +01:00
13c2c6c16b Small commit to add hybrid search and autoembedding 2023-12-14 16:07:48 +01:00
21bcf32109 Add candle and hg_hub, updating a lot of deps in the process 2023-12-14 16:07:48 +01:00
35e1981488 Remove proximityPrecision from the experimental feature 2023-12-14 15:52:42 +01:00
e0f712b9d3 Merge #4254
4254: Bring back v1.5.1 changes into main r=ManyTheFish a=Kerollmops

This pull request brings back changes from the _release-v1.5.1_ branch into _main_.

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-12-14 09:41:57 +00:00
56571f762a Merge remote-tracking branch 'origin/main' into tmp-release-v1.5.1 2023-12-13 11:57:01 +01:00
005800634d Merge pull request #4249 from meilisearch/flag-limit-batch-size
Introduce parameters to limit the number of batched tasks
2023-12-13 10:32:14 +01:00
976af4fa8f Add the default commented experimental batched tasks limit parameter to the config file 2023-12-12 10:59:00 +01:00
99fec27788 Make the --max-number-of-batched-tasks argument experimental 2023-12-12 10:55:39 +01:00
afa8f273a8 Merge #4250
4250: Update version for the next release (v1.5.1) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-12-12 08:26:06 +00:00
4b644f6bc0 Update version for the next release (v1.5.1) in Cargo.toml 2023-12-11 17:15:11 +00:00
7e259cb0d2 Expose the --max-number-of-batched-tasks argument 2023-12-11 16:08:39 +01:00
0fbc1511d7 Merge #4225
4225: [EXP] Let the user customize the proximity precision r=dureuill a=ManyTheFish

# Pull Request
This PR introduces a new setting `proximityPrecision` allowing the user to trade off indexing time against search precision on proximity-based features:
- proximity ranking rules
- multi-word synonyms
- phrase search
- split-words

I put the API PRD below:
https://www.notion.so/meilisearch/3988b345b5b248948a4a0dc5932a18ce?v=45d79150adb84b0aa27826ff6da2e029&p=aa69c2bab2c3402bab9340ae4def4577&pm=s

## Related issue
Fixes #4187

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-12-06 17:21:43 +00:00
c9860c7913 Small test fixes 2023-12-06 15:49:05 +01:00
03ffabe889 Add a new dump test 2023-12-06 15:49:05 +01:00
1f4fc9c229 Make the feature experimental 2023-12-06 15:49:05 +01:00
8cc3c54117 Add proximityPrecision setting in settings route 2023-12-06 15:49:05 +01:00
467b49153d Implement proximityPrecision setting on milli side 2023-12-06 15:49:02 +01:00
0c3fa8cbc4 Add tests on proximityPrecision setting 2023-12-06 14:59:23 +01:00
bddc168d83 List TODOs 2023-12-06 14:59:23 +01:00
84a36002d7 Merge #4239
4239: Remove the actix-web dependency from milli r=dureuill a=Kerollmops

Just remove actix-web from milli.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-11-29 10:19:40 +00:00
c95d68e244 Merge #4233
4233: Add test reproducing #4232 r=dureuill a=ManyTheFish

- add a test reproducing the bug
- fix the bug by creating 2 different restricting lists of attributes, one for the exact attributes, and the other for the tolerant attributes

## Related issue
Fixes #4232


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-11-29 08:47:17 +00:00
3b3fa38f27 Put the restrict list in a sub-struct 2023-11-28 18:37:57 +01:00
170e063b80 Remove the actix-web dependency from milli 2023-11-28 17:19:57 +01:00
d6c2ee15a9 Filter on attributes before computing the docids when attribute restriction is on 2023-11-28 14:55:29 +01:00
6376c342c1 Merge #4223
4223: Update to heed 0.20 r=dureuill a=Kerollmops

This PR brings the v0.20-alpha.9 version of heed into Meilisearch 🎉 The main goal is to test it in a real environment to make the necessary changes if needed. We also want to merge it as soon as possible during the pre-release phase to ensure we catch bugs before the release.

Most of the calls to heed are the same as before, except:
 - The `PolyDatabase` has been replaced with a `Database<Unspecified, Unspecified>`. We replaced the `get<T, U>()` calls with `remap<T, U>().get()` calls.
 - The `Database` `append(...)` method has been replaced with a `put_with_flags(PutFlags::APPEND, ...)`.
 - The `RwTxn<'e, 'p>` has been simplified into a `RwTxn<'e>`.
 - The `BytesEncode/Decode` traits return a `Result<_, BoxedError>` instead of an `Option<_>`.
 - We no longer need to wrap and unwrap the `BEU32` integer when storing/getting them from heed.

### TODO
 - [x] Create actual, simple error types instead of using strings in the codecs.

### Follow-up work
 - Move the codecs into another member crate (we depend on the uuid one in the meilitool crate).
 - Display the internal decoding error in the `SerializationError` internal error variant.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-11-28 13:39:44 +00:00
5b563f872b Move the clippy attribute on the problematic part of the code 2023-11-28 14:37:58 +01:00
ec9b52d608 Rename copy_to_path to copy_to_file 2023-11-28 14:32:30 +01:00
34c67ac389 Remove the possibility to fail fetching the env info 2023-11-28 14:31:23 +01:00
d050c9b4ae Only remap the main database once 2023-11-28 14:27:30 +01:00
7dd1226faf Clarify an unreachable unwrap 2023-11-28 14:26:31 +01:00
1575456594 Further reduce an async block 2023-11-28 14:23:32 +01:00
add2ceef67 Introduce error types to avoid panics 2023-11-28 14:21:49 +01:00
548c8247c2 Create and use real error types in the codecs 2023-11-28 10:11:17 +01:00
181ca48482 Merge #4234
4234: Fix puffin in the index scheduler r=dureuill a=irevoire

Currently, we can't compile the index scheduler without this feature.

It could be cool to specify the dependencies in the main workspace cargo toml like quickwit does to avoid this kind of error in the future; https://github.com/quickwit-oss/quickwit/blob/main/quickwit/Cargo.toml#L41

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-11-28 08:23:48 +00:00
5751f5c640 fix puffin in the index scheduler 2023-11-27 15:18:33 +01:00
d32eb11329 Move to the v0.20.0-alpha.9 of heed 2023-11-27 11:52:22 +01:00
dc07790133 Add test reproducing #4232 2023-11-27 11:39:11 +01:00
3d23b388bc Merge #4231
4231: Fixed payload limit setting being ignored for delete documents by batch r=Kerollmops a=Karribalu


# Pull Request

## Related issue
Fixes #4224

## What does this PR do?
- Added http_payload_size_limit to JsonConfig to allow deleting documents in batches with a payload size greater than 2MB, which is the default limit set in the JsonConfig crate.

## PR checklist
Please check if your PR fulfills the following requirements:
- [Y] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [Y] Have you read the contributing guidelines?
- [Y] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: karribalu <karri.balu123456@gmail.com>
2023-11-27 09:26:21 +00:00
85626cff8e Fixed payload limit setting being ignored for delete documents by batch route 2023-11-25 18:41:16 +00:00
58dac8af42 Remove the panics and unwraps 2023-11-23 15:00:48 +01:00
0dbf1a16ff Make clippy happy 2023-11-23 14:11:38 +01:00
462b4c0080 Fix the tests 2023-11-23 12:07:35 +01:00
0d4482625a Make the changes to use heed v0.20-alpha.6 2023-11-23 11:43:58 +01:00
56a0d91ecd Update the heed dependency and lock file 2023-11-22 15:11:09 +01:00
b366acdae6 Merge #4220
4220: Bring back changes from v1.5.0 into main r=dureuill a=Kerollmops

This will bring the fixes from v1.5.0 into main. By [following this guide](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md#after-the-release) I decided to create a temporary branch to fix the git conflicts and merge into main afterward.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-22 07:46:22 +00:00
7cb7e37ba8 Merge branch 'main' into tmp-release-v1.5.0 2023-11-21 16:30:46 +01:00
33b7c574ea Merge #4090
4090: Diff indexing r=ManyTheFish a=ManyTheFish

This pull request aims to reduce the indexing time by computing a difference between the data added to the index and the data removed from the index before writing in LMDB.

## Why focus on reducing the writings in LMDB?

The indexing in Meilisearch is split into 3 main phases:
1) The computing or the extraction of the data (Multi-threaded)
2) The writing of the data in LMDB (Mono-threaded)
3) The processing of the prefix databases (Mono-threaded)

see below:
![Capture d’écran 2023-09-28 à 20 01 45](https://github.com/meilisearch/meilisearch/assets/6482087/51513162-7c39-4244-978b-2c6b60c43a56)


Because the writing is mono-threaded, it represents a bottleneck in the indexing; reducing the number of writes in LMDB will reduce the pressure on the main thread and should reduce the global time spent on the indexing.

## Give Feedback

We created [a dedicated discussion](https://github.com/meilisearch/meilisearch/discussions/4196) for users to try this new feature and to give feedback on bugs or performance issues.

## Technical approach
### Part 1: merge the addition and the deletion process
This part:
a) Aims to reduce the time spent on indexing only the filterable/sortable fields of documents, for example:
  - Updating the number of "likes" or "stars" of a song or a movie
  - Updating the "stock count" or the "price" of a product

b) Aims to reduce the time spent on writing in LMDB which should reduce the global indexing time for the highly multi-threaded machines by reducing the writing bottleneck.

c) Aims to reduce the average time spent to delete documents without having to keep the soft-deleted documents implementation

- [x] Create a preprocessing function that creates the diff-based documents chunk (`OBKV<fid, OBKV<AddDel, value>>`)
  - [x] and clearly separate the faceted fields and the searchable fields in two different chunks
- Change the parameters of the input extractor by taking an `OBKV<fid, OBKV<AddDel, value>>` instead of  `OBKV<fid, value>`.
  - [x] extract_docid_word_positions
  - [x] extract_geo_points
  - [x] extract_vector_points
  - [x] extract_fid_docid_facet_values
- Adapt the searchable extractors to the new diff-chunks
  - [x] extract_fid_word_count_docids
  - [x] extract_word_pair_proximity_docids
  - [x] extract_word_position_docids
  - [x] extract_word_docids
- Adapt the facet extractors to the new diff-chunks
  - [x] extract_facet_number_docids
  - [x] extract_facet_string_docids
  - [x] extract_fid_docid_facet_values
  - [x] FacetsUpdate
- [x] Adapt the prefix database extractors ⚠️ ⚠️ 
- [x] Make the LMDB writer remove the document_ids to delete at the same time the new document_ids are added
- [x] Remove document deletion pipeline
  - [x] remove `new_documents_ids` entirely and `replaced_documents_ids`
  - [x] reuse extracted external id from transform instead of re-extracting in `TypedChunks::Documents`
  - [x] Remove deletion pipeline after autobatcher
  - [x] remove autobatcher deletion pipeline
    - [x] everything uses `IndexOperation::DocumentOperation`
    - [x] repair deletion by internal id for filter by delete
    - [x] Improve the deletion via internal ids by avoiding iterating over the whole set of external document ids.  
- [x] Remove soft-deleted documents

#### FIXME

- [x] field distribution is not correctly updated after deletion
- [x] missing documents in the tests of tokenizer_customization

### Part 2: Only compute the documents field by field
This part aims to reduce the global indexing time for any kind of partial document modification on any size of machine from the mono-threaded one to the highly multi-threaded one.

- [ ] Make the preprocessing function only send the fields that changed to the extractors
- [ ] remove the `word_docids` and `exact_word_docids` database and adapt the search (⚠️ could impact the search performances)
- [ ] replace the `word_pair_proximity_docids` database with a `word_pair_proximity_fid_docids` database and adapt the search (⚠️ could impact the search performances)
- [ ] Adapt the prefix database extractors ⚠️ ⚠️

## Technical Concerns
- The part 1 implementation could increase the indexing time for the smallest machines (with few threads) by increasing the extracting time (multi-threaded) more than the writing time (mono-threaded)
- The part 2 implementation needs to change the databases which could have a significant impact on the search performances
- The prefix databases are a bit special to process and may be a pain to adapt to the difference-based indexing

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-21 09:44:38 +00:00
d3575fb028 Make into_del_add_obkv parameters more human readable 2023-11-20 16:10:39 +01:00
39cbb499c2 Small fixes 2023-11-20 10:20:39 +01:00
ebef6bc24d Simplify documents database writing 2023-11-20 10:14:57 +01:00
d59b7db8d0 remove unused code 2023-11-20 10:10:45 +01:00
263e825619 Fix typos in comments 2023-11-20 10:06:29 +01:00
69354a6144 Add the benchmark name to the bot message 2023-11-15 13:56:54 +01:00
b0adc73ce6 Merge pull request #4207 from meilisearch/diff-indexing-prefix-databases
Diff indexing prefix databases
2023-11-14 16:04:05 +01:00
2b5d9042d1 Merge #4208
4208: Makes the dump cancellable r=Kerollmops a=irevoire

# Pull Request

Make the dump tasks cancellable even when they have already started processing.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4157


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-11-14 13:31:45 +00:00
5b57fbab08 makes the dump cancellable 2023-11-14 11:23:13 +01:00
72d3fa4898 Merge #4203
4203: Extract external document docids from docs on deletion by filter r=Kerollmops a=dureuill

This fixes some of the performance regression observed on `diff-indexing` when doing delete-by-filter with a filter matching many documents.

To delete 19 768 771 documents (hackernews dataset, all documents matching `type = comment`), here are the observed time:

|branch (commit sha1sum)|time|speed-down factor (lower is better)|
|--|--|--|
|`main` (48865470d7)|1212.885536s (~20min)|x1.0 (baseline)|
|`diff-indexing` (523519fdbf)|5385.550543s (90min)|x4.44|
|**`diff-indexing-extract-primary-key`**(f8289cd974)|2582.323324s (43min) | x2.13|

So we're still suffering a speed-down of x2.13, but that's much better than x4.44.

---

Changes:

- Refactor the logic of PrimaryKey extraction to a struct
- Add a trait to abstract the extraction of field id from a name between `DocumentBatch` and `FieldIdMap`.
- Add `Index::external_id_of` to get the external ids of a bitmap of internal ids.
- Use this new method to add new Transform and Batch methods to remove documents that are known to be from the DB.
- Modify delete-by-filter to use the new method

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-13 13:02:10 +00:00
772964125d Factor removal of document from DB 2023-11-13 13:51:22 +01:00
378deb0bef Rename trait 2023-11-13 13:38:36 +01:00
1f36410541 Update tests 2023-11-13 13:36:39 +01:00
b11f85a635 Merge #4205
4205: Prevent search hang on the processing index r=Kerollmops a=dureuill

Fixes #4206, an issue originally [reported on Discord](https://discord.com/channels/1006923006964154428/1148983671026618579/1148983671026618579) where having parallel search requests on more indexes than the index cache capacity would cause search requests on the currently updating index to hang until the index is done updating.

## Test setup

- Create 20 empty indexes by sending settings to them
- repeatedly send placeholder search requests to each of the indexes in a loop
- Create another index and send a significant batch of documents to index.
- Attempt to perform a search request on that last index.
  - Before this PR, the search request hangs while the index update task is processing
  - After this PR, the search request responds immediately even while the index update task is processing

## Changes

- When getting the handle to an index for some potentially long running batches of tasks, save it in the index scheduler.
- Drop the handle from the index-scheduler when the task is done so that we don't leak indexes.
- When getting an index from outside the task queue processor, check if there is such a handle matching the requested index. If so, skip the cache entirely and clone the handle.

Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-13 10:36:01 +00:00
a2d6dc8571 Fix typo, remove caching for the change of index 2023-11-13 10:44:36 +01:00
ee1701157f Merge #4204
4204: Throw error when the vector search is sent with the wrong size r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #4201 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-13 09:43:20 +00:00
8c649d8061 Throw error when the vector search is sent with the wrong size 2023-11-13 09:57:42 +01:00
492fc086f0 cargo fmt 2023-11-12 21:53:11 +01:00
a2d0c73b41 Save the currently updating index so that the search can access it at all times 2023-11-10 10:52:03 +01:00
264b10ec20 Fixup documentation 2023-11-09 16:23:20 +01:00
825257da76 Use more efficient method for deletion in benchmarks 2023-11-09 16:13:15 +01:00
f8289cd974 Use it from delete-by-filter 2023-11-09 14:23:15 +01:00
3053e01c05 Batch::remove_documents_from_db_no_batch 2023-11-09 14:23:02 +01:00
b11c2afac0 Index::external_id_of 2023-11-09 14:22:43 +01:00
9cef800b2a Enrich uses the new type 2023-11-09 14:22:05 +01:00
db2fb86b8b Extract PrimaryKey logic to a type 2023-11-09 14:19:16 +01:00
882ab9cc85 remove warnings 2023-11-09 11:35:33 +01:00
5a9c96e1db Compute word integer prefix cache 2023-11-09 11:34:26 +01:00
70ce40828c Compute word docids prefix cache 2023-11-08 17:01:00 +01:00
688266c83e Remove word pair proximity prefix cache and compute it at search time 2023-11-08 14:16:01 +01:00
6dab826908 Reactivate prefix databases 2023-11-08 13:58:01 +01:00
1e2fbc6a42 revert "REVERT ME: ignore prefix pair databases tests"
This reverts commit 1b2ea6cf19.
2023-11-08 11:50:52 +01:00
523519fdbf Merge pull request #4195 from meilisearch/diff-indexing-remove-from-batch
Remove `IndexOperation::DocumentDeletion`
2023-11-08 10:29:49 +01:00
ef6fa10f7a Remove IndexOperation::DocumentDeletion 2023-11-06 12:16:15 +01:00
620fee35f9 Fix benches 2023-11-06 11:56:46 +01:00
cbaa54cafd Fix clippy issues 2023-11-06 11:19:31 +01:00
1bccf2079e Correctly mark non-tests as non-tests 2023-11-06 11:03:56 +01:00
1b2ea6cf19 REVERT ME: ignore prefix pair databases tests 2023-11-06 10:46:22 +01:00
1ad1fcc8c8 Remove all warnings 2023-11-06 10:31:14 +01:00
48865470d7 Merge #4191
4191: Remove banner r=Kerollmops a=curquiza



Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-11-02 17:14:23 +00:00
c810df4d9f Update README.md 2023-11-02 17:40:18 +01:00
87610a5f98 Don't try to delete a document that is not in the database 2023-11-02 16:49:03 +01:00
2544bc1416 Merge pull request #4160 from meilisearch/diff-indexing-vector-points
Diff Indexing for the vector points
2023-11-02 16:01:51 +01:00
ff522c919d Fix the vector extractions for the diff indexing 2023-11-02 15:58:08 +01:00
1c39459cf4 Merge pull request #4179 from meilisearch/diff-indexing-fix-nested-primary-key
Diff indexing fix nested primary key
2023-11-02 15:39:50 +01:00
bf0651f23c Implement iter method on ExternalDocumentsIds 2023-11-02 15:38:00 +01:00
5b20e625f3 fix merge 2023-11-02 15:31:37 +01:00
bc51d6157a Fix transform reindexing path 2023-11-02 15:26:20 +01:00
1b4ff991c0 update typed chunks 2023-11-02 15:26:20 +01:00
4b64c33aa2 update vector extractor 2023-11-02 15:26:20 +01:00
12323d610e Change the original document sorter key from the internal docid to a concatenation of the internal and the external docid 2023-11-02 15:26:20 +01:00
44e9033b3a Merge pull request #4181 from meilisearch/diff-indexing-parallel-transform
Use rayon to sort entries in parallel
2023-11-02 15:16:10 +01:00
4d864f0702 Always sort internal Sorter entries in parallel 2023-11-02 14:47:43 +01:00
5e3df76699 Merge #4183
4183: Bump docker/login-action from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [docker/login-action](https://github.com/docker/login-action) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/login-action/releases">docker/login-action's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Node 20 as default runtime (requires <a href="https://github.com/actions/runner/releases/tag/v2.308.0">Actions Runner v2.308.0</a> or later) by <a href="https://github.com/crazy-max"><code>@​crazy-max</code></a> in <a href="https://redirect.github.com/docker/login-action/pull/593">docker/login-action#593</a></li>
<li>Bump <code>@​actions/core</code> from 1.10.0 to 1.10.1 in <a href="https://redirect.github.com/docker/login-action/pull/598">docker/login-action#598</a></li>
<li>Bump <code>@​aws-sdk/client-ecr</code> and <code>@​aws-sdk/client-ecr-public</code> to 3.410.0 in <a href="https://redirect.github.com/docker/login-action/pull/555">docker/login-action#555</a> <a href="https://redirect.github.com/docker/login-action/pull/560">docker/login-action#560</a> <a href="https://redirect.github.com/docker/login-action/pull/582">docker/login-action#582</a> <a href="https://redirect.github.com/docker/login-action/pull/599">docker/login-action#599</a></li>
<li>Bump semver from 6.3.0 to 6.3.1 in <a href="https://redirect.github.com/docker/login-action/pull/556">docker/login-action#556</a></li>
<li>Bump https-proxy-agent to 7.0.2 <a href="https://redirect.github.com/docker/login-action/pull/561">docker/login-action#561</a> <a href="https://redirect.github.com/docker/login-action/pull/588">docker/login-action#588</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/login-action/compare/v2.2.0...v3.0.0">https://github.com/docker/login-action/compare/v2.2.0...v3.0.0</a></p>
<h2>v2.2.0</h2>
<ul>
<li>Switch to actions-toolkit implementation by <a href="https://github.com/crazy-max"><code>@​crazy-max</code></a> in <a href="https://redirect.github.com/docker/login-action/pull/409">docker/login-action#409</a> <a href="https://redirect.github.com/docker/login-action/pull/470">docker/login-action#470</a> <a href="https://redirect.github.com/docker/login-action/pull/476">docker/login-action#476</a></li>
<li>Bump <code>@​aws-sdk/client-ecr</code> and <code>@​aws-sdk/client-ecr-public</code> to 3.347.1 in <a href="https://redirect.github.com/docker/login-action/pull/524">docker/login-action#524</a> <a href="https://redirect.github.com/docker/login-action/pull/364">docker/login-action#364</a> <a href="https://redirect.github.com/docker/login-action/pull/363">docker/login-action#363</a></li>
<li>Bump minimatch from 3.0.4 to 3.1.2 in <a href="https://redirect.github.com/docker/login-action/pull/354">docker/login-action#354</a></li>
<li>Bump json5 from 2.2.0 to 2.2.3 in <a href="https://redirect.github.com/docker/login-action/pull/378">docker/login-action#378</a></li>
<li>Bump http-proxy-agent from 5.0.0 to 7.0.0 in <a href="https://redirect.github.com/docker/login-action/pull/509">docker/login-action#509</a></li>
<li>Bump https-proxy-agent from 5.0.1 to 7.0.0 in <a href="https://redirect.github.com/docker/login-action/pull/508">docker/login-action#508</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/login-action/compare/v2.1.0...v2.2.0">https://github.com/docker/login-action/compare/v2.1.0...v2.2.0</a></p>
<h2>v2.1.0</h2>
<ul>
<li>Ensure AWS temp credentials are redacted in workflow logs by <a href="https://github.com/crazy-max"><code>@​crazy-max</code></a> (<a href="https://redirect.github.com/docker/login-action/issues/275">#275</a>)</li>
<li>Bump <code>@​actions/core</code> from 1.6.0 to 1.10.0 (<a href="https://redirect.github.com/docker/login-action/issues/252">#252</a> <a href="https://redirect.github.com/docker/login-action/issues/292">#292</a>)</li>
<li>Bump <code>@​aws-sdk/client-ecr</code> from 3.53.0 to 3.186.0 (<a href="https://redirect.github.com/docker/login-action/issues/298">#298</a>)</li>
<li>Bump <code>@​aws-sdk/client-ecr-public</code> from 3.53.0 to 3.186.0 (<a href="https://redirect.github.com/docker/login-action/issues/299">#299</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/login-action/compare/v2.0.0...v2.1.0">https://github.com/docker/login-action/compare/v2.0.0...v2.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="343f7c4344"><code>343f7c4</code></a> Merge pull request <a href="https://redirect.github.com/docker/login-action/issues/599">#599</a> from docker/dependabot/npm_and_yarn/aws-sdk-dependenc...</li>
<li><a href="aad0f974f2"><code>aad0f97</code></a> chore: update generated content</li>
<li><a href="2e0cd39144"><code>2e0cd39</code></a> build(deps): bump the aws-sdk-dependencies group with 2 updates</li>
<li><a href="203bc9c4ef"><code>203bc9c</code></a> Merge pull request <a href="https://redirect.github.com/docker/login-action/issues/588">#588</a> from docker/dependabot/npm_and_yarn/proxy-agent-depen...</li>
<li><a href="2199648fc8"><code>2199648</code></a> chore: update generated content</li>
<li><a href="b489376173"><code>b489376</code></a> build(deps): bump the proxy-agent-dependencies group with 1 update</li>
<li><a href="7c309e74e6"><code>7c309e7</code></a> Merge pull request <a href="https://redirect.github.com/docker/login-action/issues/598">#598</a> from docker/dependabot/npm_and_yarn/actions/core-1.10.1</li>
<li><a href="0ccf222961"><code>0ccf222</code></a> chore: update generated content</li>
<li><a href="56d703e106"><code>56d703e</code></a> Merge pull request <a href="https://redirect.github.com/docker/login-action/issues/597">#597</a> from docker/dependabot/github_actions/aws-actions/con...</li>
<li><a href="24d3b3519e"><code>24d3b35</code></a> build(deps): bump <code>@​actions/core</code> from 1.10.0 to 1.10.1</li>
<li>Additional commits viewable in <a href="https://github.com/docker/login-action/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/login-action&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-02 13:18:13 +00:00
02765fb267 Merge #4184
4184: Bump actions/setup-node from 3 to 4 r=curquiza a=dependabot[bot]

Bumps [actions/setup-node](https://github.com/actions/setup-node) from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/actions/setup-node/releases">actions/setup-node's releases</a>.</em></p>
<blockquote>
<h2>v4.0.0</h2>
<h2>What's Changed</h2>
<p>In scope of this release we changed version of node runtime for action from node16 to node20 and updated dependencies in <a href="https://redirect.github.com/actions/setup-node/pull/866">actions/setup-node#866</a></p>
<p>Besides, release contains such changes as:</p>
<ul>
<li>Upgrade actions/checkout to v4 by <a href="https://github.com/gmembre-zenika"><code>`@​gmembre-zenika</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/868">actions/setup-node#868</a></li>
<li>Update actions/checkout for documentation and yaml by <a href="https://github.com/dmitry-shibanov"><code>`@​dmitry-shibanov</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/876">actions/setup-node#876</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/gmembre-zenika"><code>`@​gmembre-zenika</code></a>` made their first contribution in <a href="https://redirect.github.com/actions/setup-node/pull/868">actions/setup-node#868</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/setup-node/compare/v3...v4.0.0">https://github.com/actions/setup-node/compare/v3...v4.0.0</a></p>
<h2>v3.8.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Update semver by <a href="https://github.com/dmitry-shibanov"><code>`@​dmitry-shibanov</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/861">actions/setup-node#861</a></li>
<li>Update temp directory creation by <a href="https://github.com/nikolai-laevskii"><code>`@​nikolai-laevskii</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/859">actions/setup-node#859</a></li>
<li>Bump <code>`@​babel/traverse</code>` from 7.15.4 to 7.23.2 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/870">actions/setup-node#870</a></li>
<li>Add notice about binaries not being updated yet by <a href="https://github.com/nikolai-laevskii"><code>`@​nikolai-laevskii</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/872">actions/setup-node#872</a></li>
<li>Update toolkit cache and core by <a href="https://github.com/dmitry-shibanov"><code>`@​dmitry-shibanov</code></a>` and <a href="https://github.com/seongwon-privatenote"><code>`@​seongwon-privatenote</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/875">actions/setup-node#875</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/setup-node/compare/v3...v3.8.2">https://github.com/actions/setup-node/compare/v3...v3.8.2</a></p>
<h2>v3.8.1</h2>
<h2>What's Changed</h2>
<p>In scope of this release, the filter was removed within the cache-save step by <a href="https://github.com/dmitry-shibanov"><code>`@​dmitry-shibanov</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/831">actions/setup-node#831</a>. It is filtered and checked in the toolkit/cache library.</p>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/setup-node/compare/v3...v3.8.1">https://github.com/actions/setup-node/compare/v3...v3.8.1</a></p>
<h2>v3.8.0</h2>
<h2>What's Changed</h2>
<h3>Bug fixes:</h3>
<ul>
<li>Add check for existing paths by <a href="https://github.com/dmitry-shibanov"><code>`@​dmitry-shibanov</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/803">actions/setup-node#803</a></li>
<li>Resolve SymbolicLink by <a href="https://github.com/dmitry-shibanov"><code>`@​dmitry-shibanov</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/809">actions/setup-node#809</a></li>
<li>Change passing logic for cache input by <a href="https://github.com/dmitry-shibanov"><code>`@​dmitry-shibanov</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/816">actions/setup-node#816</a></li>
<li>Fix armv7 cache issue by <a href="https://github.com/louislam"><code>`@​louislam</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/794">actions/setup-node#794</a></li>
<li>Update check-dist workflow name by <a href="https://github.com/sinchang"><code>`@​sinchang</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/710">actions/setup-node#710</a></li>
</ul>
<h3>Feature implementations:</h3>
<ul>
<li>feat: handling the case where &quot;node&quot; is used for tool-versions file. by <a href="https://github.com/xytis"><code>`@​xytis</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/812">actions/setup-node#812</a></li>
</ul>
<h3>Documentation changes:</h3>
<ul>
<li>Refer to semver package name in README.md by <a href="https://github.com/olleolleolle"><code>`@​olleolleolle</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/808">actions/setup-node#808</a></li>
</ul>
<h3>Update dependencies:</h3>
<ul>
<li>Update toolkit cache to fix zstd by <a href="https://github.com/dmitry-shibanov"><code>`@​dmitry-shibanov</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/804">actions/setup-node#804</a></li>
<li>Bump tough-cookie and <code>`@​azure/ms-rest-js</code>` by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/802">actions/setup-node#802</a></li>
<li>Bump semver from 6.1.2 to 6.3.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/setup-node/pull/807">actions/setup-node#807</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="8f152de45c"><code>8f152de</code></a> Update actions/checkout for documentation and yaml (<a href="https://redirect.github.com/actions/setup-node/issues/876">#876</a>)</li>
<li><a href="23755b521f"><code>23755b5</code></a> upgrade actions/checkout to v4 (<a href="https://redirect.github.com/actions/setup-node/issues/868">#868</a>)</li>
<li><a href="54534a2a9b"><code>54534a2</code></a> Change node version for action to node20 (<a href="https://redirect.github.com/actions/setup-node/issues/866">#866</a>)</li>
<li>See full diff in <a href="https://github.com/actions/setup-node/compare/v3...v4">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-node&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-02 11:28:03 +00:00
841165d529 Merge #4185
4185: Bump Swatinem/rust-cache from 2.6.2 to 2.7.1 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.6.2 to 2.7.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.7.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Fix save-if documentation in readme by <a href="https://github.com/rukai"><code>`@​rukai</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/166">Swatinem/rust-cache#166</a></li>
<li>Support for <code>trybuild</code> and similar macro testing tools by <a href="https://github.com/neysofu"><code>`@​neysofu</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/168">Swatinem/rust-cache#168</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/rukai"><code>`@​rukai</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/166">Swatinem/rust-cache#166</a></li>
<li><a href="https://github.com/neysofu"><code>`@​neysofu</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/168">Swatinem/rust-cache#168</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.6.2...v2.7.0">https://github.com/Swatinem/rust-cache/compare/v2.6.2...v2.7.0</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.7.1</h2>
<ul>
<li>Update toml parser to fix parsing errors.</li>
</ul>
<h2>2.7.0</h2>
<ul>
<li>Properly cache <code>trybuild</code> tests.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="3cf7f8cc28"><code>3cf7f8c</code></a> 2.7.1</li>
<li><a href="e03705e031"><code>e03705e</code></a> changelog</li>
<li><a href="b86d1c6caa"><code>b86d1c6</code></a> bump all the other dependencies too</li>
<li><a href="f27990c89a"><code>f27990c</code></a> Update Dependencies (<a href="https://redirect.github.com/swatinem/rust-cache/issues/172">#172</a>)</li>
<li><a href="a95ba19544"><code>a95ba19</code></a> 2.7.0</li>
<li><a href="82c8487d00"><code>82c8487</code></a> changelog</li>
<li><a href="67c46e7159"><code>67c46e7</code></a> Support for <code>trybuild</code> and similar macro testing tools (<a href="https://redirect.github.com/swatinem/rust-cache/issues/168">#168</a>)</li>
<li><a href="44b6087283"><code>44b6087</code></a> Fix save-if documentation in readme (<a href="https://redirect.github.com/swatinem/rust-cache/issues/166">#166</a>)</li>
<li>See full diff in <a href="https://github.com/swatinem/rust-cache/compare/v2.6.2...v2.7.1">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.6.2&new-version=2.7.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-02 10:48:25 +00:00
ea4a266f08 Merge #4182
4182: Bump mislav/bump-homebrew-formula-action from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [mislav/bump-homebrew-formula-action](https://github.com/mislav/bump-homebrew-formula-action) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/mislav/bump-homebrew-formula-action/releases">mislav/bump-homebrew-formula-action's releases</a>.</em></p>
<blockquote>
<h2>bump-homebrew-formula 3.0</h2>
<h2>What's Changed</h2>
<ul>
<li>feat: bump to use node20 runtime by <a href="https://github.com/chenrui333"><code>`@​chenrui333</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/61">mislav/bump-homebrew-formula-action#61</a></li>
<li>Bump actions/checkout from 3 to 4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/63">mislav/bump-homebrew-formula-action#63</a></li>
<li>Bump <code>`@​vercel/ncc</code>` from 0.34.0 to 0.38.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/67">mislav/bump-homebrew-formula-action#67</a></li>
<li>Bump <code>`@​actions/core</code>` from 1.9.1 to 1.10.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/68">mislav/bump-homebrew-formula-action#68</a></li>
<li>Bump <code>`@​octokit/core</code>` from 3.5.1 to 5.0.0 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/65">mislav/bump-homebrew-formula-action#65</a></li>
<li>Bump TypeScript from 4.7 to 5.2</li>
<li>Bump <code>`@​typescript-eslint/eslint-plugin</code>` from 5.43.0 to 6.7.2 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/66">mislav/bump-homebrew-formula-action#66</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/mislav/bump-homebrew-formula-action/compare/v2.4...v3.0">https://github.com/mislav/bump-homebrew-formula-action/compare/v2.4...v3.0</a></p>
<h2>bump-homebrew-formula 2.4</h2>
<h2>What's Changed</h2>
<ul>
<li>chore: use <code>/archive/refs/tags/${tagName}.tar.gz</code> rather than <code>/archive/${tagName}.tar.gz</code> by <a href="https://github.com/chenrui333"><code>`@​chenrui333</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/53">mislav/bump-homebrew-formula-action#53</a></li>
<li>Fix extracting version tags from GitHub download URLs by <a href="https://github.com/mislav"><code>`@​mislav</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/62">mislav/bump-homebrew-formula-action#62</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/mislav/bump-homebrew-formula-action/compare/v2.3...v2.4">https://github.com/mislav/bump-homebrew-formula-action/compare/v2.3...v2.4</a></p>
<h2>bump-homebrew-formula 2.3</h2>
<h2>What's Changed</h2>
<ul>
<li>Fix formula path after sharding of homebrew-core by <a href="https://github.com/williammartin"><code>`@​williammartin</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/59">mislav/bump-homebrew-formula-action#59</a></li>
<li>(docs): fix if condition in example by <a href="https://github.com/christian-bromann"><code>`@​christian-bromann</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/54">mislav/bump-homebrew-formula-action#54</a></li>
<li>(docs): use environment files instead of set-output by <a href="https://github.com/kyu08"><code>`@​kyu08</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/57">mislav/bump-homebrew-formula-action#57</a></li>
<li>Bump word-wrap from 1.2.3 to 1.2.4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/55">mislav/bump-homebrew-formula-action#55</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/christian-bromann"><code>`@​christian-bromann</code></a>` made their first contribution in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/54">mislav/bump-homebrew-formula-action#54</a></li>
<li><a href="https://github.com/kyu08"><code>`@​kyu08</code></a>` made their first contribution in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/57">mislav/bump-homebrew-formula-action#57</a></li>
<li><a href="https://github.com/williammartin"><code>`@​williammartin</code></a>` made their first contribution in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/59">mislav/bump-homebrew-formula-action#59</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/mislav/bump-homebrew-formula-action/compare/v2.2...v2.3">https://github.com/mislav/bump-homebrew-formula-action/compare/v2.2...v2.3</a></p>
<h2>bump-homebrew-formula 2.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Fix scenario with generated GITHUB_TOKEN by <a href="https://github.com/mislav"><code>`@​mislav</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/45">mislav/bump-homebrew-formula-action#45</a></li>
<li>Bump <code>`@​actions/core</code>` from 1.6.0 to 1.9.1 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/39">mislav/bump-homebrew-formula-action#39</a></li>
<li>Bump minimatch from 3.0.4 to 3.1.2 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/40">mislav/bump-homebrew-formula-action#40</a></li>
<li>Bump got and ava by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/pull/41">mislav/bump-homebrew-formula-action#41</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/mislav/bump-homebrew-formula-action/compare/v2.1...v2.2">https://github.com/mislav/bump-homebrew-formula-action/compare/v2.1...v2.2</a></p>
<h2>bump-homebrew-formula 2.1</h2>
<ul>
<li>Fix extracting complex tag names from GitHub archive and release download URLs <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/issues/37">mislav/bump-homebrew-formula-action#37</a></li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="b3327118b2"><code>b332711</code></a> lib</li>
<li><a href="d1d8ac114e"><code>d1d8ac1</code></a> Merge remote-tracking branch 'origin/main' into v3</li>
<li><a href="cf2d00157f"><code>cf2d001</code></a> Fix calculating checksum for resource at download URL (<a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/issues/77">#77</a>)</li>
<li><a href="2bcfdc9312"><code>2bcfdc9</code></a> Merge pull request <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/issues/72">#72</a> from mislav/dependabot/npm_and_yarn/octokit/plugin-res...</li>
<li><a href="5678601dcb"><code>5678601</code></a> Merge pull request <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/issues/74">#74</a> from mislav/dependabot/npm_and_yarn/eslint-8.50.0</li>
<li><a href="addc60eb43"><code>addc60e</code></a> Bump <code>`@​octokit/plugin-rest-endpoint-methods</code>` from 9.0.0 to 10.0.0</li>
<li><a href="44b3287225"><code>44b3287</code></a> Merge pull request <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/issues/75">#75</a> from mislav/dependabot/npm_and_yarn/octokit/core-5.0.1</li>
<li><a href="fda81994d7"><code>fda8199</code></a> Merge pull request <a href="https://redirect.github.com/mislav/bump-homebrew-formula-action/issues/71">#71</a> from mislav/dependabot/npm_and_yarn/octokit/request-er...</li>
<li><a href="2fd87fd7ea"><code>2fd87fd</code></a> Bump <code>`@​octokit/core</code>` from 5.0.0 to 5.0.1</li>
<li><a href="0c20930845"><code>0c20930</code></a> Bump eslint from 8.49.0 to 8.50.0</li>
<li>Additional commits viewable in <a href="https://github.com/mislav/bump-homebrew-formula-action/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=mislav/bump-homebrew-formula-action&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-02 08:48:19 +00:00
49f069ed97 Bump Swatinem/rust-cache from 2.6.2 to 2.7.1
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.6.2 to 2.7.1.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.6.2...v2.7.1)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-11-01 17:57:42 +00:00
be16b99d40 Bump actions/setup-node from 3 to 4
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 3 to 4.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/setup-node
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-11-01 17:57:38 +00:00
ec0c09d17c Bump docker/login-action from 2 to 3
Bumps [docker/login-action](https://github.com/docker/login-action) from 2 to 3.
- [Release notes](https://github.com/docker/login-action/releases)
- [Commits](https://github.com/docker/login-action/compare/v2...v3)

---
updated-dependencies:
- dependency-name: docker/login-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-11-01 17:57:33 +00:00
a9230f6e6c Bump mislav/bump-homebrew-formula-action from 2 to 3
Bumps [mislav/bump-homebrew-formula-action](https://github.com/mislav/bump-homebrew-formula-action) from 2 to 3.
- [Release notes](https://github.com/mislav/bump-homebrew-formula-action/releases)
- [Commits](https://github.com/mislav/bump-homebrew-formula-action/compare/v2...v3)

---
updated-dependencies:
- dependency-name: mislav/bump-homebrew-formula-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-11-01 17:57:30 +00:00
b10c060bf7 Cleanup TOML 2023-11-01 14:03:04 +01:00
e507ef5932 Slow the logging down 2023-11-01 13:49:32 +01:00
c71b1d33ae Sort entries using rayon in the transform sorters 2023-11-01 11:07:16 +01:00
0fc446c62f Add more timing logs to the Transform 2023-11-01 11:07:16 +01:00
0fb6acefc3 Add snapshots for facets 2023-10-31 17:11:08 +01:00
b1d1355b69 remove tests on soft-deleted 2023-10-31 16:36:27 +01:00
f19332466e Extract field value as values instead of Option<Value> 2023-10-31 16:36:27 +01:00
03ddb4f310 use deladd in facet update tests 2023-10-31 16:36:27 +01:00
c855cc2721 Remove unused test 2023-10-31 16:36:27 +01:00
da0503ef80 Fix document count 2023-10-31 16:36:27 +01:00
54f0ee1ed2 Merge #4167
4167: Introduce the `meilitool` command line interface r=Kerollmops a=Kerollmops

This PR introduces a small tool to help the Cloud team:
 - Clear the tasks queue by removing all the tasks
 - Dump a Meilisearch database without having to enqueue the task
 - Access this `meilitool` binary from the Docker Image

## TODO
 - [x] Modify the Docker File to ship with this new tool (@curquiza, could you review that, please?)
 - [x] Clear the tasks queue by removing all the tasks
   - [x] Add more logs to explain what is happening
   - [x] Clear the `update_files` folder
 - [x] Dump a Meilisearch database without having to enqueue the task
   - [x] Add more logs to explain what is happening
   - [x] Introduce a flag to skip dumping enqueued and processing tasks.
   - [x] Dump the instance uid.
   - [x] Dump the keys.
   - [x] Dump the tasks with the update files.
   - [x] Dump the index documents and settings.
   - [ ] ~Dump the experimental features~

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-31 14:05:22 +00:00
94206b0055 Update tests 2023-10-31 13:48:47 +01:00
b40253bf18 update snapshots 2023-10-31 10:30:48 +01:00
d8bf3f3fc2 Remove unused snapshots 2023-10-31 10:12:49 +01:00
9d59e8011a fix some tests 2023-10-31 10:08:36 +01:00
dad78cbf8d Bulk facet remove deletes keys from DB when value empty 2023-10-31 09:53:55 +01:00
4e91707a06 Rename test 2023-10-31 09:41:17 +01:00
de10f20732 Fix field distribution again 2023-10-30 17:47:22 +01:00
ce5647e730 Fix Dockerfile WORKDIR path 2023-10-30 17:27:59 +01:00
b57b818b67 Don't use the last version of clap 2023-10-30 16:57:31 +01:00
f7ea94e5f4 Modify the Dockerfile to compile meilisearch and meilitool 2023-10-30 16:32:17 +01:00
be395c7944 Change order of arguments to tokenizer_builder 2023-10-30 16:26:29 +01:00
9fedd8101a Fix tests 2023-10-30 15:11:07 +01:00
54d07a8da3 Update field distribution taking into account both deletions and additions 2023-10-30 14:47:51 +01:00
53382bb1b8 Introduce a new flag to skip dumping enqueued/processing tasks 2023-10-30 14:32:10 +01:00
5b004a2583 Add more logs to the dump exporter 2023-10-30 14:31:55 +01:00
13416ccbf7 Introduce a new meilitool to help the cloud team 2023-10-30 14:30:20 +01:00
58690dfb19 Fix tests compilation after changes to ExternalDocumentsIds API 2023-10-30 13:34:07 +01:00
abf424ebfc Remove unused FromIterator 2023-10-30 11:41:56 +01:00
dfab6293c9 Use an LMDB database to store the external documents ids 2023-10-30 11:41:23 +01:00
fdf3f7f627 Fix facet distribution test 2023-10-30 11:41:23 +01:00
6260cff65f Actually delete documents from DB when the merge function says so 2023-10-30 11:41:22 +01:00
8e0d9c9a5e Recover delete_documents tests that were too eagerly deleted 2023-10-30 11:41:22 +01:00
ae4ec8ea55 Add delete_document_using_wtxn to TempIndex 2023-10-30 11:41:22 +01:00
652ac3052d use new iterator in batch 2023-10-30 11:41:22 +01:00
9a2dccc3bc Add iterator to find external ids of a bitmap of internal ids 2023-10-30 11:41:22 +01:00
a35988550c Fix some snapshots 2023-10-30 11:41:22 +01:00
e78281785c Actually execute the transform even if there are only documents to delete 2023-10-30 11:41:22 +01:00
3c15881818 Add simple delete test 2023-10-30 11:41:22 +01:00
73c06d31d9 snapshot always display stuff in consistent order 2023-10-30 11:41:22 +01:00
290e773d23 remove more warnings and fix some tests 2023-10-30 11:41:22 +01:00
fa6c7f65ca Add TmpIndex::delete_documents 2023-10-30 11:41:22 +01:00
113527f466 Remove soft-deleted related methods from Index 2023-10-30 11:41:22 +01:00
c534a1b687 Stop using delete documents pipeline in batch runner 2023-10-30 11:41:22 +01:00
2263dff02b Stop using removed delete pipelines almost everywhere 2023-10-30 11:41:22 +01:00
d651b3ef01 Remove delete documents files 2023-10-30 11:41:20 +01:00
762b0b47e6 Use deladd merging function in chunks mergers 2023-10-30 11:40:20 +01:00
01d5eedf2f Remove some warnings 2023-10-30 11:40:20 +01:00
073f89db79 Fix facet tests 2023-10-30 11:40:20 +01:00
8370fbc92b Fix snaps 2023-10-30 11:40:20 +01:00
85f42fbc03 Handle external to internal id mapping from TypedChunk::Documents 2023-10-30 11:40:20 +01:00
c6b3c18c85 WIP: Comment out document deletion in other pipelines than update
TODO: fix calls to DELETE route
2023-10-30 11:40:20 +01:00
bafeb892a7 Modify Index after changes to ExternalDocumentsIds 2023-10-30 11:40:20 +01:00
8fb221dae3 Refactor ExternalDocumentsIds
- Remove soft deleted
- Add apply method that takes a list of operations to encapsulate modifications to the external -> internal mapping
2023-10-30 11:40:20 +01:00
5be569e3e2 Update obkv 2023-10-30 11:40:20 +01:00
946c762d28 WIP: reset documents in TypedChunk::Documents 2023-10-30 11:40:20 +01:00
cda6ca1ee6 Remove TypedChunk::NewDocumentIds 2023-10-30 11:40:18 +01:00
696fcf4d18 Fix document insertion into LMDB 2023-10-30 11:39:31 +01:00
476e4d3dbe Use value buffer instead of the initial value when writing the final result in the sorter 2023-10-30 11:39:31 +01:00
576fa9c6da Remove useless comment 2023-10-30 11:39:31 +01:00
77dcbff6b2 Remove and Insert the DelAdd geo points 2023-10-30 11:39:31 +01:00
544440c363 Ignore geo fields when the Del and Add content is the same 2023-10-30 11:39:31 +01:00
a3dae4db9b Extract the geo fields DelAdd and generate a new DelAdd obkv with it 2023-10-30 11:39:31 +01:00
ba90a5ec0e update extract fid word count docids 2023-10-30 11:39:31 +01:00
b26dc9aabe Explanatory code comment 2023-10-30 11:39:31 +01:00
66abac9364 Use specialized KvReaderDelAdd type
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-30 11:39:31 +01:00
59f88c14b3 Simplify facet update after removing Index::faceted_documents_ids 2023-10-30 11:39:29 +01:00
14832cb324 Remove Index::faceted_documents_ids 2023-10-30 11:37:32 +01:00
04ec293024 Facet Incremental update 2023-10-30 11:37:30 +01:00
f67ff3a738 Facets Bulk update 2023-10-30 11:36:40 +01:00
560e8f5613 Introduce the CboRoaringBitmapCodec merge_deladd_into and use it 2023-10-30 11:34:55 +01:00
2d3f15f82c Introduce a function to only serialize the Add side of a DelAdd obkv 2023-10-30 11:34:55 +01:00
40186bf403 Rename FieldIdWordCountDocids correctly 2023-10-30 11:34:50 +01:00
87e3d27878 update extract word pair proximity to support deladd obkvs 2023-10-30 11:34:02 +01:00
6bcf8b4f8c update extract word position docids 2023-10-30 11:34:02 +01:00
46aa75abdb update extract word docids 2023-10-30 11:34:02 +01:00
2597bbd107 Make script language docids map taking a tuple of roaring bitmaps expressing the deletions and the additions 2023-10-30 11:34:00 +01:00
e2bc054604 Update extract_facet_string_docids to support deladd obkvs 2023-10-30 11:32:36 +01:00
fcd3a1434d Update extract_facet_number_docids to support deladd obkvs 2023-10-30 11:31:04 +01:00
a82dee21e0 Rename docid_fid into fid_docid 2023-10-30 11:31:02 +01:00
bc45c1206d Implement all the facet extraction paths and simplify them 2023-10-30 11:29:08 +01:00
6ae4100f07 Generate the DelAdd for is_null, is_empty, and exists 2023-10-30 11:29:08 +01:00
0c47defeee Work on fid docid facet values rewrite 2023-10-30 11:29:06 +01:00
313b16bec2 Support diff indexing on extract_docid_word_positions 2023-10-30 11:24:19 +01:00
1dd97578a8 Make the transform struct return diff-based documents obkvs 2023-10-30 11:22:07 +01:00
f5ef69293b deactivate prefix dbs 2023-10-30 11:22:07 +01:00
1c5705c164 clean PR warnings 2023-10-30 11:22:05 +01:00
66c2c82a18 Split wpp in several sorters 2023-10-30 11:15:02 +01:00
28a8d0ccda Fix word pair proximity 2023-10-30 11:15:02 +01:00
96be85396d Use a vecDeque in wpp database 2023-10-30 11:15:02 +01:00
df9e5c8651 Generalize usage of CboRoaringBitmap codec to ease the use 2023-10-30 11:15:02 +01:00
b541d48847 Add buffer to the obkv writer 2023-10-30 11:15:02 +01:00
8ccf32d1a0 Compute word_fid_docids before word_docids and exact_word_docids 2023-10-30 11:15:02 +01:00
db1ca21231 add puffin in sorter into reader function 2023-10-30 11:15:00 +01:00
11ea5acff9 Fix 2023-10-30 11:13:10 +01:00
8d77736a67 Fix fid_word_docids 2023-10-30 11:13:10 +01:00
748b333161 Add useful debug assert before key insertion in database 2023-10-30 11:13:10 +01:00
17b647dfe5 Wip 2023-10-30 11:13:08 +01:00
2614e7d9ca Merge #4174
4174: Fix warnings r=dureuill a=irevoire

Fix all the warnings found in the CI: https://github.com/meilisearch/meilisearch/actions/runs/6622576021/job/17988323623

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-30 10:12:54 +00:00
e7244aa485 fix warnings 2023-10-30 11:00:46 +01:00
9cacc82307 Merge #4169
4169: update charabia r=curquiza a=ManyTheFish

Update Charabia to v0.8.5 and add the new khmer tokenizer

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-10-26 17:21:30 +00:00
4c6fddb1cb update charabia 2023-10-26 17:01:10 +02:00
62ea81bef6 Merge #4132
4132: Extract the creation and last updated timestamp from v2 dumps r=irevoire a=vivek-26

# Pull Request

## Related issue
Fixes #2989

## What does this PR do?
This PR - 
- extracts the `created_at` and `updated_at` dates from v2 dumps.
- updates the unit tests.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
2023-10-24 08:50:57 +00:00
f28f09ae2f update tests for v2 dumps 2023-10-24 14:10:46 +05:30
ca52021079 Merge #4154
4154: Update version for the next release (v1.5.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-10-23 12:00:50 +00:00
ee6f79d60b Update version for the next release (v1.5.0) in Cargo.toml 2023-10-23 11:49:07 +00:00
e4c24ca6a3 Merge #4151
4151: Bring back changes from v1.4.2 into `release-v1.5.0` r=dureuill a=curquiza

This will bring the fixes in v1.4.2 for v1.5.0 release

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2023-10-23 10:11:11 +00:00
2bae9550c8 Add explanatory comment 2023-10-23 12:06:28 +02:00
32c78ac8b1 add/update tests when search with distinct attribute & pagination with no ranking 2023-10-23 12:06:27 +02:00
5fe7c4545a compute all candidates correctly when skipping 2023-10-23 12:02:45 +02:00
2042229927 Update version for the next release (v1.4.2) in Cargo.toml 2023-10-23 12:02:45 +02:00
eae9eab181 Merge #4126
4126: Make the experimental route /metrics activable via HTTP r=dureuill a=braddotcoffee

# Pull Request

## Related issue
Closes #4086

## What does this PR do?
- [x] Make `/metrics` available via HTTP as described in #4086 
- [x] The users can still launch Meilisearch using the `--experimental-enable-metrics` flag.
- [x] If the flag `--experimental-enable-metrics` is activated, a call to the `GET /experimental-features` route right after the launch will show `"metrics": true` even if the user has not called the `PATCH /experimental-features` route yet.
- [x] Even if the --experimental-enable-metrics flag is present at launch, calling the `PATCH /experimental-features` route with `"metrics": false` disables the experimental feature.
- [x] Update the spec
    - I was unable to find docs in this repository to update about the `/experimental-features` endpoint. I'll happily update if you point me in the right direction!

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: bwbonanno <bradfordbonanno@gmail.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-10-23 08:51:37 +00:00
cf8dad1ca0 index_scheduler.features() is no longer fallible 2023-10-23 10:38:56 +02:00
dd619913da Use RwLock to never persist cli state to db 2023-10-19 12:45:57 -07:00
9b55ff16e9 Merge #4134
4134: Bump rustix from 0.36.15 to 0.36.16 r=Kerollmops a=dependabot[bot]

Bumps [rustix](https://github.com/bytecodealliance/rustix) from 0.36.15 to 0.36.16.
<details>
<summary>Commits</summary>
<ul>
<li><a href="6534992521"><code>6534992</code></a> chore: Release rustix version 0.36.16</li>
<li><a href="4928cf7a38"><code>4928cf7</code></a> Disable riscv64 testing.</li>
<li><a href="8cc159c4c3"><code>8cc159c</code></a> Fix the <code>test_ttyname_ok</code> test when /dev/stdin is inaccessible. (<a href="https://redirect.github.com/bytecodealliance/rustix/issues/821">#821</a>)</li>
<li><a href="6dc7ba9478"><code>6dc7ba9</code></a> Downgrade dependencies and disable tests to compile under Rust 1.48.</li>
<li><a href="ded8986e7e"><code>ded8986</code></a> Disable MIPS in CI. (<a href="https://redirect.github.com/bytecodealliance/rustix/issues/793">#793</a>)</li>
<li><a href="739f9c3ba0"><code>739f9c3</code></a> Fixes for <code>Dir</code> on macOS, FreeBSD, and WASI.</li>
<li><a href="87481a97f4"><code>87481a9</code></a> Merge pull request from GHSA-c827-hfw6-qwvm</li>
<li>See full diff in <a href="https://github.com/bytecodealliance/rustix/compare/v0.36.15...v0.36.16">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rustix&package-manager=cargo&previous-version=0.36.15&new-version=0.36.16)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-19 08:01:36 +00:00
e761db582f Bump rustix from 0.36.15 to 0.36.16
Bumps [rustix](https://github.com/bytecodealliance/rustix) from 0.36.15 to 0.36.16.
- [Release notes](https://github.com/bytecodealliance/rustix/releases)
- [Commits](https://github.com/bytecodealliance/rustix/compare/v0.36.15...v0.36.16)

---
updated-dependencies:
- dependency-name: rustix
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-18 18:42:12 +00:00
d8c649b3cd Return recoverable error if we fail to retrieve metrics state 2023-10-18 08:28:24 -07:00
5e0485d8dd Merge #4131
4131: Reduce proximity range from 7 to 3 r=Kerollmops a=ManyTheFish

## Summary
This PR aims to reduce the impact of the proximity databases on the indexing time and on the database size by reducing the maximum distance between two words to be indexed in the proximity database.

## Stats

### Impact on database size and indexing time
![Impact on datasets](https://github.com/meilisearch/meilisearch/assets/6482087/28ed3d96-bdde-41c1-bdac-e90c1b1dbb23)

### Impact on search relevancy

<details>

| dataset_name | host_name        | Relevancy rate (Precision) | completion_rate  25.00% | completion_rate 50.00% | completion_rate 75.00% | completion_rate 100.00% |
|--------------|------------------|------------------------------------|-----------------|-----------------|-----------------|-----------------|
| FBIS         | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |           5.56% |
| FBIS         | 1_4_0            | percentile-75 |           0.00% |          12.50% |          35.00% |          45.00% |
| FBIS         | 1_4_0            | percentile-90 |          20.00% |          40.00% |                 |         100.00% |
| FBIS         | 1_4_0            | average       |           5.78% |          11.16% |          21.90% |          26.29% |
| FBIS         | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FBIS         | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |           5.56% |
| FBIS         | reduce_proximity | percentile-75 |           0.00% |          15.00% |          35.00% |          40.00% |
| FBIS         | reduce_proximity | percentile-90 |          20.00% |          40.00% |          85.00% |         100.00% |
| FBIS         | reduce_proximity | average       |           5.55% |          11.34% |          21.75% |          26.14% |
| FR94         | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-50 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | 1_4_0            | percentile-75 |           0.00% |           5.00% |          15.00% |          42.11% |
| FR94         | 1_4_0            | percentile-90 |          15.00% |          54.55% |         100.00% |         100.00% |
| FR94         | 1_4_0            | average       |           5.95% |          12.07% |          18.70% |          25.57% |
| FR94         | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-50 |           0.00% |           0.00% |           0.00% |           0.00% |
| FR94         | reduce_proximity | percentile-75 |           0.00% |           5.00% |          15.00% |          42.11% |
| FR94         | reduce_proximity | percentile-90 |          15.00% |          54.55% |         100.00% |         100.00% |
| FR94         | reduce_proximity | average       |           5.79% |          12.00% |          18.70% |          25.53% |
| FT           | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |          10.00% |
| FT           | 1_4_0            | percentile-75 |           0.00% |          15.00% |          30.00% |          40.00% |
| FT           | 1_4_0            | percentile-90 |          20.00% |          50.00% |          65.00% |         100.00% |
| FT           | 1_4_0            | average       |           5.08% |          12.58% |          20.00% |          25.49% |
| FT           | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| FT           | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |          10.00% |
| FT           | reduce_proximity | percentile-75 |           0.00% |          15.00% |          30.00% |          40.00% |
| FT           | reduce_proximity | percentile-90 |          10.00% |          45.00% |          60.00% |         100.00% |
| FT           | reduce_proximity | average       |           5.01% |          12.64% |          20.10% |          25.53% |
| LAT          | 1_4_0            | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | 1_4_0            | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | 1_4_0            | percentile-50 |           0.00% |           0.00% |           5.00% |           5.00% |
| LAT          | 1_4_0            | percentile-75 |           5.00% |          15.00% |          30.00% |          30.00% |
| LAT          | 1_4_0            | percentile-90 |          15.00% |          45.00% |          60.00% |          80.00% |
| LAT          | 1_4_0            | average       |           4.80% |          11.80% |          17.88% |          21.62% |
| LAT          | reduce_proximity | percentile-10 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | reduce_proximity | percentile-25 |           0.00% |           0.00% |           0.00% |           0.00% |
| LAT          | reduce_proximity | percentile-50 |           0.00% |           0.00% |           5.00% |           5.00% |
| LAT          | reduce_proximity | percentile-75 |           0.00% |          11.11% |          25.00% |          35.00% |
| LAT          | reduce_proximity | percentile-90 |          15.00% |          45.00% |          55.00% |          80.00% |
| LAT          | reduce_proximity | average       |           4.43% |          11.23% |          17.32% |          21.45% |

</details>

### Impact on Search time

| dataset_name | host_name        |      25.00% |      50.00% |      75.00% |     100.00% | Average     |
|--------------|------------------|------------:|------------:|------------:|------------:|-------------|
| FBIS         | 1_4_0            |        3.45 | 7.446666667 | 9.773489933 | 9.620300752 | 7.572614338 |
| FBIS         | reduce_proximity | 2.983333333 | 5.316666667 | 6.911073826 | 7.637218045 | 5.712072968 |
| FR94         | 1_4_0            | 2.236666667 |        4.45 | 5.523489933 | 4.560150376 | 4.192576744 |
| FR94         | reduce_proximity |        2.09 | 3.991666667 | 4.981543624 | 4.266917293 | 3.832531896 |
| FT           | 1_4_0            | 5.956666667 | 9.656666667 | 13.86912752 | 10.83270677 |  10.0787919 |
| FT           | reduce_proximity |        4.51 | 5.981666667 | 7.701342282 | 6.766917293 |  6.23998156 |
| LAT          | 1_4_0            | 5.856666667 | 9.233333333 | 12.98322148 | 10.78759398 | 9.715203865 |
| LAT          | reduce_proximity |        6.91 | 6.706666667 | 8.463087248 | 8.265037594 | 7.586197877 |

## Technical approach

- Ensure the MAX_DISTANCE constant is used everywhere needed
- Reduce the MAX_DISTANCE from 8 to 4

## Related

TBD

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-10-18 14:56:08 +00:00
27eec21415 Fix tests 2023-10-18 16:03:22 +02:00
62cc97ba70 update tests to include created_at and updated_at in v2 dumps 2023-10-18 13:31:39 +05:30
fed59cc1d5 extract created_at and updated_at dates from v2 dumps 2023-10-18 13:30:24 +05:30
2b3adef796 Use index_scheduler from configured app_data in middleware 2023-10-17 08:17:13 -07:00
956cfc5487 Add runtime check to metrics middleware 2023-10-16 13:48:57 -07:00
12fc878640 Merge remote-tracking branch 'origin/main' into enable-metrics-http 2023-10-16 13:48:01 -07:00
0a2e8b92a9 Merge #4129
4129: Add webinar banner in README r=curquiza a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-10-16 17:35:48 +00:00
c7a3f80de6 Merge #4073
4073: Simplify Puffin report exports r=ManyTheFish a=Kerollmops

This PR changes how we export Puffin reports by directly writing them to disk when the `exportPuffinReports` [experimental feature is enabled](https://www.meilisearch.com/docs/learn/experimental/overview) on the `/experimental-features` route. It also adds more Puffin logging to the deletion phase and grenad helpers. The Puffin reports are identified by the date and time at which they are exported.

## Todo List
 - [x] Change the CLI flag to be an API experimental option.
 - [x] Create [a PRD for this experimental feature (private)](https://www.notion.so/meilisearch/Export-Puffin-Reports-091df151e71c4edfb7d72f4bf995b3ea).
 - [x] Create and complete [a product discussion](https://github.com/meilisearch/product/discussions/693) (copy/paste PROFILING markdown?).
 - [x] Update the _PROFILING.md_ markdown file instructions.
 - [x] Change the debug logs of the processing operation (visible in puffin viewer).

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-10-16 15:48:15 +00:00
029d4de043 Add webinar banner in README 2023-10-16 14:38:10 +02:00
549f1bcccf Merge #4125
4125: Rename benchmark CI file to find it easily in the manifest list r=Kerollmops a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-10-16 11:38:28 +00:00
689ec7c7ad Make the experimental route /metrics activable via HTTP 2023-10-13 22:12:54 +00:00
3655d4bdca Move the puffin file export logic into the run function 2023-10-13 13:11:30 +02:00
055ca3935b Update index-scheduler/src/batch.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-13 13:11:30 +02:00
1b8871a585 Make cargo insta happy 2023-10-13 13:11:30 +02:00
bf8fac6676 Fix the tests 2023-10-13 13:11:30 +02:00
f2a9e1ebbb Improve the debugging experience in the puffin reports 2023-10-13 13:11:30 +02:00
c45c6cf54c Update the PROFILING.md file 2023-10-13 13:11:30 +02:00
513e61e9a3 Remove the experimental CLI flag 2023-10-13 13:11:29 +02:00
90a626bf80 Use the runtime feature to enable puffin report exporting 2023-10-13 13:11:29 +02:00
0d4acf2daa Fix the metrics product URL 2023-10-13 13:11:29 +02:00
58db8d85ec Add the exportPuffinReports option to the runtime features route 2023-10-13 13:11:29 +02:00
62dfd09dc6 Add more puffin logs to the deletion functions 2023-10-13 13:11:09 +02:00
656dadabea Expose an experimental flag to write the puffin reports to disk 2023-10-13 13:11:09 +02:00
c5f7893fbb Remove the puffin http dependency 2023-10-13 13:11:08 +02:00
8cf2ccf168 Rename benchmark CI file to find it easily in the manifest list 2023-10-12 18:41:26 +02:00
0913373a5e Merge #4122
4122: Bring back changes from `release-v1.4.1` into `main` r=Kerollmops a=curquiza



Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-12 15:57:47 +00:00
1a7f1282af Fix test to use new common Value type 2023-10-12 17:37:04 +02:00
bc747aac3a Cut the first 8 characters 2023-10-12 15:04:37 +02:00
be92376ab3 Fix originating commit branch 2023-10-12 13:51:41 +02:00
cf7e355735 Fix originating commit command 2023-10-12 13:12:53 +02:00
5f09d89ad1 Fetch the whole git history when cloning 2023-10-12 12:25:26 +02:00
6ecb26a3f8 Add more info on the commenting CI command 2023-10-12 11:54:56 +02:00
76c6f554d6 Merge #4101
4101: Bump webpki from 0.22.1 to 0.22.2 r=curquiza a=dependabot[bot]

Bumps [webpki](https://github.com/briansmith/webpki) from 0.22.1 to 0.22.2.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/briansmith/webpki/commits">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=webpki&package-manager=cargo&previous-version=0.22.1&new-version=0.22.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-12 08:46:04 +00:00
f343ef5f2f Merge #4108
4108: Fix bug where search with distinct attribute and no ranking, returns offset+limit hits r=curquiza a=vivek-26

# Pull Request

## Related issue
Fixes #4078 

## What does this PR do?
This PR - 
- Fixes bug where search with distinct attribute and no ranking, returns offset+limit hits.
- Adds unit and integration tests.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
2023-10-12 07:51:29 +00:00
96982a768a Triggers for every type of issue_comment 2023-10-11 23:18:29 +02:00
fca78fbc46 Merge #4082
4082: Update sprint_issue.md r=curquiza a=curquiza

Following internal recent discussions

Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-10-11 15:12:38 +00:00
67a678cfb6 Merge #4089
4089: Use a bufreader and bufwriter everytime there is a grenad<file> r=curquiza a=irevoire

# Pull Request
Wrap all the files we give to a grenad in a `BufReader` or `BufWriter`.

The dump import I tried in the issue went from 2h to 10 minutes on my machine.

I also ran a bunch of benchmarks on my machine, and we're faster by a few seconds everywhere but nothing huge.

-----

The one thing I’m worried about is whether we used to get the inner file from a grenad and then read from it right away, without seeking to the beginning of the file or reopening it.
Since we now use a bufreader, our read would return the bytes one buffer later and probably completely corrupt what we were supposed to read.

From what I see, it looks like it works, but I may have missed something, I don't know much about this part of the codebase.

This issue should not arise on the bufwriter, though, because if we're not able to write the content of the buffer I ensured that the `into_inner` of the bufwriter should return an internal error.

## Related issue
Fixes #4087


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-11 14:27:00 +00:00
d1331d8abf add integration test for distinct search with no ranking 2023-10-11 19:12:56 +05:30
19ba129165 add unit test for distinct search with no ranking 2023-10-11 19:02:27 +05:30
d4da06ff47 fix bug where distinct search with no ranking returns offset+limit hits 2023-10-11 19:02:16 +05:30
3e0471edae Only trigger CI on created or edited comments 2023-10-11 15:15:15 +02:00
432df03c4c Use the correct base filename in the comment bench CI 2023-10-11 14:57:03 +02:00
11958016dd Force a small if to avoid triggering the CI every time 2023-10-11 14:27:51 +02:00
63c250a04d Do not use the GITHUB_REF variable 2023-10-11 13:05:54 +02:00
06d8cd5b72 Make sure that we checkout on the right branch 2023-10-11 12:02:44 +02:00
c0f2724c2d get rid of the newly introduced error code in favor of an io::Error 2023-10-10 15:12:23 +02:00
d772073dfa use a bufreader everytime there is a grenad<file> 2023-10-10 15:00:30 +02:00
8fe8ddea79 Merge #4112
4112: Update version for the next release (v1.4.1) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-10-10 09:05:10 +00:00
8a95bf28e5 Update version for the next release (v1.4.1) in Cargo.toml 2023-10-10 09:01:45 +00:00
c0fd3dffb8 Setup a Github Token env var 2023-10-09 18:04:49 +02:00
c42fd5375f Fix the git commands again 2023-10-09 17:36:19 +02:00
b418c3a756 Use the PAT token instead 2023-10-09 16:52:04 +02:00
1cde455758 Fix workflow CI 2023-10-09 16:30:46 +02:00
ca19bae72f Prefer using a action to manage commands 2023-10-09 14:56:41 +02:00
705878ff59 Merge #4102
4102: Introduce the first bot that shows benchmarks results r=curquiza a=Kerollmops

TBD

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-05 10:11:06 +00:00
92c280d1c8 Update .github/workflows/trigger-benchmarks-on-message.yml
Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-10-05 12:09:52 +02:00
181e7a1e53 Introduce the first bot that triggers benchmarks 2023-10-05 12:05:38 +02:00
2e5abb4d2c Merge #4098
4098: Bump docker/metadata-action from 4 to 5 r=curquiza a=dependabot[bot]

Bumps [docker/metadata-action](https://github.com/docker/metadata-action) from 4 to 5.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/metadata-action/releases">docker/metadata-action's releases</a>.</em></p>
<blockquote>
<h2>v5.0.0</h2>
<ul>
<li>Node 20 as default runtime (requires <a href="https://github.com/actions/runner/releases/tag/v2.308.0">Actions Runner v2.308.0</a> or later) by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/metadata-action/pull/328">docker/metadata-action#328</a></li>
<li>Bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1 in <a href="https://redirect.github.com/docker/metadata-action/pull/333">docker/metadata-action#333</a></li>
<li>Bump csv-parse from 5.4.0 to 5.5.0 in <a href="https://redirect.github.com/docker/metadata-action/pull/320">docker/metadata-action#320</a></li>
<li>Bump semver from 7.5.1 to 7.5.2 in <a href="https://redirect.github.com/docker/metadata-action/pull/304">docker/metadata-action#304</a></li>
<li>Bump handlebars from 4.7.7 to 4.7.8 in <a href="https://redirect.github.com/docker/metadata-action/pull/315">docker/metadata-action#315</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v4.6.0...v5.0.0">https://github.com/docker/metadata-action/compare/v4.6.0...v5.0.0</a></p>
<h2>v4.6.0</h2>
<ul>
<li>Dedup and sort labels by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/metadata-action/pull/301">docker/metadata-action#301</a></li>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.3.0 to 0.5.0 in <a href="https://redirect.github.com/docker/metadata-action/pull/302">docker/metadata-action#302</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v4.5.0...v4.6.0">https://github.com/docker/metadata-action/compare/v4.5.0...v4.6.0</a></p>
<h2>v4.5.0</h2>
<ul>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.1.0 to 0.3.0 in <a href="https://redirect.github.com/docker/metadata-action/pull/296">docker/metadata-action#296</a></li>
<li>Bump csv-parse from 5.3.8 to 5.4.0 in <a href="https://redirect.github.com/docker/metadata-action/pull/294">docker/metadata-action#294</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v4.4.0...v4.5.0">https://github.com/docker/metadata-action/compare/v4.4.0...v4.5.0</a></p>
<h2>v4.4.0</h2>
<ul>
<li>Add <code>context</code> input to define the metadata provider by <a href="https://github.com/neilime"><code>`@​neilime</code></a>` in <a href="https://redirect.github.com/docker/metadata-action/pull/248">docker/metadata-action#248</a></li>
<li>Switch to actions-toolkit implementation by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/metadata-action/pull/266">docker/metadata-action#266</a> <a href="https://redirect.github.com/docker/metadata-action/pull/273">docker/metadata-action#273</a> <a href="https://redirect.github.com/docker/metadata-action/pull/284">docker/metadata-action#284</a></li>
<li>Bump csv-parse from 5.3.3 to 5.3.8 in <a href="https://redirect.github.com/docker/metadata-action/pull/271">docker/metadata-action#271</a> <a href="https://redirect.github.com/docker/metadata-action/pull/286">docker/metadata-action#286</a></li>
<li>Bump moment-timezone from 0.5.40 to 0.5.43 in <a href="https://redirect.github.com/docker/metadata-action/pull/268">docker/metadata-action#268</a> <a href="https://redirect.github.com/docker/metadata-action/pull/278">docker/metadata-action#278</a> <a href="https://redirect.github.com/docker/metadata-action/pull/281">docker/metadata-action#281</a></li>
<li>Bump semver from 7.4.0 to 7.5.0 in <a href="https://redirect.github.com/docker/metadata-action/pull/285">docker/metadata-action#285</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v4.3.0...v4.4.0">https://github.com/docker/metadata-action/compare/v4.3.0...v4.4.0</a></p>
<h2>v4.3.0</h2>
<ul>
<li>Provide outputs as env vars by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://redirect.github.com/docker/metadata-action/issues/257">#257</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v4.2.0...v4.3.0">https://github.com/docker/metadata-action/compare/v4.2.0...v4.3.0</a></p>
<h2>v4.2.0</h2>
<ul>
<li>Add <code>tz</code> attribute to handlebar date function by <a href="https://github.com/chroju"><code>`@​chroju</code></a>` (<a href="https://redirect.github.com/docker/metadata-action/issues/251">#251</a>)</li>
<li>Bump minimatch from 3.0.4 to 3.1.2 (<a href="https://redirect.github.com/docker/metadata-action/issues/242">#242</a>)</li>
<li>Bump csv-parse from 5.3.1 to 5.3.3 (<a href="https://redirect.github.com/docker/metadata-action/issues/245">#245</a>)</li>
<li>Bump json5 from 2.2.0 to 2.2.3 (<a href="https://redirect.github.com/docker/metadata-action/issues/252">#252</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v4.1.1...v4.2.0">https://github.com/docker/metadata-action/compare/v4.1.1...v4.2.0</a></p>
<h2>v4.1.1</h2>
<ul>
<li>Revert changes to set associated head sha on pull request event by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://redirect.github.com/docker/metadata-action/issues/239">#239</a>)
<ul>
<li>User can still set associated head sha on PR by setting the env var <code>DOCKER_METADATA_PR_HEAD_SHA=true</code></li>
</ul>
</li>
<li>Bump csv-parse from 5.3.0 to 5.3.1 (<a href="https://redirect.github.com/docker/metadata-action/issues/237">#237</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v4.1.0...v4.1.1">https://github.com/docker/metadata-action/compare/v4.1.0...v4.1.1</a></p>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Upgrade guide</summary>
<p><em>Sourced from <a href="https://github.com/docker/metadata-action/blob/master/UPGRADE.md">docker/metadata-action's upgrade guide</a>.</em></p>
<blockquote>
<h1>Upgrade notes</h1>
<h2>v2 to v3</h2>
<ul>
<li>Repository has been moved to docker org. Replace <code>crazy-max/ghaction-docker-meta@v2</code>
with <code>docker/metadata-action@v5</code></li>
<li>The default bake target has been changed: <code>ghaction-docker-meta</code> &gt; <code>docker-metadata-action</code></li>
</ul>
<h2>v1 to v2</h2>
<ul>
<li><a href="https://github.com/docker/metadata-action/blob/master/#inputs">inputs</a>
<ul>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-sha"><code>tag-sha</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-edge--tag-edge-branch"><code>tag-edge</code> / <code>tag-edge-branch</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-semver"><code>tag-semver</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-match--tag-match-group"><code>tag-match</code> / <code>tag-match-group</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-latest"><code>tag-latest</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-schedule"><code>tag-schedule</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-custom--tag-custom-only"><code>tag-custom</code> / <code>tag-custom-only</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#label-custom"><code>label-custom</code></a></li>
</ul>
</li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#basic-workflow">Basic workflow</a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#semver-workflow">Semver workflow</a></li>
</ul>
<h3>inputs</h3>
<table>
<thead>
<tr>
<th>New</th>
<th>Unchanged</th>
<th>Removed</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>tags</code></td>
<td><code>images</code></td>
<td><code>tag-sha</code></td>
</tr>
<tr>
<td><code>flavor</code></td>
<td><code>sep-tags</code></td>
<td><code>tag-edge</code></td>
</tr>
<tr>
<td><code>labels</code></td>
<td><code>sep-labels</code></td>
<td><code>tag-edge-branch</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-semver</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-match</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-match-group</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-latest</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-schedule</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-custom</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-custom-only</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>label-custom</code></td>
</tr>
</tbody>
</table>
<h4><code>tag-sha</code></h4>
<pre lang="yaml"><code>tags: |
  type=sha
</code></pre>
<h4><code>tag-edge</code> / <code>tag-edge-branch</code></h4>
<pre lang="yaml"><code>tags: |
  # default branch
&lt;/tr&gt;&lt;/table&gt; 
</code></pre>
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="96383f4557"><code>96383f4</code></a> Merge pull request <a href="https://redirect.github.com/docker/metadata-action/issues/320">#320</a> from docker/dependabot/npm_and_yarn/csv-parse-5.5.0</li>
<li><a href="f138b9677b"><code>f138b96</code></a> chore: update generated content</li>
<li><a href="9cf7015b15"><code>9cf7015</code></a> Bump csv-parse from 5.4.0 to 5.5.0</li>
<li><a href="5a8a5ff8df"><code>5a8a5ff</code></a> Merge pull request <a href="https://redirect.github.com/docker/metadata-action/issues/315">#315</a> from docker/dependabot/npm_and_yarn/handlebars-4.7.8</li>
<li><a href="2279d9af58"><code>2279d9a</code></a> chore: update generated content</li>
<li><a href="c659933213"><code>c659933</code></a> Bump handlebars from 4.7.7 to 4.7.8</li>
<li><a href="48d23ccc05"><code>48d23cc</code></a> Merge pull request <a href="https://redirect.github.com/docker/metadata-action/issues/333">#333</a> from docker/dependabot/npm_and_yarn/actions/core-1.10.1</li>
<li><a href="b83ffb48d6"><code>b83ffb4</code></a> chore: update generated content</li>
<li><a href="3207f2405f"><code>3207f24</code></a> Bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1</li>
<li><a href="63f4a263e5"><code>63f4a26</code></a> Merge pull request <a href="https://redirect.github.com/docker/metadata-action/issues/328">#328</a> from crazy-max/update-node20</li>
<li>Additional commits viewable in <a href="https://github.com/docker/metadata-action/compare/v4...v5">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/metadata-action&package-manager=github_actions&previous-version=4&new-version=5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-04 08:40:29 +00:00
44aaf5d9e3 Bump docker/metadata-action from 4 to 5
Bumps [docker/metadata-action](https://github.com/docker/metadata-action) from 4 to 5.
- [Release notes](https://github.com/docker/metadata-action/releases)
- [Upgrade guide](https://github.com/docker/metadata-action/blob/master/UPGRADE.md)
- [Commits](https://github.com/docker/metadata-action/compare/v4...v5)

---
updated-dependencies:
- dependency-name: docker/metadata-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-03 23:09:04 +00:00
ff0ababf65 Merge #4097
4097: Bump docker/setup-buildx-action from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/setup-buildx-action/releases">docker/setup-buildx-action's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Node 20 as default runtime (requires <a href="https://github.com/actions/runner/releases/tag/v2.308.0">Actions Runner v2.308.0</a> or later) by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/264">docker/setup-buildx-action#264</a></li>
<li>Bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1 in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/267">docker/setup-buildx-action#267</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-buildx-action/compare/v2.10.0...v3.0.0">https://github.com/docker/setup-buildx-action/compare/v2.10.0...v3.0.0</a></p>
<h2>v2.10.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.7.1 to 0.10.0 by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/258">docker/setup-buildx-action#258</a></li>
<li>Bump word-wrap from 1.2.3 to 1.2.5 in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/253">docker/setup-buildx-action#253</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-buildx-action/compare/v2.9.1...v2.10.0">https://github.com/docker/setup-buildx-action/compare/v2.9.1...v2.10.0</a></p>
<h2>v2.9.1</h2>
<ul>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.7.0 to 0.7.1 in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/248">docker/setup-buildx-action#248</a>
<ul>
<li>Fixes an issue where building Buildx does not match the local platform (<a href="https://redirect.github.com/docker/actions-toolkit/pull/135">docker/actions-toolkit#135</a>)</li>
</ul>
</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-buildx-action/compare/v2.9.0...v2.9.1">https://github.com/docker/setup-buildx-action/compare/v2.9.0...v2.9.1</a></p>
<h2>v2.9.0</h2>
<ul>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.6.0 to 0.7.0 in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/246">docker/setup-buildx-action#246</a>
<ul>
<li>Adds support to cache Buildx binary to hosted tool cache and GHA cache backend</li>
</ul>
</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-buildx-action/compare/v2.8.0...v2.9.0">https://github.com/docker/setup-buildx-action/compare/v2.8.0...v2.9.0</a></p>
<h2>v2.8.0</h2>
<ul>
<li>Only set specific flags for drivers supporting them by <a href="https://github.com/nicks"><code>`@​nicks</code></a>` in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/241">docker/setup-buildx-action#241</a></li>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.5.0 to 0.6.0 in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/242">docker/setup-buildx-action#242</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-buildx-action/compare/v2.7.0...v2.8.0">https://github.com/docker/setup-buildx-action/compare/v2.7.0...v2.8.0</a></p>
<h2>v2.7.0</h2>
<ul>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.3.0 to 0.5.0 in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/237">docker/setup-buildx-action#237</a> <a href="https://redirect.github.com/docker/setup-buildx-action/pull/238">docker/setup-buildx-action#238</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-buildx-action/compare/v2.6.0...v2.7.0">https://github.com/docker/setup-buildx-action/compare/v2.6.0...v2.7.0</a></p>
<h2>v2.6.0</h2>
<ul>
<li>Set node name for k8s driver when appending nodes by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/219">docker/setup-buildx-action#219</a></li>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.1.0-beta.18 to 0.3.0 in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/220">docker/setup-buildx-action#220</a> <a href="https://redirect.github.com/docker/setup-buildx-action/pull/229">docker/setup-buildx-action#229</a> <a href="https://redirect.github.com/docker/setup-buildx-action/pull/231">docker/setup-buildx-action#231</a> <a href="https://redirect.github.com/docker/setup-buildx-action/pull/236">docker/setup-buildx-action#236</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-buildx-action/compare/v2.5.0...v2.6.0">https://github.com/docker/setup-buildx-action/compare/v2.5.0...v2.6.0</a></p>
<h2>v2.5.0</h2>
<ul>
<li><code>cleanup</code> input to remove builder and temp files by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/213">docker/setup-buildx-action#213</a></li>
<li>do not remove builder using the <code>docker</code> driver by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/218">docker/setup-buildx-action#218</a></li>
<li>fix current context as builder name for <code>docker</code> driver by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/setup-buildx-action/pull/209">docker/setup-buildx-action#209</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-buildx-action/compare/v2.4.1...v2.5.0">https://github.com/docker/setup-buildx-action/compare/v2.4.1...v2.5.0</a></p>
<h2>v2.4.1</h2>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="f95db51fdd"><code>f95db51</code></a> Merge pull request <a href="https://redirect.github.com/docker/setup-buildx-action/issues/267">#267</a> from docker/dependabot/npm_and_yarn/actions/core-1.10.1</li>
<li><a href="998a87c2c1"><code>998a87c</code></a> chore: update generated content</li>
<li><a href="28bae59336"><code>28bae59</code></a> build(deps): bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1</li>
<li><a href="c215341715"><code>c215341</code></a> Merge pull request <a href="https://redirect.github.com/docker/setup-buildx-action/issues/264">#264</a> from crazy-max/update-node20</li>
<li><a href="02e9319239"><code>02e9319</code></a> chore: node 20 as default runtime</li>
<li><a href="5c9160effc"><code>5c9160e</code></a> chore: update generated content</li>
<li><a href="1283140f57"><code>1283140</code></a> chore: fix author in package.json</li>
<li><a href="c6afe06e4a"><code>c6afe06</code></a> vendor: bump <code>`@​docker/actions-toolkit</code>` from 0.10.0 to 0.12.0</li>
<li><a href="f35e0d5a04"><code>f35e0d5</code></a> chore: update dev dependencies</li>
<li><a href="baeb468fb2"><code>baeb468</code></a> dev: remove unneeded binaries</li>
<li>Additional commits viewable in <a href="https://github.com/docker/setup-buildx-action/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/setup-buildx-action&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-03 17:14:34 +00:00
c5336af1c5 Bump docker/setup-buildx-action from 2 to 3
Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 2 to 3.
- [Release notes](https://github.com/docker/setup-buildx-action/releases)
- [Commits](https://github.com/docker/setup-buildx-action/compare/v2...v3)

---
updated-dependencies:
- dependency-name: docker/setup-buildx-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-03 15:41:06 +00:00
1567758a56 Merge #4099
4099: Bump docker/build-push-action from 4 to 5 r=curquiza a=dependabot[bot]

Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 4 to 5.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/build-push-action/releases">docker/build-push-action's releases</a>.</em></p>
<blockquote>
<h2>v5.0.0</h2>
<ul>
<li>Node 20 as default runtime (requires <a href="https://github.com/actions/runner/releases/tag/v2.308.0">Actions Runner v2.308.0</a> or later) by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/954">docker/build-push-action#954</a></li>
<li>Bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1 in <a href="https://redirect.github.com/docker/build-push-action/pull/959">docker/build-push-action#959</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v4.2.1...v5.0.0">https://github.com/docker/build-push-action/compare/v4.2.1...v5.0.0</a></p>
<h2>v4.2.1</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://redirect.github.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>warn if docker config can't be parsed by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/957">docker/build-push-action#957</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v4.2.0...v4.2.1">https://github.com/docker/build-push-action/compare/v4.2.0...v4.2.1</a></p>
<h2>v4.2.0</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://redirect.github.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>display proxy configuration  by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/872">docker/build-push-action#872</a></li>
<li>chore(deps): Bump <code>`@​docker/actions-toolkit</code>` from 0.6.0 to 0.8.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/930">docker/build-push-action#930</a></li>
<li>chore(deps): Bump word-wrap from 1.2.3 to 1.2.5 in <a href="https://redirect.github.com/docker/build-push-action/pull/925">docker/build-push-action#925</a></li>
<li>chore(deps): Bump semver from 6.3.0 to 6.3.1 in <a href="https://redirect.github.com/docker/build-push-action/pull/902">docker/build-push-action#902</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v4.1.1...v4.2.0">https://github.com/docker/build-push-action/compare/v4.1.1...v4.2.0</a></p>
<h2>v4.1.1</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://redirect.github.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>Bump <code>`@​docker/actions-toolkit</code>` from 0.3.0 to 0.5.0 by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/880">docker/build-push-action#880</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v4.1.0...v4.1.1">https://github.com/docker/build-push-action/compare/v4.1.0...v4.1.1</a></p>
<h2>v4.1.0</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://redirect.github.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>Switch to actions-toolkit implementation by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/811">docker/build-push-action#811</a>  <a href="https://redirect.github.com/docker/build-push-action/pull/838">docker/build-push-action#838</a> <a href="https://redirect.github.com/docker/build-push-action/pull/855">docker/build-push-action#855</a> <a href="https://redirect.github.com/docker/build-push-action/pull/860">docker/build-push-action#860</a> <a href="https://redirect.github.com/docker/build-push-action/pull/875">docker/build-push-action#875</a></li>
<li>e2e: quay.io by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/799">docker/build-push-action#799</a> <a href="https://redirect.github.com/docker/build-push-action/pull/805">docker/build-push-action#805</a></li>
<li>e2e: local harbor and nexus by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/800">docker/build-push-action#800</a></li>
<li>e2e: add artifactory container registry to test against by <a href="https://github.com/jedevc"><code>`@​jedevc</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/804">docker/build-push-action#804</a></li>
<li>e2e: add distribution tests by <a href="https://github.com/jedevc"><code>`@​jedevc</code></a>` in <a href="https://redirect.github.com/docker/build-push-action/pull/814">docker/build-push-action#814</a> <a href="https://redirect.github.com/docker/build-push-action/pull/815">docker/build-push-action#815</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v4.0.0...v4.1.0">https://github.com/docker/build-push-action/compare/v4.0.0...v4.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="0565240e2d"><code>0565240</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/959">#959</a> from docker/dependabot/npm_and_yarn/actions/core-1.10.1</li>
<li><a href="3ab07f8801"><code>3ab07f8</code></a> chore: update generated content</li>
<li><a href="b9e7e4daec"><code>b9e7e4d</code></a> chore(deps): Bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1</li>
<li><a href="04d1a3b049"><code>04d1a3b</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/954">#954</a> from crazy-max/update-node20</li>
<li><a href="1a4d1a13fb"><code>1a4d1a1</code></a> chore: node 20 as default runtime</li>
<li><a href="675965c0e1"><code>675965c</code></a> chore: update generated content</li>
<li><a href="58ee34cb6b"><code>58ee34c</code></a> chore: fix author in package.json</li>
<li><a href="c97c4060bd"><code>c97c406</code></a> fix ProxyConfig type when checking length</li>
<li><a href="47d5369e0b"><code>47d5369</code></a> vendor: bump <code>`@​docker/actions-toolkit</code>` from 0.8.0 to 0.12.0</li>
<li><a href="8895c7468f"><code>8895c74</code></a> chore: update dev dependencies</li>
<li>Additional commits viewable in <a href="https://github.com/docker/build-push-action/compare/v4...v5">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/build-push-action&package-manager=github_actions&previous-version=4&new-version=5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-03 11:55:31 +00:00
37953afe1a Bump docker/build-push-action from 4 to 5
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 4 to 5.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](https://github.com/docker/build-push-action/compare/v4...v5)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-03 11:53:53 +00:00
43989fe2e4 Reduce proximity range from 7 to 3 2023-10-03 12:16:48 +02:00
de3f992ae4 Merge #4095
4095: Bump docker/setup-qemu-action from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [docker/setup-qemu-action](https://github.com/docker/setup-qemu-action) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/setup-qemu-action/releases">docker/setup-qemu-action's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Node 20 as default runtime (requires <a href="https://github.com/actions/runner/releases/tag/v2.308.0">Actions Runner v2.308.0</a> or later) by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/setup-qemu-action/pull/102">docker/setup-qemu-action#102</a></li>
<li>Bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1 in <a href="https://redirect.github.com/docker/setup-qemu-action/pull/103">docker/setup-qemu-action#103</a></li>
<li>Bump semver from 6.3.0 to 6.3.1 in <a href="https://redirect.github.com/docker/setup-qemu-action/pull/89">docker/setup-qemu-action#89</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-qemu-action/compare/v2.2.0...v3.0.0">https://github.com/docker/setup-qemu-action/compare/v2.2.0...v3.0.0</a></p>
<h2>v2.2.0</h2>
<ul>
<li>Trim off spaces in <code>platforms</code> input by <a href="https://github.com/Chocobo1"><code>`@​Chocobo1</code></a>` in <a href="https://redirect.github.com/docker/setup-qemu-action/pull/64">docker/setup-qemu-action#64</a></li>
<li>Switch to actions-toolkit implementation by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://redirect.github.com/docker/setup-qemu-action/pull/70">docker/setup-qemu-action#70</a> <a href="https://redirect.github.com/docker/setup-qemu-action/pull/80">docker/setup-qemu-action#80</a> <a href="https://redirect.github.com/docker/setup-qemu-action/pull/83">docker/setup-qemu-action#83</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-qemu-action/compare/v2.1.0...v2.2.0">https://github.com/docker/setup-qemu-action/compare/v2.1.0...v2.2.0</a></p>
<h2>v2.1.0</h2>
<ul>
<li>Use context for inputs by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://redirect.github.com/docker/setup-qemu-action/issues/62">#62</a>)</li>
<li>Use built-in <code>getExecOutput</code> by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://redirect.github.com/docker/setup-qemu-action/issues/61">#61</a>)</li>
<li>Remove workaround for <code>setOutput</code> by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://redirect.github.com/docker/setup-qemu-action/issues/63">#63</a>)</li>
<li>Bump <code>`@​actions/core</code>` from 1.6.0 to 1.10.0 (<a href="https://redirect.github.com/docker/setup-qemu-action/issues/54">#54</a> <a href="https://redirect.github.com/docker/setup-qemu-action/issues/58">#58</a> <a href="https://redirect.github.com/docker/setup-qemu-action/issues/59">#59</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/setup-qemu-action/compare/v2.0.0...v2.1.0">https://github.com/docker/setup-qemu-action/compare/v2.0.0...v2.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="68827325e0"><code>6882732</code></a> Merge pull request <a href="https://redirect.github.com/docker/setup-qemu-action/issues/103">#103</a> from docker/dependabot/npm_and_yarn/actions/core-1.10.1</li>
<li><a href="183f4af504"><code>183f4af</code></a> chore: update generated content</li>
<li><a href="f17493529e"><code>f174935</code></a> build(deps): bump <code>`@​actions/core</code>` from 1.10.0 to 1.10.1</li>
<li><a href="2e423eb500"><code>2e423eb</code></a> Merge pull request <a href="https://redirect.github.com/docker/setup-qemu-action/issues/89">#89</a> from docker/dependabot/npm_and_yarn/semver-6.3.1</li>
<li><a href="ecc406afa7"><code>ecc406a</code></a> Bump semver from 6.3.0 to 6.3.1</li>
<li><a href="12dec5e201"><code>12dec5e</code></a> Merge pull request <a href="https://redirect.github.com/docker/setup-qemu-action/issues/102">#102</a> from crazy-max/update-node20</li>
<li><a href="c29b312130"><code>c29b312</code></a> chore: node 20 as default runtime</li>
<li><a href="34ae628c8f"><code>34ae628</code></a> chore: update generated content</li>
<li><a href="1f3d2e1ac0"><code>1f3d2e1</code></a> chore: fix author in package.json</li>
<li><a href="277dbe8c9c"><code>277dbe8</code></a> vendor: bump <code>`@​docker/actions-toolkit</code>` from 0.3.0 to 0.12.0</li>
<li>Additional commits viewable in <a href="https://github.com/docker/setup-qemu-action/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/setup-qemu-action&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-10-03 08:20:50 +00:00
c668a29ed5 Bump webpki from 0.22.1 to 0.22.2
Bumps [webpki](https://github.com/briansmith/webpki) from 0.22.1 to 0.22.2.
- [Commits](https://github.com/briansmith/webpki/commits)

---
updated-dependencies:
- dependency-name: webpki
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-02 21:53:45 +00:00
98f0618065 Bump docker/setup-qemu-action from 2 to 3
Bumps [docker/setup-qemu-action](https://github.com/docker/setup-qemu-action) from 2 to 3.
- [Release notes](https://github.com/docker/setup-qemu-action/releases)
- [Commits](https://github.com/docker/setup-qemu-action/compare/v2...v3)

---
updated-dependencies:
- dependency-name: docker/setup-qemu-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-01 17:18:20 +00:00
b10eeb0e41 Update .github/ISSUE_TEMPLATE/sprint_issue.md 2023-09-26 16:47:04 +02:00
4a8515e9fc Update sprint_issue.md 2023-09-26 16:46:18 +02:00
86b314626d Merge #4080
4080: Bring back changes from v1.4.0 into main r=Kerollmops a=curquiza



Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: dogukanakkaya <doguakkaya27@hotmail.com>
2023-09-26 08:13:49 +00:00
bb79bdb3f8 Merge #4074
4074: Enable analytics in debug builds r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4072

## What does this PR do?
- Stop disabling the analytics if meilisearch has been compiled in debug mode

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-09-21 15:54:41 +00:00
d429e7da99 make clippy happy 2023-09-21 17:41:12 +02:00
584b772248 enable metrics in debug builds 2023-09-21 17:01:05 +02:00
1806c04a9a Merge #4065
4065: Dependency issue every 6 months r=curquiza a=curquiza

To avoid spending too much time on it (1 every two sprints)

If you disagree, `@Kerollmops`, for security or any reason, please close the PR

Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-09-19 15:58:06 +00:00
3485e8f1c4 Update .github/workflows/dependency-issue.yml 2023-09-18 09:59:22 +02:00
fe697a6685 Dependency issue every 6 months 2023-09-18 09:57:58 +02:00
eb4135f8ae Merge #4044
4044: Add more integrations to SDK CI r=curquiza a=curquiza

For integration scope management, and also to anticipate bugs and breaking changes for the engine team, we need to add more SDK tests to the CI

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-09-13 14:05:41 +00:00
ec4844c3a6 Add dart, swift, dotnet, and java test
Display docker image

Add strapi and firebase

Add rails and symfony tests

Remove strapi and firestore tests

Fix dotnet SDK CI

Use specific dart SDK version

Disable coverage for ruby SDK

Prevent pushing coverage information to codecov

Remove codecoverage token

Trigger Build

Trigger Build

Trigger Build

Trigger Build

Trigger Build
2023-09-12 17:31:17 +02:00
77c3787b78 Merge #4056
4056: Rewrite segment_analytics module with the destructuring syntax r=Kerollmops a=vivek-26

# Pull Request

## Related issue
Fixes #3928

## What does this PR do?
- This PR uses Rust Destructuring syntax in the `segment_analytics` module, such that adding or deleting fields causes an error at compile time.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
2023-09-12 08:07:50 +00:00
4f902490b9 struct destructuring for DocumentsFetchAggregator 2023-09-12 10:39:28 +05:30
1faee92748 struct destructuring for HealthAggregator 2023-09-12 10:39:28 +05:30
5831466525 struct destructuring for DocumentsDeletionAggregator and TasksAggregator 2023-09-12 10:39:28 +05:30
3cdb3e4eaf struct destructuring for DocumentsAggregator 2023-09-12 10:39:27 +05:30
26f34ec7a2 struct destructuring for FacetSearchAggregator 2023-09-12 10:39:27 +05:30
07d36180ad struct destructuring for MultiSearchAggregator 2023-09-12 10:39:27 +05:30
4c641b79a2 use rust struct destructuring for SearchAggregator 2023-09-12 10:39:27 +05:30
76c05d1b20 Merge #4053
4053: Fix the stats of the documents deletion by filter r=Kerollmops a=irevoire

# Pull Request

The issue was that the operation « DocumentDeletionByFilter » was not declared as an index operation. That means the index stats were not reprocessed after the application of the operation.

## Related issue
Fixes #4018

## What does this PR do?
- Move the `DocumentDeletionByFilter` internal operation into the category of the `IndexOperation`. This means that the stats will automatically be re-processed after a batch is processed.
- Update a test to ensure that the stats are valid after each operation

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-09-11 15:53:26 +00:00
ef31ab52a4 Merge #4051
4051: Implement the snapshots on demand r=Kerollmops a=irevoire

# Pull Request
Private link: [PRD available here](https://www.notion.so/meilisearch/On-demand-snapshots-5676e542b905459d96eec228da133b00#847ff0cafeb64fe09e8ee7150852b474)
Specification here: https://github.com/meilisearch/specifications/pull/258

## Prototype
A prototype is available under the name: `prototype-snapshot-on-demand-0`.

## Related issue
Fixes #4052
## What does this PR do?
- Introduce a new route, `POST /snapshots` to create snapshots on demand
- Introduce a new api-key action `snapshot.create`
- Introduce a new analytic `Snapshot Created` sent every time a snapshot is created.

## Notes for the team

I made a prototype so users can test the feature before v1.5 comes out. But we can merge the PR as-is.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-09-11 15:16:08 +00:00
34fac115d5 fix clippy 2023-09-11 17:15:57 +02:00
791c5cd874 makes clippy happy 2023-09-11 17:02:01 +02:00
5bea1092fb fix the flaky test 2023-09-11 16:56:26 +02:00
056b2c387d refactor the tests suite slightly 2023-09-11 16:56:26 +02:00
a09686fcbd Merge #3997
3997: Refactor empty arrays/objects should return empty instead of null r=Kerollmops a=dogukanakkaya

# Pull Request

## What does this PR do?
At the moment, if we select properties of empty objects and empty arrays of objects using dot notation, like:
```json
{
  "array": [],
  "object": {}
}
```
```rs
GetDocumentOptions { fields: Some(vec!["array.name", "object.name"]) }
```
the result is null if the array/object has no property yet.

I am not sure whether this is expected or the correct behaviour, but when I add a document with a property assigned to an empty array/object and later select it, it returns null, which is rather weird and unexpected in my opinion.

This PR fixes that issue by returning an empty vector if the array is empty or an empty map if the object is empty. This is not added for `permissive-json-pointer/src/lib.rs:224` because `create_array` loops over each item. Selecting a single property that is an object, in an array of objects, would result in the other objects being empty maps instead of none.
```json
"doggos": [
  {
    "jean": {
      "race": {
        "name": "bernese mountain",
      }
    }
  },
  {
    "marc": {
       "age": 4,
       "race": {
          "name": "golden retriever",
        }
     }
   }
]
```
```rs
GetDocumentOptions { fields: Some(vec!["doggos.jean"]) }
```
This would result in the `jean` object and an extra empty object for `marc`.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: dogukanakkaya <doguakkaya27@hotmail.com>
2023-09-11 13:46:02 +00:00
b4c44603db Merge #4009
4009: Bump rustls-webpki from 0.100.1 to 0.100.2 r=Kerollmops a=dependabot[bot]

Bumps [rustls-webpki](https://github.com/rustls/webpki) from 0.100.1 to 0.100.2.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/rustls/webpki/releases">rustls-webpki's releases</a>.</em></p>
<blockquote>
<h2>v/0.100.2</h2>
<h2>Release notes</h2>
<ul>
<li>certificate path building and verification is now capped at 100 signature validation operations to avoid the risk of CPU usage denial-of-service attack when validating crafted certificate chains producing quadratic runtime. This risk affected both clients, as well as servers that verified client certificates.</li>
</ul>
<h2>What's Changed</h2>
<ul>
<li>v0.100.2 prep by <a href="https://github.com/cpu"><code>`@​cpu</code></a>` in <a href="https://redirect.github.com/rustls/webpki/pull/154">rustls/webpki#154</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/rustls/webpki/compare/v/0.100.1...v/0.100.2">https://github.com/rustls/webpki/compare/v/0.100.1...v/0.100.2</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="c8b821450b"><code>c8b8214</code></a> Bump MSRV to 1.60</li>
<li><a href="855752292e"><code>8557522</code></a> Avoid testing MSRV of dev-dependencies</li>
<li><a href="73a7f0c7d7"><code>73a7f0c</code></a> Cargo: version 0.100.1 -&gt; 0.100.2</li>
<li><a href="4ea052366f"><code>4ea0523</code></a> verify_cert: enforce maximum number of signatures.</li>
<li>See full diff in <a href="https://github.com/rustls/webpki/compare/v/0.100.1...v/0.100.2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rustls-webpki&package-manager=cargo&previous-version=0.100.1&new-version=0.100.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-09-11 13:11:07 +00:00
393be40179 Refactor empty arrays/objects should return empty instead of null 2023-09-11 15:56:15 +03:00
2c1d60f79b get rid of a warning 2023-09-11 14:40:22 +02:00
487d493f49 Merge #4043
4043: Bring back hotfixes from v1.3.3 into v1.4.0 r=Kerollmops a=curquiza



Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: curquiza <clementine@meilisearch.com>
2023-09-11 12:27:34 +00:00
08af69a33b improve a test to understand what's going on with the ci 2023-09-11 14:23:57 +02:00
9258e5b5bf Fix the stats of the documents deletion by filter
The issue was that the operation « DocumentDeletionByFilter » was not
declared as an index operation. That means the indexes stats were not
reprocessed after the application of the operation.
2023-09-11 14:04:10 +02:00
ddd34a488a update the api-key tests 2023-09-11 13:52:07 +02:00
526c2b3602 Merge #4050
4050: Bump webpki from 0.22.0 to 0.22.1 r=Kerollmops a=dependabot[bot]

Bumps [webpki](https://github.com/briansmith/webpki) from 0.22.0 to 0.22.1.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/briansmith/webpki/commits">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=webpki&package-manager=cargo&previous-version=0.22.0&new-version=0.22.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-09-11 11:46:22 +00:00
e8c9367686 implement the snapshots on demand 2023-09-11 12:35:57 +02:00
9636c5f558 Bump webpki from 0.22.0 to 0.22.1
Bumps [webpki](https://github.com/briansmith/webpki) from 0.22.0 to 0.22.1.
- [Commits](https://github.com/briansmith/webpki/commits)

---
updated-dependencies:
- dependency-name: webpki
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-09-11 10:32:34 +00:00
b310830b5d Improve test-suite.yml for CI failing when disabling tokenization (#4005)
* [Update] test-suite.yml

Added New run command for cargo tree without default features using if-then block

* [Updated] test-disabled-tokenization in test-suite.yml

* [Updated] test-suite.yml

* Update .github/workflows/test-suite.yml

---------

Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-09-11 12:30:53 +02:00
462b4654c4 Merge #4028
4028: Fix highlighting bug when searching for a phrase with cropping r=ManyTheFish a=vivek-26

# Pull Request

## Related issue
Fixes #3975

## What does this PR do?
This PR -
- Fixes the bug where searching **only** for a phrase (containing multiple words) along with cropping, highlighted only the first word of the phrase.
- Adds unit test case for the above mentioned scenario.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
2023-09-11 07:58:41 +00:00
abfa7ded25 use a new temp index in the test 2023-09-08 12:32:47 +05:30
f2837aaec2 add another test case 2023-09-08 11:39:54 +05:30
11df155598 fix highlighting bug when searching for a phrase with cropping 2023-09-08 11:39:52 +05:30
651657c03e Fix git conflicts 2023-09-07 16:48:13 +02:00
b9ad59c969 Merge #4041
4041: Register the swap indexe task in a spawn blocking to be sure to never… r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4040

## What does this PR do?
- Register the swap indexes task in a spawn blocking task

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-09-07 10:22:01 +00:00
66aa682e23 Register the swap indexe task in a spawn blocking to be sure to never block the main thread 2023-09-07 11:37:02 +02:00
256cf33bca Merge #4039
4039: Fix multiple vectors dimensions r=ManyTheFish a=Kerollmops

This PR fixes #4035, making providing multiple vectors in documents possible. This is fixed by extracting the vectors from the non-flattened version of the documents.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-09-07 09:25:58 +00:00
9945cbf9db Merge #4038
4038: Fix filter escaping issues r=ManyTheFish a=Kerollmops

This PR fixes #4034 by always escaping the sequences. Users must always put quotes (simple or double) to escape the filter values.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-09-06 12:29:29 +00:00
03d0f628bd Use the unescaper crate to unescape any char sequence 2023-09-06 13:59:45 +02:00
ea78060916 Fix tests that were supposed to escape characters 2023-09-06 13:59:45 +02:00
b42d48187a Add a test case scenario 2023-09-06 13:59:44 +02:00
679c0b0f97 Extract the vectors from the non-flattened version of the documents 2023-09-06 12:26:00 +02:00
e02d0064bd Add a test case scenario 2023-09-06 12:26:00 +02:00
7ef3572f11 Merge #4037
4037: Update version for the next release (v1.3.3) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-09-06 09:50:58 +00:00
93285041a9 Update version for the next release (v1.3.3) in Cargo.toml 2023-09-06 09:23:20 +00:00
dc3d9c90d9 Merge #3994
3994: Fix synonyms with separators r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #3977

## Available prototype
```
$ docker pull getmeili/meilisearch:prototype-fix-synonyms-with-separators-0
```

## What does this PR do?
- add a new test
- filter the empty synonyms after normalization


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-09-05 14:42:46 +00:00
287cf25d39 Merge #4033
4033: Fix thai synonyms r=Kerollmops a=Kerollmops

Fixes #4031

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-09-05 13:54:33 +00:00
66aa6d5871 Ignore tokens with empty normalized value during indexing process 2023-09-05 15:44:14 +02:00
8ac5b765bc Fix synonyms normalization 2023-09-04 16:12:48 +02:00
cea93e9a37 Merge #4016
4016: Define the full Homebrew formula path r=curquiza a=Kerollmops

This PR fixes #4015 by defining the full Homebrew formula path.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-09-04 13:10:28 +00:00
085aad0a94 Add a test 2023-09-04 14:39:33 +02:00
e9b62aacb3 Merge #4025
4025: Bump Swatinem/rust-cache from 2.5.1 to 2.6.2 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.5.1 to 2.6.2.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.6.2</h2>
<h2>What's Changed</h2>
<ul>
<li>dep: Use <code>smol-toml</code> instead of <code>toml</code> by <a href="https://github.com/NobodyXu"><code>@​NobodyXu</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/164">Swatinem/rust-cache#164</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2...v2.6.2">https://github.com/Swatinem/rust-cache/compare/v2...v2.6.2</a></p>
<h2>v2.6.1</h2>
<ul>
<li>Fix hash contributions of <code>Cargo.lock</code>/<code>Cargo.toml</code> files.</li>
</ul>
<h2>v2.6.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Add &quot;buildjet&quot; as a second <code>cache-provider</code> backend <a href="https://github.com/joroshiba"><code>@​joroshiba</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/154">Swatinem/rust-cache#154</a></li>
<li>Clean up sparse registry index.</li>
<li>Do not clean up src of <code>-sys</code> crates.</li>
<li>Remove <code>.cargo/credentials.toml</code> before saving.</li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/joroshiba"><code>@​joroshiba</code></a> made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/154">Swatinem/rust-cache#154</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.5.1...v2.6.0">https://github.com/Swatinem/rust-cache/compare/v2.5.1...v2.6.0</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.6.2</h2>
<ul>
<li>Fix <code>toml</code> parsing.</li>
</ul>
<h2>2.6.1</h2>
<ul>
<li>Fix hash contributions of <code>Cargo.lock</code>/<code>Cargo.toml</code> files.</li>
</ul>
<h2>2.6.0</h2>
<ul>
<li>Add &quot;buildjet&quot; as a second <code>cache-provider</code> backend.</li>
<li>Clean up sparse registry index.</li>
<li>Do not clean up src of <code>-sys</code> crates.</li>
<li>Remove <code>.cargo/credentials.toml</code> before saving.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="e207df5d26"><code>e207df5</code></a> 2.6.2</li>
<li><a href="decb69d790"><code>decb69d</code></a> Update dependencies and add changelog</li>
<li><a href="ab6b2769d1"><code>ab6b276</code></a> dep: Use <code>smol-toml</code> instead of <code>toml</code> (<a href="https://redirect.github.com/swatinem/rust-cache/issues/164">#164</a>)</li>
<li><a href="578b235f6e"><code>578b235</code></a> 2.6.1</li>
<li><a href="5113490c3f"><code>5113490</code></a> prepare 2.6.1</li>
<li><a href="c0e052c18c"><code>c0e052c</code></a> Fix hashing of parsed <code>Cargo.toml</code> (<a href="https://redirect.github.com/swatinem/rust-cache/issues/160">#160</a>)</li>
<li><a href="4e0f4b19dd"><code>4e0f4b1</code></a> Fix typo in hashing parsed <code>Cargo.lock</code> (<a href="https://redirect.github.com/swatinem/rust-cache/issues/159">#159</a>)</li>
<li><a href="b919e1427f"><code>b919e14</code></a> feat: Add logging to <code>Cargo.lock</code>/<code>Cargo.toml</code> hashing (<a href="https://redirect.github.com/swatinem/rust-cache/issues/156">#156</a>)</li>
<li><a href="b8a6852b4f"><code>b8a6852</code></a> 2.6.0</li>
<li><a href="80c47cc945"><code>80c47cc</code></a> Clean up <code>credentials.toml</code></li>
<li>Additional commits viewable in <a href="https://github.com/swatinem/rust-cache/compare/v2.5.1...v2.6.2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.5.1&new-version=2.6.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-09-04 12:30:53 +00:00
456960d2c7 Bump Swatinem/rust-cache from 2.5.1 to 2.6.2
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.5.1 to 2.6.2.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.5.1...v2.6.2)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-09-01 17:17:39 +00:00
3dda176723 Merge #4020
4020: Update version for the next release (v1.4.0) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-08-28 13:51:23 +00:00
af0f6f0bf0 Merge branch 'main' into update-version-v1.4.0 2023-08-28 15:08:59 +02:00
ccf3ba3f32 Merge #4019
4019: Bringing back changes from `v1.3.2` onto `main` r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: irevoire <irevoire@users.noreply.github.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-08-28 12:14:11 +00:00
65528a3e06 Update version for the next release (v1.4.0) in Cargo.toml 2023-08-28 11:52:28 +00:00
6db80b0836 Define the full Homebrew formula path 2023-08-24 11:24:47 +02:00
cdb4b3e024 Merge #4013
4013: Fix the ranking rule by temporarily disabling an assert in the bucket sort algorithm r=Kerollmops a=Kerollmops

This PR temporarily disables an assertion that was making the search crash. [I created a tracking issue](https://github.com/meilisearch/meilisearch/issues/4012) to find a better way to fix this.

It no longer reverts a20e4d447c, which seemed to generate unreachable graphs and make the bucket sort ranking algorithm panic because of entering an unreachable state. We discussed that below in the comments.

Temporary fixes #4002, fixes #4006, and fixes #3995.

---

It took me approximately 2 days to find the first bad commit just because I'm bad in `git bisect` x `bash`, i.e. [I misused `%1` with `$!` to kill the most recently backgrounded job](https://unix.stackexchange.com/a/340084/212574)...

<details>
  <summary>Here is the script I used to find the invalid commit</summary>

```bash
#!/usr/bin/env bash

set -x

# remove the data
rm -rf data.ms

# build meilisearch
cargo build --release
# ignore this commit if it doesn't compile
if [[ $? != 0 ]]; then
    exit 125
fi

# index the dump and start from it
./target/release/meilisearch \
--http-addr 'localhost:7705' \
--import-dump $HOME/Downloads/modified-20230822-083016113.dump &

# wait 10 sec while it indexes the docs
sleep 5

# check if the server crashes on requests
echo '{
    "q": "rtx 305",
    "attributesToHighlight": [
        "*"
    ],
    "highlightPreTag": "<ais-highlight-0000000000>",
    "highlightPostTag": "</ais-highlight-0000000000>",
    "limit": 21,
    "offset": 0
}' | xh 'localhost:7705/indexes/arvutitark_local_orderables/search'

last_exit_code=$?

# Now kill Meilisearch
kill $!

# Clean the potential Cargo.lock
git checkout .

exit $last_exit_code
```
</details>

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-08-23 15:30:56 +00:00
8c0ebd1331 Update milli/src/search/new/bucket_sort.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-08-23 16:40:39 +02:00
5130e06b41 Temporarily disable an assert in the ranking rules 2023-08-23 16:11:54 +02:00
08e27ef73f Merge pull request #4008 from meilisearch/fix-highlighting-panic
Bump charabia to 0.8.3
2023-08-23 11:56:45 +02:00
914b125c5f Merge #3945
3945: Do not leak field information on error r=Kerollmops a=vivek-26

# Pull Request

## Related issue
Fixes #3865

## What does this PR do?
This PR ensures that `InvalidSortableAttribute` and `InvalidFacetSearchFacetName` errors do not leak field information, i.e. fields which are not part of `displayedAttributes` in the settings are hidden from the error message.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
2023-08-22 18:55:27 +00:00
e59d7f238c Bump rustls-webpki from 0.100.1 to 0.100.2
Bumps [rustls-webpki](https://github.com/rustls/webpki) from 0.100.1 to 0.100.2.
- [Release notes](https://github.com/rustls/webpki/releases)
- [Commits](https://github.com/rustls/webpki/compare/v/0.100.1...v/0.100.2)

---
updated-dependencies:
- dependency-name: rustls-webpki
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-08-22 18:10:53 +00:00
717b069907 Bump charabia to 0.8.3 2023-08-22 16:25:00 +02:00
7ea154673a Merge #4000
4000: Update version for the next release (v1.3.2) in Cargo.toml r=irevoire a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: irevoire <irevoire@users.noreply.github.com>
2023-08-16 10:41:33 +00:00
b947f3bb9d Update version for the next release (v1.3.2) in Cargo.toml 2023-08-16 08:20:36 +00:00
4c35817c5f Merge #3998
3998: Accept the `null` JSON value as a value of the `_vectors` field r=irevoire a=Kerollmops

This PR fixes #3979 by accepting `null` JSON values in the `_vectors` fields provided by the user.

Can the reviewer please verify that I am merging in the right branch?
I think we must create a new _release-v1.3.2_.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-08-16 08:12:24 +00:00
c53841e166 Accept the null JSON value as the value of _vectors 2023-08-14 16:03:55 +02:00
fd81945597 Merge #3987
3987: Update dependencies for v1.4 r=curquiza a=ManyTheFish

# Pull Request

## Related issue
Fixes #3870 

## What does this PR do?
- [Update dependencies](d7ff5368b4)
- [upgrade itertools = "0.10.5"](d0582d01f4)
- [upgrade sysinfo = "0.29.7"](507c661352)
- [upgrade memmap2 = "0.7.1"](489e0d5cd0)
- [upgrade rstar = "0.11.0"](3d9d08e3b2)
- [upgrade fastrand = "2.0.0"](1af7083c48)
- [upgrade deserr = "0.6.0"](7fe77045af)
- [upgrade indexmap = "2.0.0"](95e4960b0c)
- [update rust toolchain = "1.71.1"](937b7b5da5)

## Remaining un-upgraded dependencies
- vergen 7.5.1 --> 8.2.4: I wasn't able to quickly understand the changes in the lib API to upgrade the dependency
- rustls 0.20.8 --> 0.21.6: Meilisearch doesn't have any direct dependency on it


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-08-10 16:46:17 +00:00
794e491152 update rust toolchain 2023-08-10 18:09:02 +02:00
cab27c2ab4 upgrade indexmap = "2.0.0" 2023-08-10 18:09:02 +02:00
624fa9052f upgrade deserr = "0.6.0" 2023-08-10 18:09:02 +02:00
359ede4862 upgrade fastrand = "2.0.0" 2023-08-10 18:09:02 +02:00
60c11dbdbd upgrade rstar - "0.11.0" 2023-08-10 18:09:02 +02:00
dacee40ebc upgrade memmap2 = "0.7.1" 2023-08-10 18:09:02 +02:00
6089083a8e upgrade sysinfo = "0.29.7" 2023-08-10 18:09:02 +02:00
cc2c19d4c3 upgrade itertools = "0.10.5" 2023-08-10 18:09:02 +02:00
a5c56fac8a Update dependencies 2023-08-10 18:09:02 +02:00
e4e49e63d0 Merge #3993
3993: Bringing back changes from v1.3.1 to `main` r=irevoire a=curquiza



Co-authored-by: irevoire <irevoire@users.noreply.github.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-08-10 14:30:02 +00:00
00bd7bd19a Merge #3990
3990: Removed unnecessary borrow call that failed nightly tests r=irevoire a=JannisK89

# Pull Request

## Related issue
Fixes #3988

## What does this PR do?
- Removes unnecessary borrow call that was causing warnings when running tests on nightly.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!

Please let me know if there is anything else I can do to improve this PR.
Thank you.

Co-authored-by: JannisK89 <jannis.karanikis@gmail.com>
2023-08-10 11:42:19 +00:00
ef3d098b4d Merge #3976
3976: Fix the get stats method r=ManyTheFish a=irevoire

# Pull Request

- The get stats method of the index-scheduler was not using the processing tasks at all. As a result, it was returning a wrong number of enqueued tasks and 0 processing tasks.
- Added a test
- Currently, this method is **ONLY** used to compute the `meilisearch_nb_tasks` field of the **experimental feature** metrics.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3972


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-08-10 10:55:50 +00:00
8084cf29f3 Merge #3946
3946: Settings customizing tokenization r=irevoire a=ManyTheFish

# Pull Request
This pull Request allows the User to customize Meilisearch Tokenization by providing specialized settings.

## Small documentation
All the new settings can be set and reset like the other index settings by calling the route `/indexes/:name/settings`

### `nonSeparatorTokens`
The Meilisearch word segmentation uses a default list of separators to segment words. However, for specific use cases, some of the default separators shouldn't be considered separators; the `nonSeparatorTokens` setting allows removing some tokens from the default list of separators.

***Request payload `PUT`- `/indexes/articles/settings/non-separator-tokens`***
```json
["@", "#", "&"]
```

### `separatorTokens`
Some use cases need to define additional separators: some are related to a specific way of parsing technical documents, others are related to encodings in documents. The `separatorTokens` setting allows adding some tokens to the list of separators.

***Request payload `PUT`- `/indexes/articles/settings/separator-tokens`***
```json
["&sect;", "&sep"]
```

### `dictionary`
The Meilisearch word segmentation relies on separators and language-based word-dictionaries to segment words, however, this segmentation is inaccurate on technical or use-case specific vocabulary (like `G/Box` to say `Gear Box`), or on proper nouns (like `J. R. R.` when parsing `J. R. R. Tolkien`), the `dictionary` setting allows defining a list of words that would be segmented as described in the list.

***Request payload `PUT`- `/indexes/articles/settings/dictionary`***
```json
["J. R. R.", "J.R.R."]
```

This last feature synergizes well with the `stopWords` setting or the `synonyms` setting, allowing words to be segmented and their synonyms to be correctly retrieved:
***Request payload `PATCH`- `/indexes/articles/settings`***
```json
{
    "dictionary": ["J. R. R.", "J.R.R."],
    "synonyms": {
            "J.R.R.": ["jrr", "J. R. R."],
            "J. R. R.": ["jrr", "J.R.R."],
            "jrr": ["J.R.R.", "J. R. R."],
    }
}
```

### Related specifications:
- https://github.com/meilisearch/specifications/pull/255
- https://github.com/meilisearch/specifications/pull/254

### Try it with Docker

```bash
$ docker pull getmeili/meilisearch:prototype-tokenizer-customization-3
```

## Related issue
Fixes #3610
Fixes #3917
Fixes https://github.com/meilisearch/product/discussions/468
Fixes https://github.com/meilisearch/product/discussions/160
Fixes https://github.com/meilisearch/product/discussions/260
Fixes https://github.com/meilisearch/product/discussions/381
Fixes https://github.com/meilisearch/product/discussions/131
Related to https://github.com/meilisearch/meilisearch/issues/2879

Fixes #2760

## What does this PR do?
- Add a setting `nonSeparatorTokens` allowing to remove a token from the default separator tokens
- Add a setting `separatorTokens` allowing to add a token in the separator tokens
- Add a setting `dictionary` allowing to override the segmentation on specific words
- add new error code `invalid_settings_non_separator_tokens` (invalid_request)
- add new error code `invalid_settings_separator_tokens` (invalid_request)
- add new error code `invalid_settings_dictionary` (invalid_request)

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2023-08-10 10:01:18 +00:00
5a7c1bde84 Fix clippy 2023-08-10 11:27:56 +02:00
6b2d671be7 Fix PR comments 2023-08-10 10:44:07 +02:00
43c13faeda Update milli/src/update/index_documents/extract/extract_docid_word_positions.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-08-10 10:05:03 +02:00
29adfc2f68 Merge #3989
3989: Improve test suite CI for manual trigger events r=irevoire a=curquiza

# Why?

To be able to test https://github.com/meilisearch/meilisearch/issues/3988 before merging the PR solving it

# How do we ensure this PR works?

I triggered `workflow_dispatch` (i.e. manual trigger) on this branch, and we can see all the jobs have been triggered (even if some of them are failing -> it's another issue)
https://github.com/meilisearch/meilisearch/actions/runs/5810609073

We can see the tests triggered by the PR are restricted as expected: https://github.com/meilisearch/meilisearch/actions/runs/5810605977

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-08-10 07:55:48 +00:00
064ee95b1c removed unnecessary borrow call 2023-08-10 08:41:25 +02:00
604d533b31 Improve test suite CI for workflow_dispatch event 2023-08-09 16:47:28 +02:00
44c1900f36 Merge #3986
3986: Fix geo bounding box with strings r=ManyTheFish a=irevoire

# Pull Request

When sending a document with one geofield of type string (i.e.: `{ "_geo": { "lat": 12, "lng": "13" }}`), the geobounding box would exclude this document.

This PR fixes this issue by automatically parsing the string value in case we're working on a geofield.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3973

## What does this PR do?
- Automatically parse the facet value if and only if we're working on a geofield.
- Make insta work with snapshots in loops or closures executed multiple times. (You may need to update your CLI if it panics after this PR: `cargo install cargo-insta`).
- Add one integration test in milli and in meilisearch to ensure it works forever.
- Add three snapshots for the dump that mysteriously disappeared I don't know how


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-08-09 07:58:15 +00:00
04671d0751 Merge #3981
3981: Truncate the normalized long facets used in the search for facet value r=irevoire a=ManyTheFish

# Pull Request
 Truncate the normalized long facets used in the search for facet value

## targeted release

v1.3.1

## Related issue
Fixes #3978


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-08-08 15:07:07 +00:00
4f4c669d50 add back some dump snapshots that disappeared. it's completely unrelated to this PR 2023-08-08 16:58:14 +02:00
8dc5acf998 Try fix 2023-08-08 16:52:36 +02:00
fc2590fc9d Add a test 2023-08-08 16:43:08 +02:00
35758db9ec Truncate the the normalized long facets used in search for facet value 2023-08-08 16:38:30 +02:00
4988199bb9 ensure the geoboundingbox works with strings and int geofields in milli and meilisearch 2023-08-08 16:29:25 +02:00
83991ee770 enable the multi-snapshot attribute in insta. This will let us use insta in loops 2023-08-08 16:28:38 +02:00
9d061cec26 automatically parse the filterable attribute to float if it's a geo field 2023-08-08 16:28:07 +02:00
4a21fecf67 Merge branch 'main' into settings-customizing-tokenization 2023-08-08 16:08:16 +02:00
ae8e69c030 Add API route for the new settings 2023-08-08 16:03:16 +02:00
fe819a9d80 fix the get stats method
It was not taking into account the processing tasks at all
2023-08-08 13:21:15 +02:00
e338ceb97f Merge #3982
3982: Update version for the next release (v1.3.1) in Cargo.toml r=irevoire a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: irevoire <irevoire@users.noreply.github.com>
2023-08-08 10:30:56 +00:00
75c87d5391 Update version for the next release (v1.3.1) in Cargo.toml 2023-08-08 10:30:06 +00:00
dd57873f8e hide fields not in the displayedAttributes list from errors 2023-08-05 16:03:10 +05:30
3dda93d50f Merge #3968
3968: Bump svenstaro/upload-release-action from 2.6.1 to 2.7.0 r=curquiza a=dependabot[bot]

Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.6.1 to 2.7.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/releases">svenstaro/upload-release-action's releases</a>.</em></p>
<blockquote>
<h2>2.7.0</h2>
<ul>
<li>Allow setting an explicit target_commitish <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/46">#46</a> (thanks <a href="https://github.com/Spikatrix"><code>@​Spikatrix</code></a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md">svenstaro/upload-release-action's changelog</a>.</em></p>
<blockquote>
<h2>[2.7.0] - 2023-07-28</h2>
<ul>
<li>Allow setting an explicit target_commitish <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/46">#46</a> (thanks <a href="https://github.com/Spikatrix"><code>@​Spikatrix</code></a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="1beeb572c1"><code>1beeb57</code></a> 2.7.0</li>
<li><a href="5206d34958"><code>5206d34</code></a> Bump deps</li>
<li><a href="80d7a7e41c"><code>80d7a7e</code></a> Merge pull request <a href="https://redirect.github.com/svenstaro/upload-release-action/issues/46">#46</a> from Spikatrix/master</li>
<li><a href="5eb2ffd70b"><code>5eb2ffd</code></a> Merge pull request <a href="https://redirect.github.com/svenstaro/upload-release-action/issues/110">#110</a> from svenstaro/dependabot/npm_and_yarn/word-wrap-1.2.4</li>
<li><a href="07af2f374a"><code>07af2f3</code></a> Bump word-wrap from 1.2.3 to 1.2.4</li>
<li><a href="5164410c7d"><code>5164410</code></a> Push dist</li>
<li><a href="f47fb36ff1"><code>f47fb36</code></a> Use the ref api to check if a tag exists</li>
<li><a href="212d4babf8"><code>212d4ba</code></a> Rethrow getTag error if not 404</li>
<li><a href="7670b98fa0"><code>7670b98</code></a> Push dist files</li>
<li><a href="ac438791c4"><code>ac43879</code></a> Warn when target_commit is ignored</li>
<li>Additional commits viewable in <a href="https://github.com/svenstaro/upload-release-action/compare/2.6.1...2.7.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=svenstaro/upload-release-action&package-manager=github_actions&previous-version=2.6.1&new-version=2.7.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-08-02 09:55:39 +00:00
117146ec4e Merge #3969
3969: Bump Swatinem/rust-cache from 2.5.0 to 2.5.1 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.5.0 to 2.5.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.5.1</h2>
<ul>
<li>Fix hash contribution of <code>Cargo.lock</code>.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.5.1</h2>
<ul>
<li>Fix hash contribution of <code>Cargo.lock</code>.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="dd05243424"><code>dd05243</code></a> 2.5.1</li>
<li><a href="65dbc54a5d"><code>65dbc54</code></a> update changelog</li>
<li><a href="be7377e68e"><code>be7377e</code></a> fix <code>src/config.ts</code>: Remove <code>sort_object</code> (<a href="https://redirect.github.com/swatinem/rust-cache/issues/152">#152</a>)</li>
<li>See full diff in <a href="https://github.com/swatinem/rust-cache/compare/v2.5.0...v2.5.1">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.5.0&new-version=2.5.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-08-02 09:19:03 +00:00
884b4d47b1 Bump Swatinem/rust-cache from 2.5.0 to 2.5.1
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.5.0 to 2.5.1.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.5.0...v2.5.1)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-08-01 17:22:43 +00:00
023cb0c2de Bump svenstaro/upload-release-action from 2.6.1 to 2.7.0
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.6.1 to 2.7.0.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/2.6.1...2.7.0)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-08-01 17:22:37 +00:00
f391039a6f Merge #3967
3967: Bring back changes from `release-v1.3.0` into `main` r=ManyTheFish a=curquiza

Using a temp branch because of git conflict

Co-authored-by: Cong Chen <cong.chen@ocrlabs.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-08-01 16:22:09 +00:00
fcdd20b533 Fix README after git conflict 2023-08-01 16:06:33 +02:00
b45c36cd71 Merge branch 'main' into tmp-release-v1.3.0 2023-08-01 15:05:17 +02:00
151c31c18f Merge #3963
3963: Fix the milli crate r=ManyTheFish a=irevoire

Milli was using the serde feature of either without enabling it first; thus, it wasn't working.

It was working in meilisearch, though, because `meilisearch-types` was using the feature which enables it globally for all the other crates.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3962

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-07-31 09:32:08 +00:00
a8ad0902d3 Fix the milli crate
Milli was using the serde feature of either without enabling it first, thus it wasn't working
2023-07-31 11:08:27 +02:00
e917dbdebb Merge #3957
3957: fix: upgrade mimalloc dependency to resolve FreeBSD build r=irevoire a=ThatOneCalculator

# Pull Request

## Related issue
Fixes #3806

## What does this PR do?
- Upgrades mimalloc to 0.1.37
- Fixes build on FreeBSD

Ref: https://github.com/meilisearch/meilisearch/issues/3806#issuecomment-1653693468

Tested and working on FreeBSD 13.1-RELEASE-p5

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: ThatOneCalculator <kainoa@t1c.dev>
2023-07-31 08:49:36 +00:00
ba919b6123 fix: ⬆️ up mimalloc 2023-07-28 20:35:47 -07:00
9d5e3457e5 Fix clippy 2023-07-27 14:21:19 +02:00
04694071fe Fix the synonyms settings display 2023-07-27 14:12:23 +02:00
5b0157c6c6 Merge #3955
3955: Update mini-dashboard to version 0.2.11 r=curquiza a=bidoubiwa

# Pull Request

## What does this PR do?
- Updates the mini-dashboard to version [0.2.11](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.11)

## PR checklist
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
2023-07-27 11:59:55 +00:00
3b9a87c790 Update mini-dashboard to version 0.2.11 2023-07-27 13:16:32 +02:00
3a3414270d Merge #3952
3952: Use the new safe `read-txn-no-tls` heed feature r=ManyTheFish a=Kerollmops

[We recently found out](https://github.com/meilisearch/heed/issues/191#issuecomment-1650280513) that the `read-sync-txn` heed feature was invalid and must be removed from this crate. We were declaring it in milli/meilisearch but, fortunately, not sharing the `RoTxn`s across threads 😮‍💨

[I recently introduced the `read-txn-no-tls` heed feature](https://github.com/meilisearch/heed/pull/194), which implements `RoTxn: Send` and allows multiple read transactions on a single thread (which we use).

This PR removes the `sync-read-txn` heed feature from the _Cargo.toml_ file. I will fix this in heed v0.20.0 and will file a RustSec advisory in the meantime.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-07-26 16:40:58 +00:00
d06e0905db Merge #3953
3953: Update UTM campaign r=curquiza a=macraig

# Pull Request

## What does this PR do?
Redirect CTAs to Cloud landing page



Co-authored-by: María <maria@Marias-MacBook-Pro.local>
2023-07-26 15:20:40 +00:00
939b2fc6fd Merge #3949
3949: Fix score details casing r=Kerollmops a=ManyTheFish

# Pull Request

Fixes #3941


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-07-26 14:14:59 +00:00
fae61372be Redirect CTAs to Cloud landing page 2023-07-26 15:54:43 +02:00
d8b47b689e Use the new read-txn-no-tls heed feature 2023-07-26 15:45:15 +02:00
b0c1a9504a ensure the synonyms are updated when the tokenizer settings are changed 2023-07-26 09:33:42 +02:00
be72be7c0d Merge #3942
3942: Normalize for the search the facets values r=ManyTheFish a=Kerollmops

This PR improves and fixes the search for facet values feature. Searching for _bre_ wasn't returning facet values like _brévent_ or _brô_.

The issue was related to the fact that facets are normalized but not in the same way as the `searchableAttributes` are. We decided to normalize them further and add another intermediate database where the key is the normalized facet value, and the value is a set of the non-normalized facets. We then use these non-normalized ones to get the correct counts by fetching the associated databases.

### What's missing in this PR?
 - [x] Apply the change to the whole set of `SearchForFacetValue::execute` conditions.
 - [x] Factorize the code that does an intermediate normalized value fetch in a function.
 - [x] Add or modify the search for facet value test.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-07-25 14:37:17 +00:00
88559a2d54 Fix score details casing 2023-07-25 15:49:33 +02:00
59201a7852 Use snapshot instead of asserts
Co-authored-by: Many the fish <many@meilisearch.com>
2023-07-25 15:34:05 +02:00
9e3e69373e Merge #3948
3948: Fix hnsw internal panic by using another library r=ManyTheFish a=Kerollmops

This pull request fixes #3923. The issue concerns the `hnsw` crate panicking due to a wrong call to the `[T]::copy_from_slice` function.

I decided to switch the library to `instant-distance`, which is maintained [by someone of trust](https://lib.rs/~djc), who maintains a lot of very important crates.

- [x] Make Clippy happy with the first commit.
- [x] Reproduce the #3923 bug without this patch
- [x] Check if the bug disappeared with this PR.
- [x] Test with [the Algolia e-commerce dataset](https://www.notion.so/meilisearch/Algolia-Ecommerce-c5fa3b5f23a7485295df7e87306d5859).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-07-25 13:28:25 +00:00
d57026cd96 Support synonyms synergies 2023-07-25 15:01:42 +02:00
29ab54b259 Replace the hnsw crate by the instant-distance one 2023-07-25 12:37:35 +02:00
41c9e8856a Fix test 2023-07-25 10:55:37 +02:00
86d8bb3a3e Make clippy happy (again) 2023-07-25 10:30:50 +02:00
d4ff59fcf5 Fix clippy 2023-07-24 18:42:26 +02:00
9c485f8563 Make the search and the indexing work 2023-07-24 18:35:20 +02:00
0e2a5951b4 Add more advanced tests 2023-07-24 18:04:58 +02:00
691a536893 Implement the facet search with the normalized index 2023-07-24 17:56:17 +02:00
d8d12d5979 Be able to set and reset settings 2023-07-24 17:00:18 +02:00
df528b41d8 Normalize for the search the facets values 2023-07-20 17:57:07 +02:00
2452ec55b4 Merge #3940
3940: Update mini dashboard v0.2.9 r=gillian-meilisearch a=bidoubiwa

# Pull Request


## What does this PR do?
- Updates the mini-dashboard to version [0.2.9](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.9)

## PR checklist
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
2023-07-20 15:08:59 +00:00
54ae1b5a67 Update mini-dashboard to version 0.2.9 2023-07-20 14:11:17 +02:00
0597a97c84 Update tests 2023-07-20 11:15:10 +02:00
3070a20580 Merge #3937
3937: Update Charabia to the last version r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #3924

## What does this PR do?
- Update Charabia


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-07-19 14:57:38 +00:00
0497f93494 Update Charabia to the last version 2023-07-19 15:19:32 +02:00
2dfbb6813a Merge #3913
3913: Expose a Puffin server to profile the indexing process r=Kerollmops a=Kerollmops

This PR exposes a puffin HTTP server to expose the internal timing it takes to index documents, delete documents, or update the settings of an index.

<img width="1752" alt="Capture d’écran 2023-07-10 à 18 44 58" src="https://github.com/meilisearch/meilisearch/assets/3610253/a3c7a6bf-db5b-42f4-8be1-c4e31c869843">

## To be done

 - [x] Move the puffin HTTP server under a feature flag.
 - [x] Use [the `puffin::set_scopes_on` function](https://docs.rs/puffin/latest/puffin/fn.set_scopes_on.html) to toggle it (by using the feature directly).
     When this function is called with `false`, [a call to `profile_scope!` takes 1-2ns](https://docs.rs/puffin/latest/puffin/fn.set_scopes_on.html).
 - [x] Create a _PROFILING.md_ file explaining how to use it.
   - [x] Explain that merging scopes on the interface is not always useful.
 - [x] Add more info on the number of batched tasks (using the `puffin::profile_scope!` macro data).
   - I added more info, but that's more continuous work when we consider we need more info here and there.
 - [x] Clean up some scopes, and don't touch too much code to inject puffin.
   - I am not sure that the _index_documents/mod.rs_ function is that complex with the addition of the scope.
 - [x] Think about what we consider frames. One indexation operation or the whole program. When must we stop the frame, then?
   - What we consider a frame is one single `IndexScheduler::tick` execution.
   - We can change that later.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-07-19 09:44:01 +00:00
8f589a5cce Introduce a PROFILING.md tutorial to profile Meilisearch 2023-07-18 17:38:13 +02:00
0b8bbd8750 Toggle the puffin profiling with a feature flag 2023-07-18 17:38:13 +02:00
eef95de30e First iteration on exposing puffin profiling 2023-07-18 17:38:13 +02:00
13a13a4862 Merge #3932
3932: Add UTM tracking to README r=gillian-meilisearch a=Strift

# Pull Request

Hi `@macraig` `@curquiza` 👋 

## Related issue

N/A

## What does this PR do?

This PR adds UTM tracking to the links in the README.

It adds UTM params to:
- links in the nav
- links to where2watch
- links in the Features section
- Docs & Getting started links (cc `@guimachiavelli`)
- links in the SDKs section
- links in the Advanced usage section
- links in the Telemetry section
- links in the Get in touch section

Additionally, this PR adds a link to the Meilisearch logo (there is currently none.)

## On the UTM pattern

All links in this PR use the new convention `@gmourier` and I agreed on: 
- utm_campaign=oss
- utm_source=github
- utm_medium=meilisearch
- utm_content= where the link is in the page

It's worth considering updating the tracking link for the Cloud, which is the only one that doesn’t follow the new convention. It is currently using `utm_campaign=oss&utm_source=engine&utm_medium=meilisearch`.

Merging analytics from different UTMs is doable on Amplitude, but can't be done in Fathom. Plus, having two different conventions creates knowledge overhead, and is bound to result in corrupt analytics at some point. I suggest we change the Cloud UTM trackers too — the sooner we eat the frog, the better imo. 

## PR checklist

Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Strift <strift@Strifts-MacBook-Pro.local>
Co-authored-by: Strift <laurent@meilisearch.com>
2023-07-18 13:42:50 +00:00
d5ab750627 Merge #3935
3935: Update mini-dashboard to version 0.2.8 r=Kerollmops a=bidoubiwa

# Pull Request


## What does this PR do?
- Updates the mini-dashboard to version [0.2.8](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.8)

## PR checklist
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
2023-07-18 12:59:29 +00:00
2afd10f96d Update mini-dashboard to version 0.2.8 2023-07-18 14:49:36 +02:00
e691c92ed5 Replace UTM link on Cloud 2023-07-18 14:48:00 +02:00
2d2619bd90 Merge #3933
3933: Stop computing the update files size r=ManyTheFish a=Kerollmops

This PR, related #3934, removes the part which computes the total size of the `data.ms/update_files` folder, which can take a lot of time when many updates must be processed.

It is not breaking API-side but is breaking on the result we will show to the user. The `databaseSize` field returned by the `/stats` endpoint will be reduced.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-07-18 12:02:08 +00:00
516d2df862 Stop computing the update files size 2023-07-18 11:51:30 +02:00
c76b488ab1 Merge #3929
3929: Fix a panic when sorting geo fields represented by strings r=Kerollmops a=Kerollmops

This PR fixes #3927 by retrieving and parsing the original string values into f64s. I also added a test to ensure we don't break it in a future version.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-07-18 09:13:22 +00:00
d383afc82b Fix the geo sort when lat and lng are strings 2023-07-17 18:28:04 +02:00
f9d94c5845 Test geo sort with string lat/lng 2023-07-17 18:28:03 +02:00
928ab2f9b1 Add UTM params to contact section links 2023-07-14 18:24:03 +02:00
7c18a9375f Add UTM params to telemetry section links 2023-07-14 18:19:46 +02:00
05a311f9be Add UTM params to Advanced usage links 2023-07-14 18:17:51 +02:00
9b1b9b409e Add UTM params to SDKs logos link 2023-07-14 18:17:28 +02:00
7f555f23e8 Add UTM params to SDKs section links 2023-07-14 18:15:17 +02:00
a0bfc9f63a Add UTM params to docs & getting started links 2023-07-14 18:02:21 +02:00
3155264381 Add UTM params to features links 2023-07-14 17:51:25 +02:00
42400c381e Add UTM on demo link 2023-07-14 17:43:05 +02:00
08c7dab528 Add UTM on demo gif 2023-07-14 17:40:37 +02:00
8590687515 Add UTM params to nav links 2023-07-14 17:34:45 +02:00
8f5d127b1e Add links on Meilisearch logo 2023-07-14 17:26:06 +02:00
7745cc9d3c Merge #3921
3921: Deactivate camel case segmentation r=dureuill a=ManyTheFish

# Pull Request
This PR deactivates the camel case segmentation to restore the ability to tolerate typos in camel-cased words

## Related issue
Fixes #3869
Fixes #3818

## What does this PR do?
- deactivates camelcase segmentation

related to #3919



Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-07-13 11:00:14 +00:00
657f24ec5f Merge #3907
3907: Add telemetry for define field to search on at query time r=dureuill a=ManyTheFish

Add "attributes_to_search_on" telemetry usage counter:
```json
"attributes_to_search_on": {
   "total_number_of_use": 12,
},
```

This measures the number of search queries in which the user uses the `attributesToSearchOn` field.

related to https://github.com/meilisearch/specifications/pull/251

## reviewers:

- `@macraig` for validating the telemetry's name
- `@dureuill` for validating the code

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-07-13 10:14:00 +00:00
c106906f8f deactivate camelCase segmentation 2023-07-13 12:06:27 +02:00
9c0691156f Add tests 2023-07-13 11:53:13 +02:00
359b90288d Use saturating add 2023-07-13 11:38:28 +02:00
13e3f8faae Fix typo 2023-07-13 11:34:50 +02:00
fd7c66fd62 Merge #3915
3915: `attributesToSearchOn` supports wildcards r=ManyTheFish a=dureuill

# Pull Request

## Related issue

Fixes #3912  and #3911 

## What does this PR do?
- Adding `*` in the list of `attributesToSearchOn` allows searching on all the `searchableAttributes`.
- If `searchableAttributes contains "*"`, then any attribute is accepted in the `attributesToSearchOn` list.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-13 09:33:10 +00:00
183f23f40d More relevant test
Co-authored-by: Many the fish <many@meilisearch.com>
2023-07-12 16:06:15 +02:00
2b4160ebb9 Merge #3918
3918: Update and fix the Test Suite CI r=dureuill a=Kerollmops

This Pull Request renames the _Run test with Rust_ job into _Setup test with Rust_ for more clarity and runs `cargo update -p proc-macro2` to make the project compile with the latest Rust Nightly.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-07-12 13:18:25 +00:00
8ba1c8f88f Update proc-macro2 to compile with the latest nightly 2023-07-12 11:47:27 +02:00
16c8437b28 Update tests 2023-07-12 11:21:19 +02:00
8e7edf8ea7 Rename the jobs in the CI for clarity 2023-07-12 11:16:01 +02:00
4310928803 Fixes #3912 2023-07-12 10:08:56 +02:00
74315b4ea8 Fixes #3911 2023-07-12 10:08:29 +02:00
177e6e27f9 Merge #3901
3901: Fix experimental analytics r=curquiza a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/specifications/pull/250#discussion_r1253191583

## What does this PR do?
- `snake_case` instead of `camelCase` for feature fields


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-10 16:22:59 +00:00
50afe724ae Merge #3909
3909: Effectively send the `vector.max_vector_size` telemetry r=curquiza a=Kerollmops

This PR effectively aggregates and sends the `vector.max_vector_size` analytics value.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-07-10 15:44:30 +00:00
012c960fad Send the vector.max_vector_size telemetry 2023-07-10 16:50:37 +02:00
76f6d3357e Merge #3908
3908: Allow a comma-separated value to the `vector` argument in GET search r=Kerollmops a=dureuill

# Pull Request

For request:

```
 curl \
  -X GET 'http://localhost:7700/indexes/movies/search?vector=0.123,1.124,244'
```

Before PR: 

```
{"message":"Invalid value type for parameter `vector`: expected a string, but found a string: `0,1,2`","code":"invalid_search_vector","type":"invalid_request","link":"https://docs.meilisearch.com/errors#invalid_search_vector"}%
```

After PR:

```
{"hits":[],"query":"","vector":[0.123,1.124,244.0],"processingTimeMs":0,"limit":20,"offset":0,"estimatedTotalHits":1000}%
```

cc `@gmourier` `@bidoubiwa` 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-10 14:25:44 +00:00
d59e969c16 Allow a comma-separated value to the vector argument in GET search 2023-07-10 16:16:34 +02:00
eb7a1aa7af Merge #3904
3904: Sort by lexicographic order after normalization r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3893

## What does this PR do?
- Re-sort stop words after normalization so they're not sent out-of-order to the FST


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-10 12:12:05 +00:00
9daccdf7f0 Merge #3895
3895: Update README.md r=curquiza a=ferdi05

Adding the free-trial option

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ferdinand Boas <ferdinand.boas@gmail.com>
2023-07-10 11:26:47 +00:00
c30a14cb97 Add telemetry 2023-07-10 13:12:12 +02:00
a3ca8412ce Merge #3906
3906: Add "scoring.*" analytics to multi search route r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/specifications/pull/252#discussion_r1254375746 by implementing (3): multi search now returns the "score.show_ranking_rule" and "score.show_ranking_rule_details" analytics.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-10 09:51:30 +00:00
106f98aa72 Add "scoring.*" analytics to multi search route 2023-07-10 11:45:43 +02:00
40fa59d64c Sort by lexicographic order after normalization 2023-07-10 09:26:59 +02:00
bb40ce6e35 Experimental features analytics match the spec 2023-07-10 08:57:53 +02:00
0c8dbf6fa6 Merge #3897
3897: Add automated tests for `/experimental-features` route r=Kerollmops a=dureuill

# Pull Request

## What does this PR do?
- Make `RuntimeTogglableFeatures` `Eq`
- Add various tests for the `/experimental-features` route
  - Integration tests for the route itself
  - Integration tests for the effect of enabling `scoreDetails` and `vectorStore` through this route.
  - Dump integration tests


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-06 13:37:56 +00:00
dd6519b64f Dump tests 2023-07-06 14:22:29 +02:00
da02a9cf32 Make RuntimeTogglableFeatures Eq 2023-07-06 14:20:58 +02:00
ff192bb480 Merge #3889
3889: Display the total number of tasks matching a filter/query r=dureuill a=Kerollmops

This PR returns a new field on the `/tasks` routes. The `total` field exposes the total number of tasks that match the given filter/query. It is useful to display information on a user interface and can help understand when progress is made in processing tasks, i.e., the total number of tasks on `/tasks?statuses=succeeded` will increase over time.

Fixes #3888.

- [ ] Update the specs for the `/tasks` route.

## How have I implemented it?

I found it much easier to run the task filtering system twice: once with the original `from` and `limit` parameters and a second time without. The second call will return the total number of tasks that match the query, not only the number of tasks on the current page.

So far, in terms of performance, there doesn't seem to be any issue. I tried different filters with something like 250k tasks. Note that there is a limit of 1M tasks in the queue.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-07-06 10:23:09 +00:00
437ee55c57 Update README.md
Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
2023-07-06 12:15:52 +02:00
22762808ab Fix the tests 2023-07-06 12:13:29 +02:00
b1717865ea Update README.md
Adding the free-trial option
2023-07-06 11:52:35 +02:00
86b834c9e4 Display the total number of tasks in the tasks route 2023-07-06 10:05:18 +02:00
2d3cec11a7 Search integration test to check score details and vector store 2023-07-06 09:02:02 +02:00
76e1ee9988 integration test on "/experimental-features" route 2023-07-06 09:01:28 +02:00
222615d3df Allow to get/set features in integration test server 2023-07-06 09:01:05 +02:00
11d024c613 Authentication tests 2023-07-06 09:00:51 +02:00
886c8bb647 Merge #3891
3891: Fix the way we compute the 99th percentile r=dureuill a=Kerollmops

This PR fixes how we compute the 99th percentile by avoiding using float and doing the multiplication and divisions in the correct order avoiding going out of the buffer of timings. You can see the issue on [this rust playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021).

When there are a very small number of successful requests, the number is so tiny that the 99th percentile calculation sometimes gives an index out of the buffer. In this example, the `1`/`1.0` represents the number of timings you collected (one). As you can see, the float computation gives us the index `1.0`, which is out of bounds for a vector of only one value. This makes the engine generate a `null` value.

```rust
1 * 99 / 100 = 0 // with integers
0.99_f64 * (1.0 - 1.0) + 1.0 = 1.0 // with floats
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-07-06 06:04:08 +00:00
b422e5fdc3 Merge #3890
3890: Fix the analytics of the sort facet values by count feature r=dureuill a=Kerollmops

This PR ensures we return the right analytics from the settings route.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-07-06 05:24:40 +00:00
d727ebee05 Fix the way we compute the 99th percentile 2023-07-05 17:53:09 +02:00
da39a7b29e Return the right analytics 2023-07-05 17:27:51 +02:00
377fe33aac Merge #3885
3885: Exactness missing field r=dureuill a=dureuill

# Pull Request

Adds fields to score details that were [specified](c25d758264/text/0195-ranking-score.md (322-ranking-rule-specific-fields)), but missing in the implementation:

- `exactness.matchingWords`
- `exactness.maxMatchingWords` 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-04 15:14:53 +00:00
55cd7738b9 Update snapshots 2023-07-04 16:31:01 +02:00
48409c9183 Add missing exactness.matchingWords, exactness.maxMatchingWords 2023-07-04 16:31:01 +02:00
176f716292 Merge #3871
3871: Bump Swatinem/rust-cache from 2.4.0 to 2.5.0 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.4.0 to 2.5.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.5.0</h2>
<h2>What's Changed</h2>
<ul>
<li>feat: Rm workspace crates version before caching by <a href="https://github.com/NobodyXu"><code>@​NobodyXu</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/147">Swatinem/rust-cache#147</a></li>
<li>feat: Add hash of <code>.cargo/config.toml</code> to key by <a href="https://github.com/NobodyXu"><code>@​NobodyXu</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/149">Swatinem/rust-cache#149</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/NobodyXu"><code>@NobodyXu</code></a> made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/147">Swatinem/rust-cache#147</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.4.0...v2.5.0">https://github.com/Swatinem/rust-cache/compare/v2.4.0...v2.5.0</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="2656b87321"><code>2656b87</code></a> 2.5.0</li>
<li><a href="715970feed"><code>715970f</code></a> feat: Add hash of <code>.cargo/config.toml</code> to key (<a href="https://redirect.github.com/swatinem/rust-cache/issues/149">#149</a>)</li>
<li><a href="3d4000164d"><code>3d40001</code></a> feat: Rm workspace crates version before caching (<a href="https://redirect.github.com/swatinem/rust-cache/issues/147">#147</a>)</li>
<li>See full diff in <a href="https://github.com/swatinem/rust-cache/compare/v2.4.0...v2.5.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.4.0&new-version=2.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-07-04 12:57:38 +00:00
82650eaae1 Merge #3877
3877: update the total_received properties of multiple events r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #3814 

## What does this PR do?
- fix name of `total_received` for several events


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-07-03 19:49:53 +00:00
b8ca09c13f Merge #3878
3878: Remove unsafe `atty` dependency r=dureuill a=Kerollmops

This PR replaces the `atty` dependency with the `is-terminal` one. We do that to fix GHSA-g98v-hv3f-hcfr.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-07-03 19:07:03 +00:00
a442af6a7c Update the features of the either dependency to compile milli successfully 2023-07-03 18:51:43 +02:00
e7f8daaf86 Update criterion to 0.5.1 to remove the atty dependency 2023-07-03 18:51:42 +02:00
d1ff631df8 Replace the atty dependency with the is-terminal one 2023-07-03 18:51:42 +02:00
202183adf8 update the total_received properties of multiple events 2023-07-03 15:57:09 +02:00
aae099e330 Merge #3851
3851: Expose lastUpdate and isIndexing in /stats endpoint r=dureuill a=gentcys

# Pull Request

## Related issue
Fixes #3843

## What does this PR do?
- expose lastUpdate in `/stats` endpoint
- expose isIndexing in `/stats` endpoint
- add a method `is_task_processing` in index-scheduler/src/lib.rs.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Cong Chen <cong.chen@ocrlabs.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-03 13:41:04 +00:00
5387cf1718 Don't unwrap in case of error/missing last_update field 2023-07-03 15:32:11 +02:00
a0df4becf4 Merge #3867
3867: Add a new link to the cloud pricing page r=curquiza a=Kerollmops

This PR promotes the Cloud by adding a link to the Pricing page to the startup message!

<img width="1002" alt="Capture d’écran 2023-06-29 à 17 40 22" src="https://github.com/meilisearch/meilisearch/assets/3610253/b0528c24-fcc2-43ff-a6a1-3ed91716663b">

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-07-03 11:25:26 +00:00
e0a2f88fb0 Merge #3874
3874: Update version for the next release (v1.3.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: gillian-meilisearch <gillian-meilisearch@users.noreply.github.com>
2023-07-03 10:37:03 +00:00
e871906370 Merge #3876
3876: Fix invalid attributeToSearchOn error code r=Kerollmops a=ManyTheFish

Fix the invalid attributeToSearchOn error code to be consistent with the other search parameters' error codes:

error code `invalid_attributes_to_search_on` becomes `invalid_search_attributes_to_search_on`:
```diff
- invalid_attributes_to_search_on
+ invalid_search_attributes_to_search_on
```

related to #3772


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-07-03 10:06:30 +00:00
7a80c0dfb3 Fix invalid attributeToSearchOn error code to be consistent with the other search parameters' error codes 2023-07-03 11:52:43 +02:00
71500a4e15 Update tests 2023-07-03 11:20:43 +02:00
a9f691f279 Merge #3873
3873: Format let-else ❤️ 🎉 r=Kerollmops a=dureuill

# Pull Request

Allows passing CI after landing of 6162f6f123

## What does this PR do?
- `cargo +nightly fmt`

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-07-03 09:01:20 +00:00
1d40452057 Update version for the next release (v1.3.0) in Cargo.toml 2023-07-03 08:32:21 +00:00
324d448236 Format let-else ❤️ 🎉 2023-07-03 10:20:28 +02:00
40ad19ba9e Bump Swatinem/rust-cache from 2.4.0 to 2.5.0
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.4.0 to 2.5.0.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.4.0...v2.5.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-07-01 17:46:11 +00:00
9859e65d2f fix tests 2023-07-01 09:32:50 +08:00
3bdf01bc1c Fix failed test 2023-06-30 17:39:23 +08:00
a5a31667b0 fix converse result of is_task_processing() 2023-06-30 11:28:18 +08:00
cab4c4d7c9 Add UTMs to the Cloud link 2023-06-29 17:59:59 +02:00
4ec08e9430 Add a new link to the cloud pricing page 2023-06-29 17:38:10 +02:00
661d1f90dc Merge #3866
3866: Update charabia v0.8.0 r=dureuill a=ManyTheFish

# Pull Request

Update Charabia:
- enhance Japanese segmentation
- enhance Latin Tokenization
  - words containing `_` are now properly segmented into several words
  - brackets `{([])}` are no longer considered context separators, so words separated by brackets are now considered close together for the proximity ranking rule
- fixes #3815
- fixes #3778
- fixes [product#151](https://github.com/meilisearch/product/discussions/151)

> Important note: now the float numbers are segmented around the `.` so `3.22` is segmented as [`3`, `.`, `22`] but the middle dot isn't considered as a hard separator, which means that if we search `3.22` we find documents containing `3.22`

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-06-29 15:24:36 +00:00
6ec7541026 Update insta snapshots 2023-06-29 17:18:39 +02:00
e8dee3ca65 Update lock file 2023-06-29 17:02:24 +02:00
a82c49ab08 Update test 2023-06-29 15:56:36 +02:00
84845de9ef Update Charabia 2023-06-29 15:56:32 +02:00
c9b3f80947 Merge #3780
3780: Be able to sort facet values by alpha or count r=dureuill a=Kerollmops

This PR introduces a new `sortFacetValuesBy` settings parameter to expose the facet distribution in either count or lexicographic/alpha order.

## Mini Spec of the `sortFacetValuesBy` Settings Parameter

This parameter can be set in the settings to change how the engine returns the facet values. There are two possible values to this parameter.

Please note that the current behavior changed a bit, and keys are returned in lexicographic order instead of undefined order. The previous order wasn't defined as we were using a `HashMap`, which returns entries in hash order (undefined), and we are now using an `IndexMap`, which returns them in insertion order (the order we actually want).

Also, note that there are performance issues when the dataset is enormous. Here are the timings of the engine running on my MacBook Pro M1 (16 GB of RAM). [The dataset is a file of 40 million songs](https://www.notion.so/meilisearch/Songs-from-MusicBrainz-686e31b2bd3845898c7746f502a6e117), and the database size is about 50 GiB. Even if you think 800ms is not that high, don't forget that the API is public, and anybody can ask for multiple facets in a single query.

| Search Kind | Get Facets | Max Values per Facet | Time for Alpha | Time for Count | Count but with #3788 |
|------------:|------------|----------------------|:--------------:|----------------|----------------------|
| Placeholder | genres     | default (100)        | 7ms            | 187ms          | 122ms                |
| Placeholder | genres     | 20                   | 6ms            | 124ms          | 75ms                 |
| Placeholder | album      | default (100)        | 9ms            | 808ms          | 677ms                |
| Placeholder | album      | 20                   | 8ms            | 579ms          | 446ms                |
| Placeholder | artist     | default (100)        | 9ms            | 462ms          | 344ms                |
| Placeholder | artist     | 20                   | 9ms            | 341ms          | 246ms                |

### Order Values in Alphanumeric Order

This is the default one. Values will be returned by lexicographic order, ascending from A to Z.

```bash
# First, update the settings
curl 'localhost:7700/indexes/movies/settings/faceting' \
  -H "Content-Type: application/json"  \
  -d '{ "sortFacetValuesBy": { "*": "alpha" } }'

# Then, ask for the facet distribution
curl 'localhost:7700/indexes/movies/search?facets=genres'
```

```json5
{
    "hits": [
        /* list of results */
    ],
    "query": "",
    "processingTimeMs": 0,
    "limit": 20,
    "offset": 0,
    "estimatedTotalHits": 1000,
    "facetDistribution": {
        "genres": {
            "Action": 3215,
            "Adventure": 1972,
            "Animation": 1577,
            "Comedy": 5883,
            "Crime": 1808,
            // ...
        }
    },
    "facetStats": {}
}
```

### Order Values in Count Order

Facet values are sorted by decreasing count. The count is the number of records containing this facet value in the query results.

```bash
# First, update the settings
curl 'localhost:7700/indexes/movies/settings/faceting' \
  -H "Content-Type: application/json"  \
  -d '{ "sortFacetValuesBy": { "*": "count" } }'

# Then, ask for the facet distribution
curl 'localhost:7700/indexes/movies/search?facets=genres'
```

```json5
{
    "hits": [
        /* list of results */
    ],
    "query": "",
    "processingTimeMs": 0,
    "limit": 20,
    "offset": 0,
    "estimatedTotalHits": 1000,
    "facetDistribution": {
        "genres": {
            "Drama": 7337,
            "Comedy": 5883,
            "Action": 3215,
            "Thriller": 3189,
            "Romance": 2507,
            // ...
        }
    },
    "facetStats": {}
}
```

## Todo List
 - [x] Add tests
 - [x] Send analytics when a user changes the `sortFacetValuesBy`
 - [x] Create a prototype and announce it in https://github.com/meilisearch/product/discussions/519.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-06-29 12:43:25 +00:00
09c5edf242 Cargo fmt 2023-06-29 14:37:18 +02:00
4e85f91aee Add a non default value to the faceting settings of the dump tests 2023-06-29 14:33:33 +02:00
7c157fc442 Document that the LevelEntry fields order is important 2023-06-29 14:33:32 +02:00
0b97596c93 Replace unwraps with ? 2023-06-29 14:33:32 +02:00
a0e0fce677 Simplify a Rust lifetime trick 2023-06-29 14:33:32 +02:00
3c295c1ffc Fix typos 2023-06-29 14:33:32 +02:00
b951830461 Add more tests 2023-06-29 14:33:32 +02:00
9a13b72f25 Fix the tests 2023-06-29 14:33:32 +02:00
1d8dfafd25 Add analytics when all facets are sorted by count and the number of modified ones 2023-06-29 14:33:31 +02:00
eed9176e0c Also reset the sortFacetValuesBy when resetting the faceting settings 2023-06-29 14:33:31 +02:00
b132e859f7 Make clippy happy 2023-06-29 14:33:31 +02:00
9917bf046a Move the sortFacetValuesBy in the faceting settings 2023-06-29 14:33:31 +02:00
d9fea0143f Make Clippy happy 2023-06-29 14:33:31 +02:00
a385642ec3 Replace the BTreeMap by an IndexMap to return values in order 2023-06-29 14:33:31 +02:00
34b2e98fe9 Expose a sortFacetValuesBy parameter to the user 2023-06-29 14:33:00 +02:00
80bbd4b6f3 Clean and make the facet order configurable internally 2023-06-29 14:31:17 +02:00
f42bef2f66 Make the search to always return the facets ordered by count 2023-06-29 14:31:17 +02:00
bd3c026406 First to-test version of the algorithm 2023-06-29 14:31:17 +02:00
84f8938f33 Rename facet distribution to be explicit on the order to find them 2023-06-29 14:31:15 +02:00
34a07110de Merge #3864
3864: Remove `/experimental-features` verbs that weren't in the PRD r=dureuill a=dureuill

Removes:

- POST `/experimental-features`
- DELETE `/experimental-features`

keeping only:

- PATCH `/experimental-features`
- GET `/experimental-features`

The two routes that are described in the PRD.

Following `@guimachiavelli's` [question](https://github.com/meilisearch/documentation/issues/2482#issuecomment-1611845372) about the POST route.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-29 09:43:14 +00:00
73bb080a26 Merge #3699
3699: Search for Facet Values r=Kerollmops a=Kerollmops

This PR introduces the first version of [the _Search for Facet Values_ feature](https://github.com/meilisearch/product/discussions/515) that allows a user to search for facets, by optionally using a prefix string and optionally specifying the `q` and `filter` original search parameters to restrict the candidates to search in.

The steps to merge it into Meilisearch will first start by providing prototype Docker images. This way users will be able to test the prototypes before using them.

The current route to use the _Search for Facet Values_ feature is the `POST /indexes/{index}/facet-search` where the body is a JSON object that looks like the following:
```json5
{
  "q": "spiderman", // optional
  "filter": "rating > 10", // optional
  "facetName": "genres",
  "facetQuery": "a" // optional
}
```

## What is missing?

 - [x] Send some analytics.
 - [x] Support the `matchingStrategy` parameter.
 - [x] Make sure that the errors are the right ones.
 - [x] Use the [Index typo tolerance settings](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#minwordsizefortypos) when matching facet values.
    - [x] minWordSizeForTypos.oneTypo
    - [x] minWordSizeForTypos.twoTypo
 - [x] Add tests
 - [x] Log the time it took to compute the results.
 - [x] Fix the compilation warnings.
 - [x] [Create an issue to fix potential performance issues when indexing](https://github.com/meilisearch/meilisearch/issues/3862).


Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-06-29 09:08:55 +00:00
44b5b9e1a7 Improve the documentation of the FacetSearchQuery struct 2023-06-29 10:28:23 +02:00
68356869c0 Remove /experimental-features verbs that weren't in the PRD 2023-06-29 10:02:55 +02:00
e3fc7112bc use RoaringBitmap::is_empty instead 2023-06-29 11:46:47 +08:00
605c1dd54a Fix analytics 2023-06-28 16:41:56 +02:00
3e3f73ba1e Fix the analytics 2023-06-28 15:45:09 +02:00
efbe7ce78b Clean the facet string FSTs when we clear the documents 2023-06-28 15:36:32 +02:00
82e1f59f1e Add attributes_to_search_on 2023-06-28 15:28:24 +02:00
362e9ff845 Add more tests 2023-06-28 15:28:24 +02:00
32f2556d22 Move the additional_search_parameters_provided analytic inside facets 2023-06-28 15:06:09 +02:00
63fd10aaa5 Fix the invalid facet name field error code 2023-06-28 15:06:09 +02:00
29b40295b8 Ignore unknown facet search query parameters 2023-06-28 15:06:09 +02:00
26f0fa678d Change the error message when a facet is not searchable 2023-06-28 15:06:09 +02:00
60ddd53439 Return one of the original facet values when doing a facet search 2023-06-28 15:06:09 +02:00
2bcd8d2983 Make sure the facet queries are normalized 2023-06-28 15:06:09 +02:00
09079a4e88 Remove useless InvalidSearchFacet error 2023-06-28 15:06:09 +02:00
904f6574bf Make rustfmt happy 2023-06-28 15:06:08 +02:00
6fb8af423c Rename the hits and query output into facetHits and facetQuery respectively 2023-06-28 15:06:08 +02:00
cb0bb399fa Fix the error code returned when the facetName field is missing 2023-06-28 15:06:08 +02:00
41760a9306 Introduce a new invalid_facet_search_facet_name error code 2023-06-28 15:06:07 +02:00
e9a3029c30 Use the right field id to write the string facet values FST 2023-06-28 15:01:51 +02:00
ed0ff47551 Return an empty list of results if attribute is set as filterable 2023-06-28 15:01:51 +02:00
e1b8fb48ee Use the minWordSizeForTypos index settings 2023-06-28 15:01:51 +02:00
87e22e436a Fix compilation issues 2023-06-28 15:01:51 +02:00
0252cfe8b6 Simplify the placeholder search of the facet-search route 2023-06-28 15:01:50 +02:00
f35ad96afa Use the disableOnAttributes parameter on the facet-search route 2023-06-28 15:01:50 +02:00
2ceb781c73 Use the disableOnWords parameter on the facet-search route 2023-06-28 15:01:50 +02:00
7bd67543dd Support the typoTolerant.enabled parameter 2023-06-28 15:01:50 +02:00
8e86eb91bb Log an error when a facet value is missing from the database 2023-06-28 15:01:50 +02:00
55c17aa38b Rename the SearchForFacetValues struct 2023-06-28 15:01:50 +02:00
aadbe88048 Return an internal error when a field id is missing 2023-06-28 15:01:50 +02:00
f36de2115f Make clippy happy 2023-06-28 15:01:50 +02:00
702041b7e1 Improve the returned errors from the facet-search route 2023-06-28 15:01:48 +02:00
a05074e675 Fix the max number of facets to be returned to 100 2023-06-28 14:58:42 +02:00
93f30e65a9 Return the correct response JSON object from the facet-search route 2023-06-28 14:58:42 +02:00
893592c5e9 Send analytics about the facet-search route 2023-06-28 14:58:42 +02:00
e81809aae7 Make the search for facet work 2023-06-28 14:58:41 +02:00
ce7e7f12c8 Introduce the facet search route 2023-06-28 14:58:41 +02:00
addb21f110 Restrict the number of facet search results to 1000 2023-06-28 14:58:41 +02:00
c34de05106 Introduce the SearchForFacetValue struct 2023-06-28 14:58:41 +02:00
15a4c05379 Store the facet string values in multiple FSTs 2023-06-28 14:58:41 +02:00
9deeec88e0 Merge #3861
3861: Add "meilisearch" prefix to last metrics that were missing it r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to #3790 

## What does this PR do?
- change implementation to follow the spec on metrics name
- regenerate grafana dashboard from the code

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-28 09:28:31 +00:00
167ac55a2d Update dashboard generated from grafana 2023-06-28 11:22:16 +02:00
ea68ccd034 prefix http_* metrics by meilisearch 2023-06-28 11:21:50 +02:00
d4f10800f2 Merge #3834
3834: Define searchable fields at runtime r=Kerollmops a=ManyTheFish

## Summary
This feature allows the end-user to search in one or multiple attributes using the search parameter `attributesToSearchOn`:

```json
{
  "q": "Captain Marvel",
  "attributesToSearchOn": ["title"]
}
```

This feature acts like a filter, forcing Meilisearch to only return the documents containing the requested words in the attributes-to-search-on. Note that, with the matching strategy `last`, Meilisearch will only ensure that the first word is in the attributes-to-search-on, but the retrieved documents will be ordered taking into account the words contained in the attributes-to-search-on.

## Trying the prototype

A dedicated docker image has been released for this feature:

#### last prototype version:

```bash
docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-1
```

#### other prototype versions:

```bash
docker pull getmeili/meilisearch:prototype-define-searchable-fields-at-search-time-0
```

## Technical Detail

The attributes-to-search-on list is given to the search context; the search context then uses the `fid_word_docids` database, restricted to the allowed field ids, instead of the global `word_docids` database. The same applies to the prefix databases.
The database cache is updated with the merged values, meaning that the union of the field-id-database values is only made if the requested key is missing from the cache.

### Relevancy limits

Almost all ranking rules behave as expected when ordering the documents.
Only `proximity` could misorder documents if all the searched words are in the restricted attribute but a better proximity is found in an ignored attribute of a document that should be ranked lower. Below is a failing test showing it:
```rust
#[actix_rt::test]
async fn proximity_ranking_rule_order() {
    let server = Server::new().await;
    let index = index_with_documents(
        &server,
        &json!([
        {
            "title": "Captain super mega cool. A Marvel story",
            // Perfect distance between words in an ignored attribute
            "desc": "Captain Marvel",
            "id": "1",
        },
        {
            "title": "Captain America from Marvel",
            "desc": "a Shazam ersatz",
            "id": "2",
        }]),
    )
    .await;

    // Document 2 should appear before document 1.
    index
        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| {
            assert_eq!(code, 200, "{}", response);
            assert_eq!(
                response["hits"],
                json!([
                    {"id": "2"},
                    {"id": "1"},
                ])
            );
        })
        .await;
}
```

Fixing this would force us to create `fid_word_pair_proximity_docids` and `fid_word_prefix_pair_proximity_docids` databases, which may multiply the number of keys of `word_pair_proximity_docids` and `word_prefix_pair_proximity_docids` by the number of attributes in the searchable_attributes list. If we think we should fix this test, I suggest doing it in another PR.

## Related

Fixes #3772

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-06-28 08:19:23 +00:00
dc293911ad Merge #3745
3745: tests: add unit test for `PayloadTooLarge` error r=curquiza a=cymruu

# Pull Request
Add a unit test for the `Payload`, which verifies that a request with a payload that is too large is rejected with the appropriate message.
This was requested in this PR https://github.com/meilisearch/meilisearch/pull/3739

## Related issue
https://github.com/meilisearch/meilisearch/pull/3739

## What does this PR do?
- Adds requested test

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
2023-06-27 14:58:23 +00:00
9d68e6969e Merge #3859
3859: Merge all analytics events pertaining to updating the experimental features r=Kerollmops a=dureuill

Follow-up to #3850 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-27 13:26:01 +00:00
b4b686d253 Merge all analytics events pertaining to updating the experimental features 2023-06-27 15:16:23 +02:00
98ec476198 Merge #3855
3855: Change and add links to the Cloud r=Kerollmops a=dureuill

- add cloud link in banner
- add utm to existing links following https://github.com/meilisearch/integration-guides/issues/277#issuecomment-1592054536

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-27 12:29:36 +00:00
c47b8a8bfe Fix typo
Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
2023-06-27 14:27:54 +02:00
054f81a021 Make message consistent with the one in integration repos 2023-06-27 14:20:55 +02:00
d8ea688481 Merge #3825
3825: Accept semantic vectors and allow users to query nearest neighbors r=Kerollmops a=Kerollmops

This Pull Request brings a new feature to the current API. The engine accepts a new `_vectors` field akin to the `_geo` one. This vector is stored in Meilisearch and can be retrieved via search. This work is the first step toward hybrid search, bringing the best of both worlds: keyword and semantic search ❤️‍🔥

## ToDo
 - [x] Make it possible to get the `limit` nearest neighbors from a user-generated vector by using the `vector` field of search route.
 - [x] Delete the documents and vectors from the HNSW-related data structures.
     - [x] Do it the slow and ugly way (we need to be able to iterate over all the values).
     - [ ] Do it the efficient way (Wait for a new method or implement it myself).
 - [ ] ~~Move from the `hnsw` crate to the hgg one~~ The hgg crate is too slow.
   Meilisearch takes approximately 88s to answer a query. It is related to the time it takes to deserialize the `Hgg` data structure or search in it. I didn't take the time to measure precisely. We moved back to the hnsw crate which takes approximately 40ms to answer.
   - [ ] ~~Wait for a fix for https://github.com/rust-cv/hgg/issues/4.~~
 - [x] Fix the current dot product function.
 - [x] Fill in the other `SearchResult` fields.
 - [x] Remove the `hnsw` dependency of the meilisearch crate.
 - [x] Fix the pages by taking the offset into account.
 - [x] Release a first prototype https://github.com/meilisearch/product/discussions/621#discussioncomment-6183647
 - [x] Make the pagination and filtering faster and more correct.
 - [x] Return the original vector in the output search results (like `query`).
 - [x] Return an `_semanticSimilarity` field in the documents (it's a dot product)
   - [x] Return this score even if the `_vectors` field is not displayed
   - [x] Rename the field `_semanticScore`.
   - [ ] Return the `_geoDistance` value even if the `_geo` field is not displayed
 - [x] Store the HNSW on possibly multiple LMDB values.
   - [ ] Measure it and make it faster if needed
   - [ ] Export the `ReadableSlices` type into a small external crate
 - [x] Accept an `_vectors` field instead of the `_vector` one.
 - [x] Normalize all vectors.
 - [ ] Remove the `_vectors` field from the default searchable attributes (as we do with `_geo`?).
 - [ ] Correctly compute the candidates by remembering the documents having a valid `_vectors` field.
 - [ ] Return the right errors:
     - [ ] Return an error when the query vector is not the same length as the vectors in the HNSW.
     - [ ] We must return the user document id that triggered the vector dimension issue.
     - [x] If an indexation error occurs.
     - [ ] Fix the error codes when using the search route.
 - [ ] ~~Introduce some settings:~~
    We currently ensure that the vector length is consistent over the whole set of documents and return an error when a vector's dimension doesn't match the current number of dimensions.
     - [ ] The length of the vector the user will provide.
     - [ ] The distance function (we only support dot as of now).
 - [ ] Introduce other distance functions
    - [ ] Euclidean
    - [ ] Dot Product
    - [ ] Cosine
    - [ ] Make them SIMD optimized
    - [ ] Give credit to qdrant
 - [ ] Add tests.
 - [ ] Write a mini spec.
 - [ ] Release it in v1.3 as an experimental feature.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-06-27 11:17:07 +00:00
e69be93e42 Log warn about using both q and vector field parameters 2023-06-27 12:32:44 +02:00
b2b413db12 Return all the _semanticScore values in the documents 2023-06-27 12:32:43 +02:00
30741d17fa Change the TODO message 2023-06-27 12:32:43 +02:00
ebad1f396f Remove the useless euclidean distance implementation 2023-06-27 12:32:43 +02:00
29d8268c94 Fix the vector query part by using the correct universe 2023-06-27 12:32:43 +02:00
63bfe1cee2 Ignore when there are too many vectors 2023-06-27 12:32:43 +02:00
f3e4d70638 Send analytics about the query vector length 2023-06-27 12:32:43 +02:00
eecf20f109 Introduce a new invalid_vector_store 2023-06-27 12:32:42 +02:00
816d7ed174 Update the Vector Store product feature link 2023-06-27 12:32:42 +02:00
864ad2a23c Check that vector store feature is enabled 2023-06-27 12:32:42 +02:00
66fb5c150c Rename _semanticSimilarity into _semanticScore 2023-06-27 12:32:42 +02:00
7c2f5f77b8 Make clippy and fmt happy 2023-06-27 12:32:42 +02:00
66b8cfd8c8 Introduce a way to store the HNSW on multiple LMDB entries 2023-06-27 12:32:42 +02:00
ff3664431f Make rustfmt happy 2023-06-27 12:32:42 +02:00
531748c536 Return a user error when the _vectors type is invalid 2023-06-27 12:32:41 +02:00
7aa1275337 Display the _semanticSimilarity even if the _vectors field is not displayed 2023-06-27 12:32:41 +02:00
737aec1705 Expose an _semanticSimilarity as a dot product in the documents 2023-06-27 12:32:41 +02:00
3e3c743392 Make Rustfmt happy 2023-06-27 12:32:41 +02:00
5c5a4e075d Make clippy happy 2023-06-27 12:32:41 +02:00
ab9f2269aa Normalize the vectors during indexation and search 2023-06-27 12:32:41 +02:00
321ec5f3fa Accept multiple vectors by documents using the _vectors field 2023-06-27 12:32:40 +02:00
1b2923f7c0 Return the vector in the output of the search routes 2023-06-27 12:32:40 +02:00
717d4fddd4 Remove the unused distance 2023-06-27 12:32:40 +02:00
a7e0f0de89 Introduce a new error message for invalid vector dimensions 2023-06-27 12:32:40 +02:00
3b560ef7d0 Make clippy happy 2023-06-27 12:32:40 +02:00
2cf747cb89 Fix the tests 2023-06-27 12:32:40 +02:00
3c31e1cdd1 Support more pages but in an ugly way 2023-06-27 12:32:39 +02:00
23eaaf1001 Change the name of the distance module 2023-06-27 12:32:39 +02:00
c2a402f3ae Implement an ugly deletion of values in the HNSW 2023-06-27 12:32:39 +02:00
436a10bef4 Replace the euclidean with a dot product 2023-06-27 12:32:39 +02:00
8debf6fe81 Use a basic euclidean distance function 2023-06-27 12:32:39 +02:00
c79e82c62a Move back to the hnsw crate
This reverts commit 7a4b6c065482f988b01298642f4c18775503f92f.
2023-06-27 12:32:39 +02:00
aca305bb77 Log more to make sure we insert vectors in the hgg data-structure 2023-06-27 12:32:38 +02:00
5816008139 Introduce an optimized version of the euclidean distance function 2023-06-27 12:32:38 +02:00
268a9ef416 Move to the hgg crate 2023-06-27 12:32:38 +02:00
642b0f3a1b Expose a new vector field on the search route 2023-06-27 12:32:38 +02:00
cad90e8cbc Add a vector field to the search routes 2023-06-27 12:32:38 +02:00
4571e512d2 Store the vectors in an HNSW in LMDB 2023-06-27 12:32:38 +02:00
7ac2f1489d Extract the vectors from the documents 2023-06-27 12:32:37 +02:00
34349faeae Create a new _vector extractor 2023-06-27 12:32:37 +02:00
ed0a5be4b6 Merge #3853
3853: docs: fixed some broken links r=gillian-meilisearch a=0xflotus

Some of the links in the README file were broken.


Co-authored-by: 0xflotus <0xflotus@gmail.com>
2023-06-27 10:30:13 +00:00
f105df6599 Merge #3850
3850: Experimental features r=Kerollmops a=dureuill

# Pull Request

## Related issue

- Fixes https://github.com/meilisearch/meilisearch/issues/3857
- Related to https://github.com/meilisearch/meilisearch/issues/3771
## What does this PR do?

### Example

<details>
<summary>Using the feature to enable `scoreDetails`</summary>

```json
❯ curl \
  -X POST 'http://localhost:7700/indexes/index-word-count-10-count/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "Batman", "limit": 1, "showRankingScoreDetails": true, "attributesToRetrieve": ["title"]}' | jsonxf

{
  "message": "Computing score details requires enabling the `score details` experimental feature. See https://github.com/meilisearch/product/discussions/674",
  "code": "feature_not_enabled",
  "type": "invalid_request",
  "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
```

```json
❯ curl \
  -X PATCH 'http://localhost:7700/experimental-features/' \
  -H 'Content-Type: application/json'  \
--data-binary '{
    "scoreDetails": true
  }'
{"scoreDetails":true,"vectorSearch":false}
```

```json
❯ curl \
  -X POST 'http://localhost:7700/indexes/index-word-count-10-count/search' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "Batman", "limit": 1, "showRankingScoreDetails": true, "attributesToRetrieve": ["title"]}' | jsonxf
{
  "hits": [
    {
      "title": "Batman",
      "_rankingScoreDetails": {
        "words": {
          "order": 0,
          "matchingWords": 1,
          "maxMatchingWords": 1,
          "score": 1.0
        },
        "typo": {
          "order": 1,
          "typoCount": 0,
          "maxTypoCount": 1,
          "score": 1.0
        },
        "proximity": {
          "order": 2,
          "score": 1.0
        },
        "attribute": {
          "order": 3,
          "attribute_ranking_order_score": 1.0,
          "query_word_distance_score": 1.0,
          "score": 1.0
        },
        "exactness": {
          "order": 4,
          "matchType": "exactMatch",
          "score": 1.0
        }
      }
    }
  ],
  "query": "Batman",
  "processingTimeMs": 3,
  "limit": 1,
  "offset": 0,
  "estimatedTotalHits": 46
}
```


</details>

### User standpoint

- Add new route GET/POST/PATCH/DELETE `/experimental-features` to switch on or off some of the experimental features in a manner persistent between instance restarts
- Use these new routes to allow setting on/off the following experimental features:
  - vector store **TODO:** fill in issue 
  - score details (related to https://github.com/meilisearch/meilisearch/issues/3771)
- Make the way of checking feature availability and error message uniform for the Prometheus metrics experimental feature
- Save the enabled features in dump, restore from dumps
- **TODO:** tests:
  - Test new security permissions (do they allow access with ALL, do they prevent access when missing)
  - Test dump behavior, in particular ability to import existing v6 dumps
  - Test basic behavior when calling the route

### Implementation standpoint

- New DB "experimental-features"
- Dumps are modified to save the state of that new DB as an `experimental-features.json` file, which is then loaded back when importing the dump. This doesn't change the dump version, as the file is optional and its absence will not cause the dump import to fail.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-26 15:13:43 +00:00
13e9b4c2e5 Add dump support 2023-06-26 16:29:43 +02:00
5a83cecb0f fix tests 2023-06-26 16:29:43 +02:00
cca6e47ec1 Errors when GETting metrics without the feature gate 2023-06-26 16:29:43 +02:00
6196a53668 Gate score_details behind a runtime experimental feature flag 2023-06-26 16:29:43 +02:00
bb6448dc2e Compute instance features from CLI options 2023-06-26 16:29:43 +02:00
eef9293630 New route to set some experimental features 2023-06-26 16:29:43 +02:00
dac77dfd14 Add new permissions for experimental-features route 2023-06-26 16:29:43 +02:00
072d81843f Persistently save to DB the status of experimental features 2023-06-26 16:29:43 +02:00
29ec02d4d4 Add meilisearch_types::features module 2023-06-26 16:09:03 +02:00
9d2a12821d Use insta snapshot 2023-06-26 14:56:19 +02:00
63ca25290b Take into account small Review requests 2023-06-26 14:56:19 +02:00
59f64a5256 Return an error when an attribute is not searchable 2023-06-26 14:56:19 +02:00
dc391deca0 Reverse assert comparison to have a consistent error message 2023-06-26 14:55:57 +02:00
114f878205 Rename restrictSearchableAttributes into attributesToSearchOn 2023-06-26 14:55:57 +02:00
42709ea9a5 Fix clippy warnings 2023-06-26 14:55:57 +02:00
993b0d012c Remove proximity_ranking_rule_order test, fixing this test would force us to create a fid_word_pair_proximity_docids and a fid_word_prefix_pair_proximity_docids databases which may multiply the keys of word_pair_proximity_docids and word_prefix_pair_proximity_docids by the number of attributes in the searchable_attributes list 2023-06-26 14:55:57 +02:00
fb8fa07169 Restrict field ids in search context 2023-06-26 14:55:57 +02:00
0ccf1e2e40 Allow the search cache to store owned values 2023-06-26 14:55:57 +02:00
9680e1e41f Introduce a BytesDecodeOwned trait in heed_codecs 2023-06-26 14:55:14 +02:00
a61ca4066e Add tests 2023-06-26 14:55:14 +02:00
461b5118bd Add API search setting 2023-06-26 14:55:14 +02:00
a3716c5678 add the new parameter to the search builder of milli 2023-06-26 14:55:14 +02:00
2d34005965 Merge #3821
3821: Add normalized and detailed scores to documents returned by a query r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #3771 

## What does this PR do?

### User standpoint

<details>
<summary>Request ranking score</summary>

```
echo '{ 
  "q": "Badman dark knight returns",
  "showRankingScore": true, 
  "limit": 10,
  "attributesToRetrieve": ["title"]
}' | mieli search -i index-word-count-10-count
```

</details>


<details>
<summary>Response</summary>

```json
{
  "hits": [
    {
      "title": "Batman: The Dark Knight Returns, Part 1",
      "_rankingScore": 0.947520325203252
    },
    {
      "title": "Batman: The Dark Knight Returns, Part 2",
      "_rankingScore": 0.947520325203252
    },
    {
      "title": "Batman Unmasked: The Psychology of the Dark Knight",
      "_rankingScore": 0.6657594086021505
    },
    {
      "title": "Legends of the Dark Knight: The History of Batman",
      "_rankingScore": 0.6654905913978495
    },
    {
      "title": "Angel and the Badman",
      "_rankingScore": 0.2196969696969697
    },
    {
      "title": "Angel and the Badman",
      "_rankingScore": 0.2196969696969697
    },
    {
      "title": "Batman",
      "_rankingScore": 0.11553030303030302
    },
    {
      "title": "Batman Begins",
      "_rankingScore": 0.11553030303030302
    },
    {
      "title": "Batman Returns",
      "_rankingScore": 0.11553030303030302
    },
    {
      "title": "Batman Forever",
      "_rankingScore": 0.11553030303030302
    }
  ],
  "query": "Badman dark knight returns",
  "processingTimeMs": 12,
  "limit": 10,
  "offset": 0,
  "estimatedTotalHits": 46
}
```

</details>



- If adding a `showRankingScore` parameter to the search query, then documents returned by a search now contain an additional field `_rankingScore` that is a float greater than 0 and less than or equal to 1.0. This field represents the relevancy of the document, relative to the search query and the settings of the index, with 1.0 meaning "perfect match" and 0 meaning "not matching the query" (Meilisearch should never return documents not matching the query at all).
  - The `sort` and `geosort` ranking rules do not influence the `_rankingScore`.

<details>
<summary>Request detailed ranking scores</summary>

```
echo '{ 
  "q": "Badman dark knight returns",
  "showRankingScoreDetails": true, 
  "limit": 5, 
  "attributesToRetrieve": ["title"]
}' | mieli search -i index-word-count-10-count
```

</details>

<details>
<summary>Response</summary>

```json
{
  "hits": [
    {
      "title": "Batman: The Dark Knight Returns, Part 1",
      "_rankingScoreDetails": {
        "words": {
          "order": 0,
          "matchingWords": 4,
          "maxMatchingWords": 4,
          "score": 1.0
        },
        "typo": {
          "order": 1,
          "typoCount": 1,
          "maxTypoCount": 4,
          "score": 0.8
        },
        "proximity": {
          "order": 2,
          "score": 0.9545454545454546
        },
        "attribute": {
          "order": 3,
          "attributes_ranking_order": 1.0,
          "attributes_query_word_order": 0.926829268292683,
          "score": 0.926829268292683
        },
        "exactness": {
          "order": 4,
          "matchType": "noExactMatch",
          "score": 0.26666666666666666
        }
      }
    },
    {
      "title": "Batman: The Dark Knight Returns, Part 2",
      "_rankingScoreDetails": {
        "words": {
          "order": 0,
          "matchingWords": 4,
          "maxMatchingWords": 4,
          "score": 1.0
        },
        "typo": {
          "order": 1,
          "typoCount": 1,
          "maxTypoCount": 4,
          "score": 0.8
        },
        "proximity": {
          "order": 2,
          "score": 0.9545454545454546
        },
        "attribute": {
          "order": 3,
          "attributes_ranking_order": 1.0,
          "attributes_query_word_order": 0.926829268292683,
          "score": 0.926829268292683
        },
        "exactness": {
          "order": 4,
          "matchType": "noExactMatch",
          "score": 0.26666666666666666
        }
      }
    },
    {
      "title": "Batman Unmasked: The Psychology of the Dark Knight",
      "_rankingScoreDetails": {
        "words": {
          "order": 0,
          "matchingWords": 3,
          "maxMatchingWords": 4,
          "score": 0.75
        },
        "typo": {
          "order": 1,
          "typoCount": 1,
          "maxTypoCount": 3,
          "score": 0.75
        },
        "proximity": {
          "order": 2,
          "score": 0.6666666666666666
        },
        "attribute": {
          "order": 3,
          "attributes_ranking_order": 1.0,
          "attributes_query_word_order": 0.8064516129032258,
          "score": 0.8064516129032258
        },
        "exactness": {
          "order": 4,
          "matchType": "noExactMatch",
          "score": 0.25
        }
      }
    },
    {
      "title": "Legends of the Dark Knight: The History of Batman",
      "_rankingScoreDetails": {
        "words": {
          "order": 0,
          "matchingWords": 3,
          "maxMatchingWords": 4,
          "score": 0.75
        },
        "typo": {
          "order": 1,
          "typoCount": 1,
          "maxTypoCount": 3,
          "score": 0.75
        },
        "proximity": {
          "order": 2,
          "score": 0.6666666666666666
        },
        "attribute": {
          "order": 3,
          "attributes_ranking_order": 1.0,
          "attributes_query_word_order": 0.7419354838709677,
          "score": 0.7419354838709677
        },
        "exactness": {
          "order": 4,
          "matchType": "noExactMatch",
          "score": 0.25
        }
      }
    },
    {
      "title": "Angel and the Badman",
      "_rankingScoreDetails": {
        "words": {
          "order": 0,
          "matchingWords": 1,
          "maxMatchingWords": 4,
          "score": 0.25
        },
        "typo": {
          "order": 1,
          "typoCount": 0,
          "maxTypoCount": 1,
          "score": 1.0
        },
        "proximity": {
          "order": 2,
          "score": 1.0
        },
        "attribute": {
          "order": 3,
          "attributes_ranking_order": 1.0,
          "attributes_query_word_order": 0.8181818181818182,
          "score": 0.8181818181818182
        },
        "exactness": {
          "order": 4,
          "matchType": "noExactMatch",
          "score": 0.3333333333333333
        }
      }
    }
  ],
  "query": "Badman dark knight returns",
  "processingTimeMs": 9,
  "limit": 5,
  "offset": 0,
  "estimatedTotalHits": 46
}
```

</details>

- If adding a `showRankingScoreDetails` parameter to the search query, then the returned documents will now contain an additional `_rankingScoreDetails` field that is a JSON object containing one field per ranking rule that was applied, whose value is a JSON object with the following fields:
  - `order`: a number indicating the order this rule was applied (0 is the first applied ranking rule)
  - `score` (except for `sort` and `geosort`): a float indicating how the document matched this particular rule.
  - other fields that are specific to the rule, indicating for example how many words matched for a document and how many typos were counted in a matching document.
- If the `displayableAttributes` list is defined in the settings of the index, any ranking rule using an attribute **not** part of that list will be marked as `<hidden-rule>` in the `_rankingScoreDetails`.  

- Search queries that are part of a `multi-search` request are modified in the same way, and each of the queries can take the `showRankingScore` and `showRankingScoreDetails` parameters independently. The results are still returned in separate lists and providing a unified list of results between multiple queries is not in the scope of this PR (but is unblocked by this PR and can be done manually by using the scores of the various documents).

### Implementation standpoint

- Fix difference in how the positions of terms were computed at indexing time and query time: this difference meant that a query containing a hard separator would fail the exactness check.
- Fix the id reported by the sort ranking rule (very minor)
- Change how the cost of removing words is computed. After this change the cost no longer works for any other ranking rule than `words`. Also made `words` have a cost of 0 such that the entire cost of `words` is given by the termRemovalStrategy. The new cost computation makes it so the score is computed in a way consistent with the number of words in the query. Additionally, the words that appear in phrases in the query are also counted as matching words.
- When any score computation is requested through `showRankingScore` or `showRankingScoreDetails`, remove optimization where ranking rules are not executed on buckets of a single document: this is important to allow the computation of an accurate score.
- add virtual conditions to fid and position to always have the max cost: this ensures that the score is independent from the dataset
- the Position ranking rule now takes into account the distance to the position of the word in the query instead of the distance to the position 0.
- modified proximity ranking rule cost calculation so that the cost is 0 for documents that are perfectly matching the query
- Add a new `milli::score_details` module containing all the types that are involved in score computation.
- Make it so a bucket of results now contains a `ScoreDetails` and change the ranking rules to produce their `ScoreDetails`.
- Expose the scores in the REST API.
- Add very light analytics for scoring.
- Update the search tests to add the expected scores.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-26 09:32:43 +00:00
62eefcda6e Change and add links to the Cloud 2023-06-26 09:17:15 +02:00
85a24775c5 Update README.md 2023-06-23 12:25:53 +02:00
6b0e9b9a7f Update README.md 2023-06-23 12:20:43 +02:00
b18c57ea7f docs: fixed some broken links
Some of the links in the README file were broken.
2023-06-23 12:18:43 +02:00
6d4981ec25 Expose lastUpdate and isIndexing in /stats endpoint 2023-06-23 07:24:25 +08:00
040b5a5b6f Merge #3842
3842: fix some typos r=dureuill a=cuishuang

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- fix some typos

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: cui fliter <imcusg@gmail.com>
2023-06-22 18:01:10 +00:00
530a3e2df3 fix some typos
Signed-off-by: cui fliter <imcusg@gmail.com>
2023-06-22 21:59:00 +08:00
11d32ad192 Add very light analytics for scoring 2023-06-22 12:39:14 +02:00
d26e9a96ec Add score details to new search tests 2023-06-22 12:39:14 +02:00
49c8bc4de6 Fix tests 2023-06-22 12:39:14 +02:00
da833eb095 Expose the scores and detailed scores in the API 2023-06-22 12:39:14 +02:00
701d44bd91 Store the scores for each bucket
Remove optimization where ranking rules are not executed on buckets of a single document
when the score needs to be computed
2023-06-22 12:39:14 +02:00
c621a250a7 Score for graph based ranking rules
Count phrases in matchingWords and maxMatchingWords
2023-06-22 12:39:14 +02:00
8939e85f60 Add rank_to_score for graph based ranking rules 2023-06-22 12:39:14 +02:00
fa41d2489e Score for sort 2023-06-22 12:39:14 +02:00
59c5b992c2 Score for geosort 2023-06-22 12:39:14 +02:00
2ea8194c18 Score for exact_attributes 2023-06-22 12:39:14 +02:00
421df64602 RankingRuleOutput now contains a Score 2023-06-22 12:39:14 +02:00
c0fca6f884 Add score_details 2023-06-22 12:39:14 +02:00
9015a8e8d9 Merge branch 'main' into cymruu/payload-unit-test 2023-06-21 09:26:50 +02:00
28404d56b7 Merge #3799
3799: Fix error messages in `check-release.sh` r=curquiza a=vvv

- `check_tag`: Report file name correctly. Use named variables.
- Introduce `read_version` helper function. Simplify the implementation.
- Show meaningful error message if `GITHUB_REF` is not set or its format is incorrect.

Co-authored-by: Valeriy V. Vorotyntsev <valery.vv@gmail.com>
2023-06-20 13:35:33 +00:00
262c1f2baf Merge #3844
3844: Fix SDK CI (again) r=curquiza a=curquiza

Following this PR: https://github.com/meilisearch/meilisearch/pull/3813

Sorry `@Kerollmops,` here is (I hope) the latest fix 🙏 I made tests last time that were not sufficient. I really did a lot this time. I hope I have not missed anything.



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-06-20 13:01:07 +00:00
cfed349aa3 Fix error messages in check-release.sh
- `check_tag`: Report file name correctly. Use named variables.
- Introduce `read_version` helper function. Simplify the implementation.
- Show meaningful error message if `GITHUB_REF` is not set or its format
  is incorrect.
2023-06-20 13:58:09 +03:00
f050634b1e add virtual conditions to fid and position to always have the max cost 2023-06-20 10:07:18 +02:00
becf1f066a Change how the cost of removing words is computed 2023-06-20 09:45:43 +02:00
701d299369 Remove out-of-date comment 2023-06-20 09:45:42 +02:00
a20e4d447c Position now takes into account the distance to the position of the word in the query
it used to be based on the distance to the position 0
2023-06-20 09:45:42 +02:00
af57c3c577 Proximity costs 0 for documents that are perfectly matching 2023-06-20 09:45:42 +02:00
0c40ef6911 Fix sort id 2023-06-20 09:45:42 +02:00
bbc9f68ff5 Use the input from the previous job instead of the workflow dispatch 2023-06-19 18:49:15 +02:00
45636d315c Merge #3670
3670: Fix addition deletion bug r=irevoire a=irevoire

The first commit of this PR is a revert of https://github.com/meilisearch/meilisearch/pull/3667. It re-enables the auto-batching of addition and deletion of tasks. No new changes have been introduced outside of `milli`. So all the changes you see on the autobatcher have actually already been reviewed.

It fixes https://github.com/meilisearch/meilisearch/issues/3440.

### What was happening?

The issue was that the `external_documents_ids` generated in the `transform` were used in a very strange way that wasn’t compatible with the deletion of documents.
Instead of doing a clear merge between the external document IDs of the DB and the one returned by the transform + writing it on disk, we were doing some weird tricks with the soft-deleted to avoid writing the fst on disk as much as possible.
The new algorithm may be a bit slower but is way more straightforward and doesn’t change depending on if the soft deletion was used or not. Here is a list of the changes introduced:
1. We now make a clear distinction between the `new_external_documents_ids` coming from the transform and only held in RAM and the `external_documents_ids` coming from the DB.
2. The `new_external_documents_ids` (coming out of the transform) are now represented as an `fst`. We don't need to struggle with the hard, soft distinction + the soft_deleted => That's easier to understand
3. When indexing documents, we merge the `external_documents_ids` coming from the DB and the `new_external_documents_ids` coming from the transform.

### Other things introduced in this  PR

Since we constantly have to write small, very specialized fuzzers for this kind of bug, we decided to push the one used to reproduce this bug.
It's not perfect, but it's easy to improve in the future.
It'll also run for as long as possible on every merge on the main branch.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@icloud.com>
2023-06-19 09:09:30 +00:00
cb9d78fc7f Merge #3835
3835: Add more documentation to graph-based ranking rule algorithms + comment cleanup r=Kerollmops a=loiclec

In addition to documenting the `cheapest_path.rs` file, this PR cleans up a few outdated comments as well as some TODOs. These TODOs have been moved to https://github.com/meilisearch/meilisearch/issues/3776



Co-authored-by: Loïc Lecrenier <loic.lecrenier@icloud.com>
2023-06-15 15:30:24 +00:00
01d2ee5cc1 Merge #3836
3836: Remove trailing whitespace in snapshots r=dureuill a=dureuill

# Pull Request

## Related issue

No issue, maintenance

## What does this PR do?
- Remove trailing whitespace in snapshots by adding a trailing `|` at the end of lines that would previously end with fixed-width integers
- This allows contributors whose editor is configured to remove trailing whitespace not to modify the tests when changing an unrelated part of the file containing the tests


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-14 13:00:52 +00:00
e0c4682758 Fix tests 2023-06-14 13:30:52 +02:00
d9b4b39922 Add trailing pipe to the snapshots so it doesn't end with trailing whitespace 2023-06-14 13:30:52 +02:00
2da86b31a6 Remove comments and add documentation 2023-06-14 12:39:42 +02:00
4e81445d42 Stop the fuzzer after an hour 2023-06-12 15:30:51 +02:00
4829348d6e Merge #3813
3813: Fix SDK CI for scheduled jobs r=curquiza a=curquiza

The SDK CI does not run for the scheduled job (`cron`) every day, and only works for manual triggers.

I added a job to define the Docker image we use depending on the event: `workflow_dispatch` = manual triggering, or `scheduled` = cron jobs

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-06-12 08:41:03 +00:00
047d22fcb1 Merge #3824
3824: Changes the way words are counted in the word count DB r=ManyTheFish a=dureuill

# Pull Request

## Related issue

Fixes https://github.com/meilisearch/meilisearch/issues/3823

## What does this PR do?

- Apply offset when parsing query that is consistent with the indexing

### DB breaking changes

- Count the number of words in `field_id_word_count_docids`
- raise limit of word count for storing the entry in the DB from 10 to 30

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-08 13:26:05 +00:00
a2a3b8c973 Fix offset difference between query and indexing for hard separators 2023-06-08 12:07:12 +02:00
9f37b61666 DB BREAKING: raise limit of word count from 10 to 30. 2023-06-08 12:07:12 +02:00
c15c076da9 DB BREAKING: Count the number of words in field_id_word_count_docids 2023-06-08 12:07:11 +02:00
9dcf1da59d Merge #3819
3819: Remove the `docid_word_positions` database r=Kerollmops a=loiclec

Remove the `docid_word_positions` database, which was only used during deletion operations. In the process, also fixes https://github.com/meilisearch/meilisearch/issues/3816




Co-authored-by: Loïc Lecrenier <loic.lecrenier@icloud.com>
2023-06-07 09:53:25 +00:00
8628a0c856 Remove docid_word_positions_db + fix deletion bug
That would happen when a word was deleted from all exact attributes
but not all regular attributes.
2023-06-07 10:52:50 +02:00
c1e3cc04b0 Merge #3811
3811: Bring back changes from `release-v1.2.0` to `main` r=Kerollmops a=curquiza



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Filip Bachul <filipbachul@gmail.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-06-06 13:10:24 +00:00
d96d8bb0dd Merge #3789
3789: Improve the metrics r=dureuill a=irevoire

# Pull Request

## Related issue
Implements https://github.com/meilisearch/meilisearch/issues/3790
Associated specification: https://github.com/meilisearch/specifications/pull/242

## Be cautious; it's DB-breaking 😱 

While reviewing and after merging this PR, be cautious; if you already have a `data.ms` and run meilisearch with this code on it, it won't work because we need to cache new information on the index stats (that are backed up on disk). You'll get internal errors.

### About the breaking-change label

We only break the API of the metrics route, which does not pose any problem since it's experimental.

## What does this PR do?
- Create a method to get the « facet distribution » of the task queue.
- Prefix all the metrics by `meilisearch_`
- Add the real database size used by meilisearch
- Add metrics on the task queue
- Update the grafana dashboard to these new changes
- Move the dashboard to the `assets` directory
- Provide a new prometheus file to scrape meilisearch easily

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-06-06 11:44:54 +00:00
4a3405afec comment the stats method 2023-06-06 12:59:58 +02:00
3cfd653db1 Apply suggestions from code review
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-06-06 11:38:41 +02:00
b6b6a80b76 Fix SDK CI for scheduled jobs 2023-06-06 10:38:05 +02:00
f3e2f79290 Merge branch 'main' into tmp-release-v1.2.0 2023-06-05 18:36:28 +02:00
f517274d1f Merge #3788
3788: Use `RoaringBitmap::deserialize_unchecked_from` to reduce the deserialization time r=irevoire a=Kerollmops

This pull request replaces the `RoaringBitmap::deserialize_from` methods with the `deserialize_unchecked_from` to avoid doing too many checks. We know the written bitmaps are valid as we do not disable the checks during the indexation phase.

I did a small test with #3780 and discovered that the deserialization time changed from 32% to 9.46% when using these changes. It seems it was low-hanging fruit hidden behind a leaf.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-06-05 09:20:30 +00:00
3f41bc642a Merge #3804 #3805
3804: Bump svenstaro/upload-release-action from 2.5.0 to 2.6.1 r=curquiza a=dependabot[bot]

Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.5.0 to 2.6.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/releases">svenstaro/upload-release-action's releases</a>.</em></p>
<blockquote>
<h2>2.6.1</h2>
<ul>
<li>Do not overwrite body or name if empty <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/108">#108</a> (thanks <a href="https://github.com/regevbr"><code>`@​regevbr</code></a>)</li>`
</ul>
<h2>2.6.0</h2>
<ul>
<li>Add <code>make_latest</code> input parameter. Can be set to <code>false</code> to prevent the created release from being marked as the latest release for the repository <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/100">#100</a> (thanks <a href="https://github.com/brandonkelly"><code>`@​brandonkelly</code></a>)</li>`
<li>Don't try to upload empty files <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/102">#102</a> (thanks <a href="https://github.com/Loyalsoldier"><code>`@​Loyalsoldier</code></a>)</li>`
<li>Bump all deps <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/105">#105</a></li>
<li><code>overwrite</code> option also overwrites name and body <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/106">#106</a> (thanks <a href="https://github.com/regevbr"><code>`@​regevbr</code></a>)</li>`
<li>Add <code>promote</code> option to allow prereleases to be promoted <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/74">#74</a> (thanks <a href="https://github.com/regevbr"><code>`@​regevbr</code></a>)</li>`
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md">svenstaro/upload-release-action's changelog</a>.</em></p>
<blockquote>
<h2>[2.6.1] - 2023-05-31</h2>
<ul>
<li>Do not overwrite body or name if empty <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/108">#108</a> (thanks <a href="https://github.com/regevbr"><code>`@​regevbr</code></a>)</li>`
</ul>
<h2>[2.6.0] - 2023-05-23</h2>
<ul>
<li>Add <code>make_latest</code> input parameter. Can be set to <code>false</code> to prevent the created release from being marked as the latest release for the repository <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/100">#100</a> (thanks <a href="https://github.com/brandonkelly"><code>`@​brandonkelly</code></a>)</li>`
<li>Don't try to upload empty files <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/102">#102</a> (thanks <a href="https://github.com/Loyalsoldier"><code>`@​Loyalsoldier</code></a>)</li>`
<li>Bump all deps <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/105">#105</a></li>
<li><code>overwrite</code> option also overwrites name and body <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/106">#106</a> (thanks <a href="https://github.com/regevbr"><code>`@​regevbr</code></a>)</li>`
<li>Add <code>promote</code> option to allow prereleases to be promoted <a href="https://redirect.github.com/svenstaro/upload-release-action/pull/74">#74</a> (thanks <a href="https://github.com/regevbr"><code>`@​regevbr</code></a>)</li>`
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="2b9d2847a9"><code>2b9d284</code></a> 2.6.1</li>
<li><a href="f9beb0ad08"><code>f9beb0a</code></a> Merge pull request <a href="https://redirect.github.com/svenstaro/upload-release-action/issues/108">#108</a> from regevbr/<a href="https://redirect.github.com/svenstaro/upload-release-action/issues/107">#107</a></li>
<li><a href="1662cfa449"><code>1662cfa</code></a> fix <a href="https://redirect.github.com/svenstaro/upload-release-action/issues/197">#197</a> - do not overwrite, if empty</li>
<li><a href="a5002416a0"><code>a500241</code></a> Document running npm update after changing version</li>
<li><a href="58d5258088"><code>58d5258</code></a> 2.6.0</li>
<li><a href="ffc1afa9c0"><code>ffc1afa</code></a> Update CHANGELOG</li>
<li><a href="24bced81d9"><code>24bced8</code></a> Merge pull request <a href="https://redirect.github.com/svenstaro/upload-release-action/issues/74">#74</a> from regevbr/body</li>
<li><a href="794b3152e1"><code>794b315</code></a> fix <a href="https://redirect.github.com/svenstaro/upload-release-action/issues/42">#42</a> - overwrite body and name as well</li>
<li><a href="b00963776a"><code>b009637</code></a> fix <a href="https://redirect.github.com/svenstaro/upload-release-action/issues/42">#42</a> - overwrite body and name as well</li>
<li><a href="210500d479"><code>210500d</code></a> fix <a href="https://redirect.github.com/svenstaro/upload-release-action/issues/42">#42</a> - overwrite body and name as well</li>
<li>Additional commits viewable in <a href="https://github.com/svenstaro/upload-release-action/compare/2.5.0...2.6.1">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=svenstaro/upload-release-action&package-manager=github_actions&previous-version=2.5.0&new-version=2.6.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

3805: Bump actions/setup-go from 3 to 4 r=curquiza a=dependabot[bot]

Bumps [actions/setup-go](https://github.com/actions/setup-go) from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/actions/setup-go/releases">actions/setup-go's releases</a>.</em></p>
<blockquote>
<h2>v4.0.0</h2>
<p>In scope of release we enable cache by default. The action won’t throw an error if the cache can’t be restored or saved. The action will throw a warning message but it won’t stop a build process. The cache can be disabled by specifying <code>cache: false</code>.</p>
<pre lang="yaml"><code>steps:
  - uses: actions/checkout@v3
  - uses: actions/setup-go@v4
    with:
      go-version: ‘1.19’
  - run: go run hello.go
</code></pre>
<p>Besides, we introduce such changes as</p>
<ul>
<li><a href="https://redirect.github.com/actions/setup-go/pull/305">Allow to use only GOCACHE for cache</a></li>
<li><a href="https://redirect.github.com/actions/setup-go/pull/315">Bump json5 from 2.2.1 to 2.2.3</a></li>
<li><a href="https://redirect.github.com/actions/setup-go/pull/323">Use proper version for primary key in cache</a></li>
<li><a href="https://redirect.github.com/actions/setup-go/pull/351">Always add Go bin to the PATH</a></li>
<li><a href="https://redirect.github.com/actions/setup-go/pull/350">Add step warning if go-version input is empty</a></li>
</ul>
<h2>Add support for stable and oldstable aliases</h2>
<p>In scope of this release we introduce aliases for the <code>go-version</code> input. The <code>stable</code> alias installs the latest stable version of Go. The <code>oldstable</code> alias installs previous latest minor release (the stable is 1.19.x -&gt; the oldstable is 1.18.x).</p>
<h3>Stable</h3>
<pre lang="yaml"><code>steps:
  - uses: actions/checkout@v3
  - uses: actions/setup-go@v3
    with:
      go-version: 'stable'
  - run: go run hello.go
</code></pre>
<h3>OldStable</h3>
<pre lang="yaml"><code>steps:
  - uses: actions/checkout@v3
  - uses: actions/setup-go@v3
    with:
      go-version: 'oldstable'
  - run: go run hello.go
</code></pre>
<h2>Add support for go.work and pass the token input through on GHES</h2>
<p>In scope of this release we added <a href="https://redirect.github.com/actions/setup-go/pull/283">support for go.work file to pass it in go-version-file input</a>.</p>
<pre lang="yaml"><code>steps:
  - uses: actions/checkout@v3
  - uses: actions/setup-go@v3
&lt;/tr&gt;&lt;/table&gt; 
</code></pre>
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="fac708d667"><code>fac708d</code></a> Bump <code>@​actions/cache</code> dependency to v3.2.1 (<a href="https://redirect.github.com/actions/setup-go/issues/374">#374</a>)</li>
<li><a href="dd84a9531a"><code>dd84a95</code></a> Update xml2js (<a href="https://redirect.github.com/actions/setup-go/issues/370">#370</a>)</li>
<li><a href="41c2024c46"><code>41c2024</code></a> Fix glob bug in package.json scripts section (<a href="https://redirect.github.com/actions/setup-go/issues/359">#359</a>)</li>
<li><a href="8dbf352f06"><code>8dbf352</code></a> update README fo v4 (<a href="https://redirect.github.com/actions/setup-go/issues/354">#354</a>)</li>
<li><a href="4d34df0c23"><code>4d34df0</code></a> Update configuration files (<a href="https://redirect.github.com/actions/setup-go/issues/348">#348</a>)</li>
<li><a href="fdc0d672a1"><code>fdc0d67</code></a> Add Go bin if go-version input is empty (<a href="https://redirect.github.com/actions/setup-go/issues/351">#351</a>)</li>
<li><a href="ebfdf6ac95"><code>ebfdf6a</code></a> add warning if go-version is empty (<a href="https://redirect.github.com/actions/setup-go/issues/350">#350</a>)</li>
<li><a href="b27d76912e"><code>b27d769</code></a> fix lockfileVersion (<a href="https://redirect.github.com/actions/setup-go/issues/349">#349</a>)</li>
<li><a href="c51a720768"><code>c51a720</code></a> Enable caching by default with default input (<a href="https://redirect.github.com/actions/setup-go/issues/332">#332</a>)</li>
<li><a href="6b848af622"><code>6b848af</code></a> Merge pull request <a href="https://redirect.github.com/actions/setup-go/issues/343">#343</a> from akv-platform/reusable-workflow</li>
<li>Additional commits viewable in <a href="https://github.com/actions/setup-go/compare/v3...v4">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-go&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-06-05 08:36:22 +00:00
672abdb341 Merge #3803
3803: Bump Swatinem/rust-cache from 2.2.1 to 2.4.0 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.1 to 2.4.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.4.0</h2>
<ul>
<li>Fix cache key stability.</li>
<li>Use 8 character hash components to reduce the key length, making it more readable.</li>
</ul>
<h2>v2.3.0</h2>
<ul>
<li>Add <code>cache-all-crates</code> option, which enables caching of crates installed by workflows.</li>
<li>Add installed packages to cache key, so changes to workflows that install rust tools are detected and cached properly.</li>
<li>Fix cache restore failures due to upstream bug.</li>
<li>Fix <code>EISDIR</code> error due to globed directories.</li>
<li>Update runtime <code>@actions/cache</code>, <code>@actions/io</code> and dev <code>typescript</code> dependencies.</li>
<li>Update <code>npm run prepare</code> so it creates distribution files with the right line endings.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.4.0</h2>
<ul>
<li>Fix cache key stability.</li>
<li>Use 8 character hash components to reduce the key length, making it more readable.</li>
</ul>
<h2>2.3.0</h2>
<ul>
<li>Add <code>cache-all-crates</code> option, which enables caching of crates installed by workflows.</li>
<li>Add installed packages to cache key, so changes to workflows that install rust tools are detected and cached properly.</li>
<li>Fix cache restore failures due to upstream bug.</li>
<li>Fix <code>EISDIR</code> error due to globed directories.</li>
<li>Update runtime <code>@actions/cache</code>, <code>@actions/io</code> and dev <code>typescript</code> dependencies.</li>
<li>Update <code>npm run prepare</code> so it creates distribution files with the right line endings.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="988c164c3d"><code>988c164</code></a> 2.4.0</li>
<li><a href="bb80d0f127"><code>bb80d0f</code></a> chore: use 8 character hash components (<a href="https://redirect.github.com/Swatinem/rust-cache/issues/143">#143</a>)</li>
<li><a href="ad97570a01"><code>ad97570</code></a> fix: cache key stability (<a href="https://redirect.github.com/Swatinem/rust-cache/issues/142">#142</a>)</li>
<li><a href="060bda31e0"><code>060bda3</code></a> 2.3.0</li>
<li><a href="865fd1f6db"><code>865fd1f</code></a> &quot;update dependencies and changelog&quot;</li>
<li><a href="7c7e41ab01"><code>7c7e41a</code></a> chore: changelog v2.3.0 (<a href="https://redirect.github.com/Swatinem/rust-cache/issues/139">#139</a>)</li>
<li><a href="68aeeba167"><code>68aeeba</code></a> chore: use linefix to ensure platform line endings (<a href="https://redirect.github.com/Swatinem/rust-cache/issues/135">#135</a>)</li>
<li><a href="def0926359"><code>def0926</code></a> feat: add option to cache all crates (<a href="https://redirect.github.com/Swatinem/rust-cache/issues/137">#137</a>)</li>
<li><a href="827c240e23"><code>827c240</code></a> fix: cache key dependency on installed packages (<a href="https://redirect.github.com/Swatinem/rust-cache/issues/138">#138</a>)</li>
<li><a href="5e9fae966f"><code>5e9fae9</code></a> fix: cache restore failures (<a href="https://redirect.github.com/Swatinem/rust-cache/issues/136">#136</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/Swatinem/rust-cache/compare/v2.2.1...v2.4.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.2.1&new-version=2.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-06-05 07:58:52 +00:00
a13ed4d0b0 Bump actions/setup-go from 3 to 4
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 3 to 4.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-06-01 17:57:48 +00:00
4cc2988482 Bump svenstaro/upload-release-action from 2.5.0 to 2.6.1
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.5.0 to 2.6.1.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/2.5.0...2.6.1)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-06-01 17:57:43 +00:00
26c7e31f25 Bump Swatinem/rust-cache from 2.2.1 to 2.4.0
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.1 to 2.4.0.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v2.2.1...v2.4.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-06-01 17:57:40 +00:00
b2dee07b5e Merge #3783
3783: Improve SDK CI to choose the Docker image r=curquiza a=curquiza

The point is to have the following "form" when running the SDK CI manually
`nightly` is the default value if running the CI manually.

<img width="1105" alt="Capture d’écran 2023-05-25 à 12 17 35" src="https://github.com/meilisearch/meilisearch/assets/20380692/87ae7123-efe8-4e7b-a99b-4a40aafa3f79">


Co-authored-by: curquiza <clementine@meilisearch.com>
2023-05-31 12:10:07 +00:00
d963b5f85a Merge #3792
3792: fix the type of the document deletion by filter tasks r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3791

## What does this PR do?
- Hide the deleteDocumentByFilter internal type from the users.


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-30 18:20:28 +00:00
2acc3ec5ee fix the type of the document deletion by filter tasks 2023-05-30 15:18:52 +02:00
da04edff8c Better use deserialize_unchecked_from to reduce the deserialization time 2023-05-30 14:58:30 +02:00
85a80f4f4c move the grafana dashboard to the assets directory and upload a basic prometheus scraper to help new users 2023-05-29 18:39:34 +02:00
1213ec7164 update the dashboard once again 2023-05-29 18:37:55 +02:00
f03d99690d run the indexing fuzzer on every merge for as long as possible 2023-05-29 14:56:15 +02:00
0a7817a002 Merge #3786
3786: Consistently use wrapping add to avoid overflow in debug when query s… r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3785

## What does this PR do?
- Some of the code paths would erroneously use the default addition operator that has the semantics that "overflow is an error, checked at runtime in debug" instead of the intended "overflow is expected" semantics that this code uses (this code is using `u16::MAX` as a sentinel). This PR makes it so the wrapping add operator is used everywhere.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-29 12:39:54 +00:00
23a5b45ebf drop the old fuzz file 2023-05-29 14:02:37 +02:00
46fa99f486 make the fuzzer stops if an error occurs 2023-05-29 13:44:32 +02:00
67a583bedf handle the panic happening in milli 2023-05-29 13:39:26 +02:00
99e9057684 rename the indexing fuzzer to fuzz-indexing so it doesn't collide with other binary name when being called from the root of the workspace 2023-05-29 13:07:06 +02:00
8d40d300a5 rename the fuzzer to indexing 2023-05-29 12:37:24 +02:00
6c6387d05e move the fuzzer to its own crate 2023-05-29 12:27:39 +02:00
1dfc4038ab Add test that fails before PR and passes now 2023-05-29 11:58:26 +02:00
73198179f1 Consistently use wrapping add to avoid overflow in debug when query starts with a separator 2023-05-29 11:54:12 +02:00
51dce9e9d1 improve the dashboard slightly 2023-05-25 18:33:01 +02:00
c9b65677bf return the on disk size actually used by meilisearch 2023-05-25 18:30:30 +02:00
35d5556f1f prefix all the metrics by meilisearch_ 2023-05-25 17:41:53 +02:00
c433bdd1cd add a view for the task queue in the metrics 2023-05-25 12:58:13 +02:00
2db09725f8 Improve SDK CI to choose the Docker image 2023-05-25 12:22:35 +02:00
fdb23132d4 Merge #3781
3781: Revert "Improve docker cache" r=Kerollmops a=curquiza

Reverts meilisearch/meilisearch#3566 because it does not work as expected, and so I want to remove useless complexity from the CI and Dockerfile

Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-05-25 09:57:40 +00:00
11b95284cd Revert "Improve docker cache" 2023-05-25 11:48:26 +02:00
1b601f70c6 increase the bucketing of requests 2023-05-25 11:08:16 +02:00
8185731bbf Merge #3779
3779: Add a cron test with disabled tokenization (with @roy9495) r=Kerollmops a=curquiza

Replaces https://github.com/meilisearch/meilisearch/pull/3746 because of bors issue

Co-authored-by: TATHAGATA ROY <98920199+roy9495@users.noreply.github.com>
Co-authored-by: Clémentine U. - curqui <clementine@meilisearch.com>
2023-05-25 08:13:14 +00:00
840727d76f Update .github/workflows/test-suite.yml 2023-05-25 10:07:59 +02:00
ead07d0b9d Update .github/workflows/test-suite.yml 2023-05-25 10:07:52 +02:00
44f231d41e Update .github/workflows/test-suite.yml 2023-05-25 10:07:45 +02:00
3c5d1c93de Added a cron test for disabled all-tokenization 2023-05-25 10:07:32 +02:00
087866d59f Merge #3775
3775: Last error code changes on the new get/delete documents routes r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes #3774

## What does this PR do?
Following the specification: https://github.com/meilisearch/specifications/pull/236

1. Get rid of the `invalid_document_delete_filter` and always use the `invalid_document_filter`
2. Introduce a new `missing_document_filter` instead of returning `invalid_document_delete_filter` (that’s consistent with all the other routes that have a mandatory parameter)
3. Always return the `original_filter` in the details (potentially set to `null`) instead of hiding it if it wasn’t used


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-24 10:07:41 +00:00
9111f5176f get rid of the invalid document delete filter in favor of the invalid document filter 2023-05-24 11:53:16 +02:00
b9dd092a62 make the details return null in the originalFilter field if no filter was provided + add a big test on the details 2023-05-24 11:48:22 +02:00
ca99bc3188 implement the missing document filter error code when deleting documents 2023-05-24 11:29:20 +02:00
57d53de402 Increase the number of buckets 2023-05-24 10:47:15 +02:00
2e49d6aec1 Merge #3768
3768: Fix bugs in graph-based ranking rules + make `words` a graph-based ranking rule r=dureuill a=loiclec

This PR contains three changes:

## 1. Don't call the `words` ranking rule if the term matching strategy is `All`

This is because the purpose of `words` is only to remove nodes from the query graph. It would never do any useful work when the matching strategy was `All`. Remember that the universe was already computed before by computing all the docids corresponding to the "maximally reduced" query graph, which, in the case of `All`, is equal to the original graph.

## 2. The `words` ranking rule is replaced by a graph-based ranking rule. 

This is for three reasons:

1. **performance**: graph-based ranking rules benefit from a lot of optimisations by default, which ensures that they are never too slow. The previous implementation of `words` could call `compute_query_graph_docids` many times if some words had to be removed from the query, which would be quite expensive. I was especially worried about its performance in cases where it is placed right after the `sort` ranking rule. Furthermore, `compute_query_graph_docids` would clone a lot of bitmaps many times unnecessarily.

2. **consistency**: every other ranking rule (except `sort`) is graph-based. It makes sense to implement `words` like that as well. It will automatically benefit from all the features, optimisations, and bug fixes that all the other ranking rules get.

3. **surfacing bugs**: as the first ranking rule to be called (most of the time), I'd like `words` to behave the same as the other ranking rules so that we can quickly detect bugs in our graph algorithms. This actually already happened, which is why this PR also contains a bug fix.

## 3. Fix the `update_all_costs_before_nodes` function

It is a bit difficult to explain what was wrong, but I'll try. The bug happened when we had graphs like:
<img width="730" alt="Screenshot 2023-05-16 at 10 58 57" src="https://github.com/meilisearch/meilisearch/assets/6040237/40db1a68-d852-4e89-99d5-0d65757242a7">
and we gave the node `is` as argument.

Then, we'd walk backwards from the node breadth-first. We'd update the costs of:
1. `sun`
2. `thesun`
3. `start`
4. `the`

which is an incorrect order. The correct order is:

1. `sun`
2. `thesun`
3. `the`
4. `start`

That is, we can only update the cost of a node when all of its successors have either already been visited or were not affected by the update to the node passed as argument. To solve this bug, I factored out the graph-traversal logic into a `traverse_breadth_first_backward` function.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-23 13:28:08 +00:00
51043f78f0 Remove trailing whitespace 2023-05-23 15:27:25 +02:00
a490a11325 Add explanatory comment on the way we're recomputing costs 2023-05-23 15:24:24 +02:00
002f42875f fix the fuzzer 2023-05-23 11:42:40 +02:00
22213dc604 push the fuzzer 2023-05-23 09:14:26 +02:00
602ad98cb8 improve the way we handle the fsts 2023-05-22 11:15:14 +02:00
7f619ff0e4 get rids of the now unused soft_deletion_used parameter 2023-05-22 10:33:49 +02:00
4391cba6ca fix the addition + deletion bug 2023-05-17 18:28:57 +02:00
d7ddf4925e Revert "Disable autobatching of additions and deletions"
This reverts commit a94e78ffb0.
2023-05-17 14:25:50 +02:00
101f5a20d2 Merge #3757
3757: Adjust the cost of edges in the `position` ranking rule by bucketing positions more aggressively r=loiclec a=loiclec

This PR significantly improves the performance of the `position` ranking rule when:
1. a query contains many words
2. the `position` ranking rule needs to be called many times
3. the score of the documents according to `position` is high

These conditions greatly increase:
1. the number of edge traversals that are needed to find a valid path from the `start` node to the `end` node
2. the number of edges that need to be deleted from the graph, and therefore the number of times that we need to recompute all the possible costs from START to END

As a result, a majority of the search time is spent in `visit_condition`, `visit_node`, and `update_all_costs_before_node`. This is frustrating because it often happens when the "universe" given to the rule consists of only a handful of document ids.

By limiting the number of possible edges between two nodes from `20` to `10`, we:
1. reduce the number of possible costs from START to END
2. reduce the number of edges that will be deleted 
3. make it faster to update the costs after deleting an edge
4. reduce the number of buckets that need to be computed

In terms of relevancy, I don't think we lose or gain much. We still prefer terms that are in lower positions, with decreasing precision as we go further. The previous choice of bucketing wasn't chosen in a principled way, and neither is this one. They both "feel" right to me.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2023-05-17 11:43:59 +00:00
6ce1ce77e6 Merge #3738
3738: Add analytics on the get documents resource r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3737
Related spec https://github.com/meilisearch/specifications/pull/234

## What does this PR do?
Add the analytics for the following routes:
- `GET` - `/indexes/:uid/documents`
- `GET` - `/indexes/:uid/documents/:doc_id`
- `POST` - `/indexes/:uid/documents/fetch`

These analytics are aggregated between two events:
- `Documents Fetched GET`
- `Documents Fetched POST`

That shares the same payload:
| Property name | Description | Example |
|---------------|-------------|---------|
| `requests.total_received` | Total number of requests received in this batch | 325 |
| `per_document_id` | `false` | false |
| `per_filter` | `true` if `POST /indexes/:indexUid/documents/fetch` endpoint was used with a filter in this batch, otherwise `false` | false |
| `pagination.max_limit` | Highest value given for the `limit` parameter in this batch | 60 |
| `pagination.max_offset` | Highest value given for the `offset` parameter in this batch | 1000 |

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-16 19:37:41 +00:00
ec8f685d84 Fix bug in cheapest path algorithm 2023-05-16 17:01:30 +02:00
5758268866 Don't compute split_words for phrases 2023-05-16 17:01:18 +02:00
4d037e6693 Merge #3759
3759: Invalid error code when parsing filters r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3753

## What does this PR do?
Fix the error code in case the error comes from the evaluation of the filter for the get, fetch and delete documents routes.


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-16 12:55:06 +00:00
96da5130a4 fix the error code in case of not filterable attributes on the get / delete documents by filter routes 2023-05-16 13:56:18 +02:00
3e19702de6 Update snapshot tests 2023-05-16 12:22:46 +02:00
1e762d151f Merge #3755
3755: Re-add final dot r=curquiza a=ManyTheFish

I removed the final dot of the error message in my last PR, this one re-adds it.

related to https://github.com/meilisearch/meilisearch/pull/3749

> Oups 😬 

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-05-16 10:10:58 +00:00
0b38f211ac test the new introduced route 2023-05-16 12:07:44 +02:00
f6524a6858 Adjust costs of edges in position ranking rule
To ensure good performance
2023-05-16 11:28:56 +02:00
65ad8cce36 Merge #3741
3741: Add ngram support to the highlighter r=ManyTheFish a=loiclec

This PR fixes a bug introduced by the search refactor, where ngrams were not highlighted. 

The solution was to add the ngrams to the vector of `LocatedQueryTerm` that is given to the `MatchingWords` structure.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-05-16 09:03:31 +00:00
42650f82e8 Re-add final dot 2023-05-16 10:57:26 +02:00
a37da36766 Implement words as a graph-based ranking rule and fix some bugs 2023-05-16 10:42:11 +02:00
85d96d35a8 Highlight ngram matches as well 2023-05-16 10:39:36 +02:00
64b11f45d7 fix test name 2023-05-16 09:24:49 +02:00
bf66e97b48 Merge #3749
3749: Fix back: sort error message r=ManyTheFish a=ManyTheFish

This PR reintroduces the error message modified in https://github.com/meilisearch/milli/pull/375.
However, this added double-quotes around `sort` in the message. I don't think another message contains double-quotes, so I have added a separate commit replacing the double-quotes with back-ticks, which seems more consistent with the other error messages, this last change can be reverted easily.

## Detailed changes
#### v1.2-rc0
```
The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.
```
#### [Reintroduce fix (previous and expected behavior)](23d1c86825)
```
You must specify where "sort" is listed in the rankingRules setting to use the sort parameter at search time
```
#### [Replace double-quotes with back-ticks (my suggestion)](4d691d071a)
```
You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time
```

## Related

Fixes #3722

## Reviewers

- technical review: `@irevoire`
- to validate the replacement: `@macraig`

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-05-15 14:55:51 +00:00
a7ea5ec748 Merge #3651
3651: Use the writemap flag to reduce the memory usage r=irevoire a=Kerollmops

This draft PR is showing some stats about the memory usage of Meilisearch when [the LMDB `MDB_WRITEMAP` flag](3947014aed/libraries/liblmdb/lmdb.h (L573-L581)) is enabled and when it is not. As you can see there is a reduction of about 50% of the memory usage peak. The dataset used was [the Wikipedia one](https://www.notion.so/meilisearch/Wikipedia-8b1486e4b17547c5bda485d2d97767a0) with the first 30 000 CSV documents without settings. This PR depends on https://github.com/meilisearch/heed/pull/168.

I just [opened a discussion](https://github.com/meilisearch/product/discussions/652) for people to understand the tradeoffs and give their feedback.

- [x] Create an experiment flag `--experimental-reduce-indexing-memory-usage`.
- [x] Add it to the config file.
- [x] Explain the tradeoff and copy/link the LMDB documentation in the help message.
- [x] Add analytics about the experimental flag.
- [x] Document that this flag cannot be used on Windows, ~~or hide it~~.

<details>
  <summary>The command I used to run the tests</summary>

#### Sign the binary to be able to use Instruments / xcrun
```sh
codesign -s - -f --entitlements ~/ent.plist target/release/meilisearch
```

where `ent.plist` contains:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
    <dict>
        <key>com.apple.security.get-task-allow</key>
        <true/>
    </dict>
</plist>
```

#### Run Meilisearch in measure-mode
```sh
xcrun xctrace record --template 'Allocations' --launch -- target/release/meilisearch --max-indexing-memory 0MiB
```

#### Send the wiki dataset available on notion.so / Public
```sh
for f in 0.csv 15000.csv; do echo sending $f; xh 'localhost:7700/indexes/wiki/documents' 'content-type:text/csv' @$f; done
```

#### Wait for the task to finish
```sh
watch --color xh --pretty all 'localhost:7700/tasks?statuses=processing'
```
</details>

Keep in mind that I tested that with the Instruments Apple tools on an iMac 5k 2019. More benchmarks must be done, especially on the indexation speed, as the flag is said to slow down writing into databases bigger than the amount of memory.

On the left Meilisearch is running without the flag. On the right, it is running with the flag.

<p align="center">
<img align="left" width="45%" alt="Instrument showing the memory usage of Meilisearch without the MDB_WRITEMAP flag" src="https://user-images.githubusercontent.com/3610253/234299524-7607f1df-6fc1-45d3-bd3d-4f9388002857.png">
<img align="right" width="45%" alt="Instrument showing the memory usage of Meilisearch with the MDB_WRITEMAP flag" src="https://user-images.githubusercontent.com/3610253/234299534-6cc3ae58-8bd9-426c-aa79-4c78f9e88b94.png">
</p>

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-05-15 14:10:07 +00:00
dc7ba77e57 Add the option in the config file 2023-05-15 16:07:43 +02:00
13f870e993 Fix typos and documentation issues 2023-05-15 15:11:45 +02:00
1a79fd0c3c Use the new heed v0.12.6 2023-05-15 11:42:30 +02:00
f759ec7fad Expose a flag to enable the MDB_WRITEMAP flag 2023-05-15 11:38:43 +02:00
4d691d071a Change double-quotes by back-ticks in sort error message 2023-05-15 11:10:36 +02:00
23d1c86825 Re-introduce the sort error message fix 2023-05-15 11:07:23 +02:00
c4a40e7110 Use the writemap flag to reduce the memory usage 2023-05-15 10:15:33 +02:00
e68d86d6b6 tests: add unit test for PayloadTooLarge 2023-05-11 20:51:10 +02:00
e01980c6f4 Merge #3739
3739: fix: update `payload_too_large` error message to include human readable maximum acceptable payload size r=Kerollmops a=cymruu


# Pull Request

## Related issue
Fixes #3736 

## What does this PR do?
- update `payload_too_large` error message as requested in ticket

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
2023-05-11 09:37:19 +00:00
25209a3590 introduce remaining field in Payload 2023-05-10 20:55:18 +02:00
3064ea6495 fix: update payload_too_large error message to include human readable maximum acceptable payload size 2023-05-10 18:16:59 +02:00
46ec8a97e9 rename the analytics according to the spec 2023-05-10 14:28:30 +02:00
c42a65a297 Update meilisearch/src/analytics/segment_analytics.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-10 14:28:30 +02:00
d08f8690d2 add analytics on the get documents resource 2023-05-10 14:28:30 +02:00
ad5f25d880 Merge #3742
3742: Compute split words derivations of terms that don't accept typos r=ManyTheFish a=loiclec

Allows looking for the split-word derivation for short words in the user's query (like `the -> "t he"` or `door -> do or`) as well as for 3grams.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-05-10 12:12:52 +00:00
4d352a21ac Compute split words derivations of terms that don't accept typos 2023-05-10 13:31:19 +02:00
918ce1dd67 Merge #3731
3731: Move comments above keys in config.toml r=curquiza a=jirutka

The current style is very unusual, confusing and breaks compatibility with tools for parsing config files including comments. Everyone writes comments above the items to which they refer (maybe except pythonists), so let's stick to that.


Co-authored-by: Jakub Jirutka <jakub@jirutka.cz>
2023-05-09 09:24:36 +00:00
4a4210c116 Merge #3734
3734: Update version for the next release (v1.2.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-05-09 07:35:48 +00:00
3533d4f2bb Update version for the next release (v1.2.0) in Cargo.toml 2023-05-08 17:52:33 +00:00
3625389057 Highlight ngram matches as well 2023-05-08 15:35:41 +02:00
eace6df91b Merge #3726
3726: Fix prefix highlighting r=loiclec a=ManyTheFish

The prefix queries were not properly highlighted, this PR now highlights only the start of a word when it matched with a prefix

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-05-08 07:46:46 +00:00
83ab8cf4e5 Remove dbg!(..) expression in highlighter tests 2023-05-08 09:45:23 +02:00
8095f21999 Move comments above keys in config.toml
The current style is very unusual, confusing and breaks compatibility
with tools for parsing config files including comments. Everyone writes
comments above the items to which they refer (maybe except pythonists),
so let's stick to that.
2023-05-06 18:10:54 +02:00
cd2573fcc3 Fix prefix highlighting 2023-05-04 16:53:50 +02:00
9f7981df28 Merge #3687
3687: Allow to disable specialized tokenizations (again) r=Kerollmops a=jirutka

In PR #2773, I added the `chinese`, `hebrew`, `japanese` and `thai` feature flags to allow meilisearch to be built without huge specialized tokenizations that took up 90% of the meilisearch binary size. Unfortunately, due to some recent changes, this doesn't work anymore. The problem lies in excessive use of the `default` feature flag, which infects the dependency graph.

Instead of adding `default-features = false` here and there, it's easier and more future-proof to not declare `default` in `milli` and `meilisearch-types`. I've renamed it to `all-tokenizers`, which also makes it a bit clearer what it's about.


Co-authored-by: Jakub Jirutka <jakub@jirutka.cz>
2023-05-04 14:48:01 +00:00
e615fa5ec6 Fix unused_imports warning in milli when japanese is not enabled 2023-05-04 15:46:11 +02:00
13f1277637 Allow to disable specialized tokenizations (again)
In PR #2773, I added the `chinese`, `hebrew`, `japanese` and `thai`
feature flags to allow meilisearch to be built without huge specialized
tokenizations that took up 90% of the meilisearch binary size.
Unfortunately, due to some recent changes, this doesn't work anymore.
The problem lies in excessive use of the `default` feature flag, which
infects the dependency graph.

Instead of adding `default-features = false` here and there, it's easier
and more future-proof to not declare `default` in `milli` and
`meilisearch-types`. I've renamed it to `all-tokenizers`, which also
makes it a bit clearer what it's about.
2023-05-04 15:45:40 +02:00
4919774f2e Merge #3570
3570: Get documents by filter r=irevoire a=dureuill

# Pull Request

## Related issue

Associated spec: https://github.com/meilisearch/specifications/pull/234

None really, this is more of an extension of #3477: since after this issue we'll be able to delete documents by filter, it makes sense to also be able to get documents by filter. 

## What does this PR do?

### User standpoint

- Add a new `filter` URL parameter to `GET /indexes/{:indexUid}/documents` and a new `POST /indexes/{:indexUid}/documents/fetch` route with the same `offset, limit, fields, filter` 

### Implementation standpoint

-  Add a new `Index::iter_documents` method to iterate on a set of documents rather than return a vector of these documents.
- Rewrite the other `Index::*documents` methods to use the new `Index::iter_documents` method.

## Usage

<details>
<summary>
Sample request and response
</summary>

```
curl -X POST 'http://localhost:7700/indexes/index-1101/documents/fetch' -H 'Content-Type: application/json' --data-binary '{ "filter": "genres = Comedy", "limit": 3, "offset": 8000}' | jsonxf
```

```json
{
  "results": [
    {
      "id": 326126,
      "title": "Bad Exorcists",
      "overview": "A trio of awkward teens intend to win a horror festival by making their own movie, but wind up getting their actress possessed in the process.",
      "genres": [
        "Horror",
        "Comedy"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/lwd65kPbjFacAw3QSXiwSsW6cFU.jpg",
      "release_date": 1425081600
    },
    {
      "id": 326215,
      "title": "Ooops! Noah is Gone...",
      "overview": "It's the end of the world. A flood is coming. Luckily for Dave and his son Finny, a couple of clumsy Nestrians, an Ark has been built to save all animals. But as it turns out, Nestrians aren't allowed. Sneaking on board with the involuntary help of Hazel and her daughter Leah, two Grymps, they think they're safe. Until the curious kids fall off the Ark. Now Finny and Leah struggle to survive the flood and hungry predators and attempt to reach the top of a mountain, while Dave and Hazel must put aside their differences, turn the Ark around and save their kids. It's definitely not going to be smooth sailing.",
      "genres": [
        "Animation",
        "Adventure",
        "Comedy",
        "Family"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/gEJXHgpiKh89Vwjc4XUY5CIgUdB.jpg",
      "release_date": 1427328000
    },
    {
      "id": 326241,
      "title": "For Here or to Go?",
      "overview": "An aspiring Indian tech entrepreneur in the Silicon Valley finds himself unexpectedly battling the bizarre American immigration system to keep his dream alive or prepare to return home forever.",
      "genres": [
        "Drama",
        "Comedy"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/ff8WaA7ItBgl36kdT232i0d0Fnq.jpg",
      "release_date": 1490918400
    }
  ],
  "offset": 8000,
  "limit": 3,
  "total": 9331
}
```

<img width="1348" alt="Capture d’écran 2023-03-08 à 10 09 04" src="https://user-images.githubusercontent.com/41078892/223670905-6932b79b-f9b8-4a41-b59e-be2171705b7d.png">



</details>

# Draft status

- [ ] Route naming: having one route be `GET /indexes/{:indexUid}/documents` and the other `POST /indexes/{:indexUid}/documents/fetch` is suboptimal (also, technically a breaking change for documents with `fetch` as uid?), but `POST /indexes/{:indexUid}/documents` is already used to insert documents.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-04 12:54:26 +00:00
a3da680ce6 Update meilisearch/tests/documents/errors.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-04 14:51:17 +02:00
11e394dba1 merge the document fetch and get error codes 2023-05-04 15:39:49 +02:00
469d2f2a9c fix the fields field of the POST fetch document API 2023-05-04 15:34:09 +02:00
ce6507d20c improve the test of the get document by filter 2023-05-04 15:34:09 +02:00
b92da5d15a add a big test on the get document by filter of the get route 2023-05-04 15:34:09 +02:00
ed3dfbe729 add error codes and tests 2023-05-04 15:34:08 +02:00
441641397b Implement document get with filters 2023-05-04 15:32:34 +02:00
a35d3fc708 Add Index::iter_documents 2023-05-04 15:31:54 +02:00
745c1a2668 Make parse_filter pub 2023-05-04 15:31:53 +02:00
a95128df6b Merge #3550
3550: Delete documents by filter r=irevoire a=dureuill

# Prototype `prototype-delete-by-filter-0`

Usage:
A new route is available under `POST /indexes/{index_uid}/documents/delete` that allows you to delete your documents by filter.
The expected payload looks like that:
```json
{
  "filter": "doggo = bernese"
}
```

It'll then enqueue a task in your task queue that'll delete all the documents matching this filter once it's processed.
Here is an example of the associated details;
```json
  "details": {
    "deletedDocuments": 53,
    "originalFilter": "\"doggo = bernese\""
  }
```

----------


# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3477

## What does this PR do?

### User standpoint

- Modifies the `/indexes/{:indexUid}/documents/delete-batch` route to accept either the existing array of documents ids, or a JSON object with a `filter` field representing a filter to apply. If that latter variant is used, any document matching the filter will be deleted.

### Implementation standpoint

- (processing time version) Adds a new BatchKind that is not autobatchable and that performs the delete by filter
- Reuse the `documentDeletion` task with a new `originalFilter` detail that replaces the `providedIds` detail.

## Example

<details>
<summary>Sample request, response and task result</summary>

Request:

```
curl \
  -X POST 'http://localhost:7700/indexes/index-10/documents/delete-batch' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "filter" : "mass = 600"}'
```

Response:

```
{
  "taskUid": 3902,
  "indexUid": "index-10",
  "status": "enqueued",
  "type": "documentDeletion",
  "enqueuedAt": "2023-02-28T20:50:31.667502Z"
}
```

Task log:

```json
    {
      "uid": 3906,
      "indexUid": "index-12",
      "status": "succeeded",
      "type": "documentDeletion",
      "canceledBy": null,
      "details": {
        "deletedDocuments": 3,
        "originalFilter": "\"mass = 600\""
      },
      "error": null,
      "duration": "PT0.001819S",
      "enqueuedAt": "2023-03-07T08:57:20.11387Z",
      "startedAt": "2023-03-07T08:57:20.115895Z",
      "finishedAt": "2023-03-07T08:57:20.117714Z"
    }
```

</details>

## Draft status

- [ ] Error handling
- [ ] Analytics
- [ ] Do we want to reuse the `delete-batch` route in this way, or create a new route instead?
- [ ] Should the filter be applied at request time or when the deletion task is processed? 
  - The first commit in this PR applies the filter at request time, meaning that even if a document is modified in a way that no longer matches the filter in a later update, it will be deleted as long as the deletion task is processed after that update. 
  - The other commits in this PR apply the filter only when the asynchronous deletion task is processed, meaning that documents that match the filter at processing time are deleted even if they didn't match the filter at request time.
- [ ] If keeping the filter at request time, find a more elegant way to recover the user document ids from the internal document ids. The current way implemented in the first commit of this PR involves getting all the documents matching the filter, looking for the value of their primary key, and turning it into a string by copy-pasting routines found in milli...
- [ ] Security consideration, if any
- [ ] Fix the tests (but waiting until product questions are resolved)
- [ ] Add delete by filter specific tests



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-04 10:44:41 +00:00
e0537c3870 Merge #3720
3720: Change links of docs everywhere r=curquiza a=curquiza

Completely fixes #3668 

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-05-04 10:07:41 +00:00
da220294f6 Merge #3639
3639: Add a dedicated error variant for planned failures in index scheduler tests r=Kerollmops a=Sufflope

# Pull Request

## Related issue
Fixes #3086

## What does this PR do?
- Add a dedicated test variant in test cfg to avoid reusing a misleading existing error

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Jean-Sébastien Bour <jean-sebastien@bour.name>
2023-05-04 09:33:57 +00:00
78e611f282 Merge #3693
3693: Implement the auto deletion of tasks r=dureuill a=irevoire

Fixes https://github.com/meilisearch/meilisearch/issues/3622

This PR should be the definite fix for #3622.

It adds a limit (1M) to the maximum number of tasks the task queue can hold.
Once the task queue reaches this limit (1M of tasks are in the task queue, whatever their status is), meilisearch will schedule a task deletion that tries to delete the oldest 100k tasks.
If meilisearch can't delete 100k tasks because some of them are not yet finished, it will delete as many tasks as possible.

Once the limit is reached, you're still able to register new tasks. The engine will only stop you from adding new tasks once [the other hard limit](https://github.com/meilisearch/meilisearch/pull/3659) of 10GiB of tasks is reached (that's between 5M and 15M of tasks depending on your workflow).

-------

Technically;
- We only try to schedule our task deletion when calling the tick function but before creating a new batch. This means we never enqueue a task we're not going to process ~right away.
- If our task deletion doesn't delete anything, we don't enqueue it and log a warning telling the user that the engine is not working properly

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-04 08:30:22 +00:00
d8381eb790 Fix originalFilter 2023-05-04 10:07:59 +02:00
b212aef5db add one nanosecond to generated filter so as to generate a filter that would have matched the last task to delete 2023-05-04 09:56:48 +02:00
6bf66f35be Merge #3721
3721: Use new bors URL of our self hosted bors instance r=curquiza a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
2023-05-04 07:53:39 +00:00
52ab114f6c Fix test on macOS: 50 tasks would result in the test consistently failing on a local macOS 2023-05-04 00:06:49 +02:00
dcbfecf42c make the generated filter valid 2023-05-04 00:06:49 +02:00
9ca6f59546 Update index-scheduler/src/lib.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-04 00:06:49 +02:00
aa7537a11e make the autodeletion work with a fixed number of tasks and update the tests 2023-05-04 00:06:49 +02:00
972bb2831c log when meilisearch need to delete tasks 2023-05-04 00:06:49 +02:00
f9ddd32545 implement the auto-deletion of tasks 2023-05-04 00:06:49 +02:00
d5059520aa Fix typo 2023-05-03 22:27:03 +02:00
1c3642c9b2 Fix deletion per filter analytics 2023-05-03 22:26:51 +02:00
d2d2bacaf2 add a test on the complex filter 2023-05-03 20:07:08 +02:00
30edba3497 Update links of the docs 2023-05-03 19:14:57 +02:00
84e7bd9342 Fix test after rebase on filter additions 2023-05-03 17:51:28 +02:00
2b74e4d116 Fix test 2023-05-03 17:41:50 +02:00
b5fe0b2b07 fix the details 2023-05-03 17:41:50 +02:00
0f0cd2d929 handle the array of array form of filter in the dumps 2023-05-03 17:41:50 +02:00
fc8c1d118d fix the analytics 2023-05-03 17:41:50 +02:00
0548ab9038 create and use the error code 2023-05-03 17:41:50 +02:00
143acb9cdc update the tests 2023-05-03 17:41:49 +02:00
4b92f1b269 wip 2023-05-03 17:41:49 +02:00
c12a1cd956 test all the error messages 2023-05-03 17:41:49 +02:00
8af8aa5a33 add a test 2023-05-03 17:41:49 +02:00
6df2ba93a9 remove one useless txn 2023-05-03 17:41:49 +02:00
3680a6bf1e extract impl to a function 2023-05-03 17:41:49 +02:00
732c52093d Processing time without autobatching implementation 2023-05-03 17:41:48 +02:00
05cc463fbc Draft implementation of filter support for /delete-by-batch route 2023-05-03 17:41:48 +02:00
1afde4fea5 Merge #3542
3542: Refactor of the search algorithms r=dureuill a=loiclec

This PR refactors a large part of the search logic (related to https://github.com/meilisearch/meilisearch/issues/3547)

- The "query tree" is replaced by a "query graph", which describes the different ways in which the search query can be interpreted and precomputes the word derivations for each query term. Example:

<img width="1162" alt="Screenshot 2023-02-27 at 10 26 50" src="https://user-images.githubusercontent.com/6040237/221525270-87917cc0-60d1-473f-847f-2c5a7de9e370.png">

- The control flow between the ~criterions~ ranking rules is managed in a single place instead of being independently implemented by each ranking rule.

- The set of document candidates is determined greedily from the beginning. It is often referred to as the "universe" in the code.

- The ranking rules  `proximity`, `attribute`, `typo`, and (maybe) `exactness` are or will be implemented using a K-shortest path graph algorithm. This minimises the number of database and bitmap operations we need to do to compute each ranking rule bucket. It also simplifies the code a lot since a lot of ranking rules will share a large part of their implementation.

- Pointers to database values are stored in a cache to avoid searching in the LMDB databases needlessly.

- The result of some roaring bitmap operations are also stored in a cache, although we'll need to measure the memory pressure this puts on the system and maybe deactivate this cache later on.

- Search requests can be visually logged and debugged in tests.

TODO:
- [ ] Reintroduce search benchmarks
- [x] Implement `disableOnWords` and `disableOnAttributes` settings of typo tolerance
- [x] Implement "exhaustive number of hits"
- [x] Implement `attribute` ranking rule
   - [x] Indexing changes: split into `word_fid_docids` and `word_position_docids` (with bucketed position)
   - [x] Ranking rule implementations
- [ ] Implement `exactness` ranking rule
  - [x] Initial implementation
  - [ ] Correct implementation when followed by `Words`
- [ ] Implement `geosort` ranking rule
- [ ] Add tests
   - [x] Typo tolerance `disableOnWords`/`disableOnAttributes`
   - [ ] Geosort
   - [x] Exactness
   - [ ] Attribute/Position
   - [ ] Interactions between ranking rules:
     - [x] Typo/Proximity/Attribute not preceded by Words
     - [x] Exactness not preceded by Words
     - [x] Exactness -> Words (+ check universe correctness)
     - [x] Exactness -> Typo, etc.
     - [ ] Sort -> Words (performance tests)
     - [ ] Attribute/Position -> Typo
     - [ ] Attribute/Position -> Proximity
     - [x] Typo -> Exactness 
     - [x] Typo -> Proximity
     - [x] Proximity -> Typo
   - [x] Words 
   - [x] Typo
   - [x] Proximity
   - [x] Sort
   - [x] Ngrams
   - [x] Split words
   - [x] Ngram + Split Words
   - [x] Term matching strategy
   - [x] Distinct attribute
   - [x] Phrase Search
   - [x] Placeholder search
   - [x] Highlighter 
- [x] Limit the number of word derivations in a search query
- [x] Compute the initial universe correctly according to the terms matching strategy
- [x] Implement placeholder search
- [x] Get the list of ranking rules from the settings 
- [x] Implement `distinct`
- [x] Determine what to do when one of `attribute`, `proximity`, `typo`, or `exactness` is placed before `words`
- [x] Make sure the correct number of allowed typos is used for each word, including the prefix one
- [x] Make sure stop words are treated correctly (e.g. correct position in query graph), including in phrases
- [x] Support phrases correctly
- [x] Support synonyms
- [x] Support split words
- [x] Support combination of ngram + split-words (e.g. `whiteh orse` -> `"white horse"`)
- [x] Implement `typo` ranking rule
- [x] Implement `sort` ranking rule
- [x] Use existing `Search` interface to use the new search algorithms
- [x] Remove old code


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-05-03 13:42:51 +00:00
f8f190cd40 Update exactness tests following charabia camelCase tokenization 2023-05-03 14:45:09 +02:00
3a408e8287 Increase map size for tests following charabia camelCase tokenization 2023-05-03 14:44:48 +02:00
d3e5b10e23 fix nb of dbs 2023-05-03 14:11:20 +02:00
1aaf24ccbf Cargo fmt 2023-05-03 12:21:58 +02:00
90bc230820 Merge remote-tracking branch 'origin/main' into search-refactor
Conflicts | resolution
----------|-----------
Cargo.lock | added mimalloc
Cargo.toml |  took origin/main version
milli/src/search/criteria/exactness.rs | deleted after checking it was only clippy changes
milli/src/search/query_tree.rs | deleted after checking it was only clippy changes
2023-05-03 12:19:06 +02:00
342c4ff85d geosort: Remove rtree unwrap 2023-05-03 09:52:16 +02:00
c85392ce40 make the descendent geosort fast 2023-05-03 09:13:12 +02:00
8875d24a48 deserialize the rtree only when its needed, and keep it in memory once it has been deserialized 2023-05-03 09:13:12 +02:00
c470b67fa2 revamp the test to use execute_iterative_and_rtree_returns_the_same 2023-05-03 09:13:12 +02:00
c0e081cd98 Merge #3702 #3710
3702: Update charabia v0.7.2 r=curquiza a=ManyTheFish

fixes #3701
fixes #3689
fixes #3285 

3710: Updated messages pointing to the docs website r=curquiza a=roy9495

# Pull Request

Fixes partially #3668

## What does this PR do?
- Any messages referencing the docs site https://docs.meilisearch.com have been changed to reference the docs site https://meilisearch.com/docs .
 Thanks.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: TATHAGATA ROY <98920199+roy9495@users.noreply.github.com>
2023-05-02 17:27:57 +00:00
b60840ebff Remove self.iterating from words 2023-05-02 18:54:23 +02:00
fdc1763838 Use MultiOps for resolve_query_graph 2023-05-02 18:54:09 +02:00
75819bc940 Remove too many arguments on resolve_maximally_reduced_query_graph 2023-05-02 18:53:40 +02:00
7b8cc25625 rename located_query_terms_from_string -> located_query_terms_from_tokens 2023-05-02 18:53:01 +02:00
2be641f373 Merge #3718
3718: Fix broken README links r=curquiza a=Kerollmops

This PR fixes #3708 by changing the link to the new SDKs and API Reference pages. I would like to thank `@Tommy-42,` who also found the issue.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-05-02 16:23:38 +00:00
ddcb661c19 Use new bors URL of our self hosted instance 2023-05-02 18:20:12 +02:00
d09b771bce Add a dedicated error variant for planned failures in index scheduler tests
Fixes #3086
2023-05-02 14:37:20 +02:00
d89d2efb7e Change the text of a link 2023-05-02 13:53:36 +02:00
f284a9c0dd Fix the README.md broken links 2023-05-02 13:51:50 +02:00
134e7fc433 Merge #3709
3709: Add SDKs test in a CI r=Kerollmops a=curquiza

Add a CI running every week to run the `nightly` docker image of Meilisearch with the most "strategic" SDKs (most used, well tested, strongly typed SDK)
- meilisearch-js
- instant-meilisearch
- meilisearch-php
- meilisearch-python
- meilisearch-go
- meilisearch-ruby
- meilisearch-rust

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2023-05-02 11:22:09 +00:00
0cba919228 Add SDKs test in a CI 2023-05-02 11:53:28 +02:00
aa63091752 Fix bug in exact_attribute 2023-05-02 10:48:32 +02:00
58735d6d8f Fix outdated relevancy test 2023-05-02 10:48:32 +02:00
1b514517f5 Fix bug in computation of query term at a position 2023-05-02 10:48:32 +02:00
11f814821d Minor cleanup 2023-05-02 10:48:32 +02:00
30fb1153cc Speed up graph based ranking rule when a lot of different costs exist 2023-05-02 09:59:42 +02:00
3b2c8b9f25 Improve performance of position rr 2023-05-02 09:59:42 +02:00
2a7f9adf78 Build query graph more correctly from paths
Update snapshots
2023-05-02 09:59:42 +02:00
608ceea440 Fix bug in position rr 2023-05-02 09:59:42 +02:00
79001b9c97 Improve performance of the cheapest path finder algorithm 2023-05-02 09:59:42 +02:00
59b12fca87 Fix errors, clippy warnings, and add review comments 2023-04-29 11:48:11 +02:00
48f5bb1693 Implements the geo-sort ranking rule 2023-04-29 11:02:16 +02:00
93188b3c88 Fix indexing of word_prefix_fid_docids 2023-04-29 10:56:48 +02:00
bc4efca611 Add more tests for the attribute ranking rule 2023-04-29 10:56:48 +02:00
feaf25a95d Updated messages pointing to the docs website 2023-04-28 20:52:03 +00:00
414b3fae89 Merge #3571
3571: Introduce two filters to select documents with `null` and empty fields r=irevoire a=Kerollmops

# Pull Request

## Related issue
This PR implements the `X IS NULL`, `X IS NOT NULL`, `X IS EMPTY`, `X IS NOT EMPTY` filters that [this comment](https://github.com/meilisearch/product/discussions/539#discussioncomment-5115884) is describing in a very detailed manner.

## What does this PR do?

### `IS NULL` and `IS NOT NULL`

This PR will be exposed as a prototype for now. Below is the copy/pasted version of a spec that defines this filter.

- `IS NULL` matches fields that `EXISTS` AND `= IS NULL`
- `IS NOT NULL` matches fields that `NOT EXISTS` OR `!= IS NULL`

1. `{"name": "A", "price": null}`
2. `{"name": "A", "price": 10}`
3. `{"name": "A"}`

`price IS NULL` would match 1
`price IS NOT NULL` or `NOT price IS NULL` would match 2,3
`price EXISTS` would match 1, 2
`price NOT EXISTS` or `NOT price EXISTS` would match 3

common query : `(price EXISTS) AND (price IS NOT NULL)` would match 2

### `IS EMPTY` and `IS NOT EMPTY`

- `IS EMPTY` matches Array `[]`, Object `{}`, or String `""` fields that `EXISTS` and are empty
- `IS NOT EMPTY` matches fields that `NOT EXISTS` OR are not empty.

1. `{"name": "A", "tags": null}`
2. `{"name": "A", "tags": [null]}`
3. `{"name": "A", "tags": []}`
4. `{"name": "A", "tags": ["hello","world"]}`
5. `{"name": "A", "tags": [""]}`
6. `{"name": "A"}`
7. `{"name": "A", "tags": {}}`
8. `{"name": "A", "tags": {"t1":"v1"}}`
9. `{"name": "A", "tags": {"t1":""}}`
10. `{"name": "A", "tags": ""}`

`tags IS EMPTY` would match 3,7,10
`tags IS NOT EMPTY` or `NOT tags IS EMPTY` would match 1,2,4,5,6,8,9
`tags IS NULL` would match 1
`tags IS NOT NULL` or `NOT tags IS NULL` would match 2,3,4,5,6,7,8,9,10
`tags EXISTS` would match 1,2,3,4,5,7,8,9,10
`tags NOT EXISTS` or `NOT tags EXISTS` would match 6

common query : `(tags EXISTS) AND (tags IS NOT NULL) AND (tags IS NOT EMPTY)` would match 2,4,5,8,9

## What should the reviewer do?

- Check that I tested the filters
- Check that I deleted the ids of the documents when deleting documents


Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-04-27 13:14:00 +00:00
899baa0ea5 Update forgotten snapshot from previous commit 2023-04-27 13:43:04 +02:00
374095d42c Add tests for stop words and fix a couple of bugs 2023-04-27 13:30:09 +02:00
dd007dceca Merge pull request #3703 from meilisearch/search-refactor-test-typo-tolerance
Search refactor test typo tolerance + some bugfixes
2023-04-27 11:01:35 +02:00
3ae587205c Merge #3464
3464: Remove CLI changes for clippy r=curquiza a=dureuill

# Pull Request

## Related issue

Reverts #3434, which was linked to https://github.com/rust-lang/rust-clippy/issues/10087, as putting the lint in the pedantic group [is being uplifted to Rust 1.67.1](https://github.com/rust-lang/rust/pull/107743#issue-1573438821) (my thanks to everyone involved in this work 🎉).

## Motivation

- Using "standard issue" clippy in the CI spares our contributors and us from knowing/remembering to add the lint when running clippy locally
- In particular, spares us from configuring tools like rust-analyzer to take the lint into account.
- Should this lint come back in another form in the future, we won't blindly ignore it, and we will be able to reassess it, which will be good with respect to writing idiomatic Rust. By the time this occurs, lints might be configurable through `clippy.toml` too, which would make disabling one globally much more convenient if need be.

## Note

We should wait for the release of Rust 1.67.1 and its propagation to our CI before merging this. The PR won't pass CI before this.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-04-26 17:36:56 +00:00
1bf2694604 Update cargo lock 2023-04-26 17:41:29 +02:00
ed9cc1af55 Remove CLI changes for clippy 2023-04-26 17:04:09 +02:00
b41a6cbd7a Check sort criteria also in placeholder search 2023-04-26 16:28:17 +02:00
c8af572697 Add tests for exact words and exact attributes 2023-04-26 16:13:01 +02:00
249053e514 Update feature flags 2023-04-26 14:59:25 +02:00
ff2cf2a5ae Update charabia in milli 2023-04-26 14:56:54 +02:00
b448aca49c Add more tests for exactness rr 2023-04-26 11:04:18 +02:00
55bad07c16 Fix bug in exact_attribute rr implementation 2023-04-26 10:40:05 +02:00
380469665f Merge #3696
3696: Remove the unused snapshot files r=dureuill a=irevoire

While « reverting by hand » the PR about the auto batching of the addition&deletion of documents, we forgot to remove the associated snapshot files.

Here is the command I used to generate this PR: `cargo insta test --delete-unreferenced-snapshots`

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-04-26 07:18:29 +00:00
3421125a55 Prevent the exactness ranking rule from removing random words
Make it strictly follow the term matching strategy
2023-04-26 09:09:19 +02:00
0b2200e6e7 remove the unused snapshot files 2023-04-25 17:55:27 +02:00
0fd5ab9fcc Merge #3695
3695: Update clippy toolchain from v1.67 to v1.69 r=Kerollmops a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-04-25 15:45:18 +00:00
14293f6c8f Make rustfmt happy 2023-04-25 16:55:39 +02:00
d3a94e8b25 Fix bugs and add tests to exactness ranking rule 2023-04-25 16:49:08 +02:00
1944077a7f Merge #3566
3566: Improve docker cache r=curquiza a=inductor

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?

- Use `--mount=type=cache` and GHA build cache for faster build
- `=> => transferring context: 75.37MB` to `=> => transferring context: 19.21MB` with `.dockerignore`

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: inductor <kela@inductor.me>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-04-25 14:49:08 +00:00
8195d366fa Update .dockerignore 2023-04-25 16:48:25 +02:00
cfd1b2cc97 Fix the clippy warnings 2023-04-25 16:40:32 +02:00
19b044b4e6 Merge #3694
3694: Remove Uffizzi because not used by the team r=Kerollmops a=curquiza

After discussion with the team, we don't really use Uffizzi and even had issues with it recently: the preview build failing randomly, leading to unwanted GitHub notifications, plus issues reaching the container

<img width="628" alt="Capture d’écran 2023-04-25 à 11 55 39" src="https://user-images.githubusercontent.com/20380692/234298586-ef9cff85-ded8-4ec5-ba13-bb7a24d476b3.png">

Thanks for the involvement of Uffizzi team anyway, the tool is just not adapted to our team 😊 




Co-authored-by: curquiza <clementine@meilisearch.com>
2023-04-25 14:14:16 +00:00
e0730b55b3 Update clippy toolchain from v1.67 to v1.69 2023-04-25 16:05:28 +02:00
729fa3770d Remove Uffizzi because not used by the team 2023-04-25 15:50:38 +02:00
9cbc85b2f9 Merge #3661
3661: Bump the dependencies r=Kerollmops a=Kerollmops

This PR bumps all the dependencies of Meilisearch and the sub-crates. I first did a `cargo upgrade --compatible`, fixed the tests, continued with a `cargo upgrade --incompatible`, and finally fixed the compilation issues.

I wasn't able to bump _rustls_ to _0.21.0_ (_actix-web_ is using _tokio-tls 0.23.4_, which uses _0.20.8_), nor _vergen_, which changed everything without any guide: I didn't find a way to declare that with version 8.1.1.

bc25f378e8/meilisearch/build.rs (L4-L9)

Fixes #3285

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-04-25 08:23:01 +00:00
a3cf104736 Fix the compilation 2023-04-24 17:50:58 +02:00
a109802d45 Upgrade the incompatible versions of the dependencies 2023-04-24 17:50:57 +02:00
2d8060df80 Fix the tests 2023-04-24 17:50:57 +02:00
47b66e49b8 Upgrade the compatible versions of the dependencies 2023-04-24 17:50:52 +02:00
8f2e971879 Add tests for "exactness" rr, make correct universe computation 2023-04-24 16:57:34 +02:00
654a3a9e19 Merge #3688
3688: Following release v1.1.1: bring back changes into `main` r=curquiza a=curquiza

`@meilisearch/engine-team` ensure the changes we bring to `main` are the ones you want

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2023-04-24 11:38:23 +00:00
d1fdbb63da Make all search tests pass, fix distinctAttribute bug 2023-04-24 12:12:08 +02:00
fb9d9239b2 Merge #3674
3674: Bump h2 from 0.3.15 to 0.3.17 r=Kerollmops a=dependabot[bot]

Bumps [h2](https://github.com/hyperium/h2) from 0.3.15 to 0.3.17.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/hyperium/h2/releases">h2's releases</a>.</em></p>
<blockquote>
<h2>v0.3.17</h2>
<h2>What's Changed</h2>
<ul>
<li>Add <code>Error::is_library()</code> method to check if the error originated inside <code>h2</code>.</li>
<li>Add <code>max_pending_accept_reset_streams(usize)</code> option to client and server
builders.</li>
<li>Fix theoretical memory growth when receiving too many HEADERS and then
RST_STREAM frames faster than an application can accept them off the queue.
(CVE-2023-26964)</li>
</ul>
<h2>v0.3.16</h2>
<h2>What's Changed</h2>
<ul>
<li>Set <code>Protocol</code> extension on requests when received Extended CONNECT requests.</li>
<li>Remove <code>B: Unpin + 'static</code> bound requirement of bufs</li>
<li>Fix releasing of frames when stream is finished, reducing memory usage.</li>
<li>Fix panic when trying to send data and connection window is available, but stream window is not.</li>
<li>Fix spurious wakeups when stream capacity is not available.</li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/vi"><code>`@​vi</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/646">hyperium/h2#646</a></li>
<li><a href="https://github.com/silence-coding"><code>`@​silence-coding</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/651">hyperium/h2#651</a></li>
<li><a href="https://github.com/gtsiam"><code>`@​gtsiam</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/649">hyperium/h2#649</a></li>
<li><a href="https://github.com/howardjohn"><code>`@​howardjohn</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/658">hyperium/h2#658</a></li>
<li><a href="https://github.com/cloneable"><code>`@​cloneable</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/655">hyperium/h2#655</a></li>
<li><a href="https://github.com/aftersnow"><code>`@​aftersnow</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/657">hyperium/h2#657</a></li>
<li><a href="https://github.com/vadim-eg"><code>`@​vadim-eg</code></a>` made their first contribution in <a href="https://redirect.github.com/hyperium/h2/pull/661">hyperium/h2#661</a></li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/hyperium/h2/blob/master/CHANGELOG.md">h2's changelog</a>.</em></p>
<blockquote>
<h1>0.3.17 (April 13, 2023)</h1>
<ul>
<li>Add <code>Error::is_library()</code> method to check if the error originated inside <code>h2</code>.</li>
<li>Add <code>max_pending_accept_reset_streams(usize)</code> option to client and server
builders.</li>
<li>Fix theoretical memory growth when receiving too many HEADERS and then
RST_STREAM frames faster than an application can accept them off the queue.
(CVE-2023-26964)</li>
</ul>
<h1>0.3.16 (February 27, 2023)</h1>
<ul>
<li>Set <code>Protocol</code> extension on requests when received Extended CONNECT requests.</li>
<li>Remove <code>B: Unpin + 'static</code> bound requirement of bufs</li>
<li>Fix releasing of frames when stream is finished, reducing memory usage.</li>
<li>Fix panic when trying to send data and connection window is available, but stream window is not.</li>
<li>Fix spurious wakeups when stream capacity is not available.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="af4bcacf6d"><code>af4bcac</code></a> v0.3.17</li>
<li><a href="d3f37e9fba"><code>d3f37e9</code></a> feat: add <code>max_pending_accept_reset_streams(n)</code> options</li>
<li><a href="5bc8e72e5f"><code>5bc8e72</code></a> fix: limit the amount of pending-accept reset streams</li>
<li><a href="8088ca658d"><code>8088ca6</code></a> feat: add Error::is_library method</li>
<li><a href="481c31d528"><code>481c31d</code></a> chore: Use Cargo metadata for the MSRV build job</li>
<li><a href="d3d50ef812"><code>d3d50ef</code></a> chore: Replace unmaintained/outdated GitHub Actions</li>
<li><a href="45b9bccdfc"><code>45b9bcc</code></a> chore: set rust-version in Cargo.toml (<a href="https://redirect.github.com/hyperium/h2/issues/664">#664</a>)</li>
<li><a href="b9dcd39915"><code>b9dcd39</code></a> v0.3.16</li>
<li><a href="96caf4fca3"><code>96caf4f</code></a> Add a message for EOF-related broken pipe errors (<a href="https://redirect.github.com/hyperium/h2/issues/615">#615</a>)</li>
<li><a href="732319039f"><code>7323190</code></a> Avoid spurious wakeups when stream capacity is not available (<a href="https://redirect.github.com/hyperium/h2/issues/661">#661</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/hyperium/h2/compare/v0.3.15...v0.3.17">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=h2&package-manager=cargo&previous-version=0.3.15&new-version=0.3.17)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-04-24 09:34:35 +00:00
a7a0891210 Update examples 2023-04-24 10:07:49 +02:00
84d9c731f8 Fix bug in encoding of word_position_docids and word_fid_docids 2023-04-24 09:59:30 +02:00
11f4724957 ignore all .git 2023-04-18 16:32:31 +09:00
85182497ab revert mount 2023-04-18 15:15:33 +09:00
3e4a356638 EOF 2023-04-18 15:14:13 +09:00
dfd9c384aa use docker cache 2023-04-18 15:14:13 +09:00
f0b4046c43 Bump h2 from 0.3.15 to 0.3.17
Bumps [h2](https://github.com/hyperium/h2) from 0.3.15 to 0.3.17.
- [Release notes](https://github.com/hyperium/h2/releases)
- [Changelog](https://github.com/hyperium/h2/blob/master/CHANGELOG.md)
- [Commits](https://github.com/hyperium/h2/compare/v0.3.15...v0.3.17)

---
updated-dependencies:
- dependency-name: h2
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-04-13 17:03:48 +00:00
4b953d62fb Merge #3673
3673: Handle the task queue being full r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes a remaining issue with #3659 where it was not always possible to send tasks back even after deleting some tasks when prompted.

## Tests

- see integration test
- also manually tested with a 1MiB task queue. It was not possible to become unblocked before this PR; it is now possible.

## What does this PR do?
- Use the `non_free_pages_size` method to compute the space occupied by the task db instead of the `real_disk_size` which is not always affected by task deletion.
- Expand the test so that it adds a task after the deletion. The test now fails before this PR and succeeds after this PR.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-04-13 16:24:16 +00:00
c2f4b6ced0 Test: await for the deletion task to complete before trying to add another task 2023-04-13 18:22:42 +02:00
1e6cbcaf12 Update test comment
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-04-13 17:27:12 +02:00
066c6bd875 test task db full now checks that a task can be successfully added after deleting tasks 2023-04-13 17:20:06 +02:00
fd583501d7 Use non_free_pages_size instead of real_disk_size to check task db space taken 2023-04-13 17:07:44 +02:00
bff4bde0ce Merge #3672
3672: Update version for the next release (v1.1.1) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2023-04-13 13:34:29 +00:00
cd45d21d6e Update version for the next release (v1.1.1) in Cargo.toml 2023-04-13 13:25:10 +00:00
f9960be115 Merge #3659
3659: stops receiving tasks once the task queue is full r=Kerollmops a=irevoire

Give 20GiB to the task queue + once 50% of the task queue is used, it blocks itself and only receives task deletion requests to ensure we never get in a state where we can’t do anything.

Also, create a new error message when we reach this case:
```
Meilisearch cannot receive write operations because the size limit of the tasks database has been reached. Please delete tasks to continue performing write operations.
```

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-04-13 09:11:12 +00:00
bd9aba4d77 Add "position" part of the attribute ranking rule 2023-04-13 10:46:09 +02:00
8edad8291b Add logger to attribute rr, fix a bug 2023-04-13 10:25:00 +02:00
b3f60ee805 try to fix the ci 2023-04-13 10:18:58 +02:00
5acf953298 Merge branch 'search-refactor-attribute-ranking-rule' into search-refactor 2023-04-13 08:28:17 +02:00
d9cebff61c Add a simple test to check that attributes are ranking correctly 2023-04-13 08:27:09 +02:00
30f7bd03f6 Fix compiler warning/errors caused by previous merge 2023-04-13 08:27:09 +02:00
df0d9bb878 Introduce the attribute ranking rule in the list of ranking rules 2023-04-13 08:27:09 +02:00
5230ddb3ea Resolve the attribute ranking rule conditions 2023-04-13 08:27:09 +02:00
d6a7c28e4d Implement the attribute ranking rule edge computation 2023-04-13 08:27:09 +02:00
e55efc419e Introduce a new cache for the words fids 2023-04-13 08:27:09 +02:00
644e136aee Merge branch 'search-refactor-typo-attributes' into search-refactor 2023-04-13 08:26:56 +02:00
ec0ecb5515 Merge #3666
3666: Update README to reference new docs website r=curquiza a=guimachiavelli

With the launch of the new website, we need to update the README so it references the correct URLs.

Two minor details:
- we have removed the contact page from the documentation (it had the same links present in this readme and on the community section of the landing page) 
- we have recently separated filtering and faceted search into two separate articles

Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-04-12 17:30:51 +00:00
b4fabce36d update the error message + update the task db size to 20GiB with a limit at 50% 2023-04-12 18:54:11 +02:00
9350a7b017 improve the test and try to understand the issue happening on windows 2023-04-12 18:54:11 +02:00
be69ab320d stops receiving tasks once the task queue is full 2023-04-12 18:54:11 +02:00
d59d75c9cd Merge #3667
3667: Disable autobatching of additions and deletions r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #3664

## What does this PR do?
- Modifies the autobatcher to not batch document additions and deletions, as a workaround to the DB corruption in #3664 



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-04-12 16:51:13 +00:00
38b7b31beb Decide to use prefix DB if the word is not an ngram 2023-04-12 16:45:38 +02:00
7a01f20df7 Use word_prefix_docids, make get_word_prefix_docids private 2023-04-12 16:45:38 +02:00
c20c38a7fa Add SearchContext::word_prefix_docids() method 2023-04-12 16:44:43 +02:00
5ab46324c4 Everyone uses the SearchContext::word_docids instead of get_db_word_docids
make get_db_word_docids private
2023-04-12 16:44:43 +02:00
325f17488a Add SearchContext::word_docids() method 2023-04-12 16:37:05 +02:00
e7ff987c46 Update call sites 2023-04-12 16:36:38 +02:00
244003e36f Refactor DB cache to return Roaring Bitmaps directly instead of byte slices 2023-04-12 16:35:48 +02:00
1f813a6f3b Simplify implementation of the detailed (=visual) logger 2023-04-12 16:32:53 +02:00
96183e804a Simplify the logger 2023-04-12 16:32:53 +02:00
5cfb066b0a Update README.md 2023-04-12 16:29:20 +02:00
a5f44a5ceb Update references to new docs website
With the launch of the new website, we need to update the README so it references the correct URLs.

Two minor details:
- we have removed the contact page from the documentation (it had the same links present in this readme and on the community section of the landing page) 
- we have recently separated filtering and faceted search into two separate articles
2023-04-12 16:27:04 +02:00
7ab48ed8c7 Matching words fixes 2023-04-12 16:21:43 +02:00
a94e78ffb0 Disable autobatching of additions and deletions 2023-04-12 10:53:00 +02:00
e7bb8c940f Merge branch 'search-refactor-highlighter' into search-refactor-highlighter-merged 2023-04-11 12:22:34 +02:00
8cb85294ef Remove unused import warning 2023-04-07 11:09:30 +02:00
d0e9d65025 Fix distinct attribute bugs 2023-04-07 11:09:01 +02:00
540a396e49 Fix indexing bug in words_prefix_position 2023-04-07 11:08:39 +02:00
a81165f0d8 Merge remote-tracking branch 'origin/main' into search-refactor 2023-04-07 10:15:55 +02:00
d6585eb10b Avoid splitting ngrams into their original component words 2023-04-07 10:13:49 +02:00
f7d90ad19f Merge remote-tracking branch 'origin/search-refactor-tests-doc' into search-refactor 2023-04-07 10:13:18 +02:00
bc25f378e8 Merge #3647
3647: Improve the health route by ensuring lmdb is not down r=irevoire a=irevoire

Fixes #3644

In this PR, I try to make a small read on the `AuthController` and `IndexScheduler` databases.
The idea is not to validate that everything works but just to avoid the bug we had last time when lmdb was stuck forever.

In order to get access to the `AuthController` without going through the extractor, I need to wrap it in the `Data` type from `actix-web`.
And to do that, I had to patch our extractor so it works with the `Data` type as well.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-04-06 18:23:52 +00:00
31630c85d0 exactness graph rr: Add important TODO/FIXME after review 2023-04-06 17:50:39 +02:00
ab09dc0167 exact_attributes: Add TODOs and additional check after review 2023-04-06 17:50:39 +02:00
618c54915d exact_attribute: dedup nodes after sorting them 2023-04-06 17:50:39 +02:00
130d2061bd Fix indexing of word_position_docid and fid 2023-04-06 17:50:39 +02:00
66ddee4390 Fix word_position_docids indexing 2023-04-06 17:50:39 +02:00
90a6c01495 Use correct codec in proximity 2023-04-06 17:50:39 +02:00
e58426109a Fix panics and issues in exactness graph ranking rule 2023-04-06 17:50:39 +02:00
f513cf930a Exact attribute with state 2023-04-06 17:50:39 +02:00
8a13ed7e3f Add exactness ranking rules 2023-04-06 17:50:39 +02:00
1b8e4d0301 Add ExactTerm and helper method 2023-04-06 17:50:39 +02:00
996619b22a Increase position by 8 on hard separator when building query terms 2023-04-06 17:50:39 +02:00
2c9822a337 Rename is_multiple_words to is_ngram and zero_typo to exact 2023-04-06 17:50:39 +02:00
7276deee0a Add new db caches 2023-04-06 17:50:39 +02:00
6a068fe36a Merge #3649
3649: Update the prototype section in CONTRIBUTING.md r=curquiza a=curquiza

Following the creation of this guide https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md and to avoid redundant information.

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-04-06 15:27:49 +00:00
f7e7f438f8 Patch prefix match 2023-04-06 17:22:31 +02:00
8d826e478f Update the prototype section in CONTRIBUTING.md 2023-04-06 17:10:00 +02:00
ba8dcc2d78 Fix clippy 2023-04-06 15:50:47 +02:00
4d308d5237 Improve the health route by ensuring lmdb is not down
And slightly refactor the auth controller.
2023-04-06 15:31:42 +02:00
7ca91ebb71 Merge branch 'search-refactor-exactness' into search-refactor-tests-doc 2023-04-06 15:16:35 +02:00
1ba8a40d61 Remove formatting benchmarks because they can't be isolated easily anymore 2023-04-06 15:10:16 +02:00
47f6a3ad3d Take into account that a logger needs the search context 2023-04-06 15:02:23 +02:00
b4c01581cd Merge #3641
3641: Bring back changes from `release v1.1.0` into `main` after v1.1.0 release r=curquiza a=curquiza

Replaces https://github.com/meilisearch/meilisearch/pull/3637 since we don't want to pull commits from `main` into `release-v1.1.0` when fixing git conflicts

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2023-04-06 12:37:54 +00:00
ae17c62e24 Remove warnings 2023-04-06 14:07:18 +02:00
a1148c09c2 remove old matcher 2023-04-06 14:00:21 +02:00
9c5f64769a Integrate the new Highlighter in the search 2023-04-06 13:58:56 +02:00
ebe23b04c9 Make the matcher consume the search context 2023-04-06 12:28:28 +02:00
13b7c826c1 add new highlighter 2023-04-06 12:15:37 +02:00
67fd3b08ef wait until all tasks are processed before running our dump integration tests 2023-04-05 18:35:43 +02:00
5440f43fd3 Fix indexing of word_position_docid and fid 2023-04-05 18:14:00 +02:00
d9460a76f4 Fix word_position_docids indexing 2023-04-05 18:14:00 +02:00
d1ddaa223d Use correct codec in proximity 2023-04-05 18:14:00 +02:00
f7ecea142e Fix panics and issues in exactness graph ranking rule 2023-04-05 18:13:46 +02:00
337e75b0e4 Exact attribute with state 2023-04-05 18:12:46 +02:00
b5691802a3 Add new tests and fix construction of query graph from paths 2023-04-05 16:31:10 +02:00
1690aec7f1 Merge #3638
3638: filter errors - `geo(x,y,z)` and `geoDistance(x,y,z)` r=irevoire a=cymruu

# Pull Request

## Related issue
Fixes #3006

## What does this PR do?
- fixes the display function of `ParseError::ReservedGeo`.  The previous display string was missing back ticks around available filters.
- makes the filter-parser parse `_geo(x,y,z)` and `_geoDistance(x,y,z)` filters. Both parsing functions will throw an error if the filter was used.
- removes `FilterError::ReservedGeo` and `FilterError::Reserved` error variants since they are now thrown by the filter-parser.

I ran `cargo test --package milli -- --test-threads 1` and the tests passed.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
Co-authored-by: filip <filipbachul@gmail.com>
2023-04-05 13:23:50 +00:00
f267bed352 remove an unnecessary comment
Co-authored-by: Tamo <irevoire@protonmail.ch>
2023-04-05 13:44:55 +02:00
6e50f23896 Add more search tests 2023-04-05 13:33:23 +02:00
597d57bf1d Merge branch 'main' into bring-back-changes-v1.1.0 2023-04-05 11:32:14 +02:00
4c8a0179ba Add more search tests 2023-04-05 11:30:49 +02:00
c69cbec64a Add more search tests 2023-04-05 11:20:04 +02:00
01ac8344ad Merge #3643
3643: Add sprint issue to the template issues r=curquiza a=curquiza

Following our [internal guide](https://www.notion.so/meilisearch/Delivery-synchronization-policy-b4ec15c3e56a49539fa02d2f1d381de3) presentation, I put the template here

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-04-05 08:05:21 +00:00
3508ba2f20 Add sprint issue to the template issues 2023-04-04 18:58:43 +02:00
ce328c329d Move bucket sort function to its own module and fix a bug 2023-04-04 18:03:08 +02:00
959e4607bb Add more search tests 2023-04-04 18:02:46 +02:00
4b4ffb8ec9 Add exactness ranking rules 2023-04-04 17:12:07 +02:00
3951fe22ab Add ExactTerm and helper method 2023-04-04 17:09:32 +02:00
4d5bc9df4c Increase position by 8 on hard separator when building query terms 2023-04-04 17:07:26 +02:00
ec2f8e8040 Rename is_multiple_words to is_ngram and zero_typo to exact 2023-04-04 17:06:07 +02:00
406b8bd248 Add new db caches 2023-04-04 17:04:46 +02:00
62b9c6fbee Add search tests 2023-04-04 16:18:22 +02:00
b439d36807 Split query_term module into multiple submodules 2023-04-04 15:38:30 +02:00
faceb661e3 Add note that a part of the code needs fixing 2023-04-04 15:02:01 +02:00
4129d657e2 Simplify query_term module a bit 2023-04-04 15:01:42 +02:00
1e6fe71a67 fix clippy warning 2023-04-03 20:18:26 +02:00
0fba08cd72 fmt 2023-04-03 20:18:26 +02:00
189d4c3b70 add geoPoint integration tests 2023-04-03 20:18:26 +02:00
2fff6f7f23 add parse_geo_distance to parse_primary 2023-04-03 20:18:26 +02:00
fddfb37f1f remove unnecessary FilterError::ReservedGeo and FilterError::Reserved 2023-04-03 20:18:26 +02:00
52b4090286 update integration tests 2023-04-03 20:18:26 +02:00
3cabfb448b fix backticks in ErrorKind::ReservedGeo display 2023-04-03 20:18:26 +02:00
77cf5b3787 handle _geoDistance(x,y,z) filter error 2023-04-03 20:18:26 +02:00
3acc5bbb15 handle _geo(x,y,z) filter error 2023-04-03 20:18:26 +02:00
114436926f Merge #3631
3631: sort errors - `_geo(x,y)` and `_geoDistance(x,y)` return `ReservedKeyword` r=irevoire a=cymruu

# Pull Request

## Related issue
Fix part of #3006 (sort errors)

## What does this PR do?
- made meilisearch return `ReservedKeyword` when sorting by `_geo(x,y)` or `_geoDistance(x,y)`

Screenshot: 
![image](https://user-images.githubusercontent.com/2981598/228969970-56dae3d2-7851-48ea-a913-c4483224d709.png)


I ran `cargo test --package milli -- --test-threads 1` and the tests passed.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
2023-04-03 16:28:15 +00:00
0f7904fb38 Merge #3636
3636: Add a newline after the meilisearch version in the issue template r=curquiza a=bidoubiwa

# Pull Request
## What does this PR do?
Just a small change that makes it easier to remove the version example and adds consistency with the other examples in its positioning.


Co-authored-by: cvermand <33010418+bidoubiwa@users.noreply.github.com>
2023-04-03 13:52:08 +00:00
3f13608002 Fix computation of ngram derivations 2023-04-03 15:27:49 +02:00
590b1d8fb7 Add a newline after the meilisearch version in the issue template
Just a small change that makes it easier to remove the version example and is consistent with the other examples in its positioning.
2023-04-03 13:14:20 +02:00
4708d9b016 Fix compiler warnings/errors 2023-04-03 10:09:27 +02:00
0d2e7bcc13 Implement the previous way for the exhaustive distinct candidates 2023-04-03 10:08:10 +02:00
55fbfb6124 Merge branch 'search-refactor-located-query-terms' into search-refactor 2023-04-03 10:04:36 +02:00
be9741eb8a Merge #3633
3633: Bump Swatinem/rust-cache from 2.2.0 to 2.2.1 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.0 to 2.2.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.2.1</h2>
<ul>
<li>Update <code>`@actions/cache</code>` dependency to fix usage of <code>zstd</code> compression.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.2.1</h2>
<ul>
<li>Update <code>`@actions/cache</code>` dependency to fix usage of <code>zstd</code> compression.</li>
</ul>
<h2>2.2.0</h2>
<ul>
<li>Add new <code>save-if</code> option to always restore, but only conditionally save the cache.</li>
</ul>
<h2>2.1.0</h2>
<ul>
<li>Only hash <code>Cargo.{lock,toml}</code> files in the configured workspace directories.</li>
</ul>
<h2>2.0.2</h2>
<ul>
<li>Avoid calling <code>cargo metadata</code> on pre-cleanup.</li>
<li>Added <code>prefix-key</code>, <code>cache-directories</code> and <code>cache-targets</code> options.</li>
</ul>
<h2>2.0.1</h2>
<ul>
<li>Primarily just updating dependencies to fix GitHub deprecation notices.</li>
</ul>
<h2>2.0.0</h2>
<ul>
<li>The action code was refactored to allow for caching multiple workspaces and
different <code>target</code> directory layouts.</li>
<li>The <code>working-directory</code> and <code>target-dir</code> input options were replaced by a
single <code>workspaces</code> option that has the form of <code>$workspace -&gt; $target</code>.</li>
<li>Support for considering <code>env-vars</code> as part of the cache key.</li>
<li>The <code>sharedKey</code> input option was renamed to <code>shared-key</code> for consistency.</li>
</ul>
<h2>1.4.0</h2>
<ul>
<li>Clean both <code>debug</code> and <code>release</code> target directories.</li>
</ul>
<h2>1.3.0</h2>
<ul>
<li>Use Rust toolchain file as additional cache key.</li>
<li>Allow for a configurable target-dir.</li>
</ul>
<h2>1.2.0</h2>
<ul>
<li>Cache <code>~/.cargo/bin</code>.</li>
<li>Support for custom <code>$CARGO_HOME</code>.</li>
<li>Add a <code>cache-hit</code> output.</li>
<li>Add a new <code>sharedKey</code> option that overrides the automatic job-name based key.</li>
</ul>
<h2>1.1.0</h2>
<ul>
<li>Add a new <code>working-directory</code> input.</li>
<li>Support caching git dependencies.</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="6fd3edff69"><code>6fd3edf</code></a> 2.2.1</li>
<li><a href="a1c019f71a"><code>a1c019f</code></a> update dependencies and rebuild</li>
<li><a href="664ce0090f"><code>664ce00</code></a> chore: Create check-dist.yml (<a href="https://redirect.github.com/Swatinem/rust-cache/issues/96">#96</a>)</li>
<li>See full diff in <a href="https://github.com/Swatinem/rust-cache/compare/v2.2.0...v2.2.1">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.2.0&new-version=2.2.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-04-03 07:53:07 +00:00
58fe260c72 Allow removing all the terms from a query if it contains a phrase 2023-04-03 09:18:02 +02:00
24e5f6f7a9 Don't remove phrases with "last" term matching strategy 2023-04-03 09:17:33 +02:00
0177d66149 Bump Swatinem/rust-cache from 2.2.0 to 2.2.1
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.0 to 2.2.1.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v2.2.0...v2.2.1)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-04-01 17:58:46 +00:00
9b87c36200 Limit the number of derivations for a single word. 2023-03-31 09:19:18 +02:00
1861c69964 fmt 2023-03-30 23:37:26 +02:00
cb2b5eb38e handle _geoDistance(x,x) sort error 2023-03-30 23:21:23 +02:00
53aa0a1b54 handle _geo(x,x) sort error 2023-03-30 23:17:34 +02:00
12b26cd54e Don't remove phrases from the query with term matching strategy Last 2023-03-30 14:54:08 +02:00
061b1e6d7c Tiny refactor of query graph remove_nodes method 2023-03-30 14:49:25 +02:00
0d6e8b5c31 Fix phrase search bug when the phrase has only one word 2023-03-30 14:48:12 +02:00
d48cdc67a0 Fix term matching strategy bugs 2023-03-30 14:01:52 +02:00
35c16ad047 Use new term matching strategy logic in words ranking rule 2023-03-30 13:15:43 +02:00
2997d1f186 Use new term matching strategy logic in resolve_maximally_reduced_... 2023-03-30 13:12:51 +02:00
2a5997fb20 Avoid expensive assert! in bucket sort function 2023-03-30 13:07:17 +02:00
ee8a9e0bad Remove outdated sentence in documentation 2023-03-30 12:22:24 +02:00
3b0737a092 Fix detailed logger 2023-03-30 12:20:44 +02:00
fdd02105ac Graph-based ranking rule + term matching strategy support 2023-03-30 12:19:21 +02:00
aa9592455c Refactor the paths_of_cost algorithm
Support conditions that require certain nodes to be skipped
2023-03-30 12:11:11 +02:00
01e24dd630 Rewrite proximity ranking rule 2023-03-30 11:59:06 +02:00
ae6bb1ce17 Update the ConditionDocidsCache after change to RankingRuleGraphTrait 2023-03-30 11:41:20 +02:00
5fd28620cd Build ranking rule graph correctly after changes to trait definition 2023-03-30 11:32:55 +02:00
728710d63a Update typo ranking rule to use new query term structure 2023-03-30 11:32:19 +02:00
fa81381865 Update the trait requirements of ranking-rule graphs 2023-03-30 11:19:45 +02:00
b96a682f16 Update resolve_graph module to work with lazy query terms 2023-03-30 11:10:38 +02:00
d0f048c068 Simplify the API of the DatabaseCache 2023-03-30 11:08:17 +02:00
223e82a10d Update QueryGraph to use new lazy query terms + build from paths 2023-03-30 11:06:02 +02:00
9507ff5e31 Update query term structure to allow for laziness 2023-03-30 11:06:02 +02:00
c2b025946a located_query_terms_from_string: use u16 for positions, hard limit number of iterated tokens.
- Refactor phrase logic to reduce number of possible states
2023-03-30 11:04:14 +02:00
950f73b8bb Merge #3623
3623: Update mini-dashboard to version v0.2.7 r=curquiza a=bidoubiwa

## Changes

* Retrieve the API Key from the url parameters (#416) `@qdequele`

## 🐛 Bug Fixes

* Fix show more button not displaying all fields (#419) `@bidoubiwa`

Thanks again to `@bidoubiwa` and `@qdequele`! 🎉


Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
2023-03-30 08:31:29 +00:00
3a818c5e87 Add more functionality to interners 2023-03-30 09:56:23 +02:00
7871d12025 Merge #3624
3624: Reduce the time to import a dump r=irevoire a=irevoire

When importing a dump, this PR does multiple things:
- Stops committing the changes between each task import
- Stops deserializing + serializing every bitmap for every task

Pros:
Importing 1M tasks in a dump went from 3m36 on my computer to 6s

Cons: We use slightly more memory, but since we’re using roaring bitmaps, that really shouldn’t be noticeable.

Fixes #3620

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-03-29 13:40:25 +00:00
d74134ce3a Check sort criteria 2023-03-29 15:21:54 +02:00
5ac129bfa1 Mark geosearch as currently unimplemented for sort rule 2023-03-29 15:20:42 +02:00
e7153e0a97 Update mini-dashboard to version V0.2.7 2023-03-29 14:49:39 +02:00
37a24a4a05 Merge #3621
3621: Fix facet normalization r=Kerollmops a=ManyTheFish

# Pull Request

Make sure the facet normalization is the same between indexing and search.

## Related issue
Fixes #3599



Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-03-29 12:47:20 +00:00
3fb67f94f7 Reduce the time to import a dump by caching some data
With this commit, for a dump containing 1M tasks we went from 1m02 to 6s
2023-03-29 14:44:15 +02:00
6592746337 Fix other unrelated tests 2023-03-29 14:36:17 +02:00
cf5145b542 Reduce the time to import a dump
With this commit, for a dump containing 1M tasks we went from 3m36s to import the task queue down to 1m02s
2023-03-29 14:27:40 +02:00
efea1e5837 Fix facet normalization 2023-03-29 12:02:24 +02:00
b744f33530 Add test 2023-03-29 12:01:52 +02:00
31bb61ba99 Merge #3608
3608: In a settings update, check to see if the primary key actually changes before erroring out r=irevoire a=GregoryConrad

Previously, if the primary key was set and a Settings update contained a primary key, an error would be returned.
However, this error is not needed if the new PK == the current PK. This PR just checks to see if the PK actually changes before raising an error.

I came across this slight hiccup in https://github.com/GregoryConrad/mimir/issues/156#issuecomment-1484128654

Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2023-03-29 09:07:51 +00:00
abb4522f76 Small comment on ignored rules for placeholder search 2023-03-29 09:11:06 +02:00
d4f54fc55e Merge #3617
3617: update the geoBoundingBox feature r=dureuill a=irevoire

Closing #3616
Implementing this change in the spec: 38a715c072


Now instead of using the (top_left, bottom_right) corners of the bounding box, it’s using the (top_right, bottom_left) corners.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-03-29 07:01:17 +00:00
ef084ef042 SmallBitmap: Consistently panic on incoherent universe lengths 2023-03-29 08:45:38 +02:00
3524bd1257 SmallBitmap: Add documentation 2023-03-29 08:44:11 +02:00
a50b058557 update the geoBoundingBox feature
Now instead of using the (top_left, bottom_right) corners of the bounding box, it's using the (top_right, bottom_left) corners.
2023-03-28 18:26:18 +02:00
d4f6216966 Resolve rule time sort criteria 2023-03-28 16:42:02 +02:00
77acafe534 Resolve search time sort criteria for placeholder search 2023-03-28 16:41:03 +02:00
53afda3237 Update search usage in example 2023-03-28 16:35:46 +02:00
abb19d368d Initialize query time ranking rule for query search 2023-03-28 12:40:52 +02:00
b4a52a622e BoxRankingRule 2023-03-28 12:39:42 +02:00
8d7d8cdc2f Clean-up index example 2023-03-27 18:34:10 +02:00
626a93b348 Search example: panic when missing the index path 2023-03-27 18:18:01 +02:00
af65fe201a Clean-up search example 2023-03-27 17:49:43 +02:00
9b83b1deb0 Expose SearchLogger trait 2023-03-27 17:49:18 +02:00
e9eb271499 Remove empty attribute_rule mod 2023-03-27 11:08:03 +02:00
3281a88d08 SmallBitmap: don't expose internal items 2023-03-27 11:04:43 +02:00
5a644054ab Removed unused search impl 2023-03-27 11:04:27 +02:00
16fefd364e Add TODO notes 2023-03-27 11:04:04 +02:00
e7994cdeb3 feat: check to see if the PK changed before erroring out
Previously, if the primary key was set and a Settings update contained
a primary key, an error would be returned.
However, this error is not needed if the new PK == the current PK.
This commit just checks to see if the PK actually changes
before raising an error.
2023-03-26 12:18:39 -04:00
00bad8c716 Add comments suggesting performance improvements 2023-03-23 10:18:24 +01:00
862714a18b Remove criterion_implementation_strategy param of Search 2023-03-23 09:44:12 +01:00
d18ebe4f3a Remove more warnings 2023-03-23 09:41:18 +01:00
7169d85115 Remove old query_tree code and make clippy happy 2023-03-23 09:39:16 +01:00
f5f5f03ec0 Remove old criteria code 2023-03-23 09:35:53 +01:00
9b2653427d Split position DB into fid and relative position DB 2023-03-23 09:22:01 +01:00
56b7209f26 Make clippy happy 2023-03-23 09:16:17 +01:00
9b1f439a91 WIP 2023-03-23 09:12:35 +01:00
01c7d2de8f Add example targets to the milli crate 2023-03-22 14:50:41 +01:00
a86aeba411 WIP 2023-03-22 14:43:08 +01:00
514b60f8c8 Merge #3597
3597: ensure that the task queue is correctly imported r=irevoire a=irevoire

## Related issue
Fixes #3596

I updated all the dump's integration tests to ensure that we're effectively able to query the tasks

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-03-21 17:31:26 +00:00
a2b151e877 ensure that the task queue is correctly imported
reduce the size of the snapshots file
2023-03-21 14:41:46 +01:00
384fdc2df4 Fix two bugs in proximity ranking rule 2023-03-21 11:43:25 +01:00
83e5b4ed0d Compute edges of proximity graph lazily 2023-03-21 10:44:40 +01:00
272cd7ebbd Small cleanup 2023-03-20 13:39:19 +01:00
c63c7377e6 Switch order of MappedInterner generic params 2023-03-20 09:41:56 +01:00
9259cdb12e Update Cargo.lock (was mistakenly changed during rebase) 2023-03-20 09:41:56 +01:00
5b50e49522 cargo fmt 2023-03-20 09:41:56 +01:00
65474c8de5 Update new sort ranking rule after rebasing 2023-03-20 09:41:56 +01:00
fbb1ba3de0 Cargo fmt 2023-03-20 09:41:56 +01:00
a59ca28e2c Add forgotten file 2023-03-20 09:41:56 +01:00
825f742000 Simplify graph-based ranking rule impl 2023-03-20 09:41:56 +01:00
dd491320e5 Simplify graph-based ranking rule impl 2023-03-20 09:41:56 +01:00
c6ff97a220 Rewrite the dead-ends cache to detect more dead-ends 2023-03-20 09:41:56 +01:00
49240c367a Fix bug in cost of typo conditions 2023-03-20 09:41:56 +01:00
1e6e624078 Fix bug in SmallBitmap 2023-03-20 09:41:56 +01:00
8b4e07e1a3 WIP 2023-03-20 09:41:56 +01:00
2853009987 Renaming Edge -> Condition 2023-03-20 09:41:56 +01:00
aa59c3bc2c Replace EdgeCondition with an Option<..> + other code cleanup 2023-03-20 09:41:56 +01:00
7b1d8f4c6d Make PathSet strongly typed 2023-03-20 09:41:56 +01:00
a49ddec9df Prune the query graph after executing a ranking rule 2023-03-20 09:41:56 +01:00
05fe856e6e Merge forward and backward proximity conditions in proximity graph 2023-03-20 09:41:56 +01:00
c0cdaf9f53 Fix bug in the proximity ranking rule for queries with ngrams 2023-03-20 09:41:56 +01:00
e9cf58d584 Refactor of the Interner 2023-03-20 09:41:56 +01:00
31628c5cd4 Merge Phrase and WordDerivations into one structure 2023-03-20 09:41:56 +01:00
3004e281d7 Support ngram typos + splitwords and splitwords+synonyms in proximity 2023-03-20 09:41:56 +01:00
14e8d0aaa2 Rename lifetime 2023-03-20 09:41:56 +01:00
1c58cf8426 Intern ranking rule graph edge conditions as well 2023-03-20 09:41:56 +01:00
5155fd2bf1 Reorganise initialisation of ranking rules + rename PathsMap -> PathSet 2023-03-20 09:41:56 +01:00
9ec9c204d3 Small code cleanup 2023-03-20 09:41:56 +01:00
78b9304d52 Implement distinct attribute 2023-03-20 09:41:56 +01:00
0465ba4a05 Intern more values 2023-03-20 09:41:56 +01:00
2099991dd1 Continue documenting and cleaning up the code 2023-03-20 09:41:56 +01:00
c232cdabf5 Add documentation 2023-03-20 09:41:56 +01:00
4e266211bf Small code reorganisation 2023-03-20 09:41:56 +01:00
57fa689131 Cargo fmt 2023-03-20 09:41:56 +01:00
10626dddfc Add a few more optimisations to new search algorithms 2023-03-20 09:41:56 +01:00
9051065c22 Apply a few optimisations for graph-based ranking rules 2023-03-20 09:41:56 +01:00
e8c76cf7bf Intern all strings and phrases in the search logic 2023-03-20 09:41:56 +01:00
3f1729a17f Update new search test 2023-03-20 09:41:56 +01:00
cab2b6bcda Fix: computation of initial universe, code organisation 2023-03-20 09:41:56 +01:00
c4979a2fda Fix code visibility issue + unimplemented detail in proximity rule 2023-03-20 09:41:56 +01:00
23931f8a4f Fix small bug in visual logger of search algo 2023-03-20 09:41:56 +01:00
aa414565bb Fix proximity graph edge builder to include all proximities 2023-03-20 09:41:56 +01:00
1db152046e WIP on split words and synonyms support 2023-03-20 09:41:56 +01:00
c27ea2677f Rewrite cheapest path algorithm and empty path cache
It is now much simpler and has much better performance.
2023-03-20 09:41:56 +01:00
caa1e1b923 Add typo ranking rule to new search impl 2023-03-20 09:41:56 +01:00
71f18e4379 Add sort ranking rule to new search impl 2023-03-20 09:41:56 +01:00
600e3dd1c5 Remove warnings 2023-03-20 09:41:56 +01:00
362eb0de86 Add support for filters 2023-03-20 09:41:56 +01:00
998d46ac10 Add support for search offset and limit 2023-03-20 09:41:56 +01:00
6c85c0d95e Fix more bugs + visual empty path cache logging 2023-03-20 09:41:56 +01:00
0e1fbbf7c6 Fix bugs in query graph's "remove word" and "cheapest paths" algos 2023-03-20 09:41:56 +01:00
6806640ef0 Fix d2 description of paths map 2023-03-20 09:41:56 +01:00
173e37584c Improve the visual/detailed search logger 2023-03-20 09:41:55 +01:00
6ba4d5e987 Add a search logger 2023-03-20 09:41:55 +01:00
dd12d44134 Support swapped word pairs in new proximity ranking rule impl 2023-03-20 09:41:55 +01:00
a61495d660 Update Cargo.toml (commit to be deleted later) 2023-03-20 09:41:55 +01:00
c8e251bf24 Remove noise in codebase 2023-03-20 09:41:55 +01:00
a938fbde4a Use a cache when resolving the query graph 2023-03-20 09:41:55 +01:00
dcf3f1d18a Remove EdgeIndex and NodeIndex types, prefer u32 instead 2023-03-20 09:41:55 +01:00
66d0c63694 Add some documentation and use bitmaps instead of hashmaps when possible 2023-03-20 09:41:55 +01:00
132191360b Introduce the sort ranking rule working with the new search structures 2023-03-20 09:41:55 +01:00
345c99d5bd Introduce the words ranking rule working with the new search structures 2023-03-20 09:41:55 +01:00
89d696c1e3 Introduce the proximity ranking rule as a graph-based ranking rule 2023-03-20 09:41:55 +01:00
c645853529 Introduce a generic graph-based ranking rule 2023-03-20 09:41:55 +01:00
a70ab8b072 Introduce a function to find the K shortest paths in a graph 2023-03-20 09:41:55 +01:00
48aae76b15 Introduce a function to find the docids of a set of paths in a graph 2023-03-20 09:41:55 +01:00
23bf572dea Introduce cache structures used with ranking rule graphs 2023-03-20 09:41:55 +01:00
864f6410ed Introduce a structure to represent a set of graph paths efficiently 2023-03-20 09:41:55 +01:00
c9bf6bb2fa Introduce a structure to implement ranking rules with graph algorithms 2023-03-20 09:41:55 +01:00
46249ea901 Implement a function to find a QueryGraph's docids 2023-03-20 09:41:55 +01:00
ce0d1e0e13 Introduce a common way to manage the coordination between ranking rules 2023-03-20 09:41:55 +01:00
5065d8b0c1 Introduce a DatabaseCache to memorize the addresses of LMDB values 2023-03-20 09:41:55 +01:00
a83007c013 Introduce structure to represent search queries as graphs 2023-03-20 09:41:55 +01:00
79e0a6dd4e Introduce a new search module, eventually meant to replace the old one
The code here does not compile, because I am merely splitting one giant
commit into smaller ones where each commit explains a single file.
2023-03-20 09:41:55 +01:00
2d88089129 Remove unused term matching strategies 2023-03-20 09:41:55 +01:00
1d937f831b Temporarily remove codegen-units - 1 2023-03-20 09:41:55 +01:00
6c659dc12f Use MiMalloc in milli tests 2023-03-20 09:41:37 +01:00
a8531053a0 Make sure the parser rejects invalid syntax 2023-03-16 11:09:20 +01:00
cf34d1c95f Fix a test that forgot to match a Null value 2023-03-15 17:17:19 +01:00
1a9c58a7ab Fix a bug with the new flattening rules 2023-03-15 16:56:44 +01:00
64571c8288 Improve the testing of the filters 2023-03-15 14:57:17 +01:00
72123c458b Fix the tests to make flattening work 2023-03-15 14:12:34 +01:00
d5881519cb Make the json flattener return the original values 2023-03-15 14:12:34 +01:00
ea016d97af Implementing an IS EMPTY filter 2023-03-15 14:12:34 +01:00
70c906d4b4 Merge #3576
3576: Add boolean support for csv documents r=irevoire a=irevoire

Fixes https://github.com/meilisearch/meilisearch/issues/3572

## What does this PR do?
Add support for the boolean types in csv documents.
The type definition is `boolean` and the possible values are
- `true` for true
- `false` for false
- ` ` for null

Here is an example:
```csv
#id,cute:boolean
0,true
1,false
2,
```

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-03-14 12:28:12 +00:00
fa2ea4a379 Update the test to accept the new IS syntax 2023-03-14 10:31:27 +01:00
030263caa3 Change the IS NULL filter syntax to use the IS keyword 2023-03-14 10:31:04 +01:00
c25779afba Specify that the NULL keyword is a keyword too 2023-03-13 17:40:34 +01:00
0f33a65468 makes kero happy 2023-03-13 16:51:11 +01:00
7c9a8b1e1b Merge #3587
3587: Enable cache again in test suite CI r=curquiza a=curquiza

Following the change in this PR introduced in v1.1: https://github.com/meilisearch/meilisearch/pull/3422

The cache was removed due to failures (lack of space). Now that the binary is smaller (from 250Mb to 50Mb), we want to try to enable the cache again.
Indeed, without the cache step, the CIs are wayyyy slower (45min instead of 20-30min).

For later: Rust 1.68 introduced a new way to fetch crates. Updating the rust version might also help in the future!

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-03-13 13:51:32 +00:00
f45daf8031 Enable cache again in test suite CI 2023-03-13 14:24:15 +01:00
fb1260ee88 Merge #3568 #3569
3568: CI: Fix `publish-aarch64` job that still uses ubuntu-18.04 r=Kerollmops a=curquiza

Fixes #3563 

Main change
- add the usage of the `ubuntu-18.04` container instead of the native `ubuntu-18.04` of GitHub actions: I had to install docker in the container.

Small additional changes
- remove useless `fail-fast` and unused/irrelevant matrix inputs (`build`, `linker`, `os`, `use-cross`...)
- Remove useless step in job

Proof of work with this CI triggered on this current branch: https://github.com/meilisearch/meilisearch/actions/runs/4366233882

3569: Enhance Japanese language detection r=dureuill a=ManyTheFish

# Pull Request

This PR is a prototype and can be tested by downloading [the dedicated docker image](https://hub.docker.com/layers/getmeili/meilisearch/prototype-better-language-detection-0/images/sha256-a12847de00e21a71ab797879fd09777dadcb0881f65b5f810e7d1ed434d116ef?context=explore):

```bash
$ docker pull getmeili/meilisearch:prototype-better-language-detection-0
```

## Context
Some Languages are harder to detect than others; this misdetection leads to bad tokenization, making some words or even documents completely unsearchable. Japanese is the main Language affected and can be detected as Chinese, which has a completely different way of tokenization.

A [first iteration has been implemented for v1.1.0](https://github.com/meilisearch/meilisearch/pull/3347) but is an insufficient enhancement to make Japanese work. This first implementation was detecting the Language during the indexing to avoid bad detections during the search.
Unfortunately, some documents (shorter ones) can be wrongly detected as Chinese running bad tokenization for these documents and making possible the detection of Chinese during the search because it has been detected during the indexing.

For instance, a Japanese document `{"id": 1, "name": "東京スカパラダイスオーケストラ"}` is detected as Japanese during indexing, during the search the query `東京` will be detected as Japanese because only Japanese documents have been detected during indexing despite the fact that v1.0.2 would detect it as Chinese.
However if in the dataset there is at least one document containing a field with only Kanjis like:
_A document with only 1 field containing only Kanjis:_
```json
{
 "id":4,
 "name": "東京特許許可局"
}
```
_A document with 1 field containing only Kanjis and 1 field containing several Japanese characters:_
```json
{
 "id":105,
 "name": "東京特許許可局",
 "desc": "日経平均株価は26日 に約8カ月ぶりに2万4000円の心理的な節目を上回った。株高を支える材料のひとつは、自民党総裁選で3選を決めた安倍晋三首相の経済政策への期待だ。恩恵が見込まれるとされる人材サービスや建設株の一角が買われている。ただ思惑が先行して資金が集まっている面 は否めない。実際に政策効果を取り込む企業はどこか、なお未知数だ。"
}
```

Then, in both cases, the field `name` will be detected as Chinese during indexing allowing the search to detect Chinese in queries. Therefore,  the query `東京` will be detected as Chinese and only the two last documents will be retrieved by Meilisearch.

## Technical Approach

The current PR partially fixes these issues by:
1) Adding a check over potential misdetections and rerunning the extraction of the document, forcing the tokenization over the main Languages detected in it.
 >  1) run a first extraction allowing the tokenizer to detect any Language in any Script
 >  2) generate a distribution of tokens by Script and Languages (`script_language`)
 >  3) if for a Script we have a token distribution of one of the Language that is under the threshold, then we rerun the extraction forbidding the tokenizer to detect the marginal Languages
 >  4) the tokenizer will fall back on the other available Languages to tokenize the text. For example, if Chinese was marginally detected compared to Japanese on the CJ script, then the second extraction will force Japanese tokenization for CJ text in the document. However, the text on another script like Latin will not be impacted by this restriction.

2) Adding a filtering threshold during the search over Languages that have been marginally detected in documents

## Limits
This PR introduces 2 arbitrary thresholds:
1) during the indexing, a Language is considered misdetected if the number of detected tokens of this Language is under 10% of the tokens detected in the same Script (Japanese and Chinese are 2 different Languages sharing the "same" script "CJK").
2) during the search, a Language is considered marginal if less than 5% of documents are detected as this Language.

This PR only partially fixes these issues:
-  the query `東京` now finds Japanese documents if less than 5% of documents are detected as Chinese.
-  the document with the id `105` containing the Japanese field `desc` but the misdetected field `name` is now completely detected and tokenized as Japanese and is found with the query `東京`.
-  the document with the id `4` no longer breaks the search Language detection but continues to be detected as a Chinese document and can't be found during the search.

## Related issue
Fixes #3565

## Possible future enhancements
- Change or contribute to the Library used to detect the Language
  - the related issue on Whatlang: https://github.com/greyblake/whatlang-rs/issues/122

Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2023-03-09 15:34:35 +00:00
48a51e5cd6 Merge #3577
3577: Avoid fetching an LMDB value with an empty string r=ManyTheFish a=Kerollmops

# Pull Request

## Related issue
Fixes #3574 

## What does this PR do?
This PR fixes a bug where an empty key fetches an entry in the database. LMDB throws an error if an empty or too-long key is used to fetch an entry. This empty string seems to have been generated by the Charabia tokenizer.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-03-09 14:35:25 +00:00
2f8eb4f54a last PR fixes 2023-03-09 15:34:36 +01:00
dea101e3d9 Update meilisearch/src/routes/indexes/mod.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-03-09 15:17:03 +01:00
175e8a8495 Fix a diacritic issue 2023-03-09 14:57:47 +01:00
6da54d0cb6 Add a test to fix a diacritic issue 2023-03-09 14:57:38 +01:00
667bb87e35 Merge #3541
3541: Add cache on the indexes stats r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3540

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-03-09 13:32:52 +00:00
df48ac8803 Add one more test for the NULL operator 2023-03-09 13:53:37 +01:00
ff86073288 Add a snapshot for the NULL facet database 2023-03-09 13:32:27 +01:00
0ad53784e7 Create a new struct to reduce the type complexity 2023-03-09 13:21:21 +01:00
7935bef4cd Merge #3567
3567: Clean CI file names r=curquiza a=curquiza

Make the CI names more consistent to ease Gillian's onboarding 😇

No impact for the users or the developers of the team

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-03-09 12:20:18 +00:00
e064c52544 Rename an internal facet deletion method 2023-03-09 13:08:02 +01:00
e106b16148 Fix a typo in a variable
Co-authored-by: Louis Dureuil <louis@meilisearch.com>

aaa
2023-03-09 13:08:02 +01:00
eddefb0e0f refactor the error type of the milli::document thing
silence a warning
2023-03-09 13:03:14 +01:00
dff2715ef3 Try removing needless collect 2023-03-09 11:28:10 +01:00
5deea631ea fix clippy too many arguments 2023-03-09 11:19:13 +01:00
c5f22be6e1 add boolean support for csv documents 2023-03-09 11:12:49 +01:00
b4b859ec8c Fix typos 2023-03-09 10:58:35 +01:00
b1d61f5a02 Add more tests for the NULL filter 2023-03-09 10:04:27 +01:00
febc8d1b52 Clean CI file names 2023-03-08 19:12:33 +01:00
7dc04747fd Make clippy happy 2023-03-08 17:37:08 +01:00
7c0cd7172d Introduce the NULL and NOT value NULL operator 2023-03-08 17:14:34 +01:00
b99ef3d336 Update CI to still use ubuntu-18 2023-03-08 17:11:36 +01:00
43ff236df8 Write the NULL facet values in the database 2023-03-08 16:49:53 +01:00
19ab4d1a15 Classify the NULL fields values in the facet extractor 2023-03-08 16:49:31 +01:00
9287858997 Introduce a new facet_id_is_null_docids database in the index 2023-03-08 16:14:00 +01:00
7e2fd82e41 Use Language allow list in the highlighter 2023-03-08 12:44:16 +01:00
24c0775c67 Change indexing threshold 2023-03-08 12:36:04 +01:00
3092cf0448 Fix clippy errors 2023-03-08 10:53:42 +01:00
37d4551e8e Add a threshold filtering the Languages allowed to be detected at search time 2023-03-07 19:38:01 +01:00
da48506f15 Rerun extraction when language detection might have failed 2023-03-07 18:35:26 +01:00
2f5b9fbbd8 Restore contribution of the index sizes to the db size
- the index size now contributes to the db size even if the index is not authorized
2023-03-07 14:05:27 +01:00
7faa9a22f6 Pass IndexStat by ref in store_stats_of 2023-03-07 14:00:54 +01:00
370d88f626 Merge #3561
3561: Fix the snapshots permissions on unix system r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3507

The snapshot permissions were wrong after v0.30 and the huge refactor of the index scheduler.
Fix this issue + add a test on the permissions on unix

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-03-07 08:51:38 +00:00
d34faa8f9c put back the sleep as it was and fix the from 2023-03-06 18:09:09 +01:00
e5d0bef6d8 update a comment 2023-03-06 17:04:24 +01:00
76288fad72 Fix snapshots 2023-03-06 16:57:31 +01:00
076a3d371c Eagerly compute stats as fallback to the cache.
- Refactor all around to avoid spawning indexes more times than necessary
2023-03-06 16:57:31 +01:00
3bbf760542 update most snapshots 2023-03-06 16:57:31 +01:00
fd5c48941a Add cache on the indexes stats 2023-03-06 16:57:31 +01:00
df3986cd83 Merge #3510 #3551 #3552 #3553
3510: Add scheduled test to Actions for all features r=curquiza a=jlucktay

# Pull Request

## Related issue

Fixes #3506.

## What does this PR do?

Add a new job to the Rust workflow to run `cargo build` and `cargo test` (on the cron schedule only) with the `--all-features` flag.
This will execute across all three environments: Linux, macOS, Windows.
    
Autoformat the Rust workflow file via [the Red Hat YAML extension for Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=redhat.vscode-yaml).
This straightens out whitespace and string quoting for safer parsing.

As [pointed out by `@irevoire` here](https://github.com/meilisearch/meilisearch/issues/3506#issuecomment-1433501867), changes to CI such as this one will need to wait for #3496 before going ahead.
The new action [was executed on my fork](https://github.com/jlucktay/meilisearch/actions/runs/4211694210) but ended up failing on some metrics tests, as called out in that linked comment.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


3551: Bump Swatinem/rust-cache from 2.2.0 to 2.2.1 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.0 to 2.2.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.2.1</h2>
<ul>
<li>Update <code>@actions/cache</code> dependency to fix usage of <code>zstd</code> compression.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.2.1</h2>
<ul>
<li>Update <code>@actions/cache</code> dependency to fix usage of <code>zstd</code> compression.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="6fd3edff69"><code>6fd3edf</code></a> 2.2.1</li>
<li><a href="a1c019f71a"><code>a1c019f</code></a> update dependencies and rebuild</li>
<li><a href="664ce0090f"><code>664ce00</code></a> chore: Create check-dist.yml (<a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/96">#96</a>)</li>
<li>See full diff in <a href="https://github.com/Swatinem/rust-cache/compare/v2.2.0...v2.2.1">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.2.0&new-version=2.2.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

3552: Bump svenstaro/upload-release-action from 2.4.0 to 2.5.0 r=curquiza a=dependabot[bot]

Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.4.0 to 2.5.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/releases">svenstaro/upload-release-action's releases</a>.</em></p>
<blockquote>
<h2>2.5.0</h2>
<ul>
<li>Add retry to upload release <a href="https://github-redirect.dependabot.com/svenstaro/upload-release-action/pull/96">#96</a> (thanks <a href="https://github.com/sonphantrung"><code>@sonphantrung</code></a>)</li>
</ul>
<h2>2.4.1</h2>
<ul>
<li>Modernize octokit usage</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md">svenstaro/upload-release-action's changelog</a>.</em></p>
<blockquote>
<h2>[2.5.0] - 2023-02-21</h2>
<ul>
<li>Add retry to upload release <a href="https://github-redirect.dependabot.com/svenstaro/upload-release-action/pull/96">#96</a> (thanks <a href="https://github.com/sonphantrung"><code>@sonphantrung</code></a>)</li>
</ul>
<h2>[2.4.1] - 2023-02-01</h2>
<ul>
<li>Modernize octokit usage</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="7319e4733e"><code>7319e47</code></a> 2.5.0</li>
<li><a href="4e86b8565b"><code>4e86b85</code></a> Prepare release</li>
<li><a href="3a6baf0f12"><code>3a6baf0</code></a> Add CHANGELOG entry for retry feature</li>
<li><a href="e8c797e08e"><code>e8c797e</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/svenstaro/upload-release-action/issues/96">#96</a> from sonphantrung/retry-v2</li>
<li><a href="cf83be2c7f"><code>cf83be2</code></a> Merge branch 'master' into retry-v2</li>
<li><a href="cfdd9b50bd"><code>cfdd9b5</code></a> Merge branch 'retry' of <a href="https://github.com/messense/upload-release-action">https://github.com/messense/upload-release-action</a></li>
<li><a href="cc92c9093e"><code>cc92c90</code></a> 2.4.1</li>
<li><a href="72f6bf584a"><code>72f6bf5</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/svenstaro/upload-release-action/issues/93">#93</a> from ggreif/gabor/fix</li>
<li><a href="f2899b4677"><code>f2899b4</code></a> use <code>createReadStream</code></li>
<li><a href="af306bddfe"><code>af306bd</code></a> Revert &quot;use the <code>@file</code> mechanism of octokit-5&quot;</li>
<li>Additional commits viewable in <a href="https://github.com/svenstaro/upload-release-action/compare/2.4.0...2.5.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=svenstaro/upload-release-action&package-manager=github_actions&previous-version=2.4.0&new-version=2.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

3553: Bump docker/build-push-action from 3 to 4 r=curquiza a=dependabot[bot]

Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/build-push-action/releases">docker/build-push-action's releases</a>.</em></p>
<blockquote>
<h2>v4.0.0</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://github-redirect.dependabot.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>Revert disable provenance by default if not set by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> in <a href="https://github-redirect.dependabot.com/docker/build-push-action/pull/784">docker/build-push-action#784</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.3.1...v4.0.0">https://github.com/docker/build-push-action/compare/v3.3.1...v4.0.0</a></p>
<h2>v3.3.1</h2>
<ul>
<li>Disable provenance by default if not set by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/781">#781</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.3.0...v3.3.1">https://github.com/docker/build-push-action/compare/v3.3.0...v3.3.1</a></p>
<h2>v3.3.0</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://github-redirect.dependabot.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>Add <code>attests</code>, <code>provenance</code> and <code>sbom</code> inputs by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/746">#746</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/759">#759</a>)</li>
<li>Log GitHub Actions runtime token access controls by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/707">#707</a>)</li>
<li>Examples moved to <a href="https://docs.docker.com/build/ci/github-actions/examples/">docs website</a> by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/718">#718</a>)</li>
<li>Bump minimatch from 3.0.4 to 3.1.2 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/732">#732</a>)</li>
<li>Bump csv-parse from 5.3.0 to 5.3.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/729">#729</a>)</li>
<li>Bump json5 from 2.2.0 to 2.2.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/749">#749</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.2.0...v3.3.0">https://github.com/docker/build-push-action/compare/v3.2.0...v3.3.0</a></p>
<h2>v3.2.0</h2>
<ul>
<li>Remove workaround for <code>setOutput</code> by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/704">#704</a>)</li>
<li>Docs: fix Git context link and add more details about subdir support by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/685">#685</a>)</li>
<li>Docs: named context by <a href="https://github.com/baibaratsky"><code>`@​baibaratsky</code></a>` and <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/665">#665</a>)</li>
<li>Bump <code>`@​actions/core</code>` from 1.9.0 to 1.10.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/667">#667</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/695">#695</a>)</li>
<li>Bump <code>`@​actions/github</code>` from 5.0.3 to 5.1.1 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/696">#696</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.1.1...v3.2.0">https://github.com/docker/build-push-action/compare/v3.1.1...v3.2.0</a></p>
<h2>v3.1.1</h2>
<ul>
<li>Fix GitHub token not passed with Git context if subdir defined by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/663">#663</a>)</li>
<li>Replace deprecated <code>fs.rmdir</code> with <code>fs.rm</code> by <a href="https://github.com/bendrucker"><code>`@​bendrucker</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/657">#657</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.1.0...v3.1.1">https://github.com/docker/build-push-action/compare/v3.1.0...v3.1.1</a></p>
<h2>v3.1.0</h2>
<ul>
<li><code>no-cache-filters</code> input by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/653">#653</a>)</li>
<li>Bump <code>`@​actions/github</code>` from 5.0.1 to 5.0.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/619">#619</a>)</li>
<li>Bump <code>`@​actions/core</code>` from 1.6.0 to 1.9.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/620">#620</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/637">#637</a>)</li>
<li>Bump csv-parse from 5.0.4 to 5.3.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/623">#623</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/650">#650</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.0.0...v3.1.0">https://github.com/docker/build-push-action/compare/v3.0.0...v3.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="3b5e8027fc"><code>3b5e802</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/784">#784</a> from crazy-max/enable-provenance</li>
<li><a href="02d3266a89"><code>02d3266</code></a> update generated content</li>
<li><a href="f403dafe18"><code>f403daf</code></a> revert disable provenance by default if not set</li>
<li>See full diff in <a href="https://github.com/docker/build-push-action/compare/v3...v4">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/build-push-action&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: James Lucktaylor <jlucktay+github@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-03-06 15:45:33 +00:00
e704728ee7 fix the snapshots permissions on unix system 2023-03-06 16:28:40 +01:00
34ed6518ae Merge #3554
3554: Bump docker/metadata-action from 3 to 4 r=curquiza a=dependabot[bot]

Bumps [docker/metadata-action](https://github.com/docker/metadata-action) from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/metadata-action/releases">docker/metadata-action's releases</a>.</em></p>
<blockquote>
<h2>v4.0.0</h2>
<ul>
<li>Node 16 as default runtime by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/176">#176</a>)
<ul>
<li>This requires a minimum <a href="https://github.com/actions/runner/releases/tag/v2.285.0">Actions Runner</a> version of v2.285.0, which is by default available in GHES 3.4 or later.</li>
</ul>
</li>
<li>Do not sanitize before pattern matching by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/201">#201</a>)
<ul>
<li>Breaking change with <code>type=match</code> pattern matching</li>
</ul>
</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v3.8.0...v4.0.0">https://github.com/docker/metadata-action/compare/v3.8.0...v4.0.0</a></p>
<h2>v3.8.0</h2>
<ul>
<li>Add attribute to enable/disable images by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/193">#193</a>)</li>
<li>Add <code>is_default_branch</code> global expression by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/192">#192</a> <a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/197">#197</a> <a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/198">#198</a>)</li>
<li>Update fixtures (dev) by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/190">#190</a>)</li>
<li>Bump semver from 7.3.5 to 7.3.7 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/185">#185</a>)</li>
<li>Bump moment from 2.29.2 to 2.29.3 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/187">#187</a>)</li>
<li>Bump csv-parse from 4.16.3 to 5.0.4 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/195">#195</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/metadata-action/compare/v3.7.0...v3.8.0">https://github.com/docker/metadata-action/compare/v3.7.0...v3.8.0</a></p>
<h2>v3.7.0</h2>
<ul>
<li>Handle comments for multi-line inputs (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/172">#172</a>)</li>
<li>Missing <code>json</code> output in <code>action.yml</code> (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/167">#167</a>)</li>
<li>Update dev dependencies and workflow (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/175">#175</a>)</li>
<li>Bump minimist from 1.2.5 to 1.2.6 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/182">#182</a>)</li>
<li>Bump moment from 2.29.1 to 2.29.2 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/180">#180</a>)</li>
<li>Bump <code>`@​actions/github</code>` from 5.0.0 to 5.0.1 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/179">#179</a>)</li>
<li>Bump node-fetch from 2.6.1 to 2.6.7 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/173">#173</a>)</li>
</ul>
<h2>v3.6.2</h2>
<ul>
<li>Handle raw statement for pre-release (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/155">#155</a> <a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/156">#156</a>)</li>
</ul>
<h2>v3.6.1</h2>
<ul>
<li>Preserve quotes inside unquoted field (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/153">#153</a>)</li>
</ul>
<h2>v3.6.0</h2>
<ul>
<li><code>base_ref</code> global expression (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/142">#142</a>)</li>
<li>Trim tags and flavor inputs (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/143">#143</a>)</li>
<li>Bump <code>`@​actions/core</code>` from 1.5.0 to 1.6.0 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/135">#135</a>)</li>
<li>Bump ansi-regex from 5.0.0 to 5.0.1 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/134">#134</a>)</li>
<li>Bump tmpl from 1.0.4 to 1.0.5 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/132">#132</a>)</li>
<li>Bump csv-parse from 4.16.0 to 4.16.3 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/131">#131</a>)</li>
</ul>
<h2>v3.5.0</h2>
<ul>
<li>Add global expression <code>date</code> (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/121">#121</a>)</li>
<li>Bump <code>`@​actions/core</code>` from 1.4.0 to 1.5.0 (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/122">#122</a>)</li>
</ul>
<h2>v3.4.1</h2>
<ul>
<li>Only return edge if branch matches (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/115">#115</a>)</li>
</ul>
<h2>v3.4.0</h2>
<ul>
<li>PEP 440 support (<a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/108">#108</a>)</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Upgrade guide</summary>
<p><em>Sourced from <a href="https://github.com/docker/metadata-action/blob/master/UPGRADE.md">docker/metadata-action's upgrade guide</a>.</em></p>
<blockquote>
<h1>Upgrade notes</h1>
<h2>v2 to v3</h2>
<ul>
<li>Repository has been moved to docker org. Replace <code>crazy-max/ghaction-docker-meta@v2</code>
with <code>docker/metadata-action@v4</code></li>
<li>The default bake target has been changed: <code>ghaction-docker-meta</code> &gt; <code>docker-metadata-action</code></li>
</ul>
<h2>v1 to v2</h2>
<ul>
<li><a href="https://github.com/docker/metadata-action/blob/master/#inputs">inputs</a>
<ul>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-sha"><code>tag-sha</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-edge--tag-edge-branch"><code>tag-edge</code> / <code>tag-edge-branch</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-semver"><code>tag-semver</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-match--tag-match-group"><code>tag-match</code> / <code>tag-match-group</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-latest"><code>tag-latest</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-schedule"><code>tag-schedule</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#tag-custom--tag-custom-only"><code>tag-custom</code> / <code>tag-custom-only</code></a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#label-custom"><code>label-custom</code></a></li>
</ul>
</li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#basic-workflow">Basic workflow</a></li>
<li><a href="https://github.com/docker/metadata-action/blob/master/#semver-workflow">Semver workflow</a></li>
</ul>
<h3>inputs</h3>
<table>
<thead>
<tr>
<th>New</th>
<th>Unchanged</th>
<th>Removed</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>tags</code></td>
<td><code>images</code></td>
<td><code>tag-sha</code></td>
</tr>
<tr>
<td><code>flavor</code></td>
<td><code>sep-tags</code></td>
<td><code>tag-edge</code></td>
</tr>
<tr>
<td><code>labels</code></td>
<td><code>sep-labels</code></td>
<td><code>tag-edge-branch</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-semver</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-match</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-match-group</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-latest</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-schedule</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-custom</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>tag-custom-only</code></td>
</tr>
<tr>
<td></td>
<td></td>
<td><code>label-custom</code></td>
</tr>
</tbody>
</table>
<h4><code>tag-sha</code></h4>
<pre lang="yaml"><code>tags: |
  type=sha
</code></pre>
<h4><code>tag-edge</code> / <code>tag-edge-branch</code></h4>
<pre lang="yaml"><code>tags: |
  # default branch
&lt;/tr&gt;&lt;/table&gt; 
</code></pre>
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="507c2f2dc5"><code>507c2f2</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/257">#257</a> from crazy-max/env-output</li>
<li><a href="04861f5102"><code>04861f5</code></a> update generated content</li>
<li><a href="6729545cde"><code>6729545</code></a> Provide outputs as env vars</li>
<li><a href="05d22bf317"><code>05d22bf</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/256">#256</a> from crazy-max/fix-readme</li>
<li><a href="70b403b46b"><code>70b403b</code></a> Fix README</li>
<li><a href="9e6ae02878"><code>9e6ae02</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/252">#252</a> from docker/dependabot/npm_and_yarn/json5-2.2.3</li>
<li><a href="3d239e8b8a"><code>3d239e8</code></a> Bump json5 from 2.2.0 to 2.2.3</li>
<li><a href="7cb52e2750"><code>7cb52e2</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/251">#251</a> from chroju/set_timezone</li>
<li><a href="90a1d5cf21"><code>90a1d5c</code></a> Add tz attribute to handlebar date function</li>
<li><a href="c98ac5e987"><code>c98ac5e</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/docker/metadata-action/issues/249">#249</a> from crazy-max/fix-readme</li>
<li>Additional commits viewable in <a href="https://github.com/docker/metadata-action/compare/v3...v4">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/metadata-action&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-03-06 14:56:20 +00:00
c0ede6d152 Merge #3562
3562: Update version for the next release (v1.1.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-03-06 13:54:16 +00:00
577e7126f9 Update version for the next release (v1.1.0) in Cargo.toml 2023-03-06 13:52:54 +00:00
22219fd88f ci(actions/rust): explicitly set up dependencies and toolchain override 2023-03-06 12:45:08 +00:00
a9e17ab8c6 style(actions/rust): resolve PR review 2023-03-03 12:08:30 +00:00
2dd948a4a1 ci(actions/rust): align with test-linux job 2023-03-03 12:07:42 +00:00
76cf1bff87 Add scheduled test to Actions for all features
Add a new job to the Rust workflow to run 'cargo build' and 'cargo
test' (on the cron schedule only) with the '--all-features' flag.
This will execute across all three environments: Linux, macOS,
Windows.

Autoformat the Rust workflow file via the Red Hat YAML extension for
Visual Studio Code:
https://marketplace.visualstudio.com/items?itemName=redhat.vscode-yaml
This straightens out whitespace and string quoting for safer parsing.

Fixes #3506.
2023-03-03 12:01:14 +00:00
3d1046369c Merge #3529
3529: Add an analytics on the geo bounding box feature r=ManyTheFish a=irevoire

Fixes #3527

[The specification of the geoBoundingBox](https://github.com/meilisearch/specifications/pull/223) feature has been updated and now introduces new analytics to track the usage of the geoBoundingBox feature in search requests.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-03-02 11:58:39 +00:00
4f1ccbc495 Merge #3525
3525: Fix phrase search containing stop words r=ManyTheFish a=ManyTheFish

# Summary
A search with a phrase containing only stop words was returning an HTTP error 500,
this PR filters the phrase containing only stop words dropping them before the search starts, a query with a phrase containing only stop words now behaves like a placeholder search.

fixes https://github.com/meilisearch/meilisearch/issues/3521

related v1.0.2 PR on milli: https://github.com/meilisearch/milli/pull/779



Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-03-02 10:55:37 +00:00
37489fd495 Return an internal error in the case of matching word is invalid 2023-03-01 19:05:16 +01:00
c0d8eb295d Bump docker/metadata-action from 3 to 4
Bumps [docker/metadata-action](https://github.com/docker/metadata-action) from 3 to 4.
- [Release notes](https://github.com/docker/metadata-action/releases)
- [Upgrade guide](https://github.com/docker/metadata-action/blob/master/UPGRADE.md)
- [Commits](https://github.com/docker/metadata-action/compare/v3...v4)

---
updated-dependencies:
- dependency-name: docker/metadata-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-03-01 17:58:18 +00:00
bcd3f6054a Bump docker/build-push-action from 3 to 4
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 3 to 4.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](https://github.com/docker/build-push-action/compare/v3...v4)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-03-01 17:58:11 +00:00
3a0314f9de Bump svenstaro/upload-release-action from 2.4.0 to 2.5.0
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.4.0 to 2.5.0.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/2.4.0...2.5.0)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-03-01 17:58:05 +00:00
fa4d8b8348 Bump Swatinem/rust-cache from 2.2.0 to 2.2.1
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.0 to 2.2.1.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v2.2.0...v2.2.1)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-03-01 17:57:57 +00:00
d9e19c89c5 Merge #3544
3544: Attempt to use default vram budget for faster startup r=Kerollmops a=dureuill

# Pull Request

## Related issue
Follow-up to #3382: addresses the added startup time on Windows/macOS.

## What does this PR do?
- Attempt to skip budget calculation by using "known good values" instead
- Perform dichotomic budget calculation as fallback only when the known value is not actually good.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-03-01 09:49:38 +00:00
18bf740ee2 Merge #3539
3539: Update migration link to the docs r=curquiza a=curquiza

Fixes https://github.com/meilisearch/meilisearch/issues/3449

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-02-28 18:21:11 +00:00
0202ff8ab4 Attempt to use default budget for faster startup 2023-02-28 10:55:43 +01:00
fbe4ab158e Merge #3543
3543: config: case `experimental_enable_metrics` in snake_case r=dureuill a=dureuill

# Pull Request

Avoids "Error: unknown field `experimental-enable-metrics` at line 1 column 1" error when using the default config file.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-28 09:35:25 +00:00
92318ca573 config: case experimental_enable_metrics in snake_case 2023-02-27 17:14:06 +01:00
6ca7a109b9 Merge #3538
3538: Improve the api key of the metrics r=dureuill a=irevoire

Related to https://github.com/meilisearch/meilisearch/pull/3524#discussion_r1115903998
Update: https://github.com/meilisearch/meilisearch/issues/3523

Right after merging the PR, we changed our minds and decided to update the way we handle the API keys on the metrics route.
Now instead of bypassing all the applied rules of the API key, we forbid the usage of the `/metrics` route if you have any restrictions on the indexes.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-27 13:46:57 +00:00
d4d4702f1b Rephrase hint message 2023-02-27 13:46:16 +01:00
2648bbca25 Update migration link to the docs 2023-02-23 18:36:30 +01:00
562c86ea01 Merge #3519
3519: Update comments in version bump CI r=irevoire a=curquiza

Following https://github.com/meilisearch/meilisearch/pull/3499

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-02-23 16:46:10 +00:00
7ae10abb6b fix the auth tests 2023-02-23 17:27:42 +01:00
dc533584c6 Forbid the usage of the metrics route if your API key has a limitation on the indexes 2023-02-23 17:13:22 +01:00
442c1e36de Merge #3537
3537: fix a bug where the filestore could try to parse its own tmp file and fail (main) r=irevoire a=curquiza



Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-23 15:58:05 +00:00
66b5e4b548 fix a bug where the filestore could try to parse its own tmp file and fail 2023-02-23 16:52:41 +01:00
89ac1015f3 Merge #3524
3524: Update the metrics route r=irevoire a=irevoire

Fixes #3523

Make the metrics available by default without a feature flag.
+ Rename the cli-flag to `experimental-enable-metrics`.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-23 15:11:10 +00:00
ca25904c26 Merge #3331
3331: Limit the number of concurrently opened indexes r=dureuill a=dureuill

# Pull Request

## Related issue
Relevant to #1841, fixes #3382

## What does this PR do?

### User standpoint

- Limit the number of concurrently opened indexes (currently, the number of indexes that can be concurrently opened is computed at startup)
- When too many indexes are opened, the least recently used one is closed and its virtual memory is released.
- This allows a user to have an arbitrary number of indexes of an arbitrary size

### Implementation standpoint

- Added a LRU cache map in `index-scheduler::lru`. A more complete implementation (e.g. with helper functions not used here) is available but would better fit a dedicated crate.
- Use the LRU cache map in the `IndexScheduler`. To simplify the lifecycle of indexes, they are never removed from the cache when they are in the middle of a resize or delete operation. To achieve this, an intermediate `Vec` stores the UUIDs of the indexes that are in the middle of such an operation.
- Upon creating the index scheduler object, compute the total virtual memory that is addressable by using a dichotomic search on the max size of an index. Use this as a base to compute the number of indexes that can be open with 2TiB per index. If the virtual memory address space is lower than 2TiB, then only allow for 1 index of a fraction of that size.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-23 14:20:52 +00:00
8a1b1a95f3 comment the right of the metrics 2023-02-23 13:59:01 +01:00
8d47d2d018 update the auth api after the rebase 2023-02-23 13:15:51 +01:00
5082cd5e67 update the config file to mention the experimental metrics feature 2023-02-23 12:26:22 +01:00
750a2b6842 Update meilisearch/src/option.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-23 12:26:22 +01:00
bc7d4112d9 send the cli experimental feature in the analytics 2023-02-23 12:26:22 +01:00
88a18677d0 rename the metrics cli flag 2023-02-23 12:26:22 +01:00
68e30214ca remove the feature flag and reorganize the module slightly 2023-02-23 12:26:21 +01:00
b985b96e4e Merge #3530
3530: Fix highlighter bug r=Kerollmops a=ManyTheFish

# Pull Request

There was a highlighting issue on CJK characters: we were highlighting too many characters, and these additional characters were duplicated after the highlight tag.

## Related issue
Fixes #3517 
Fixes #3526 

## What does this PR do?
- add a test showcasing the bug
- fix the bug by activating the char_map creation of the tokenizer during the highlighting process


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-02-23 10:59:43 +00:00
71e7900c67 move index_map to file 2023-02-23 11:29:11 +01:00
431782f3ee Move index_mapper to mod.rs 2023-02-23 11:29:11 +01:00
3db613ff77 Don't iterate all indexes manually 2023-02-23 11:29:09 +01:00
5822764be9 Skip computing index budget in tests 2023-02-23 11:23:39 +01:00
c63294f331 Switch to 2TiB default index size, updates documentation 2023-02-23 11:23:39 +01:00
a529bf160c Compute budget 2023-02-23 11:23:39 +01:00
f1119f2dc2 Add dichotomic search to utils 2023-02-23 11:23:39 +01:00
1db7d5d851 Add basic tests for index eviction and resize 2023-02-23 11:23:39 +01:00
80b060f920 Use LRU cache 2023-02-23 11:23:39 +01:00
fdf043580c Add LruMap 2023-02-23 11:23:38 +01:00
f62703cd67 Merge #3534
3534: Update the csv error code from InvalidIndexCsvDelimiter to InvalidDocumentCsvDelimiter r=Kerollmops a=irevoire

Fixes #3533

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-23 07:05:12 +00:00
76f82c880d update the csv error code from InvalidIndexCsvDelimiter to InvalidDocumentCsvDelimiter 2023-02-22 19:26:48 +01:00
6eeba3a8ab Merge #3417
3417: Allow multiple searches in a single request r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #3427

## What does this PR do?

### User standpoint

- Adds a new `/multi-search` entry point (not to be confused with the existing `/{index_uid}/search` entry points) that accepts a POST whose body is an object containing an array of queries.
    - Each query must specify on which index it acts by providing its `indexUid`. Other parameters are identical to the one in the existing search routes (`q`, `limit`, etc.).
    - The response is a JSON object containing an array of the results for each search query as if it had been performed using the `/{index_uid}/search` routes.

### Implementation standpoint

- Refactor authentication module:
  -  Allow tenant token to be checked even without an index in URL
  - Add `meilisearch-auth` as a dependency to `index-scheduler` so as to have a working method of checking if the indexes are authorized there that takes into account both the API key and the tenant token (existing method relied on a behavior that was returning the allowed indexes from the API key as long as there weren't any tenant token)
  - Make `AuthFilter` an object with invariants and so its fields are now private
  - Use the methods of `AuthFilter` to know if an index is authorized rather than relying on its internal search rules.
  - Make tenant token search rules optional and `None` when the `AuthFilter` was not built with a tenant token.
- Add a new `routes::index::search::multiple_search` module containing a post handler that performs the same work as the existing `routes::index::search` post handler, but in a loop.
  - Add various tests
  - Add authentication test suite 

### Sample request


<details>
<summary>
Click to see request/response
</summary>

```json
~/datasets
❯ curl \
  -X POST 'http://localhost:7700/multi-search' \
  -H 'Content-Type: application/json' \
  --data-binary '{"queries": [{ "indexUid": "index-0", "q": "toto", "limit": 1 }, {"indexUid": "index-1", "q": "titi", "limit": 1}]}' | jsonxf
{ "results": [
  {
    "indexUid": "index-0",
    "hits": [
      {
        "id": 20480,
        "title": "Toto - 25th Anniversary - Live in Amsterdam",
        "overview": "Filmed in High Definition in Amsterdam on Toto's 25th Anniversary Tour in 2003, this stunning concert captures the band at their very best, reunited with original vocalist Bobby Kimball. The set combines all their hits with tracks from their latest album \"Through the Looking Glass\" and other live favorites, performed in front of a wildly enthusastic sell-out crowd. Extras include 35 minute behind-the-scenes film following the band through various stages of their world tour including footage from Japan, Thailand, South Korea, and France.  Toto celebrate their 25th anniversary with this blistering live concert, filmed in Amsterdam on February 25th, 2003. Proving they've still got exactly what it takes to move a crowd, the band perform a mixture of medley's, solo spots, and huge hits. Tracks include \"Rosanna,\" \"Africa,\" \"Hold The Line,\" a cover of the Beatles' \"While My Guitar Gently Weeps,\" and many more.",
        "genres": [
          "Music"
        ],
        "poster": "https://image.tmdb.org/t/p/w500/7SCbUPwoB8Z7VUIA1Rn1WWwjNiT.jpg",
        "release_date": 1064275200
      }
    ],
    "query": "toto",
    "processingTimeMs": 1,
    "limit": 1,
    "offset": 0,
    "estimatedTotalHits": 17
  },
  {
    "indexUid": "index-1",
    "hits": [
      {
        "id": 41212,
        "title": "Titicut Follies",
        "overview": "The film is a stark and graphic portrayal of the conditions that existed at the State Prison for the Criminally Insane at Bridgewater, Massachusetts. TITICUT FOLLIES documents the various ways the inmates are treated by the guards, social workers and psychiatrists.",
        "genres": [
          "Documentary"
        ],
        "poster": "https://image.tmdb.org/t/p/w500/2Ju5hn1ofOPeP1eRJtQWakiHuhW.jpg",
        "release_date": -70934400
      }
    ],
    "query": "titi",
    "processingTimeMs": 0,
    "limit": 1,
    "offset": 0,
    "estimatedTotalHits": 7
  }
]}
```

</details>


## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-22 17:23:45 +00:00
28d6a4466d Make the tokenizer creating a char map during highlighting 2023-02-22 17:43:10 +01:00
1ba2fae3ae multi-search/authentication: Add authentication tests 2023-02-22 17:04:12 +01:00
28d6ab78de multi-search: Add multi search tests 2023-02-22 17:04:12 +01:00
3ba5dfb6ec multi-search: Add test server search method for multi search 2023-02-22 17:04:12 +01:00
a23fbf6c7b multi-search: Add search with an array of indexes 2023-02-22 17:04:12 +01:00
596a98f7c6 multi-search: Add basic analytics 2023-02-22 16:37:18 +01:00
14c4a222da Authentication: AuthFilter::allow_index_creation both check that the index is authorized and the IndexCreate action 2023-02-22 16:37:13 +01:00
690bb2e5cc Authentication: Make allow_index_creation a private field 2023-02-22 16:35:52 +01:00
d0f2c9c72e Authentication: Make search_rules optional in AuthFilter 2023-02-22 16:35:52 +01:00
42577403d8 Authentication: Directly pass the authfilter to the index scheduler 2023-02-22 16:35:52 +01:00
c8c5944094 Authentication: is_index_authorized takes into account API key indexes even with a tenant token 2023-02-22 16:35:52 +01:00
4b65851793 Authentication: Refactor authentication check to work for tenant token even without an index in URL
Callers need to manually check `is_index_authorized` when using the route without an index in URL
2023-02-22 16:35:51 +01:00
10d4a1a9af Make ResponseError code and message pub so that they can be modified 2023-02-22 16:35:51 +01:00
ad35edfa32 Add test 2023-02-22 15:47:15 +01:00
033417e9cc add an analytics on the geo bounding box feature 2023-02-22 15:35:26 +01:00
ac5a1e4c4b Merge #3423
3423: Add min and max facet stats r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #3426

## What does this PR do?

### User standpoint

- When using a `facets` parameter in search, the facets that have numeric values are displayed in a new section of the response called `facetStats` that contains, per facet, the numeric min and max value of the hits returned by the search.

<details>
<summary>
Sample request/response
</summary>

```json
❯ curl \
  -X POST 'http://localhost:7700/indexes/meteorites/search?facets=mass' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "q": "LL6", "facets":["mass", "recclass"], "limit": 5 }' | jsonxf
{
  "hits": [
    {
      "name": "Niger (LL6)",
      "id": "16975",
      "nametype": "Valid",
      "recclass": "LL6",
      "mass": 3.3,
      "fall": "Fell"
    },
    {
      "name": "Appley Bridge",
      "id": "2318",
      "nametype": "Valid",
      "recclass": "LL6",
      "mass": 15000,
      "fall": "Fell",
      "_geo": {
        "lat": 53.58333,
        "lng": -2.71667
      }
    },
    {
      "name": "Athens",
      "id": "4885",
      "nametype": "Valid",
      "recclass": "LL6",
      "mass": 265,
      "fall": "Fell",
      "_geo": {
        "lat": 34.75,
        "lng": -87.0
      }
    },
    {
      "name": "Bandong",
      "id": "4935",
      "nametype": "Valid",
      "recclass": "LL6",
      "mass": 11500,
      "fall": "Fell",
      "_geo": {
        "lat": -6.91667,
        "lng": 107.6
      }
    },
    {
      "name": "Benguerir",
      "id": "30443",
      "nametype": "Valid",
      "recclass": "LL6",
      "mass": 25000,
      "fall": "Fell",
      "_geo": {
        "lat": 32.25,
        "lng": -8.15
      }
    }
  ],
  "query": "LL6",
  "processingTimeMs": 15,
  "limit": 5,
  "offset": 0,
  "estimatedTotalHits": 42,
  "facetDistribution": {
    "mass": {
      "110000": 1,
      "11500": 1,
      "1161": 1,
      "12000": 1,
      "1215.5": 1,
      "127000": 1,
      "15000": 1,
      "1676": 1,
      "1700": 1,
      "1710.5": 1,
      "18000": 1,
      "19000": 1,
      "220000": 1,
      "2220": 1,
      "22300": 1,
      "25000": 2,
      "265": 1,
      "271000": 1,
      "2840": 1,
      "3.3": 1,
      "3000": 1,
      "303": 1,
      "32000": 1,
      "34000": 1,
      "36.1": 1,
      "45000": 1,
      "460": 1,
      "478": 1,
      "483": 1,
      "5500": 2,
      "600": 1,
      "6000": 1,
      "67.8": 1,
      "678": 1,
      "680.5": 1,
      "6930": 1,
      "8": 1,
      "8300": 1,
      "840": 1,
      "8400": 1
    },
    "recclass": {
      "L/LL6": 3,
      "LL6": 39
    }
  },
  "facetStats": {
    "mass": {
      "min": 3.3,
      "max": 271000.0
    }
  }
}
```

</details>

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-22 13:06:43 +00:00
3eb9a08b5c Update comments in version bump CI 2023-02-21 19:14:59 +01:00
900bae3d9d keep phrases that have at least one word 2023-02-21 18:16:51 +01:00
28b7d73d4a Remove an inefficient part of a test on milli 2023-02-21 18:16:51 +01:00
6841f167b4 Add test 2023-02-21 18:02:52 +01:00
c88b6f331f Merge #3482
3482: Optimize meilisearch uffizzi build r=curquiza a=waveywaves

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3476

## What does this PR do?
even though docker cache was being used earlier for uffizzi builds, seems like the cache layers weren't persisting. This commit adds changes to move meilisearch building outside the dockerfile so that we can use the rust cache action. We are also building to the musl target so that the binary for meilisearch which is created can be used for the uffizzi ttyd image which uses alpine.

Meilisearch build time brought to 5 mins example https://github.com/waveywaves/meilisearch/actions/runs/4142776058

we also update the version of uffizzi action used here which fixes another uffizzi bug where the environments are not deployed. https://app.uffizzi.com/github.com/waveywaves/meilisearch/pull/2 was built as a part of a test for this PR and we can be sure that the deployment works well now.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vibhav Bobade <vibhav.bobde@gmail.com>
2023-02-21 16:06:53 +00:00
09a94e0db3 optimize meilisearch uffizzi build
even though docker cache was being used earlier for uffizzi builds,
seems like the cache layers weren't persisting. This commit adds changes
to move meilisearch building outside the dockerfile so that we can
use the rust cache action. We are also building to the musl target
so that the binary for meilisearch which is created can be used for
the uffizzi ttyd image which uses alpine.
2023-02-21 17:25:28 +05:30
39407885c2 Merge #3347
3347: Enhance language detection r=irevoire a=ManyTheFish

## Summary

Some completely unrelated Languages can share the same characters, in Meilisearch we detect the Languages using `whatlang`, which works well on large texts but fails on small search queries leading to a bad segmentation and normalization of the query.

This PR now stores the Languages detected during the indexing in order to reduce the Languages list that can be detected during the search.

## Detail

- Create a 19th database mapping the scripts and the Languages detected with the documents where the Language is detected
- Fill the newly created database during indexing
- Create an allow-list with this database and pass it to Charabia
- Add a test ensuring that a Japanese request containing kanjis only is detected as Japanese and not Chinese

## Related issues
Fixes #2403
Fixes #3513

Co-authored-by: f3r10 <frledesma@outlook.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2023-02-21 10:52:13 +00:00
a3e41ba33e Merge #3496
3496: Fix metrics feature r=irevoire a=james-2001

# Pull Request

## Related issue

Resolves: #3469
See also: #2763

## What does this PR do?
As reported, the metrics feature was broken by still using an old reference to `meilisearch_auth::actions`. This commit switches to the new location, `meilisearch_types::keys::actions`.

The original issue was not *that* clear as to exactly what was broken, and the build logs have disappeared, but it seemed to just be this one line fix. If this is not the case and I've missed the mark let me know, and I'll head back to the drawing board.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: James <james.a.may.2001@gmail.com>
2023-02-21 10:13:11 +00:00
ce807d760b Fix formatting issue on Opt struct
tab in enable_metrics_route to fix cargo fmt issues

Resolves: #3469
See also: #2763
2023-02-21 09:45:18 +00:00
bbecab8948 fix clippy 2023-02-21 10:18:44 +01:00
5cff435bf6 Add feature flags to Opt structure
Resolves: #3469
See also: #2763
2023-02-21 07:41:41 +00:00
8aa808d51b Merge branch 'main' into enhance-language-detection 2023-02-20 18:14:34 +01:00
1e9ac00800 Merge #3505
3505: Csv delimiter r=irevoire a=irevoire

Fixes https://github.com/meilisearch/meilisearch/issues/3442
Closes https://github.com/meilisearch/meilisearch/pull/2803
Specified in https://github.com/meilisearch/specifications/pull/221

This PR is a reimplementation of https://github.com/meilisearch/meilisearch/pull/2803, on the new engine. Thanks for your idea and initial PR `@MixusMinimax`; sorry I couldn’t update/merge your PR. Way too many changes happened on the engine in the meantime.

**Attention to reviewer**; I had to update deserr to implement the support of deserializing `char`s

-------

It introduces four new error messages;
- Invalid value in parameter csvDelimiter: expected a string of one character, but found an empty string
- Invalid value in parameter csvDelimiter: expected a string of one character, but found the following string of 5 characters: doggo
- csv delimiter must be an ascii character. Found: 🍰 
- The Content-Type application/json does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type text/csv.

And one error code;
- `invalid_index_csv_delimiter`

The `invalid_content_type` error code is now also used when we encounter the `csvDelimiter` query parameter with a non-csv content type.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 17:01:36 +00:00
b08a49a16e Merge #3319 #3470
3319: Transparently resize indexes on MaxDatabaseSizeReached errors r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/discussions/3280, depends on https://github.com/meilisearch/milli/pull/760

## What does this PR do?

### User standpoint

- Meilisearch no longer fails tasks that encounter the `milli::UserError(MaxDatabaseSizeReached)` error.
- Instead, these tasks are retried after increasing the maximum size allocated to the index where the failure occurred.

### Implementation standpoint

- Add `Batch::index_uid` to get the `index_uid` of a batch of tasks if there is one
- `IndexMapper::create_or_open_index` now takes an additional `size` argument that allows to (re)open indexes with a size different from the base `IndexScheduler::index_size` field
- `IndexScheduler::tick` now returns a `Result<TickOutcome>` instead of a `Result<usize>`. This offers more explicit control over what the behavior should be wrt the next tick.
- Add `IndexStatus::BeingResized` that contains a handle that a thread can use to await for the resize operation to complete and the index to be available again.
- Add `IndexMapper::resize_index` to increase the size of an index.
- In `IndexScheduler::tick`, intercept task batches that failed due to `MaxDatabaseSizeReached` and resize the index that caused the error, then request a new tick that will eventually handle the still enqueued task.

## Testing the PR

The following diff can be applied to this branch to make testing the PR easier:

<details>


```diff
diff --git a/index-scheduler/src/index_mapper.rs b/index-scheduler/src/index_mapper.rs
index 553ab45a..022b2f00 100644
--- a/index-scheduler/src/index_mapper.rs
+++ b/index-scheduler/src/index_mapper.rs
@@ -228,13 +228,15 @@ impl IndexMapper {
 
         drop(lock);
 
+        std::thread::sleep_ms(2000);
+
         let current_size = index.map_size()?;
         let closing_event = index.prepare_for_closing();
-        log::info!("Resizing index {} from {} to {} bytes", name, current_size, current_size * 2);
+        log::error!("Resizing index {} from {} to {} bytes", name, current_size, current_size * 2);
 
         closing_event.wait();
 
-        log::info!("Resized index {} from {} to {} bytes", name, current_size, current_size * 2);
+        log::error!("Resized index {} from {} to {} bytes", name, current_size, current_size * 2);
 
         let index_path = self.base_path.join(uuid.to_string());
         let index = self.create_or_open_index(&index_path, None, 2 * current_size)?;
@@ -268,8 +270,10 @@ impl IndexMapper {
             match index {
                 Some(Available(index)) => break index,
                 Some(BeingResized(ref resize_operation)) => {
+                    log::error!("waiting for resize end");
                     // Deadlock: no lock taken while doing this operation.
                     resize_operation.wait();
+                    log::error!("trying our luck again!");
                     continue;
                 }
                 Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 11b17d05..242dc095 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -908,6 +908,7 @@ impl IndexScheduler {
     ///
     /// Returns the number of processed tasks.
     fn tick(&self) -> Result<TickOutcome> {
+        log::error!("ticking!");
         #[cfg(test)]
         {
             *self.run_loop_iteration.write().unwrap() += 1;
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index 050c825a..63f312f6 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -25,7 +25,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
-    let (opt, config_read_from) = Opt::try_build()?;
+    let (mut opt, config_read_from) = Opt::try_build()?;
 
     setup(&opt)?;
 
@@ -56,6 +56,8 @@ We generated a secure master key for you (you can safely copy this token):
         _ => (),
     }
 
+    opt.max_index_size = byte_unit::Byte::from_str("1MB").unwrap();
+
     let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
 
     #[cfg(all(not(debug_assertions), feature = "analytics"))]
```
</details>

Mainly, these debug changes do the following:

- Set the default index size to 1MiB so that index resizes are initially frequent
- Turn some logs from info to error so that they can be displayed with `--log-level ERROR` (hiding the other infos)
- Add a long sleep between the beginning and the end of the resize so that we can observe the `BeingResized` index status (otherwise it would never come up in my tests)

## Open questions

- Is the growth factor of x2 the correct solution? For a `Vec` in memory it makes sense, but here we're manipulating quantities that are potentially in the order of 500GiBs. For bigger indexes it may make more sense to add at most e.g. 100GiB on each resize operation, avoiding big steps like 500GiB -> 1TiB.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


3470: Autobatch addition and deletion r=irevoire a=irevoire

This PR adds the capability to meilisearch to batch document addition and deletion together.

Fix https://github.com/meilisearch/meilisearch/issues/3440

--------------

Things to check before merging;

- [x] What happens if we delete multiple times the same documents -> add a test
- [x] If a documentDeletion gets batched with a documentAddition but the index doesn't exist yet? It should not work

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 15:00:19 +00:00
23f4e82b53 Add test ensuring that Meilisearch works on kanji only requests 2023-02-20 15:43:29 +01:00
119e6d8811 Update milli/src/search/mod.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 15:33:10 +01:00
a8f6f108e0 Merge #3515
3515: Consider null as a valid geo field r=irevoire a=irevoire

Fix #3497
Associated spec; https://github.com/meilisearch/specifications/pull/222

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 14:12:55 +00:00
1479050f7a apply review suggestions 2023-02-20 14:53:37 +01:00
97b8c32e22 Merge #3514
3514: Bump version of mini-dashboard to v0.2.6 r=irevoire a=bidoubiwa

Update the version of the mini-dashboard to v0.2.6.

See [release notes](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.6).

Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
2023-02-20 13:21:00 +00:00
cb8d5f2d4b Update Charabia to 0.7.1 2023-02-20 14:00:31 +01:00
35f6c624bc Make sure we don't leave the in memory hashmap in an inconsistent state 2023-02-20 13:55:32 +01:00
1116788475 Resize indexes when they're full 2023-02-20 13:55:32 +01:00
951a5b5832 Add IndexMapper::resize_index fn 2023-02-20 13:55:32 +01:00
1c670d7fa0 Add IndexStatus::BeingResized 2023-02-20 13:55:32 +01:00
6cc3797aa1 IndexScheduler::tick returns a TickOutcome 2023-02-20 13:55:31 +01:00
faf1e17a27 create_or_open_index takes a map_size argument 2023-02-20 13:55:31 +01:00
4c519c2ab3 Add Batch::index_uid 2023-02-20 13:55:31 +01:00
eb28d4c525 add facet test 2023-02-20 13:52:28 +01:00
9ac981d025 Remove some clippy type complexity warns by deboxing iters 2023-02-20 13:52:27 +01:00
74859ecd61 Add min and max facet stats 2023-02-20 13:52:27 +01:00
8ae441a4db Update usage of iterators 2023-02-20 13:52:27 +01:00
042d86cbb3 facet sort ascending/descending now also return the values 2023-02-20 13:52:27 +01:00
dd120e0e16 Bump version of mini-dashboard to v0.2.6 2023-02-20 13:45:57 +01:00
18796d6e6a Consider null as a valid geo object 2023-02-20 13:45:51 +01:00
c91bfeaf15 Merge #3467
3467: Identify builds git tagged with `prototype-...` in CLI and analytics r=curquiza a=dureuill

# Pull Request

## What does this PR do?

- Parses the last git tag to extract a prototype name if:
  - Current build uses the prototype tag (not after the tag) precisely
  - The prototype tag name respects the following conditions:
    1. starts with `prototype-`
    2. ends with a number
    3. the hyphen-separated segment right before the number is not a number (required to reject commits after the tag).
- Display the prototype name in the launch summary in the CLI
- Send the prototype name to analytics if any
- Update prototypes instructions in CONTRIBUTING.md

|`VERGEN_GIT_SEMVER_LIGHTWEIGHT` value | Prototype |
|---|---|
| `Some("prototype-geo-bounding-box-0-139-gcde89018")` | `None` (does not end with a number) |
| `Some("prototype-geo-bounding-box-0-139-89018")` | `None` (before the last segment is a number) |
| `Some("prototype-geo-bounding-box-0")` | `Some("prototype-geo-bounding-box-0")` |
| `Some("prototype-geo-bounding-box")` | `None` (does not end with a number) |
| `Some("geo-bounding-box-0")` | `None` (does not start with "prototype") |
| `None` | `None` | 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-20 09:27:51 +00:00
91048d209d Fix metrics feature
Metrics feature was relying on old references. Refactored with inspiration from the `get_stats` method in `meilisearch/src/routes/lib.rs`. `enable_metrics_routes` added to options in `segment_analytics`.

Resolves: #3469
See also: #2763
2023-02-17 20:11:57 +00:00
28961b2ad1 Merge #3499
3499: Use the workspace inheritance r=Kerollmops a=irevoire

Use the workspace inheritance [introduced in rust 1.64](https://blog.rust-lang.org/2022/09/22/Rust-1.64.0.html#cargo-improvements-workspace-inheritance-and-multi-target-builds).

It allows us to define the version of meilisearch once in the main `Cargo.toml` and lets all the other `Cargo.toml` files use this version.

`@curquiza` I added you as a reviewer because I had to patch some CI scripts

And `@Kerollmops`, I had to bump the `cargo_toml` crate because our version was getting old and didn't support the feature yet.

Also, in another PR, I would like to unify some of our dependencies to ensure we always stay in sync between all our crates.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-17 09:52:29 +00:00
895ab2906c apply review suggestions 2023-02-16 18:42:47 +01:00
f11c7d4b62 cargo run execute meilisearch by default 2023-02-16 18:03:45 +01:00
e79f6f87f6 make cargo fmt&clippy happy 2023-02-16 18:00:40 +01:00
5367d8f05a add two tests on the indexing of csvs 2023-02-16 17:37:11 +01:00
52686da028 test various error on the document ressource 2023-02-16 17:37:10 +01:00
8c074f5028 implements the csv delimiter without tests
Co-authored-by: Maxi Barmetler <maxi.barmetler@gmail.com>
2023-02-16 17:35:36 +01:00
49e18da23e Do not escape tag name
$() syntax is not interpreted by the Dockerfile
2023-02-16 10:53:14 +01:00
54240db495 Add note in code so one does not forget next time 2023-02-16 10:53:14 +01:00
e1ed4bc750 Change Dockerfile to also pass the VERGEN_GIT_SEMVER_LIGHTWEIGHT when building 2023-02-16 10:53:14 +01:00
9bd1cfb3a3 Ignore -dirty flag 2023-02-16 10:53:14 +01:00
a341c94871 Update contributing.md 2023-02-16 10:53:14 +01:00
f46cf46b8c Add prototype to analytics if any 2023-02-16 10:53:14 +01:00
c3a30a5a91 If using a prototype, display its name at Meilisearch startup 2023-02-16 10:53:14 +01:00
143e3cf948 Merge #3490
3490: Fix attributes set candidates r=curquiza a=ManyTheFish

# Pull Request

Fix attributes set candidates for v1.1.0

## details

The attribute criterion was not returning the remaining candidates when its internal algorithm had been exhausted.
We had a loss of candidates by the attribute criterion leading to the bug reported in the issue linked below.
After some investigation, it seems that it was the only criterion that had this behavior.

We are now returning the remaining candidates instead of an empty bitmap.

## Related issue

Fixes #3483
PR on milli for v1.0.1: https://github.com/meilisearch/milli/pull/777


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-02-15 17:38:07 +00:00
ab2adba183 update our CI scripts accordingly 2023-02-15 13:56:24 +01:00
74d1a67a99 Use the workspace inheritance feature of rust 1.64 2023-02-15 13:51:07 +01:00
91ce8a5e67 Merge #3492
3492: Bump deserr r=Kerollmops a=irevoire

Bump deserr to the latest version;
- We now use the default actix-web extractors that deserr provides (which were copy/pasted from meilisearch)
- We also use the default `JsonError` message provided by deserr instead of defining our own in meilisearch
- Finally, we get the new `did you mean?` error message. Fix #3493

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-15 10:05:05 +00:00
fd7ae1883b Merge #3495
3495: Add tests with rust nightly in CI r=curquiza a=ztkmkoo

# Pull Request

## Related issue
Fixes #3402 

## What does this PR do?
- add ci test with rust nightly
- make test with rust stable not run on schedule event

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Kebron <ztkmkoo@gmail.com>
2023-02-15 07:53:17 +00:00
42a3cdca66 get rids of the unwrap_any function in favor of take_cf_content 2023-02-14 20:06:31 +01:00
a43765d454 use the pre-defined deserr extractors 2023-02-14 20:05:30 +01:00
769576fd94 get rids of the whole error_message module since it has been integrated into the last version of deserr 2023-02-14 20:05:27 +01:00
8fb7b1d10f bump deserr 2023-02-14 20:04:30 +01:00
d494c29768 Merge #3479
3479: Unify "Bad latitude" & "Bad longitude" errors r=irevoire a=cymruu

# Pull Request

## Related issue
Fix part of #3006

## What does this PR do?
- Moved out `BadGeoLat`, `BadGeoLng`, `BadGeoBoundingBoxTopIsBelowBottom` from `FilterError` into newly introduced error type `ParseGeoError`. 
- Renamed `BadGeo` error  to `ReservedGeo`
- Used new `ParseGeoError` type in `FilterError` and `AscDescError`

Screenshot: 
![image](https://user-images.githubusercontent.com/2981598/217927231-fe23b6a3-2ea8-4145-98af-38eb61c4ff16.png)

I ran `cargo test --package milli -- --test-threads 1` and tests passed.
`--test-threads` was set to 1 because my OS complained about too many opened files.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
Co-authored-by: filip <filipbachul@gmail.com>
2023-02-14 18:35:51 +00:00
74dcfe9676 Fix a bug when you update a document that was already present in the db, deleted and then inserted again in the same transform 2023-02-14 19:09:40 +01:00
1b1703a609 make a small optimization to merge obkvs a little bit faster 2023-02-14 18:32:41 +01:00
62358bd31c Fix metrics feature
As reported, the metrics feature was broken by still using an old reference to `meilisearch_auth::actions`. This commit switches to the new location, `meilisearch_types::keys::actions`.

Resolves: #3469
See also: #2763
2023-02-14 17:29:38 +00:00
fb5e4957a6 fix and test the early exit in case a grenad ends with a deletion 2023-02-14 18:23:57 +01:00
8de3c9f737 Update milli/src/update/index_documents/transform.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-02-14 17:57:14 +01:00
43a19d0709 document the operation enum + the grenads 2023-02-14 17:55:26 +01:00
29d14bed90 get rids of the let/else syntax 2023-02-14 17:45:46 +01:00
f3b54337f9 Merge #3174
3174: Allow wildcards at the end of index names for API Keys and Tenant tokens r=irevoire a=Kerollmops

This PR introduces the wildcards at the end of the index names when identifying indexes in the API Keys and tenant tokens. It fixes #2788 and fixes #2908. This PR is based on `@akhildevelops`' work.

Note that when a tenant token filter is chosen to restrict a search, it is always the most restrictive pattern that is chosen. If we have an index pattern _prod*_ that defines _filter1_ and _p*_ that defines _filter2_, the engine will choose _filter1_ over _filter2_ as it is defined for the more restrictive pattern, _prod*_. This restrictiveness is defined by: 1. whether the pattern is exact (without _*_), and 2. the length of the pattern.

It is a continuation of work that has already started and should close #2869.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-02-14 16:12:01 +00:00
7f3ae40204 Remove a useless comment regarding the index pattern error code 2023-02-14 17:09:20 +01:00
a53536836b fmt 2023-02-14 17:04:22 +01:00
b095325bf8 Add tests with rust nightly in CI 2023-02-14 15:33:12 +00:00
d7ad39ad77 fix: clippy error 2023-02-14 00:15:35 +01:00
849de089d2 add thiserror for AscDescError 2023-02-14 00:15:35 +01:00
7f25007d31 Update milli/src/asc_desc.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2023-02-14 00:15:35 +01:00
c810af3ebf implement From<ParseGeoError> for AscDescError 2023-02-14 00:15:35 +01:00
c0b77773ba fmt asc_desc 2023-02-14 00:15:35 +01:00
7481559e8b move BadGeo to FilterError 2023-02-14 00:15:35 +01:00
83c765ce6c implement From<ParseGeoError> for FilterError 2023-02-14 00:15:35 +01:00
4c91037602 use ParseGeoError in sort parser 2023-02-14 00:15:35 +01:00
825923f6fc export ParseGeoError 2023-02-14 00:15:35 +01:00
e405702733 chore: introduce new error ParseGeoError type 2023-02-14 00:15:35 +01:00
6fa877efb0 Fix attributes set candidates 2023-02-13 17:49:52 +01:00
4b1cd10653 Return an internal error when index pattern should be valid 2023-02-13 17:49:42 +01:00
47748395dc Update an authentication comment
Co-authored-by: Many the fish <many@meilisearch.com>
2023-02-13 17:20:08 +01:00
ff595156d7 Merge #3480
3480: Gitignore vscode & jetbrains IDE folders r=curquiza a=AymanHamdoun

# Pull Request

## Related issue
There is no issue for it, and I couldn't find an appropriate category to make an issue for it.

## What does this PR do?
- It's just a gitignore edit so people who use VS Code and JetBrains IDEs (like IntelliJ) don't have to deal with accidentally committing the folder the IDE generates to store local project configs. (I honestly wanted to fork the repo to add something else, but this bothered me enough to make a PR for it first.)

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ✔️ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [✔️  ] Have you read the contributing guidelines?
- [✔️  ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ayman <ayman.s.hamdoun@gmail.com>
2023-02-13 10:47:25 +00:00
8770088df3 remove idea folder 2023-02-10 11:45:02 +04:00
827c1c8447 edit gitignore to ignore .idea and .vscode folders 2023-02-10 11:42:19 +04:00
764df24b7d Make clippy happy (again) 2023-02-09 13:21:20 +01:00
4570d5bf3a Merge remote-tracking branch 'origin/main' into temp-wildcard 2023-02-09 13:14:05 +01:00
746b31c1ce makes clippy happy 2023-02-09 12:23:01 +01:00
eaad84bd1d fix the test to handle the document deletion correctly 2023-02-09 11:29:13 +01:00
c690c4fec4 Added and modified the current API Key and Tenant Token tests 2023-02-09 11:17:30 +01:00
ea9ac46f28 stop autobatching the deletion without the index creation right with the addition 2023-02-08 21:24:27 +01:00
93db755d57 add a test to ensure we handle correctly a deletion of multiple time the same document 2023-02-08 21:03:34 +01:00
93f130a400 fix all warnings 2023-02-08 20:57:35 +01:00
860c993ef7 Handle the autobatching of deletion and addition in the scheduler 2023-02-08 20:53:19 +01:00
67dda0678f cleanup the autobatcher a little bit 2023-02-08 18:10:59 +01:00
2db6347686 update the autobatcher to batch the addition and deletion together 2023-02-08 18:07:59 +01:00
421a9cf05e provide a new method on the transform to remove documents 2023-02-08 16:06:09 +01:00
7b4b57ecc8 Fix the current tests 2023-02-08 14:54:05 +01:00
8f64fba1ce rewrite the current transform to handle a new byte specifying the kind of operation it's merging 2023-02-08 12:53:38 +01:00
9882029fa4 Merge #3456
3456: Bump tokio from 1.24.1 to 1.24.2 r=curquiza a=dependabot[bot]

Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.24.1 to 1.24.2.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/tokio-rs/tokio/commits">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=tokio&package-manager=cargo&previous-version=1.24.1&new-version=1.24.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-02-07 13:28:42 +00:00
5f56e6dd58 Bump tokio from 1.24.1 to 1.24.2
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.24.1 to 1.24.2.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/commits)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-02-07 12:14:05 +00:00
c88c3637b4 Merge #3461
3461: Bring v1 changes into main r=curquiza a=Kerollmops

Also bring back changes in milli (the remote repository) into main done during the pre-release

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Philipp Ahlner <philipp@ahlner.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-02-07 11:27:27 +00:00
97fd9ac493 Merge #3405
3405: Implement geo bounding box r=irevoire a=curquiza

Following https://github.com/meilisearch/milli/pull/672 (work from `@gmourier`)

Fixes #2761

Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-07 09:55:20 +00:00
821d92b5d0 Merge #3407
3407: Add Cargo feature for LMDB's POSIX semaphores r=dureuill a=GregoryConrad

See https://github.com/meilisearch/milli/pull/757

Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2023-02-07 08:25:20 +00:00
0b60928cbc Merge #3199
3199: Fixup dumps-destination -> dump-directory section header in help link r=curquiza a=dureuill

# Pull Request

## Related issue
See https://github.com/meilisearch/product/discussions/560#discussioncomment-4323938

## What does this PR do?
- change link in help message to the future new section header #dump-directory

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-06 17:49:32 +00:00
42114325cd Apply suggestions from code review
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-06 18:07:00 +01:00
7a38fe624f throw an error if the top left corner is found below the bottom right corner 2023-02-06 17:50:47 +01:00
1b005f697d update the syntax of the geoboundingbox filter to uses brackets instead of parens around lat and lng 2023-02-06 16:50:27 +01:00
fbec48f56e Merge remote-tracking branch 'milli/main' into bring-v1-changes 2023-02-06 16:48:10 +01:00
a377a49218 Make meiliserach depend on the local milli 2023-02-06 16:44:43 +01:00
41cbaad1cb Revert "Add git config about ownershio in Docker CI"
This reverts commit e269027cdd.
2023-02-06 16:42:16 +01:00
a015e232ab Merge remote-tracking branch 'origin/release-v1.0.0' into bring-v1-changes 2023-02-06 16:41:10 +01:00
3ebc99473f Apply suggestions from code review
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-06 13:29:37 +01:00
fadea504ed Merge #3451
3451: Pin Rust version in Clippy job r=dureuill a=curquiza

Avoid "surprising" CI failure because of clippy when rust is releasing a new version

Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-02-06 12:19:35 +00:00
d27007005e comments the geoboundingbox + forbid the usage of the lexeme method which could introduce bugs 2023-02-06 11:36:49 +01:00
fcb09ccc3d add tests on the geoBoundingBox 2023-02-02 18:19:56 +01:00
734a9ecea8 Merge #3040
3040: feat: create a preview environment for every PR using Uffizzi r=curquiza a=waveywaves

# Pull Request

## Related discussion (was created as an issue initially)
https://github.com/meilisearch/meilisearch/discussions/2883

## What does this PR do?
This PR adds gha workflows to create preview environments on every PR. This workflow also posts the preview url as a comment on the PR.
[This PR created against my fork of meilisearch](https://github.com/waveywaves/meilisearch/pull/2) demonstrates how this change behaves. 

In [the demo preview](https://pr-2-deployment-7396-meilisearch.app.uffizzi.com/) you can run the `meilisearch` binary built from the PR and can access meilisearch running from the PR by adding `/meilisearch` to the url of the PR.

E.g.: I go to the demo preview at the URL https://app.uffizzi.com/github.com/waveywaves/meilisearch/pull/2 and run `meilisearch` in the terminal. I can then access this running instance of `meilisearch` in the preview env from https://pr-2-deployment-7396-meilisearch.app.uffizzi.com/meilisearch

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vibhav Bobade <vibhav.bobde@gmail.com>
2023-02-02 16:06:38 +00:00
69fcd3d05e Add comment information about the cron job 2023-02-02 15:58:03 +01:00
1ca7778e6a Update .github/workflows/create-issue-dependencies.yml
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-02 15:54:33 +01:00
a11d992923 Update issue description for the dependency updates 2023-02-02 15:33:38 +01:00
781691191a Pin Rust version in Clippy job 2023-02-02 15:22:58 +01:00
ae8660e585 Add Token::original_span rather than making Token::span pub 2023-02-02 15:03:34 +01:00
d80ce00623 Update insta test 2023-02-02 12:34:51 +01:00
2d66fdc8e9 Apply review comments 2023-02-02 12:34:51 +01:00
b297b5deb0 cargo fmt 2023-02-02 12:34:49 +01:00
0d71c80ba6 add tests 2023-02-02 12:31:27 +01:00
b2054d3f6c Add insta test on geo filters whitespacing 2023-02-02 12:27:58 +01:00
65a3086cf1 fix test 2023-02-02 12:27:58 +01:00
426d63b01b Update insta test suite 2023-02-02 12:27:56 +01:00
b078477d80 Add error handling and earth lap collision with bounding box 2023-02-02 12:17:38 +01:00
5c525168a0 Add _geoBoundingBox parser 2023-02-02 11:57:21 +01:00
39b62b7158 Merge #3436
3436: Add more detailed contribution instructions for tests r=irevoire a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-02 10:19:41 +00:00
3f97f630ed Merge #3448
3448: Bump docker/build-push-action from 3 to 4 r=curquiza a=dependabot[bot]

Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/build-push-action/releases">docker/build-push-action's releases</a>.</em></p>
<blockquote>
<h2>v4.0.0</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://github-redirect.dependabot.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>Revert disable provenance by default if not set by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` in <a href="https://github-redirect.dependabot.com/docker/build-push-action/pull/784">docker/build-push-action#784</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.3.1...v4.0.0">https://github.com/docker/build-push-action/compare/v3.3.1...v4.0.0</a></p>
<h2>v3.3.1</h2>
<ul>
<li>Disable provenance by default if not set by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/781">#781</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.3.0...v3.3.1">https://github.com/docker/build-push-action/compare/v3.3.0...v3.3.1</a></p>
<h2>v3.3.0</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://github-redirect.dependabot.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>Add <code>attests</code>, <code>provenance</code> and <code>sbom</code> inputs by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/746">#746</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/759">#759</a>)</li>
<li>Log GitHub Actions runtime token access controls by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/707">#707</a>)</li>
<li>Examples moved to <a href="https://docs.docker.com/build/ci/github-actions/examples/">docs website</a> by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/718">#718</a>)</li>
<li>Bump minimatch from 3.0.4 to 3.1.2 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/732">#732</a>)</li>
<li>Bump csv-parse from 5.3.0 to 5.3.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/729">#729</a>)</li>
<li>Bump json5 from 2.2.0 to 2.2.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/749">#749</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.2.0...v3.3.0">https://github.com/docker/build-push-action/compare/v3.2.0...v3.3.0</a></p>
<h2>v3.2.0</h2>
<ul>
<li>Remove workaround for <code>setOutput</code> by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/704">#704</a>)</li>
<li>Docs: fix Git context link and add more details about subdir support by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/685">#685</a>)</li>
<li>Docs: named context by <a href="https://github.com/baibaratsky"><code>`@​baibaratsky</code></a>` and <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/665">#665</a>)</li>
<li>Bump <code>`@​actions/core</code>` from 1.9.0 to 1.10.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/667">#667</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/695">#695</a>)</li>
<li>Bump <code>`@​actions/github</code>` from 5.0.3 to 5.1.1 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/696">#696</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.1.1...v3.2.0">https://github.com/docker/build-push-action/compare/v3.1.1...v3.2.0</a></p>
<h2>v3.1.1</h2>
<ul>
<li>Fix GitHub token not passed with Git context if subdir defined by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/663">#663</a>)</li>
<li>Replace deprecated <code>fs.rmdir</code> with <code>fs.rm</code> by <a href="https://github.com/bendrucker"><code>`@​bendrucker</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/657">#657</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.1.0...v3.1.1">https://github.com/docker/build-push-action/compare/v3.1.0...v3.1.1</a></p>
<h2>v3.1.0</h2>
<ul>
<li><code>no-cache-filters</code> input by <a href="https://github.com/crazy-max"><code>`@​crazy-max</code></a>` (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/653">#653</a>)</li>
<li>Bump <code>`@​actions/github</code>` from 5.0.1 to 5.0.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/619">#619</a>)</li>
<li>Bump <code>`@​actions/core</code>` from 1.6.0 to 1.9.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/620">#620</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/637">#637</a>)</li>
<li>Bump csv-parse from 5.0.4 to 5.3.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/623">#623</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/650">#650</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.0.0...v3.1.0">https://github.com/docker/build-push-action/compare/v3.0.0...v3.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="3b5e8027fc"><code>3b5e802</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/784">#784</a> from crazy-max/enable-provenance</li>
<li><a href="02d3266a89"><code>02d3266</code></a> update generated content</li>
<li><a href="f403dafe18"><code>f403daf</code></a> revert disable provenance by default if not set</li>
<li>See full diff in <a href="https://github.com/docker/build-push-action/compare/v3...v4">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/build-push-action&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-02-01 18:13:15 +00:00
0bc1a18f52 Use Languages list detected during indexing at search time 2023-02-01 18:57:43 +01:00
643d99e0f9 Add expectancy test 2023-02-01 18:39:54 +01:00
a36b1dbd70 Fix the tasks with the new patterns 2023-02-01 18:21:45 +01:00
5672165e44 Bump docker/build-push-action from 3 to 4
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 3 to 4.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](https://github.com/docker/build-push-action/compare/v3...v4)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-02-01 17:02:17 +00:00
d563ed8a39 Making it work with index uid patterns 2023-02-01 17:51:30 +01:00
36cae3b480 Merge #3399
3399: Rework technical information in the README r=Kerollmops a=curquiza

Following this https://github.com/meilisearch/meilisearch/pull/3346#discussion_r1073289399

Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-02-01 14:34:55 +00:00
064158e4e2 Update test 2023-02-01 15:34:01 +01:00
77d32d0ee8 Fix codec deserialization 2023-02-01 15:26:26 +01:00
f4569b04ad Update Charabia version 2023-02-01 15:26:26 +01:00
5e12af88e2 Merge #3445
3445: Bump milli to v0.41.1 r=curquiza a=dureuill

# Pull Request

## Related issue

Fixes #3438.

## What does this PR do?
- Bump milli to [v0.41.1](https://github.com/meilisearch/milli/releases/tag/v0.41.1) that includes a bugfix for #3438 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-01 11:07:46 +00:00
231067a1c4 Bump milli to v0.41.1 2023-02-01 11:53:39 +01:00
2a1a7ef00a Integrate Uffizzi 2023-02-01 13:06:27 +05:30
758b4acea7 Merge #776
776: Reduce incremental indexing time of `words_prefix_position_docids` DB r=curquiza a=loiclec

Fixes partially https://github.com/meilisearch/milli/issues/605

The `words_prefix_position_docids` can easily contain millions of entries. Thus, iterating
over it can be very expensive. But we do so needlessly for every document addition task.

It can sometimes cause indexing performance issues when:
- a user sends many `documentAdditionOrUpdate` tasks that cannot be all batched together (for example if they are interspersed with `documentDeletion` tasks)
- the documents contain long, diverse text fields, thus increasing the number of entries in `words_prefix_position_docids`
- the index has accumulated many soft-deleted documents, further increasing the size of `words_prefix_position_docids`
- the machine running Meilisearch does not have great IO performance (e.g. slow SSD, or quota-limited by the cloud provider)

Note, before approving  the PR: the only changed file should be `milli/src/update/words_prefix_position_docids.rs`.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-31 15:52:28 +00:00
20f8184c06 Merge #3441
3441: Fix import of dump v2 r=dureuill a=irevoire

# Pull Request
This bug was introduced because of a mistake we made earlier: we said the last version to export dump v2 was v0.21.0 while it was actually v0.22.0.
To fix the bug I updated our whole v2 reader to use the code from meilisearch v0.22.0.
Also:
- Import the bugged dump in the tests
- Test the import of this dump in the v2 reader and current reader

## Related issue
Fixes #3435


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-31 13:23:57 +00:00
2f8ebd0501 Merge #3439
3439: Add git config about ownership in Docker CI r=curquiza a=curquiza

The docker CI is failing because of git usage: https://github.com/meilisearch/meilisearch/actions/runs/4053334082/jobs/6973827940

<img width="960" alt="Capture d’écran 2023-01-31 à 12 12 44" src="https://user-images.githubusercontent.com/20380692/215745119-b866bcf2-7077-48e4-b018-7a2085b23680.png">


> fatal: detected dubious ownership in repository at '/home/meili/actions-runner/_work/meilisearch/meilisearch'

I did some research and found this: https://github.com/actions/runner-images/issues/6775

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-31 12:58:59 +00:00
6be9a828fa makes clippy happy 2023-01-31 13:03:28 +01:00
4b7b2d6a90 fix the import of dump v2 generated by meilisearch v0.22.0 2023-01-31 13:03:28 +01:00
a4e8158239 Merge #774
774: Update version for the next release (v0.41.1) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-31 11:51:42 +00:00
151e52c481 Merge #3433
3433: Add prototype guide to CONTRIBUTING.md r=curquiza a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-01-31 11:25:46 +00:00
e269027cdd Add git config about ownership in Docker CI 2023-01-31 12:04:41 +01:00
a2690ea8d4 Reduce incremental indexing time of words_prefix_position_docids DB
This database can easily contain millions of entries. Thus, iterating
over it can be very expensive.

For regular `documentAdditionOrUpdate` tasks, `del_prefix_fst_words`
will always be empty. Thus, we can save a significant amount of time
by adding this `if !del_prefix_fst_words.is_empty()` condition.

The code's behaviour remains completely unchanged.
2023-01-31 11:42:24 +01:00
33f61d2cd4 Merge #775
775: Fix clippy for Rust 1.67, allow `uninlined_format_args` r=dureuill a=dureuill

# Pull Request

milli part of https://github.com/meilisearch/meilisearch/pull/3437

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-31 10:29:24 +00:00
544b581b15 Merge #3437
3437: Make clippy happy for Rust 1.67, allow uninlined_format_args r=Kerollmops a=dureuill

# Pull Request

This PR is the equivalent of #3434 for the `release-v1.0.0` branch.

See #3434 for more information.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-31 10:29:12 +00:00
2922c5c899 Fix code format 2023-01-31 11:28:05 +01:00
7681be5367 Format code 2023-01-31 11:28:05 +01:00
50bc156257 Fix tests 2023-01-31 11:28:05 +01:00
d8207356f4 Skip script,language insertion if language is undetected 2023-01-31 11:28:05 +01:00
2d58b28f43 Improve script language codec 2023-01-31 11:28:05 +01:00
fd60a39f1c Format code 2023-01-31 11:28:05 +01:00
369c05732e Add test checking that deleted docids were removed from the
script_language_docids database
2023-01-31 11:28:05 +01:00
34d04f3d3f Filter from script_language_docids database soft deleted documents 2023-01-31 11:28:05 +01:00
a27f329e3a Add tests for checking that detected script and language associated with document(s) were stored during indexing 2023-01-31 11:28:05 +01:00
b216ddba63 Delete and clear data from the new database 2023-01-31 11:28:05 +01:00
d97fb6117e Extract and index data 2023-01-31 11:28:05 +01:00
c45d1e3610 Create a new database on index and add a specialized codec for it 2023-01-31 11:28:05 +01:00
5c0668afcf clippy: allow uninlined_format_args 2023-01-31 11:13:47 +01:00
20f05efb3c clippy: needless_lifetimes 2023-01-31 11:12:59 +01:00
cbf029f64c clippy: --fix 2023-01-31 11:12:59 +01:00
bffabf9cc6 Update version for the next release (v0.41.1) in Cargo.toml files 2023-01-31 09:56:22 +00:00
f647b20818 Merge #3434
3434: Make clippy happy for Rust 1.67, allow `uninlined_format_args` r=Kerollmops a=dureuill

# Pull Request

This PR allows `uninlined_format_args` in CI for clippy.

This is due to https://github.com/rust-lang/rust-clippy/issues/10087, which in particular has correctness issues wrt edition 2018 crates, and is a big change altogether. https://github.com/rust-lang/rust-clippy/pull/10265 is already open in order to change the category of this lint to "pedantic", meaning that if this latter PR merges, a future Rust release will accept our code unmodified wrt uninlined format arguments.

As a result, this PR introduces the following changes:

1. Allow `uninlined_format_args` in the clippy command in CI.
2. Use rewind rather than seek(0)
3. Remove lifetimes that clippy deems needless.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-31 09:45:12 +00:00
924d5d4c11 clippy: remove needless lifetimes 2023-01-31 10:40:48 +01:00
771a367b97 clippy: use rewind instead of seek 0 2023-01-31 10:40:48 +01:00
07603373f3 clippy: allow uninlined_format_args 2023-01-31 10:15:07 +01:00
d91f8fc493 clippy: Allow uninlined_format_args in CI 2023-01-31 09:56:26 +01:00
3296cf7ae6 clippy: remove needless lifetimes 2023-01-31 09:32:40 +01:00
89675e5f15 clippy: Replace seek 0 by rewind 2023-01-31 09:32:40 +01:00
47b7d515ed Add more detailed contribution instructions for tests 2023-01-30 17:39:05 +01:00
2ba4629938 Update CONTRIBUTING.md
Co-authored-by: Many the fish <many@meilisearch.com>
2023-01-30 15:56:30 +01:00
982dd76042 Improve readability 2023-01-30 14:36:22 +01:00
3505ee47f8 Add volume to docker command 2023-01-30 14:33:50 +01:00
b2d25c07d7 Add guide to create a proto 2023-01-30 14:31:36 +01:00
b9d8bd77fc Update README.md
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-01-26 18:14:00 +01:00
8a66ba01d8 Update README.md
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-01-26 18:13:53 +01:00
8a6d548041 Update README.md
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-01-26 18:13:08 +01:00
b452358124 Update README.md
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-01-26 18:12:56 +01:00
bfb1f9279b Merge #3420 #3422
3420: Add image hyperlink in README.md r=curquiza a=gregsadetsky

# Pull Request

## What does this PR do?
- tiny README.md improvement: under "SDKs & integration tools", add a hyperlink to the image with all of the language logos so that clicking the image leads to the integrations page. Otherwise, right now, clicking this image leads to the image file in the repo, which is not really useful.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [X] Have you read the contributing guidelines?
- [X] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


3422: Remove cache from test CI r=dureuill a=curquiza

Comment the lines in CIs where we use the test CIs
We indeed have cache issues (lack of space on the machine) when running our test CIs
https://github.com/meilisearch/meilisearch/pull/3403

Co-authored-by: Greg Sadetsky <lepetitg@gmail.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-25 16:25:53 +00:00
48dabd27ea Update .github/workflows/rust.yml
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-25 16:58:15 +01:00
4549e0a36e Merge #3415
3415: Test all the errors of wrong `_geo` field and bump milli r=dureuill a=irevoire

## Attention to reviewer

The first commit is only a refactoring of the test suite to use snapshot tests everywhere instead of `assert_eq`.
It doesn’t change the content of anything and there is probably nothing to review. I just made it for maintenance purpose in the future.


Fix https://github.com/meilisearch/meilisearch/issues/3414

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-25 15:54:42 +00:00
cac93f149e fix the tests after rebasing 2023-01-25 16:52:54 +01:00
481df7a8b6 Update meilisearch/tests/documents/add_documents.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-25 16:45:11 +01:00
8356f109c1 bump milli to fix the last test 2023-01-25 16:45:11 +01:00
934f2b3cb5 exhaustively test all the errors that can arise from a bad geo field 2023-01-25 16:45:11 +01:00
a3f1b8fdb9 refactor the test suite of the add_documents module to use snapshot tests when possible 2023-01-25 16:45:11 +01:00
9c3830a19c Remove cache everywhere 2023-01-25 16:35:02 +01:00
ff6b8dfac4 Remove cache from Windows and macOs CIs 2023-01-25 16:24:04 +01:00
ec7de4bae7 Make it work for all routes including stats and index swaps 2023-01-25 16:12:40 +01:00
d963c2ce55 Merge #3419
3419: Test all the api key error codes r=dureuill a=irevoire

Partially fix #3325

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-25 15:09:19 +00:00
5beb1aab7d Merge #3418
3418: Compute the size of the auth-controller, index-scheduler and all update files in the global stats r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3201

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-25 14:05:17 +00:00
184b8afd9e Make it work in the CreateApiKey struct 2023-01-25 15:01:50 +01:00
a858531574 apply review comments 2023-01-25 14:51:36 +01:00
29961b8c6b Make it work with the dumps 2023-01-25 14:41:36 +01:00
0b08413c98 Introduce the IndexUidPattern type 2023-01-25 14:22:17 +01:00
474d4ec498 Add tests for the index patterns 2023-01-25 14:22:16 +01:00
bf94f89035 Update index-scheduler/src/lib.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-25 11:31:50 +01:00
3bcff60d1c makes clippy happy 2023-01-25 11:31:48 +01:00
04c4487660 update the analytics with the new stats method 2023-01-25 11:25:04 +01:00
c92948b143 Compute the size of the auth-controller, index-scheduler and all update files in the global stats 2023-01-25 11:25:02 +01:00
0544b60974 Merge #3409
3409: Bump libgit2-sys from 0.14.1+1.5.0 to 0.14.2+1.5.1 r=Kerollmops a=dependabot[bot]

Bumps [libgit2-sys](https://github.com/rust-lang/git2-rs) from 0.14.1+1.5.0 to 0.14.2+1.5.1.
<details>
<summary>Commits</summary>
<ul>
<li><a href="a233483a39"><code>a233483</code></a> Update to libgit2 1.5.1</li>
<li><a href="bce15556ef"><code>bce1555</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/909">#909</a> from ehuss/ssh-keys</li>
<li><a href="222fbf3b9e"><code>222fbf3</code></a> Bump versions</li>
<li><a href="fa41943135"><code>fa41943</code></a> Change the certificate_check callback to support passthrough.</li>
<li><a href="84e21aad4e"><code>84e21aa</code></a> Add ability to get the SSH host key and its type.</li>
<li><a href="e6aa6666b9"><code>e6aa666</code></a> Bump git2-curl version. (<a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/861">#861</a>)</li>
<li><a href="46674cebd9"><code>46674ce</code></a> Fix warning about unused_must_use for Box::from_raw (<a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/860">#860</a>)</li>
<li><a href="951dce9dea"><code>951dce9</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/858">#858</a> from davidkna/git2150</li>
<li><a href="8871f8e9b3"><code>8871f8e</code></a> bump libgit2 to 1.5.0</li>
<li><a href="04278a24ba"><code>04278a2</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/839">#839</a> from davidkna/libgit2_143</li>
<li>Additional commits viewable in <a href="https://github.com/rust-lang/git2-rs/compare/0.14.1...libgit2-sys-0.14.2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=libgit2-sys&package-manager=cargo&previous-version=0.14.1+1.5.0&new-version=0.14.2+1.5.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-01-25 09:34:34 +00:00
4223c51838 Add image hyperlink in README.md 2023-01-24 15:24:09 -05:00
b3c2a4ae27 Merge #3412
3412: When adding documents, trying to update the primary-key now throws an error r=Kerollmops a=irevoire

While updating the test suite, I also noticed an issue with the indexed_documents value of failed tasks and had to update it. I also named a bunch of snapshots that had no name, sorry 😬

Fixes https://github.com/meilisearch/meilisearch/issues/3385
Fixes https://github.com/meilisearch/meilisearch/issues/3411

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-24 17:14:11 +00:00
c7b2e3be87 apply review comments 2023-01-24 17:54:43 +01:00
aa17a54feb test all the api key error codes 2023-01-24 17:30:35 +01:00
6f71a2b38b Merge #3403
3403: Add `--all` to test CI r=curquiza a=curquiza

Discussed with `@irevoire` [here](https://meilisearch.slack.com/archives/G01A1F4KVGU/p1674144546920649?thread_ts=1674144456.561199&cid=G01A1F4KVGU) (internal link)

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-24 16:23:08 +00:00
898160587f Merge #3416
3416: Add tests on the index resource r=Kerollmops a=irevoire

Fix part of https://github.com/meilisearch/meilisearch/issues/3325

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-24 15:26:18 +00:00
7c9935f96a Merge #769
769: Modify README to prevent contributions r=Kerollmops a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-01-24 15:14:31 +00:00
f7ae8bc065 Update README.md
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-24 15:58:41 +01:00
3d8a3d22d1 Update README.md
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-24 15:58:34 +01:00
30f88350c7 Merge #773
773: bump milli r=Kerollmops a=irevoire

I need a new release of milli for https://github.com/meilisearch/meilisearch/pull/3415

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-24 14:51:32 +00:00
4c4baaf1ce Merge #3387
3387: Update create-issue-dependencies.yml r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-01-24 14:44:09 +00:00
55e8046551 bump milli 2023-01-24 13:52:21 +01:00
32364e9919 add tests on the index resource 2023-01-24 13:20:20 +01:00
4e4d8dfda7 Merge #772
772: Throw an error on unknown fields specified in the _geo field r=irevoire a=irevoire

Fix parts of https://github.com/meilisearch/meilisearch/issues/3414

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-24 11:36:00 +00:00
de3c4f1986 throw an error on unknown fields specified in the _geo field 2023-01-24 12:23:24 +01:00
ea3b269b77 reformat 2023-01-23 23:59:34 +01:00
a4be4c49e8 Update index-scheduler/src/batch.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-23 23:58:03 +01:00
7d1ebb7295 add test on the autobatcher layer 2023-01-23 20:56:12 +01:00
e664f09045 Merge #3396
3396: Update our error message about negative integer r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3394

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-23 19:50:04 +00:00
767cb725a5 reimplement the batching of task with or without primary key in the autobatcher 2023-01-23 20:18:22 +01:00
13c2cd700d Update error message about negative integer 2023-01-23 18:08:46 +01:00
fea41ca788 Merge #3404
3404: Fix matching strategy error r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #3391


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-23 17:04:05 +00:00
217504fff3 Merge #3406
3406: Master Key: Implements errors and warnings from the specification r=irevoire a=dureuill

<sub>Now in technicolor</sub>

# Pull Request

## What does this PR do?
- Uses `atty` and `termcolor` as dependency
- Use these dependencies to print colored background for warning messages
- Update messages to match https://github.com/meilisearch/specifications/pull/209

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-23 16:39:18 +00:00
5672118bfa When adding documents, trying to update the primary-key now throws an error
While updating the test suite I also noticed an issue with the indexed_documents value of failed task and had to update it.
I also named a bunch of snapshots that had no name sorry 😬
2023-01-23 17:32:13 +01:00
57682cbabe Fix test url after #3398 2023-01-23 15:43:17 +01:00
5dd582918d Add test 2023-01-23 15:40:42 +01:00
74747b65b1 Merge #3395
3395: Indicate filterable attributes in facet distributions when user requests a non filterable one. r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #3390 

## What does this PR do?
- bump milli & deserr
- Update and add tests

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-23 13:53:55 +00:00
c79b6a1ee4 bump milli 2023-01-23 14:13:19 +01:00
f0e6b9c0c5 Update deserr to 0.3.0 2023-01-23 14:13:04 +01:00
56db54486c Add tests 2023-01-23 14:00:30 +01:00
a9b3f91467 Add missing space
Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
2023-01-23 10:33:30 +01:00
5f4497935f Bump libgit2-sys from 0.14.1+1.5.0 to 0.14.2+1.5.1
Bumps [libgit2-sys](https://github.com/rust-lang/git2-rs) from 0.14.1+1.5.0 to 0.14.2+1.5.1.
- [Release notes](https://github.com/rust-lang/git2-rs/releases)
- [Commits](https://github.com/rust-lang/git2-rs/compare/0.14.1...libgit2-sys-0.14.2)

---
updated-dependencies:
- dependency-name: libgit2-sys
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-01-20 23:38:40 +00:00
3f69dd6450 feat: add Cargo feature for LMDB's POSIX semaphores 2023-01-19 12:08:38 -05:00
1c4b1b3b2d Merge #770
770: Update deserr v0.3.0 r=irevoire a=ManyTheFish

related to https://github.com/meilisearch/meilisearch/issues/3391


Co-authored-by: Many the fish <many@meilisearch.com>
2023-01-19 17:05:56 +00:00
0de9a3ffe7 Implements errors and warnings from the specification
Now in technicolor
2023-01-19 18:04:45 +01:00
b4f1e9bc36 Merge #771
771: Update version for the next release (v0.40.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-19 16:45:20 +00:00
abd65d9307 Update version for the next release (v0.40.0) in Cargo.toml files 2023-01-19 16:43:45 +00:00
30fc376713 Update deserr v0.3.0 2023-01-19 17:37:30 +01:00
2a1787ed22 Add --all in test CI 2023-01-19 17:26:47 +01:00
d1a31afdd6 Modify README to prevent contributions 2023-01-19 17:17:34 +01:00
60018d0fe4 Merge #3343
3343: Extract creation and last updated timestamp for v3 dump r=curquiza a=FrancisMurillo

# Pull Request

## Related issue
Fixes #2988

## What does this PR do?

Inspired by the v4 dump implementation, this extracts the first `createdAt` and last `updatedAt` fields by parsing the task queue.

Questions:
- Should the parsing of the tasks be cached instead of being parsed for every index since it might add a performance penalty?
- I am not sure if the `created_at` and `processed_at` fields are correct 
- Should I assume the data is sorted in some order like with `uuid` or `updateId`? I assumed the list is unordered.
- I was planning to populate my dev instance with data and dump my data. Is there a way to dump with previous versions?

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Francis Murillo <evacuee.overlap.vs3op@aleeas.com>
2023-01-19 16:14:21 +00:00
8fb685f5aa Merge #3401
3401: improve the error messages for the immutable fields r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3400

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-19 15:52:50 +00:00
e3742a38d4 improve the error messages for the immutable fields 2023-01-19 16:49:44 +01:00
13b1abceaf Rework technical information in the README 2023-01-19 16:23:06 +01:00
e16b5c615a Merge #3398
3398: Error links use underscores again r=irevoire a=dureuill

# Pull Request

## Related issue

Follow-up of #3288 where [it was decided](https://github.com/meilisearch/meilisearch/pull/3288#issuecomment-1396733603) to revert course on the separator to use in error anchors.

## What does this PR do?
- Use `_` again as separator in anchors of error link
- Fix tests


Impacts `@meilisearch/docs-team` : we need `_`-separated anchors to be generated in the online documentation to match the ones emitted from the engine.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-19 15:17:33 +00:00
3521a3a0b2 Merge #763
763: Fixes error message when lat and lng are unparseable r=loiclec a=ahlner

# Pull Request

## Related issue
Fixes partially [#3007](https://github.com/meilisearch/meilisearch/issues/3007)

## What does this PR do?
- Changes function validate_geo_from_json to return a BadLatitudeAndLongitude if lat or lng is a string and not parseable to f64
- implemented some unittests
- Derived PartialEq for GeoError to use assert_eq! in tests

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Philipp Ahlner <philipp@ahlner.com>
2023-01-19 15:15:46 +00:00
d2420f5c8f Fix non insta tests 2023-01-19 16:10:05 +01:00
72e2b220ed Fix tests 2023-01-19 15:48:20 +01:00
40a53f8824 Merge #767
767: Update version for the next release (v0.39.2) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-19 14:48:12 +00:00
b0c33ed6d2 Error codes are underscore again 2023-01-19 15:47:01 +01:00
f5ca421227 Superfluous test removed 2023-01-19 15:39:21 +01:00
3f048927a0 Update version for the next release (v0.39.2) in Cargo.toml files 2023-01-19 14:29:09 +00:00
e7c0617699 Merge #766
766: Indicate filterable attributes when the user sets a non filterable attribute in facet distributions r=irevoire a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3390

## What does this PR do?
- Title

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-19 14:18:13 +00:00
a1e9c44fe5 Merge #3389
3389: Return `invalid_search_facets` rather than `bad_request` when using facet on a non filterable attribute r=irevoire a=dureuill

# Pull Request

## Related issue

Fixes https://github.com/meilisearch/meilisearch/issues/3384

## What does this PR do?

- title
- also adds a test

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-19 13:19:22 +00:00
7df1dda002 Merge #3393
3393: improve the error message when no task filters are specified for the cancelation or deletion of tasks r=dureuill a=irevoire

Close https://github.com/meilisearch/meilisearch/issues/3392

Was already present in v0.30

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-19 12:55:52 +00:00
3d8ca62c35 InvalidFacetDistribution returns invalid_search_facet 2023-01-19 13:41:26 +01:00
e8e7070cc6 improve the error message when no task filters are specified for the cancelation or deletion of tasks 2023-01-19 12:42:08 +01:00
798aa4ee92 Fix clippy issues 2023-01-19 19:38:20 +08:00
4fd6fd9bef Indicate filterable attributes when the user set a non filterable attribute in facet distributions 2023-01-19 12:25:18 +01:00
f857d9c2df Merge #3383
3383: Fix api key patch r=irevoire a=irevoire

This was introduced in the previous rc

Fix https://github.com/meilisearch/meilisearch/issues/3374

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-19 10:05:09 +00:00
a2cd7214f0 Fixes error message when lat/lng are unparseable 2023-01-19 10:10:26 +01:00
0ce1d6d487 Update create-issue-dependencies.yml 2023-01-18 23:43:33 +01:00
d0988e115f fix the patch of description and name for the api-key 2023-01-18 19:07:26 +01:00
5dcb920fb4 improve the tests 2023-01-18 18:27:00 +01:00
b3166df7ea Merge #3372
3372: Enhance facet string normalization r=ManyTheFish a=ManyTheFish

# Pull Request

Use compatibility decomposition normalizer in facet string extraction in order to have a more human friendly sort order.

Now, [é (U+00E9)](https://www.compart.com/fr/unicode/U+00E9) is converted to [e (U+0065)](https://www.compart.com/fr/unicode/U+0065) + [◌́ (U+0301)](https://www.compart.com/fr/unicode/U+0301). This way any word starting with an accented/diacritized version of a character is put just after the words starting with the unaccented version of the character.

## Related issue

Fixes #3260 


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-01-18 17:17:53 +00:00
6f7e0c431a Merge #3341
3341: add functional + error tests on the swap_indexes route and fix a confusing error message r=loiclec a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3340
Fix part of https://github.com/meilisearch/meilisearch/issues/3325
Fix https://github.com/meilisearch/meilisearch/issues/3381

Test both the functionality and the error codes

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-18 16:32:22 +00:00
00f6af6475 fix a wrong error message 2023-01-18 17:26:48 +01:00
1803998017 Merge #3335 #3353
3335: Remove test badge r=curquiza a=curquiza

Suggestion of removal: from my point of view, this badge does not provide any useful information, and most of all is often outdated. Currently ours is "no status" despite our tests passing.
Plus, sometimes our tests are not passing because we are still in development, but it does not mean our current provided binaries are not.

<img width="619" alt="Capture d’écran 2023-01-12 à 14 06 40" src="https://user-images.githubusercontent.com/20380692/212074200-f9e3ab3e-ad1d-4171-bd13-46584c3cd117.png">


3353: Bump svenstaro/upload-release-action from 2.3.0 to 2.4.0 r=curquiza a=dependabot[bot]

Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.3.0 to 2.4.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/releases">svenstaro/upload-release-action's releases</a>.</em></p>
<blockquote>
<h2>2.4.0</h2>
<ul>
<li>Update to node 16</li>
<li>Bump most dependencies</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md">svenstaro/upload-release-action's changelog</a>.</em></p>
<blockquote>
<h2>[2.4.0] - 2023-01-09</h2>
<ul>
<li>Update to node 16</li>
<li>Bump most dependencies</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="2728235f7d"><code>2728235</code></a> 2.4.0</li>
<li><a href="c2e0608dc4"><code>c2e0608</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/svenstaro/upload-release-action/issues/88">#88</a> from svenstaro/dependabot/npm_and_yarn/json5-1.0.2</li>
<li><a href="bd74772a1a"><code>bd74772</code></a> Don't shadow vars</li>
<li><a href="16e7903b2d"><code>16e7903</code></a> Bump json5 from 1.0.1 to 1.0.2</li>
<li><a href="f2c549b117"><code>f2c549b</code></a> Bump some more deps</li>
<li><a href="7a7d004438"><code>7a7d004</code></a> Bump some deps</li>
<li><a href="9c4a92ec0d"><code>9c4a92e</code></a> Use explicit any</li>
<li><a href="039214a996"><code>039214a</code></a> Bump jest and typescript versions</li>
<li><a href="2b373356cb"><code>2b37335</code></a> Update to node16</li>
<li><a href="fb1eb39e74"><code>fb1eb39</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/svenstaro/upload-release-action/issues/75">#75</a> from svenstaro/dependabot/npm_and_yarn/jsdom-16.7.0</li>
<li>Additional commits viewable in <a href="https://github.com/svenstaro/upload-release-action/compare/2.3.0...2.4.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=svenstaro/upload-release-action&package-manager=github_actions&previous-version=2.3.0&new-version=2.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-01-18 15:45:21 +00:00
3e5b3df487 Merge #3370 #3373 #3375
3370: make the swap indexes not found errors return an IndexNotFound error-code r=irevoire a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3368

3373: fix a wrong error code and add tests on the document resource r=irevoire a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3371

3375: Avoid deleting all task invalid canceled by r=irevoire a=Kerollmops

Fixes #3369 by making sure that at least one `canceledBy` task filter parameter matches something.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-01-18 15:21:11 +00:00
e89973f1bf Do not delete all tasks when no canceled-by matches 2023-01-18 15:50:46 +01:00
d3c796af38 Add a new test to check that invalid canceledBy works correctly 2023-01-18 15:50:46 +01:00
182eea1f17 Introduce a canceledBy filter for the tests 2023-01-18 15:50:42 +01:00
1af3089456 Merge #3348
3348: fix cargo flaky r=irevoire a=irevoire

Partially fix https://github.com/meilisearch/meilisearch/issues/3273

Ideally, we should revert this commit and fix cargo-flaky directly to ensure we never forget to add a sub-crate to the CI.

-----

Here is an example of the CI running (and thus working); https://github.com/meilisearch/meilisearch/actions/runs/3932783699/jobs/6725755801

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-18 14:46:50 +00:00
a4476c20f8 fix a wrong error code and add tests on the document resource 2023-01-18 15:28:02 +01:00
d1fc42b53a Use compatibility decomposition normalizer in facets 2023-01-18 15:02:13 +01:00
e64571a881 Add test sorting string with diacritics 2023-01-18 14:43:38 +01:00
57da80900d make the swap indexes not found errors return an IndexNotFound error code 2023-01-18 14:16:00 +01:00
7322f4e78e Merge #3355
3355: fix the wrong error code on minWordSizeForTypos r=irevoire a=irevoire

Fix #3354

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-18 12:25:03 +00:00
497187083b Add test for bug #3007: Wrong error message
Adds a test for #3007: Wrong error message when lat and lng are
unparseable
2023-01-18 13:24:26 +01:00
0f727d079b fix the wrong error code on minWordSizeForTypos 2023-01-18 12:28:46 +01:00
32e2848a74 Bump svenstaro/upload-release-action from 2.3.0 to 2.4.0
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.3.0 to 2.4.0.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/2.3.0...2.4.0)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-01-18 11:02:44 +00:00
6b3da8a6de Merge #3346
3346: Import milli 🎉 r=Kerollmops a=Kerollmops

Fixes https://github.com/meilisearch/meilisearch/issues/2901

Main work
- integrate the milli repository as an internal crate into this repo  
- Update the Cargo.toml accordingly
- Ensure meilisearch-type now uses the internal milli crate and not the remote repository
- Update the milli's version to follow the meilisearch one

Also
- Removed the beta tests in test CI (will be re-integrated later if needed)
- Move and modify milli's README into the `milli` folder
- remove the script folder from `milli`
- Removed useless CI (release-drafter and enforce-label)

⚠️ Also, import all of `release-v1.0.0` up to and including [a5c4fb](a5c4fbbcea) (the merge of PR https://github.com/meilisearch/meilisearch/pull/3334)

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Samyak S Sarnayak <samyak201@gmail.com>
Co-authored-by: unvalley <kirohi.code@gmail.com>
Co-authored-by: Samyak Sarnayak <samyak201@gmail.com>
2023-01-18 10:19:42 +00:00
0769090dd6 Add a note in the README about the crates versioning 2023-01-18 10:08:12 +01:00
82bdb54537 Update the index swap tests after git rebase 2023-01-18 09:40:41 +01:00
b6ec1f1c6d add functional + error tests on the swap_indexes route 2023-01-18 09:36:04 +01:00
1d507c84b2 Fix the formatting 2023-01-17 18:25:55 +01:00
1b78231e18 Make clippy happy 2023-01-17 18:25:54 +01:00
2b1f6a7f11 Fix the CI to ignore a missing file 2023-01-17 16:26:03 +01:00
6993924f32 Use finished_at for v3 dumps instead 2023-01-17 23:11:49 +08:00
41a970247e Merge #3339
3339: Continued deserr integration r=irevoire a=loiclec

Fix https://github.com/meilisearch/meilisearch/issues/3337
Fix https://github.com/meilisearch/meilisearch/issues/3338

1. Add new error codes that should have been implemented earlier:
- `MissingApiKeyActions`
- `MissingApiKeyExpiresAt`
- `MissingApiKeyIndexes`
- `MissingSwapIndexes`

2. Fix a bug where it was possible to create an API key without specifying the value of `expiresAt`

3. Improve the error messages generated by deserr. Have specific error messages for JSON and QueryParam deserialisation errors.

4. Improve error tests by passing query params as arguments to `GET` routes directly instead of using an intermediary JSON object

5. [Use invalid_index_uid error code in more places](e225608337)

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-17 14:41:22 +00:00
e225608337 Use invalid_index_uid error code in more places 2023-01-17 15:28:06 +01:00
56e79fa850 Update task snapshot test and clean up details 2023-01-17 13:19:04 +01:00
c71a8ea183 Update to latest milli and deserr 2023-01-17 13:10:38 +01:00
0c7d1f761e Merge #765
765: Update version for the next release (v0.39.1) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-17 11:04:26 +00:00
e3d30e28ef Update version for the next release (v0.39.1) in Cargo.toml files 2023-01-17 10:50:29 +00:00
63af1e9f28 Merge #764
764: Update deserr to latest version r=irevoire a=loiclec

Update deserr to 0.1.5, which changes the `DeserializeFromValue` trait, getting rid of the `default()` method.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-17 10:39:36 +00:00
f073a86387 Update deserr to latest version 2023-01-17 11:28:19 +01:00
b781f9a0f9 cargo fmt 2023-01-17 11:07:07 +01:00
07b90dec08 Remove unused proptest dependency 2023-01-17 11:07:07 +01:00
9194508a0f Refactor query parameter deserialisation logic 2023-01-17 11:07:07 +01:00
9dd01ff44b fix cargo flaky 2023-01-17 11:03:44 +01:00
49ddaaef49 Fix missing_swap_indexes error code and handling of expires_at param...
of create api key route
2023-01-17 09:43:07 +01:00
766dd830ae Update deserr to latest version + add new error codes for missing fields
- missing_api_key_indexes
- missing_api_key_actions
- missing_api_key_expires_at

- missing_swap_indexes_indexes
2023-01-17 09:43:07 +01:00
436ae4e466 Improve error messages generated by deserr
Split Json and Query Parameter error types
2023-01-17 09:43:07 +01:00
507a7bad96 Use the local milli subcrate 2023-01-16 17:35:54 +01:00
cde62fcb5b Merge remote-tracking branch 'origin/release-v1.0.0' into import-milli 2023-01-16 17:35:18 +01:00
03a82136dc Remove the useless cli subcrate 2023-01-16 17:08:43 +01:00
e68758cec4 Refine the cargo workspace profile settings 2023-01-16 17:04:25 +01:00
4fb47492e5 Make clippy happy 2023-01-16 16:35:58 +01:00
5bab8cf7ec Remove useless CI configs 2023-01-16 16:31:46 +01:00
97005dd505 Bump the milli-imported crates to v1.0.0 2023-01-16 16:29:12 +01:00
eabef5194a Remove the useless script folder 2023-01-16 16:26:07 +01:00
ebb2494879 Add a README to the milli crate 2023-01-16 16:25:12 +01:00
0cec352d2b Merge remote-tracking branch 'milli/main' into import-milli 2023-01-16 16:20:22 +01:00
a97281af08 Extract createdAt and updatedAt from v3 dump 2023-01-13 22:45:45 +08:00
a5c4fbbcea Merge #3334
3334: Add specific error codes `immutable_...` r=irevoire a=loiclec

Add the following error codes:

When an immutable field of API key is sent to the `PATCH /keys` route: 
- `ImmutableApiKeyUid` 
- `ImmutableApiKeyKey`
- `ImmutableApiKeyActions`
- `ImmutableApiKeyIndexes`
- `ImmutableApiKeyExpiresAt`
- `ImmutableApiKeyCreatedAt`
- `ImmutableApiKeyUpdatedAt`

When an immutable field of Index is sent to the `PATCH /indexes/{uid}` route:
- `ImmutableIndexUid`
- `ImmutableIndexCreatedAt`
- `ImmutableIndexUpdatedAt`

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-12 15:31:38 +00:00
21b8cd53b7 reformat 2023-01-12 16:20:24 +01:00
7f80b116bc Add specific immutable_field error codes 2023-01-12 16:20:14 +01:00
341f8478b4 Merge #3330
3330: test the error codes on the task routes + fix the missing error codes on the limit and from r=dureuill a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-12 15:02:44 +00:00
79c7f65c30 make a test more reliable 2023-01-12 15:39:28 +01:00
2bc60c29fc Merge #3336
3336: Add missing `needs:` to the git latest tag workflow r=curquiza a=curquiza

Fixes this problem: the workflow to update the latest git tag was triggered despite the first check failed

<img width="580" alt="Capture d’écran 2023-01-12 à 15 07 00" src="https://user-images.githubusercontent.com/20380692/212087926-975eb387-c8c9-4789-8a62-a56143b9bbd4.png">


This led to our latest git tag being updated: it now corresponds to the `v1.0.0-rc.0` tag instead of `v0.30.5`. (I'm fixing this right now)

<img width="586" alt="Capture d’écran 2023-01-12 à 15 08 15" src="https://user-images.githubusercontent.com/20380692/212088136-f4bc2e9c-d824-4c23-8213-52598c742ebd.png">


Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-12 14:24:31 +00:00
680ea39bba Add missing `needs:` to the git latest tag workflow 2023-01-12 15:04:11 +01:00
a524dfb713 fix the analytics 2023-01-12 14:49:50 +01:00
705fcaa3b8 reformat the imports 2023-01-12 14:09:15 +01:00
55605435bc Remove test badge 2023-01-12 14:04:48 +01:00
a09b6a341d Move tasks route to deserr 2023-01-12 13:57:29 +01:00
387874ea26 test the error codes on the task routes 2023-01-12 13:46:19 +01:00
5c1a7c3b9a Merge #3329
3329: Refactor error handling from deserr r=irevoire a=loiclec

Close https://github.com/meilisearch/meilisearch/issues/3318
Close https://github.com/meilisearch/meilisearch/issues/3289

[TODO]

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-11 18:15:32 +00:00
6d658f4c52 fix a wrong error code + update some error messages 2023-01-11 19:14:11 +01:00
bf573885ea integrate the latest version of milli 2023-01-11 19:08:39 +01:00
a68ac3a1dc reformat the headers 2023-01-11 19:08:39 +01:00
b252c87197 add tests on the sub settings routes 2023-01-11 19:08:39 +01:00
b0b7ad7caf Apply review suggestions 2023-01-11 19:08:39 +01:00
c91ffec72e Update Cargo.toml 2023-01-11 19:08:39 +01:00
1fc11264e8 Refactor deserr integration 2023-01-11 19:08:39 +01:00
2bc2e99ff3 Simplify declaration of the error codes 2023-01-11 19:08:39 +01:00
808e184069 Merge #3324
3324: Add a test on the search route for each possible error codes r=irevoire a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-11 16:08:19 +00:00
e6bea99974 Merge #762
762: Update version for the next release (v0.39.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-11 15:07:33 +00:00
9e32ac7cb2 Update version for the next release (v0.39.0) in Cargo.toml files 2023-01-11 15:05:06 +00:00
302d6cccd7 Merge #761
761: Integrate deserr r=irevoire a=loiclec

1. `Setting<T>` now implements `DeserializeFromValue`
2. The settings now store ranking rules as strongly typed `Criterion` instead of `String`, since the validation of the ranking rules will be done on meilisearch's side from now on


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-11 14:35:15 +00:00
21b7d709ad Merge #759
759: Change primary key inference error messages r=Kerollmops a=dureuill

# Pull Request

## Related issue
Milli part of https://github.com/meilisearch/meilisearch/issues/3301

## What does this PR do?
- Change error message strings

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-11 14:04:25 +00:00
7a30d98264 fix a flaky test 2023-01-11 14:54:29 +01:00
02fd06ea0b Integrate deserr 2023-01-11 13:56:47 +01:00
d0a85057a3 fix the bad filter test 2023-01-11 11:37:12 +01:00
b3574de809 Merge #3321
3321: Update the system http error code to return an internal server error r=irevoire a=irevoire

Fix parts of https://github.com/meilisearch/meilisearch/issues/3318

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-11 10:27:13 +00:00
59704c000c Merge #3326
3326: Test error codes on settings r=irevoire a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-11 10:07:52 +00:00
b117c688f5 Merge #3328
3328: Replace published by released r=Kerollmops a=curquiza

Fix a bug introduced here: https://github.com/meilisearch/meilisearch/pull/3229

Regarding this line:

> * In multiple CIs: replace the `released` type by `published`, see [here](https://stackoverflow.com/questions/59319281/github-action-different-between-release-created-and-published) why. Will not impact anything, but will prevent to fail our future automation

I made a mistake by replacing some irrelevant lines in the
- latest git workflow
- APT and brew workflow

-> the consequence was that the workflows ran when releasing `rc0` but they shouldn't have. Luckily the check inside the workflows prevented any release.

<img width="1366" alt="Capture d’écran 2023-01-11 à 10 36 52" src="https://user-images.githubusercontent.com/20380692/211771382-d716ff16-0d53-41a9-90de-0d93e01e45fa.png">

This fix is not mandatory thanks to the check inside the workflow, but I would rather roll back to avoid any issues when releasing the official v1 release.

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-11 09:43:42 +00:00
5ec85b7dfb Replace published by released 2023-01-11 10:30:18 +01:00
d80be0c28d Merge #3322
3322: Update mini-dashboard to v0.2.5 r=curquiza a=mdubus



Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
2023-01-11 09:08:11 +00:00
398c0c32cd test all the error codes that can be thrown in the settings 2023-01-10 18:19:27 +01:00
d4157c0ce4 add a test on the search route for each possible error codes
snapshot the json directly instead of using the debug formatting
2023-01-10 17:59:24 +01:00
98dffbf213 Merge #3317
3317: Remove the unused error codes r=irevoire a=irevoire

Remove some unused error code + fix the usage of the search+settings sort and filter error_code

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-10 16:36:11 +00:00
11ee7daa0f Merge #760
760: Add Index::map_size r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to discussion: https://github.com/meilisearch/meilisearch/discussions/3280

## What does this PR do?
- Expose `heed::Env::map_size` through `Index::map_size`. This allows knowing after the fact with which `map_size` an environment was opened (which is not always the `map_size` that was configured for the opening of the environment, see the documentation for `Index::map_size`), which will be necessary to guarantee we can reopen the index with a larger `map_size`.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-10 15:59:05 +00:00
f63fee5e97 Update Cargo.toml 2023-01-10 15:11:25 +01:00
f0d408c295 update the system http error code to return an internal server error 2023-01-10 14:33:46 +01:00
d308684395 remove two unused error codes + fix the sort error_code 2023-01-10 11:32:11 +01:00
00746b32c0 Add Index::map_size 2023-01-10 11:16:51 +01:00
e27bb8ab3e Merge #3246
3246: Implement most of the error handling enhancement planned for v1.0 r=irevoire a=irevoire

Fix #3095 and #2325
Close https://github.com/meilisearch/meilisearch/pull/2540

Implements most of https://github.com/meilisearch/specifications/pull/212

## Generic error messages we re-implement (in deserr):

- [x] Json
  - [x] Incorrect value kind
  - [x] Missing field
  - [x] Unknown key
  - [x] Unexpected
  - [x] Reimplement the way we show the location

- [x] Query parameter
  - [x] Incorrect value kind
  - [x] Missing field
  - [x] Unknown key
  - [x] Unexpected

## Routes to implement:
- [x] Get search
- [x] Post search
- [x] Settings
- [x] Swap indexes
- [x] Task API
- [x] Documents resource

Error codes to implement:
## Swap API

- [x] `duplicate_index_found` → `invalid_swap_duplicate_index_found`

## Search API

- [x] `invalid_search_q`
- [x] `invalid_search_offset`
- [x] `invalid_search_limit`
- [x] `invalid_search_page`
- [x] `invalid_search_hits_per_page`
- [x] `invalid_search_attributes_to_retrieve`
- [x] `invalid_search_attributes_to_crop`
- [x] `invalid_search_crop_length`
- [x] `invalid_search_attributes_to_highlight`
- [x] `invalid_search_show_matches_position`
- [x] `invalid_search_filter`
- [x] `invalid_search_sort`
- [x] `invalid_search_facets`
- [x] `invalid_search_highlight_pre_tag`
- [x] `invalid_search_highlight_post_tag`
- [x] `invalid_search_crop_marker`
- [x] `invalid_search_matching_strategy`

## Settings API

- [x] invalid_settings_displayed_attributes
- [x] invalid_settings_searchable_attributes
- [x] invalid_settings_filterable_attributes
- [x] invalid_settings_sortable_attributes
- [x] invalid_settings_ranking_rules
- [x] invalid_settings_stop_words
- [x] invalid_settings_synonyms
- [x] invalid_settings_distinct_attribute
- [x] Add invalid_settings_typo_tolerance
    - [x] ~~invalid_settings_typo_tolerance_min_word_size_for_typos~~ (Merge in **invalid_settings_typo_tolerance**)
- [x] invalid_settings_faceting
- [x] invalid_settings_pagination

## Task API

- [x] invalid_task_date_filer → invalid_task_before_enqueued_at_filter (for all date filter) ?

## Document Resource

- [x] ~~`primary_key_inference_failed` → `index_primary_key_`~~ This doesn't exist anymore after `@dureuill`'s PR on the primary key inference

------------------

# Changes

# `code` property

## Swap API

- [x] `invalid_swap_duplicate_index_found`  [RENAME]
- [x] `invalid_swap_indexes`  [NEW]

## Index API

### POST

- [x] `missing_index_uid`  [NEW]

### POST/PATCH

- [x] `invalid_index_primary_key`  [NEW]

### GET

- [x] `invalid_index_limit`  [NEW]
- [x] `invalid_index_offset`  [NEW]

## Documents API

### GET

- [x] `fields` parameter error `bad_request` → `invalid_document_fields`  [NEW]
- [x] `limit` parameter error `bad_request` → `invalid_document_limit`  [NEW]
- [x] `offset` parameter error `bad_request` → `invalid_document_offset`  [NEW]

### POST/PUT

- [x] `?primaryKey` parameter error `bad_request` →  `invalid_index_primary_key`  [NEW]

## Keys API

### POST

- ~~`missing_parameter`~~
    - [x] `missing_api_key_actions`  [NEW]
    - [x] `missing_api_key_indexes`  [NEW]
    - [x] `missing_api_key_expires_at`  [NEW]

### GET

- [x] `limit` parameter `bad_request` → `invalid_api_key_limit`  [NEW]
- [x] `offset` parameter `bad_request` → `invalid_api_key_offset`  [NEW]

## Misc
- [x] ~~`invalid_geo_field`~~ → `invalid_document_geo_field`  [RENAME]

# `type` property

## `system`   [NEW]

- [x] `no_space_left_on_device` error code
- [x] `io_error` error code (**does not exist in the current spec, need a catch-up**)
- [x] `too_many_open_files` error code (**does not exist in the current spec, need a catch-up**)

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-09 16:25:48 +00:00
ff843881c5 remove the documentation of the query parameter extractor module 2023-01-09 15:14:48 +01:00
ae08fba76e Remove forgotten comment 2023-01-09 13:45:03 +01:00
af6d4b3031 Remove unused deserr extractor 2023-01-09 13:43:16 +01:00
1cce613399 Fixup dumps-destination -> dump-directory section header in help link 2023-01-09 13:31:57 +01:00
b03ee54fe0 makes clippy turbo-happy 2023-01-09 13:04:31 +01:00
d17efb9ed6 use the published version of deserr 2023-01-09 12:51:10 +01:00
9ab791bedc Update error codes on the api key routes 2023-01-09 12:30:25 +01:00
96105a5e8d Update error codes on the documents/ routes 2023-01-09 12:30:25 +01:00
e706628bb1 fix the error code of the swap index route 2023-01-06 14:48:25 +01:00
3c630891bb fix the error code for the swap index 2023-01-05 21:25:20 +01:00
97854274b4 rename the invalid_geo_field error code to invalid_document_geo_field 2023-01-05 21:08:19 +01:00
0646f63404 implement the new type property for the system error 2023-01-05 21:06:50 +01:00
ce3e8794a2 fix the tests after the rebase 2023-01-05 20:52:26 +01:00
50ce0409bc Integrate deserr on the most important routes 2023-01-05 20:48:29 +01:00
839b05c43d Merge #3305
3305: Remove hidden but usable CLI arguments r=Kerollmops a=Kerollmops

`@curquiza` found out that we were exposing some internal CLI arguments: `nb-max-chunks` and `log-every-n`. In this PR I removed those two, the only two that I found. Those options shouldn't be accessible, as they are not documented in the documentation or in the `--help` message.

Fixes https://github.com/meilisearch/meilisearch/issues/3307

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-05 17:11:58 +00:00
cc699fae40 Merge #3308
3308: Remove `--generate-master-key` option r=Kerollmops a=dureuill

# Pull Request

## Related issue

Related to https://github.com/meilisearch/specifications/pull/210#issuecomment-1372035525

## What does this PR do?
- Remove the short-lived `--generate-master-key` flag that was too beautiful for this world :D.

Removal of this option proceeds of the following reasoning:

1. It is the only option that starts meilisearch and then immediately exits
2. We are unsure if we want to keep it under this form in the future or switch to a subcommand.
3. Releasing this option in v1 would make it insta-stable.
5. The option is only marginally useful, as users will be presented with freshly generated key directly in the error messages if their master key is absent/too short.
6. If we remove this option now, we can still add it back in a future v1 release. If we add it now, we won't be able to remove it in any future v1 version.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!

### Impacts

this impacts the docs team as they would previously have had to document this option, and they may have wanted to use it in the user workflow.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-05 16:19:40 +00:00
aa4b813237 Derive Default on IndexerOpts 2023-01-05 16:00:45 +01:00
eb08a0fb0b Remove --generate-master-key option 2023-01-05 14:55:24 +01:00
cda529c07b Remove hidden but usable CLI arguments 2023-01-05 14:25:41 +01:00
1f8ddb366c Merge #3302
3302: Update insta snap tests for index dates of dump v5 r=curquiza a=loiclec

This PR simply updates the content of the insta snapshot test following https://github.com/meilisearch/meilisearch/pull/3013 . I manually verified that the dates in the snaps are indeed correct.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-05 12:58:10 +00:00
8a3da0c2a7 Merge #3304
3304: Fix update cargo.toml workflow r=Kerollmops a=curquiza

Following https://github.com/meilisearch/meilisearch/pull/3224

Fixes #3219 

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-01-05 12:16:57 +00:00
c840d55e89 Fix update cargo.toml workflow 2023-01-05 12:56:02 +01:00
c7a3992510 Merge #3303
3303: Update version for the next release (v1.0.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-05 11:53:09 +00:00
28408816ef Update version for the next release (v1.0.0) in Cargo.toml files 2023-01-05 11:45:15 +00:00
0eaa8ca255 Merge #3266
3266: Improve the way we receive the documents payload- serde multiple ndjson fix r=curquiza a=jiangbo212

# Pull Request

## Related issue
Fixes #3037 

## Related PR
#3164 

## What does this PR do?
Sorry, this PR is mainly to fix the problems caused by my previous PR #3164, which causes deserialization failures on multiple ndjson data.
- Fix serde multiple ndjson data failures and add test to it
- Fix the serde JSON array error by deserializing again with `from_slice`. `from_slice` is only used when the serde error category is `data`, which indicates that the JSON data is a single JSON value.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: jiangbo212 <peiyaoliukuan@126.com>
2023-01-05 11:30:29 +00:00
201bc633d2 Merge #3288
3288: Replace underscores with hyphens in documentation link to error code r=dureuill a=loiclec

# Pull Request

## Related issue
Fixes #3097 

## Implementation
Add a new dependency to `convert_case` (already used transitively by `deserr`) so that the link can be generated using:
```rust
    /// return the doc url associated with the error
    fn url(&self) -> String {
        format!(
            "https://docs.meilisearch.com/errors#{}",
            self.name().to_case(convert_case::Case::Kebab)
        )
    }
```

## Review
I'd like the reviewer to check whether it is expected that the content of some `dump` snapshot tests changed :-)

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-05 11:08:57 +00:00
ba839852f5 Update insta snap tests for index dates of dump v5 2023-01-05 11:45:40 +01:00
be9786bed9 Change primary key inference error messages 2023-01-05 10:40:09 +01:00
f9aa897ab5 Update insta tests 2023-01-05 10:19:19 +01:00
2d74678b51 Replace underscores with hyphens in doc link to error code 2023-01-05 10:09:02 +01:00
db7eaf23f4 Merge #3251
3251: Add a specific test on finite pagination placeholder search with disti… r=curquiza a=ManyTheFish

Add a specific test on finite pagination placeholder search with distinct attributes


related to https://github.com/meilisearch/milli/pull/743
related to https://github.com/meilisearch/meilisearch/issues/3200

poke `@curquiza` 

> note that the destination branch should be changed

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-01-05 09:06:53 +00:00
32f7cfa5cb Merge #3295
3295: Adjust Master Key-related messages r=dureuill a=dureuill

# Pull Request

## Related issue
Follow up for #3272 

## What does this PR do?
- Consistently capitalize "master key" (instead of "Master Key" sometimes) (see https://github.com/meilisearch/specifications/pull/209#discussion_r1060081094)
- Clarify that the counted unit for master key length is bytes, not characters (see https://github.com/meilisearch/documentation/issues/2069#issuecomment-1368873167)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-05 08:43:23 +00:00
a402fc4486 Merge #3013
3013: Extract the dates out of the dumpv5. r=loiclec a=funilrys

Hi there, 

please review this PR that tries to fix #2986. I'm still learning Rust and I found that #2986 is an excellent way for me to read and learn what others do with Rust. So please excuse my semantics ...

Stay safe and healthy.

---

# Pull Request

This patch possibly fixes #2986.

This patch introduces a way to fill the IndexMetadata.created_at and IndexMetadata.updated_at keys from the tasks events. This is done by reading the creation date of the first event (created_at) and the creation date of the last event (updated_at).


## Related issue
Fixes #2986

## What does this PR do?
- Extract the dates out of the dumpv5.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: funilrys <contact@funilrys.com>
2023-01-05 08:23:52 +00:00
502d9e4b24 Merge #3278
3278: Remove `--max-index-size` and `--max-task-db-size` flags r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #3231 

## What does this PR do?
- Remove `--max-index-size` and `--max-task-db-size` flags from the CLI, config file and environment variable
- Set the size of all indexes to **500GiB** and the size of the task DB to **10GiB**.  Reviewers might want to review these values carefully.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-04 16:44:27 +00:00
a85ff1f690 Fix documentation
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-04 17:20:03 +01:00
233372abea Remove --max-index-size and --max-task-db-size 2023-01-04 17:20:01 +01:00
13d4ae264a Merge #3269
3269: Simplify primary key inference r=dureuill a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3233

## What does this PR do?
- Integrates https://github.com/meilisearch/milli/pull/752 in meilisearch
- Remove `Serialize` and `Deserialize` from `error::Code` as it is unused.
- No longer filter on `milli` logs when `--log-level` is "info".
  - `milli` only has the newly-added inference log at the `info` level (from grepping `info` in the codebase)
  - the default value for `--log-level` is "INFO" and not "info" since `v0.30` so the filter is not active by default.
- updates milli to v0.38.0

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-04 16:14:36 +00:00
c766e06003 Merge #3281
3281: Merge `--schedule-snapshot` and `--snapshot-interval-sec` options r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #3131

## What does this PR do?
- Removes `--snapshot-interval-sec`
- `--schedule-snapshot` now accepts an optional integer value specifying the interval in seconds
- The config file no longer has a snapshot_interval_sec key.  Instead, the schedule_snapshot key now additionally accepts an integer value specifying the interval in seconds
- The env variable MEILI_SNAPSHOT_INTERVAL no longer exists
- The env variable MEILI_SCHEDULE_SNAPSHOT is always specified to the interval of the snapshot in seconds when defined. If snapshots are disabled the variable is undefined.

---

Relevant part of the `--help`

<img width="885" alt="Capture d’écran 2022-12-27 à 18 22 32" src="https://user-images.githubusercontent.com/41078892/209700626-1a1292c1-14e3-45b6-8265-e0adbd76ecf1.png">

---

### Tests

| `schedule_snapshot` in config.toml | `--schedule-snapshot` flag on CLI | `MEILI_SCHEDULE_SNAPSHOT` | `opt.schedule_snapshot` |
|--|--|--|--|
| missing | missing | missing | `Disabled`
| `false` | missing | missing | `Disabled`
| `true` | missing | missing | `Enabled(86400)`
| `1234` | missing | missing | `Enabled(1234)`
| missing | `--schedule-snapshot` | missing | `Enabled(86400)`
| `false` | `--schedule-snapshot` | missing | `Enabled(86400)` 
| missing | `--schedule-snapshot 2345` | missing | `Enabled(2345)`
| `false` | `--schedule-snapshot 2345` | missing | `Enabled(2345)`
| `true` | `--schedule-snapshot 2345` | missing | `Enabled(2345)`
| `1234` | `--schedule-snapshot 2345` | missing | `Enabled(2345)`
| `false` | `--schedule-snapshot 2345` | 3456 | `Enabled(2345)`
| `false` | `--schedule-snapshot` | 3456 | **`Enabled(86400)`**
| `1234` | missing | 3456 | `Enabled(3456)`
| `false` | missing | 3456 | `Enabled(3456)`


## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-04 14:25:47 +00:00
fcbd47281b Fix tests 2023-01-04 14:24:20 +01:00
b6d80293f7 Propagate new error codes from milli 2023-01-04 14:24:20 +01:00
0e98a71a24 Update milli to v0.38 2023-01-04 14:24:20 +01:00
5cb566b165 No longer filter out milli logs when --log-level is "info" 2023-01-04 14:24:20 +01:00
9d46caba29 Code doesn't need to be serializable/deserializable 2023-01-04 14:16:22 +01:00
c4aa5cc7d0 Merge --schedule-snapshot and --snapshot-interval-sec options 2023-01-04 14:13:54 +01:00
12c3d432f9 Merge #3293
3293: Explicitly restrict log level options to those that are documented r=loiclec a=loiclec

Fixes https://github.com/meilisearch/meilisearch/issues/3292





Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-04 10:30:35 +00:00
c3f4835e8e Merge #733
733: Avoid a prefix-related worst-case scenario in the proximity criterion r=loiclec a=loiclec

# Pull Request

## Related issue
Somewhat fixes (until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3118

## What does this PR do?
When a query ends with a word and a prefix, such as:
```
word pr
```
Then we first determine whether `pr` *could possibly* be in the proximity prefix database before querying it. There are then three possibilities:

1. `pr` is not in any prefix cache because it is not the prefix of many words. We don't query the proximity prefix database. Instead, we list all the word derivations of `pr` through the FST and query the regular proximity databases.

2. `pr` is in the prefix cache but cannot be found in the proximity prefix databases. **In this case, we partially disable the proximity ranking rule for the pair `word pr`.** This is done as follows:
   1. Only find the documents where `word` is in proximity to `pr` **exactly** (no derivations)
   2. Otherwise, assume that their proximity in all the documents in which they coexist is >= 8

3. `pr` is in the prefix cache and can be found in the proximity prefix databases. In this case we simply query the proximity prefix databases.

Note that if a prefix is longer than 2 bytes, then it cannot be in the proximity prefix databases. Also, proximities larger than 4 are not present in these databases either. Therefore, the impact on relevancy is:

1. For common prefixes of one or two letters: we no longer distinguish between proximities from 4 to 8
2. For common prefixes of more than two letters: we no longer distinguish between any proximities
3. For uncommon prefixes: nothing changes

Regarding (1), it means that these two documents would be considered equally relevant according to the proximity rule for the query `heard pr` (IF `pr` is the prefix of more than 200 words in the dataset):
```json
[
    { "text": "I heard there is a faster proximity criterion" },
    { "text": "I heard there is a faster but less relevant proximity criterion" }
]
```

Regarding (2), it means that two documents would be considered equally relevant according to the proximity rule for the query "faster pro":
```json
[
    { "text": "I heard there is a faster but less relevant proximity criterion" },
    { "text": "I heard there is a faster proximity criterion" }
]
```
But the following document would be considered more relevant than the two documents above:
```json
{ "text": "I heard there is a faster swimmer who is competing in the pro section of the competition " }
```

Note, however, that this change of behaviour only occurs when using the set-based version of the proximity criterion. In cases where there are fewer than 1000 candidate documents when the proximity criterion is called, this PR does not change anything. 

---

## Performance

I couldn't use the existing search benchmarks to measure the impact of the PR, but I did some manual tests with the `songs` benchmark dataset.   

```
1. 10x 'a': 
	- 640ms ⟹ 630ms                  = no significant difference
2. 10x 'b':
	- set-based: 4.47s ⟹ 7.42s       = bad, ~2x regression
	- dynamic: 1s ⟹ 870 ms           = no significant difference
3. 'Someone I l':
	- set-based: 250ms ⟹ 12 ms       = very good, x20 speedup
	- dynamic: 21ms ⟹ 11 ms          = good, x2 speedup 
4. 'billie e':
	- set-based: 623ms ⟹ 2ms         = very good, x300 speedup 
	- dynamic: ~4ms ⟹ 4ms            = no difference
5. 'billie ei':
	- set-based: 57ms ⟹ 20ms         = good, ~2x speedup
	- dynamic: ~4ms ⟹ ~2ms.          = no significant difference
6. 'i am getting o' 
	- set-based: 300ms ⟹ 60ms        = very good, 5x speedup
	- dynamic: 30ms ⟹ 6ms            = very good, 5x speedup
7. 'prologue 1 a 1':
	- set-based: 3.36s ⟹ 120ms       = very good, 30x speedup
	- dynamic: 200ms ⟹ 30ms          = very good, 6x speedup
8. 'prologue 1 a 10':
	- set-based: 590ms ⟹ 18ms        = very good, 30x speedup 
	- dynamic: 82ms ⟹ 35ms           = good, ~2x speedup
```

Performance is often significantly better, but there is also one regression in the set-based implementation with the query `b b b b b b b b b b`.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-04 09:00:50 +00:00
d082ded7ad Explicitly restrict log level options to those that are documented
Fixes https://github.com/meilisearch/meilisearch/issues/3292
2023-01-04 09:40:24 +01:00
49f58b2c47 Merge #732
732: Interpret synonyms as phrases r=loiclec a=loiclec

# Pull Request

## Related issue
Fixes (when merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3125

## What does this PR do?
We now map multi-word synonyms to phrases instead of loose words. Such that the request:
```
btw I am going to nyc soon
```
is interpreted as (when the synonym interpretation is chosen for both `btw` and `nyc`):
```
"by the way" I am going to "New York City" soon
```
instead of:
```
by the way I am going to New York City soon
```

This prevents queries containing multi-word synonyms from exceeding the word length limit and degrading the search performance.

In terms of relevancy, there is a debate to have. I personally think this could be considered an improvement, since it would be strange for a user to search for:
```
good DIY project
```
and have a result such as:
```
{
    "text": "whether it is a good project to do, you'll have to decide for yourself"
}
```
However, for synonyms such as `NYC -> New York City`, then we will stop matching documents where `New York` is separated from `City`. This is however solvable by adding an additional mapping: `NYC -> New York`.

## Performance

With the old behaviour, some long search requests making heavy use of synonyms could take minutes to be executed. This is no longer the case: these search requests now take an average amount of time to be resolved.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-04 08:34:18 +00:00
947f08793a Merge #3296
3296: Remove `--disable-auto-batching` CLI option r=gmourier a=loiclec

Fixes #3294 

The `index-scheduler` code is not modified, only the CLI options have changed.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-03 16:57:14 +00:00
6a10e85707 Merge #736
736: Update charabia r=curquiza a=ManyTheFish

Update Charabia to the latest version.

> We are now Romanizing Chinese characters into Pinyin.
> Note that we keep the accents because they are in fact never typed directly by the end-user; moreover, changing an accent leads to a different Chinese character, and I don't have sufficient knowledge to forecast the impact of removing accents in this context.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-01-03 15:44:41 +00:00
17dac72464 Characters -> bytes 2023-01-03 15:31:02 +01:00
b821c72459 Remove --disable-auto-batching CLI option 2023-01-03 15:01:04 +01:00
7b2575c646 Master Key -> master key 2023-01-03 14:45:23 +01:00
c505fa9d7d Merge #758
758: Bump taiki-e/install-action from 1 to 2 r=curquiza a=dependabot[bot]

Bumps [taiki-e/install-action](https://github.com/taiki-e/install-action) from 1 to 2.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/taiki-e/install-action/releases">taiki-e/install-action's releases</a>.</em></p>
<blockquote>
<h2>2.0.0</h2>
<p>This release implements a mechanism to automatically track the latest version of the tool on our end. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/27">#27</a>)
Hopefully, this will avoid situations such as &quot;new version of the tool has been released, but the maintainer has not been aware of it for a number of months&quot;.
This also makes it easier to add support for new tools.</p>
<p>This release also includes the following improvements:</p>
<ul>
<li>
<p>Verify SHA256 checksums for downloaded files in all tools installed from GH releases. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/27">#27</a>)</p>
</li>
<li>
<p>Support omitting the patch/minor version in all tools installed from GH releases. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/27">#27</a>)</p>
<p>For example:</p>
<pre lang="yaml"><code>- uses: taiki-e/install-action@v2
  with:
    tool: cargo-hack@0.5
</code></pre>
<p>You can also omit the minor version if the major version of tool is 1 or greater.</p>
</li>
<li>
<p>Support <code>just</code>. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/34">#34</a>)</p>
</li>
<li>
<p>Support <code>dprint</code>. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/34">#34</a>)</p>
</li>
</ul>
<p>Note: This release is considered a breaking change because installing on versions not yet recognized by the action or on pre-release versions will no longer work with this release. (They were never officially supported, but they could work before.) Please submit an issue if you need these supports again.</p>
<h2>1.17.3</h2>
<ul>
<li>Update <code>wasmtime@latest</code> to 4.0.0.</li>
</ul>
<h2>1.17.2</h2>
<ul>
<li>Update <code>mdbook@latest</code> to 0.4.25.</li>
</ul>
<h2>1.17.1</h2>
<ul>
<li>Update <code>mdbook@latest</code> to 0.4.23.</li>
<li>Support <code>mdbook</code> on Linux (musl).</li>
<li>Update <code>cargo-llvm-cov@latest</code> to 0.5.3.</li>
</ul>
<h2>1.17.0</h2>
<ul>
<li>Update <code>protoc@latest</code> to 3.21.12.</li>
<li>Support aarch64 self-hosted runners (Linux, macOS, Windows).</li>
<li>Improve support for Fedora/RHEL based containers/self-hosted runners.</li>
</ul>
<h2>1.16.0</h2>
<ul>
<li>
<p>Update <code>cargo-binstall@latest</code> to 0.18.1. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/32">#32</a>, thanks <a href="https://github.com/NobodyXu"><code>@NobodyXu</code></a>)</p>
</li>
<li>
<p>If the host environment lacks packages required for installation, such as <code>curl</code> or <code>tar</code>, install them if possible.</p>
<p>It is mainly intended to make the use of this action easy on containers or self-hosted runners, and currently supports Debian-based distributions (including Ubuntu) and Alpine.</p>
</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/taiki-e/install-action/blob/main/CHANGELOG.md">taiki-e/install-action's changelog</a>.</em></p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="8ffc26aecd"><code>8ffc26a</code></a> Release 2.0.1</li>
<li><a href="82e9eb5996"><code>82e9eb5</code></a> Update DEVELOPMENT.md</li>
<li><a href="d3b7ad8380"><code>d3b7ad8</code></a> Update changelog</li>
<li><a href="f1a96ee3ed"><code>f1a96ee</code></a> Update changelog when manifest update</li>
<li><a href="46063c186c"><code>46063c1</code></a> Update cargo-minimal-versions</li>
<li><a href="048586d7a8"><code>048586d</code></a> Update cargo-hack</li>
<li><a href="d117b8d41a"><code>d117b8d</code></a> Remove outdated todo</li>
<li><a href="76828c33cd"><code>76828c3</code></a> Release 2.0.0</li>
<li><a href="eea8c318de"><code>eea8c31</code></a> Update readme and changelog</li>
<li><a href="ab0e193cf5"><code>ab0e193</code></a> Support dprint</li>
<li>Additional commits viewable in <a href="https://github.com/taiki-e/install-action/compare/v1...v2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=taiki-e/install-action&package-manager=github_actions&previous-version=1&new-version=2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-01-03 10:44:36 +00:00
9519e60f97 Merge #709
709: Optimise the `ExactWords` sub-criterion within `Exactness` r=loiclec a=loiclec

# Pull Request

## Related issue
Fixes (partially) https://github.com/meilisearch/meilisearch/issues/3116

## What does this PR do?
1. Reduces the algorithmic complexity of finding the documents containing N exact words from something that is exponential to something that is polynomial.
2. Cache intermediary results between different calls to the `exactness` criterion.

## Performance Results
On the `smol_songs.csv` dataset, a request containing 10 common words now takes about 60ms instead of 5 seconds to execute. For example, this is the case with this (admittedly nonsensical) request: `Rock You Hip Hop Folk World Country Electronic Love The`.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-02 12:28:30 +00:00
b5df889dcb Apply review suggestions: simplify implementation of exactness criterion 2023-01-02 13:11:47 +01:00
31155dce4c Merge #752
752: Simplify primary key inference r=irevoire a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3233

## What does this PR do?

### User PoV

- Change primary key inference to only consider a value as a candidate when it ends with "id", rather than when it simply contains "id".
- Change primary key inference to always fail when there are multiple candidates.
- Replace UserError::MissingPrimaryKey with `UserError::NoPrimaryKeyCandidateFound` and `UserError::MultiplePrimaryKeyCandidatesFound`

### Implementation-wise

- Remove uses of UserError::MissingPrimaryKey not pertaining to inference. This introduces a possible panicking path.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-02 11:44:22 +00:00
8d36570958 Add explicit criterion impl strategy to proximity search tests 2023-01-02 10:37:01 +01:00
939e7faf31 Bump taiki-e/install-action from 1 to 2
Bumps [taiki-e/install-action](https://github.com/taiki-e/install-action) from 1 to 2.
- [Release notes](https://github.com/taiki-e/install-action/releases)
- [Changelog](https://github.com/taiki-e/install-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/taiki-e/install-action/compare/v1...v2)

---
updated-dependencies:
- dependency-name: taiki-e/install-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-01-01 10:02:00 +00:00
3e0e8164a3 fixup! Adjust + Cleanup changes. 2022-12-22 18:01:54 +01:00
0bc4572905 Adjust + Cleanup changes.
Indeed, I missed some of the changes that were introduced by #3190.
2022-12-22 17:53:33 +01:00
4e6c663a2e Release unnecessary ownership. 2022-12-22 17:47:58 +01:00
e2775c6f49 Remove unused object. 2022-12-22 17:47:58 +01:00
c07a5932cb Apply fmt. 2022-12-22 17:47:58 +01:00
528a944997 Reimplement v5 date extraction.
Indeed, before this patch the implementation wasn't correct.
2022-12-22 17:47:58 +01:00
13fb5ce974 Re-Open tasks list when needed.
Indeed, before this patch we were using the reference instead of
"reopening" the task list each time we needed to access it.
Without this patch, all other usage of the task attribute will
break.
2022-12-22 17:47:57 +01:00
a43a0712fa Add reader.v5.tasks.Task.updated_at.
There was no way to "quickly" get the update date.
2022-12-22 17:47:57 +01:00
1be4619b91 Add reader.v5.tasks.Task.created_at.
There was no way to "quickly" get the creation date.
2022-12-22 17:47:57 +01:00
cf50f85986 Add reader.v5.tasks.Task.processed_at.
There was no way to "quickly" get the processed date.
2022-12-22 17:47:57 +01:00
61b3a29ff3 Extract the dates out of the dumpv5.
This patch possibly fixes #2986.

This patch introduces a way to fill the IndexMetadata.created_at
and IndexMetadata.updated_at keys from the tasks events.
This is done by reading the creation date of the first event
(created_at) and the creation date of the last event (updated_at).
2022-12-22 17:47:57 +01:00
32c6062e65 Optimise exactness criterion
1. Cache some results between calls to next()
2. Compute the combinations of exact words more efficiently
2022-12-22 12:28:45 +01:00
f097aafa1c Add unit test for prefix handling by the proximity criterion 2022-12-22 12:08:00 +01:00
777b387dc4 Avoid a prefix-related worst-case scenario in the proximity criterion 2022-12-22 12:08:00 +01:00
b0f3dc2c06 Interpret synonyms as phrases 2022-12-22 12:07:51 +01:00
4b166bea2b Add primary_key_inference test 2022-12-21 15:13:38 +01:00
5943100754 Fix existing tests 2022-12-21 15:13:38 +01:00
b24def3281 Add logging when inference took place.
Displays log message in the form:
```
[2022-12-21T09:19:42Z INFO  milli::update::index_documents::enrich] Primary key was not specified in index. Inferred to 'id'
```
2022-12-21 15:13:38 +01:00
402dcd6b2f Simplify primary key inference 2022-12-21 15:13:38 +01:00
13c95d25aa Remove uses of UserError::MissingPrimaryKey not related to inference 2022-12-21 15:13:36 +01:00
a8defb585b Merge #742
742: Add a "Criterion implementation strategy" parameter to Search r=irevoire a=loiclec

Add a parameter to search requests which determines the implementation strategy of the criteria. This can be either `set-based`, `iterative`, or `dynamic` (ie choosing between set-based or iterative at search time). See https://github.com/meilisearch/milli/issues/755 for more context about this change.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-21 12:18:49 +00:00
339a4b0789 Make clippy happy 2022-12-21 12:49:34 +01:00
904fd2f6d1 Add a search strategy option to the cli 2022-12-21 12:48:53 +01:00
229405aeb9 Choose implementation strategy of criterion at runtime 2022-12-21 09:29:39 +01:00
2780e365e2 test update and ndjson serde use from_slice 2022-12-21 14:31:45 +08:00
bf2a401a05 serde ndjson fix 2022-12-21 11:27:15 +08:00
249e051cd4 Merge #750
750: Fix hard-deletion of an external id that was soft-deleted and then reimported - main r=irevoire a=loiclec

# Pull Request

## Related issue
Fixes (when merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3021

## What does this PR do?
There was a bug happening when:

1. Documents were added
2. Some of these documents were replaced using soft-deletion
3. A deletion of another non-replaced document takes place and triggers a hard-deletion
4. Documents with the same identifiers as the replaced documents are added again

Then, search results would return duplicate documents. No crash would happen at any time (this is the reason it wasn't caught by the previous fuzz test. I have updated the new one such that it also checks the result of a placeholder search request, which then finds the bug immediately).

The cause of the bug is: 

1. When a hard-deletion is triggered, we try to retrieve the external document id associated with each soft-deleted document id. 
2. Then, we take this list of external document ids and remove each of them from the `ExternalDocumentsIds` structure. 
3. However, this is not correct in case an existing (non-deleted) document shares the external id of a soft-deleted document. 
   
## Implementation of the fix
1. Before we process a permanent deletion, we update the list of soft-deleted document ids.
2. Then, the permanent deletion's job is to remove the soft-deleted documents from all data structures. Therefore, to update `ExternalDocumentsIds`, we can simply call the `delete_soft_deleted_documents_ids_from_fsts` method, which is faster and simpler.

## Correctness
A unit test was added to reproduce the bug. The new fuzz test, when adjusted to check the correctness of a placeholder search, could also instantly reproduce the bug, but now does not find any other problem.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-20 16:13:20 +00:00
fc0e7382fe Fix hard-deletion of an external id that was soft-deleted 2022-12-20 15:33:31 +01:00
97fb64e40e Merge #747
747: Soft-deletion computation no longer depends on the mapsize r=irevoire a=dureuill

# Pull Request

## Related issue

Related to https://github.com/meilisearch/meilisearch/issues/3231: After removing `--max-index-size`, the `mapsize` will always be unrelated to the actual max size the user wants for their DB, so it doesn't make sense to use these values any longer.

This implements solution 2.3 from https://github.com/meilisearch/meilisearch/issues/3231#issuecomment-1348628824

## What does this PR do?

### User-visible

- Soft-deleted documents are no longer deleted when there is less than 10% of the mapsize available or when they take more than 10% of the mapsize
- Instead, they are deleted when there are more soft-deleted documents than regular documents, or when they take more than 1GiB disk space (estimated).

### Implementation standpoint

1. Adds a `DeletionStrategy` enum to replace the boolean `disable_soft_deletion` that we had up until now. This enum allows us to specify that we want "always hard", "always soft", or to use the dynamic soft-deletion strategy (default).
2. Uses the current strategy when deleting documents, with the new heuristics being used in the `DeletionStrategy::Dynamic` variant.
3. Updates the tests to use the appropriate DeletionStrategy whenever needed (one of `AlwaysHard` or `AlwaysSoft` depending on the test)

Note to reviewers: this PR is optimized for a commit-by-commit review.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-19 17:46:18 +00:00
69edbf9f6d Update milli/src/update/delete_documents.rs 2022-12-19 18:23:50 +01:00
8957251eed Merge #751
751: Update version for the next release (v0.38.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-12-19 17:02:39 +00:00
c72535531b Update version for the next release (v0.38.0) in Cargo.toml files 2022-12-19 16:35:38 +00:00
916c23e7be Tests: rename snapshots 2022-12-19 10:07:17 +01:00
ad9937c755 Fix tests after adding DeletionStrategy 2022-12-19 10:07:17 +01:00
171c942282 Soft-deletion computation no longer takes into account the mapsize
Implemented solution 2.3 from https://github.com/meilisearch/meilisearch/issues/3231#issuecomment-1348628824
2022-12-19 10:07:17 +01:00
e2ae3b24aa Hard or soft delete according to the deletion strategy 2022-12-19 10:00:13 +01:00
fc7618d49b Add DeletionStrategy 2022-12-19 09:49:58 +01:00
7f88c4ff2f Fix #1714 test 2022-12-15 18:22:28 +01:00
96d4242b93 Update charabia 2022-12-15 18:22:22 +01:00
60ebf0ea0b Add a specific test on finite pagination placeholder search with distinct attributes 2022-12-15 17:28:20 +01:00
5114686394 Merge #743
743: Fix finite pagination with placeholder search r=Kerollmops a=ManyTheFish

this bug is reproducible on real datasets and is hard to isolate in a simple test.

related to: https://github.com/meilisearch/meilisearch/issues/3200

poke `@curquiza` 

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-12-15 09:31:47 +00:00
3322018c06 Fix placeholder search 2022-12-14 20:09:47 +01:00
0276d5212a Merge #728
728: Add some integration tests on the sort criterion r=ManyTheFish a=loiclec

This is simply an integration test ensuring that the sort criterion works properly. 

However, only one version of the algorithm is tested here (the iterative one). To test the version that uses the facet DB, one has to manually set the `CANDIDATES_THRESHOLD` constant to `0`. I have done that and ensured that the test still succeeds. However, in the future, we will probably want to have an option to force which algorithm is used at runtime, for testing purposes.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-14 09:27:12 +00:00
e2ffc3d69a Merge #741
741: Add test reproducing the bug fixed by #737 r=Kerollmops a=ManyTheFish

related to #737

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-12-13 15:02:19 +00:00
739da9fd4d Add test 2022-12-13 15:54:43 +01:00
2af93966e0 Merge #740
740: Fix two nightly errors r=Kerollmops a=irevoire

Currently, we have these two errors on rust nightly. It would be nice to help rustc understand what's going on

```
error[E0658]: anonymous lifetimes in `impl Trait` are unstable
   --> filter-parser/src/lib.rs:173:53
    |
173 | fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> {
    |                                                     ^ expected named lifetime parameter
    |
    = help: add `#![feature(anonymous_lifetime_in_impl_trait)]` to the crate attributes to enable
help: consider introducing a named lifetime parameter
    |
173 | fn ws<'a, 'a, O>(inner: impl FnMut(Span<'a>) -> IResult<'a, O>) -> impl FnMut(Span<'a>) -> IResult<O> {
    |       +++                                               +++

error[E0658]: anonymous lifetimes in `impl Trait` are unstable
  --> filter-parser/src/error.rs:36:49
   |
36 |     mut parser: impl FnMut(Span<'a>) -> IResult<O>,
   |                                                 ^ expected named lifetime parameter
   |
   = help: add `#![feature(anonymous_lifetime_in_impl_trait)]` to the crate attributes to enable
help: consider introducing a named lifetime parameter
   |
35 ~ pub fn cut_with_err<'a, 'a, O>(
36 ~     mut parser: impl FnMut(Span<'a>) -> IResult<'a, O>,
   |

For more information about this error, try `rustc --explain E0658`.
error: could not compile `filter-parser` due to 2 previous errors
```

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-13 14:33:40 +00:00
2c47500bc3 fix two nightly errors 2022-12-13 15:29:52 +01:00
406ee31d1a Merge #737
737: Fix typo initial candidates computation r=Kerollmops a=ManyTheFish

When `Typo` criterion was after a different criterion than `Words` and the previous criterion wasn't returning any candidates at the first iteration of the bucket sort, then the `initial_candidates` were lost.

Now, `Typo` ensures that the `initial_candidates` are kept between iterations.


related to https://github.com/meilisearch/meilisearch/issues/3200#issuecomment-1345179578
related to https://github.com/meilisearch/meilisearch/issues/3228

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-12-13 10:29:28 +00:00
2d8d0af1a6 Rename short name bc by ic for initial_candidates 2022-12-13 10:56:38 +01:00
e0a8f8cb5a Merge #734
734: Fix bug 2945/3021 (missing key in documents database) r=Kerollmops a=loiclec

# Pull Request

## Related issue
Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/2945 (until we integrate the new milli bump into meilisearch).

**Note that a dump will not be sufficient to upgrade from meilisearch v0.30.2 to meilisearch v0.30.3 due to this fix** because the bug could have caused the `documents` database to be corrupted. Instead, a full manual reimport of the documents will be necessary.

## What does this PR do?
There was a bug happening when:
1. A few documents are added to the index
2. Some of these documents are soft-deleted
3. New documents are added, replacing existing ones and triggering a hard-deletion

The `IndexDocuments::execute` method would then perform the hard-deletion but forget to change the `external_document_ids` structure appropriately. As a result, the `external_document_ids` would contain keys corresponding to documents that do not exist anymore.

To fix this bug, I split the `DeleteDocuments::execute` method into two: `execute_inner` and `execute`. 
- `execute_inner` returns a `DetailedDocumentDeletionResult` which says whether soft-deletion was used or not
- `execute` keeps the exact same signature and behaviour

Then, when deleting replaced documents inside `IndexDocuments::execute`, we call `DeleteDocuments::execute_inner` instead of `DeleteDocuments::execute`. If soft-deletion was used, nothing more is done. But if hard-deletion was used, we remove every reference to soft-deleted documents in the new `external_documents_ids` structure.

## Correctness

- Every other test still passes
- The reproduction test case now passes
- In a different branch ([`update-fuzz-test`](https://github.com/meilisearch/milli/pull/735)), I created a fuzz-test that reproduces the past two bugs. This fuzz test cannot find this bug through any combination of some hand-selected `DocumentAddition / DocumentDeletion / DocumentClear / SettingsUpdate` operations. In that test, each relevant operations can be executed with or without soft-deletion, and document additions can be done in batches, replacing or updating existing documents.



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-13 09:45:57 +00:00
be3b00350c Apply review suggestions: naming and documentation 2022-12-13 10:15:22 +01:00
80d34a4169 Fix typo initial candidates computation 2022-12-12 19:02:48 +01:00
e3ee553dcc Remove soft deleted ids from ExternalDocumentIds during document import
If the document import replaces a document using hard deletion
2022-12-12 14:16:09 +01:00
bebd050961 Add new test for bug 3021 2022-12-08 19:19:40 +01:00
1f1beae077 Merge #729
729: Fix distincted exhaustive hits r=Kerollmops a=ManyTheFish

This PR changes the name and behavior of `bucket_candidates`:
- `bucket_candidates` becomes `initial_candidates`, which is less confusing
- `initial_candidates` is no longer a simple `RoaringBitmap` but an enum allowing us to specify whether the candidates are exhaustive or not
- this enum ensures that any modification is allowed only if the candidates are not already exhaustive.

The bug occurred because `initial_candidates` are modified during the bucket sort, allowing the estimation to be more and more precise along the search, and this was an issue when the `initial_candidates` were already exhaustive. Now, if the candidates are exhaustive, no modifications are made.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-12-08 09:26:34 +00:00
55724f2412 Introduce an initial candidates set that makes the difference between an exhaustive count and an estimation 2022-12-08 09:41:34 +01:00
6d50ea0830 add tests 2022-12-08 08:56:57 +01:00
f37c86e0b2 Add some integration tests on the sort criterion 2022-12-07 15:59:33 +01:00
098c410612 Merge #727
727: Fix bug in filter search r=Kerollmops a=loiclec

# Pull Request

## Related issue
Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3178

## What does this PR do?
The most important change is this one:
```rust
    // in milli/src/search/facet/facet_range_search.rs, line 239
    let should_stop = {
        match self.right {
            Bound::Included(right) => right < previous_key.left_bound,
            Bound::Excluded(right) => right <= previous_key.left_bound,
            Bound::Unbounded => false,
        }
    };
```
where the operations `<` and `<=` between the two branches were switched. This caused (very few) documents to be missing from filter results.

The second change is a simplification of the algorithm for filters such as `field = value`, where we now perform a direct query into the "Level 0" of the facet db to retrieve the docids instead of invoking the full facet search algorithm. This change is done in `milli/src/search/facet/filter.rs`.

I have added yet more insta-snapshot tests, rechecked the content of the snapshots, and added some integration tests as well. 

This is purely a fix in the search algorithms. Based on this PR alone, a dump will not be necessary to switch from v0.30.1 (where this bug is present) to v0.30.2 (where this PR is merged).


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-07 14:34:59 +00:00
ee10cb8c87 Merge #726
726: Update the contributing.md r=curquiza a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-07 13:59:04 +00:00
d38cc73630 Add one more filter "integration" test 2022-12-07 14:38:25 +01:00
e688581c36 Add tests for facet range search on different field ids 2022-12-07 14:38:21 +01:00
4ac8f96342 Simplify implementation of equality condition in filters 2022-12-07 14:38:18 +01:00
1c9555566e Fix bug in facet range search 2022-12-07 14:38:14 +01:00
303d740245 Prepare fix within facet range search
By creating snapshots and updating the format of the existing
snapshots. The next commit will apply the fix, which will show
its effects cleanly on the old and new snapshot tests
2022-12-07 14:38:10 +01:00
250743885d add a sentence about installing rust-nightly 2022-12-07 12:31:43 +01:00
5eecb8489d Update CONTRIBUTING.md
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-12-07 12:23:12 +01:00
0e5c3b1f64 Update CONTRIBUTING.md
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-12-07 12:23:06 +01:00
f53bdc4320 update the contributing.md 2022-12-06 17:41:05 +01:00
0a301b5f88 Merge #723
723: Fix bug in handling of soft deleted documents when updating settings r=Kerollmops a=loiclec

# Pull Request

## Related issue
Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3021

## What does this PR do?
This PR fixes the bug where a `missing key in documents database` internal error message could appear when indexing documents.

When updating the settings, before clearing the database and before creating the transform output, we now modify the `ExternalDocumentsIds` structure to get rid of all references to soft deleted document ids in its FSTs.

It used to be that updating the settings would clear the soft-deleted document ids, but keep the original `ExternalDocumentsIds` structure. As a consequence of this, when processing a future document addition, we could wrongly believe that a document was being replaced when, in fact, it was a completely new document. See the tests `bug_3021_first`, `bug_3021_second`, and `bug_3021` for a minimal test case that would have reproduced the issue.
 
We need to take special care to:
- evaluate how users should update to v0.30.1 (containing this fix): dump? reimporting all documents from scratch?
- understand IF/HOW this bug could have caused duplicate documents to be returned 
- and evaluate the correctness of the fix, of course :)


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-06 14:37:38 +00:00
a993b68684 Cargo fmt >:-( 2022-12-06 15:22:10 +01:00
80c7a00567 Fix compilation error in tests of settings update 2022-12-06 15:19:26 +01:00
67d8cec209 Fix bug in handling of soft deleted documents when updating settings 2022-12-06 15:09:19 +01:00
2a846aaae7 Merge #719
719: Add more members of `filter_parser` to `milli::` & `From<&str>` implementation for `Token` r=Kerollmops a=GregoryConrad

## What does this PR do?
The current `milli::Filter` and `milli::FilterCondition` APIs require working with some members of `filter_parser` directly that `milli::` does *not* re-export to its users (at least when not parsing input using `parse`). Also, using `filter_parser` does not make sense when using milli from an embedded context where there is no query to parse.

Instead of reworking `milli::Filter` and `milli::FilterCondition`, this PR adds two non-breaking changes that ease the use of milli:
- Re-exports more members of the dependent version of `filter_parser` in `milli`
- Implements `From<&str>` for `filter_parser::Token`
  - This will also allow some basic tests that need to create a `Token` from a string to avoid some boilerplate.

In conjunction, both of these will allow milli users to easily create a `Token` from a `&str` without needing to add `filter_parser` as an extra dependency.

Note: I wanted to use `FromStr` for the `From` implementation; however, it requires returning a `Result` which is not needed for the conversion. Thus, I just left it as `From<&str>`.

Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2022-12-06 10:36:00 +00:00
d6eacb2aac Merge #722
722: Geosearch for zero radius r=irevoire a=amab8901

# Pull Request

## Related issue
Fixes #3167 (https://github.com/meilisearch/meilisearch/issues/3167)

## What does this PR do?
- allows Geosearch with zero radius to return the specified location when the coordinates match perfectly (instead of returning nothing). See link for more details.
- new attempt on https://github.com/meilisearch/milli/pull/713

## PR checklist
Please check if your PR fulfills the following requirements:
- [ X ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ X ] Have you read the contributing guidelines?
- [ X ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: amab8901 <amab8901@protonmail.com>
Co-authored-by: Tamo <irevoire@protonmail.ch>
2022-12-05 19:57:08 +00:00
212dbfa3b5 Update milli/src/search/facet/filter.rs 2022-12-05 20:56:21 +01:00
456da5de9c Geosearch for zero radius 2022-12-05 20:11:46 +01:00
46e26ab550 Merge #720
720: Make soft deletion optional in document addition and deletion + add lots of tests r=irevoire a=loiclec

# Pull Request

## What does this PR do?
When debugging recent issues, I created a few unit tests in the hopes of reproducing the bugs I was looking for. In the end, I didn't find any, but I thought it would still be good to keep those tests.

More importantly, I added a field to the `DeleteDocuments` and `IndexDocuments` builders, called `disable_soft_deletion`. If set to `true`, the indexing/deletion will never add documents to the `soft_deleted_documents_ids` and instead perform a real deletion of the documents from the databases.

For the new tests, I have:
- Improved the insta-snapshot format of the `external_documents_ids` structure
- Added more tests for the facet DB indexing, deletion, and search algorithms, making sure to test them when the facet DB contains strings (instead of numbers) as well.
- Added more tests for the incremental indexing of the prefix proximity databases. For example, to see if documents are replaced correctly and if common prefixes are deleted correctly.
- Added tests that mix soft deletion and hard deletion, including when processing batches of document updates. 


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-05 18:26:01 +00:00
cda4ba2bb6 Add document import tests 2022-12-05 12:02:49 +01:00
ae59d37b75 Improve insta-snap of the external document ids 2022-12-05 10:51:02 +01:00
f2cf981641 Add more tests and allow disabling of soft-deletion outside of tests
Also allow disabling soft-deletion in the IndexDocumentsConfig
2022-12-05 10:51:01 +01:00
50954d31fa feat: Re-export Span and Token to milli:: 2022-12-03 13:37:33 -05:00
1b5b5778c1 feat: Add From<&str> implementation for Token 2022-12-03 13:13:41 -05:00
d3731dda48 Merge #706
706: Limit the reindexing caused by updating settings when not needed r=curquiza a=GregoryConrad

## What does this PR do?
When updating index settings using `update::Settings`, sometimes a `reindex` of `update::Settings` is triggered when it doesn't need to be. This PR aims to prevent those unnecessary `reindex` calls.

For reference, here is a snippet from the current `execute` method in `update::Settings`:
```rust
// ...
if stop_words_updated
    || faceted_updated
    || synonyms_updated
    || searchable_updated
    || exact_attributes_updated
{
    self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?;
}
```

- [x] `faceted_updated` - looks good as-is 
- [x] `stop_words_updated` - looks good as-is 
- [x] `synonyms_updated` - looks good as-is 
- [x] `searchable_updated` - fixed in this PR
- [x] `exact_attributes_updated` - fixed in this PR

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2022-12-01 13:58:02 +00:00
51a2613c5c Merge #715
715: Fix benchmark CI r=irevoire a=curquiza

Fixes #714 

Tested with our actions: https://github.com/meilisearch/milli/actions/runs/3591527753/jobs/6046157141

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-01 10:39:38 +00:00
82e1c4f468 Merge #716
716: Bump Swatinem/rust-cache from 2.0.1 to 2.2.0 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.1 to 2.2.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.2.0</h2>
<ul>
<li>Add new <code>save-if</code> option to always restore, but only conditionally save the cache.</li>
</ul>
<h2>v2.1.0</h2>
<ul>
<li>Only hash <code>Cargo.{lock,toml}</code> files in the configured workspace directories.</li>
</ul>
<h2>v2.0.2</h2>
<ul>
<li>Avoid calling cargo metadata on pre-cleanup.</li>
<li>Added <code>prefix-key</code>, <code>cache-directories</code> and <code>cache-targets</code> options.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.2.0</h2>
<ul>
<li>Add new <code>save-if</code> option to always restore, but only conditionally save the cache.</li>
</ul>
<h2>2.1.0</h2>
<ul>
<li>Only hash <code>Cargo.{lock,toml}</code> files in the configured workspace directories.</li>
</ul>
<h2>2.0.2</h2>
<ul>
<li>Avoid calling <code>cargo metadata</code> on pre-cleanup.</li>
<li>Added <code>prefix-key</code>, <code>cache-directories</code> and <code>cache-targets</code> options.</li>
</ul>
<h2>2.0.1</h2>
<ul>
<li>Primarily just updating dependencies to fix GitHub deprecation notices.</li>
</ul>
<h2>2.0.0</h2>
<ul>
<li>The action code was refactored to allow for caching multiple workspaces and
different <code>target</code> directory layouts.</li>
<li>The <code>working-directory</code> and <code>target-dir</code> input options were replaced by a
single <code>workspaces</code> option that has the form of <code>$workspace -&gt; $target</code>.</li>
<li>Support for considering <code>env-vars</code> as part of the cache key.</li>
<li>The <code>sharedKey</code> input option was renamed to <code>shared-key</code> for consistency.</li>
</ul>
<h2>1.4.0</h2>
<ul>
<li>Clean both <code>debug</code> and <code>release</code> target directories.</li>
</ul>
<h2>1.3.0</h2>
<ul>
<li>Use Rust toolchain file as additional cache key.</li>
<li>Allow for a configurable target-dir.</li>
</ul>
<h2>1.2.0</h2>
<ul>
<li>Cache <code>~/.cargo/bin</code>.</li>
<li>Support for custom <code>$CARGO_HOME</code>.</li>
<li>Add a <code>cache-hit</code> output.</li>
<li>Add a new <code>sharedKey</code> option that overrides the automatic job-name based key.</li>
</ul>
<h2>1.1.0</h2>
<ul>
<li>Add a new <code>working-directory</code> input.</li>
<li>Support caching git dependencies.</li>
<li>Lots of other improvements.</li>
</ul>
<h2>1.0.2</h2>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="359a70e43a"><code>359a70e</code></a> 2.2.0</li>
<li><a href="ecee04e7b3"><code>ecee04e</code></a> feat: add save-if option, closes <a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/66">#66</a> (<a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/91">#91</a>)</li>
<li><a href="b894d59a8d"><code>b894d59</code></a> 2.1.0</li>
<li><a href="e78327dd9e"><code>e78327d</code></a> small code style improvements, README and CHANGELOG updates</li>
<li><a href="ccdddcc049"><code>ccdddcc</code></a> only hash Cargo.toml/Cargo.lock that belong to a configured workspace (<a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/90">#90</a>)</li>
<li><a href="b5ec9edd91"><code>b5ec9ed</code></a> 2.0.2</li>
<li><a href="3f2513fdf4"><code>3f2513f</code></a> avoid calling cargo metadata on pre-cleanup</li>
<li><a href="19c46583c5"><code>19c4658</code></a> update dependencies</li>
<li><a href="b8e72aae83"><code>b8e72aa</code></a> Added <code>prefix-key</code> <code>cache-directories</code> and <code>cache-targets</code> options (<a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/85">#85</a>)</li>
<li>See full diff in <a href="https://github.com/Swatinem/rust-cache/compare/v2.0.1...v2.2.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.0.1&new-version=2.2.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-01 10:08:58 +00:00
5bdf5c0aaf Update the steps to set variables 2022-12-01 11:07:54 +01:00
282b2e3b98 Bump Swatinem/rust-cache from 2.0.1 to 2.2.0
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.1 to 2.2.0.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v2.0.1...v2.2.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-12-01 10:02:54 +00:00
5e754b3ee0 Merge #708
708: Reduce memory usage of the MatchingWords structure r=ManyTheFish a=loiclec

# Pull Request

## Related issue
Fixes (partially) https://github.com/meilisearch/meilisearch/issues/3115 

## What does this PR do?
1. Reduces the memory usage caused by the creation of a 10-word query tree by 20x. 
   This is done by deduplicating the `MatchingWord` values, which are heavy because of their inner DFA. The deduplication works by wrapping each `MatchingWord` in a reference-counted box and using a hash map to determine whether a  `MatchingWord` DFA already exists for a certain signature, or whether a new one needs to be built.
 
2. Avoids the worst-case scenario of creating a `MatchingWord` for extremely long words that cannot be indexed by milli.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-11-30 17:47:34 +00:00
e1612fcb01 Merge #712
712: Fix bulk facet indexing bug r=Kerollmops a=loiclec

# Pull Request

## Related issue
Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3165

## What does this PR do?
Fixes a bug where indexing certain numbers of filterable attribute values in bulk led to corrupted facet databases. This was due to a lossy integer conversion which would ultimately prevent entire levels of the facet database from being written into LMDB.

More specifically, this change was made:
```diff
      - if cur_writer_len as u8 >= self.min_level_size {
      + if cur_writer_len >= self.min_level_size as usize {
```
I also checked other comparisons to `min_level_size` and other conversions such as `x as u8` in this part of the codebase.



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-11-30 16:51:48 +00:00
9dd4b33a9a Fix bulk facet indexing bug 2022-11-30 14:27:36 +01:00
de22116b3d Merge #711
711: Replace deprecated gh actions r=curquiza a=pnhatminh

# Pull Request

## Related issue
Fixes #678

## What does this PR do?
- Replace deprecated github action command with newly defined command.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Minh Pham <minh.pham@codelink.io>
2022-11-29 09:56:22 +00:00
5f78522044 Update 2022-11-29 10:11:38 +07:00
87e2bc3bed fix(reindex): reindex in a few more cases
Cases: whenever searchable_fields OR user_defined_searchable_fields is modified
2022-11-28 13:12:19 -05:00
61b58b115a Don't create partial matching words for synonyms in ngrams 2022-11-28 16:32:28 +01:00
d3182f3830 refactor: Change return type to keep consistency with others 2022-11-28 10:02:03 -05:00
f698e6cfdf Merge #707
707: Add all_obkv_to_json function r=Kerollmops a=GregoryConrad

## What does this PR do?
When embedding milli in an application (other than Meilisearch), it often makes sense to not use the `displayed_attributes` functionality and instead just use milli as a full document store. Thus, this PR adds a function, `all_obkv_to_json`, to supplement the already exposed `milli::obkv_to_json` so that those embedding milli *do not* need to deal with `displayed_attributes` if they don't need to.

~This PR also introduces a slight breaking change: `obkv_to_json` now accepts a reference to `obkv::KvReaderU16` instead of taking ownership of it. As far as I can tell, this seems like a change for the better (`obkv_to_json` only acts upon `obkv` rather than consuming it), but I can change it back if you so desire.~ (reverted in [935a724](935a724c57))

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2022-11-28 14:52:45 +00:00
f70856bab1 Remove memory usage test that fails when many tests are run in parallel 2022-11-28 12:55:28 +01:00
80588daae5 Fix compilation error in formatting benches 2022-11-28 10:27:15 +01:00
e2ebed62b1 Don't create partial matching words for synonyms, split words, phrases 2022-11-28 10:20:13 +01:00
8284bd760f Relax memory ordering of operations within the test CountingAlloc 2022-11-28 10:20:13 +01:00
8d0ace2d64 Avoid creating a MatchingWord for words that exceed the length limit 2022-11-28 10:20:13 +01:00
86c34a996b Deduplicate matching words 2022-11-28 10:20:13 +01:00
eba7af1d2c Replace deprecated gh actions 2022-11-27 06:47:08 +07:00
e0d24104a3 refactor: Rewrite another method chain to be more readable 2022-11-26 13:33:19 -05:00
2db738dbac refactor: rewrite method chain to be more readable 2022-11-26 13:26:39 -05:00
84dd2e4df1 Merge #710
710: Update Clippy to use Rust Stable r=irevoire a=Kerollmops

This PR changes the CI to use Rust stable for Clippy and Rustfmt. This way we will reduce the number of times we break the CI. [The version will only change every two months or so](https://www.whatrustisit.com/).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-11-24 15:57:04 +00:00
3d06ea41ea Keep a nightly for rustfmt
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-11-24 16:54:40 +01:00
3958db4b17 Update the CI to use Rust Stable 2022-11-24 16:26:48 +01:00
935a724c57 revert: Revert pass by reference API change 2022-11-24 10:08:23 -05:00
ed29cceae9 perf: Prevent reindex in searchable set case when not needed 2022-11-23 22:33:06 -05:00
bb9e33bf85 perf: Prevent reindex in searchable reset case when not needed 2022-11-23 22:01:46 -05:00
7c0e544839 feat: Add all_obkv_to_json function 2022-11-23 21:18:58 -05:00
d19c8672bb perf: limit reindex to when exact_attributes changes 2022-11-23 15:50:53 -05:00
57c9f03e51 Merge #697
697: Fix bug in prefix DB indexing r=loiclec a=loiclec

Where the batch's information was not properly updated in cases where only the proximity changed between two consecutive word pair proximities.

Closes partially https://github.com/meilisearch/meilisearch/issues/3043



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-11-17 15:22:01 +00:00
467e742bd1 Merge #702
702: Update version for the next release (v0.37.0) in Cargo.toml files r=ManyTheFish a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-11-17 12:54:27 +00:00
cd5aaa3a9f Update version for the next release (v0.37.0) in Cargo.toml files 2022-11-17 12:50:07 +00:00
8ceb199dca Merge #696
696: Fix Facet Indexing bugs r=Kerollmops a=loiclec

1. Handle keys with variable length correctly

Closes (partially) https://github.com/meilisearch/meilisearch/issues/3042 
This issue is now easily reproducible with the updated fuzz tests, which now generate keys with variable lengths.

2. Prevent adding facets to the database if their encoded value does not satisfy `valid_lmdb_key`.

Closes (partially) https://github.com/meilisearch/meilisearch/issues/2743
This fixes an indexing failure when a document had a filterable attribute containing a value whose length is higher than ~500 bytes. For now, this fix is just meant to prevent crashes. Better handling of long values of filterable attributes will be handled in a separate PR.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-11-17 11:56:16 +00:00
777eb3fa00 Add insta-snaps for test of bug 3043 2022-11-17 12:21:27 +01:00
0caadedd3b Make clippy happy 2022-11-17 12:17:53 +01:00
ac3baafbe8 Truncate facet values that are too long before indexing them 2022-11-17 11:29:42 +01:00
990a861241 Add test for indexing a document with a long facet value 2022-11-17 11:29:42 +01:00
d95d02cb8a Fix Facet Indexing bugs
1. Handle keys with variable length correctly

This fixes https://github.com/meilisearch/meilisearch/issues/3042 and
is easily reproducible with the updated fuzz tests, which now generate
keys with variable lengths.

2. Prevent adding facets to the database if their encoded value does
not satisfy `valid_lmdb_key`.

This fixes an indexing failure when a document had a filterable
attribute containing a value whose length is higher than ~500 bytes.
2022-11-17 11:29:42 +01:00
f00108d2ec Fix name of bug in reproduction test 2022-11-17 11:29:18 +01:00
f7c8730d09 Fix bug in prefix DB indexing
Where the batch's information was not properly updated in cases
where only the proximity changed between two consecutive word pair
proximities.

Closes https://github.com/meilisearch/meilisearch/issues/3043
2022-11-17 11:29:18 +01:00
a651397afc Merge #685
685: ci: Use pre-compiled binaries for faster CI r=irevoire a=azzamsa

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: azzamsa <me@azzamsa.com>
2022-11-16 16:39:39 +00:00
2000db8453 Merge #701
701: Remove Hacktoberfest sections r=curquiza a=meili-bot

_This PR is auto-generated._

Remove Hacktoberfest sections from CONTRIBUTING file.


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-11-15 15:17:18 +00:00
92cc3550d8 Update CONTRIBUTING.md 2022-11-15 16:16:40 +01:00
cd3bca06e9 Merge #699
699: Force vendoring of LMDB even if a system version is available r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3017: will fix once ported to milli and meilisearch.

## What does this PR do?
- Force using vendored version of LMDB
- **don't use lmdb master3 branch anymore**: this is a bit of a side effect of using a tag instead of branch for heed as a dependency, but it is wanted anyway for now as lmdb master3 was more of an experiment
- **modifies CI to run `cargo check` on the release rather than the debug artifacts**. This is an attempt to reduce the necessary disk space and avoid "out of space" failures.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-11-15 11:09:20 +00:00
87576cf26c Perform cargo check on the release artifacts 2022-11-15 10:25:02 +01:00
6dc6a5d874 Force using vendored version of LMDB
- don't use lmdb master3 branch anymore
2022-11-14 17:17:51 +01:00
e75829aded Merge #694
694: Update version for the next release (v0.36.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2022-11-09 11:22:24 +00:00
d00d2aab3f Update version for the next release (v0.36.0) in Cargo.toml files 2022-11-09 11:03:09 +00:00
f46a8ab2e2 Merge #693
693: use the lmdb-master.3 branch r=Kerollmops a=irevoire

After investigating https://github.com/meilisearch/meilisearch/issues/3017, we found out that it was due to lmdb and that, without any code change on our side, bumping to the lmdb-master-3 branch fixes our issues.

But, we’re not really confident about what changed between the `mdb.master` and `mdb.master3` branches; thus this is a temporary change, and we hope we’ll be able to move to the new version of heed asap (either before the end of the pre-release or for the next release).

--------

The bug is hard to reproduce; I can reproduce it 100% of the time on my archlinux personal computer. But on a scaleway archlinux bare-metal machine, it doesn’t reproduce. It’s flaky on our test suite, but `@loiclec` was able to write a minimal test that reproduces it every time on macOS.
Basically, it happens when multiple threads open databases, each in a different directory, at the same time.
If there are 10 or more threads running at the same time, lmdb starts throwing the `Invalid argument (os error 22)` error for no reason, we believe.
I would like to submit an issue to lmdb, but I don’t really have the time to write a test in C without heed currently.

`@hyc`, if you want to take a look at it, here is the repo that reproduces the issue on macOS: https://github.com/irevoire/heed-bug

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-11-09 09:42:38 +00:00
c3b75bbe5d Merge #691
691: Update version for the next release (v0.35.1) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2022-11-08 15:31:50 +00:00
c7711daca3 use the lmdb-master.3 branch 2022-11-08 16:28:01 +01:00
f18a4581f1 Merge #692
692: Update CONTRIBUTING.md r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-11-08 14:51:30 +00:00
8ce8bbcdfc Update CONTRIBUTING.md 2022-11-08 15:49:45 +01:00
bd12989610 Update version for the next release (v0.35.1) in Cargo.toml files 2022-11-08 14:31:39 +00:00
24a298a83c Merge #690
690: Fix soft deleted bug settings r=ManyTheFish a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-11-08 13:45:10 +00:00
d85cd9bf1a Merge #689
689: Handle non-finite floats consistently in filters r=irevoire a=dureuill

# Pull Request

## Related issue

Related meilisearch/meilisearch#3000

## What does this PR do?

### User

- Filters using `field = inf` (or `infinite`, `NaN`) now match the value as a string rather than returning an internal error.
- Filters using `field < inf` (or other comparison operators) now return an invalid_filter error rather than returning an internal error, much like when using `field < aaa`.

### Implementation

- Add new `NonFiniteFloat` error variants to the filter-parser errors
- Add `Token::parse_as_finite_float` that can fail both when the string is not a float and when the float is not finite
- Refactor `Filter::inner_evaluate` to always use `parse_as_finite_float` instead of just `parse`
- Add corresponding tests

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-11-08 13:24:38 +00:00
37b3c5c323 Fix transform to use all_documents and ignore soft_deleted documents 2022-11-08 14:23:16 +01:00
1b1ad1923b Add a test to check that we take care of soft deleted documents 2022-11-08 14:23:14 +01:00
a836b8e703 tests: Tests filter with non-finite floats 2022-11-08 13:56:55 +01:00
3328560788 fix: allow filters on = inf, = NaN, return InvalidFilter for < inf, < NaN
Fixes meilisearch/meilisearch#3000
2022-11-08 13:27:15 +01:00
cf76ec7b37 Merge #673
673: Add clippy job r=ManyTheFish a=unvalley

# Pull Request

## Related issue
Fixes #231 

## What does this PR do?
- fix some remaining clippy errors
- add clippy job to CI (I set `nightly` as toolchain)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: unvalley <kirohi.code@gmail.com>
2022-11-08 09:43:26 +00:00
abf1cf9cd5 Fix clippy errors 2022-11-04 09:27:46 +09:00
b09676779d Use nightly for clippy and remove conflict mistake 2022-11-04 09:13:01 +09:00
70465aa5ce Execute cargo fmt 2022-11-04 08:59:58 +09:00
3009981d31 Fix clippy errors
Add clippy job

Add clippy job to CI
2022-11-04 08:58:14 +09:00
401e956128 Add clippy job
Add clippy job to CI
2022-11-04 08:58:12 +09:00
48eafc546f ci: Use pre-compiled binaries for faster CI 2022-11-04 00:03:53 +07:00
6add470805 Merge #659
659: Fix clippy error to add clippy job on Ci r=Kerollmops a=unvalley

## Related PR
This PR is for #673 

## What does this PR do?
- ~~add `Run Clippy` job to CI (rust.yml)~~
- apply `cargo clippy --fix` command
- fix some `cargo clippy` errors manually (but warnings still remain on tests)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: unvalley <kirohi.code@gmail.com>
Co-authored-by: unvalley <38400669+unvalley@users.noreply.github.com>
2022-11-03 15:24:38 +00:00
13175f2339 refactor: match for filterCondition 2022-11-03 17:34:33 +09:00
1a1ad8a792 Merge #679
679: Bump Swatinem/rust-cache from 2.0.0 to 2.0.1 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.0 to 2.0.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.0.1</h2>
<ul>
<li>Primarily just updating dependencies to fix GitHub deprecation notices.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.0.1</h2>
<ul>
<li>Primarily just updating dependencies to fix GitHub deprecation notices.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="22c9328bcb"><code>22c9328</code></a> 2.0.1</li>
<li><a href="d4d463bd9b"><code>d4d463b</code></a> bump deps and rebuild</li>
<li><a href="c4652c677c"><code>c4652c6</code></a> Update <code>@actions/core</code> (<a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/83">#83</a>)</li>
<li><a href="76686c56f2"><code>76686c5</code></a> docs: Fix github workflows directory (<a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/79">#79</a>)</li>
<li><a href="1b43d2f2c3"><code>1b43d2f</code></a> remove outdated versioning note</li>
<li><a href="20b9201e8a"><code>20b9201</code></a> bump cargo hash</li>
<li><a href="0d72e5f9a0"><code>0d72e5f</code></a> revert explicit dir close</li>
<li><a href="86531941c2"><code>8653194</code></a> Merge branch 'master' of <a href="https://github.com/Swatinem/rust-cache">https://github.com/Swatinem/rust-cache</a></li>
<li><a href="be4be3720d"><code>be4be37</code></a> explicitly close dir handles, add more logging, cleanups</li>
<li><a href="213334cd98"><code>213334c</code></a> cargo update</li>
<li>Additional commits viewable in <a href="https://github.com/Swatinem/rust-cache/compare/v2.0.0...v2.0.1">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.0.0&new-version=2.0.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-11-02 12:38:05 +00:00
4492605a78 Bump Swatinem/rust-cache from 2.0.0 to 2.0.1
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.0 to 2.0.1.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v2.0.0...v2.0.1)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-11-01 10:19:45 +00:00
fe5a0219e1 Merge #677
677: run the tests in all workspaces r=curquiza a=irevoire

With #676 I noticed the tests were not running in any of our sub crates.
Most of our sub crates didn't include any tests though.
But the filter-parser did and we're lucky we never broke these ones without noticing 😁 

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-10-31 18:05:04 +00:00
5ff066c3e7 run the tests in all workspaces 2022-10-31 18:38:48 +01:00
6770eb2a87 Merge #676
676: chore: added `IN`,`NOT IN` to `invalid_filter` msg r=irevoire a=Pranav-yadav

# Pull Request

## Related issue
`Fixes` https://github.com/meilisearch/meilisearch/issues/3004

## What does this PR do?
- Improves the error message in the response

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Pranav Yadav <Pranavyadav3912@gmail.com>
2022-10-31 17:29:24 +00:00
0d43ddbd85 Update filter-parser/src/lib.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2022-11-01 01:32:54 +09:00
3950ec8d3c chore: update tests for invalid_filter msg 2022-10-31 15:41:49 +00:00
3b35ebda50 chore: added IN,NOT IN to invalid_filter msg 2022-10-31 15:01:14 +00:00
4bcfd14a45 Merge #675
675: Deleted empty files r=Kerollmops a=SKVKPandey

# Pull Request

## Related issue
Fixes #674

## What does this PR do?
Delete empty files:
- `milli/src/heed_codec/facet/facet_string_level_zero_value_codec.rs`
- `milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs`

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Shashank Kashyap <50551759+SKVKPandey@users.noreply.github.com>
2022-10-30 07:09:30 +00:00
a07f0a4a43 Delete facet_string_zero_bounds_value_codec.rs 2022-10-30 08:59:04 +05:30
2dec6e86e9 Delete facet_string_level_zero_value_codec.rs 2022-10-30 08:58:36 +05:30
c965200010 Merge #664
664: Fix phrase search containing stop words r=ManyTheFish a=Samyak2

# Pull Request

This is a WIP draft PR I wanted to create to let other potential contributors know that I'm working on this issue. I'll be completing this within a few hours of opening it.

## Related issue
Fixes #661 and towards fixing meilisearch/meilisearch#2905

## What does this PR do?
- [x] Change Phrase Operation to use a `Vec<Option<String>>` instead of `Vec<String>` where `None` corresponds to a stop word
- [x] Update all other uses of phrase operation
- [x] Update `resolve_phrase`
- [x] Update `create_primitive_query`?
- [x] Add test

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Samyak S Sarnayak <samyak201@gmail.com>
Co-authored-by: Samyak Sarnayak <samyak201@gmail.com>
2022-10-29 13:42:52 +00:00
d55f0e2e53 Execute cargo fmt 2022-10-28 23:42:23 +09:00
d53a80b408 Fix clippy error 2022-10-28 23:41:35 +09:00
ecb88143f9 Run cargo fmt 2022-10-28 19:37:02 +05:30
03eb5d87c1 Only call plane_sweep on subgroups when 2 or more are present 2022-10-28 19:32:05 +05:30
a1d7ed1258 fix clippy error and remove clippy job from ci
Remove clippy job

Fix clippy error type_complexity

Restore ambiguous change
2022-10-28 22:33:50 +09:00
f3c0b05ae8 Fix rust fmt 2022-10-28 09:32:31 +09:00
f4ec1abb9b Fix all clippy error after conflicts 2022-10-27 23:58:13 +09:00
d35afa0cf5 Change consecutive phrase search grouping logic
Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-10-26 23:10:48 +05:30
752d031010 Update phrase search to use new execute method 2022-10-26 23:07:20 +05:30
c7322f704c Fix cargo clippy errors
Don't apply clippy for tests for now

Fix clippy warnings of filter-parser package

parent 8352febd646ec4bcf56a44161e5c4dce0e55111f
author unvalley <38400669+unvalley@users.noreply.github.com> 1666325847 +0900
committer unvalley <kirohi.code@gmail.com> 1666791316 +0900

Update .github/workflows/rust.yml

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>

Allow clippy lint too_many_arguments

Allow clippy lint needless_collect

Allow clippy lint too_many_arguments and type_complexity

Fix for clippy warnings comparison_chains

Fix for clippy warnings vec_init_then_push

Allow clippy lint should_implement_trait

Allow clippy lint drop_non_drop

Fix lifetime clippy warnings in filter-parser

Execute cargo fmt

Fix clippy remaining warnings

Fix clippy remaining warnings again and allow lint on each place
2022-10-27 01:04:23 +09:00
811f156031 Execute cargo clippy --fix 2022-10-27 01:00:00 +09:00
d8fed1f7a9 Add clippy job
Add Run Clippy to bors.toml
2022-10-27 01:00:00 +09:00
2e539249cb Merge #619
619: Refactor the Facets databases to enable incremental indexing r=curquiza a=loiclec

# Pull Request

## What does this PR do?
Partly fixes https://github.com/meilisearch/milli/issues/605 by making the indexing of the facet databases (i.e. `facet_id_f64_docids` and `facet_id_string_docids`) incremental. It also closes #327 and https://github.com/meilisearch/meilisearch/issues/2820 . Two more untracked bugs were also fixed:
1. The facet distribution algorithm did not respect the `maxFacetValues` parameter when there were only a few candidate document ids.
2. The structure of the levels > 0 of the facet databases was not updated following the deletion of documents

## How to review this PR

First, read this comment to get an overview of the changes.

Then, based on this comment, raise any concerns you might have about:
1. the new structure of the databases
2. the algorithms for sort, facet distribution, and range search
3. the new/removed heed codecs

Then, weigh in on the following concerns:
1. adding `fuzzcheck` as a fuzz-only dependency may add too much complexity for the benefits it provides
2. the `ByteSliceRef` and `StrRefCodec` are misnamed or should not exist
3. the new behaviour of facet distributions can be considered incorrect
4. incremental deletion is useless given that documents are always deleted in bulk

## What's left for me to do

1. Re-read everything once to make sure I haven't forgotten anything
2. Wait for the results of the benchmarks and see if (1) they provide enough information (2) there was any change in performance, especially for search queries. Then, maybe, spend some time optimising the code.
3. Test whether the `info`/`http-ui` crates survived the refactor

## Old structure of the `facet_id_f64_docids` and `facet_id_string_docids` databases

Previously, these two databases had different but conceptually similar structures. For each field id, the facet number database had the following format:
```
            ┌───────────────────────────────┬───────────────────────────────┬───────────────┐
┌───────┐   │            1.2 – 2            │           3.4 – 100           │   102 – 104   │
│Level 2│   │                               │                               │               │
└───────┘   │         a, b, d, f, z         │         c, d, e, f, g         │     u, y      │
            ├───────────────┬───────────────┼───────────────┬───────────────┼───────────────┤
┌───────┐   │   1.2 – 1.3   │    1.6 – 2    │   3.4 – 12    │  12.3 – 100   │   102 – 104   │
│Level 1│   │               │               │               │               │               │
└───────┘   │  a, b, d, z   │    a, b, f    │    c, d, g    │     e, f      │     u, y      │
            ├───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┤
┌───────┐   │  1.2  │  1.3  │  1.6  │   2   │  3.4  │   12  │  12.3 │  100  │  102  │  104  │
│Level 0│   │       │       │       │       │       │       │       │       │       │       │
└───────┘   │  a, b │  d, z │  b, f │  a, f │  c, d │   g   │   e   │  e, f │   y   │   u   │
            └───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘
```
where the first line is the key of the database, consisting of:
- the field id
- the level height
- the left and right bound of the group 

and the second line is the value of the database, consisting of:
- a bitmap of all the docids that have a facet value within the bounds

The `facet_id_string_docids` had a similar structure:
```
            ┌───────────────────────────────┬───────────────────────────────┬───────────────┐
┌───────┐   │             0 – 3             │             4 – 7             │     8 – 9     │
│Level 2│   │                               │                               │               │
└───────┘   │         a, b, d, f, z         │         c, d, e, f, g         │     u, y      │
            ├───────────────┬───────────────┼───────────────┬───────────────┼───────────────┤
┌───────┐   │     0 – 1     │     2 – 3     │     4 – 5     │     6 – 7     │     8 – 9     │
│Level 1│   │  "ab" – "ac"  │ "ba" – "bac"  │ "gaf" – "gal" │"form" – "wow" │ "woz" – "zz"  │
└───────┘   │  a, b, d, z   │    a, b, f    │    c, d, g    │     e, f      │     u, y      │
            ├───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┤
┌───────┐   │  "ab" │  "ac" │  "ba" │ "bac" │ "gaf" │ "gal" │ "form"│ "wow" │ "woz" │  "zz" │
│Level 0│   │  "AB" │ " Ac" │ "ba " │ "Bac" │ " GAF"│ "gal" │ "Form"│ " wow"│ "woz" │  "ZZ" │
└───────┘   │  a, b │  d, z │  b, f │  a, f │  c, d │   g   │   e   │  e, f │   y   │   u   │
            └───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘
```
where, **at level 0**, the key is:
* the normalised facet value (string)

and the value is:
* the original facet value (string)
* a bitmap of all the docids that have this normalised string facet value

**At level 1**, the key is:
* the left bound of the range as an index in level 0
* the right bound of the range as an index in level 0

and the value is:
* the left bound of the range as a normalised string
* the right bound of the range as a normalised string
* a bitmap of all the docids that have a string facet value within the bounds

**At level > 1**, the key is:
* the left bound of the range as an index in level 0
* the right bound of the range as an index in level 0

and the value is:
* a bitmap of all the docids that have a string facet value within the bounds

## New structure of the `facet_id_f64_docids` and `facet_id_string_docids` databases

Now both the `facet_id_f64_docids` and `facet_id_string_docids` databases have the exact same structure:
```                                                                                             
            ┌───────────────────────────────┬───────────────────────────────┬───────────────┐
┌───────┐   │           "ab" (2)            │           "gaf" (2)           │   "woz" (1)   │
│Level 2│   │                               │                               │               │
└───────┘   │        [a, b, d, f, z]        │        [c, d, e, f, g]        │    [u, y]     │
            ├───────────────┬───────────────┼───────────────┬───────────────┼───────────────┤
┌───────┐   │   "ab" (2)    │   "ba" (2)    │   "gaf" (2)   │  "form" (2)   │   "woz" (2)   │
│Level 1│   │               │               │               │               │               │
└───────┘   │ [a, b, d, z]  │   [a, b, f]   │   [c, d, g]   │    [e, f]     │    [u, y]     │
            ├───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┤
┌───────┐   │  "ab" │  "ac" │  "ba" │ "bac" │ "gaf" │ "gal" │ "form"│ "wow" │ "woz" │  "zz" │
│Level 0│   │       │       │       │       │       │       │       │       │       │       │
└───────┘   │ [a, b]│ [d, z]│ [b, f]│ [a, f]│ [c, d]│  [g]  │  [e]  │ [e, f]│  [y]  │  [u]  │
            └───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘
```
where for all levels, the key is a `FacetGroupKey<T>` containing:
* the field id (`u16`)
* the level height (`u8`)
* the left bound of the range (`T`)

and the value is a `FacetGroupValue` containing:
* the number of elements from the level below that are part of the range (`u8`, =0 for level 0)
* a bitmap of all the docids that have a facet value within the bounds (`RoaringBitmap`)

The right bound of the range is now implicit, it is equal to `Excluded(next_left_bound)`.

In the code, the key is always encoded using `FacetGroupKeyCodec<C>` where `C` is the codec used to encode the facet value (either `OrderedF64Codec` or `StrRefCodec`) and the value is encoded with `FacetGroupValueCodec`.

Since both databases share the same structure, we can implement almost all operations only once by treating the facet value as a byte slice (i.e. `FacetGroupKey<&[u8]>` encoded as `FacetGroupKeyCodec<ByteSliceRef>`). This is, in my opinion, a big simplification.

The reason for changing the structure of the databases is to make it possible to incrementally add a facet value to an existing database. Since the `facet_id_string_docids` used to store indices to `level 0` in all levels > 0, adding an element to level 0 would potentially invalidate all the indices.

Note that the original string value of a facet is no longer stored in this database.

## Incrementally adding a facet value

Here I describe how we can add a facet value to the new database incrementally. If we want to add the document with id `z` and facet value `gap`, then we want to add/modify the elements highlighted below in pink:
<img width="946" alt="Screenshot 2022-09-12 at 10 14 54" src="https://user-images.githubusercontent.com/6040237/189605532-fe4b0f52-e13d-4b3c-92d9-10c705953e3d.png">

which results in:
<img width="662" alt="Screenshot 2022-09-12 at 10 23 29" src="https://user-images.githubusercontent.com/6040237/189607015-c3a37588-b825-43c2-878a-f8f85c000b94.png">

* one element was added in level 0
* one key/value was modified in level 1
* one value was modified in level 2

Adding this element was easy since we could simply add it to level 0 and then increase the `group_size` part of the value for the level above. However, in order to keep the structure balanced, we can't always do this. If the group size reaches a threshold (`max_group_size`), then we split the node into two. For example, let's imagine that `max_group_size` is `4` and we add the docid `y` with facet value `gas`. First, we add it in level 0:
<img width="904" alt="Screenshot 2022-09-12 at 10 30 40" src="https://user-images.githubusercontent.com/6040237/189608391-531f9df1-3424-4f1f-8344-73eb194570e5.png">
Then, we realise that the group size of its parent is going to reach the maximum group size (=4) and thus we split the parent into two nodes:
<img width="919" alt="Screenshot 2022-09-12 at 10 33 16" src="https://user-images.githubusercontent.com/6040237/189608884-66f87635-1fc6-41d2-a459-87c995491ac4.png">
and since we inserted an element in level 1, we also update level 2 accordingly, by increasing the group size of the parent:
<img width="915" alt="Screenshot 2022-09-12 at 10 34 42" src="https://user-images.githubusercontent.com/6040237/189609233-d4a893ff-254a-48a7-a5ad-c0dc337f23ca.png">

We also have two other parameters:
* `group_size` is the default group size when building the database from scratch
* `min_level_size` is the minimum number of elements that a level should contain

When the highest level size is greater than `group_size * min_level_size`, then we create an additional level above it.

There is one more edge case for the insertion algorithm. While we normally don't modify the existing left bounds of a key, we have to do it if the facet value being inserted is smaller than the first left bound. For example, inserting `"aa"` with the docid `w` would change the database to:
<img width="756" alt="Screenshot 2022-09-12 at 10 41 56" src="https://user-images.githubusercontent.com/6040237/189610637-a043ef71-7159-4bf1-b4fd-9903134fc095.png">

The root of the code for incremental indexing is the `FacetUpdateIncremental` builder.

## Incrementally removing a facet value
TODO: the algorithm was implemented and works, but its current API is: `fn delete(self, facet_value, single_docid)`. It removes the given document id from all keys containing the given facet value. I don't think it is the right way to implement it anymore. Perhaps a bitmap of docids should be given instead. This is fairly easy to do. But since we batch document deletions together (because of soft deletion), it's not clear to me anymore that incremental deletion should be implemented at all.  

## Bulk insertion
While it's faster to incrementally add a single facet value to the database, it is sometimes **slower** to repeatedly add facet values one-by-one instead of doing it in bulk. For example, during initial indexing, we'd like to build the database from a list of facet values and associated document ids in one go. The `FacetUpdateBulk` builder provides a way to do so. It works by:
1. clearing all levels > 0 from the DB
2. adding all new elements in level 0
3. rebuilding the higher levels from scratch 

The algorithm for bulk insertion is the same as the previous one.

## Choosing between incremental and bulk insertion
On my computer, I measured that it is about 50x slower to add N facet values incrementally than it is to re-build a database with N facet values in level 0. Therefore, we dynamically choose to use either incremental insertion or bulk insertion based on (1) the number of existing elements in level 0 of the database and (2) the number of facet values from the new documents.

This is imprecise but is mainly aimed at avoiding the worst-case scenario where the incremental insertion method is used repeatedly millions of times.

## Fuzz-testing

**Potentially controversial:**
I fuzz-tested incremental addition and deletion using fuzzcheck, which found many bugs. The fuzz-test consists of inserting/deleting facet values and docids in succession, each operation is processed with different parameters for `group_size`, `max_group_size`, and `min_level_size`. After all the operations are processed, the content of level 0 is compared to the content of an equivalent structure with a simple and easily-checked implementation. Furthermore, we check that the database has a correct structure (all groups from levels > 0 correctly combine the content of their children). I also visualised the code coverage found by the fuzz-test. It covered 100% of the relevant code except for `unreachable/panic` statements and errors returned by `heed`.

The fuzz-test and the fuzzcheck dependency are only compiled when `cargo fuzzcheck` is used. For now, the dependency is from a local path on my computer, but it can be changed to a crate version if we decide to keep it. 

## Algorithms operating on the facet databases

There are four important algorithms making use of the facet databases:
1. Sort, ascending
2. Sort, descending
3. Facet distribution
4. Range search

Previously, the implementation of all four algorithms was based on a number of iterators specific to each database kind (number or string): `FacetNumberRange`, `FacetNumberRevRange`, `FacetNumberIter` (with a reversed and reducing/non-reducing option), `FacetStringGroupRange`, `FacetStringGroupRevRange`, `FacetStringLevel0Range`, `FacetStringLevel0RevRange`, and `FacetStringIter` (reversed + reducing/non-reducing). 

Now, all four algorithms have a unique implementation shared by both the string and number databases. There are four functions:
1. `ascending_facet_sort` in `search/facet/facet_sort_ascending.rs`
2. `descending_facet_sort` in `search/facet/facet_sort_descending.rs`
3. `iterate_over_facet_distribution` in `search/facet/facet_distribution_iter.rs`
4. `find_docids_of_facet_within_bounds` in `search/facet/facet_range_search.rs`

I have tried to test them with some snapshot tests but more testing could still be done. I don't *think* that the performance of these algorithms regressed, but that will need to be confirmed by benchmarks.

## Change of behaviour for facet distributions

Previously, the original string value of a facet was stored in the level 0 of `facet_id_string_docids`. This is no longer the case. The original string value was used in the implementation of the facet distribution algorithm. Now, to recover it, we pick a random document id which contains the normalised string value and look up the original one in `field_id_docid_facet_strings`. As a consequence, it may be that the string value returned in the facet distribution does not appear in any of the candidates. For example,
```json
{ "id": 0, "colour": "RED" }
{ "id": 1, "colour": "red" }
```
Facet distribution for the `colour` field among the candidates `[1]`:
```
{ "RED": 1 }
```
Here, "RED" was given as the original facet value even though it does not appear in the document id `1`.

## Heed codecs

A number of heed codecs related to the facet databases were removed:
* `FacetLevelValueF64Codec`
* `FacetLevelValueU32Codec`
* `FacetStringLevelZeroCodec`
* `StringValueCodec`
* `FacetStringZeroBoundsValueCodec`
* `FacetValueStringCodec`
* `FieldDocIdFacetStringCodec`
* `FieldDocIdFacetF64Codec`

They were replaced by:
* `FacetGroupKeyCodec<C>` (replaces all key codecs for the facet databases)
* `FacetGroupValueCodec` (replaces all value codecs for the facet databases)
* `FieldDocIdFacetCodec<C>` (replaces `FieldDocIdFacetStringCodec` and `FieldDocIdFacetF64Codec`)

Since the associated encoded item of `FacetGroupKeyCodec<C>` is `FacetKey<T>` and we often work with `FacetKey<&[u8]>` and `FacetKey<&str>`, we need codecs that encode values of type `&str` and `&[u8]`. The existing `ByteSlice` and `Str` codecs do not work for that purpose (their `EItem` types are `[u8]` and `str`), so I have created two new codecs:
* `ByteSliceRef` is a codec with a `EItem = DItem = &[u8]`
* `StrRefCodec` is a codec with a `EItem = DItem = &str`

I have also factored out the code used to encode an ordered f64 into its own `OrderedF64Codec`.


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-10-26 15:04:53 +00:00
488d31ecdf Run cargo fmt 2022-10-26 19:09:45 +05:30
af33d22f25 Consecutive is false when at least 1 stop word is surrounded by words 2022-10-26 19:09:45 +05:30
f1da623af3 Add test for phrase search with stop words and all criteria at once
Moved the actual test into a separate function used by both the existing
test and the new test.
2022-10-26 19:09:44 +05:30
77f1ff019b Simplify stop word checking in create_primitive_query 2022-10-26 19:09:44 +05:30
2aa11afb87 Fix panic when phrase contains only one stop word and nothing else 2022-10-26 19:09:42 +05:30
bb9ce3c5c5 Run cargo fmt 2022-10-26 19:09:03 +05:30
d187b32a28 Fix snapshots to use new phrase type 2022-10-26 19:09:03 +05:30
c8c666c6a6 Use resolve_phrase in exactness and typo criteria 2022-10-26 19:09:01 +05:30
3e190503e6 Search for closest non-stop words in proximity criteria 2022-10-26 19:08:34 +05:30
709ab3c14c Increment position even when it's a stop word in exactness criteria 2022-10-26 19:08:33 +05:30
ef13c6a5b6 Perform filter after enumerate to keep origin indices 2022-10-26 19:08:33 +05:30
6a10b679ca Add test for phrase search with stop words
Originally written by ManyTheFish here:
https://gist.github.com/ManyTheFish/f840e37cb2d2e029ce05396b4d540762

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-10-26 19:08:32 +05:30
62816dddde [WIP] Fix phrase search containing stop words
Fixes #661 and meilisearch/meilisearch#2905
2022-10-26 19:08:06 +05:30
54c0cf93fe Merge remote-tracking branch 'origin/main' into facet-levels-refactor 2022-10-26 15:13:34 +02:00
365f44c39b Merge #668
668: Fix many Clippy errors part 2 r=ManyTheFish a=ehiggs

This brings us a step closer to enforcing clippy on each build.

# Pull Request

## Related issue
This does not fix any issue outright, but it is a second round of fixes for clippy after https://github.com/meilisearch/milli/pull/665. This should contribute to fixing https://github.com/meilisearch/milli/pull/659.

## What does this PR do?

Satisfies many issues for clippy. The complaints are mostly:

* Passing reference where a variable is already a reference.
* Using clone where a struct already implements `Copy`
* Using `ok_or_else` with a closure that merely returns a value instead of calling a function (hence we use `ok_or`)
* Unambiguous lifetimes don't need names, so we can just use `'_`
* Using `return` when it is not needed as we are on the last expression of a function.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
2022-10-26 12:16:24 +00:00
2fa85a24ec Remove outdated files from http-ui/ and infos/
... that were reintroduced after a rebase
2022-10-26 14:09:35 +02:00
631e9910da Depend on released version of fuzzcheck from crates.io 2022-10-26 14:06:59 +02:00
2741756248 Merge remote-tracking branch 'origin/main' into facet-levels-refactor 2022-10-26 14:03:23 +02:00
d3f95e6c69 Merge #671
671: Update version for the next release (v0.35.0) in Cargo.toml files r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-10-26 11:58:05 +00:00
b7f2428961 Fix formatting and warning after rebasing from main 2022-10-26 13:49:33 +02:00
3b1f908e5e Revert behaviour of facet distribution to what it was before
Where the docid that is used to get the original facet string value
definitely belongs to the candidates
2022-10-26 13:48:01 +02:00
14ca8048a8 Add some documentation on how to run the facet db fuzzer 2022-10-26 13:48:01 +02:00
206a3e00e5 cargo fmt 2022-10-26 13:48:01 +02:00
f198b20c42 Add facet deletion tests that use both the incremental and bulk methods
+ update deletion snapshots to the new database format
2022-10-26 13:47:46 +02:00
e3ba1fc883 Make deletion tests for both soft-deletion and hard-deletion 2022-10-26 13:47:46 +02:00
ab5e56fd16 Add document deletion snapshot tests and tests for hard-deletion 2022-10-26 13:47:46 +02:00
d885de1600 Add option to avoid soft deletion of documents 2022-10-26 13:47:46 +02:00
ee1abfd1c1 Ignore files generated by fuzzcheck 2022-10-26 13:47:46 +02:00
2295e0e3ce Use real delete function in facet indexing fuzz tests
By deleting multiple docids at once instead of one-by-one
2022-10-26 13:47:46 +02:00
acc8caebe6 Add link to GitHub PR to document of update/facet module 2022-10-26 13:47:46 +02:00
a034a1e628 Move StrRefCodec and ByteSliceRefCodec to their own files 2022-10-26 13:47:46 +02:00
1165ba2171 Make facet deletion incremental 2022-10-26 13:47:04 +02:00
0ade699873 Don't crash when failing to decode using StrRef codec 2022-10-26 13:47:04 +02:00
d0109627b9 Fix a bug in facet_range_search and add documentation 2022-10-26 13:47:04 +02:00
a2270b7432 Change fuzzcheck dependency to point to git repository 2022-10-26 13:47:04 +02:00
1ecd3bb822 Fix bug in FieldDocIdFacetCodec 2022-10-26 13:47:04 +02:00
51961e1064 Polish some details 2022-10-26 13:47:04 +02:00
cb8442a119 Further unify facet databases of f64s and strings 2022-10-26 13:47:04 +02:00
3baa34d842 Fix compiler errors/warnings 2022-10-26 13:47:04 +02:00
86d9f50b9c Fix bugs in incremental facet indexing with variable parameters
e.g. add one facet value incrementally with a group_size = X and then
add another one with group_size = Y

It is not actually possible to do so with the public API of milli,
but I wanted to make sure the algorithm worked well in those cases
anyway.

The bugs were found by fuzzing the code with fuzzcheck, which I've added
to milli as a conditional dev-dependency. But it can be removed later.
2022-10-26 13:47:04 +02:00
de52a9bf75 Improve documentation of some facet-related algorithms 2022-10-26 13:47:04 +02:00
985a94adfc cargo fmt 2022-10-26 13:47:04 +02:00
b1ab09196c Remove outdated TODOs 2022-10-26 13:47:04 +02:00
3d7ed3263f Fix bug in string facet distribution with few candidates 2022-10-26 13:47:04 +02:00
fca4577e23 Return original string in facet distributions, work on facet tests 2022-10-26 13:47:04 +02:00
27454e9828 Document and refine facet indexing algorithms 2022-10-26 13:47:04 +02:00
bee3c23b45 Add comparison benchmark between bulk and incremental facet indexing 2022-10-26 13:47:04 +02:00
b2f01ad204 Refactor facet database tests 2022-10-26 13:47:04 +02:00
9026867d17 Give same interface to bulk and incremental facet indexing types
+ cargo fmt, oops, sorry for the bad history :(
2022-10-26 13:47:04 +02:00
330c9eb1b2 Rename facet codecs and refine FacetsUpdate API 2022-10-26 13:47:04 +02:00
485a72306d Refactor facet-related codecs 2022-10-26 13:47:04 +02:00
9b55e582cd Add FacetsUpdate type that wraps incremental and bulk indexing methods 2022-10-26 13:47:04 +02:00
3d145d7f48 Merge the two <facetttype>_faceted_documents_ids methods into one 2022-10-26 13:47:04 +02:00
982efab88f Fix encoding bugs in facet databases 2022-10-26 13:47:04 +02:00
079ed4a992 Add more snapshots 2022-10-26 13:47:04 +02:00
afdf87f6f7 Fix bugs in asc/desc criterion and facet indexing 2022-10-26 13:47:04 +02:00
a7201ece04 cargo fmt 2022-10-26 13:47:04 +02:00
36296bbb20 Add facet incremental indexing snapshot tests + fix bug 2022-10-26 13:47:04 +02:00
07ff92c663 Add more snapshots from facet tests 2022-10-26 13:47:04 +02:00
61252248fb Fix some facet indexing bugs 2022-10-26 13:47:04 +02:00
68cbcdf08b Fix compile errors/warnings in http-ui and infos 2022-10-26 13:47:04 +02:00
85824ee203 Try to make facet indexing incremental 2022-10-26 13:47:04 +02:00
d30c89e345 Fix compile error+warnings in new tests 2022-10-26 13:46:46 +02:00
e8a156d682 Reorganise facets database indexing code 2022-10-26 13:46:46 +02:00
fb8d23deb3 Reintroduce db_snap! for facet databases 2022-10-26 13:46:14 +02:00
e570c23153 Reintroduce asc/desc functionality 2022-10-26 13:46:14 +02:00
bd2c0e1ab6 Remove unused code 2022-10-26 13:46:14 +02:00
39a4a0a362 Reintroduce filter range search and facet extractors 2022-10-26 13:46:14 +02:00
22d80eeaf9 Reintroduce facet deletion functionality 2022-10-26 13:46:14 +02:00
6cc91824c1 Remove unused heed codec files 2022-10-26 13:46:14 +02:00
5a904cf29d Reintroduce facet distribution functionality 2022-10-26 13:46:14 +02:00
b8a1caad5e Add range search and incremental indexing algorithm 2022-10-26 13:46:14 +02:00
63ef0aba18 Start porting facet distribution and sort to new database structure 2022-10-26 13:46:14 +02:00
7913d6365c Update Facets indexing to be compatible with new database structure 2022-10-26 13:46:14 +02:00
c3f49f766d Prepare refactor of facets database
Prepare refactor of facets database
2022-10-26 13:46:14 +02:00
e883bccc76 Update version for the next release (v0.35.0) in Cargo.toml files 2022-10-26 11:43:54 +00:00
c8f16530d5 Merge #616
616: Introduce an indexation abortion function when indexing documents r=Kerollmops a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-10-26 11:41:18 +00:00
9d27ac8a2e Ignore too many arguments to functions. 2022-10-25 21:22:53 +02:00
42cdc38c7b Allow weird ranges like 1..=0 to pass clippy.
Everything else is just a warning and exit code will be 0.
2022-10-25 21:12:59 +02:00
2ce025a906 Fixes after rebase to fix new issues. 2022-10-25 20:58:31 +02:00
17f7922bfc Remove unneeded lifetimes. 2022-10-25 20:49:04 +02:00
6b2fe94192 Fixes for clippy bringing us down to 18 remaining issues.
This brings us a step closer to enforcing clippy on each build.
2022-10-25 20:49:02 +02:00
004c09a8e2 Merge #669
669: Add method to create a new Index with specific creation dates r=irevoire a=loiclec

This functionality is needed to implement the import of dumps correctly.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-10-25 12:44:43 +00:00
36bd66281d Add method to create a new Index with specific creation dates 2022-10-25 14:37:56 +02:00
d11a6e187f Merge #639
639: Reduce the size of the word_pair_proximity database  r=loiclec a=loiclec

# Pull Request

## What does this PR do?
Fixes #634 

Now, the value corresponding to the key `prox word1 word2` in the `word_pair_proximity_docids` database contains the ids of the documents in which:
- `word1` is followed by `word2`
- the minimum number of words between `word1` and `word2` is `prox-1`

Before this PR, the `word_pair_proximity_docids` had keys with the format `word1 word2 prox` and the value contained the ids of the documents in which either:
- `word1` is followed by `word2` after a minimum of `prox-1` words in between them
- `word2` is followed by `word1` after a minimum of `prox-2` words 

As a consequence of this change, calls such as:
```
let docids = word_pair_proximity_docids.get(rtxn, (word1, word2, prox));
```
have to be replaced with:
```
let docids1 = word_pair_proximity_docids.get(rtxn, (prox, word1, word2)) ;
let docids2 = word_pair_proximity_docids.get(rtxn, (prox-1, word2, word1)) ;
let docids = docids1 | docids2;
```

## Phrase search

The PR also fixes two bugs in the `resolve_phrase` function. The first bug is that a phrase containing the same word twice would always return zero documents (e.g. `"dog eats dog"`).

The second bug occurs with a phrase such as `"fox is smarter than a dog"` and the document with the text:
```
fox or dog? a fox is smarter than a dog
```
In that case, the phrase search would not return the documents because:
* we only have the key `fox dog 2` in `word_pair_proximity_docids`
* but the implementation of `resolve_phrase` looks for `fox dog 5`, which returns 0 documents 

### New implementation of `resolve_phrase`
Given the phrase:
```
fox is smarter than a dog
```
We select the document ids corresponding to all of the following keys in `word_pair_proximity_docids`:
- `1 fox is`
- `1 is smarter`
- `1 smarter than`
- (etc.)
- `1 fox smarter` OR `2 fox smarter`
- `1 is than` OR `2 is than`
- ...
- `1 than dog` OR `2 than dog`

## Benchmark Results

Indexing:
```
group                                                                     indexing_main_d94339a8                 indexing_word-pair-proximity-docids-refactor_2983dd8e
-----                                                                     ----------------------                 -----------------------------------------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.19    40.7±11.28ms        ? ?/sec    1.00     34.3±4.16ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.62     11.3±3.77ms        ? ?/sec    1.00      7.0±1.56ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.00     12.5±2.62ms        ? ?/sec    1.07     13.4±4.24ms        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.26    50.2±12.63ms        ? ?/sec    1.00    39.8±20.25ms        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.83   269.1±16.11ms        ? ?/sec    1.00    146.8±6.12ms        ? ?/sec
indexing/Indexing geo_point                                               1.00      47.2±0.46s        ? ?/sec    1.00      47.3±0.56s        ? ?/sec
indexing/Indexing movies in three batches                                 1.42      12.7±0.13s        ? ?/sec    1.00       9.0±0.07s        ? ?/sec
indexing/Indexing movies with default settings                            1.40      10.2±0.07s        ? ?/sec    1.00       7.3±0.06s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.22       7.8±0.11s        ? ?/sec    1.00       6.4±0.13s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.24       7.3±0.07s        ? ?/sec    1.00       5.9±0.06s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.14      47.6±0.67s        ? ?/sec    1.00      41.8±0.63s        ? ?/sec
indexing/Indexing songs with default settings                             1.13      44.1±0.74s        ? ?/sec    1.00      38.9±0.76s        ? ?/sec
indexing/Indexing songs without any facets                                1.19      42.0±0.66s        ? ?/sec    1.00      35.2±0.48s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.20      44.3±1.40s        ? ?/sec    1.00      37.0±0.48s        ? ?/sec
indexing/Indexing wiki                                                    1.39     862.9±9.95s        ? ?/sec    1.00    622.6±27.11s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.40     934.4±5.97s        ? ?/sec    1.00     665.7±4.72s        ? ?/sec
indexing/Reindexing geo_point                                             1.01      15.9±0.39s        ? ?/sec    1.00      15.7±0.28s        ? ?/sec
indexing/Reindexing movies with default settings                          1.15   288.8±25.03ms        ? ?/sec    1.00    250.4±2.23ms        ? ?/sec
indexing/Reindexing songs with default settings                           1.01       4.1±0.06s        ? ?/sec    1.00       4.1±0.03s        ? ?/sec
indexing/Reindexing wiki                                                  1.41   1484.7±20.59s        ? ?/sec    1.00   1052.0±19.89s        ? ?/sec
```

Search Wiki:
<details>
<pre>
group                                                                                    search_wiki_main_d94339a8              search_wiki_word-pair-proximity-docids-refactor_2983dd8e
-----                                                                                    -------------------------              --------------------------------------------------------
smol-wiki-articles.csv: basic placeholder/                                               1.02     25.8±0.21µs        ? ?/sec    1.00     25.4±0.19µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"film"                                          1.00    441.7±2.57µs        ? ?/sec    1.00    442.3±2.41µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"france"                                        1.00    357.0±2.63µs        ? ?/sec    1.00    358.3±2.65µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"japan"                                         1.00    239.4±2.24µs        ? ?/sec    1.00    240.2±1.82µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"machine"                                       1.00    180.3±2.40µs        ? ?/sec    1.00    180.0±1.08µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"miles" "davis"                                 1.00      9.1±0.03ms        ? ?/sec    1.03      9.3±0.04ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"mingus"                                        1.00      3.6±0.01ms        ? ?/sec    1.03      3.7±0.02ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"rock" "and" "roll"                             1.00     34.0±0.11ms        ? ?/sec    1.03     35.1±0.13ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"spain"                                         1.00    162.0±0.88µs        ? ?/sec    1.00    161.9±0.98µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/film                                         1.01    164.4±1.46µs        ? ?/sec    1.00    163.1±1.58µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/france                                       1.00   1698.3±7.37µs        ? ?/sec    1.00  1697.7±11.53µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/japan                                        1.00  1154.0±23.61µs        ? ?/sec    1.00   1150.7±9.27µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/machine                                      1.00    524.6±3.45µs        ? ?/sec    1.01    528.1±4.56µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/miles davis                                  1.00     13.5±0.05ms        ? ?/sec    1.02     13.8±0.05ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/mingus                                       1.00      4.1±0.02ms        ? ?/sec    1.03      4.2±0.01ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/rock and roll                                1.00     49.0±0.19ms        ? ?/sec    1.03     50.4±0.22ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/spain                                        1.00    412.2±3.35µs        ? ?/sec    1.00    412.9±2.81µs        ? ?/sec
smol-wiki-articles.csv: prefix search/c                                                  1.00    383.9±2.53µs        ? ?/sec    1.00    383.4±2.44µs        ? ?/sec
smol-wiki-articles.csv: prefix search/g                                                  1.00    433.4±2.53µs        ? ?/sec    1.00    432.8±2.52µs        ? ?/sec
smol-wiki-articles.csv: prefix search/j                                                  1.00    424.3±2.05µs        ? ?/sec    1.00    424.0±2.15µs        ? ?/sec
smol-wiki-articles.csv: prefix search/q                                                  1.00    154.0±1.93µs        ? ?/sec    1.00    153.5±1.04µs        ? ?/sec
smol-wiki-articles.csv: prefix search/t                                                  1.04   658.5±91.93µs        ? ?/sec    1.00    631.4±3.89µs        ? ?/sec
smol-wiki-articles.csv: prefix search/x                                                  1.00    446.2±2.09µs        ? ?/sec    1.00    445.6±3.13µs        ? ?/sec
smol-wiki-articles.csv: proximity/april paris                                            1.02      3.4±0.39ms        ? ?/sec    1.00      3.3±0.01ms        ? ?/sec
smol-wiki-articles.csv: proximity/diesel engine                                          1.00  1022.1±17.52µs        ? ?/sec    1.00   1017.7±8.16µs        ? ?/sec
smol-wiki-articles.csv: proximity/herald sings                                           1.01  1872.5±97.70µs        ? ?/sec    1.00   1862.2±8.57µs        ? ?/sec
smol-wiki-articles.csv: proximity/tea two                                                1.00   295.2±34.91µs        ? ?/sec    1.00    296.6±4.08µs        ? ?/sec
smol-wiki-articles.csv: typo/Disnaylande                                                 1.00      3.4±0.51ms        ? ?/sec    1.04      3.5±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/aritmetric                                                  1.00      3.6±0.01ms        ? ?/sec    1.00      3.7±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/linax                                                       1.00    167.5±1.28µs        ? ?/sec    1.00    167.1±2.65µs        ? ?/sec
smol-wiki-articles.csv: typo/migrosoft                                                   1.01    217.9±1.84µs        ? ?/sec    1.00    216.2±1.61µs        ? ?/sec
smol-wiki-articles.csv: typo/nympalidea                                                  1.00      2.9±0.01ms        ? ?/sec    1.10      3.1±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/phytogropher                                                1.00      3.0±0.23ms        ? ?/sec    1.08      3.3±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/sisan                                                       1.00    234.6±1.38µs        ? ?/sec    1.01    235.8±1.67µs        ? ?/sec
smol-wiki-articles.csv: typo/the fronce                                                  1.00    104.4±0.84µs        ? ?/sec    1.00    103.9±0.81µs        ? ?/sec
smol-wiki-articles.csv: words/Abraham machin                                             1.02    675.5±4.74µs        ? ?/sec    1.00    662.1±5.13µs        ? ?/sec
smol-wiki-articles.csv: words/Idaho Bellevue pizza                                       1.02  1004.5±11.07µs        ? ?/sec    1.00   989.5±13.08µs        ? ?/sec
smol-wiki-articles.csv: words/Kameya Tokujirō mingus monk                                1.00  1650.8±10.92µs        ? ?/sec    1.00  1643.2±10.77µs        ? ?/sec
smol-wiki-articles.csv: words/Ulrich Hensel meilisearch milli                            1.00      5.4±0.03ms        ? ?/sec    1.00      5.4±0.02ms        ? ?/sec
smol-wiki-articles.csv: words/the black saint and the sinner lady and the good doggo     1.00     32.9±0.10ms        ? ?/sec    1.00     32.8±0.10ms        ? ?/sec
</pre>
</details>

Search songs:
<details>
<pre>
group                                                                                                    search_songs_main_d94339a8             search_songs_word-pair-proximity-docids-refactor_2983dd8e
-----                                                                                                    --------------------------             ---------------------------------------------------------
smol-songs.csv: asc + default/Notstandskomitee                                                           1.00      3.0±0.01ms        ? ?/sec    1.01      3.0±0.04ms        ? ?/sec
smol-songs.csv: asc + default/charles                                                                    1.00      2.2±0.01ms        ? ?/sec    1.01      2.2±0.01ms        ? ?/sec
smol-songs.csv: asc + default/charles mingus                                                             1.00      3.1±0.01ms        ? ?/sec    1.01      3.1±0.01ms        ? ?/sec
smol-songs.csv: asc + default/david                                                                      1.00      2.9±0.01ms        ? ?/sec    1.00      2.9±0.01ms        ? ?/sec
smol-songs.csv: asc + default/david bowie                                                                1.00      4.5±0.02ms        ? ?/sec    1.00      4.5±0.02ms        ? ?/sec
smol-songs.csv: asc + default/john                                                                       1.00      3.1±0.01ms        ? ?/sec    1.01      3.2±0.01ms        ? ?/sec
smol-songs.csv: asc + default/marcus miller                                                              1.00      5.0±0.02ms        ? ?/sec    1.00      5.0±0.02ms        ? ?/sec
smol-songs.csv: asc + default/michael jackson                                                            1.00      4.7±0.02ms        ? ?/sec    1.00      4.7±0.02ms        ? ?/sec
smol-songs.csv: asc + default/tamo                                                                       1.00  1463.4±12.17µs        ? ?/sec    1.01   1481.5±8.83µs        ? ?/sec
smol-songs.csv: asc + default/thelonious monk                                                            1.00      4.4±0.01ms        ? ?/sec    1.00      4.4±0.02ms        ? ?/sec
smol-songs.csv: asc/Notstandskomitee                                                                     1.01      2.6±0.01ms        ? ?/sec    1.00      2.6±0.01ms        ? ?/sec
smol-songs.csv: asc/charles                                                                              1.00    473.6±3.70µs        ? ?/sec    1.01   476.8±22.17µs        ? ?/sec
smol-songs.csv: asc/charles mingus                                                                       1.01    780.1±3.90µs        ? ?/sec    1.00    773.6±4.60µs        ? ?/sec
smol-songs.csv: asc/david                                                                                1.00    757.6±4.50µs        ? ?/sec    1.00    760.7±5.20µs        ? ?/sec
smol-songs.csv: asc/david bowie                                                                          1.00   1131.2±8.68µs        ? ?/sec    1.00   1130.7±8.36µs        ? ?/sec
smol-songs.csv: asc/john                                                                                 1.00    668.9±6.48µs        ? ?/sec    1.00    669.9±2.78µs        ? ?/sec
smol-songs.csv: asc/marcus miller                                                                        1.00    959.8±7.10µs        ? ?/sec    1.00    958.9±4.72µs        ? ?/sec
smol-songs.csv: asc/michael jackson                                                                      1.01  1076.7±16.73µs        ? ?/sec    1.00   1070.8±7.34µs        ? ?/sec
smol-songs.csv: asc/tamo                                                                                 1.00     70.4±0.55µs        ? ?/sec    1.00     70.5±0.51µs        ? ?/sec
smol-songs.csv: asc/thelonious monk                                                                      1.01      2.9±0.01ms        ? ?/sec    1.00      2.9±0.01ms        ? ?/sec
smol-songs.csv: basic filter: <=/Notstandskomitee                                                        1.00    162.0±0.91µs        ? ?/sec    1.01    163.6±1.72µs        ? ?/sec
smol-songs.csv: basic filter: <=/charles                                                                 1.00     38.3±0.24µs        ? ?/sec    1.01     38.7±0.31µs        ? ?/sec
smol-songs.csv: basic filter: <=/charles mingus                                                          1.01     85.3±0.44µs        ? ?/sec    1.00     84.6±0.47µs        ? ?/sec
smol-songs.csv: basic filter: <=/david                                                                   1.01     32.4±0.25µs        ? ?/sec    1.00     32.1±0.24µs        ? ?/sec
smol-songs.csv: basic filter: <=/david bowie                                                             1.00     68.6±0.99µs        ? ?/sec    1.01     68.9±0.88µs        ? ?/sec
smol-songs.csv: basic filter: <=/john                                                                    1.04     26.1±0.37µs        ? ?/sec    1.00     25.1±0.22µs        ? ?/sec
smol-songs.csv: basic filter: <=/marcus miller                                                           1.00     76.7±0.39µs        ? ?/sec    1.01     77.3±0.61µs        ? ?/sec
smol-songs.csv: basic filter: <=/michael jackson                                                         1.00     95.5±0.66µs        ? ?/sec    1.01     96.3±0.79µs        ? ?/sec
smol-songs.csv: basic filter: <=/tamo                                                                    1.03     26.2±0.36µs        ? ?/sec    1.00     25.3±0.23µs        ? ?/sec
smol-songs.csv: basic filter: <=/thelonious monk                                                         1.00    140.7±1.36µs        ? ?/sec    1.01    142.7±0.88µs        ? ?/sec
smol-songs.csv: basic filter: TO/Notstandskomitee                                                        1.00    165.4±1.25µs        ? ?/sec    1.00    165.7±1.72µs        ? ?/sec
smol-songs.csv: basic filter: TO/charles                                                                 1.01     40.6±0.57µs        ? ?/sec    1.00     40.1±0.54µs        ? ?/sec
smol-songs.csv: basic filter: TO/charles mingus                                                          1.01     87.1±0.80µs        ? ?/sec    1.00     86.3±0.61µs        ? ?/sec
smol-songs.csv: basic filter: TO/david                                                                   1.02     34.5±0.26µs        ? ?/sec    1.00     33.7±0.24µs        ? ?/sec
smol-songs.csv: basic filter: TO/david bowie                                                             1.00     70.6±0.38µs        ? ?/sec    1.00     70.6±0.68µs        ? ?/sec
smol-songs.csv: basic filter: TO/john                                                                    1.02     27.5±0.77µs        ? ?/sec    1.00     26.9±0.21µs        ? ?/sec
smol-songs.csv: basic filter: TO/marcus miller                                                           1.01     79.8±0.76µs        ? ?/sec    1.00     79.3±1.27µs        ? ?/sec
smol-songs.csv: basic filter: TO/michael jackson                                                         1.00     98.3±0.54µs        ? ?/sec    1.00     98.0±0.88µs        ? ?/sec
smol-songs.csv: basic filter: TO/tamo                                                                    1.03     27.9±0.23µs        ? ?/sec    1.00     27.1±0.32µs        ? ?/sec
smol-songs.csv: basic filter: TO/thelonious monk                                                         1.00    142.5±1.36µs        ? ?/sec    1.02    145.2±0.98µs        ? ?/sec
smol-songs.csv: basic placeholder/                                                                       1.00     49.4±0.34µs        ? ?/sec    1.00     49.3±0.45µs        ? ?/sec
smol-songs.csv: basic with quote/"Notstandskomitee"                                                      1.00    190.5±1.60µs        ? ?/sec    1.01    191.8±2.10µs        ? ?/sec
smol-songs.csv: basic with quote/"charles"                                                               1.00    165.0±1.13µs        ? ?/sec    1.01    166.0±1.39µs        ? ?/sec
smol-songs.csv: basic with quote/"charles" "mingus"                                                      1.00  1149.4±15.78µs        ? ?/sec    1.02   1171.1±9.95µs        ? ?/sec
smol-songs.csv: basic with quote/"david"                                                                 1.00    236.5±1.61µs        ? ?/sec    1.00    236.9±1.73µs        ? ?/sec
smol-songs.csv: basic with quote/"david" "bowie"                                                         1.00   1384.8±9.02µs        ? ?/sec    1.01  1393.8±11.39µs        ? ?/sec
smol-songs.csv: basic with quote/"john"                                                                  1.00    358.3±4.85µs        ? ?/sec    1.00    358.9±1.75µs        ? ?/sec
smol-songs.csv: basic with quote/"marcus" "miller"                                                       1.00    281.4±1.79µs        ? ?/sec    1.01    285.6±3.24µs        ? ?/sec
smol-songs.csv: basic with quote/"michael" "jackson"                                                     1.00   1328.4±8.01µs        ? ?/sec    1.00   1334.6±8.00µs        ? ?/sec
smol-songs.csv: basic with quote/"tamo"                                                                  1.00    528.7±3.72µs        ? ?/sec    1.01    533.4±5.31µs        ? ?/sec
smol-songs.csv: basic with quote/"thelonious" "monk"                                                     1.00   1223.0±7.24µs        ? ?/sec    1.02  1245.7±12.04µs        ? ?/sec
smol-songs.csv: basic without quote/Notstandskomitee                                                     1.00      2.8±0.01ms        ? ?/sec    1.00      2.8±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/charles                                                              1.00    273.3±2.06µs        ? ?/sec    1.01    275.9±1.76µs        ? ?/sec
smol-songs.csv: basic without quote/charles mingus                                                       1.00      2.3±0.01ms        ? ?/sec    1.02      2.4±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/david                                                                1.00    434.3±3.86µs        ? ?/sec    1.01    436.7±2.47µs        ? ?/sec
smol-songs.csv: basic without quote/david bowie                                                          1.00      5.6±0.02ms        ? ?/sec    1.01      5.7±0.02ms        ? ?/sec
smol-songs.csv: basic without quote/john                                                                 1.00   1322.5±9.98µs        ? ?/sec    1.00  1321.2±17.40µs        ? ?/sec
smol-songs.csv: basic without quote/marcus miller                                                        1.02      2.4±0.02ms        ? ?/sec    1.00      2.4±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/michael jackson                                                      1.00      3.8±0.02ms        ? ?/sec    1.01      3.9±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/tamo                                                                 1.00    809.0±4.01µs        ? ?/sec    1.01    819.0±6.22µs        ? ?/sec
smol-songs.csv: basic without quote/thelonious monk                                                      1.00      3.8±0.02ms        ? ?/sec    1.02      3.9±0.02ms        ? ?/sec
smol-songs.csv: big filter/Notstandskomitee                                                              1.00      2.7±0.01ms        ? ?/sec    1.01      2.8±0.01ms        ? ?/sec
smol-songs.csv: big filter/charles                                                                       1.00    266.5±1.34µs        ? ?/sec    1.01    270.1±8.17µs        ? ?/sec
smol-songs.csv: big filter/charles mingus                                                                1.00    651.0±5.40µs        ? ?/sec    1.00    651.0±2.73µs        ? ?/sec
smol-songs.csv: big filter/david                                                                         1.00  1018.1±11.16µs        ? ?/sec    1.00   1022.3±8.94µs        ? ?/sec
smol-songs.csv: big filter/david bowie                                                                   1.00  1912.2±11.13µs        ? ?/sec    1.00   1919.8±8.30µs        ? ?/sec
smol-songs.csv: big filter/john                                                                          1.00    867.2±6.66µs        ? ?/sec    1.01    873.3±3.44µs        ? ?/sec
smol-songs.csv: big filter/marcus miller                                                                 1.00    717.7±2.86µs        ? ?/sec    1.01    721.5±3.89µs        ? ?/sec
smol-songs.csv: big filter/michael jackson                                                               1.00  1668.4±16.76µs        ? ?/sec    1.00  1667.9±10.11µs        ? ?/sec
smol-songs.csv: big filter/tamo                                                                          1.01    136.7±0.88µs        ? ?/sec    1.00    135.5±1.22µs        ? ?/sec
smol-songs.csv: big filter/thelonious monk                                                               1.03      3.1±0.02ms        ? ?/sec    1.00      3.0±0.01ms        ? ?/sec
smol-songs.csv: desc + default/Notstandskomitee                                                          1.00      3.0±0.01ms        ? ?/sec    1.00      3.0±0.01ms        ? ?/sec
smol-songs.csv: desc + default/charles                                                                   1.00  1599.5±13.07µs        ? ?/sec    1.01  1622.9±22.43µs        ? ?/sec
smol-songs.csv: desc + default/charles mingus                                                            1.00      2.3±0.01ms        ? ?/sec    1.01      2.4±0.03ms        ? ?/sec
smol-songs.csv: desc + default/david                                                                     1.00      5.7±0.02ms        ? ?/sec    1.00      5.7±0.02ms        ? ?/sec
smol-songs.csv: desc + default/david bowie                                                               1.00      9.0±0.04ms        ? ?/sec    1.00      9.0±0.03ms        ? ?/sec
smol-songs.csv: desc + default/john                                                                      1.00      4.5±0.01ms        ? ?/sec    1.00      4.5±0.02ms        ? ?/sec
smol-songs.csv: desc + default/marcus miller                                                             1.00      3.9±0.01ms        ? ?/sec    1.00      3.9±0.02ms        ? ?/sec
smol-songs.csv: desc + default/michael jackson                                                           1.00      6.6±0.03ms        ? ?/sec    1.00      6.6±0.03ms        ? ?/sec
smol-songs.csv: desc + default/tamo                                                                      1.00  1472.4±10.38µs        ? ?/sec    1.01   1484.2±8.07µs        ? ?/sec
smol-songs.csv: desc + default/thelonious monk                                                           1.00      4.4±0.02ms        ? ?/sec    1.00      4.4±0.05ms        ? ?/sec
smol-songs.csv: desc/Notstandskomitee                                                                    1.01      2.6±0.01ms        ? ?/sec    1.00      2.6±0.01ms        ? ?/sec
smol-songs.csv: desc/charles                                                                             1.00    475.9±3.38µs        ? ?/sec    1.00    475.9±2.64µs        ? ?/sec
smol-songs.csv: desc/charles mingus                                                                      1.00    775.3±4.30µs        ? ?/sec    1.00    778.9±3.52µs        ? ?/sec
smol-songs.csv: desc/david                                                                               1.00    757.9±4.10µs        ? ?/sec    1.01    763.4±3.27µs        ? ?/sec
smol-songs.csv: desc/david bowie                                                                         1.00  1129.0±11.87µs        ? ?/sec    1.01   1135.1±8.86µs        ? ?/sec
smol-songs.csv: desc/john                                                                                1.00    670.2±4.38µs        ? ?/sec    1.00    670.2±3.46µs        ? ?/sec
smol-songs.csv: desc/marcus miller                                                                       1.00    961.2±4.47µs        ? ?/sec    1.00    961.9±4.03µs        ? ?/sec
smol-songs.csv: desc/michael jackson                                                                     1.00   1076.5±6.61µs        ? ?/sec    1.00   1077.9±7.11µs        ? ?/sec
smol-songs.csv: desc/tamo                                                                                1.00     70.6±0.57µs        ? ?/sec    1.01     71.3±0.48µs        ? ?/sec
smol-songs.csv: desc/thelonious monk                                                                     1.01      2.9±0.01ms        ? ?/sec    1.00      2.9±0.01ms        ? ?/sec
smol-songs.csv: prefix search/a                                                                          1.00   1236.2±9.43µs        ? ?/sec    1.00  1232.0±12.07µs        ? ?/sec
smol-songs.csv: prefix search/b                                                                          1.00   1090.8±9.89µs        ? ?/sec    1.00   1090.8±9.43µs        ? ?/sec
smol-songs.csv: prefix search/i                                                                          1.00   1333.9±8.28µs        ? ?/sec    1.00  1334.2±11.21µs        ? ?/sec
smol-songs.csv: prefix search/s                                                                          1.00    810.5±3.69µs        ? ?/sec    1.00    806.6±3.50µs        ? ?/sec
smol-songs.csv: prefix search/x                                                                          1.00    290.5±1.88µs        ? ?/sec    1.00    291.0±1.85µs        ? ?/sec
smol-songs.csv: proximity/7000 Danses Un Jour Dans Notre Vie                                             1.00      4.7±0.02ms        ? ?/sec    1.00      4.7±0.02ms        ? ?/sec
smol-songs.csv: proximity/The Disneyland Sing-Along Chorus                                               1.01      5.6±0.02ms        ? ?/sec    1.00      5.6±0.03ms        ? ?/sec
smol-songs.csv: proximity/Under Great Northern Lights                                                    1.00      2.5±0.01ms        ? ?/sec    1.00      2.5±0.01ms        ? ?/sec
smol-songs.csv: proximity/black saint sinner lady                                                        1.00      4.8±0.02ms        ? ?/sec    1.00      4.8±0.02ms        ? ?/sec
smol-songs.csv: proximity/les dangeureuses 1960                                                          1.00      3.2±0.01ms        ? ?/sec    1.01      3.2±0.01ms        ? ?/sec
smol-songs.csv: typo/Arethla Franklin                                                                    1.00    388.7±5.16µs        ? ?/sec    1.00    390.0±2.11µs        ? ?/sec
smol-songs.csv: typo/Disnaylande                                                                         1.01      2.6±0.01ms        ? ?/sec    1.00      2.6±0.01ms        ? ?/sec
smol-songs.csv: typo/dire straights                                                                      1.00    125.9±1.22µs        ? ?/sec    1.00    126.0±0.71µs        ? ?/sec
smol-songs.csv: typo/fear of the duck                                                                    1.00    373.7±4.25µs        ? ?/sec    1.01   375.7±14.17µs        ? ?/sec
smol-songs.csv: typo/indochie                                                                            1.00    103.6±0.94µs        ? ?/sec    1.00    103.4±0.74µs        ? ?/sec
smol-songs.csv: typo/indochien                                                                           1.00    155.6±1.14µs        ? ?/sec    1.01    157.5±1.75µs        ? ?/sec
smol-songs.csv: typo/klub des loopers                                                                    1.00    160.6±2.98µs        ? ?/sec    1.01    161.7±1.96µs        ? ?/sec
smol-songs.csv: typo/michel depech                                                                       1.00     79.4±0.54µs        ? ?/sec    1.01     79.9±0.60µs        ? ?/sec
smol-songs.csv: typo/mongus                                                                              1.00    126.7±1.85µs        ? ?/sec    1.00    126.1±0.74µs        ? ?/sec
smol-songs.csv: typo/stromal                                                                             1.01    132.9±0.99µs        ? ?/sec    1.00    131.9±1.09µs        ? ?/sec
smol-songs.csv: typo/the white striper                                                                   1.00    287.8±2.88µs        ? ?/sec    1.00    286.5±1.91µs        ? ?/sec
smol-songs.csv: typo/thelonius monk                                                                      1.00    304.2±1.49µs        ? ?/sec    1.01    306.5±1.50µs        ? ?/sec
smol-songs.csv: words/7000 Danses / Le Baiser / je me trompe de mots                                     1.01     20.9±0.08ms        ? ?/sec    1.00     20.7±0.07ms        ? ?/sec
smol-songs.csv: words/Bring Your Daughter To The Slaughter but now this is not part of the title         1.00     48.9±0.13ms        ? ?/sec    1.00     48.9±0.11ms        ? ?/sec
smol-songs.csv: words/The Disneyland Children's Sing-Alone song                                          1.01     13.9±0.06ms        ? ?/sec    1.00     13.8±0.07ms        ? ?/sec
smol-songs.csv: words/les liaisons dangeureuses 1793                                                     1.01      3.7±0.01ms        ? ?/sec    1.00      3.6±0.02ms        ? ?/sec
smol-songs.csv: words/seven nation mummy                                                                 1.00  1054.2±14.49µs        ? ?/sec    1.00  1056.6±10.53µs        ? ?/sec
smol-songs.csv: words/the black saint and the sinner lady and the good doggo                             1.00     58.2±0.29ms        ? ?/sec    1.00     57.9±0.21ms        ? ?/sec
smol-songs.csv: words/whathavenotnsuchforth and a good amount of words to pop to match the first one     1.00     66.1±0.21ms        ? ?/sec    1.00     66.0±0.24ms        ? ?/sec
</code>
</details>

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-10-25 10:42:04 +00:00
9a569d73d1 Minor code style change 2022-10-24 15:30:43 +02:00
be302fd250 Remove outdated workaround for duplicate words in phrase search 2022-10-24 15:27:06 +02:00
d76d0cb1bf Merge branch 'main' into word-pair-proximity-docids-refactor 2022-10-24 15:23:00 +02:00
2bf867982a Merge #667
667: Update version for the next release (v0.34.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-10-24 10:19:04 +00:00
f3874d58b9 Update version for the next release (v0.34.0) in Cargo.toml files 2022-10-24 10:13:25 +00:00
a983129613 Apply suggestions from code review 2022-10-20 09:49:37 +02:00
f11a4087da Merge #665
665: Fixing piles of clippy errors. r=ManyTheFish a=ehiggs

## Related issue
No issue fixed. Simply cleaning up some code for clippy on the march towards a clean build when #659 is merged.

## What does this PR do?
Most of these are calling clone when the struct supports Copy.

Many are using & and &mut on `self` when the function they are called from already has an immutable or mutable borrow so this isn't needed.

I tried to stay away from actual changes or places where I'd have to name fresh variables.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
2022-10-20 07:19:46 +00:00
176ffd23f5 Fix compile error after rebasing wppd-refactor 2022-10-18 10:40:26 +02:00
ab2f6f3aa4 Refine some details in word_prefix_pair_proximity indexing code 2022-10-18 10:37:34 +02:00
e6e76fbefe Improve performance of resolve_phrase at the cost of some relevancy 2022-10-18 10:37:34 +02:00
178d00f93a Cargo fmt 2022-10-18 10:37:34 +02:00
830a7c0c7a Use resolve_phrase function for exactness criteria as well 2022-10-18 10:37:34 +02:00
18d578dfc4 Adjust some algorithms using DBs of word pair proximities 2022-10-18 10:37:34 +02:00
072b576514 Fix proximity value in keys of prefix_word_pair_proximity_docids 2022-10-18 10:37:34 +02:00
6c3a5d69e1 Update snapshots 2022-10-18 10:37:34 +02:00
a7de4f5b85 Don't add swapped word pairs to the word_pair_proximity_docids db 2022-10-18 10:37:34 +02:00
264a04922d Add prefix_word_pair_proximity database
Similar to the word_prefix_pair_proximity one but instead the keys are:
(proximity, prefix, word2)
2022-10-18 10:37:34 +02:00
1dbbd8694f Rename StrStrU8Codec to U8StrStrCodec and reorder its fields 2022-10-18 10:37:34 +02:00
bdeb47305e Change encoding of word_pair_proximity DB to (proximity, word1, word2)
Same for word_prefix_pair_proximity
2022-10-18 10:37:34 +02:00
19b2326f3d Merge #586
586: Add settings to force milli to exhaustively compute the total number of hits r=Kerollmops a=ManyTheFish

Add a new setting `exhaustive_number_hits` to `Search` that forces the `Initial` criterion to exhaustively compute the bucket_candidates, allowing end users to implement finite pagination.
 
related to https://github.com/meilisearch/meilisearch/pull/2601

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2022-10-17 16:24:35 +00:00
81919a35a2 Update milli/src/search/criteria/initial.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-10-17 18:23:20 +02:00
516e838eb4 Update milli/src/search/criteria/initial.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-10-17 18:23:15 +02:00
fc03e53615 Add a test to check that we can abort an indexation 2022-10-17 17:28:03 +02:00
6603437cb1 Introduce an indexation abortion function when indexing documents 2022-10-17 17:28:03 +02:00
6f55e7844c Add some code comments 2022-10-17 14:41:57 +02:00
cf203b7fde Take filter into account when computing the pages candidates 2022-10-17 14:13:44 +02:00
d71bc1e69f Compute an exact count when using distinct 2022-10-17 14:13:44 +02:00
a396806343 Add settings to force milli to exhaustively compute the total number of hits 2022-10-17 14:13:44 +02:00
fad0de4581 Merge #655
655: Upgrade all dependencies r=Kerollmops a=loiclec

Upgrade all dependencies to their latest versions.

Partly fixes https://github.com/meilisearch/meilisearch/issues/2822





Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-10-17 11:19:46 +00:00
c2ca259f48 Update cli to latest indicatif crate version 2022-10-17 13:05:56 +02:00
4c481a8947 Upgrade all dependencies 2022-10-17 13:05:56 +02:00
beb987d3d1 Fixing piles of clippy errors.
Most of these are calling clone when the struct supports Copy.

Many are using & and &mut on `self` when the function they are called
from already has an immutable or mutable borrow so this isn't needed.

I tried to stay away from actual changes or places where I'd have to
name fresh variables.
2022-10-13 22:02:54 +02:00
95e45e1c2c Merge #663
663: Fix CONTRIBUTING.md step to make the project work r=Kerollmops a=curquiza

Following this discussion: https://github.com/meilisearch/milli/issues/76#issuecomment-1277459125

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-10-13 11:47:34 +00:00
59fe1e8efa Update CONTRIBUTING.md 2022-10-13 13:46:18 +02:00
f30979d021 Merge #662
662: Enhance word splitting strategy r=ManyTheFish a=akki1306

# Pull Request

## Related issue
Fixes #648 

## What does this PR do?
- Update [split_best_frequency](55d889522b/milli/src/search/query_tree.rs (L282-L301)) to use the frequency of adjacent word pairs (proximity value of 1) instead of the frequency of individual words. The word pairs with the maximum frequency are the ones considered.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!

Co-authored-by: Akshay Kulkarni <akshayk.gj@gmail.com>
2022-10-13 08:14:22 +00:00
85f3028317 remove underscore and introduce back word_documents_count 2022-10-13 13:21:59 +05:30
8195fc6141 revert removal of word_documents_count method 2022-10-13 13:14:27 +05:30
32f825d442 move default implementation of word_pair_frequency to TestContext 2022-10-13 12:57:50 +05:30
ff8b2d4422 formatting 2022-10-13 12:44:08 +05:30
6cb8b46900 use word_pair_frequency and remove word_documents_count 2022-10-13 12:43:11 +05:30
8c9245149e format file 2022-10-12 15:27:56 +05:30
2000f7958d Merge #604
604: Speed up debug builds r=Kerollmops a=loiclec

Note: this draft PR is based on https://github.com/meilisearch/milli/pull/601 , for no particular reason.

## What does this PR do?
Make a series of changes with the goal of speeding up debug builds:

1. Add an `all_languages` feature which compiles charabia with its `default` features activated.
The `all_languages` feature is activated by default. But running:
```
cargo build --no-default-features
```
on `milli` is now much faster.

2. Reduce the debug optimisation level from 3 to 0, except for a few critical dependencies.

3.  Compile the build dependencies quicker as well. Previously, all build dependencies were compiled with `opt-level = 3`. Now, only the critical build dependencies are compiled with optimisations.

4. Reduce the amount of code generated by the `documents!` macro

5. Make the "progress update" closure provided to indexing functions a trait object instead of a generic parameter. This avoids monomorphising the indexing code multiple times needlessly.

## Results
Initial build times on my computer before and after these changes:
|        | cargo check | cargo check --no-default-features | cargo test | cargo test --lib | cargo test --no-default-features | cargo test --lib --no-default-features |
|--------|-------------|-----------------------------------|------------|------------------|----------------------------------|----------------------------------------|
| before | 1m05s       | 1m05s                             | 2m06s      | 1m47s            | 2m06s                            | 1m47s                                  |
| after  | 28.9s       | 13.1s                             | 40s      | 38s            | 23s                              | 21s                                  |



Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-10-12 08:54:48 +00:00
63e79a9039 update comment 2022-10-12 13:36:48 +05:30
7f9680f0a0 Enhance word splitting strategy 2022-10-12 13:18:23 +05:30
53503f09ca Make milli's default features optional in other executable targets 2022-10-12 09:22:05 +02:00
6fbf5dac68 Simplify documents! macro to reduce compile times 2022-10-12 09:22:05 +02:00
98fc093823 Optimize a few performance sensitive dependencies on debug builds 2022-10-12 09:22:05 +02:00
5cfb5df31e Set opt-level to 0 for debug builds
But speed up compile times by optimising build dependencies of lindera
2022-10-12 09:22:05 +02:00
55d889522b Merge #658
658: Add proximity calculation for the same word r=ManyTheFish a=msvaljek

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/milli/issues/647

## What does this PR do?
- During [the increase of the current word position](d94339a858/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs (L129-L135)) we extract the proximity between the current position and the next one.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: msvaljek <marko.svaljek@commercetools.com>
2022-10-10 13:33:58 +00:00
762e320c35 Add proximity calculation for the same word 2022-10-07 12:59:12 +02:00
358aa337ea Merge #657
657: Fix link in Hacktoberfest section r=curquiza a=meili-bot

_This PR is auto-generated._

Fix link in CONTRIBUTING.md.
Following [this PR](https://github.com/meilisearch/meilisearch/pull/2845) and [this issue](https://github.com/meilisearch/meilisearch/issues/2840).


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-10-05 17:19:33 +00:00
1764a33690 Update CONTRIBUTING.md 2022-10-05 19:19:03 +02:00
a90d7e4cc7 Merge #654
654: Re-upload milli's logo r=curquiza a=jeertmans

# Pull Request

## Related issue
None

## What does this PR do?
Apparently, some [commit](add96f921b) deleted the logo file, and updated the `src` path. It seems to me that this was an error, and that the logo file should have been moved, not deleted.

This fixes the problem of seeing this (see image) instead of the actual logo.
![image](https://user-images.githubusercontent.com/27275099/193786803-e0d11a59-48fa-4331-bd92-48457969d766.png)


## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Jérome Eertmans <jeertmans@icloud.com>
2022-10-04 10:56:33 +00:00
aec220ab63 chore: move logo to (new) assets folder 2022-10-04 12:20:24 +02:00
4348c49656 fix: re-upload milli's logo
The logo was deleted with this [commit](add96f921b).
2022-10-04 11:33:19 +02:00
a18de9b5f0 Merge #650
650: Add missing logging timer to extractors r=Kerollmops a=vishalsodani

# Pull Request

## What does this PR do?
#645 
<!-- Please link the issue you're trying to fix with this PR, if none then please create an issue first. -->

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: vishalsodani <vishalsodani@rediffmail.com>
2022-10-04 07:25:47 +00:00
f9c2dacf33 Merge #653
653: Fix #652 - Change Spelling of `author` in `README.md` r=curquiza a=anirudhRowjee

# Pull Request

## What does this PR do?
Fixes #652
- Changes spellings of `au{hor` to `author`
- Minor formatting changes in Markdown

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Anirudh Rowjee <ani.rowjee@gmail.com>
2022-10-03 08:20:48 +00:00
7d247353d0 [docs] contd - fix #652, revert capitalization of 'Meilisearch' 2022-10-03 09:52:20 +05:30
bc502ee125 [docs] Fixed #652, changes spelling of author 2022-10-03 09:38:59 +05:30
00c02d00f3 Add missing logging timer to extractors 2022-09-30 22:17:06 +05:30
804db03e41 Merge #649
649: Update Hacktoberfest section in CONTRIBUTING.md r=curquiza a=meili-bot

_This PR is auto-generated._

Following: af850854e4

Update Hacktoberfest section in CONTRIBUTING.md with the global guideline information.


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-09-29 15:50:20 +00:00
26efdf4dd9 Update CONTRIBUTING.md 2022-09-29 16:00:15 +02:00
4b903719a0 Merge #643
643: Add Hacktoberfest section to CONTRIBUTING.md r=curquiza a=meili-bot

_This PR is auto-generated._

Add Hacktoberfest section to CONTRIBUTING.md


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-09-22 16:44:51 +00:00
ed3d87f061 Update CONTRIBUTING.md 2022-09-22 18:43:42 +02:00
a3622eda46 Merge #642
642: Remove LTO in release profile r=Kerollmops a=loiclec

Since we can't enable it in Meilisearch (see https://github.com/meilisearch/meilisearch/pull/2717 ), we should not enable it in milli either. The goal is for milli's benchmarks to accurately represent its performance within meilisearch.


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-09-21 09:14:46 +00:00
513a38f07b Remove LTO in release profile
Since we can't enable it in Meilisearch, there is no point in having it
enabled in milli
2022-09-21 10:44:33 +02:00
e1e025c319 Merge #641
641: Remove `helpers` crate r=Kerollmops a=loiclec

# Pull Request

## What does this PR do?
Remove the `helpers` crate, because (I think) we don't use it. This should have been part of https://github.com/meilisearch/milli/pull/636 , but I forgot about it then :)





Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-09-21 08:36:05 +00:00
b6fe6838d3 Remove helpers crate 2022-09-21 10:25:36 +02:00
d94339a858 Merge #636
636: Remove unused `infos`, `http-ui`, and `milli/fuzz` crates r=ManyTheFish a=loiclec

We haven't used the `infos/`, `http-ui/` and `milli/fuzz/` crates in a long time. They are not properly maintained and probably do not work correctly anymore.

This PR removes these crates entirely from the workspace to reduce the amount of code we need to maintain.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-09-14 12:39:57 +00:00
15d478cf4d Merge #635
635: Use an unstable algorithm for `grenad::Sorter` when possible r=Kerollmops a=loiclec

# Pull Request
## What does this PR do?

Use an unstable algorithm to sort the internal vector used by `grenad::Sorter` whenever possible to speed up indexing.

In practice, every time the merge function creates a `RoaringBitmap`, we use an unstable sort. For every other merge function, such as `keep_first`, `keep_last`, etc., a stable sort is used.


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-09-14 12:00:52 +00:00
add96f921b Remove unused infos/ http-ui/ and fuzz/ crates 2022-09-14 06:55:01 +02:00
4fc6331cb6 Merge #638
638: Update version for the next release (v0.33.4) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-09-13 13:56:53 +00:00
753e76d451 Update version for the next release (v0.33.4) in Cargo.toml files 2022-09-13 13:55:50 +00:00
3794962330 Use an unstable algorithm for grenad::Sorter when possible 2022-09-13 14:49:53 +02:00
2865b063ad Merge #637
637: We avoid skipping errors in the indexing pipeline r=ManyTheFish a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2764 and should fix it when merged into Meilisearch.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-09-13 12:12:05 +00:00
d4d7c9d577 We avoid skipping errors in the indexing pipeline 2022-09-13 14:03:00 +02:00
f8697075ea Merge #632
632: Make charabia default feature optional r=ManyTheFish a=vincent-herlemont

# Pull Request

## What does this PR do?
Fixes [#627](https://github.com/meilisearch/milli/issues/627#issuecomment-1239769122)

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vincent Herlemont <vincent@herlemont.fr>
2022-09-08 14:33:26 +00:00
7cd0aea1d3 Merge #633
633: Upgrade ubuntu-18.04 to 20.04 r=Kerollmops a=curquiza

Ubuntu-18.04 is going to be deprecated by GitHub
https://github.com/actions/runner-images/issues/6002

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-09-08 14:08:28 +00:00
69b2d31b71 Upgrade ubuntu-18.04 to 20.04 2022-09-08 14:58:06 +02:00
8cd5200f48 Make charabia languages configurable 2022-09-08 12:21:43 +02:00
99b45a7820 Merge #631
631: Revert "Remove Bors required test for Windows" r=Kerollmops a=curquiza

Reverts meilisearch/milli#612

Because the issue does not seem to be there!

Closes https://github.com/meilisearch/milli/issues/614

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-09-07 21:07:44 +00:00
5e07ea79c2 Make charabia default feature optional 2022-09-07 20:54:31 +02:00
3af3d3f7d9 Revert "Remove Bors required test for Windows" 2022-09-07 18:36:10 +02:00
549fa12d5a Merge #629
629: Update version for the next release (v0.33.3) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-09-07 15:55:04 +00:00
077dcd2002 Update version for the next release (v0.33.3) in Cargo.toml files 2022-09-07 15:48:53 +00:00
2907928d93 Merge #628
628: Make sure that long words are ignored r=ManyTheFish a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2743 and is fixing it.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-09-07 13:04:59 +00:00
fe3973a51c Make sure that long words are correctly skipped 2022-09-07 15:03:32 +02:00
c83c3cd796 Add a test to make sure that long words are correctly skipped 2022-09-07 14:12:36 +02:00
b9539c59f3 Merge #625 #626
625: Bump actions/checkout from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/actions/checkout/releases">actions/checkout's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Updated to the node16 runtime by default
<ul>
<li>This requires a minimum <a href="https://github.com/actions/runner/releases/tag/v2.285.0">Actions Runner</a> version of v2.285.0 to run, which is by default available in GHES 3.4 or later.</li>
</ul>
</li>
</ul>
<h2>v2.4.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Add set-safe-directory input to allow customers to take control. (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/770">#770</a>) by <a href="https://github.com/TingluoHuang"><code>@TingluoHuang</code></a> in <a href="https://github-redirect.dependabot.com/actions/checkout/pull/776">actions/checkout#776</a></li>
<li>Prepare changelog for v2.4.2. by <a href="https://github.com/TingluoHuang"><code>@TingluoHuang</code></a> in <a href="https://github-redirect.dependabot.com/actions/checkout/pull/778">actions/checkout#778</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2...v2.4.2">https://github.com/actions/checkout/compare/v2...v2.4.2</a></p>
<h2>v2.4.1</h2>
<ul>
<li>Fixed an issue where checkout failed to run in container jobs due to the new git setting <code>safe.directory</code></li>
</ul>
<h2>v2.4.0</h2>
<ul>
<li>Convert SSH URLs like <code>org-&lt;ORG_ID&gt;@github.com:</code> to <code>https://github.com/</code> - <a href="https://github-redirect.dependabot.com/actions/checkout/pull/621">pr</a></li>
</ul>
<h2>v2.3.5</h2>
<p>Update dependencies</p>
<h2>v2.3.4</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/379">Add missing <code>await</code>s</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/360">Swap to Environment Files</a></li>
</ul>
<h2>v2.3.3</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/345">Remove Unneeded commit information from build logs</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/326">Add Licensed to verify third party dependencies</a></li>
</ul>
<h2>v2.3.2</h2>
<p><a href="https://github-redirect.dependabot.com/actions/checkout/pull/320">Add Third Party License Information to Dist Files</a></p>
<h2>v2.3.1</h2>
<p><a href="https://github-redirect.dependabot.com/actions/checkout/pull/284">Fix default branch resolution for .wiki and when using SSH</a></p>
<h2>v2.3.0</h2>
<p><a href="https://github-redirect.dependabot.com/actions/checkout/pull/278">Fallback to the default branch</a></p>
<h2>v2.2.0</h2>
<p><a href="https://github-redirect.dependabot.com/actions/checkout/pull/258">Fetch all history for all tags and branches when fetch-depth=0</a></p>
<h2>v2.1.1</h2>
<p>Changes to support GHES (<a href="https://github-redirect.dependabot.com/actions/checkout/pull/236">here</a> and <a href="https://github-redirect.dependabot.com/actions/checkout/pull/248">here</a>)</p>
<h2>v2.1.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/191">Group output</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/199">Changes to support GHES alpha release</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/184">Persist core.sshCommand for submodules</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/163">Add support ssh</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/179">Convert submodule SSH URL to HTTPS, when not using SSH</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/actions/checkout/blob/main/CHANGELOG.md">actions/checkout's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>v3.0.2</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/770">Add input <code>set-safe-directory</code></a></li>
</ul>
<h2>v3.0.1</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/762">Fixed an issue where checkout failed to run in container jobs due to the new git setting <code>safe.directory</code></a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/744">Bumped various npm package versions</a></li>
</ul>
<h2>v3.0.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/689">Update to node 16</a></li>
</ul>
<h2>v2.3.1</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/284">Fix default branch resolution for .wiki and when using SSH</a></li>
</ul>
<h2>v2.3.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/278">Fallback to the default branch</a></li>
</ul>
<h2>v2.2.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/258">Fetch all history for all tags and branches when fetch-depth=0</a></li>
</ul>
<h2>v2.1.1</h2>
<ul>
<li>Changes to support GHES (<a href="https://github-redirect.dependabot.com/actions/checkout/pull/236">here</a> and <a href="https://github-redirect.dependabot.com/actions/checkout/pull/248">here</a>)</li>
</ul>
<h2>v2.1.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/191">Group output</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/199">Changes to support GHES alpha release</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/184">Persist core.sshCommand for submodules</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/163">Add support ssh</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/179">Convert submodule SSH URL to HTTPS, when not using SSH</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/157">Add submodule support</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/144">Follow proxy settings</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/141">Fix ref for pr closed event when a pr is merged</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/128">Fix issue checking detached when git less than 2.22</a></li>
</ul>
<h2>v2.0.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/108">Do not pass cred on command line</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/107">Add input persist-credentials</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/104">Fallback to REST API to download repo</a></li>
</ul>
<h2>v2 (beta)</h2>
<ul>
<li>Improved fetch performance</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="2541b1294d"><code>2541b12</code></a> Prepare changelog for v3.0.2. (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/777">#777</a>)</li>
<li><a href="0ffe6f9c55"><code>0ffe6f9</code></a> Add set-safe-directory input to allow customers to take control. (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/770">#770</a>)</li>
<li><a href="dcd71f6466"><code>dcd71f6</code></a> Enforce safe directory (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/762">#762</a>)</li>
<li><a href="add3486cc3"><code>add3486</code></a> Patch to fix the dependbot alert. (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/744">#744</a>)</li>
<li><a href="5126516654"><code>5126516</code></a> Bump minimist from 1.2.5 to 1.2.6 (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/741">#741</a>)</li>
<li><a href="d50f8ea767"><code>d50f8ea</code></a> Add v3.0 release information to changelog (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/740">#740</a>)</li>
<li><a href="2d1c1198e7"><code>2d1c119</code></a> update test workflows to checkout v3 (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/709">#709</a>)</li>
<li><a href="a12a3943b4"><code>a12a394</code></a> update readme for v3 (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/708">#708</a>)</li>
<li><a href="8f9e05e482"><code>8f9e05e</code></a> Update to node 16 (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/689">#689</a>)</li>
<li>See full diff in <a href="https://github.com/actions/checkout/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

626: Bump yogevbd/enforce-label-action from 2.1.0 to 2.2.2 r=curquiza a=dependabot[bot]

Bumps [yogevbd/enforce-label-action](https://github.com/yogevbd/enforce-label-action) from 2.1.0 to 2.2.2.
<details>
<summary>Commits</summary>
<ul>
<li><a href="a3c219da6b"><code>a3c219d</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/yogevbd/enforce-label-action/issues/26">#26</a> from yogevbd/test1</li>
<li><a href="8279da6fd9"><code>8279da6</code></a> Update enforce-labels.yml</li>
<li><a href="0c6f806593"><code>0c6f806</code></a> Update package.json</li>
<li><a href="2e6b1550e4"><code>2e6b155</code></a> lock <code>@actions/http-client</code></li>
<li><a href="732db2ff3a"><code>732db2f</code></a> test</li>
<li><a href="e662799851"><code>e662799</code></a> Update package.json</li>
<li><a href="f467829919"><code>f467829</code></a> Update enforce-labels.yml</li>
<li><a href="00ff95bb80"><code>00ff95b</code></a> Update package.json</li>
<li><a href="de4244ae68"><code>de4244a</code></a> Update action.yml</li>
<li><a href="9f40e51d60"><code>9f40e51</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/yogevbd/enforce-label-action/issues/25">#25</a> from dominikmeyersap/patch-1</li>
<li>Additional commits viewable in <a href="https://github.com/yogevbd/enforce-label-action/compare/2.1.0...2.2.2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=yogevbd/enforce-label-action&package-manager=github_actions&previous-version=2.1.0&new-version=2.2.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-09-06 16:49:30 +00:00
f2b140d3d7 Merge #624
624: Bump Swatinem/rust-cache from 1.3.0 to 2.0.0 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 1.3.0 to 2.0.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.0.0</h2>
<ul>
<li>The action code was refactored to allow for caching multiple workspaces and
different <code>target</code> directory layouts.</li>
<li>The <code>working-directory</code> and <code>target-dir</code> input options were replaced by a
single <code>workspaces</code> option that has the form of <code>$workspace -&gt; $target</code>.</li>
<li>Support for considering <code>env-vars</code> as part of the cache key.</li>
<li>The <code>sharedKey</code> input option was renamed to <code>shared-key</code> for consistency.</li>
</ul>
<h2>v1.4.0</h2>
<ul>
<li>Clean both debug and release target directories.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.0.0</h2>
<ul>
<li>The action code was refactored to allow for caching multiple workspaces and
different <code>target</code> directory layouts.</li>
<li>The <code>working-directory</code> and <code>target-dir</code> input options were replaced by a
single <code>workspaces</code> option that has the form of <code>$workspace -&gt; $target</code>.</li>
<li>Support for considering <code>env-vars</code> as part of the cache key.</li>
<li>The <code>sharedKey</code> input option was renamed to <code>shared-key</code> for consistency.</li>
</ul>
<h2>1.4.0</h2>
<ul>
<li>Clean both <code>debug</code> and <code>release</code> target directories.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="6720f05bc4"><code>6720f05</code></a> 2.0.0</li>
<li><a href="5733786579"><code>5733786</code></a> rebuild</li>
<li><a href="622616010e"><code>6226160</code></a> prepare v2</li>
<li><a href="0497f9301f"><code>0497f93</code></a> improve registry cleanpu</li>
<li><a href="7b8626742a"><code>7b86267</code></a> update registry cleaning</li>
<li><a href="911d8e9e55"><code>911d8e9</code></a> test sparse registry</li>
<li><a href="875be5ce2d"><code>875be5c</code></a> bump cache</li>
<li><a href="07a2ee71bc"><code>07a2ee7</code></a> lol, dependency check was reversed</li>
<li><a href="7c190ef171"><code>7c190ef</code></a> fix actual test code ;-)</li>
<li><a href="fffd6895b2"><code>fffd689</code></a> add some more tests</li>
<li>Additional commits viewable in <a href="https://github.com/Swatinem/rust-cache/compare/v1.3.0...v2.0.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=1.3.0&new-version=2.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-09-06 16:12:55 +00:00
e3400a05d3 Bump yogevbd/enforce-label-action from 2.1.0 to 2.2.2
Bumps [yogevbd/enforce-label-action](https://github.com/yogevbd/enforce-label-action) from 2.1.0 to 2.2.2.
- [Release notes](https://github.com/yogevbd/enforce-label-action/releases)
- [Commits](https://github.com/yogevbd/enforce-label-action/compare/2.1.0...2.2.2)

---
updated-dependencies:
- dependency-name: yogevbd/enforce-label-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-09-06 16:08:54 +00:00
b308463022 Bump actions/checkout from 2 to 3
Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v2...v3)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-09-06 16:08:51 +00:00
5e85059a71 Bump Swatinem/rust-cache from 1.3.0 to 2.0.0
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 1.3.0 to 2.0.0.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v1.3.0...v2.0.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-09-06 16:08:48 +00:00
9e661f2cb9 Merge #623
623: Add dependabot for GHA r=Kerollmops a=curquiza

Same as we added in Meilisearch. Only runs once a month.
https://github.com/meilisearch/meilisearch/blob/main/.github/dependabot.yml

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-09-06 15:56:28 +00:00
44192d754f Add dependabot for GHA 2022-09-06 17:54:05 +02:00
1fa851a8d0 Merge #622
622: Minor fixes in the just added update-version CI r=ManyTheFish a=curquiza

These fixes are minor and do not prevent us from using the current CI.

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-09-06 13:14:23 +00:00
61abc61a69 Minor fixes in the just added update-version CI 2022-09-05 16:01:32 +02:00
efee0e3f43 Merge #621
621: Add CI to update the Milli version r=ManyTheFish a=curquiza

Add a CI we can trigger manually to create a PR updating the Milli version
The next step is to create a Slack bot that will trigger this CI
In the meantime, we can trigger this CI manually in the [Actions tab](https://github.com/meilisearch/milli/actions)

The `MEILI_BOT_GH_PAT` secret has been added at the organization level, and is accessible to the following repositories (so far): Meilisearch, Milli and Charabia

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-09-05 08:31:48 +00:00
0639b14906 Add CI to update the Milli version 2022-09-04 11:49:50 +02:00
f7c352a32d Merge #620
620: Fix word criterion r=Kerollmops a=ManyTheFish

related to https://github.com/meilisearch/meilisearch/issues/2722

- fix the word strategy bug
- update milli version to v0.33.2

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-09-01 10:14:35 +00:00
bf750e45a1 Fix word removal issue 2022-09-01 12:10:47 +02:00
a38608fe59 Add test mixing phrased and no-phrased words 2022-09-01 12:02:10 +02:00
97a04887a3 Update version for next release (v0.33.2) in Cargo.toml 2022-09-01 11:47:23 +02:00
17d020e996 Merge #618
618: Update version for next release (v0.33.1) in Cargo.toml r=Kerollmops a=curquiza

No breaking for this release

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-08-31 10:43:45 +00:00
c3363706c5 Update version for next release (v0.33.1) in Cargo.toml 2022-08-31 11:37:27 +02:00
2c2f3d38cc Merge #617
617: Accept integers as document ids again r=irevoire a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2723 and will fix it once this PR is merged and a new release is deployed and used in Meilisearch itself.

This PR makes the indexer try to parse the values of the fields identified as numbers (i.e. `id:number`) as integers first, then as floats if that fails.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-08-31 09:25:17 +00:00
7f92116b51 Accept again integers as document ids 2022-08-31 10:56:39 +02:00
0b55e7ce6a Merge #615
615: Remove the artifacts of the past r=Kerollmops a=irevoire



Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-23 14:22:43 +00:00
f6024b3269 Remove the artifacts of the past 2022-08-23 16:10:38 +02:00
a79ff8a1a9 Merge #611
611: Upgrade charabia v0.6.0 r=curquiza a=ManyTheFish

# Pull Request

## What does this PR do?

- Update `log`
- Upgrade `charabia`

related to https://github.com/meilisearch/meilisearch/issues/2686


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-23 10:17:29 +00:00
e314423653 Merge #613
613: Update version for next release (v0.33.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-08-23 10:01:20 +00:00
d0521e493f Merge #612
612: Remove Bors required test for Windows r=Kerollmops a=curquiza

Remove the required windows test for merging due to the issue with Lindera
https://github.com/meilisearch/milli/runs/7970141278?check_suite_focus=true

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-08-23 09:47:51 +00:00
9ed7324995 Update version for next release (v0.33.0) 2022-08-23 11:47:48 +02:00
e140227065 Remove Bors required test for Windows 2022-08-23 11:45:29 +02:00
18886dc6b7 Merge #598
598: Matching query terms policy r=Kerollmops a=ManyTheFish

## Summary

Implement several optional-word strategies.

## Content

Replace `optional_words` boolean with an enum containing several term matching strategies:
```rust
pub enum TermsMatchingStrategy {
    // remove last word first
    Last,
    // remove first word first
    First,
    // remove more frequent word first
    Frequency,
    // remove smallest word first
    Size,
    // only one of the word is mandatory
    Any,
    // all words are mandatory
    All,
}
```

All strategies implemented during the prototype are kept, but only `Last` and `All` will be published by Meilisearch in the `v0.29.0` release.

## Related

spec: https://github.com/meilisearch/specifications/pull/173
prototype discussion: https://github.com/meilisearch/meilisearch/discussions/2639#discussioncomment-3447699


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-22 15:51:37 +00:00
5391e3842c replace optional_words by term_matching_strategy 2022-08-22 17:47:19 +02:00
f9029727e0 Fix benchmarks 2022-08-22 14:55:53 +02:00
a5b9a35c50 Activate char_map for highlighting 2022-08-22 14:39:16 +02:00
ba5ca8a362 Upgrade charabia v0.6.0 2022-08-22 14:38:00 +02:00
5943e1c3b2 Update log dependency 2022-08-22 13:55:01 +02:00
b46225070f Merge #610
610: Share heed between all sub-crates r=Kerollmops a=irevoire

# Pull Request

## What does this PR do?
Use the reexported version of heed in the benchmarks and the fuzzer

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-22 08:44:31 +00:00
e7624abe63 share heed between all sub-crates 2022-08-19 11:23:41 +02:00
993aa1321c Fix query tree building 2022-08-18 17:56:06 +02:00
bff9653050 Fix remove count 2022-08-18 17:36:30 +02:00
9640976c79 Rename TermMatchingPolicies 2022-08-18 17:36:08 +02:00
60a7221827 Merge #609
609: Retry downloading the benchmarks datasets r=Kerollmops a=irevoire

Downloading the benchmarks datasets is failing [more and more](https://github.com/meilisearch/milli/pull/607#pullrequestreview-1076023074) often; thus, instead of fixing the issue, I thought we could retry multiple times.


Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-18 11:47:09 +00:00
afc10acd19 Merge #596
596: Filter operators: NOT + IN[..] r=irevoire a=loiclec

# Pull Request

## What does this PR do?
Implements the changes described in https://github.com/meilisearch/meilisearch/issues/2580
It is based on top of #556 

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-18 11:24:32 +00:00
c7a86b56ef Fix filter parser compilation error 2022-08-18 13:16:56 +02:00
9b6602cba2 Avoid cloning FilterCondition in filter array parsing 2022-08-18 13:06:57 +02:00
8a271223a9 Change a macro_rules to a function in filter parser 2022-08-18 13:03:55 +02:00
dd34dbaca5 Add more filter parser tests 2022-08-18 11:55:01 +02:00
5d74ebd5e5 Cargo fmt 2022-08-18 11:36:38 +02:00
9af69c151b Limit the maximum depth of filters
This should have no impact on the user but is there to safeguard
meilisearch against malicious inputs.
2022-08-18 11:31:38 +02:00
c51dcad51b Don't recompute filterable fields in evaluation of IN[] filter 2022-08-18 10:59:21 +02:00
98f0da6b38 Simplify representation of nested NOT filters 2022-08-18 10:58:24 +02:00
b030efdc83 Fix parsing of IN[] filter followed by whitespace + factorise its impl 2022-08-18 10:58:04 +02:00
84a784834e retry downloading the benchmarks datasets 2022-08-17 19:25:05 +02:00
79094bcbcf Merge #607
607: Better threshold r=Kerollmops a=irevoire

# Pull Request

## What does this PR do?
Fixes #570 

This PR tries to improve the threshold used to trigger the real deletion of documents.
The deletion is now triggered in two cases:
- 10% of the total available space is used by soft deleted documents
- 90% of the total available space is used.

In this context, « total available space » means the `map_size` of lmdb.
And the size used by the soft-deleted documents is actually an estimate. We can't determine precisely the size used by one document, so what we do is: take the total space used and divide it by the number of documents + soft-deleted documents to estimate the size of one average document, then multiply that average size by the number of soft-deleted documents.

--------

<img width="808" alt="image" src="https://user-images.githubusercontent.com/7032172/185083075-92cf379e-8ae1-4bfc-9ca6-93b54e6ab4e9.png">

Here we can see we have a ~10GB drift in the end between the space used by the soft deleted and the real space used by the documents.
Personally I don’t think that's a big issue, because once the red line reaches 90GB everything will be freed, but now you know.

If you have an idea on how to improve this estimation I would love to hear it.
It looks like the difference is linear, so maybe we could simply multiply the current estimation by two?

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-17 16:31:04 +00:00
497f9817a2 Use snapshot testing for the filter parser 2022-08-17 17:35:01 +02:00
4aae07d5f5 expose the size methods 2022-08-17 17:07:38 +02:00
e96b852107 bump heed 2022-08-17 17:05:50 +02:00
238a7be58d Fix filter parser handling of keywords and surrounding spaces
Now the following fragments are allowed:

AND(field =

AND'field' =

AND"field" =
2022-08-17 16:53:40 +02:00
b09a8f1b91 Filters: add explicit error message when using a keyword as value 2022-08-17 16:07:00 +02:00
087da5621a Merge #587
587: Word prefix pair proximity docids indexation refactor r=Kerollmops a=loiclec

# Pull Request

## What does this PR do?
Refactor the code of `WordPrefixPairProximityDocIds` to make it much faster, fix a bug, and add a unit test.

## Why is it faster?
Because we avoid using a sorter to insert the (`word1`, `prefix`, `proximity`) keys and their associated bitmaps, and thus we don't have to sort a potentially very big set of data. I have also added a couple of other optimisations: 

1. reusing allocations
2. using a prefix trie instead of an array of prefixes to get all the prefixes of a word
3. inserting directly into the database instead of putting the data in an intermediary grenad when possible. Also avoid checking for pre-existing values in the database when we know for certain that they do not exist. 

## What bug was fixed?
When reindexing, the `new_prefix_fst_words` prefixes may look like:
```
["ant",  "axo", "bor"]
```
which we group by first letter:
```
[["ant", "axo"], ["bor"]]
```

Later in the code, if we have the word2 "axolotl", we try to find which subarray of prefixes contains its prefixes. This check is done with `word2.starts_with(subarray_prefixes[0])`, but `"axolotl".starts_with("ant")` is false, and thus we wrongly think that there are no prefixes in `new_prefix_fst_words` that are prefixes of `axolotl`.

## StrStrU8Codec
I had to change the encoding of `StrStrU8Codec` to make the second string null-terminated as well. I don't think this should be a problem, but I may have missed some nuances about the impacts of this change.

## Requests when reviewing this PR
I have explained what the code does in the module documentation of `word_pair_proximity_prefix_docids`. It would be nice if someone could read it and give their opinion on whether it is a clear explanation or not. 

I also have a couple questions regarding the code itself:
- Should we clean up and factor out the `PrefixTrieNode` code to try and make broader use of it outside this module? For now, the prefixes undergo a few transformations: from FST, to array, to prefix trie. It seems like it could be simplified.
- I wrote a function called `write_into_lmdb_database_without_merging`. (1) Are we okay with such a function existing? (2) Should it be in `grenad_helpers` instead?

## Benchmark Results

We reduce the time it takes to index about 8% in most cases, but it varies between -3% and -20%. 

```
group                                                                     indexing_main_ce90fc62                  indexing_word-prefix-pair-proximity-docids-refactor_cbad2023
-----                                                                     ----------------------                  ------------------------------------------------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.00  1893.0±233.03µs        ? ?/sec    1.01  1921.2±260.79µs        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.05      9.4±3.51ms        ? ?/sec     1.00      9.0±2.14ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.22    18.3±11.42ms        ? ?/sec     1.00     15.0±5.79ms        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.00     41.4±4.20ms        ? ?/sec     1.28    53.0±13.97ms        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.00   285.6±18.12ms        ? ?/sec     1.03   293.1±16.09ms        ? ?/sec
indexing/Indexing geo_point                                               1.03      60.8±0.45s        ? ?/sec     1.00      58.8±0.68s        ? ?/sec
indexing/Indexing movies in three batches                                 1.14      16.5±0.30s        ? ?/sec     1.00      14.5±0.24s        ? ?/sec
indexing/Indexing movies with default settings                            1.11      13.7±0.07s        ? ?/sec     1.00      12.3±0.28s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.10      10.6±0.11s        ? ?/sec     1.00       9.6±0.15s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.11       9.4±0.15s        ? ?/sec     1.00       8.5±0.10s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.18      66.2±0.39s        ? ?/sec     1.00      56.0±0.67s        ? ?/sec
indexing/Indexing songs with default settings                             1.07      58.7±1.26s        ? ?/sec     1.00      54.7±1.71s        ? ?/sec
indexing/Indexing songs without any facets                                1.08      53.1±0.88s        ? ?/sec     1.00      49.3±1.43s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.08      57.7±1.33s        ? ?/sec     1.00      53.3±0.98s        ? ?/sec
indexing/Indexing wiki                                                    1.06   1051.1±21.46s        ? ?/sec     1.00    989.6±24.55s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.20    1184.8±8.93s        ? ?/sec     1.00     989.7±7.06s        ? ?/sec
indexing/Reindexing geo_point                                             1.04      67.5±0.75s        ? ?/sec     1.00      64.9±0.32s        ? ?/sec
indexing/Reindexing movies with default settings                          1.12      13.9±0.17s        ? ?/sec     1.00      12.4±0.13s        ? ?/sec
indexing/Reindexing songs with default settings                           1.05      60.6±0.84s        ? ?/sec     1.00      57.5±0.99s        ? ?/sec
indexing/Reindexing wiki                                                  1.07   1725.0±17.92s        ? ?/sec     1.00    1611.4±9.90s        ? ?/sec
```

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 14:06:12 +00:00
fb95e67a2a Merge #608
608: Fix soft deleted documents r=ManyTheFish a=ManyTheFish

When we replaced or updated some documents, the indexing was skipping the replaced documents.

Related to https://github.com/meilisearch/meilisearch/issues/2672

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-17 13:38:10 +00:00
e4a52e6e45 Merge #594
594: Fix(Search): Fix phrase search candidates computation r=Kerollmops a=ManyTheFish

This bug is an old bug but was hidden by the proximity criterion,
Phrase searches were always returning an empty candidates list when the proximity criterion is deactivated.

Before the fix, we were trying to find any words[n] near words[n]
instead of finding  any words[n] near words[n+1], for example:

for a phrase search '"Hello world"' we were searching for "hello" near "hello" first, instead of "hello" near "world".



Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-17 13:22:52 +00:00
8c3f1a9c39 Remove useless lifetime declaration 2022-08-17 15:20:43 +02:00
e9e2349ce6 Fix typo in comment 2022-08-17 15:09:48 +02:00
2668f841d1 Fix update indexing 2022-08-17 15:03:37 +02:00
7384650d85 Update test to showcase the bug 2022-08-17 15:03:08 +02:00
39869be23b Merge #590
590: Optimise facets indexing r=Kerollmops a=loiclec

# Pull Request

## What does this PR do?
Fixes #589 

## Notes
I added documentation for the whole module which attempts to explain the shape of the databases and their purpose. However, I realise there is already some documentation about this, so I am not sure if we want to keep it.

## Benchmarks

We get a ~1.15x speed up on the geo_point benchmark.

```
group                                                                     indexing_main_57042355                  indexing_optimise-facets-indexation_5728619a
-----                                                                     ----------------------                  --------------------------------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.00  1862.7±294.45µs        ? ?/sec    1.58      2.9±1.32ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.11      8.9±2.44ms        ? ?/sec     1.00      8.0±1.42ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.00     12.8±3.32ms        ? ?/sec     1.32     16.9±6.98ms        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.09     43.8±4.78ms        ? ?/sec     1.00     40.3±3.79ms        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.08   287.4±28.72ms        ? ?/sec     1.00    264.9±9.46ms        ? ?/sec
indexing/Indexing geo_point                                               1.14      61.2±0.39s        ? ?/sec     1.00      53.8±0.57s        ? ?/sec
indexing/Indexing movies in three batches                                 1.00      16.6±0.12s        ? ?/sec     1.00      16.5±0.10s        ? ?/sec
indexing/Indexing movies with default settings                            1.00      14.1±0.30s        ? ?/sec     1.00      14.0±0.28s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.10      10.9±0.50s        ? ?/sec     1.00      10.0±0.10s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.01       9.6±0.23s        ? ?/sec     1.00       9.5±0.06s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.07      66.3±0.55s        ? ?/sec     1.00      61.8±0.63s        ? ?/sec
indexing/Indexing songs with default settings                             1.03      58.8±0.82s        ? ?/sec     1.00      57.1±1.22s        ? ?/sec
indexing/Indexing songs without any facets                                1.00      53.6±1.09s        ? ?/sec     1.01      54.0±0.58s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.02      58.0±1.29s        ? ?/sec     1.00      57.1±1.43s        ? ?/sec
indexing/Indexing wiki                                                    1.00   1064.1±21.20s        ? ?/sec     1.00   1068.0±20.49s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.00    1182.5±9.62s        ? ?/sec     1.01   1191.2±10.96s        ? ?/sec
indexing/Reindexing geo_point                                             1.12      68.0±0.21s        ? ?/sec     1.00      60.5±0.82s        ? ?/sec
indexing/Reindexing movies with default settings                          1.01      14.1±0.21s        ? ?/sec     1.00      14.0±0.26s        ? ?/sec
indexing/Reindexing songs with default settings                           1.04      61.6±0.57s        ? ?/sec     1.00      59.2±0.87s        ? ?/sec
indexing/Reindexing wiki                                                  1.00   1734.0±11.38s        ? ?/sec     1.01   1746.6±22.48s        ? ?/sec
```


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 11:46:55 +00:00
6cc975704d Add some documentation to facets.rs 2022-08-17 12:59:52 +02:00
93252769af Apply review suggestions 2022-08-17 12:41:22 +02:00
196f79115a Run cargo fmt 2022-08-17 12:28:33 +02:00
d10d78d520 Add integration tests for the IN filter 2022-08-17 12:28:33 +02:00
4ecfb95d0c Improve syntax errors for IN filter 2022-08-17 12:28:33 +02:00
2fd20fadfc Implement the NOT IN syntax for negated IN filter 2022-08-17 12:28:33 +02:00
ca97cb0eda Implement the IN filter operator 2022-08-17 12:28:33 +02:00
90a304cb07 Fix tests after simplification of NOT filter 2022-08-17 12:28:33 +02:00
cc7415bb31 Simplify FilterCondition code, made possible by the new NOT operator 2022-08-17 12:28:33 +02:00
44744d9e67 Implement the simplified NOT operator 2022-08-17 12:28:33 +02:00
01675771d5 Reimplement != filter to select all docids not selected by = 2022-08-17 12:28:33 +02:00
258c3dd563 Make AND+OR filters n-ary (store a vector of subfilters instead of 2)
NOTE: The token_at_depth method is a bit useless now, as the only
cases where there would be a token at depth 1000 are the cases where
the parser already stack-overflowed earlier.

Example: (((((... (x=1) ...)))))
2022-08-17 12:28:33 +02:00
39687908f1 Add documentation and comments to facets.rs 2022-08-17 12:26:49 +02:00
8d4b21a005 Switch string facet levels indexation to new algo
Write the algorithm once for both numbers and strings
2022-08-17 12:26:49 +02:00
cf0cd92ed4 Refactor Facets::execute to increase performance 2022-08-17 12:26:49 +02:00
cd2635ccfc Merge #602
602: Use mimalloc as the default allocator r=Kerollmops a=loiclec

## What does this PR do?
Use mimalloc as the global allocator for milli's benchmarks on macOS.

## Why?
On Linux, we use jemalloc, which is a very fast allocator. But on macOS, we currently use the system allocator, which is very slow. In practice, this difference in allocator speed means that it is difficult to gain insight into milli's performance by running benchmarks locally on the Mac.

By using mimalloc, which is another excellent allocator, we reduce the speed difference between the two platforms.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 10:26:13 +00:00
78d9f0622d cargo fmt 2022-08-17 12:21:24 +02:00
4f9edf13d7 Remove commented-out function 2022-08-17 12:21:24 +02:00
405555b401 Add some documentation to PrefixTrieNode 2022-08-17 12:21:24 +02:00
1bc4788e59 Remove cached Allocations struct from wpppd indexing 2022-08-17 12:18:22 +02:00
ef75a77464 Fix undefined behaviour caused by reusing key from the database
New full snapshot:
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5                a    1  [101, ]
5                a    2  [101, ]
5                am   1  [101, ]
5                b    4  [101, ]
5                be   4  [101, ]
am               a    3  [101, ]
amazing          a    1  [100, ]
amazing          a    2  [100, ]
amazing          a    3  [100, ]
amazing          an   1  [100, ]
amazing          an   2  [100, ]
amazing          b    2  [100, ]
amazing          be   2  [100, ]
an               a    1  [100, ]
an               a    2  [100, 202, ]
an               am   1  [100, ]
an               an   2  [100, ]
an               b    3  [100, ]
an               be   3  [100, ]
and              a    2  [100, ]
and              a    3  [100, ]
and              a    4  [100, ]
and              am   2  [100, ]
and              an   3  [100, ]
and              b    1  [100, ]
and              be   1  [100, ]
at               a    1  [100, 202, ]
at               a    2  [100, 101, ]
at               a    3  [100, ]
at               am   2  [100, 101, ]
at               an   1  [100, 202, ]
at               an   3  [100, ]
at               b    3  [101, ]
at               b    4  [100, ]
at               be   3  [101, ]
at               be   4  [100, ]
beautiful        a    2  [100, ]
beautiful        a    3  [100, ]
beautiful        a    4  [100, ]
beautiful        am   3  [100, ]
beautiful        an   2  [100, ]
beautiful        an   4  [100, ]
bell             a    2  [101, ]
bell             a    4  [101, ]
bell             am   4  [101, ]
extraordinary    a    2  [202, ]
extraordinary    a    3  [202, ]
extraordinary    an   2  [202, ]
house            a    3  [100, 202, ]
house            a    4  [100, 202, ]
house            am   4  [100, ]
house            an   3  [100, 202, ]
house            b    2  [100, ]
house            be   2  [100, ]
rings            a    1  [101, ]
rings            a    3  [101, ]
rings            am   3  [101, ]
rings            b    2  [101, ]
rings            be   2  [101, ]
the              a    3  [101, ]
the              b    1  [101, ]
the              be   1  [101, ]
2022-08-17 12:17:45 +02:00
7309111433 Don't run block code in doc tests of word_pair_proximity_docids 2022-08-17 12:17:18 +02:00
f6f8f543e1 Run cargo fmt 2022-08-17 12:17:18 +02:00
34c991ea02 Add newlines in documentation of word_prefix_pair_proximity_docids 2022-08-17 12:17:18 +02:00
06f3fd8c6d Add more comments to WordPrefixPairProximityDocids::execute 2022-08-17 12:17:18 +02:00
474500362c Update wpppd snapshots
New snapshot (yes, it's wrong as well, it will get fixed later):

---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5                a    1  [101, ]
5                a    2  [101, ]
5                am   1  [101, ]
5                b    4  [101, ]
5                be   4  [101, ]
am               a    3  [101, ]
amazing          a    1  [100, ]
amazing          a    2  [100, ]
amazing          a    3  [100, ]
amazing          an   1  [100, ]
amazing          an   2  [100, ]
amazing          b    2  [100, ]
amazing          be   2  [100, ]
an               a    1  [100, ]
an               a    2  [100, 202, ]
an               am   1  [100, ]
an               b    3  [100, ]
an               be   3  [100, ]
and              a    2  [100, ]
and              a    3  [100, ]
and              a    4  [100, ]
and              b    1  [100, ]
and              be   1  [100, ]
                 d\0  0  [100, 202, ]
an               an   2  [100, ]
and              am   2  [100, ]
and              an   3  [100, ]
at               a    2  [100, 101, ]
at               a    3  [100, ]
at               am   2  [100, 101, ]
at               an   1  [100, 202, ]
at               an   3  [100, ]
at               b    3  [101, ]
at               b    4  [100, ]
at               be   3  [101, ]
at               be   4  [100, ]
beautiful        a    2  [100, ]
beautiful        a    3  [100, ]
beautiful        a    4  [100, ]
beautiful        am   3  [100, ]
beautiful        an   2  [100, ]
beautiful        an   4  [100, ]
bell             a    2  [101, ]
bell             a    4  [101, ]
bell             am   4  [101, ]
extraordinary    a    2  [202, ]
extraordinary    a    3  [202, ]
extraordinary    an   2  [202, ]
house            a    4  [100, 202, ]
house            a    4  [100, ]
house            am   4  [100, ]
house            an   3  [100, 202, ]
house            b    2  [100, ]
house            be   2  [100, ]
rings            a    1  [101, ]
rings            a    3  [101, ]
rings            am   3  [101, ]
rings            b    2  [101, ]
rings            be   2  [101, ]
the              a    3  [101, ]
the              b    1  [101, ]
the              be   1  [101, ]
2022-08-17 12:17:18 +02:00
ea4a96761c Move content of readme for WordPrefixPairProximityDocids into the code 2022-08-17 12:05:37 +02:00
220921628b Simplify and document WordPrefixPairProximityDocIds::execute 2022-08-17 11:59:19 +02:00
044356d221 Optimise WordPrefixPairProximityDocIds merge operation 2022-08-17 11:59:18 +02:00
d350114159 Add tests for WordPrefixPairProximityDocIds 2022-08-17 11:59:15 +02:00
86807ca848 Refactor word prefix pair proximity indexation further 2022-08-17 11:59:13 +02:00
306593144d Refactor word prefix pair proximity indexation 2022-08-17 11:59:00 +02:00
5d59bfde8a Sort Cargo.toml dependencies 2022-08-17 11:46:56 +02:00
f55034ed54 Merge #606
606: Make binaries faster on release profile through better compile options r=Kerollmops a=loiclec

Using `codegen-units = 1` and `lto = 'thin'` makes the compile time a bit longer, but also produces faster binaries.

I'd like to run milli's benchmark with these options, so that we can see whether it is worth enabling on meilisearch.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 08:57:24 +00:00
03e679b634 Make binaries faster on release profile through better compile options 2022-08-17 10:29:33 +02:00
f20e588ec1 Make sure there is one newline at eof in cargo.toml 2022-08-17 07:44:33 +02:00
20be69e1b9 Always use mimalloc as the global allocator 2022-08-16 20:09:36 +02:00
293a246af8 Merge #601
601: Introduce snapshot tests r=Kerollmops a=loiclec

# Pull Request
## What does this PR do?
Introduce snapshot tests into milli, by using the `insta` crate. This implements the idea described by #597 

See: [insta.rs](https://insta.rs)

## Design
There is now a new file, `snapshot_tests.rs`, which is compiled only under `#[cfg(test)]`. It exposes the `db_snap!` macro, which is used to snapshot the content of a database.

When running `cargo test`, `insta` will check that the value of the current snapshot is the same as the previous one (on the file system). If they are the same, the test passes. If they are different, the test fails and you are asked to review the new snapshot to approve or reject it.

We don't want to save very large snapshots to the file system, because it will pollute the git repository and increase its size too much. Instead, we only save their `md5` hashes under the name `<snapshot_name>.hash.snap`. There is a new environment variable called `MILLI_TEST_FULL_SNAPS` which can be set to `true` in order to *also* save the full content of the snapshot under the name `<snapshot_name>.full.snap`. However, snapshots with the extension `.full.snap` are never saved to the git repository.

## Example
```rust
// In e.g. facets.rs
#[test]
fn my_test() {
    // create an index
    let index = TempIndex::new();
    index.add_documents(...);
    index.update_settings(|settings| ...);
    
    // then snapshot the content of one of its databases
    // the snapshot will be saved at the current folder under facets.rs/my_test/facet_id_string_docids.snap
    db_snap!(index, facet_id_string_docids);

    index.add_documents(...);   

    // we can also name the snapshot to ensure there is no conflict
    // this snapshot will be saved at facets.rs/my_test/updated/facet_id_string_docids.snap
    db_snap!(index, facet_id_string_docids, "updated");
    
    // and we can also use "inline" snapshots, which insert their content in the given string literal
    db_snap!(index, field_distributions, @"");
    // once the snapshot is approved, it will automatically get transformed to, e.g.:
    // db_snap!(index, field_distributions, @"
    // my_facet        21
    // other_field     3
    // ");
    
    // now let's add **many** documents
    index.add_documents(...);
    
    // because the snapshot is too big, its hash is saved instead
    // if the MILLI_TEST_FULL_SNAPS env variable is set to true, then the full snapshot will also be saved
    // at facets.rs/my_test/large/facet_id_string_docids.full.snap
    db_snap!(index, facet_id_string_docids, "large", @"5348bbc46b5384455b6a900666d2a502");
}
```

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-16 11:57:09 +00:00
dea00311b6 Add type annotations to remove compiler error 2022-08-16 09:19:30 +02:00
fb2b6c0c28 Use mimalloc for benchmarks on all platforms 2022-08-10 16:56:42 +02:00
6f49126223 Fix db_snap macro with inline parameter 2022-08-10 15:55:22 +02:00
12920f2a4f Fix paths of snapshot tests 2022-08-10 15:53:46 +02:00
4b7fd4dfae Update insta version 2022-08-10 15:53:46 +02:00
ce560fdcb5 Add documentation for db_snap! 2022-08-10 15:53:46 +02:00
748bb86b5b cargo fmt 2022-08-10 15:53:46 +02:00
051f24f674 Switch to snapshot tests for search/matches/mod.rs 2022-08-10 15:53:46 +02:00
d2e01528a6 Switch to snapshot tests for search/criteria/typo.rs 2022-08-10 15:53:46 +02:00
a9c7d82693 Switch to snapshot tests for search/criteria/attribute.rs 2022-08-10 15:53:46 +02:00
4bba2f41d7 Switch to snapshot tests for query_tree.rs 2022-08-10 15:53:46 +02:00
8ac24d3114 Cargo fmt + fix compiler warnings/error 2022-08-10 15:53:46 +02:00
6066256689 Add snapshot tests for indexing of word_prefix_pair_proximity_docids 2022-08-10 15:53:46 +02:00
3a734af159 Add snapshot tests for Facets::execute 2022-08-10 15:53:46 +02:00
b9907997e4 Remove old snapshot tests code 2022-08-10 15:53:46 +02:00
ef889ade5d Refactor snapshot tests 2022-08-10 15:53:46 +02:00
334098a7e0 Add index snapshot test helper function 2022-08-10 15:53:46 +02:00
8f73251012 Use mimalloc for benchmarks on macOS 2022-08-10 13:30:56 +02:00
b389be48a0 Factorize phrase computation 2022-08-08 10:37:31 +02:00
950d8e4c44 Merge #600
600: Simplify some unit tests r=ManyTheFish a=loiclec

# Pull Request

## What does this PR do?
Simplify the code that is used in unit tests to create and modify an index. Basically, the following code:
```rust
  let path = tempfile::tempdir().unwrap();
  let mut options = EnvOpenOptions::new();
  options.map_size(10 * 1024 * 1024); // 10 MB
  let index = Index::new(options, &path).unwrap();

  let mut wtxn = index.write_txn().unwrap();
  let content = documents!([
      { "id": 0, "name": "kevin" },
  ]);
  let config = IndexerConfig::default();
  let indexing_config = IndexDocumentsConfig::default();
  let builder =
      IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
  let (builder, user_error) = builder.add_documents(content).unwrap();
  user_error.unwrap();
  builder.execute().unwrap();
  wtxn.commit().unwrap();

  let mut wtxn = index.write_txn().unwrap();
  let config = IndexerConfig::default();
  let mut builder = Settings::new(&mut wtxn, &index, &config);
  builder.set_primary_key(S("docid"));
  builder.set_filterable_fields(hashset! { S("label") });
  builder.execute(|_| ()).unwrap();
  wtxn.commit().unwrap();
```
becomes:
```rust
let index = TempIndex::new();
index.add_documents(documents!(
      { "id": 0, "name": "kevin" },
)).unwrap();
index.update_settings(|settings| {
    settings.set_primary_key(S("docid"));
    settings.set_filterable_fields(hashset! { S("label") });
}).unwrap();
```

Then there is a bunch of options to modify the indexing configs, the map size, to reuse a transaction, etc. For example:
```rust
let mut index = TempIndex::new_with_map_size(1000 * 4096 * 10);
index.index_documents_config.autogenerate_docids = true;
let mut wtxn = index.write_txn().unwrap();
index.update_settings_using_wtxn(&mut wtxn, |settings| {
    settings.set_primary_key(S("docids"));
}).unwrap();
wtxn.commit().unwrap();
```

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
2022-08-04 10:19:42 +00:00
58cb1c1bda Simplify unit tests in facet/filter.rs 2022-08-04 12:03:44 +02:00
acff17fb88 Simplify indexing tests 2022-08-04 12:03:13 +02:00
21284cf235 Merge #556
556: Add EXISTS filter r=loiclec a=loiclec

## What does this PR do?

Fixes issue [#2484](https://github.com/meilisearch/meilisearch/issues/2484) in the meilisearch repo.

It creates a `field EXISTS` filter which selects all documents containing the `field` key. 
For example, with the following documents:
```json
[{
	"id": 0,
	"colour": []
},
{
	"id": 1,
	"colour": ["blue", "green"]
},
{
	"id": 2,
	"colour": 145238
},
{
	"id": 3,
	"colour": null
},
{
	"id": 4,
	"colour": {
		"green": []
	}
},
{
	"id": 5,
	"colour": {}
},
{
	"id": 6
}]
```
Then the filter `colour EXISTS` selects the ids `[0, 1, 2, 3, 4, 5]`. The filter `colour NOT EXISTS` selects `[6]`.

## Details
There is a new database named `facet-id-exists-docids`. Its keys are field ids and its values are bitmaps of all the document ids where the corresponding field exists.

To create this database, the indexing part of milli had to be adapted. The implementation there is basically copy/pasted from the code handling the `facet-id-f64-docids` database, with appropriate modifications in place.

There was an issue involving the flattening of documents during (re)indexing. Previously, the following JSON:
```json
{
    "id": 0,
    "colour": [],
    "size": {}
}
```
would be flattened to:
```json
{
    "id": 0
}
```
prior to being given to the extraction pipeline.

This transformation would lose the information that is needed to populate the `facet-id-exists-docids` database. Therefore, I have also changed the implementation of the `flatten-serde-json` crate. Now, as it traverses the Json, it keeps track of which key was encountered. Then, at the end, if a previously encountered key is not present in the flattened object, it adds that key to the object with an empty array as value. For example:
```json
{
    "id": 0,
    "colour": {
        "green": [],
        "blue": 1
    },
    "size": {}
} 
```
becomes
```json
{
    "id": 0,
    "colour": [],
    "colour.green": [],
    "colour.blue": 1,
    "size": []
} 
```


Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-08-04 09:46:06 +00:00
50f6524ff2 Merge #579
579: Stop reindexing already indexed documents r=ManyTheFish a=irevoire

```
 % ./compare.sh indexing_stop-reindexing-unchanged-documents_cb5a1669.json indexing_main_eeba1960.json
group                                                                     indexing_main_eeba1960                 indexing_stop-reindexing-unchanged-documents_cb5a1669
-----                                                                     ----------------------                 -----------------------------------------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.03      2.0±0.22ms        ? ?/sec    1.00  1955.4±336.24µs        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.08     11.0±2.93ms        ? ?/sec    1.00     10.2±4.04ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.00     15.1±3.89ms        ? ?/sec    1.14     17.1±5.18ms        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.26    59.2±12.01ms        ? ?/sec    1.00     47.1±8.52ms        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.08   316.6±31.53ms        ? ?/sec    1.00   293.6±17.00ms        ? ?/sec
indexing/Indexing geo_point                                               1.01      60.9±0.31s        ? ?/sec    1.00      60.6±0.36s        ? ?/sec
indexing/Indexing movies in three batches                                 1.04      20.0±0.30s        ? ?/sec    1.00      19.2±0.25s        ? ?/sec
indexing/Indexing movies with default settings                            1.02      19.1±0.18s        ? ?/sec    1.00      18.7±0.24s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.02      26.2±0.29s        ? ?/sec    1.00      25.9±0.22s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.02      25.3±0.32s        ? ?/sec    1.00      24.7±0.26s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.00      66.7±0.41s        ? ?/sec    1.01      67.1±0.86s        ? ?/sec
indexing/Indexing songs with default settings                             1.00      58.3±0.90s        ? ?/sec    1.01      58.8±1.32s        ? ?/sec
indexing/Indexing songs without any facets                                1.00      54.5±1.43s        ? ?/sec    1.01      55.2±1.29s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.00      57.9±1.20s        ? ?/sec    1.01      58.4±0.93s        ? ?/sec
indexing/Indexing wiki                                                    1.00   1052.0±10.95s        ? ?/sec    1.02   1069.4±20.38s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.00    1193.1±8.83s        ? ?/sec    1.00    1189.5±9.40s        ? ?/sec
indexing/Reindexing geo_point                                             3.22      67.5±0.73s        ? ?/sec    1.00      21.0±0.16s        ? ?/sec
indexing/Reindexing movies with default settings                          3.75      19.4±0.28s        ? ?/sec    1.00       5.2±0.05s        ? ?/sec
indexing/Reindexing songs with default settings                           8.90      61.4±0.91s        ? ?/sec    1.00       6.9±0.07s        ? ?/sec
indexing/Reindexing wiki                                                  1.00   1748.2±35.68s        ? ?/sec    1.00   1750.5±18.53s        ? ?/sec
```

tldr: We do not lose any performance on the normal indexing benchmark, but we get between 3 and 8 times faster on the reindexing benchmarks 👍 

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-08-04 08:10:37 +00:00
e8987cf5aa Merge #599
599: fix: Remove whitespace trimming during document id validation r=ManyTheFish a=ManyTheFish

fix #592


related to https://github.com/meilisearch/meilisearch/issues/2640


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-03 14:55:25 +00:00
d6f9a60a32 fix: Remove whitespace trimming during document id validation
fix #592
2022-08-03 11:38:40 +02:00
7fc35c5586 remove the useless prints 2022-08-02 10:31:22 +02:00
f156d7dd3b Stop reindexing already indexed documents 2022-08-02 10:31:20 +02:00
1fe224f2c6 Update filter-parser/fuzz/.gitignore
Co-authored-by: Many the fish <many@meilisearch.com>
2022-07-21 16:12:01 +02:00
07003704a8 Merge branch 'filter/field-exist' 2022-07-21 14:51:41 +02:00
e1bc610d27 Merge #595
595: Update version for next release (v0.32.0) r=ManyTheFish a=curquiza

In order to release on `main` (for v0.29.0, not v0.28.1)

<img width="1014" alt="Capture d’écran 2022-07-21 à 13 20 35" src="https://user-images.githubusercontent.com/20380692/180178725-381fbdf1-c0fb-4fa9-9954-452aec5a1574.png">


Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-07-21 11:07:42 +00:00
d5e9b7305b Update version for next release (v0.32.0) 2022-07-21 13:20:02 +04:00
cbb3b25459 Fix(Search): Fix phrase search candidates computation
This bug is an old one, but it was hidden by the proximity criterion.
Phrase searches were always returning an empty candidates list.

Before the fix, we were trying to find any words[n] near words[n]
instead of finding  any words[n] near words[n+1], for example:

for a phrase search '"Hello world"' we were searching for "hello" near "hello" first, instead of "hello" near "world".
2022-07-21 10:04:30 +02:00
941af58239 Merge #561
561: Enriched documents batch reader r=curquiza a=Kerollmops

~This PR is based on #555 and must be rebased on main after it has been merged to ease the review.~
This PR contains the work in #555 and can be merged on main as soon as reviewed and approved.

- [x] Create an `EnrichedDocumentsBatchReader` that contains the external documents id.
- [x] Extract the primary key name and make it accessible in the `EnrichedDocumentsBatchReader`.
- [x] Use the external id from the `EnrichedDocumentsBatchReader` in the `Transform::read_documents`.
- [x] Remove the `update_primary_key` from the _transform.rs_ file.
- [x] Really generate the auto-generated documents ids.
- [x] Insert the (auto-generated) document ids in the document while processing it in `Transform::read_documents`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-07-21 07:08:50 +00:00
41a0ce07cb Add a code comment, as suggested in PR review
Co-authored-by: Many the fish <many@meilisearch.com>
2022-07-20 16:20:35 +02:00
1506683705 Avoid using too much memory when indexing facet-exists-docids 2022-07-19 14:42:35 +02:00
d0eee5ff7a Fix compiler error 2022-07-19 13:54:30 +02:00
aed8c69bcb Refactor indexation of the "facet-id-exists-docids" database
The idea is to directly create a sorted and merged list of bitmaps
in the form of a BTreeMap<FieldId, RoaringBitmap> instead of creating
a grenad::Reader where the keys are field_id and the values are docids.

Then we send that BTreeMap to the thing that handles TypedChunks, which
inserts its content into the database.
2022-07-19 10:07:33 +02:00
1eb1e73bb3 Add integration tests for the EXISTS filter 2022-07-19 10:07:33 +02:00
4f0bd317df Remove custom implementation of BytesEncode/Decode for the FieldId 2022-07-19 10:07:33 +02:00
80b962b4f4 Run cargo fmt 2022-07-19 10:07:33 +02:00
ea0642c32d Make filter parser more strict regarding spacing around operators
OR, AND, NOT, TO must now be followed by spaces
2022-07-19 10:07:33 +02:00
c17d616250 Refactor index_documents_check_exists_database tests 2022-07-19 10:07:33 +02:00
30bd4db0fc Simplify indexing task for facet_exists_docids database 2022-07-19 10:07:33 +02:00
392472f4bb Apply suggestions from code review
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-07-19 10:07:33 +02:00
bd15f5625a Fix compiler warning 2022-07-19 10:07:33 +02:00
722db7b088 Ignore target directory of filter-parser/fuzz crate 2022-07-19 10:07:33 +02:00
a5c9162250 Improve parser for NOT EXISTS filter
Allow multiple spaces between NOT and EXISTS
2022-07-19 10:07:33 +02:00
0388b2d463 Run cargo fmt 2022-07-19 10:07:33 +02:00
dc64170a69 Improve syntax of EXISTS filter, allow “value NOT EXISTS” 2022-07-19 10:07:33 +02:00
72452f0cb2 Implements the EXIST filter operator 2022-07-19 10:07:33 +02:00
a8641b42a7 Modify flatten_serde_json to keep dummy value for all object keys
Example:
```json
{
    "id": 0,
    "colour" : { "green": 1 }
}
```
becomes:
```json
{
    "id": 0,
    "colour" : [],
    "colour.green": 1
}
```
to retain the information that the key "colour" exists in the original
json value.
2022-07-19 10:07:33 +02:00
453d593ce8 Add a database containing the docids where each field exists 2022-07-19 10:07:33 +02:00
5704235521 Merge #584
584: Chores: Enhance smart-crop code comments r=curquiza a=ManyTheFish

Enhance explanation around smart crop algorithms

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2022-07-19 07:08:14 +00:00
f6415b679f Merge #588
588: Fix name of "release_date" facet in movies benchmarks r=ManyTheFish a=loiclec

## What does this PR do?
The `movies.json` file in the benchmark datasets contains a filterable field called "release_date", but the indexing benchmarks wrongly called the field "released_date" instead. This PR fixes that.


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-07-18 15:51:09 +00:00
2d79720f5d Update milli/src/search/matches/mod.rs 2022-07-18 17:48:04 +02:00
8ddb4e750b Update milli/src/search/matches/mod.rs 2022-07-18 17:47:39 +02:00
a277daa1f2 Update milli/src/search/matches/mod.rs 2022-07-18 17:47:13 +02:00
fb794c6b5e Update milli/src/search/matches/mod.rs 2022-07-18 17:46:00 +02:00
1237cfc249 Update milli/src/search/matches/mod.rs 2022-07-18 17:45:37 +02:00
d7fd5c58cd Update milli/src/search/matches/mod.rs 2022-07-18 17:45:06 +02:00
fc9f3f31e7 Change DocumentsBatchReader to access cursor and index at same time
Otherwise it is not possible to iterate over all documents while
using the fields index at the same time.
2022-07-18 16:08:14 +02:00
ab1571cdec Simplify Transform::read_documents, enabled by enriched documents reader 2022-07-18 12:45:47 +02:00
8270e2b768 Fix name of "release_date" facet in movies benchmarks 2022-07-18 10:34:12 +02:00
e261ef64d7 Update milli/src/search/matches/mod.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-07-18 10:18:51 +02:00
1da4ab5918 Update milli/src/search/matches/mod.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-07-18 10:18:03 +02:00
448114cc1c Fix the benchmarks with the new indexation API 2022-07-12 15:22:09 +02:00
25e768f31c Fix another issue with the nested primary key selector 2022-07-12 15:14:07 +02:00
192793ee38 Add some tests to check for the nested documents ids 2022-07-12 15:14:07 +02:00
a892a4a79c Introduce a function to extend from a JSON array of objects 2022-07-12 15:14:06 +02:00
dc61105554 Fix the nested document id fetching function 2022-07-12 15:14:06 +02:00
2eec290424 Check the validity of the latitude and longitude numbers 2022-07-12 15:14:06 +02:00
5d149d631f Remove tests for a function that no longer exists 2022-07-12 15:14:06 +02:00
0bbcc7b180 Expose the DocumentId struct to be sure to inject the generated ids 2022-07-12 15:14:06 +02:00
d1a4da9812 Generate a real UUIDv4 when ids are auto-generated 2022-07-12 15:14:06 +02:00
c8ebf0de47 Rename the validate function as an enriching function 2022-07-12 15:14:06 +02:00
905af2a2e9 Use the primary key and external id in the transform 2022-07-12 15:14:05 +02:00
742543091e Constify the default primary key name 2022-07-12 14:55:52 +02:00
5f1bfb73ee Extract the primary key name and make it accessible 2022-07-12 14:55:52 +02:00
6a0a0ae94f Make the Transform read from an EnrichedDocumentsBatchReader 2022-07-12 14:55:52 +02:00
ea852200bb Fix the format used for a geo deleting benchmark 2022-07-12 14:55:52 +02:00
dc3f092d07 Do not leak an internal grenad Error 2022-07-12 14:55:52 +02:00
8ebf5eed0d Make the nested primary key work 2022-07-12 14:55:52 +02:00
19eb3b4708 Make sure that we do not accept floats as document ids 2022-07-12 14:55:52 +02:00
2ceeb51c37 Support the auto-generated ids when validating documents 2022-07-12 14:55:51 +02:00
399eec5c01 Fix the indexation tests 2022-07-12 14:55:51 +02:00
fcfc4caf8c Move the Object type in the lib.rs file and use it everywhere 2022-07-12 14:55:51 +02:00
0146175fe6 Introduce the validate_documents_batch function 2022-07-12 14:55:51 +02:00
cefffde9af Improve the .gitignore of the fuzz crate 2022-07-12 14:55:51 +02:00
bdc4263883 Introduce the validate_documents_batch function 2022-07-12 14:55:51 +02:00
a97d4d63b9 Fix the benchmarks 2022-07-12 14:55:50 +02:00
f29114f94a Fix http-ui to fit with the new DocumentsBatchBuilder/Reader structs 2022-07-12 14:52:56 +02:00
a4ceef9624 Fix the cli for the new DocumentsBatchBuilder/Reader structs 2022-07-12 14:52:56 +02:00
6d0498df24 Fix the fuzz tests 2022-07-12 14:52:56 +02:00
e8297ad27e Fix the tests for the new DocumentsBatchBuilder/Reader 2022-07-12 14:52:56 +02:00
419ce3966c Rework the DocumentsBatchBuilder/Reader to use grenad 2022-07-12 14:52:55 +02:00
eb63af1f10 Update grenad to 0.4.2 2022-07-12 14:52:55 +02:00
048e174efb Do not allocate when parsing CSV headers 2022-07-12 14:52:55 +02:00
5d79617a56 Chores: Enhance smart-crop code comments 2022-07-07 16:28:09 +02:00
ce90fc628a Merge #583
583: Use BufReader to read datasets in benchmarks r=ManyTheFish a=loiclec

## What does this PR do?
Ensure that the datasets used by the benchmarks are read efficiently by using a `BufReader`.

## Why?
Using a `BufReader` is more representative of how `meilisearch` works. It will also make performance comparisons between different branches of `milli` more accurate.




Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-07-07 08:13:07 +00:00
aae03356cb Use BufReader to read datasets in benchmarks 2022-07-06 18:20:15 +02:00
ebddfdb9a3 Merge #578
578: Bump uuid to 1.1.2 r=ManyTheFish a=Kerollmops

Just to [align the version with Meilisearch](https://github.com/meilisearch/meilisearch/pull/2584).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-07-05 14:56:08 +00:00
eeba196053 Merge #572
572: Add reindexing benchmarks r=Kerollmops a=irevoire

With #557 coming, we should add benchmarks that measure our impact on the reindexing process.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-07-05 14:43:01 +00:00
1bfdcfc84f Bump uuid to 1.1.2 2022-07-05 16:23:36 +02:00
dd1e606f13 Merge #557
557: Fasten documents deletion and update r=Kerollmops a=irevoire

When a document deletion occurs, instead of deleting the document we mark it as deleted in the new “soft deleted” bitmap. It is then removed from the search and all the other endpoints.

I ran the benchmarks against main;
```
% ./compare.sh indexing_main_83ad1aaf.json indexing_fasten-document-deletion_abab51fb.json
group                                                                     indexing_fasten-document-deletion_abab51fb    indexing_main_83ad1aaf
-----                                                                     ------------------------------------------    ----------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.05      2.0±0.40ms        ? ?/sec           1.00  1904.9±190.00µs        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.00     10.3±2.64ms        ? ?/sec           961.61      9.9±0.12s        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.00     15.1±3.90ms        ? ?/sec           554.63      8.4±0.12s        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.00     45.1±7.53ms        ? ?/sec           710.15     32.0±0.10s        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.00    277.8±7.97ms        ? ?/sec           1946.57    540.8±3.15s        ? ?/sec
indexing/Indexing geo_point                                               1.00      12.0±0.20s        ? ?/sec           1.03      12.4±0.19s        ? ?/sec
indexing/Indexing movies in three batches                                 1.00      19.3±0.30s        ? ?/sec           1.01      19.4±0.16s        ? ?/sec
indexing/Indexing movies with default settings                            1.00      18.8±0.09s        ? ?/sec           1.00      18.9±0.10s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.00      25.9±0.19s        ? ?/sec           1.00      25.9±0.12s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.00      24.8±0.17s        ? ?/sec           1.00      24.8±0.18s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.00      65.9±0.96s        ? ?/sec           1.03      67.8±0.82s        ? ?/sec
indexing/Indexing songs with default settings                             1.00      58.8±1.11s        ? ?/sec           1.02      59.9±2.09s        ? ?/sec
indexing/Indexing songs without any facets                                1.00      53.4±0.72s        ? ?/sec           1.01      54.2±0.88s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.00      57.9±1.17s        ? ?/sec           1.01      58.3±1.20s        ? ?/sec
indexing/Indexing wiki                                                    1.00   1065.2±13.26s        ? ?/sec           1.00   1065.8±12.66s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.00    1182.4±6.20s        ? ?/sec           1.01    1190.8±8.48s        ? ?/sec
```

Most things do not change, we lost 0.1ms on the indexing of geo point (I don’t get why), and then we are between 500 and 1900 times faster when we delete documents.


Co-authored-by: Tamo <tamo@meilisearch.com>
2022-07-05 14:14:38 +00:00
250be9fe6c put the threshold back to 10k 2022-07-05 15:57:44 +02:00
62692c171d Merge #577
577: Fix deserialisation of NDJson documents in benchmarks r=irevoire a=loiclec

Previously, the first document in the NDJson file was read over and over again. So the `geo_point` benchmark was not working properly: it only indexed one document.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-07-05 13:54:47 +00:00
9bc7627e27 Fix deserialisation of NDJson documents in benchmarks 2022-07-05 15:51:06 +02:00
b61efd09fc Makes the internal soft deleted error a UserError 2022-07-05 15:34:45 +02:00
eaf28b0628 Apply review suggestions
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-07-05 15:30:33 +02:00
3b309f654a Fasten the document deletion
When a document deletion occurs, instead of deleting the document we mark it as deleted
in the new “soft deleted” bitmap. It is then removed from the search, and all the other
endpoints.
2022-07-05 15:30:33 +02:00
2700d8dc67 Add reindexing benchmarks 2022-07-05 14:46:46 +02:00
77c837fc1b Merge #575
575: Bump charabia r=loiclec a=irevoire

This fixes #573

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-07-05 11:53:57 +00:00
446439e8be bump charabia 2022-07-05 12:19:30 +02:00
c6f4775fde Merge #568
568: Fix not equal filter when field contains both number and strings r=Kerollmops a=GraDKh

Related to https://github.com/meilisearch/meilisearch/issues/2516
Looks like the issue should be moved to this repo, but I'm not sure what the right procedure for it is.

Co-authored-by: Dmytro Gordon <dmytro@bigstream.co>
2022-06-28 08:46:23 +00:00
3ff03a3f5f Fix not equal filter when field contains both number and strings 2022-06-27 15:55:17 +03:00
83ad1aaf05 Merge #567
567: Bump the milli version to 0.31.1 r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 15:07:03 +00:00
cc48992e79 Bump the milli version to 0.31.1 2022-06-22 17:05:51 +02:00
68bb170732 Merge #566
566: Introduce the copy_to_path method on the Index r=irevoire a=Kerollmops

Meilisearch needs this method to do snapshots.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 14:52:19 +00:00
238692a8e7 Introduce the copy_to_path method on the Index 2022-06-22 16:49:47 +02:00
290a40b7a5 Merge #564
564: Rename the limitedTo parameter into maxTotalHits r=curquiza a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2542, it renames the `limitedTo` parameter into `maxTotalHits`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 13:48:33 +00:00
d546f6f40e Merge #563
563: Improve the `estimatedNbHits` when a `distinctAttribute` is specified r=irevoire a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2532 but it doesn't fix it entirely. It improves it by computing the excluded documents (the ones with an already-seen distinct value) before stopping the loop, I think it was a mistake and should always have been this way.

The reason it doesn't fix the issue is that Meilisearch is lazy, just to be sure not to compute too many things and take too much time to answer. When we deduplicate the documents by their distinct value we must do it on the fly: every time we see a new document we check that its distinct value doesn't collide with that of an already returned document.

The reason we can see the correct result when enough documents are fetched is that we were lucky to see all of the different distinct values possible in the dataset and all of the deduplication was done, no document can be returned.

If we wanted to implement that to have a correct `estimatedNbHits` every time we should have done a pass on the whole set of possible distinct values for the distinct attribute and do a big intersection, this could cost a lot of CPU cycles.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 12:39:44 +00:00
38a8d3cae1 Merge #565
565: Bump the milli version to 0.31.0 r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 10:09:41 +00:00
f5c3b951bc Bump the milli version to 0.31.0 2022-06-22 12:08:16 +02:00
d7c248042b Rename the limitedTo parameter into maxTotalHits 2022-06-22 12:00:48 +02:00
d2f84a9d9e Improve the estimatedNbHits when distinct is enabled 2022-06-22 11:39:21 +02:00
4f547eff02 Merge #560
560: Update version for next release (v0.30.0) r=curquiza a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-06-20 12:37:01 +00:00
64b833410c Merge #559
559: Avoid having an ending separator before crop marker r=irevoire a=ManyTheFish

related to https://github.com/meilisearch/meilisearch/issues/2528


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-20 11:06:52 +00:00
31f749b5d8 Update version for next release (v0.30.0) 2022-06-20 12:09:57 +02:00
a0ab90a4d7 Avoid having an ending separator before crop marker 2022-06-16 18:23:57 +02:00
a59ae19842 Merge #558
558: Deletion benchmarks r=ManyTheFish a=ManyTheFish

Add benchmarks on the deletion and start rethinking benchmark names.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-16 09:34:37 +00:00
2652310f2a Change delete benchmark names 2022-06-16 10:32:58 +02:00
adbb0ff318 Add deletion benchmarks 2022-06-16 10:17:58 +02:00
0a5d1a445e Merge #554
554: Enhance tests for soft deletion r=irevoire a=ManyTheFish

#### tests: (skip in changelog)
- [x] placeholder search shouldn’t return soft deleted
- [x] search shouldn’t return soft deleted
- [x] filtered placeholder search shouldn’t return soft deleted
- [x] geo-filtered placeholder search shouldn’t return soft deleted
- [x] documents list/get shouldn’t return soft deleted
- [x] stats shouldn’t count soft deleted

#### other: (API breaking)
- [x] ensure that Index methods are not bypassed by Meilisearch


Poke `@irevoire`, we may merge this into your branch.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-14 09:49:37 +00:00
447195a27a Replace format by to_string 2022-06-14 10:32:44 +02:00
177154828c Extends deletion tests 2022-06-13 17:34:16 +02:00
0d1d354052 Ensure that Index methods are not bypassed by Meilisearch 2022-06-13 17:34:11 +02:00
f1d848bb9a Merge #552
552: Fix escaped quotes in filter r=Kerollmops a=irevoire

Will fix https://github.com/meilisearch/meilisearch/issues/2380

The issue was that in the evaluation of the filter, I was using the deref implementation instead of calling the `value` method of my token.

To avoid the problem happening again, I removed the deref implementation; now, you need to either call the `lexeme` or the `value` methods but can't rely on a « default » implementation to get a string out of a token.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-06-09 14:56:44 +00:00
676187ba43 bump milli version 2022-06-09 16:53:32 +02:00
90afde435b fix escaped quotes in filter 2022-06-09 16:03:49 +02:00
19d44142a1 Merge #550
550: Add the two new pagination and faceting settings r=ManyTheFish a=Kerollmops

This PR adds two new settings in the database, those settings are described [in this spec](https://github.com/meilisearch/specifications/pull/157).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-09 08:16:01 +00:00
445d5474cc Add the pagination_limited_to setting to the database 2022-06-08 18:14:27 +02:00
69931e50d2 Add the max_values_by_facet setting to the database 2022-06-08 17:54:56 +02:00
52a494bd3b Add the new pagination.limited_to and faceting.max_values_per_facet settings 2022-06-08 17:15:36 +02:00
9580b9de79 Merge #549
549: Bump the version to 0.29.2 r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-08 14:29:47 +00:00
a762d7f462 Merge #548
548: Setup the new limits on the number of facet values to return r=ManyTheFish a=Kerollmops

This PR implements the early draft of the new spec (waiting for it) specifying how the new facet limit feature should work and which limit we apply to the number of facet values to return by facet.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-08 14:00:33 +00:00
56ee9cc21f Bump the version to 0.29.2 2022-06-08 16:00:06 +02:00
2a505503b3 Change the number of facet values returned by default to 100 2022-06-08 15:58:57 +02:00
bae4007447 Remove the hard limit on the number of facet values returned 2022-06-08 15:58:57 +02:00
7313d6c533 Merge #547
547: Update version for next release (v0.29.1) r=Kerollmops a=curquiza

A new milli version will be released once this PR is merged https://github.com/meilisearch/milli/pull/543

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-06-08 10:20:24 +00:00
306d2f37ff Merge #543
543: Fix wrong internal ids assignments r=irevoire a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/2470

Co-authored-by: ad hoc <postma.marin@protonmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-06-08 09:19:58 +00:00
478dbfa45a Update version for next release (v0.29.1) 2022-06-07 18:59:33 +02:00
d0aaa7ff00 Fix wrong internal ids assignments 2022-06-07 15:49:33 +02:00
31776fdc3f add failing test 2022-06-07 15:49:33 +02:00
05ae6dbfa4 Merge #541
541: Update version for next release (v0.29.0) r=ManyTheFish a=curquiza

Need to update the version since #540 was merged and breaking

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-06-02 16:53:28 +00:00
78f76c841d Merge #542
542: Refactor matching word r=Kerollmops a=ManyTheFish

Simplify MatchingWords API


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-02 16:23:41 +00:00
d212dc6b8b Remove useless newline 2022-06-02 18:22:56 +02:00
a5c790bf4b Update http-ui 2022-06-02 18:15:36 +02:00
6ce1c6487a Update version for next release (v0.29.0) 2022-06-02 18:07:55 +02:00
727d663f28 Update benchmarks 2022-06-02 18:07:10 +02:00
7aabe42ae0 Refactor matching words 2022-06-02 17:59:04 +02:00
dd186533f0 Merge #540
540: Integrate charabia r=Kerollmops a=ManyTheFish

related to https://github.com/meilisearch/meilisearch/issues/2375
related to https://github.com/meilisearch/meilisearch/issues/2144
related to https://github.com/meilisearch/meilisearch/issues/2417

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-02 15:34:33 +00:00
4dd7b20c32 Update benchmarks 2022-06-02 17:33:25 +02:00
4dd3675d2b Update http-ui 2022-06-02 16:59:11 +02:00
86ac8568e6 Use Charabia in milli 2022-06-02 16:59:11 +02:00
192e024ada Add Charabia in Cargo.toml 2022-06-02 16:59:07 +02:00
ac6df0df57 Merge #539
539: Update version to v0.28.1 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-06-01 16:40:12 +00:00
c19c17eddb Update version to v0.28.1 2022-06-01 18:31:02 +02:00
74d1914a64 Merge #535
535: Reintroduce the max values by facet limit r=ManyTheFish a=Kerollmops

This PR reintroduces the max values by facet limit this is related to https://github.com/meilisearch/meilisearch/issues/2349.

~I would like some help in deciding on whether I keep the default 100 max values in milli and set up the `FacetDistribution` settings in Meilisearch to use 1000 as the new value, I expose the `max_values_by_facet` for this purpose.~

I changed the default value to 1000 and the max to 10000, thank you `@ManyTheFish` for the help!

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-01 14:30:50 +00:00
582930dbbb Merge #538
538: speedup exact words r=Kerollmops a=MarinPostma

This PR makes `exact_words` return an `Option` instead of an empty set, since set creation is costly, as noticed by `@kerollmops`.

I was not convinced that this was the cause for all of the performance drop we measured, and then realized that methods that initialized it were called recursively which caused initialization times to add up. While the first fix solves the issue when not using exact words, using exact words remained way more expensive than it should be. To address this issue, the exact words are cached into the `Context`, so they are only initialized once.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-05-30 08:20:34 +00:00
9f78e392b1 Merge #536
536: Improves ranking rules error message r=Kerollmops a=matthias-wright

This PR improves the ranking rules error message to properly reflect the case sensitivity.
The issue was highlighted in [meilisearch/issues/2407](https://github.com/meilisearch/meilisearch/issues/2407).
Cheers!

Co-authored-by: Matthias Wright <matthias.s.wright@gmail.com>
2022-05-24 16:43:52 +00:00
25fc576696 review changes 2022-05-24 14:15:33 +02:00
69dc4de80f change &Option<Set> to Option<&Set> 2022-05-24 12:14:55 +02:00
ac975cc747 cache context's exact words 2022-05-24 09:43:17 +02:00
8993fec8a3 return optional exact words 2022-05-24 09:15:49 +02:00
754f48a4fb Improves ranking rules error message 2022-05-20 21:25:43 +02:00
cd7c6e19ed Reintroduce the max values by facet limit 2022-05-18 15:57:57 +02:00
19dac01c5c Merge #534
534: Bump milli version to v0.28.0 r=curquiza a=ManyTheFish



Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-05-18 09:04:46 +00:00
895f5d8a26 Bump milli version 2022-05-18 10:37:12 +02:00
3389561f34 Merge #532
532: Add some implementation on MatchBounds r=Kerollmops a=ManyTheFish

These implementations are needed in Meilisearch

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-05-17 14:50:22 +00:00
137434a1c8 Add some implementation on MatchBounds 2022-05-17 15:57:09 +02:00
08c6d50cd1 Merge #531
531: fix the mixed dataset geosearch indexing bug r=Kerollmops a=irevoire

port #529 to main

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-16 16:06:36 +00:00
cf3e574cb4 Merge #530
530: fix the searchable fields bug when a field is nested r=Kerollmops a=irevoire

port #528 to main

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-16 15:52:30 +00:00
0af399a6d7 fix the mixed dataset geosearch indexing bug 2022-05-16 17:37:45 +02:00
f586028f9a fix the searchable fields bug when a field is nested
Update milli/src/index.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-05-16 17:24:36 +02:00
e1e85267fd Merge #526
526: remove useless comment r=irevoire a=MarinPostma



Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-05-16 10:01:43 +00:00
51809eb260 Merge #525
525: Simplify the error creation with thiserror r=irevoire a=irevoire

I introduced [`thiserror`](https://docs.rs/thiserror/latest/thiserror/) to implement all the `Display` traits and most of the `impl From<xxx> for yyy` in way fewer lines.
And then I introduced a cute macro to implement the `impl<X, Y, Z> From<X> for Z where Y: From<X>, Z: From<X>` more easily.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-04 15:47:32 +00:00
484a9ddb27 Simplify the error creation with thiserror and a smol friendly macro 2022-05-04 17:24:00 +02:00
65e6aa0de2 Merge #523
523: Improve geosearch error messages r=irevoire a=irevoire

Improve the geosearch error messages (#488).
And try to parse the string as specified in https://github.com/meilisearch/meilisearch/issues/2354

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-04 13:36:11 +00:00
f3b9f7b867 Merge #527
527: Remove the wip section part of the contributing file r=curquiza a=Kerollmops

Everything was good in the _Development Workflow_ section so I removed the _WIP Section_ part, now this PR fixes https://github.com/meilisearch/milli/issues/513.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-05-04 13:11:30 +00:00
48cdfddebf Remove the wip section part of the contributing file 2022-05-04 14:44:51 +02:00
c55368ddd4 apply code suggestion
Co-authored-by: Kerollmops <kero@meilisearch.com>
2022-05-04 14:11:03 +02:00
60ccb3fa4c Merge #524
524: Add benchmark on nested fields r=irevoire a=irevoire

fixes #500

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-04 11:56:18 +00:00
5ad5d56f7e remove useless comment 2022-05-04 10:43:54 +02:00
0c2c8af44e Merge #520
520: fix mistake in Settings initialization r=irevoire a=MarinPostma

fix settings not being correctly initialized and add a test to make sure that they are in the future.

fix https://github.com/meilisearch/meilisearch/issues/2358


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-05-03 15:32:18 +00:00
2fe9a02b1c Merge #522
522: Do not generate keys that are too long for LMDB r=Kerollmops a=Kerollmops

This PR fixes https://github.com/meilisearch/meilisearch/issues/2338 by making sure that we do not generate keys that are too long for LMDB especially when we are creating our prefix and proximity pairs keys.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-05-03 11:54:10 +00:00
211c8763b9 Make sure that we do not generate too long keys 2022-05-03 10:03:15 +02:00
7e47031bdc Add a test for long keys in LMDB 2022-05-03 10:03:13 +02:00
f820c9804d add one nested benchmark 2022-05-02 19:35:57 +02:00
3cb1f6d0a1 improve geosearch error messages 2022-05-02 19:20:47 +02:00
1ee3d6ae33 fix mistake in Settings initialization 2022-04-29 16:24:25 +02:00
312515dd6b Merge #507
507: deny warnings in CI r=Kerollmops a=MarinPostma

Add `RUSTFLAGS= -D warnings` to the CI so all warnings are treated as hard errors.

Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-28 15:16:35 +00:00
3eb3f0269e deny warnings in CI 2022-04-28 15:35:12 +02:00
9db86aac51 Merge #518
518: Return facets even when there is no value associated to it r=Kerollmops a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2352 and should fix the issue when Meilisearch is up-to-date with this PR.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-04-28 09:04:36 +00:00
2aae19dc52 Merge #517
517: Make nightly CI run every week r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-26 16:22:25 +00:00
a4d343aade Add a test to check for the returned facet distribution 2022-04-26 18:12:58 +02:00
c2bd94c871 Merge #511
511: Update version in every workspace r=curquiza a=curquiza

Checked with `@Kerollmops` 

- Update the version into every workspace (the current version is v0.27.0, but I forgot to update it for the previous release)
- add `publish = false` except in `milli` workspace.


Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-26 16:06:47 +00:00
7d1c2d97bf Return facets even when there is no values associated to it 2022-04-26 17:59:53 +02:00
d388ea0f9d Merge #506
506: fix cargo warnings r=Kerollmops a=MarinPostma

fix cargo warnings


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-26 15:45:20 +00:00
ec89030483 Update bors toml 2022-04-26 17:36:04 +02:00
5c29258e8e fix cargo warnings 2022-04-26 17:33:11 +02:00
2fdf520271 Merge #514
514: Stop flattening every field r=Kerollmops a=irevoire

When we need to flatten a document:
* The primary key contains a `.`.
* Some fields need to be flattened

Instead of flattening the whole object and thus creating a lot of allocations with the `serde_json_flatten_crate`, we instead generate a minimal sub-object containing only the fields that need to be flattened.
That should create fewer allocations and thus index faster.

---------

```
group                                                             indexing_main_e1e362fa                 indexing_stop-flattening-every-field_40d1bd6b
-----                                                             ----------------------                 ---------------------------------------------
indexing/Indexing geo_point                                       1.99      23.7±0.23s        ? ?/sec    1.00      11.9±0.21s        ? ?/sec
indexing/Indexing movies in three batches                         1.00      18.2±0.24s        ? ?/sec    1.01      18.3±0.29s        ? ?/sec
indexing/Indexing movies with default settings                    1.00      17.5±0.09s        ? ?/sec    1.01      17.7±0.26s        ? ?/sec
indexing/Indexing songs in three batches with default settings    1.00      64.8±0.47s        ? ?/sec    1.00      65.1±0.49s        ? ?/sec
indexing/Indexing songs with default settings                     1.00      54.9±0.99s        ? ?/sec    1.01      55.7±1.34s        ? ?/sec
indexing/Indexing songs without any facets                        1.00      50.6±0.62s        ? ?/sec    1.01      50.9±1.05s        ? ?/sec
indexing/Indexing songs without faceted numbers                   1.00      54.0±1.14s        ? ?/sec    1.01      54.7±1.13s        ? ?/sec
indexing/Indexing wiki                                            1.00     996.2±8.54s        ? ?/sec    1.02   1021.1±30.63s        ? ?/sec
indexing/Indexing wiki in three batches                           1.00    1136.8±9.72s        ? ?/sec    1.00    1138.6±6.59s        ? ?/sec
```

So basically everything slowed down a liiiiiittle bit except the dataset with a nested field which got twice faster

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-26 11:50:33 +00:00
f19d2dc548 Only flatten the required fields
apply review comments

Co-authored-by: Kerollmops <kero@meilisearch.com>
2022-04-26 12:33:46 +02:00
5adeac8047 Merge #516
516: Fix the indexing fuzzer r=irevoire a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-26 08:35:03 +00:00
7cb7643565 Make nightly CI run every week
Update CI

Fix CI
2022-04-25 18:52:27 +02:00
d138b3c704 Update version 2022-04-25 18:43:46 +02:00
fa6f495662 fix the indexing fuzzer 2022-04-25 18:32:06 +02:00
8cc86d5a8d Merge #515
515: Improve the README r=curquiza a=Kerollmops

This PR closes #512 by adding more content to the README. We listed all of the subcrates of the repository, changed the descriptions of the subcrates, and added a simple example usage in the README.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-04-25 16:15:12 +00:00
5e562ffecf Update README.md 2022-04-25 18:14:43 +02:00
2277172f9c Update README.md 2022-04-25 18:14:39 +02:00
2db3d60259 Update README.md 2022-04-25 18:14:35 +02:00
7e19bf1c0e Add an example usage of the library in the README 2022-04-25 17:25:46 +02:00
fb192aaa9f Update the list of milli's subcrates 2022-04-25 15:55:38 +02:00
e1e362fa43 Merge #509
509: Remove pr_status from bors settings r=Kerollmops a=curquiza

Because of multiple issues we had with bors.
https://github.com/bors-ng/bors-ng/issues/1492

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-25 11:45:37 +00:00
08753d002a Remove pr_status from bors settings 2022-04-25 13:39:45 +02:00
8d15ae37a1 Merge pull request #503 from meilisearch/improve-flatten-fuzzer
Improve the fuzzer of the flatten crate
2022-04-25 13:38:43 +02:00
3e53791de3 Merge pull request #508 from meilisearch/contributing
First version of new CONTRIBUTING.md
2022-04-25 13:36:41 +02:00
8010eca9c7 Merge #505
505: normalize exact words r=curquiza a=MarinPostma

Normalize the exact words, as specified in the specification.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-25 09:35:32 +00:00
dc0d4addd9 First version of new CONTRIBUTING.md 2022-04-21 19:02:22 +02:00
71414630fc Merge pull request #504 from meilisearch/test-long-words
Add a test to make sure that long words are handled
2022-04-21 16:06:13 +02:00
2e0089d5ff normalize exact words 2022-04-21 15:38:40 +02:00
3a2451fcba add test normalize exact words 2022-04-21 13:52:09 +02:00
eb5830aa40 Add a test to make sure that long words are handled 2022-04-21 13:45:28 +02:00
d81a3f4a74 improve the fuzzer of the flatten crate 2022-04-20 16:11:23 +02:00
c7d0097c97 Merge #498
498: Get rid of the threshold when comparing benchmarks r=curquiza a=irevoire

It just hides things

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-19 14:04:11 +00:00
152a10344c Get rid of the threshold when comparing benchmarks
It just hides things
2022-04-19 15:39:58 +02:00
04eb32e539 Merge #499
499: fix min-word-len-for-typo not reset properly r=Kerollmops a=MarinPostma

fix min word len for typo not resetting properly, as reported in https://github.com/meilisearch/meilisearch/issues/2330


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-19 13:22:19 +00:00
8b14090927 fix min-word-len-for-typo not reset properly 2022-04-19 15:20:16 +02:00
ea4bb9402f Merge #483
483: Enhance matching words r=Kerollmops a=ManyTheFish

# Summary

Enhance milli word-matcher making it handle match computing and cropping.

# Implementation

## Computing best matches for cropping

Before we were considering that the first match of the attribute was the best one, this was accurate when only one word was searched but was missing the target when more than one word was searched.

Now we are searching for the best matches interval to crop around, the chosen interval is the one:
1) that have the highest count of unique matches
> for example, if we have a query `split the world`, then the interval `the split the split the` has 5 matches but only 2 unique matches (1 for `split` and 1 for `the`) where the interval `split of the world` has 3 matches and 3 unique matches. So the interval `split of the world` is considered better.
2) that have the minimum distance between matches
> for example, if we have a query `split the world`, then the interval `split of the world` has a distance of 3 (2 between `split` and `the`, and 1 between `the` and `world`) where the interval `split the world` has a distance of 2. So the interval `split the world` is considered better.
3) that have the highest count of ordered matches
> for example, if we have a query `split the world`, then the interval `the world split` has 2 ordered words where the interval `split the world` has 3. So the interval `split the world` is considered better.

## Cropping around the best matches interval

Before we were cropping around the interval without checking the context.

Now we are cropping around words in the same context as matching words.
This means that we will keep words that are farther from the matching words but are in the same phrase, than words that are nearer but separated by a dot.

> For instance, for the matching word `Split` the text:
`Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.`
will be cropped like:
`…. Split The World is a book written by Emily Henry. …`
and not like:
`Natalie risk her future. Split The World is a book …`


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-04-19 11:42:32 +00:00
f1115e274f Use Copy impl of FormatOption instead of cloning 2022-04-19 10:35:50 +02:00
a68e3a79fb Merge pull request #497 from meilisearch/v0.26.1
Update version for the next release (v0.26.1)
2022-04-14 11:53:31 +02:00
8d630a6f62 Update version for the next release (v0.26.1) 2022-04-14 11:44:06 +02:00
d362278a41 Merge pull request #494 from meilisearch/flatten-what-is-needed
Only flatten the required objects
2022-04-14 11:43:28 +02:00
00f78d6b5a Apply code suggestions
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-04-14 11:14:08 +02:00
399fba16bb only flatten an object if it's nested 2022-04-14 11:14:08 +02:00
c2469b6765 create the json-depth-checker crate 2022-04-14 11:14:08 +02:00
7791ef90e7 Merge #493
493: Use smartstring to store the external id in our hashmap r=Kerollmops a=irevoire

We need to store all the external id (primary key) in a hashmap
associated to their internal id.
The smartstring remove heap allocation / memory usage and should
improve the cache locality.

I ran the benchmarks to measure the impact of this PR on the indexing time.
I think we should merge it whatever happens though because it'll decrease the memory consumption.

---------

This improve really sliiiiiightly the performances but improve the memory usage thus it should be merged.
```
group                                                             indexing_main_6b073738                 indexing_use-smartsring_3f343511
-----                                                             ----------------------                 --------------------------------
indexing/Indexing geo_point                                       1.02      25.2±0.20s        ? ?/sec    1.00      24.8±0.13s        ? ?/sec
indexing/Indexing movies in three batches                         1.00      18.2±0.10s        ? ?/sec    1.00      18.2±0.23s        ? ?/sec
indexing/Indexing movies with default settings                    1.00      17.5±0.09s        ? ?/sec    1.01      17.7±0.11s        ? ?/sec
indexing/Indexing songs in three batches with default settings    1.00      68.3±1.01s        ? ?/sec    1.00      68.0±0.95s        ? ?/sec
indexing/Indexing songs with default settings                     1.00      63.2±0.78s        ? ?/sec    1.00      63.0±0.58s        ? ?/sec
indexing/Indexing songs without any facets                        1.02      59.6±1.00s        ? ?/sec    1.00      58.5±1.03s        ? ?/sec
indexing/Indexing songs without faceted numbers                   1.00      62.8±0.38s        ? ?/sec    1.00      62.6±1.02s        ? ?/sec
indexing/Indexing wiki                                            1.01   1009.2±25.25s        ? ?/sec    1.00    998.1±11.27s        ? ?/sec
indexing/Indexing wiki in three batches                           1.01    1142.0±9.97s        ? ?/sec    1.00   1134.4±11.21s        ? ?/sec
```

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-13 20:28:28 +00:00
ee64f4a936 Use smartstring to store the external id in our hashmap
We need to store all the external id (primary key) in a hashmap
associated to their internal id.
The smartstring remove heap allocation / memory usage and should
improve the cache locality.
2022-04-13 21:22:07 +02:00
456887a54a Merge #496
496: Improve the performances of the flattening subcrate r=irevoire a=Kerollmops

This PR adds some benchmarks to the _flatten-serde-json_ crate, this crate is responsible for transforming the original documents into flat versions that the engine can understand. It can probably be sped up and this is why I added benchmarks to it.

I made some interesting performance improvements when I replaced the `json!` macro calls.

```
flatten/simple          time:   [452.44 ns 453.31 ns 454.18 ns]
                        change: [-15.036% -14.751% -14.473%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 2 outliers among 100 measurements (2.00%)
  2 (2.00%) high mild

Benchmarking flatten/complex: Collecting 100 samples in estimated 5.0007 s (4.9M i                                                                                  flatten/complex         time:   [1.0101 us 1.0131 us 1.0160 us]
                        change: [-18.001% -17.775% -17.536%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  5 (5.00%) high mild
  1 (1.00%) high severe
```

---

_I removed this particular commit from this PR._ The reason is that the two other commits were enough for this PR to give enough impact and be merged. We will continue to explore where we can get performances later.

But when I changed the flattening function to accept an owned version of the objects, we lost a lot of performances. Yes, I rewrote the benchmarks (locally) to clone the input object (and measured both, previous and new versions, with the cloning benchmarks). Maybe cloning the benchmark inputs is not the right thing to do...

```
Benchmarking flatten/simple: Collecting 100 samples in estimated 5.0005 s (6.7M it                                                                                  flatten/simple          time:   [746.46 ns 749.59 ns 752.70 ns]
                        change: [+40.082% +40.714% +41.347%] (p = 0.00 < 0.05)
                        Performance has regressed.

Benchmarking flatten/complex: Collecting 100 samples in estimated 5.0047 s (2.9M i                                                                                  flatten/complex         time:   [1.7311 us 1.7342 us 1.7368 us]
                        change: [+40.976% +41.398% +41.807%] (p = 0.00 < 0.05)
                        Performance has regressed.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) low mild
```

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-04-13 11:14:29 +00:00
b3cec1a383 Prefer using direct method calls instead of using the json macros 2022-04-13 13:12:57 +02:00
436d2032c4 Add benchmarks to the flatten-serde-json subcrate 2022-04-13 13:12:57 +02:00
3828635fb2 Merge #489
489: fix distinct count bug r=curquiza a=MarinPostma

fix https://github.com/meilisearch/meilisearch/issues/2152

I think the issue was that we didn't take off the excluded candidates from the initial candidates when returning the candidates with the search result.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-13 10:15:30 +00:00
dda28d7415 exclude excluded candidates from search result candidates 2022-04-13 12:10:35 +02:00
cd83014fff add test for distinct nb hits 2022-04-13 12:10:35 +02:00
bbb6728d2f add distinct attributes to cli 2022-04-13 12:10:35 +02:00
49fbbacafc Merge #492
492: Add the new `Specify breaking` check to bors.toml r=curquiza a=curquiza

Should prevent this problem: https://github.com/meilisearch/milli/pull/489#issuecomment-1094988060

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-04-13 08:59:40 +00:00
7ad582f39f Update bors.toml 2022-04-13 10:56:56 +02:00
aa896f0e7a Update bors.toml 2022-04-13 10:56:56 +02:00
0261a0e3cf Add the new Specify breaking check to bors.toml 2022-04-13 10:56:55 +02:00
5809d3ae0d Add first benchmarks on formatting 2022-04-12 16:31:58 +02:00
827cedcd15 Add format option structure 2022-04-12 13:42:14 +02:00
011f8210ed Make compute_matches more rust idiomatic 2022-04-12 10:19:02 +02:00
6b0737384b Merge #491
491: remove the unused key warning r=curquiza a=irevoire

When I copy-pasted my flatten crate I forgot to remove the key used to publish the package and that threw a warning.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-11 16:55:25 +00:00
e153418b8a remove the unused key warning 2022-04-11 14:52:41 +02:00
c8306616e0 Merge #490
490: Enforce labelling for the PRs r=curquiza a=curquiza

- Enforce one of the following labels to make the CI pass: `no breaking`, `DB breaking`, `API breaking` (milli API, not the Meilisearch API of course), or `skip changelog`. This new CI is now `Required` in the GitHub settings for merging a PR.
- Adapt the release drafter to these new labels
- rename `skip-changelog` into `skip changelog` according to the new label name

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-11 08:24:23 +00:00
9383629d13 Enforce labelling for the PRs 2022-04-09 23:47:06 +02:00
a16de5de84 Simplify format and remove intermediate function 2022-04-08 11:20:41 +02:00
a769e09dfa Make token_crop_bounds more rust idiomatic 2022-04-07 20:15:14 +02:00
9ac2fd1c37 Merge #487
487: Update version (v0.26.0) r=Kerollmops a=curquiza

breaking because of #458 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-07 17:10:24 +00:00
80ae020bee Merge #458
458: Nested fields r=Kerollmops a=irevoire

For the following document:
```json
{
  "id": 1,
  "person": {
    "name": "tamo",
    "age": 25,
  }
}
```
Suppose the user sets `person` as a filterable attribute. We need to store `person` in the filterable _obviously_. But we also need to keep track of `person.name` and `person.age` somewhere.
That’s where I changed a little bit the logic of the engine.

Currently, we have a function called `faceted_field` that returns the union of the filterable and sortable.
I renamed this function in `user_defined_faceted_field`. And now, when we finish indexing documents, we look at all the fields and see if they « match » a `user_defined_faceted_field`.
So in our case:
- does `id` match `person`: 🔴 
- does `person.name` match `person`: 🟢 
- does `person.age` match `person`: 🟢 

And thus, we insert in the database the following faceted fields: `person, person.name, person.age`.

The good thing about that solution is that we generate everything during the indexing phase, and then during the search, we can access our field without recomputing too much globbing.

-----

Now the bad thing is that I had to create a new db.

And if that was only one db, that would be ok, but actually, I need to do the same for the:
- Displayed attributes
- Attributes to retrieve
- Attributes to highlight
- Attribute to crop

`@Kerollmops` 
Do you think there is a better way to do it?
Apart from all the code, can we have a problem because we have too many dbs?

Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-07 16:26:09 +00:00
bab898ce86 move the flatten-serde-json crate inside of milli 2022-04-07 18:20:44 +02:00
c8ed1675a7 Add some documentation 2022-04-07 17:32:13 +02:00
b1905dfa24 Make split_best_frequency returns references instead of owned data 2022-04-07 17:05:44 +02:00
ab458d8840 fix tests after rebase 2022-04-07 17:00:00 +02:00
4f3ce6d9cd nested fields 2022-04-07 16:58:46 +02:00
ee1d627803 Update version (v0.26.0) 2022-04-07 15:56:10 +02:00
4ae7aea3b2 Merge #486
486: Update version (v0.25.0) r=curquiza a=curquiza

v0.25.0 will be released once #478 is merged

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-06 11:40:41 +00:00
aadb0c58c9 Merge #478
478: Disable typo on attribute r=Kerollmops a=MarinPostma

disable typo on attributes


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-05 23:45:35 +00:00
86249e2ae4 add missing \t in cli update display
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-04-05 21:35:06 +02:00
b799f3326b rename merge_nothing to merge_ignore_values 2022-04-05 18:44:35 +02:00
fa7d3a37c0 Make some cleaning and add comments 2022-04-05 17:48:56 +02:00
3bb1e35ada Fix match count 2022-04-05 17:48:45 +02:00
56e0edd621 Put crop markers directly around words 2022-04-05 17:41:32 +02:00
a93cd8c61c Fix prefix highlight with special chars 2022-04-05 17:41:32 +02:00
b3f0f39106 Make some cleaning 2022-04-05 17:41:32 +02:00
6dc345bc53 Test and Fix prefix highlight 2022-04-05 17:41:32 +02:00
bd30ee97b8 Keep separators at start of the cropped string 2022-04-05 17:41:32 +02:00
29c5f76d7f Use new matcher in http-ui 2022-04-05 17:41:32 +02:00
734d0899d3 Publish Matcher 2022-04-05 17:41:32 +02:00
4428cb5909 Add some tests and fix some corner cases 2022-04-05 17:41:32 +02:00
844f546a8b Add matches algorithm V1 2022-04-05 17:41:32 +02:00
3be1790803 Add crop algorithm with naive match algorithm 2022-04-05 17:41:32 +02:00
d96e72e5dc Create formatter with some tests 2022-04-05 17:41:32 +02:00
201fea0fda limit extract_word_docids memory usage 2022-04-05 14:14:15 +02:00
5cfd3d8407 add exact attributes documentation 2022-04-05 14:10:22 +02:00
9eec44dd98 Update version (v0.25.0) 2022-04-05 12:06:42 +02:00
b85cd4983e remove field_id_from_position 2022-04-05 09:50:34 +02:00
dac81b2d44 add missing \n in cli settings 2022-04-05 09:48:56 +02:00
ab185a59b5 fix infos 2022-04-05 09:46:56 +02:00
59e41d98e3 add comments to integration test 2022-04-04 21:17:06 +02:00
1810927dbd rephrase exact_attributes doc 2022-04-04 21:04:49 +02:00
b7694c34f5 remove println 2022-04-04 21:00:07 +02:00
6cabd47c32 fix typo in comment 2022-04-04 20:59:20 +02:00
9963f11172 fix infos crate compilation issue 2022-04-04 20:54:03 +02:00
c8d3a09af8 add integration test for disable typo on attributes 2022-04-04 20:54:03 +02:00
bfd81ce050 add exact attributes to cli settings 2022-04-04 20:54:03 +02:00
6b2c2509b2 fix bug in exact search 2022-04-04 20:54:03 +02:00
56b4f5dce2 add exact prefix to query_docids 2022-04-04 20:54:03 +02:00
21ae4143b1 add exact_word_prefix to Context 2022-04-04 20:54:03 +02:00
e8f06f6c06 extract exact_word_prefix_docids 2022-04-04 20:54:03 +02:00
6dd2e4ffbd introduce exact_word_prefix database in index 2022-04-04 20:54:03 +02:00
ba0bb29cd8 refactor WordPrefixDocids to take dbs instead of indexes 2022-04-04 20:54:02 +02:00
c4c6e35352 query exact_word_docids in resolve_query_tree 2022-04-04 20:54:02 +02:00
8d46a5b0b5 extract exact word docids 2022-04-04 20:54:02 +02:00
5451c64d5d increase criteria asc desc test map size 2022-04-04 20:54:02 +02:00
0a77be4ec0 introduce exact_word_docids db 2022-04-04 20:54:02 +02:00
5f9f82757d refactor spawn_extraction_task 2022-04-04 20:54:02 +02:00
f82d4b36eb introduce exact attribute setting 2022-04-04 20:54:02 +02:00
c882d8daf0 add test for exact words 2022-04-04 20:54:01 +02:00
7e9d56a9e7 disable typos on exact words 2022-04-04 20:54:01 +02:00
900825bac0 Merge #474
474: Disable typos on exact word r=MarinPostma a=MarinPostma

This PR introduces the `exact_word` setting to disable typo tolerance on custom words.

If a user query contains a word from `exact_words`, no typo derivation will be made for that particular word.

I have chosen to store the words in a FST, to save on deserialization, and allow for fast lookups.

I had some trouble with the `serde` module, and had to rename it `serde_impl`.

## steps:
- [x] introduce new settings to register words to disable typos on
- [x] in `typos`, return exact match if the current word is part of the word to disable typos for.
- [x] update `Context` to return the exact words dictionary.
- [x] merge #473 


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-04 18:39:43 +00:00
3e67d8818c fix typo in test comment 2022-04-04 20:34:23 +02:00
284d8a24e0 add integration test for disabled typo on word 2022-04-04 20:15:51 +02:00
30a2711bac rename serde module to serde_impl module
needed because of issues with rustfmt
2022-04-04 20:10:55 +02:00
0fd55db21c fmt 2022-04-04 20:10:55 +02:00
559e46be5e fix bad rebase bug 2022-04-04 20:10:55 +02:00
8b1e5d9c6d add test for exact words 2022-04-04 20:10:55 +02:00
774fa8f065 disable typos on exact words 2022-04-04 20:10:55 +02:00
9bbffb8fee add exact words setting 2022-04-04 20:10:54 +02:00
48a5ce7434 Merge #473
473: set minimum word len for typos r=MarinPostma a=MarinPostma

this PR allows the configuration on the minimum word length for typos.

The default values are the same as previously.

## steps
- [x] introduce settings for the minimum word length for 1 and 2 typos
- [x] update the settings update flow to set this setting
- [x] create a structure `TypoConfig` to configure typo tolerance in the query builder
- [x] in `typo`, use the configuration to create the appropriate query tree node.
- [x] extend `Context` to return the setting for minimum word length for typos
- [x] return correct error message for wrong settings.
- [x] merge #469 

Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-04 17:53:14 +00:00
6bf9824fec Merge #485
485: fix bug on 2 typos derivation r=Kerollmops a=MarinPostma

I found a bug while working on #473. This pr fixes it and add the missing tests on word derivations.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-04 17:17:53 +00:00
853b4a520f fmt 2022-04-04 10:41:46 +02:00
2cb71dff4a add typo integration tests 2022-04-04 10:41:46 +02:00
1941072bb2 implement Copy on Setting 2022-04-04 10:41:46 +02:00
fdaf45aab2 replace hardcoded value with constant in TestContext 2022-04-04 10:41:46 +02:00
950a740bd4 refactor typos for readability 2022-04-04 10:41:46 +02:00
66020cd923 rename min_word_len* to use plain letter numbers 2022-04-04 10:41:46 +02:00
4c4b336ecb rename min word len for typo error 2022-04-01 11:17:03 +02:00
286dd7b2e4 rename min_word_len_2_typo 2022-04-01 11:17:03 +02:00
55af85db3c add tests for min_word_len_for_typo 2022-04-01 11:17:02 +02:00
9102de5500 fix error message 2022-04-01 11:17:02 +02:00
a1a3a49bc9 dynamic minimum word len for typos in query tree builder 2022-04-01 11:17:02 +02:00
5a24e60572 introduce word len for typo setting 2022-04-01 11:17:02 +02:00
9fe40df960 add word derivations tests 2022-04-01 11:05:18 +02:00
d5ddc6b080 fix 2 typos word derivation bug 2022-04-01 10:51:22 +02:00
d2d930dd3f Merge #469
469: add authorize typo setting r=Kerollmops a=MarinPostma

This PR adds support for an authorize typo setting. This makes it possible to disable typos for a whole index. Typos are enabled by default.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-31 15:18:08 +00:00
3e34981d9b add test for authorize_typos in update 2022-03-31 14:12:00 +02:00
6ef3bb9d83 fmt 2022-03-31 14:06:23 +02:00
f782fe2062 add authorize_typo_test 2022-03-31 10:08:39 +02:00
c4653347fd add authorize typo setting 2022-03-31 10:05:44 +02:00
d8dd357326 Merge #480
480: Increase benchmarks (push) CI timeout r=Kerollmops a=Kerollmops

This PR fixes the fact that the benchmarks CI on push were [canceled by GitHub](https://github.com/meilisearch/milli/actions/runs/2028844132) because they reached the default timeout of 6h. This PR changes the timeout to 72h, the same setting as the manually triggered benchmark one.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-03-29 18:13:31 +00:00
6a77c81a28 Increase benchmarks (push) CI timeout 2022-03-29 09:45:36 -07:00
e10c26e70d Merge #479
479: Update version (v0.24.1) r=Kerollmops a=curquiza

From v0.23.1 to v0.24.1 since we had an issue with the versioning for the previous release

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-03-24 20:12:37 +00:00
ddf78a735b Update version (v0.24.1) 2022-03-24 16:39:45 +01:00
2c7cafbf20 Merge #475
475: Bump tokenizer r=Kerollmops a=irevoire

This PR bumps the tokenizer to v0.2.9, which fixes an issue we had with lindera where reqwest was used with openssl (which was breaking our benchmarks).

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-03-23 13:26:44 +00:00
86dd88698d bump tokenizer 2022-03-23 14:25:58 +01:00
b82f46e862 Merge #476
476: Rollback meilisearch-tokenizer version r=Kerollmops a=irevoire

Lindera often fails to download some data from google drive we can’t compile consistently meilisearch / milli.
We can’t bump to the latest version (that moved out of google drive) either because lindera uses reqwest with openssl with no way of configuring it our benchmarks were not able to run. The latter issue should be fixed by https://github.com/lindera-morphology/lindera/pull/164.

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-03-22 14:02:00 +00:00
5dc464b9a7 rollback meilisearch-tokenizer version 2022-03-21 17:29:10 +01:00
90276d9a2d Merge #472
472: Remove useless variables in proximity r=Kerollmops a=ManyTheFish

Was passing by plane sweep algorithm to find some inspiration, and I discover that we have useless variables that were not detected because of the recursive function.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-03-16 15:33:11 +00:00
49d59d88c2 Remove useless variables in proximity 2022-03-16 16:12:52 +01:00
5863afa1a5 Merge #468
468: Add a new error message when the filterableAttributes are empty r=Kerollmops a=brunoocasali

Fixes https://github.com/meilisearch/meilisearch/issues/2140

Is there a good way to reduce the duplication here? Maybe adding a shared function? I don't know the best and idiomatic way to do that, I appreciate any tip!

Another doubt is related to the duplication of the calling:

```rs
// filter.rs:373
FilterError::AttributeNotFilterable {
    attribute,
    filterable: filterable_fields.into_iter().collect::<Vec<_>>().join(" "),
},
```

and

```rs
// filter.rs:424
return Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
    attribute: "_geo",
    filterable: filterable_fields.into_iter().collect::<Vec<_>>().join(" "),
}))?;
```

I think we could move the `filterable_fields.into_iter().collect::<Vec<_>>().join(" ")` directly into the error handling, like the sortable error. I did this in the last commit; if this is something to avoid, let me know and I can remove it :)

Co-authored-by: Bruno Casali <brunoocasali@gmail.com>
2022-03-16 15:02:19 +00:00
adc71742c8 Move string concat to the struct instead of in the calling 2022-03-16 10:26:12 -03:00
cb6b6915a4 Merge #470
470: Set the cargo crate resolver to v2 r=Kerollmops a=MarinPostma

This PR updates the workspace resolver to v2. This should fix [the benchmarks](https://github.com/meilisearch/milli/runs/5558347765?check_suite_focus=true#step:8:184).


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-16 10:55:22 +00:00
2a31cd13c9 set resolver to v2 2022-03-16 11:47:27 +01:00
4822fe1beb Add a better error message when the filterable attrs are empty
Fixes https://github.com/meilisearch/meilisearch/issues/2140
2022-03-15 18:13:59 -03:00
f04ab67083 Merge #466
466: Bump version to 0.23.1 r=curquiza a=Kerollmops

This PR bumps the crate versions to 0.23.1. Nothing seems to be breaking in the next release.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-03-15 17:19:05 +00:00
ad4c982c68 Merge #439
439: Optimize typo criterion r=Kerollmops a=MarinPostma

This PR implements a couple of optimizations for the typo criterion:

- clamp max typo on concatenated query words to 1: By considering that a concatenated query word is a typo, we clamp the max number of typos allowed on it to 1. This is useful because we noticed that concatenated query words often introduced words with 2 typos in queries that otherwise didn't allow for 2 typo words.

- Make typos on the first letter count for 2. This change is a big performance gain: by considering the typos on the first letter to count as 2 typos, we drastically restrict the search space for 1 typo, and if we reach 2 typos, the search space is reduced as well, as we only consider: (2 typos ∩ correct first letter) ∪ (wrong first letter ∩ 1 typo) instead of 2 typos anywhere in the word.

## benches
```
group                                                                                                    main                                   typo
-----                                                                                                    ----                                   ----
smol-songs.csv: asc + default/Notstandskomitee                                                           2.51      5.8±0.01ms        ? ?/sec    1.00      2.3±0.01ms        ? ?/sec
smol-songs.csv: asc + default/charles                                                                    2.48      3.0±0.01ms        ? ?/sec    1.00   1190.9±1.29µs        ? ?/sec
smol-songs.csv: asc + default/charles mingus                                                             5.56     10.8±0.01ms        ? ?/sec    1.00   1935.3±1.00µs        ? ?/sec
smol-songs.csv: asc + default/david                                                                      1.65      3.9±0.00ms        ? ?/sec    1.00      2.4±0.01ms        ? ?/sec
smol-songs.csv: asc + default/david bowie                                                                3.34     12.5±0.02ms        ? ?/sec    1.00      3.7±0.00ms        ? ?/sec
smol-songs.csv: asc + default/john                                                                       1.00   1849.7±3.74µs        ? ?/sec    1.01   1875.1±4.65µs        ? ?/sec
smol-songs.csv: asc + default/marcus miller                                                              4.32     15.7±0.01ms        ? ?/sec    1.00      3.6±0.01ms        ? ?/sec
smol-songs.csv: asc + default/michael jackson                                                            3.31     12.5±0.01ms        ? ?/sec    1.00      3.8±0.00ms        ? ?/sec
smol-songs.csv: asc + default/tamo                                                                       1.05    565.4±0.86µs        ? ?/sec    1.00    539.3±1.22µs        ? ?/sec
smol-songs.csv: asc + default/thelonious monk                                                            3.49     11.5±0.01ms        ? ?/sec    1.00      3.3±0.00ms        ? ?/sec
smol-songs.csv: asc/Notstandskomitee                                                                     2.59      5.6±0.02ms        ? ?/sec    1.00      2.2±0.01ms        ? ?/sec
smol-songs.csv: asc/charles                                                                              6.05      2.1±0.00ms        ? ?/sec    1.00    347.8±0.60µs        ? ?/sec
smol-songs.csv: asc/charles mingus                                                                       14.46     9.4±0.01ms        ? ?/sec    1.00    649.2±0.97µs        ? ?/sec
smol-songs.csv: asc/david                                                                                3.87      2.4±0.00ms        ? ?/sec    1.00    618.2±0.69µs        ? ?/sec
smol-songs.csv: asc/david bowie                                                                          10.14     9.8±0.01ms        ? ?/sec    1.00    970.8±1.55µs        ? ?/sec
smol-songs.csv: asc/john                                                                                 1.00    546.5±1.10µs        ? ?/sec    1.00    547.1±2.11µs        ? ?/sec
smol-songs.csv: asc/marcus miller                                                                        11.45    10.4±0.06ms        ? ?/sec    1.00    907.9±1.37µs        ? ?/sec
smol-songs.csv: asc/michael jackson                                                                      10.56     9.7±0.01ms        ? ?/sec    1.00    919.6±1.03µs        ? ?/sec
smol-songs.csv: asc/tamo                                                                                 1.03     43.3±0.18µs        ? ?/sec    1.00     42.2±0.23µs        ? ?/sec
smol-songs.csv: asc/thelonious monk                                                                      4.16     10.7±0.02ms        ? ?/sec    1.00      2.6±0.00ms        ? ?/sec
smol-songs.csv: basic filter: <=/Notstandskomitee                                                        1.00     95.7±0.20µs        ? ?/sec    1.15   109.6±10.40µs        ? ?/sec
smol-songs.csv: basic filter: <=/charles                                                                 1.00     27.8±0.15µs        ? ?/sec    1.01     27.9±0.18µs        ? ?/sec
smol-songs.csv: basic filter: <=/charles mingus                                                          1.72    119.2±0.67µs        ? ?/sec    1.00     69.1±0.13µs        ? ?/sec
smol-songs.csv: basic filter: <=/david                                                                   1.00     22.3±0.33µs        ? ?/sec    1.05     23.4±0.19µs        ? ?/sec
smol-songs.csv: basic filter: <=/david bowie                                                             1.59     86.9±0.79µs        ? ?/sec    1.00     54.5±0.31µs        ? ?/sec
smol-songs.csv: basic filter: <=/john                                                                    1.00     17.9±0.06µs        ? ?/sec    1.06     18.9±0.15µs        ? ?/sec
smol-songs.csv: basic filter: <=/marcus miller                                                           1.65    102.7±1.63µs        ? ?/sec    1.00     62.3±0.18µs        ? ?/sec
smol-songs.csv: basic filter: <=/michael jackson                                                         1.76    128.2±1.85µs        ? ?/sec    1.00     72.9±0.19µs        ? ?/sec
smol-songs.csv: basic filter: <=/tamo                                                                    1.00     17.9±0.13µs        ? ?/sec    1.05     18.7±0.20µs        ? ?/sec
smol-songs.csv: basic filter: <=/thelonious monk                                                         1.53    157.5±2.38µs        ? ?/sec    1.00    102.8±0.88µs        ? ?/sec
smol-songs.csv: basic filter: TO/Notstandskomitee                                                        1.00    100.9±4.36µs        ? ?/sec    1.04    105.0±8.25µs        ? ?/sec
smol-songs.csv: basic filter: TO/charles                                                                 1.00     28.4±0.36µs        ? ?/sec    1.03     29.4±0.33µs        ? ?/sec
smol-songs.csv: basic filter: TO/charles mingus                                                          1.71    118.1±1.08µs        ? ?/sec    1.00     68.9±0.26µs        ? ?/sec
smol-songs.csv: basic filter: TO/david                                                                   1.00     24.0±0.26µs        ? ?/sec    1.03     24.6±0.43µs        ? ?/sec
smol-songs.csv: basic filter: TO/david bowie                                                             1.72     95.2±0.30µs        ? ?/sec    1.00     55.2±0.14µs        ? ?/sec
smol-songs.csv: basic filter: TO/john                                                                    1.00     18.8±0.09µs        ? ?/sec    1.06     19.8±0.17µs        ? ?/sec
smol-songs.csv: basic filter: TO/marcus miller                                                           1.61    102.4±1.65µs        ? ?/sec    1.00     63.4±0.24µs        ? ?/sec
smol-songs.csv: basic filter: TO/michael jackson                                                         1.77    132.1±1.41µs        ? ?/sec    1.00     74.5±0.59µs        ? ?/sec
smol-songs.csv: basic filter: TO/tamo                                                                    1.00     18.2±0.14µs        ? ?/sec    1.05     19.2±0.46µs        ? ?/sec
smol-songs.csv: basic filter: TO/thelonious monk                                                         1.49    150.8±1.92µs        ? ?/sec    1.00    101.3±0.44µs        ? ?/sec
smol-songs.csv: basic placeholder/                                                                       1.00     27.3±0.07µs        ? ?/sec    1.03     28.0±0.05µs        ? ?/sec
smol-songs.csv: basic with quote/"Notstandskomitee"                                                      1.00    122.4±0.17µs        ? ?/sec    1.03    125.6±0.16µs        ? ?/sec
smol-songs.csv: basic with quote/"charles"                                                               1.00     88.8±0.30µs        ? ?/sec    1.00     88.4±0.15µs        ? ?/sec
smol-songs.csv: basic with quote/"charles" "mingus"                                                      1.00    685.2±0.74µs        ? ?/sec    1.01    689.4±6.07µs        ? ?/sec
smol-songs.csv: basic with quote/"david"                                                                 1.00    161.6±0.42µs        ? ?/sec    1.01    162.6±0.17µs        ? ?/sec
smol-songs.csv: basic with quote/"david" "bowie"                                                         1.00    731.7±0.73µs        ? ?/sec    1.02    743.1±0.77µs        ? ?/sec
smol-songs.csv: basic with quote/"john"                                                                  1.00    267.1±0.33µs        ? ?/sec    1.01    270.9±0.33µs        ? ?/sec
smol-songs.csv: basic with quote/"marcus" "miller"                                                       1.00    138.7±0.31µs        ? ?/sec    1.02    140.9±0.13µs        ? ?/sec
smol-songs.csv: basic with quote/"michael" "jackson"                                                     1.01    841.4±0.72µs        ? ?/sec    1.00    833.8±0.92µs        ? ?/sec
smol-songs.csv: basic with quote/"tamo"                                                                  1.01    189.2±0.26µs        ? ?/sec    1.00    188.2±0.71µs        ? ?/sec
smol-songs.csv: basic with quote/"thelonious" "monk"                                                     1.00   1100.5±1.36µs        ? ?/sec    1.01   1111.7±2.17µs        ? ?/sec
smol-songs.csv: basic without quote/Notstandskomitee                                                     3.40      7.9±0.02ms        ? ?/sec    1.00      2.3±0.02ms        ? ?/sec
smol-songs.csv: basic without quote/charles                                                              2.57    494.4±0.89µs        ? ?/sec    1.00    192.5±0.18µs        ? ?/sec
smol-songs.csv: basic without quote/charles mingus                                                       1.29      2.8±0.02ms        ? ?/sec    1.00      2.1±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/david                                                                1.95    623.8±0.90µs        ? ?/sec    1.00    319.2±1.22µs        ? ?/sec
smol-songs.csv: basic without quote/david bowie                                                          1.12      5.9±0.00ms        ? ?/sec    1.00      5.2±0.00ms        ? ?/sec
smol-songs.csv: basic without quote/john                                                                 1.24   1340.9±2.25µs        ? ?/sec    1.00   1084.7±7.76µs        ? ?/sec
smol-songs.csv: basic without quote/marcus miller                                                        7.97     14.6±0.01ms        ? ?/sec    1.00   1826.0±6.84µs        ? ?/sec
smol-songs.csv: basic without quote/michael jackson                                                      1.19      3.9±0.00ms        ? ?/sec    1.00      3.3±0.00ms        ? ?/sec
smol-songs.csv: basic without quote/tamo                                                                 1.65    737.7±3.58µs        ? ?/sec    1.00    446.7±0.51µs        ? ?/sec
smol-songs.csv: basic without quote/thelonious monk                                                      1.16      4.5±0.02ms        ? ?/sec    1.00      3.9±0.04ms        ? ?/sec
smol-songs.csv: big filter/Notstandskomitee                                                              3.27      7.6±0.02ms        ? ?/sec    1.00      2.3±0.01ms        ? ?/sec
smol-songs.csv: big filter/charles                                                                       8.26   1957.5±1.37µs        ? ?/sec    1.00    236.8±0.34µs        ? ?/sec
smol-songs.csv: big filter/charles mingus                                                                18.49    11.2±0.06ms        ? ?/sec    1.00    607.7±3.03µs        ? ?/sec
smol-songs.csv: big filter/david                                                                         3.78      2.4±0.00ms        ? ?/sec    1.00    622.8±0.80µs        ? ?/sec
smol-songs.csv: big filter/david bowie                                                                   9.00     12.0±0.01ms        ? ?/sec    1.00   1336.0±3.17µs        ? ?/sec
smol-songs.csv: big filter/john                                                                          1.00    554.2±0.95µs        ? ?/sec    1.01    560.4±0.79µs        ? ?/sec
smol-songs.csv: big filter/marcus miller                                                                 18.09    12.0±0.01ms        ? ?/sec    1.00    664.7±0.60µs        ? ?/sec
smol-songs.csv: big filter/michael jackson                                                               8.43     12.0±0.01ms        ? ?/sec    1.00   1421.6±1.37µs        ? ?/sec
smol-songs.csv: big filter/tamo                                                                          1.00     86.3±0.14µs        ? ?/sec    1.01     87.3±0.21µs        ? ?/sec
smol-songs.csv: big filter/thelonious monk                                                               5.55     14.3±0.02ms        ? ?/sec    1.00      2.6±0.01ms        ? ?/sec
smol-songs.csv: desc + default/Notstandskomitee                                                          2.52      5.8±0.01ms        ? ?/sec    1.00      2.3±0.01ms        ? ?/sec
smol-songs.csv: desc + default/charles                                                                   3.04      2.7±0.01ms        ? ?/sec    1.00    893.4±1.08µs        ? ?/sec
smol-songs.csv: desc + default/charles mingus                                                            6.77     10.3±0.01ms        ? ?/sec    1.00   1520.8±1.90µs        ? ?/sec
smol-songs.csv: desc + default/david                                                                     1.39      5.7±0.00ms        ? ?/sec    1.00      4.1±0.00ms        ? ?/sec
smol-songs.csv: desc + default/david bowie                                                               2.34     15.8±0.02ms        ? ?/sec    1.00      6.7±0.01ms        ? ?/sec
smol-songs.csv: desc + default/john                                                                      1.00      2.5±0.00ms        ? ?/sec    1.02      2.6±0.01ms        ? ?/sec
smol-songs.csv: desc + default/marcus miller                                                             5.06     14.5±0.02ms        ? ?/sec    1.00      2.9±0.01ms        ? ?/sec
smol-songs.csv: desc + default/michael jackson                                                           2.64     14.1±0.05ms        ? ?/sec    1.00      5.4±0.00ms        ? ?/sec
smol-songs.csv: desc + default/tamo                                                                      1.00    567.0±0.65µs        ? ?/sec    1.00    565.7±0.97µs        ? ?/sec
smol-songs.csv: desc + default/thelonious monk                                                           3.55     11.6±0.02ms        ? ?/sec    1.00      3.3±0.00ms        ? ?/sec
smol-songs.csv: desc/Notstandskomitee                                                                    2.58      5.6±0.02ms        ? ?/sec    1.00      2.2±0.02ms        ? ?/sec
smol-songs.csv: desc/charles                                                                             6.04      2.1±0.00ms        ? ?/sec    1.00    348.1±0.57µs        ? ?/sec
smol-songs.csv: desc/charles mingus                                                                      14.51     9.4±0.01ms        ? ?/sec    1.00    646.7±0.99µs        ? ?/sec
smol-songs.csv: desc/david                                                                               3.86      2.4±0.00ms        ? ?/sec    1.00    620.7±2.46µs        ? ?/sec
smol-songs.csv: desc/david bowie                                                                         10.10     9.8±0.01ms        ? ?/sec    1.00    973.9±3.31µs        ? ?/sec
smol-songs.csv: desc/john                                                                                1.00    545.5±0.78µs        ? ?/sec    1.00    547.2±0.48µs        ? ?/sec
smol-songs.csv: desc/marcus miller                                                                       11.39    10.3±0.01ms        ? ?/sec    1.00    903.7±0.95µs        ? ?/sec
smol-songs.csv: desc/michael jackson                                                                     10.51     9.7±0.01ms        ? ?/sec    1.00    924.7±2.02µs        ? ?/sec
smol-songs.csv: desc/tamo                                                                                1.01     43.2±0.33µs        ? ?/sec    1.00     42.6±0.35µs        ? ?/sec
smol-songs.csv: desc/thelonious monk                                                                     4.19     10.8±0.03ms        ? ?/sec    1.00      2.6±0.00ms        ? ?/sec
smol-songs.csv: prefix search/a                                                                          1.00   1008.7±1.00µs        ? ?/sec    1.00   1005.5±0.91µs        ? ?/sec
smol-songs.csv: prefix search/b                                                                          1.00    885.0±0.70µs        ? ?/sec    1.01    890.6±1.11µs        ? ?/sec
smol-songs.csv: prefix search/i                                                                          1.00   1051.8±1.25µs        ? ?/sec    1.00   1056.6±4.12µs        ? ?/sec
smol-songs.csv: prefix search/s                                                                          1.00    724.7±1.77µs        ? ?/sec    1.00    721.6±0.59µs        ? ?/sec
smol-songs.csv: prefix search/x                                                                          1.01    212.4±0.21µs        ? ?/sec    1.00    210.9±0.38µs        ? ?/sec
smol-songs.csv: proximity/7000 Danses Un Jour Dans Notre Vie                                             18.55    48.5±0.09ms        ? ?/sec    1.00      2.6±0.03ms        ? ?/sec
smol-songs.csv: proximity/The Disneyland Sing-Along Chorus                                               8.41     56.7±0.45ms        ? ?/sec    1.00      6.7±0.05ms        ? ?/sec
smol-songs.csv: proximity/Under Great Northern Lights                                                    15.74    38.9±0.14ms        ? ?/sec    1.00      2.5±0.00ms        ? ?/sec
smol-songs.csv: proximity/black saint sinner lady                                                        11.82    40.1±0.13ms        ? ?/sec    1.00      3.4±0.02ms        ? ?/sec
smol-songs.csv: proximity/les dangeureuses 1960                                                          6.90     26.1±0.13ms        ? ?/sec    1.00      3.8±0.04ms        ? ?/sec
smol-songs.csv: typo/Arethla Franklin                                                                    14.93     5.8±0.01ms        ? ?/sec    1.00    390.1±1.89µs        ? ?/sec
smol-songs.csv: typo/Disnaylande                                                                         3.18      7.3±0.01ms        ? ?/sec    1.00      2.3±0.00ms        ? ?/sec
smol-songs.csv: typo/dire straights                                                                      5.55     15.2±0.02ms        ? ?/sec    1.00      2.7±0.00ms        ? ?/sec
smol-songs.csv: typo/fear of the duck                                                                    28.03    20.0±0.03ms        ? ?/sec    1.00    713.3±1.54µs        ? ?/sec
smol-songs.csv: typo/indochie                                                                            19.25  1851.4±2.38µs        ? ?/sec    1.00     96.2±0.13µs        ? ?/sec
smol-songs.csv: typo/indochien                                                                           14.66  1887.7±3.18µs        ? ?/sec    1.00    128.8±0.18µs        ? ?/sec
smol-songs.csv: typo/klub des loopers                                                                    37.73    18.0±0.02ms        ? ?/sec    1.00    476.7±0.73µs        ? ?/sec
smol-songs.csv: typo/michel depech                                                                       10.17     5.8±0.01ms        ? ?/sec    1.00    565.8±1.16µs        ? ?/sec
smol-songs.csv: typo/mongus                                                                              15.33  1897.4±3.44µs        ? ?/sec    1.00    123.8±0.13µs        ? ?/sec
smol-songs.csv: typo/stromal                                                                             14.63  1859.3±2.40µs        ? ?/sec    1.00    127.1±0.29µs        ? ?/sec
smol-songs.csv: typo/the white striper                                                                   10.83     9.4±0.01ms        ? ?/sec    1.00    866.0±0.98µs        ? ?/sec
smol-songs.csv: typo/thelonius monk                                                                      14.40     3.8±0.00ms        ? ?/sec    1.00    261.5±1.30µs        ? ?/sec
smol-songs.csv: words/7000 Danses / Le Baiser / je me trompe de mots                                     5.54     70.8±0.09ms        ? ?/sec    1.00     12.8±0.03ms        ? ?/sec
smol-songs.csv: words/Bring Your Daughter To The Slaughter but now this is not part of the title         3.48    119.8±0.14ms        ? ?/sec    1.00     34.4±0.04ms        ? ?/sec
smol-songs.csv: words/The Disneyland Children's Sing-Alone song                                          8.98     71.9±0.12ms        ? ?/sec    1.00      8.0±0.01ms        ? ?/sec
smol-songs.csv: words/les liaisons dangeureuses 1793                                                     11.88    37.4±0.07ms        ? ?/sec    1.00      3.1±0.01ms        ? ?/sec
smol-songs.csv: words/seven nation mummy                                                                 22.86    23.4±0.04ms        ? ?/sec    1.00   1024.8±1.57µs        ? ?/sec
smol-songs.csv: words/the black saint and the sinner lady and the good doggo                             2.76    124.4±0.15ms        ? ?/sec    1.00     45.1±0.09ms        ? ?/sec
smol-songs.csv: words/whathavenotnsuchforth and a good amount of words to pop to match the first one     2.52    107.0±0.23ms        ? ?/sec    1.00     42.4±0.66ms        ? ?/sec

group                                                                                    main-wiki                              typo-wiki
-----                                                                                    ---------                              ---------
smol-wiki-articles.csv: basic placeholder/                                               1.02     13.7±0.02µs        ? ?/sec    1.00     13.4±0.03µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"film"                                          1.02    409.8±0.67µs        ? ?/sec    1.00    402.6±0.48µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"france"                                        1.00    325.9±0.91µs        ? ?/sec    1.00    326.4±0.49µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"japan"                                         1.00    218.4±0.26µs        ? ?/sec    1.01    220.5±0.20µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"machine"                                       1.00    143.0±0.12µs        ? ?/sec    1.04    148.8±0.21µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"miles" "davis"                                 1.00     11.7±0.06ms        ? ?/sec    1.00     11.8±0.01ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"mingus"                                        1.00      4.4±0.03ms        ? ?/sec    1.00      4.4±0.00ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"rock" "and" "roll"                             1.00     43.5±0.08ms        ? ?/sec    1.01     43.8±0.06ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"spain"                                         1.00    137.3±0.35µs        ? ?/sec    1.05    144.4±0.23µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/film                                         1.00    125.3±0.30µs        ? ?/sec    1.06    133.1±0.37µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/france                                       1.21   1782.6±1.65µs        ? ?/sec    1.00   1477.0±1.39µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/japan                                        1.28   1363.9±0.80µs        ? ?/sec    1.00   1064.3±1.79µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/machine                                      1.73    760.3±0.81µs        ? ?/sec    1.00    439.6±0.75µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/miles davis                                  1.03     17.0±0.03ms        ? ?/sec    1.00     16.5±0.02ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/mingus                                       1.07      5.3±0.01ms        ? ?/sec    1.00      5.0±0.00ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/rock and roll                                1.01     63.9±0.18ms        ? ?/sec    1.00     63.0±0.07ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/spain                                        2.07    667.4±0.93µs        ? ?/sec    1.00    322.8±0.29µs        ? ?/sec
smol-wiki-articles.csv: prefix search/c                                                  1.00    343.1±0.47µs        ? ?/sec    1.00    344.0±0.34µs        ? ?/sec
smol-wiki-articles.csv: prefix search/g                                                  1.00    374.4±3.42µs        ? ?/sec    1.00    374.1±0.44µs        ? ?/sec
smol-wiki-articles.csv: prefix search/j                                                  1.00    359.9±0.31µs        ? ?/sec    1.00    361.2±0.79µs        ? ?/sec
smol-wiki-articles.csv: prefix search/q                                                  1.01    102.0±0.12µs        ? ?/sec    1.00    101.4±0.32µs        ? ?/sec
smol-wiki-articles.csv: prefix search/t                                                  1.00    536.7±1.39µs        ? ?/sec    1.00    534.3±0.84µs        ? ?/sec
smol-wiki-articles.csv: prefix search/x                                                  1.00    400.9±1.00µs        ? ?/sec    1.00    399.5±0.45µs        ? ?/sec
smol-wiki-articles.csv: proximity/april paris                                            3.86     14.4±0.01ms        ? ?/sec    1.00      3.7±0.01ms        ? ?/sec
smol-wiki-articles.csv: proximity/diesel engine                                          12.98    10.4±0.01ms        ? ?/sec    1.00    803.5±1.13µs        ? ?/sec
smol-wiki-articles.csv: proximity/herald sings                                           1.00     12.7±0.06ms        ? ?/sec    5.29     67.1±0.09ms        ? ?/sec
smol-wiki-articles.csv: proximity/tea two                                                6.48   1452.1±2.78µs        ? ?/sec    1.00    224.1±0.38µs        ? ?/sec
smol-wiki-articles.csv: typo/Disnaylande                                                 3.89      8.5±0.01ms        ? ?/sec    1.00      2.2±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/aritmetric                                                  3.78     10.3±0.01ms        ? ?/sec    1.00      2.7±0.00ms        ? ?/sec
smol-wiki-articles.csv: typo/linax                                                       8.91   1426.7±0.97µs        ? ?/sec    1.00    160.1±0.18µs        ? ?/sec
smol-wiki-articles.csv: typo/migrosoft                                                   7.48   1417.3±5.84µs        ? ?/sec    1.00    189.5±0.88µs        ? ?/sec
smol-wiki-articles.csv: typo/nympalidea                                                  3.96      7.2±0.01ms        ? ?/sec    1.00   1810.1±2.03µs        ? ?/sec
smol-wiki-articles.csv: typo/phytogropher                                                3.71      7.2±0.01ms        ? ?/sec    1.00   1934.3±6.51µs        ? ?/sec
smol-wiki-articles.csv: typo/sisan                                                       6.44   1497.2±1.38µs        ? ?/sec    1.00    232.7±0.94µs        ? ?/sec
smol-wiki-articles.csv: typo/the fronce                                                  6.92      2.9±0.00ms        ? ?/sec    1.00    418.0±1.76µs        ? ?/sec
smol-wiki-articles.csv: words/Abraham machin                                             16.63    10.8±0.01ms        ? ?/sec    1.00    649.7±1.08µs        ? ?/sec
smol-wiki-articles.csv: words/Idaho Bellevue pizza                                       27.15    25.6±0.03ms        ? ?/sec    1.00    944.2±5.07µs        ? ?/sec
smol-wiki-articles.csv: words/Kameya Tokujirō mingus monk                                26.87    40.7±0.05ms        ? ?/sec    1.00   1515.3±2.73µs        ? ?/sec
smol-wiki-articles.csv: words/Ulrich Hensel meilisearch milli                            11.99    48.8±0.10ms        ? ?/sec    1.00      4.1±0.02ms        ? ?/sec
smol-wiki-articles.csv: words/the black saint and the sinner lady and the good doggo     4.90    110.0±0.15ms        ? ?/sec    1.00     22.4±0.03ms        ? ?/sec

```

Co-authored-by: mpostma <postma.marin@protonmail.com>
Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-15 16:43:36 +00:00
3f24555c3d custom fst automatons 2022-03-15 17:38:35 +01:00
628c835a22 fix tests 2022-03-15 17:38:34 +01:00
8efac33b53 Merge #467
467: optimize prefix database r=Kerollmops a=MarinPostma

This pr introduces two optimizations that greatly improve the speed of computing prefix databases.

- The time that it takes to create the prefix FST has been divided by 5 by inverting the way we iterated over the words FST.
- We unconditionally and needlessly checked for documents to remove in  `word_prefix_pair`, which caused an iteration over the whole database.

Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-15 16:14:35 +00:00
d127c57f2d review edits 2022-03-15 17:12:48 +01:00
d633ac5b9d optimize word prefix pair 2022-03-15 16:37:22 +01:00
d68fe2b3c7 optimize word prefix fst 2022-03-15 16:36:48 +01:00
08a06b49f0 Bump version to 0.23.1 2022-03-15 15:50:28 +01:00
d87e8b63a9 Merge #465
465: Update dependencies r=ManyTheFish a=Kerollmops

This PR upgrades and updates this crate's dependencies but first, it removes three dependencies that we don't use anymore. I used [cargo udeps](https://github.com/est31/cargo-udeps) to upgrade them ⬆️

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-03-15 13:49:17 +00:00
0c5f4ed7de Apply suggestions
Co-authored-by: Many <many@meilisearch.com>
2022-03-15 14:18:29 +01:00
21ec334dcc Fix the compilation error of the dependency versions 2022-03-15 11:17:45 +01:00
63682c2c9a Upgrade the dependencies 2022-03-15 11:17:44 +01:00
288a879411 Remove three useless dependencies 2022-03-15 11:17:44 +01:00
712bf035a7 Merge #464
464: exporting heed to avoid having different versions of Heed in Meilisearch r=curquiza a=psvnlsaikumar

# Pull Request

## What does this PR do?
Fixes the issue in meilisearch https://github.com/meilisearch/meilisearch/issues/2210

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: psvnl sai kumar <psvnlsaikumar@gmail.com>
2022-03-15 09:51:56 +00:00
5e08fac729 fixes for rustfmt pass 2022-03-14 19:22:41 +05:30
92e2e09434 exporting heed to avoid having different versions of Heed in Meilisearch 2022-03-14 01:01:58 +05:30
290a29b5fb Merge #457
457: Avoid iterating on big databases when useless r=Kerollmops a=Kerollmops

This PR makes the prefix database updates avoid iterating on big grenad files when it is unnecessary. We introduced this regression in #436 but it went unnoticed.

---

According to the following benchmark results, we take more time when we index documents in one run than before #436. It looks like it is probably due to the fact that, now, instead of computing the prefixes database by iterating on the LMDB we directly iterate on the grenad file. Those could be slower to iterate on and could be the slowdown cause.

I just pushed a commit that tests this branch with the new unreleased version of grenad where some work was done to speed up the iteration on grenad files. [The benchmarks for this last commit](https://github.com/meilisearch/milli/actions/runs/1927187408) are currently running. You can [see the diff](https://github.com/meilisearch/grenad/compare/v0.4.1...main) between the v0.4 and the unreleased v0.5 version of grenad.

```diff
  group                                                             indexing_benchmark-multi-batch-indexing-before-speed-up_45f52620    indexing_stop-iterating-on-big-grenad-files_ac8b85c4
  -----                                                             ----------------------------------------------------------------    ----------------------------------------------------
+ indexing/Indexing songs in three batches with default settings    1.12      57.7±2.14s        ? ?/sec                                 1.00      51.3±2.76s        ? ?/sec
- indexing/Indexing wiki                                            1.00    917.3±30.01s        ? ?/sec                                 1.10   1008.4±38.27s        ? ?/sec
+ indexing/Indexing wiki in three batches                           1.10   1091.2±32.73s        ? ?/sec                                 1.00    995.5±24.33s        ? ?/sec
```

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-03-09 16:46:34 +00:00
1ae13c1374 Avoid iterating on big databases when useless 2022-03-09 15:43:54 +01:00
a8d28e364d Merge #461
461: Add a new error message when the `valid_fields` is empty r=curquiza a=brunoocasali

I've created a test case to handle the new error formatting behavior, but I'm not sure if:

- this is the right place to add the test?
- this is the best way to test this behavior?

And I'm not sure also regarding the `match` implementation, is this something required? Or maybe just an `if` statement is ok as well?

I left the two messages literally without "reusing the prefix" in the implementation because I think this could help the "searchability" of the error in the future.

# Pull Request

## What does this PR do?
Fixes https://github.com/meilisearch/meilisearch/issues/2140

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [ ] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Bruno Casali <brunoocasali@gmail.com>
2022-03-08 09:55:58 +00:00
2ef5751795 Merge #463
463: Allow setting the primary-key in the cli r=irevoire a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-03-07 14:11:40 +00:00
8bb45956d4 allow to set the primary key in the cli 2022-03-07 14:56:49 +01:00
3cbadf92b6 Merge #462
462: cli improvements r=Kerollmops a=MarinPostma

a few improvements:
- use bufreader to load documents, so the loading of the document doesn't appear on flamegraphs
- set default db path to current directory so the `-i` flag can be omitted.



Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-07 09:39:01 +00:00
db3a1905de default db path 2022-03-07 10:30:47 +01:00
6cf82ba993 bufread documents 2022-03-07 10:29:52 +01:00
66c6d5e1ef Add a new error message when the valid_fields is empty
> "Attribute `{}` is not sortable. This index doesn't have configured sortable attributes."
> "Attribute `{}` is not sortable. Available sortable attributes are: `{}`."

coexist in the error handling
2022-03-05 10:38:18 -03:00
df518d8b0b Merge #459
459: Update heed link in cargo toml r=Kerollmops a=curquiza

Since grenad and heed have been moved to the meilisearch orga, this PR changes the link.
This is a minor change since GitHub automatically handles the redirection. This PR is only for consistency.

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-03-01 19:47:14 +00:00
d9ed9de2b0 Update heed link in cargo toml 2022-03-01 19:45:29 +01:00
51cf44d6fd Merge #456
456: Remove useless grenad merging r=Kerollmops a=Kerollmops

This PR must be merged after #454.

This PR removes the part of the code that was merging all of the grenad Readers, a merging that we don't need as the indexer should have already merged them and, therefore, we should only have one final grenad Reader. This reduces the CPU usage and memory pressure that we were needlessly causing.

`@ManyTheFish` are you sure I can skip merging the `word_docids` database?

Here is the benchmark comparison with the previously merged PR #454:
```
group                                              indexing_reintroduce-appending-sorted-values_c05e42a8    indexing_remove-useless-grenad-merging_d5b8b5a2
-----                                              -----------------------------------------------------    -----------------------------------------------
indexing/Indexing movies with default settings     1.06      16.6±1.04s        ? ?/sec                      1.00      15.7±0.93s        ? ?/sec
indexing/Indexing songs with default settings      1.16      60.1±7.07s        ? ?/sec                      1.00      51.7±5.98s        ? ?/sec
indexing/Indexing songs without faceted numbers    1.06      55.4±6.14s        ? ?/sec                      1.00      52.2±4.13s        ? ?/sec
```

And the comparison with multi-batch indexing before #436, we can see that we gain time for benchmarks that index datasets in multiple batches but there is _so much_ variance that it's not clear.

```
group                                                             indexing_benchmark-multi-batch-indexing-before-speed-up_45f52620    indexing_remove-useless-grenad-merging_d5b8b5a2
-----                                                             ----------------------------------------------------------------    -----------------------------------------------
indexing/Indexing geo_point                                       1.07       6.6±0.08s        ? ?/sec                                 1.00       6.2±0.11s        ? ?/sec
indexing/Indexing songs in three batches with default settings    1.12      57.7±2.14s        ? ?/sec                                 1.00      51.5±3.80s        ? ?/sec
indexing/Indexing songs with default settings                     1.00      47.5±2.52s        ? ?/sec                                 1.09      51.7±5.98s        ? ?/sec
indexing/Indexing songs without any facets                        1.00      43.5±1.43s        ? ?/sec                                 1.12      48.8±3.73s        ? ?/sec
indexing/Indexing songs without faceted numbers                   1.00      47.1±2.23s        ? ?/sec                                 1.11      52.2±4.13s        ? ?/sec
indexing/Indexing wiki                                            1.00    917.3±30.01s        ? ?/sec                                 1.09    998.7±38.92s        ? ?/sec
indexing/Indexing wiki in three batches                           1.09   1091.2±32.73s        ? ?/sec                                 1.00    996.5±15.70s        ? ?/sec
```

What do you think `@irevoire`? Should we change the benchmarks to make them do more runs?

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-03-01 16:48:08 +00:00
d5b8b5a2f8 Replace the ugly unwraps by clean if let Somes 2022-02-28 16:31:33 +01:00
8d26f3040c Remove a useless grenad file merging 2022-02-28 16:31:33 +01:00
21898ffc60 Merge #454
454: Reintroduce appending sorted entries when possible r=Kerollmops a=Kerollmops

This PR modifies the `sorter_into_lmdb_database` function to append values into the database instead of get-put-merging them, it should improve the indexation speed for when the database is empty.

```txt
group                                             indexing_main_25123af3                 indexing_reintroduce-appending-sorted-values_c05e42a8
-----                                             ----------------------                 -----------------------------------------------------
indexing/Indexing movies with default settings    1.07      17.8±0.99s        ? ?/sec    1.00      16.6±1.04s        ? ?/sec
indexing/Indexing songs with default settings     1.00      57.0±6.01s        ? ?/sec    1.05      60.1±7.07s        ? ?/sec
indexing/Indexing songs without any facets        1.10      51.8±5.36s        ? ?/sec    1.00      47.3±3.30s        ? ?/sec
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-02-28 14:55:37 +00:00
04b1bbf932 Reintroduce appending sorted entries when possible 2022-02-24 14:50:45 +01:00
382be56d36 Merge #453
453: Benchmark multi batch indexing r=Kerollmops a=Kerollmops

Hey `@irevoire`, could you please add the new benchmarks into influx?

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-02-24 12:33:13 +00:00
acfc96525c Apply GitHub suggestions 2022-02-23 16:20:29 +01:00
a820aa11e6 Add a new movies benchmark to test multi batch indexing 2022-02-23 16:20:29 +01:00
8d2e3e4aba Add a new wiki benchmark to test multi batch indexing 2022-02-23 16:20:29 +01:00
ab5247dc64 Add a new songs benchmark to test multi batch indexing 2022-02-23 16:20:28 +01:00
acd9535588 Merge #455
455: Raise the GitHub CI timeout limit to 72h r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-02-23 14:33:31 +00:00
19bfb2649b Raise the GitHub CI timeout limit to 72h 2022-02-23 15:27:51 +01:00
25123af3b8 Merge #436
436: Speed up the word prefix databases computation time r=Kerollmops a=Kerollmops

This PR depends on the fixes done in #431 and must be merged after it.

In this PR we will bring the `WordPrefixPairProximityDocids`, `WordPrefixDocids` and, `WordPrefixPositionDocids` update structures to a new era, a better era, where computing the word prefix pair proximities costs much fewer CPU cycles, an era where this update structure can use the, previously computed, set of new word docids from the newly indexed batch of documents.

---

The `WordPrefixPairProximityDocids` is an update structure, which means that it is an object that we feed with some parameters and which modifies the LMDB database of an index when asked for. This structure specifically computes the list of word prefix pair proximities, which correspond to a list of pairs of words associated with a proximity (the distance between both words) where the second word is not a word but a prefix e.g. `s`, `se`, `a`. This word prefix pair proximity is associated with the list of documents ids which contains the pair of words and prefix at the given proximity.

The origin of the performance issue that this struct brings is related to the fact that it starts its job from the beginning, it clears the LMDB database before rewriting everything from scratch, using the other LMDB databases to achieve that. I hope you understand that this is absolutely not an optimized way of doing things.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-02-16 15:41:14 +00:00
ff8d7a810d Change the behavior of the as_cloneable_grenad by taking a ref 2022-02-16 15:40:08 +01:00
f367cc2e75 Finally bump grenad to v0.4.1 2022-02-16 15:28:48 +01:00
f2984f66e6 Merge #452
452: bump milli r=curquiza a=irevoire



Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-02-16 13:49:14 +00:00
0defeb268c bump milli 2022-02-16 13:27:41 +01:00
030064da25 Merge #451
451: Update LICENSE with Meili SAS name r=Kerollmops a=curquiza

Check with thomas, we must put the real name of the company

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-02-15 16:18:47 +00:00
84035a27f5 Update LICENSE 2022-02-15 15:52:50 +01:00
0885fcf973 Merge #450
450: Get rid of chrono in favor of time r=Kerollmops a=irevoire

We only use `chrono` as a wrapper around `time`, and since there has been an [open CVE on `chrono` for at least 3 months now](https://github.com/chronotope/chrono/pull/632) and the repo seems to be [struggling with maintenance](https://github.com/chronotope/chrono/pull/639), I think we should use `time` directly which is way more active and sufficient for our use case.

EDIT: Actually the CVE status has been known for more than 6 months: https://github.com/chronotope/chrono/issues/602

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-02-15 10:54:46 +00:00
48542ac8fd get rid of chrono in favor of time 2022-02-15 11:41:55 +01:00
ea15ad6c34 Merge #447
447: Update version for the next release (v0.22.1) r=curquiza a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-02-07 17:44:09 +00:00
d03b3ceb58 Update version for the next release (v0.22.1) 2022-02-07 18:39:29 +01:00
5d58cb7449 Merge #442
442: fix phrase search r=curquiza a=MarinPostma

Run the exact match search on 7-word windows instead of only two. This makes false positives very, very unlikely, and impossible on phrase queries that are less than seven words long.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-02-07 16:18:20 +00:00
c5a996aa78 Merge #446
446: Update LICENSE r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-02-07 09:47:39 +00:00
1279c38ac9 Update LICENSE 2022-02-05 18:29:11 +01:00
267d14c28d Merge #445
445: allow null values in csv r=Kerollmops a=MarinPostma

This pr allows null values in csv:
- if the field is of type string, then an empty field is considered null (`,,`), anything else is turned into a string (i.e. `, ,` is a single whitespace string)
- if the field is of type number, when the trimmed field is empty, we consider the value null (i.e `,,`, `, ,` are both null numbers) otherwise we try to parse the number.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-02-03 15:11:32 +00:00
bd2262ceea allow null values in csv 2022-02-03 16:03:01 +01:00
13de251047 rewrite word pair distance gathering 2022-02-03 15:57:20 +01:00
fda4f229bb Merge #417
417: Change chunk size to 4MiB to fit more the end user usage r=Kerollmops a=ManyTheFish

Reverts meilisearch/milli#379

We made several indexing tests using different sizes of datasets (5 datasets from 9MiB to 100MiB) on several typologies of VMs (`XS: 1GiB RAM, 1 VCPU`, `S: 2GiB RAM, 2 VCPU`, `M: 4GiB RAM, 3 VCPU`, `L: 8GiB RAM, 4 VCPU`).
The result of these tests shows that the `4MiB` chunk size seems to be the best size compared to other chunk sizes (`2MiB`, `4MiB`, `8MiB`, `16MiB`, `32MiB`, `64MiB`, `128MiB`).

below is the average time per chunk size:

![Capture d’écran 2021-09-27 à 14 27 50](https://user-images.githubusercontent.com/6482087/134909368-ef0bc45e-68d5-49d1-aaf9-91113b7c410f.png)

<details>
<summary>Detailed data</summary>
<br>

![Capture d’écran 2021-09-27 à 14 39 48](https://user-images.githubusercontent.com/6482087/134909952-a36b1457-bbbd-4a6c-bbe5-519e4b926b5a.png)
</br>
</details> 


Co-authored-by: Many <many@meilisearch.com>
2022-02-02 18:30:59 +00:00
2468ebb76b Merge #444
444: Fix the parsing of ndjson requests to index more than the first line r=Kerollmops a=Kerollmops

This PR correctly uses the `BufRead` trait to read every line of the content instead of just the first one. This bug was only affecting the http-ui test crate.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-02-02 17:59:44 +00:00
9142ba9dd4 Fix the parsing of ndjson requests to index more than the first line 2022-02-02 17:55:13 +01:00
d59bcea749 Revert "Revert "Change chunk size to 4MiB to fit more the end user usage"" 2022-02-02 17:01:13 +01:00
7541ab99cd review changes 2022-02-02 12:59:01 +01:00
d0aabde502 optimize 2 typos case 2022-02-02 12:56:09 +01:00
55e6cb9c7b typos on first letter counts as 2 2022-02-02 12:56:09 +01:00
642c01d0dc set max typos on ngram to 1 2022-02-02 12:56:08 +01:00
d852dc0d2b fix phrase search 2022-02-01 20:21:33 +01:00
fb79c32430 Compute the new, common and, deleted prefix words fst once 2022-01-27 11:00:18 +01:00
51d1e64b23 Remove, now useless, the WriteMethod enum 2022-01-27 10:08:35 +01:00
e9c02173cf Rework the WordsPrefixPositionDocids update to compute a subset of the database 2022-01-27 10:08:35 +01:00
dbba5fd461 Create a function to simplify the word prefix pair proximity docids compute 2022-01-27 10:08:35 +01:00
e760e02737 Fix the computation of the newly added and common prefix pair proximity words 2022-01-27 10:08:35 +01:00
d59e559317 Fix the computation of the newly added and common prefix words 2022-01-27 10:08:34 +01:00
2ec8542105 Rework the WordPrefixDocids update to compute a subset of the database 2022-01-27 10:08:34 +01:00
28692f65be Rework the WordPrefixDocids update to compute a subset of the database 2022-01-27 10:08:34 +01:00
5404bc02dd Move the fst_stream_into_hashset method in the helper methods 2022-01-27 10:06:00 +01:00
c90fa95f93 Only compute the word prefix pairs on the created word pair proximities 2022-01-27 10:06:00 +01:00
822f67e9ad Bring the newly created word pair proximity docids 2022-01-27 10:06:00 +01:00
d28f18658e Retrieve the previous version of the words prefixes FST 2022-01-27 10:05:59 +01:00
38d23546a5 Merge #431
431: Fix and improve word prefix pair proximity r=ManyTheFish a=Kerollmops

This PR first fixes the algorithm we used to select and compute the word prefix pair proximity database. The previous version was skipping nearly all of the prefixes. The issue is that this fix made this method take more time and we were trying to reduce the time spent in it.

With `@ManyTheFish` we found out that we could skip some of the work we were doing by:
 - discarding the prefixes that were shorter than a specific threshold (default: 2).
 - discarding the word prefix pairs with proximity bigger than a specific threshold (default: 4).
 - remove the unused threshold that was specifying a minimum amount of word docids to merge.

We will take more time to do some more optimization, like stop clearing and recomputing from scratch the database, we will compute the subsets of keys to create, keep and merge. This change is a little bit more complex than what this PR does.

I keep this PR as a draft as I want to further test the real gain, to see whether it is enough and whether it is valid. I advise reviewers to review commit by commit to see the changes bit by bit; reviewing the whole PR at once can be hard.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-01-27 07:04:56 +00:00
c63f945093 Merge #441
441: Changes related to the rebranding r=curquiza a=meili-bot

_This PR is auto-generated._

 - [X] Change the name `MeiliSearch` to `Meilisearch` in README.
 - [x] ⚠️ Ensure the bot did not update part you don’t want it to update, especially in the code examples in the Getting started.
 - [x] Please, ensure there is no other "MeiliSearch". For example, in the comments or in the tests name.
 - [x] Put the new logo on the README if needed -> still using the milli logo so far


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-01-26 17:07:37 +00:00
0f213f2202 Replace MeiliSearch by Meilisearch 2022-01-26 17:49:55 +01:00
de808a391a Replace meilisearch by Meilisearch 2022-01-26 17:48:22 +01:00
0d282e3cc5 Update README.md 2022-01-26 16:33:16 +01:00
d342c3c357 Merge #438
438: CLI improvements r=Kerollmops a=MarinPostma

I've made the following changes to the cli:
- `settings-update` becomes `settings`, with two subcommands: `update` and `show`.
- `document-addition` becomes `documents` with a subcommand: `add` (I'll add a feature to list documents later)
- `search` now has an interactive mode `-i`
- search returns the number of documents and the time it took to perform the search.


Co-authored-by: mpostma <postma.marin@protonmail.com>
2022-01-26 15:18:20 +00:00
f9b214f34e Apply suggestions from code review
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2022-01-26 11:28:11 +01:00
e1cc025cbd Merge #440
440: fix(fuzzer): fix the fuzzer after #430 r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-25 16:33:57 +00:00
f04cd19886 Introduce a max prefix length parameter to the word prefix pair proximity update 2022-01-25 17:04:23 +01:00
1514dfa1b7 Introduce a max proximity parameter to the word prefix pair proximity update 2022-01-25 17:04:23 +01:00
23ea3ad738 Remove the useless threshold when computing the word prefix pair proximity 2022-01-25 17:04:23 +01:00
e3c34684c6 Fix a bug where we were skipping most of the prefix pairs 2022-01-25 17:04:23 +01:00
b5f01b52c7 cli improvements 2022-01-25 14:08:30 +01:00
fb51d511be fix(fuzzer): fix the fuzzer after #430 2022-01-25 12:08:47 +01:00
9f2ff71581 Merge #434
434: bump milli to v0.22.0 r=curquiza a=irevoire

This is breaking because of this PR:
98a365aaae

Should we do a special branch to only release the [patch](https://github.com/meilisearch/milli/pull/433) for https://github.com/meilisearch/MeiliSearch/issues/2082 (which is non-breaking)?

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-24 17:31:20 +00:00
fd177b63f8 Merge #423
423: Remove an unused file r=irevoire a=irevoire

This empty file is not included anywhere

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-19 14:18:05 +00:00
8433516d85 Merge #430
430: Document batch support r=Kerollmops a=MarinPostma

This pr adds support for document batches in milli. It changes the API of the `IndexDocuments` builder by adding an `add_documents` method. The API of the updates is changed a little, with the `UpdateBuilder` being renamed to `IndexerConfig` and being passed to the update builders. This makes it easier to pass around structs that need to access the indexer config, rather than extracting the fields each time. This change impacts many function signatures and simplifies them.

The change is not thorough, and may require another PR to propagate to the whole codebase. I restricted it to what is necessary for this PR.


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2022-01-19 13:32:59 +00:00
0c84a40298 document batch support
reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt
2022-01-19 12:40:20 +01:00
74962b2fd9 Merge #435
435: Ensure we get no documents and no error when filtering on an empty db r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-18 10:46:26 +00:00
01968d7ca7 ensure we get no documents and no error when filtering on an empty db 2022-01-18 11:40:30 +01:00
367f403693 bump milli 2022-01-17 16:41:34 +01:00
8f4499090b Merge #433
433: fix(filter): Fix two bugs. r=Kerollmops a=irevoire

- Stop lowercasing the field when looking in the field id map
- When a field id does not exist it means there is currently zero
  documents containing this field thus we return an empty RoaringBitmap
  instead of throwing an internal error

Will fix https://github.com/meilisearch/MeiliSearch/issues/2082 once meilisearch is released

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-17 14:06:53 +00:00
4c516c00da Merge #426
426: Fix search highlight for non-unicode chars r=ManyTheFish a=Samyak2

# Pull Request

## What does this PR do?
Fixes https://github.com/meilisearch/MeiliSearch/issues/1480
<!-- Please link the issue you're trying to fix with this PR, if none then please create an issue first. -->

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

## Changes

The `matching_bytes` function takes a `&Token` now and:
- gets the number of bytes to highlight (unchanged).
- uses `Token.num_graphemes_from_bytes` to get the number of grapheme clusters to highlight.

In essence, the `matching_bytes` function now returns the number of matching grapheme clusters instead of bytes.

Added proper highlighting in the HTTP UI:
- requires dependency on `unicode-segmentation` to extract grapheme clusters from tokens
- `<mark>` tag is put around only the matched part
    - before this change, the entire word was highlighted even if only a part of it matched

## Questions

Since `matching_bytes` does not return number of bytes but grapheme clusters, should it be renamed to something like `matching_chars` or `matching_graphemes`? Will this break the API?

Thank you very much `@ManyTheFish` for helping 😄 

Co-authored-by: Samyak S Sarnayak <samyak201@gmail.com>
2022-01-17 13:39:00 +00:00
d1ac40ea14 fix(filter): Fix two bugs.
- Stop lowercasing the field when looking in the field id map
- When a field id does not exist it means there is currently zero
  documents containing this field thus we return an empty RoaringBitmap
  instead of throwing an internal error
2022-01-17 13:51:46 +01:00
15bbde1022 Merge #432
432: Fuzzer r=Kerollmops a=irevoire

Provide a first way of fuzzing the indexing part of milli.
It depends on [cargo-fuzz](https://rust-fuzz.github.io/book/cargo-fuzz.html)

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-17 12:50:26 +00:00
c0313f3026 Use chars for highlight instead of graphemes
Tokenizer v0.2.7 uses chars instead of graphemes for matching bytes.
`unicode-segmentation` dependency isn't needed anymore.

Also, oxidised the highlight code :)

Co-authored-by: many <maxime@meilisearch.com>
2022-01-17 13:15:31 +05:30
2d7607734e Run cargo fmt on matching_words.rs 2022-01-17 13:04:33 +05:30
5ab505be33 Fix highlight by replacing num_graphemes_from_bytes
num_graphemes_from_bytes has been renamed in the tokenizer to
num_chars_from_bytes.

Highlight now works correctly!
2022-01-17 13:02:55 +05:30
c10f58b7bd Update tokenizer to v0.2.7 2022-01-17 13:02:00 +05:30
e752bd06f7 Fix matching_words tests to compile successfully
The tests still fail due to a bug in https://github.com/meilisearch/tokenizer/pull/59
2022-01-17 11:37:45 +05:30
30247d70cd Fix search highlight for non-unicode chars
The `matching_bytes` function takes a `&Token` now and:
- gets the number of bytes to highlight (unchanged).
- uses `Token.num_graphemes_from_bytes` to get the number of grapheme
  clusters to highlight.

In essence, the `matching_bytes` function returns the number of matching
grapheme clusters instead of bytes. Should this function be renamed
then?

Added proper highlighting in the HTTP UI:
- requires dependency on `unicode-segmentation` to extract grapheme
  clusters from tokens
- `<mark>` tag is put around only the matched part
    - before this change, the entire word was highlighted even if only a
      part of it matched
2022-01-17 11:37:44 +05:30
0605c0ac68 apply review comments 2022-01-13 18:51:08 +01:00
b22c80106f add some settings to the fuzzed milli and use the published version of arbitrary json 2022-01-13 15:35:24 +01:00
c94952e25d update the readme + dependencies 2022-01-12 18:30:11 +01:00
e1053989c0 add a fuzzer on milli 2022-01-12 17:57:54 +01:00
559e019de1 Merge #424
424: Store the geopoint in three dimensions r=Kerollmops a=irevoire

Related to this issue: https://github.com/meilisearch/MeiliSearch/issues/1872

Fix the whole computation of distance for any “geo” operations (sort or filter). Now when you sort points they are returned to you in the right order.
And when you filter on a specific radius you only get points included in the radius.

This PR changes the way we store the geo points in the RTree.
Instead of considering the latitude and longitude as orthogonal coordinates, we convert them to real orthogonal coordinates projected on a sphere with a radius of 1.
This is the conversion formula.
![image](https://user-images.githubusercontent.com/7032172/145990456-eefe840a-384f-4486-848b-81d0036814ec.png)
Which, in Rust, translates to this function:
```rust
pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
    let [lat, lng] = coord.map(|f| f.to_radians());
    let x = lat.cos() * lng.cos();
    let y = lat.cos() * lng.sin();
    let z = lat.sin();

    [x, y, z]
}
```

Storing the points on a sphere is easier / faster to compute than storing the point on an approximation of the real earth shape.
But when we need to compute the distance between two points we still need to use the haversine distance which works with latitude and longitude.
So, to do the fewest search-time computations possible I'm now associating every point with its `DocId` and its lat/lng.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-10 15:23:43 +00:00
660eac50b2 Merge #427
427: Handle escaped characters in filters r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-10 15:01:23 +00:00
92804f6f45 apply clippy suggestions 2022-01-10 15:59:04 +01:00
0fcde35a20 Update filter-parser/src/value.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-01-10 15:53:44 +01:00
3c7ea1d298 Apply code suggestions
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-01-10 15:19:21 +01:00
74594be234 Merge #429
429: Benchmark CIs: not use a default label to call the GH runner r=irevoire a=curquiza

Since we now have multiple self-hosted GitHub runners, we need to differentiate them when calling them in the CI. The `self-hosted` label is the default one, so we need to use the unique and appropriate one for the benchmark machine.

<img width="925" alt="Capture d’écran 2022-01-04 à 15 42 18" src="https://user-images.githubusercontent.com/20380692/148079840-49cd7878-5912-46ff-8ab8-bf646777f782.png">


Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-01-04 15:41:08 +00:00
3d99686f7a Change self-hosted label by benchmarks 2022-01-04 16:01:01 +01:00
c039562723 Merge #428
428: Reintroduce the gitignore for the fuzzer r=Kerollmops a=irevoire

Reintroduce the gitignore in the fuzz directory

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-04 12:09:06 +00:00
9bdcd42b9b reintroduce the gitignore for the fuzzer 2022-01-04 13:07:32 +01:00
4cae691b86 Merge #425
425: Push the result of the benchmarks to influxdb r=irevoire a=irevoire

Now execute a benchmark for every PR merged into main and then upload the results to influxdb.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-04 11:04:16 +00:00
6a1216bd51 Integrate telegraf into our CI 2022-01-04 11:59:05 +01:00
02a21fd309 Handle the escapes of quote in the filters 2022-01-04 04:04:10 +01:00
98a365aaae store the geopoint in three dimensions 2021-12-14 12:21:24 +01:00
d671d6f0f1 remove an unused file 2021-12-13 19:27:34 +01:00
11a056d116 Merge #422
422: Prefer returning `None` instead of using an `FilterCondition::Empty` state r=Kerollmops a=Kerollmops

This PR is related to the issue comment https://github.com/meilisearch/MeiliSearch/issues/1338#issuecomment-989322889, which shows that when a filter is known to be empty, no results are returned. This is wrong: an empty filter should not apply at all, since it places no restriction on the document set.

The filter system on the milli side had introduced an `Empty` state which was used in this kind of situation, but I found out that it is not needed: when we parse a filter and it is empty, we can simply return `None`, as the `Filter::from_array` constructor does. So I removed it and added tests!

On the MeiliSearch side, we just need to match on a `None` and completely ignore the filter in such a case.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-12-09 15:03:04 +00:00
94011bb9a8 Fix the benchmarks to work with optional filters 2021-12-09 12:14:16 +01:00
1c6c89f345 Fix the binaries that use the new optional filters 2021-12-09 11:57:53 +01:00
25faef67d0 Remove the database setup in the filter_depth test 2021-12-09 11:57:53 +01:00
65519bc04b Test that empty filters return a None 2021-12-09 11:57:53 +01:00
ef59762d8e Prefer returning None instead of the Empty Filter state 2021-12-09 11:57:52 +01:00
80dcfd5c3e Merge #421
421: Introduce the depth method on FilterCondition r=Kerollmops a=Kerollmops

This PR introduces the depth method on the FilterCondition type to be able to react to it. It is meant to be used to reject filters that go too deep and can make the engine stack overflow.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-12-09 10:28:52 +00:00
ee856a7a46 Limit the max filter depth to 2000 2021-12-07 17:36:45 +01:00
32bd9f091f Detect the filters that are too deep and return an error 2021-12-07 17:20:11 +01:00
90f49eab6d Check the filter max depth limit and reject the invalid ones 2021-12-07 16:32:48 +01:00
49c2db9485 Change the depth function to return the token depth 2021-12-07 16:06:10 +01:00
57502fcf6a Introduce the depth method on FilterCondition 2021-12-06 17:35:20 +01:00
c83b77304a Merge #420
420: Update milli 0.21.0 r=ManyTheFish a=ManyTheFish

Update all modules to 0.21.0

Co-authored-by: many <maxime@meilisearch.com>
2021-11-30 17:22:12 +00:00
1b3923b5ce Update all packages to 0.21.0 2021-11-29 12:17:59 +01:00
26629a3f9e Merge #419
419:  fix word pair proximity indexing r=ManyTheFish a=ManyTheFish

# Pull Request

Sort positions before iterating over them during word pair proximity extraction.

fixes [Meilisearch#1913](https://github.com/meilisearch/MeiliSearch/issues/1913)

Co-authored-by: many <maxime@meilisearch.com>
2021-11-23 10:21:05 +00:00
8970246bc4 Sort positions before iterating over them during word pair proximity extraction 2021-11-22 18:16:54 +01:00
cc32519a2d Merge #418
418: change visibility of DocumentDeletionResult r=Kerollmops a=MarinPostma

Change the visibility of `DocumentDeletionResult`, so its fields can be accessed from outside milli.


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-11-22 14:45:55 +00:00
6e977dd8e8 change visibility of DocumentDeletionResult 2021-11-22 15:44:44 +01:00
68f1db123a Merge #416
416: Update tokenizer v0.2.6 r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-11-18 16:01:11 +00:00
35f9499638 Export tokenizer from milli 2021-11-18 16:57:12 +01:00
64ef5869d7 Update tokenizer v0.2.6 2021-11-18 16:56:05 +01:00
2c14efa8a2 Merge #409
409: remove update_id in UpdateBuilder r=ManyTheFish a=MarinPostma

Removing the `update_id` from `UpdateBuilder`, since it serves no purpose. I had introduced it when working on HA some time ago, but I think there are better ways to do it now, so it can be removed and stop being in our way.

Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-11-16 14:59:09 +00:00
6eb47ab792 remove update_id in UpdateBuilder 2021-11-16 13:07:04 +01:00
21b78f3926 Merge #414
414: improve update result types r=ManyTheFish a=MarinPostma

Improve the returned meta when performing document additions and deletions:

- On document addition return the number of indexed documents and the total number of documents in the index after the indexing
- On document deletion return the number of deleted documents, and the remaining number of documents in the index after the deletion is performed


I also fixed a potential bug when performing a document deletion and the primary key couldn't be found: before we assumed that the db was empty and returned that no documents were deleted, but since we checked before that the db wasn't empty, entering this branch is actually a bug, and now returns a 'MissingPrimaryKey' error.


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-11-15 09:06:10 +00:00
09b4281cff improve document addition returned meta
2021-11-10 14:08:36 +01:00
721fc294be improve document deletion returned meta
returns both the remaining number of documents and the number of deleted
documents.
2021-11-10 14:08:18 +01:00
8dff08d772 Merge #400
400: Rewrite the filter parser and add a lot of tests r=irevoire a=irevoire

This PR is a complete rewrite of #358, which was reverted in #403.
You can already try this PR in Meilisearch here https://github.com/meilisearch/MeiliSearch/pull/1880.

Since writing a parser is quite complicated, I moved all the logic to another workspace called `filter_parser`.
In this workspace, we don't know anything about milli, the filterable fields / field ID or anything.
As you can see in its `cargo.toml`, it has only three dependencies entirely focused on the parsing part:
```
nom = "7.0.0"
nom_locate = "4.0.0"
```

But introducing this new workspace made some changes necessary on the “AST”. Now the parser only returns `Tokens` (a simple `&str` with a bit of context). Everything is interpreted when we execute the filter later in milli.
This crate provides a new error type for all filter related errors.

---------
## Errors

Currently, we have multiple kinds of errors. Sometimes we are generating errors looking like that: (for `name = truc`)
```
Attribute `name` is not filterable. Available filterable attributes are: ``.
```
While sometimes pest was generating errors looking like that:
```
Invalid syntax for the filter parameter: ` --> 1:7
  |
1 | name =
  |       ^---
  |
  = expected word`.
```

Which most people were seeing like that: (for `name =`)
```
Invalid syntax for the filter parameter: ` --> 1:7\n  |\n1 | name =\n  |       ^---\n  |\n  = expected word`.
```

-----------

With this PR, the error format is unified between all errors.
All errors follow this more straightforward format:
```
The error message.
[from char]:[to char] filter
```

This should be way easier to read when embedded in the JSON for a human. And it should also allow us to parse the errors easily and provide highlighting or something with a frontend playground.

Here is an example of the two previous errors with the new format:
For `name = truc`:
```
Attribute `name` is not filterable. Available filterable attributes are: ``.
1:4 name = truc
```
Or in one line:
```
Attribute `name` is not filterable. Available filterable attributes are: ``.\n1:4 name = truc
```

And for `name =`:
```
Was expecting a value but instead got nothing.
7:7 name =
```
Or in one line:
```
Was expecting a value but instead got nothing.\n7:7 name =
```

Also, since we now have control over the parser, we can generate more explicit error messages so a lot of new errors have been created. I tried to be as helpful as possible for the user; here is a little overview of the new error message you can get when misusing a filter:
```
Expression `"truc` is missing the following closing delimiter: `"`.
8:13 name = "truc
```
```
The `_geoRadius` filter is an operation and can't be used as a value.
8:30 name = _geoRadius(12, 13, 14)
```
etc

## Tests
A lot of tests have been written in the `filter_parser` crate. I think there is a unit test for every part of the syntax. 
But since we can never be sure we covered all the cases, I also fuzzed the new parser A LOT (for ±8 hours on 20 threads). And the code to fuzz the parser is included in the workspace, so if one day we need to change something in the syntax, we'll be able to re-use it by simply running:
```
cargo fuzz run --release parse
```

## Milli
I renamed the type and module `filter_condition.rs` / `FilterCondition` to `filter.rs` / `Filter`.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-11-09 16:09:34 +00:00
7c3017734a re-ignore the ! symbol when generating a good error message 2021-11-09 17:08:04 +01:00
bff48681d2 Re-order the operator
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-11-09 17:05:36 +01:00
519d6b2bf3 remove the ! syntax for the not 2021-11-09 16:47:54 +01:00
73df873f44 fix typos 2021-11-09 16:41:10 +01:00
99197387af fix the test with the new escaped format 2021-11-09 16:41:10 +01:00
f28600031d Rename the filter_parser crate into filter-parser
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-11-09 16:41:10 +01:00
0ea0146e04 implement deref &str on the tokens 2021-11-09 11:34:10 +01:00
a211a9cdcd update the error format so it can be easily parsed by someone else 2021-11-09 11:19:30 +01:00
9b24f83456 in case of error return a range of chars position instead of one line and column 2021-11-09 10:27:29 +01:00
2c6d08c519 Simplify the tokens to only wrap one span and no inner value
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 10:12:20 +01:00
18eb4b9c51 fix spaces in the bnf 2021-11-09 01:04:50 +01:00
cf98bf37d0 Simplify some closure
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 01:03:02 +01:00
bc9daf9041 update the bnf
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 01:00:42 +01:00
9c36e497d9 Rename the key_component into a value_component
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 00:59:44 +01:00
6515838d35 improve the readability of the _geoPoint thingy in the value 2021-11-09 00:57:46 +01:00
ea52aff6dc Rename the ExtendNomError trait to NomErrorExt
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 00:52:17 +01:00
ef0d5a8240 flatten a match 2021-11-09 00:49:13 +01:00
15bd14297e Remove useless closure
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 00:45:46 +01:00
21d115dcbb remove greedy-error 2021-11-08 17:53:41 +01:00
959ca66125 improve the error diagnostic when parsing values 2021-11-08 15:58:21 +01:00
7483c7513a fix the filterable fields 2021-11-07 01:52:19 +01:00
e5af3ac65c rename the filter_condition.rs to filter.rs 2021-11-06 16:37:55 +01:00
6831c23449 merge with main 2021-11-06 16:34:30 +01:00
5c01e9bf7c fix the benchmarks 2021-11-06 16:03:49 +01:00
075d9c97c0 re-implement the equality between tokens to only compare the inner value 2021-11-06 16:02:27 +01:00
b249989bef fix most of the tests 2021-11-06 01:32:12 +01:00
070ec9bd97 small update on the README 2021-11-05 17:45:20 +01:00
27a6a26b4b makes the parse function part of the filter_parser 2021-11-05 10:46:54 +01:00
76d961cc77 implements the last errors 2021-11-04 17:42:06 +01:00
8234f9fdf3 recreate most filter error except for the geosearch 2021-11-04 17:24:55 +01:00
7328ffb034 stop panicking in case of internal error 2021-11-04 16:20:53 +01:00
3e5550c910 clean the errors 2021-11-04 16:12:17 +01:00
72a9071203 fix typo 2021-11-04 16:03:52 +01:00
07a5ffb04c update http-ui 2021-11-04 15:52:22 +01:00
a58bc5bebb update milli with the new parser_filter 2021-11-04 15:02:36 +01:00
b1a0110a47 update the main 2021-11-04 14:48:39 +01:00
d0fe9dea61 update the readme 2021-11-04 14:43:36 +01:00
b165c77fa7 add a smol README 2021-11-04 14:39:02 +01:00
54aec7ac5f update the filter parser and some code for the fuzzer 2021-11-04 14:22:35 +01:00
a2fc74f010 Merge #412
412: Change Attribute and Ranking rules errors r=ManyTheFish a=ManyTheFish

# Pull Request

Fixes Meilisearch [PR comment](https://github.com/meilisearch/MeiliSearch/pull/1873#issuecomment-959786406)


Co-authored-by: many <maxime@meilisearch.com>
2021-11-04 13:08:50 +00:00
743ed9f57f Bump milli version 2021-11-04 14:04:21 +01:00
7b3bac46a0 Change Attribute and Ranking rules errors 2021-11-04 13:19:32 +01:00
3be37b00e7 Merge #410
410: Update version for the next release (v0.20.1) r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-11-03 13:32:03 +00:00
702589104d Update version for the next release (v0.20.1) 2021-11-03 14:20:01 +01:00
cb9e7e510b Merge #408
408: Change last error messages r=ManyTheFish a=ManyTheFish

Change forgotten error messages

Co-authored-by: many <maxime@meilisearch.com>
2021-11-03 10:51:33 +00:00
0c0038488c Change last error messages 2021-11-03 11:24:06 +01:00
5d3af5f273 remove all genericity in favor of my custom error type 2021-11-02 20:27:07 +01:00
76a2adb7c3 re-enable the tests in the parser and start the creation of an error type 2021-11-02 17:35:17 +01:00
5a6d22d4ec Merge #407
407: Update version for the next release (v0.20.0) r=curquiza a=curquiza

Breaking because of #405 and #406 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-28 13:43:48 +00:00
08ae47e475 Merge #405
405: Change some error messages r=ManyTheFish a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-10-28 13:35:55 +00:00
056ff13c4d Update version for the next release (v0.20.0) 2021-10-28 14:52:57 +02:00
9f1e0d2a49 Refine asc/desc error messages 2021-10-28 14:47:17 +02:00
ed6db19681 Fix PR comments 2021-10-28 11:18:32 +02:00
9875f2646a Merge #406
406: return document count from builder r=MarinPostma a=MarinPostma

`DocumentBatchBuilder::finish` now returns the number of documents in the batch. This is more compact than calling `len()` just before calling finish.


Co-authored-by: marin postma <postma.marin@protonmail.com>
2021-10-28 08:42:38 +00:00
183d3dada7 return document count from builder 2021-10-28 10:33:04 +02:00
2be755ce75 Lower error check, already check in meilisearch 2021-10-27 19:50:41 +02:00
3599df77f0 Change some error messages 2021-10-27 19:33:01 +02:00
d7943fe225 Merge #402
402: Optimize document transform r=MarinPostma a=MarinPostma

This PR optimizes the transformation of document additions into the obkv format. Instead of accepting any serializable object, we treat JSON and CSV specifically:
- For JSON, we build a serde `Visitor` that transforms the JSON straight into obkv without an intermediate representation.
- For CSV, we directly write the lines into the obkv, applying other optimizations as well.

Co-authored-by: marin postma <postma.marin@protonmail.com>
2021-10-26 09:55:28 +00:00
6758146213 Merge #404
404: remove search crate r=Kerollmops a=MarinPostma

The functionalities of the search crate have been moved to the cli crate. The outstanding files are removed by this pr.


Co-authored-by: marin postma <postma.marin@protonmail.com>
2021-10-26 09:40:34 +00:00
9b8ab40d80 remove search folder 2021-10-26 11:35:49 +02:00
baddd80069 implement review suggestions 2021-10-25 18:29:12 +02:00
f9445c1d90 return float parsing error context in csv 2021-10-25 17:27:10 +02:00
15c29cdd9b Merge #401
401: Update version for the next release (v0.19.0) r=curquiza a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-25 12:49:53 +00:00
13d8272173 Merge #403
403: Revert "Replacing pest with nom" r=curquiza a=curquiza

Reverts meilisearch/milli#358

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-25 12:16:49 +00:00
208903ddde Revert "Replacing pest with nom " 2021-10-25 11:58:00 +02:00
679fe18b17 Update version for the next release (v0.19.0) 2021-10-25 11:52:17 +02:00
3fcccc31b5 add document builder example 2021-10-25 10:26:43 +02:00
430e9b13d3 add csv builder tests 2021-10-25 10:26:43 +02:00
53c79e85f2 document errors 2021-10-25 10:26:43 +02:00
2e62925a6e fix tests 2021-10-25 10:26:42 +02:00
0f86d6b28f implement csv serialization 2021-10-25 10:26:42 +02:00
8d70b01714 optimize document deserialization 2021-10-25 10:26:42 +02:00
1327807caa add some error messages 2021-10-22 19:00:33 +02:00
c8d03046bf add a check on the fid in the geosearch 2021-10-22 18:08:18 +02:00
3942b3732f re-implement the geosearch 2021-10-22 18:03:39 +02:00
7cd9109e2f lowercase value extracted from Token 2021-10-22 17:50:15 +02:00
4e113bbf1b handle the case of empty input 2021-10-22 17:49:08 +02:00
e25ca9776f start updating the exposed function to makes other modules happy 2021-10-22 17:23:22 +02:00
6c9165b6a8 provide a helper to parse the token but to not handle the errors 2021-10-22 16:52:13 +02:00
efb2f8b325 convert the errors 2021-10-22 16:38:35 +02:00
d6ba84ea99 re introduce the special error type to be able to add context to the errors 2021-10-22 15:09:56 +02:00
c27870e765 integrate a first version without any error handling 2021-10-22 14:33:18 +02:00
01dedde1c9 update some names and move some parser out of the lib.rs 2021-10-22 01:59:38 +02:00
7e5c5c4d27 start a new rewrite of the filter parser 2021-10-22 01:15:42 +02:00
c634d43ac5 add a simple test on the filters with an integer 2021-10-21 17:10:27 +02:00
6c15f50899 rewrite the parser logic 2021-10-21 16:45:42 +02:00
e1d81342cf add test on the or and and operator 2021-10-21 13:01:25 +02:00
423baac08b fix the tests 2021-10-21 12:45:40 +02:00
36281a653f write all the simple tests 2021-10-21 12:40:11 +02:00
f8fe9316c0 Update version for the next release (v0.18.1) 2021-10-21 11:56:14 +02:00
661bc21af5 Fix the filter parser
And add a bunch of tests on the filter::from_array
2021-10-21 11:45:03 +02:00
b6af84eb77 Merge #394
394:  Added search_geo benchmark in cron job r=irevoire a=fumblehool

fixes: #392 
`search_geo` cron will run every friday at 18:30

Co-authored-by: Damanpreet Singh <daman.4880@gmail.com>
2021-10-18 14:33:32 +00:00
7906461c14 Merge #396
396: Fix indexing benchmark GH actions upload filename r=irevoire a=fumblehool

fixes: #393 

Co-authored-by: Damanpreet Singh <daman.4880@gmail.com>
2021-10-18 13:34:10 +00:00
2e4604b0b9 fixed filename for search_* crons 2021-10-18 18:48:38 +05:30
4c34164d2e fixed filename for search_geo cron 2021-10-18 18:43:36 +05:30
9df4f3aaad Merge #397
397: Fix typo in repo r=curquiza a=saintmalik

Fix the single typo found in this repo

Co-authored-by: SaintMalik <37118134+saintmalik@users.noreply.github.com>
2021-10-18 11:59:48 +00:00
513d3178c6 Merge #398
398: Update version for the next release (v0.18.2) r=irevoire a=curquiza

Breaking because of https://github.com/meilisearch/milli/pull/358

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-18 11:47:26 +00:00
2209acbfe2 Update version for the next release (v0.18.2) 2021-10-18 13:45:48 +02:00
70121e3c6b fix typo in repo 2021-10-18 04:00:19 +01:00
59cc59e93e Merge #358
358: Replacing pest with nom  r=Kerollmops a=CNLHC



Co-authored-by: 刘瀚骋 <cn_lhc@qq.com>
2021-10-16 20:44:38 +00:00
493d9b98f5 fix indexing benchmark GH actions upload filename 2021-10-16 21:52:36 +05:30
efaef4f748 Added search_geo benchmark in cron job 2021-10-16 21:41:45 +05:30
7666e4f34a follow the suggestions 2021-10-14 21:37:59 +08:00
2ea2f7570c use nightly cargo to format the code 2021-10-14 16:46:13 +08:00
e750465e15 check logic for geolocation. 2021-10-14 16:12:00 +08:00
aa5e099718 Merge #390
390: Add helper methods on the settings r=Kerollmops a=irevoire

This would be a good addition to look at the content of a setting without consuming it.
It’s useful for analytics.

Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-10-13 20:36:30 +00:00
c7db4176f3 Merge #384
384: Replace memmap with memmap2 r=Kerollmops a=palfrey

[memmap is unmaintained](https://rustsec.org/advisories/RUSTSEC-2020-0077.html) and needs replacing. memmap2 is a drop-in replacement fork that's well maintained. Note that the version numbers got reset on fork, hence the lower values.

Co-authored-by: Tom Parker-Shemilt <palfrey@tevp.net>
2021-10-13 13:47:23 +00:00
a3e7c468cd add helper methods on the settings 2021-10-13 13:05:07 +02:00
cd359cd96e WIP: extract the error trait bound to new trait. 2021-10-13 18:04:15 +08:00
5de5dd80a3 WIP: remove '_nom' suffix/redundant error enum/... 2021-10-13 11:06:15 +08:00
2c65781d91 format 2021-10-12 22:20:22 +08:00
6e3b869e6a Merge #388
388: fix primary key inference r=MarinPostma a=MarinPostma

The primary key was inferred from a hashtable index of the fields. For this reason, the order in which the fields were iterated over was not deterministic, and the primary key was chosen from the first field containing "id".

This fix sorts the index by field_id when inferring the primary key.


Co-authored-by: mpostma <postma.marin@protonmail.com>
2021-10-12 09:25:16 +00:00
86ead92ed5 infer primary key on sorted fields 2021-10-12 11:15:11 +02:00
9a266a531b test correct primary key inference 2021-10-12 11:08:53 +02:00
3f7f24b90e Merge #368
368: Remove limit of 1000 position per attribute r=irevoire a=ManyTheFish

Instead of using an arbitrary limit we encode the absolute position in a u32
using one strong u16 for the field id and a weak u16 for the relative position in the attribute.

- [x] check database size difference

below is the database size difference for each dataset:
![Capture d’écran 2021-09-27 à 18 01 44](https://user-images.githubusercontent.com/6482087/134944199-bd25fed0-6c34-475c-9afc-197871e06553.png)

- [ ] check search time on big dataset


Related to [product#202](https://github.com/meilisearch/product/issues/202)

Co-authored-by: many <maxime@meilisearch.com>
2021-10-12 08:30:33 +00:00
c5a6075484 Make max_position_per_attributes changable 2021-10-12 10:10:50 +02:00
360c5ff3df Remove limit of 1000 position per attribute
Instead of using an arbitrary limit we encode the absolute position in a u32
using one strong u16 for the field id and a weak u16 for the relative position in the attribute.
2021-10-12 10:10:50 +02:00
d323e35001 add a test case 2021-10-12 13:30:40 +08:00
70f576d5d3 error handling 2021-10-12 13:30:40 +08:00
28f9be8d7c support syntax 2021-10-12 13:30:40 +08:00
469d92c569 tweak error handling 2021-10-12 13:30:40 +08:00
7a90a101ee reorganize parser logic 2021-10-12 13:30:40 +08:00
f7796edc7e remove everything about pest 2021-10-12 13:30:40 +08:00
ac1df9d9d7 fix typo and remove pest 2021-10-12 13:30:40 +08:00
50ad750ec1 enhance error handling 2021-10-12 13:30:40 +08:00
8748df2ca4 draft without error handling 2021-10-12 13:30:40 +08:00
8f6b6c9042 Merge #385
385: Fix the wiki indexing benchmark r=ManyTheFish a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2021-10-11 15:12:24 +00:00
07fb6d64e5 Merge #386
386: fix obkv document r=curquiza a=MarinPostma

When serializing a document, the serializer resolved the field_id of the current field and immediately added it to the obkv document under construction. The issue with that is that obkv expects the fields to be inserted in order, and when a document with out of order fields was added, obkv failed to insert the field.

The current fix first resolves each field_id, and adds all the fields to a temporary `BTreeMap`, until `end` is called on the map serializer, where all the fields are added to the obkv at once, and in order.


Co-authored-by: mpostma <postma.marin@protonmail.com>
2021-10-11 13:45:04 +00:00
e45c846af5 Merge #387
387: Update version for the next release (v0.17.2) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-11 13:21:47 +00:00
dd56e82dba Update version for the next release (v0.17.2) 2021-10-11 15:20:35 +02:00
99889a0ed0 add obkv document serialization test 2021-10-11 15:13:17 +02:00
799f3d43c8 fix serialization to obkv format 2021-10-11 15:04:47 +02:00
ed7fd855af fix the wiki indexing benchmark 2021-10-11 14:26:36 +02:00
2dfe24f067 memmap -> memmap2 2021-10-10 22:47:12 +01:00
a2743baaa3 Merge #383
383: Add check on latitude and longitude r=irevoire a=irevoire

Latitudes are not supposed to go beyond 90 degrees or below -90.
The same goes for longitudes with 180 or -180.

This was badly implemented in the filters, and was not implemented for the `AscDesc` rules.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-10-08 10:15:25 +00:00
b65aa7b5ac Apply suggestions from code review
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-10-07 17:51:52 +02:00
11dfe38761 Update the check on the latitude and longitude
Latitudes are not supposed to go beyond 90 degrees or below -90.
The same goes for longitude with 180 or -180.

This was badly implemented in the filters, and was not implemented for the AscDesc rules.
2021-10-07 16:10:43 +02:00
dde1da1c0e Merge #382
382: Refactor attribute criterion r=Kerollmops a=ManyTheFish

### Re-implement set based algorithm for attribute criterion
#### Levels
Instead of doing level iteration and digging in the interesting level, we only iterate over the lowest level.

#### crossword iteration VS minimal position iteration
Instead of crossing word positions in order to iterate strictly over the positions that give the best rank in the right order, we iterate word by word, starting with the word that increases the rank as little as possible.
This new method is a bit less precise but way simpler.

### Simplify word-level-position database
We don't use levels anymore in the attribute criterion, and so we removed the level complexity of the database making a word-position-docids database.

### Benchmarks on search on big datasets

#### songs main VS refactor-attribute-criterion
```diff
  group                                                   search_songsmain_31c18f09               search_songsrefactor-attribute-criterion_1bd15d84
  -----                                                   -------------------------               -------------------------------------------------
- smol-songs.csv: basic filter: <=/Notstandskomitee       1.00     84.8±0.58µs        ? ?/sec     1.09     92.2±8.98µs        ? ?/sec
+ smol-songs.csv: basic filter: TO/Notstandskomitee       1.18     98.0±6.30µs        ? ?/sec     1.00     83.2±0.97µs        ? ?/sec
+ smol-songs.csv: basic with quote/"david" "bowie"        114.68    76.0±0.20ms        ? ?/sec    1.00    662.5±5.03µs        ? ?/sec
- smol-songs.csv: basic with quote/"john"                 1.00    197.4±1.06µs        ? ?/sec     1.05    208.1±1.53µs        ? ?/sec
+ smol-songs.csv: basic with quote/"michael" "jackson"    2.75      2.0±0.01ms        ? ?/sec     1.00    738.9±3.91µs        ? ?/sec
+ smol-songs.csv: basic without quote/david bowie         297.42  1499.3±0.86ms        ? ?/sec    1.00      5.0±0.02ms        ? ?/sec
+ smol-songs.csv: basic without quote/michael jackson     2.55      8.9±0.02ms        ? ?/sec     1.00      3.5±0.01ms        ? ?/sec
+ smol-songs.csv: big filter/john                         1.08    473.6±2.25µs        ? ?/sec     1.00    438.1±2.59µs        ? ?/sec
- smol-songs.csv: prefix search/a                         1.00    446.9±1.81µs        ? ?/sec     1.79    800.5±4.45µs        ? ?/sec
- smol-songs.csv: prefix search/b                         1.00    398.5±2.74µs        ? ?/sec     1.81    723.1±5.46µs        ? ?/sec
- smol-songs.csv: prefix search/i                         1.00    486.3±1.99µs        ? ?/sec     1.69    823.6±9.42µs        ? ?/sec
- smol-songs.csv: prefix search/s                         1.00    229.6±3.29µs        ? ?/sec     2.59    594.4±2.22µs        ? ?/sec
- smol-songs.csv: prefix search/x                         1.00    150.2±0.76µs        ? ?/sec     1.11    166.0±0.87µs        ? ?/sec
```

On songs, the new algorithm gives a big improvement on slow queries, and is slower on one char prefix search (fast queries <1ms).

#### wiki main VS refactor-attribute-criterion
```diff
  group                                                           search_wikimain_31c18f09               search_wikirefactor-attribute-criterion_1bd15d84
  -----                                                           ------------------------               ------------------------------------------------
- smol-wiki-articles.csv: basic with quote/"rock" "and" "roll"    1.00      3.2±0.01ms        ? ?/sec    1.15      3.7±0.01ms        ? ?/sec
- smol-wiki-articles.csv: basic without quote/film                1.00    351.5±2.47µs        ? ?/sec    1.13    396.8±1.63µs        ? ?/sec
+ smol-wiki-articles.csv: basic without quote/rock and roll       1.10      9.4±0.02ms        ? ?/sec    1.00      8.6±0.04ms        ? ?/sec
- smol-wiki-articles.csv: basic without quote/spain               1.00    446.0±3.23µs        ? ?/sec    1.11    496.6±7.75µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/c                         1.00    115.6±0.61µs        ? ?/sec    2.22    256.7±1.24µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/g                         1.00    189.7±2.03µs        ? ?/sec    1.57    297.0±1.35µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/j                         1.00    209.2±1.11µs        ? ?/sec    1.40    293.0±2.09µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/q                         1.00     79.0±0.44µs        ? ?/sec    1.10     87.2±0.69µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/t                         1.00    270.1±1.15µs        ? ?/sec    1.55    419.9±5.16µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/x                         1.00    244.9±1.33µs        ? ?/sec    1.07    260.9±1.95µs        ? ?/sec
- smol-wiki-articles.csv: words/Abraham machin                    1.00      8.1±0.03ms        ? ?/sec    1.17      9.4±0.02ms        ? ?/sec
- smol-wiki-articles.csv: words/Idaho Bellevue pizza              1.00     19.3±0.07ms        ? ?/sec    1.07     20.6±0.05ms        ? ?/sec
```
On wiki we have some regressions `+17%` and `+15%` on request `>1ms`.

Co-authored-by: many <maxime@meilisearch.com>
2021-10-06 09:19:33 +00:00
085bc6440c Apply PR comments 2021-10-06 11:12:26 +02:00
1bd15d849b Reduce candidates threshold 2021-10-05 18:52:14 +02:00
ea4bd29d14 Apply PR comments 2021-10-05 17:35:07 +02:00
5ed75de0db Update infos crate 2021-10-05 13:56:12 +02:00
3296bb243c Simplify word level position DB into a word position DB 2021-10-05 12:15:02 +02:00
75d341d928 Re-implement set based algorithm for attribute criterion 2021-10-05 12:14:50 +02:00
31c18f0953 Merge #381
381: Update version for the next release (v0.17.1) r=irevoire a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-03 02:12:43 +00:00
05d8a33a28 Update version for the next release (v0.17.1) 2021-10-02 16:21:31 +02:00
c9092c72bf Merge #380
380: Reserved keyword error message r=Kerollmops a=irevoire

And I missed _another_ reserved keyword error message in the filter :(

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-10-01 07:13:31 +00:00
d9eba9d145 improve and test the sort error message 2021-09-30 14:38:27 +02:00
0ee67bb7d1 improve the reserved keyword error message for the filters 2021-09-30 14:38:27 +02:00
22551d0941 Merge #379
379: Revert "Change chunk size to 4MiB to fit more the end user usage" r=curquiza a=ManyTheFish

Reverts meilisearch/milli#370

Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-09-29 13:20:53 +00:00
26b5dad042 Revert "Change chunk size to 4MiB to fit more the end user usage" 2021-09-29 15:08:39 +02:00
6a057a3bd0 Merge #378
378: Hotfix meilisearch#1707 r=Kerollmops a=ManyTheFish

This PR contains an ugly quick fix of [meilisearch#1707](https://github.com/meilisearch/MeiliSearch/issues/1707).

- remove comparison reverse on rank. Enhancing relevancy and performances
- iterate over level 0 only. Enhancing performances.

A better fix is in development.

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-09-29 12:57:31 +00:00
2e49230ca2 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-29 14:49:45 +02:00
7ad0214089 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-29 14:49:41 +02:00
1df5b8712b Hotfix meilisearch#1707 2021-09-29 14:41:56 +02:00
bfedbc1b6d Merge #374
374: Enhance CSV document parsing r=Kerollmops a=ManyTheFish

Benchmarks on `search_songs` were crashing because of the CSV parsing.

Co-authored-by: many <maxime@meilisearch.com>
2021-09-29 08:55:54 +00:00
68c758a533 Merge #376
376: Stop casting integer docids to string r=Kerollmops a=irevoire

When a docid is an integer, we stop casting it to a string, and thus we don't add `"` around it.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-29 08:32:48 +00:00
d2427f18e5 Enhance CSV document parsing 2021-09-29 10:25:33 +02:00
00f94b1ffd Merge #377
377: Update version for the next release (v0.17.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-28 20:43:33 +00:00
0e8665bf18 Update version for the next release (v0.17.0) 2021-09-28 19:38:12 +02:00
f65153ad64 stop casting integer docids to string 2021-09-28 18:35:54 +02:00
adddf3f179 Merge #375
375: Fixes #365 r=Kerollmops a=vishnugt



Co-authored-by: Vishnu Ganesan <vganesan@microsoft.com>
Co-authored-by: Vishnu Gt <vishnugt@hotmail.com>
2021-09-28 14:42:48 +00:00
785c1372f2 Change "settings" to "setting"
Co-authored-by: Clément Renault <renault.cle@gmail.com>
2021-09-28 20:11:32 +05:30
3580b2d803 Fixes #365 2021-09-28 19:30:23 +05:30
3a12f5887e Merge #373
373: Improve error message for bad sort syntax with geosearch r=Kerollmops a=irevoire

`@Kerollmops` This should be the last PR for the geosearch and error handling, sorry for doing it in so many steps 😬 

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-28 12:39:32 +00:00
a80dcfd4a3 improve error message for bad sort syntax with geosearch 2021-09-28 14:32:24 +02:00
b2a332599e Merge #372
372: Fix Meilisearch 1714 r=Kerollmops a=ManyTheFish

The bug comes from the typo tolerance, to know how many typos are accepted we were counting bytes instead of characters in a word.
On Chinese Script characters, we were allowing 2 typos on 3-character words.
We are now counting the number of char instead of counting bytes to assign the typo tolerance.

Related to [Meilisearch#1714](https://github.com/meilisearch/MeiliSearch/issues/1714)

Co-authored-by: many <maxime@meilisearch.com>
2021-09-28 11:59:45 +00:00
8046ae4bd5 Count the number of char instead of counting bytes to assign the typo tolerance 2021-09-28 12:10:43 +02:00
1988416295 Add failing test related to Meilisearch#1714 2021-09-28 12:05:11 +02:00
3b479948c6 Merge #371
371: Provide a sort error handler r=Kerollmops a=irevoire

This PR simplifies the error handling of asc-desc rules for Meilisearch or any other wrapper by providing directly in milli a new error type called `SortError` that can be generated from an `AscDescError` and that can be automatically converted to a `UserError`.

Basically now, wherever you are in the code as a user or in milli you can parse an `AscDesc` syntax and depending on the context, cast it either as a `SortError` or a `CriterionError` in one line with improved error messages.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-28 09:28:32 +00:00
cc732fe95e update http-ui to use the sort-error 2021-09-28 11:15:24 +02:00
c7cb816ae1 simplify the error handling of the sort syntax for meilisearch 2021-09-27 19:07:22 +02:00
4c09f6838f Merge #370
370: Change chunk size to 4MiB to fit more the end user usage r=ManyTheFish a=ManyTheFish

We made several indexing tests using different sizes of datasets (5 datasets from 9MiB to 100MiB) on several typologies of VMs (`XS: 1GiB RAM, 1 VCPU`, `S: 2GiB RAM, 2 VCPU`, `M: 4GiB RAM, 3 VCPU`, `L: 8GiB RAM, 4 VCPU`).
The results of these tests show that the `4MiB` chunk size seems to be the best size compared to other chunk sizes (`2MiB`, `4MiB`, `8MiB`, `16MiB`, `32MiB`, `64MiB`, `128MiB`).

below is the average time per chunk size:

![Capture d’écran 2021-09-27 à 14 27 50](https://user-images.githubusercontent.com/6482087/134909368-ef0bc45e-68d5-49d1-aaf9-91113b7c410f.png)

<details>
<summary>Detailed data</summary>
<br>

![Capture d’écran 2021-09-27 à 14 39 48](https://user-images.githubusercontent.com/6482087/134909952-a36b1457-bbbd-4a6c-bbe5-519e4b926b5a.png)
</br>
</details> 


Co-authored-by: many <maxime@meilisearch.com>
2021-09-27 12:57:52 +00:00
b188063869 Change chunk size to 4MiB to fit more the end user usage 2021-09-27 14:26:21 +02:00
0f8320bdc2 Merge #369
369: Add test checking the bug reported in meilisearch issue 1716 r=Kerollmops a=ManyTheFish

The bug is not present in the newer milli version.

Related to [Meilisearch#1716](https://github.com/meilisearch/MeiliSearch/issues/1716)

Co-authored-by: many <maxime@meilisearch.com>
2021-09-23 14:27:34 +00:00
551df0cb77 Add test checking the bug reported in meilisearch issue 1716 2021-09-23 15:55:39 +02:00
87dd441a3a Merge #367
367: Update version for the next release (v0.16.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-22 15:20:20 +00:00
1eacab2169 Update version for the next release (v0.15.1) 2021-09-22 17:18:54 +02:00
b806097141 Merge #366
366: Geosearch error handling r=Kerollmops a=irevoire

Rewrite most of geosearch error handling and another batch of tests on the criterion parsing.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-09-22 15:08:11 +00:00
218f0a6661 Apply suggestions from code review
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-22 17:00:27 +02:00
47ee93b0bd return an error when _geoPoint is used but _geo is not sortable 2021-09-22 16:37:41 +02:00
1e5e3d57e2 auto convert AscDescError into CriterionError 2021-09-22 16:37:41 +02:00
023446ecf3 create a smaller and easier to maintain CriterionError type 2021-09-22 16:37:41 +02:00
86e272856a create an asc_desc error type that is never supposed to be returned to the end user 2021-09-22 16:37:41 +02:00
257e621d40 create an asc_desc module 2021-09-22 16:37:41 +02:00
113a061bee fix the error handling on the criterion side 2021-09-22 15:09:07 +02:00
ad3befaaf5 Merge #364
364: Fix all the benchmarks  r=Kerollmops a=irevoire

#324 broke all benchmarks.
I fixed everything and noticed that `cargo check --all` was insufficient to check the bench in multiple workspaces, so I also updated the CI to use `cargo check --workspace --all-targets`.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-22 12:40:34 +00:00
176160d32f fix all benchmarks and add the compile time checking of the benchmarks in the ci 2021-09-22 12:10:21 +02:00
16790ee620 Merge #363
363: Fix the returned `AscDesc` error r=Kerollmops a=irevoire

With my previous PR on the geosearch I erased the change I've introduced with my pre-previous PR about the new error type when we fail to parse the `AscDesc` type.

Sorry for that, here is the fix

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-22 09:53:35 +00:00
78b0bce9a1 fix the returned error when asc desc fails to be parsed 2021-09-22 11:37:05 +02:00
2837cab5da Merge #362
362: Remove the `Cargo.lock` again r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-22 09:33:09 +00:00
2e99fa8251 remove the cargo.lock again 2021-09-22 11:30:33 +02:00
fe9f380993 Merge #361
361: Update version for the next release (v0.15.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-21 16:19:16 +00:00
f8ecbc28e2 Update version for the next release (v0.15.0) 2021-09-21 18:09:14 +02:00
700318dc62 Merge #357
357: Add benchmarks for the geosearch r=Kerollmops a=irevoire

closes #336

Should I merge this PR in #322 and then we merge everything in `main` or should we wait for #322 to be merged and then merge this one in `main` later?

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-09-21 16:08:06 +00:00
9d9010e45f Merge #324
324: Implement documents API r=Kerollmops a=MarinPostma

This PR implements the intermediary document representation for milli. The JSON, JSONL and CSV formats are replaced with this format instead, to push the serialization duty on the client side.

The `documents` module contains the interface to the new document format:

- The `DocumentsBuilder` allows the creation of a writer backed document addition, when documents are added either one by one, or as arrays of depth 1. This is made possible by the fact that the serializer used by the `add_documents` methods only accepts `[Object]` and `Object`. The related serialization logic is located in the `serde.rs` file.
- The `DocumentsReader` allows iterating over the documents created by a `DocumentsBuilder`. A call to `next_document_with_index` returns the next obkv reader in the document addition, along with a reference to the index used to map the field ids in the obkv reader to the field names

All references to json, jsonl or csv in the tests have been replaced with the `documents!` macro, which works exactly like the `serde_json::json` macro, as a convenient way to create a `DocumentsReader`.

Rewrote the search cli, to the `cli` crate, to also allow index manipulation. This only offers basic functionalities for now, but is meant to be easier to extend than http ui


blocked by #308

Co-authored-by: mpostma <postma.marin@protonmail.com>
2021-09-21 15:40:03 +00:00
aa6c5df0bc Implement documents format
document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits
2021-09-21 16:58:33 +02:00
94764e5c7c Merge #360
360: Update version for the next release (v0.14.0) r=Kerollmops a=curquiza

Release containing the geosearch, cf #322 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-21 08:43:27 +00:00
31c8de1cca Merge #322
322: Geosearch r=ManyTheFish a=irevoire

This PR introduces [basic geo-search functionalities](https://github.com/meilisearch/specifications/pull/59), it makes the engine able to index, filter, and sort by geo-point. We decided to use [the rstar library](https://docs.rs/rstar) and to save the points in [an RTree](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html) that we de/serialize in the index database [by using serde](https://serde.rs/) with [bincode](https://docs.rs/bincode). This is not an efficient way to query this tree as it will consume a lot of CPU and memory when a search is made, but at least it is an easy first way to do so.

### What we will have to do on the indexing part:
 - [x] Index the `_geo` fields from the documents.
   - [x] Create a new module with an extractor in the `extract` module that takes the `obkv_documents` and retrieves the latitude and longitude coordinates, outputting them in a `grenad::Reader` for further process.
   - [x] Call the extractor in the `extract::extract_documents_data` function and send the result to the `TypedChunk` module.
   - [x] Get the `grenad::Reader` in the `typed_chunk::write_typed_chunk_into_index` function and store all the points in the `rtree`
- [x] Delete the documents from the `RTree` when deleting documents from the database. All this can be done in the `delete_documents.rs` file by getting the data structure and removing the points from it, inserting it back after the modification.
- [x] Clearing the `RTree` entirely when we clear the documents from the database, everything happens in the `clear_documents.rs` file.
- [x] save a Roaring bitmap of all documents containing the `_geo` field

### What we will have to do on the query part:
- [x] Filter the documents at a certain distance around a point, this is done by [collecting the documents from the searched point](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html#method.nearest_neighbor_iter) while they are in range.
  - [x] We must introduce new `geoLowerThan` and `geoGreaterThan` variants to the `Operator` filter enum.
  - [x] Implement the `negative` method on both variants where the `geoGreaterThan` variant is implemented by executing the `geoLowerThan` and removing the results found from the whole list of geo faceted documents.
  - [x] Add the `_geoRadius` function in the pest parser.
- [x] Introduce a `_geo` ascending ranking function that takes a point in parameter, ~~this function must keep the iterator on the `RTree` and make it peekable~~ This was not possible for now, we had to collect the whole iterator. Only the documents that are part of the candidates must be sent too!
  - [x] This ascending ranking rule will only be active if the search is set up with the `_geoPoint` parameter that indicates the center point of the ascending ranking rule.

-----------

- On Meilisearch part: We must introduce a new concept, returning the documents with a new `_geoDistance` field when it passed by the `_geo` ranking rule, this has never been done before. We could maybe just do it afterward when the documents have been retrieved from the database, computing the distance from the `_geoPoint` and all of the documents to be returned.

Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: cvermand <33010418+bidoubiwa@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-20 19:04:57 +00:00
0d104a0fce Update milli/src/criterion.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-20 18:13:17 +02:00
3f1453f470 Update version for the next release (v0.14.0) 2021-09-20 18:12:23 +02:00
f4b8e5675d move the reserved keyword logic for the criterion and sort + add test 2021-09-20 17:21:02 +02:00
3b7a2cdbce fix typo
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-20 16:10:39 +02:00
203aa727a7 Merge #359
359: Improve the benchmark comparison script r=irevoire a=irevoire

This modification allows us to compare more than 2 benchmarks or to only print the results of one benchmark



Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-20 12:39:59 +00:00
eaba772f21 update the README to better match the new critcmp usage
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-20 10:59:55 +02:00
9a920d1f93 Fix datasets links in the readme
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-20 10:44:37 +02:00
5e683ba472 add benchmarks for the geosearch 2021-09-20 10:44:37 +02:00
f6c6b026bb improve the comparison script 2021-09-16 11:25:51 +02:00
c695a1ffd2 add the possibility to sort by descending order on geoPoint 2021-09-15 11:49:58 +02:00
91ce4d1721 Stop iterating through the whole list of points
We stop when there is no possible candidates left
2021-09-15 11:49:58 +02:00
3b1885859d Merge #356
356: Update the README r=curquiza a=Kerollmops

This PR updates a little bit the README and more specifically the indexing times, fixes #352.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-09-14 10:13:05 +00:00
2741aa8589 Update the indexing timings in the README 2021-09-14 11:42:59 +02:00
a43f99c600 Inform the users that documents must have an id in their documents 2021-09-13 14:01:02 +02:00
90d64d257f Merge #354
354: Update version for the next release (v0.13.1) r=ManyTheFish a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-13 09:30:07 +00:00
f167f7b412 Update version for the next release (v0.13.1) 2021-09-10 09:48:17 +02:00
4af31ec9a6 Merge #353
353: Add lacking parameter to word level position builder r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-09-09 16:36:33 +00:00
cfc62a1c15 use geoutils instead of haversine 2021-09-09 18:11:38 +02:00
26deeb45a3 Add lacking parameter to word level position builder 2021-09-09 17:49:04 +02:00
3fc145c254 if we have no rtree we return all other provided documents 2021-09-09 17:44:09 +02:00
a84f3a8b31 Apply suggestions from code review
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-09 15:09:35 +02:00
c81ff22c5b delete the invalid criterion name error in favor of invalid ranking rule name 2021-09-08 19:17:00 +02:00
bad8ea47d5 edit the two lasts TODO comments 2021-09-08 18:24:09 +02:00
b15c77ebc4 return an error in case a user try to sort with :desc 2021-09-08 18:24:09 +02:00
4b618b95e4 rebase on main 2021-09-08 18:24:09 +02:00
2988d3c76d tests the geo filters 2021-09-08 18:24:09 +02:00
e5ef0cad9a use meters in the filters 2021-09-08 18:24:09 +02:00
4f69b190bc remove the distance from the search, the computation of the distance will be made on meilisearch side 2021-09-08 18:24:09 +02:00
7ae2a7341c introduce the reserved keywords in the filters 2021-09-08 18:24:09 +02:00
6d5762a6c8 handle the case where you forgot entirely the parenthesis 2021-09-08 18:24:09 +02:00
ebf82ac28c improve the error messages and add tests for the filters 2021-09-08 18:24:09 +02:00
bd4c248292 improve the error handling in general and introduce the concept of reserved keywords 2021-09-08 18:24:09 +02:00
e8c093c1d0 fix the error handling in the filters 2021-09-08 18:24:09 +02:00
f0b74637dc fix all the tests 2021-09-08 18:24:09 +02:00
b1bf7d4f40 reformat 2021-09-08 18:24:09 +02:00
aca707413c remove the memory leak 2021-09-08 18:24:09 +02:00
a8a1f5bd55 move the geosearch criteria out of asc_desc.rs 2021-09-08 18:24:09 +02:00
dc84ecc40b fix a bug 2021-09-08 18:24:09 +02:00
7483614b75 [HTTP-UI] add the sorters 2021-09-08 18:24:09 +02:00
4820ac71a6 allow spaces in a geoRadius 2021-09-08 18:24:09 +02:00
13c78e5aa2 Implement the _geoPoint in the sortable 2021-09-08 18:24:09 +02:00
5bb175fc90 only index _geo if it's set as sortable OR filterable
and only allow the filters if geo was set to filterable
2021-09-08 17:51:08 +02:00
f73273d71c only call the extractor if needed 2021-09-08 17:51:08 +02:00
4fd0116a0d Stringify objects on dashboard to avoid [Object object] 2021-09-08 17:51:08 +02:00
ea2f2ecf96 create a new database containing all the documents that were geo-faceted 2021-09-08 17:51:08 +02:00
4b459768a0 create the _geoRadius filter 2021-09-08 17:51:07 +02:00
6d70978edc update the facet filter grammar 2021-09-08 17:51:07 +02:00
216a8aa3b2 add a tests for the indexation of the geosearch 2021-09-08 17:51:07 +02:00
a21c854790 handle errors 2021-09-08 17:51:07 +02:00
70ab2c37c5 remove multiple bugs 2021-09-08 17:51:07 +02:00
b4b6ba6d82 rename all the ’long’ into ’lng’ like written in the specification 2021-09-08 17:51:07 +02:00
3b9f1db061 implement the clear of the rtree 2021-09-08 17:51:07 +02:00
d344489c12 implement the deletion of geo points 2021-09-08 17:51:07 +02:00
44d6b6ae9e Index the geo points 2021-09-08 17:51:07 +02:00
8d9c2c4425 create a new db with getters and setters 2021-09-08 17:51:07 +02:00
b22aac92ac Merge #342
342: Let the caller decide what kind of error they want to returns when parsing `AscDesc` r=Kerollmops a=irevoire

This is one possible fix for #339 
We would then need to patch these lines https://github.com/meilisearch/MeiliSearch/blob/main/meilisearch-http/src/index/search.rs#L110-L114 to return the error we want.

Another solution would be to add a parameter to the `from_str` to specify which context we are in.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-08 14:18:57 +00:00
932998f5cc let the caller decide if they want to return an invalidSortName or an
invalidCriterionName error
2021-09-08 16:17:31 +02:00
86c3b0c8c2 Merge #350
350: Fix mdb val size error r=Kerollmops a=ManyTheFish

Related to [#1677](https://github.com/meilisearch/MeiliSearch/issues/1677)

Co-authored-by: many <maxime@meilisearch.com>
2021-09-08 13:32:15 +00:00
e54280fbfc Skip empty normalized words 2021-09-08 15:25:23 +02:00
d18ee58ab9 Check if key are not empty in validator 2021-09-08 15:25:23 +02:00
63bc231243 Merge #349
349: Enable the grenad tempfile feature back r=ManyTheFish a=Kerollmops

This PR enables the grenad `tempfile` feature back, [when this feature is disabled the sorter writes the entries in memory](7c082d05bf/src/sorter.rs (L470-L476)) instead of on disk and, therefore, consumes more memory. By enabling this feature grenad merges on disk by using the `tempfile` dependency.

This PR also bumps milli to v0.3.1 where `@ManyTheFish` added an assert for when the allocator can't allocate and disable the default snappy compression in the `http-ui` crate.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-09-08 13:23:57 +00:00
68856e5e2f Disable the default snappy compression for the http-ui crate 2021-09-08 14:17:32 +02:00
8a088fb99e Bump grenad to v0.3.1 2021-09-08 14:08:55 +02:00
20ad43b908 Enable the grenad tempfile feature back 2021-09-08 14:06:28 +02:00
772e55d174 Merge #347
347: Update version for the next release (v0.13.0) r=curquiza a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-08 11:41:15 +00:00
d160305868 Merge #348
348: Drop sorter before creating a new one r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-09-08 11:34:20 +00:00
9961b78b06 Drop sorter before creating a new one 2021-09-08 13:30:26 +02:00
eb7b9d9dbf Update version for the next release (v0.13.0) 2021-09-08 10:59:30 +02:00
f5e418ace7 Merge #345
345: Better dependencies cache for CI r=irevoire a=shekhirin



Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-09-08 08:43:19 +00:00
48d211b8b0 Merge #344
344: Move the sort ranking rule before the exactness ranking rule r=ManyTheFish a=Kerollmops

This PR moves the sort ranking rule at the 5th position by default, right before the exactness one.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-09-07 15:47:15 +00:00
dbd91e7151 chore(ci): use smarter dependencies cache 2021-09-07 18:16:33 +03:00
720becb5e8 Merge #341
341: Throw a query time error when a sort parameter is used but the sort ranking rule is missing r=Kerollmops a=Kerollmops

This PR makes the engine throw an error for when the ranking rules don't contain the `sort` rule, the `sortable_fields` are correctly set but the user tries to use the `sort` query parameter. Doing so will have no effect on the returned documents so we preferred returning an error to help debug this.

That's breaking on the MeiliSearch side as we added a new variant to the `UserError` enum.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-09-07 14:45:05 +00:00
e2cefc9b4f Move the sort ranking rule before the exactness ranking rule 2021-09-07 16:41:33 +02:00
a0b3620b05 Merge #346
346: remove unused grenad default features r=Kerollmops a=MarinPostma

Milli is not using any of grenad default features, and its zstd feature creates a conflict with meilisearch. This PR simply removes the unused features.


Co-authored-by: mpostma <postma.marin@protonmail.com>
2021-09-07 14:30:20 +00:00
cd043d4461 remove unused grenad default features 2021-09-07 16:21:46 +02:00
5989528833 Add a test to make sure we throw the right error message 2021-09-07 11:02:00 +02:00
fd3daa4423 Throw a query time error when a sort param is used but sort ranking rule is missing 2021-09-07 11:02:00 +02:00
8dca36433c Introduce the new SortRankingRuleMissing user error variant 2021-09-07 11:01:59 +02:00
446ed17589 Merge #338
338: Fix string fields sorting r=Kerollmops a=shekhirin

Resolves https://github.com/meilisearch/milli/issues/333

<details>
  <summary>curl checks</summary>
  
 ```console
➜  ~ curl -s 'localhost:7700/indexes/movies/search' -d '{"sort": ["title:asc"], "limit": 30}' | jq -r '.hits | map(.title)[]'
#1 Cheerleader Camp
#Horror
#RealityHigh
#Roxy
#SquadGoals
$5 a Day
$9.99
'71
(2)
(500) Days of Summer
(Girl)Friend
*batteries not included
...And God Created Woman
...And Justice for All
...E fuori nevica!
.45
1
1 Mile To You
1 Night
10
10 Cloverfield Lane
10 giorni senza mamma
10 Items or Less
10 Rillington Place
10 Rules for Sleeping Around
10 Things I Hate About You
10 to Midnight
10 Years
10,000 BC
10,000 Saints

➜  ~ curl -s 'localhost:7700/indexes/movies/search' -d '{"sort": ["title:desc"], "limit": 30}' | jq -r '.hits | map(.title)[]'
크게 될 놈
왓칭
뷰티플 마인드
노무현과 바보들
ハニー
Счастье – это… Часть 2
СОТКА
Смотри мою любовь
Позивний 'Бандерас'
Лошо момиче
Күлүк Хомус
Куда течет море
Каникулы президента
Ακίνητο Ποτάμι
Üç Harfliler: Beddua
È nata una Star?
Æon Flux
Ága
À propos de Nice
À Nos Amours
À l'aventure
¡Three Amigos!
Zulu Dawn
Zulu
Zulu
Zu: Warriors from the Magic Mountain
Zu Warriors
Zorro
Zorba the Greek
Zootopia
```
</details>

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-09-07 08:28:23 +00:00
0be09555f1 test(search): asc/desc criteria for large datasets 2021-09-03 18:00:08 +03:00
c2517e7d5f fix(facet): string fields sorting 2021-09-03 11:58:26 +03:00
5cbe879325 Merge #308
308: Implement a better parallel indexer r=Kerollmops a=ManyTheFish

Rewrite the indexer:
- enhance memory consumption control
- optimize parallelism using rayon and crossbeam channel
- factorize the different parts and make new DB implementation easier
- optimize and fix prefix databases


Co-authored-by: many <maxime@meilisearch.com>
2021-09-02 15:03:52 +00:00
741a4444a9 Remove log in chunk generator 2021-09-02 16:57:46 +02:00
7f7fafb857 Make document_chunk_size settable from update builder 2021-09-02 15:25:39 +02:00
db0c681bae Fix Pr comments 2021-09-02 15:17:52 +02:00
46f7df232a Merge #337
337: Update version for the next release (v0.12.0) r=Kerollmops a=curquiza

Breaking because of the new indexer that implies DB changes #308 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-02 10:13:31 +00:00
285849e3a6 Update version for the next release (v0.12.0) 2021-09-02 10:08:41 +02:00
a589f6c60b Merge #335
335: Get sortable_fields from index only if criteria present in query r=Kerollmops a=shekhirin

Seems like we don't need to retrieve `sortable_fields` from the index if there aren't any `sort_criteria` in the query.

Small 🤏  optimization opportunity out there.

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-09-01 16:01:00 +00:00
3e0a78acf3 Merge #329
329: Run all benchmarks once every friday r=irevoire a=irevoire

All the benchmarks run every Friday on the `main` branch.
To avoid having pending benchmarks everywhere, we execute one benchmark every 8 hours.
Then the results are uploaded as if it was a normal user-run benchmark.

This PR closes #314 and #321 

Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-01 15:20:49 +00:00
4860fd4529 Ignore empty facet values 2021-09-01 16:48:40 +02:00
b3a22f31f6 Fix memory consumption in word pair proximity extractor 2021-09-01 16:48:40 +02:00
9452fabfb2 Optimize cbo roaring bitmaps merge 2021-09-01 16:48:40 +02:00
8f702828ca Ignore errors coming from crossbeam channel senders 2021-09-01 16:48:40 +02:00
e09eec37bc Handle distance addition with hard separators 2021-09-01 16:48:40 +02:00
fc7cc770d4 Add logging timers 2021-09-01 16:48:40 +02:00
a2f59a28f7 Remove unwrap sending errors in channel 2021-09-01 16:48:40 +02:00
5c962c03dd Fix and optimize word_prefix_pair_proximity_docids database 2021-09-01 16:48:40 +02:00
2d1727697d Take stop word in account 2021-09-01 16:48:40 +02:00
823da19745 Fix test and use progress callback 2021-09-01 16:48:39 +02:00
1d314328f0 Plug new indexer 2021-09-01 16:48:36 +02:00
3aaf1d62f3 Publish grenad CompressionType type in milli 2021-09-01 16:42:08 +02:00
0e379558a1 fix(search): get sortable_fields only if criteria present 2021-08-31 21:35:41 +03:00
d6bba0663a Merge #334
334: Wrap long values into BStr for warn logs r=Kerollmops a=shekhirin

Resolves https://github.com/meilisearch/milli/issues/263

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-08-31 17:38:54 +00:00
0b02eb456c chore(update): wrap long values into BStr for warn logs 2021-08-31 20:28:16 +03:00
df38794c7d Merge #330
330: Introduce the reset_sortable_fields Settings method r=irevoire a=Kerollmops

I forgot to add the `reset_sortable_fields` method on the `Settings` builder, it is no big deal as the library user (like MeiliSearch) can always call `set_sortable_fields` with an empty list of fields, it is equivalent.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
2021-08-30 23:26:54 +00:00
6cdb6722d1 Merge #332
332: Sortable attributes in http-ui r=Kerollmops a=irevoire

- Add a `reset_sortable_attribute` method
- Add the `sortable_attributes` to http-ui
- Fix some broken test in http-ui

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-08-30 15:31:05 +00:00
d106eb5b90 add the sortable attributes to http-ui and fix the tests 2021-08-30 16:25:10 +02:00
5e639bc0c1 postfix all action name with (cron) 2021-08-30 13:55:00 +02:00
49a6d2d5f1 run all benchmarks once every friday 2021-08-30 13:55:00 +02:00
f230ae6fd5 Introduce the reset_sortable_fields Settings method 2021-08-25 17:44:16 +02:00
c8930781eb Merge #328
328: Remove `beta` compilation in CI r=Kerollmops a=shekhirin

Resolves https://github.com/meilisearch/milli/issues/326

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-08-25 08:45:18 +00:00
01461af333 chore(ci): remove Rust beta from tests job 2021-08-24 22:18:13 +03:00
c51bb6789c Merge #325
325: Update milli version to v0.11.0 r=curquiza a=Kerollmops

This PR also clean-up some dependencies in the Cargo.toml.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-08-24 16:18:49 +00:00
af65485ba7 Reexport the grenad CompressionType from milli 2021-08-24 18:15:31 +02:00
f2e1591826 Remove the unused tinytemplate dependency 2021-08-24 18:10:58 +02:00
2f20257070 Update milli to the v0.11.0 2021-08-24 18:10:11 +02:00
794c0f64a9 Merge #315
315: Rewrite the indexing benchmarks r=Kerollmops a=irevoire

There was a panic on the benchmark and while I was trying to understand what was happening I decided to rewrite the way the benchmarks were working.

Before we were creating a database with the good setting, and then for each benchmarks we were:
1. Deleting all documents in the database
2. Indexing a batch of documents

Now for each iteration we recreate entirely a new database from scratch.
Since deleting all the documents in a database may not be the same as starting with a fresh new database I prefer this solution.

Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-08-24 15:34:50 +00:00
731e0e5321 Merge #320
320: Sort at query time r=Kerollmops a=Kerollmops

Re-introduce the Sort at the query time (https://github.com/meilisearch/milli/issues/305)

Co-authored-by: Clément Renault <renault.cle@gmail.com>
2021-08-24 14:19:43 +00:00
89d0758713 Revert "Revert "Sort at query time"" 2021-08-24 11:55:16 +02:00
879d5e8799 Merge #319
319: Update version for the next release (v0.10.2) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-23 10:03:23 +00:00
88f6c18665 Update version for the next release (v0.10.2) 2021-08-23 11:33:30 +02:00
aa1ce97748 Merge #317
317: Fix the facet string docids filterable deletion bug r=Kerollmops a=Kerollmops

Fixes a bug where the deletion of documents was returning a decoding error. But only when the settings are set with filterable attributes.

This bug was introduced in #254 in which we made the engine faster in returning the facet distribution. We changed the way we were storing the inverted index: we were no longer storing only document ids with the original values but also groups identified with integers, depending on the facet level we were using. This is similar to how facet numbers are already stored.

⚠️ As `@curquiza` already said, we must first revert #309 before merging this!

Related to https://github.com/meilisearch/MeiliSearch/issues/1601.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-08-23 08:57:16 +00:00
c084f7f731 Fix the facet string docids filterable deletion bug 2021-08-23 10:50:39 +02:00
0d1f83ba4b Merge #318
318: Revert "Sort at query time" r=Kerollmops a=curquiza

Reverts meilisearch/milli#309

We revert this from `main` not because this leads to a bug, but because we don't want to release it now and we have to merge and release a hotfix on `main`.
Cf:
- https://github.com/meilisearch/milli/issues/316
- https://github.com/meilisearch/milli/pull/317

Once the v0.21.0 is released, we should merge again this awesome addition 👌 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-21 08:25:17 +00:00
922f9fd4d5 Revert "Sort at query time" 2021-08-20 18:09:17 +02:00
4b99d8cb91 rewrite the indexing benchmarks 2021-08-19 15:02:43 +02:00
41fc0dcb62 Merge #309
309: Sort at query time r=Kerollmops a=Kerollmops

This PR:
 - Makes the `Asc/Desc` criteria work with strings too, it first returns documents ordered by numbers then by strings, and finally the documents that can't be ordered. Note that it is lexicographically ordered and not ordered by character, which means that it doesn't know about wide and short characters i.e. `a`, `丹`, `▲`.
 - Changes the syntax for the `Asc/Desc` criterion by now using a colon to separate the name and the order i.e. `title:asc`, `price:desc`.
 - Add the `Sort` criterion at the third position in the ranking rules by default.
 - Add the `sort_criteria` method to the `Search` builder struct to let the users define the `Asc/Desc` sortable attributes they want to use at query time. Note that we need to check that the fields are registered in the sortable attributes before performing the search.
 - Introduce a new `InvalidSortableAttribute` user error that is raised when the sort criteria declared at query time are not part of the sortable attributes.
 - `@ManyTheFish` introduced integration tests for the dynamic Sort criterion.

Fixes #305.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: many <maxime@meilisearch.com>
2021-08-18 16:55:32 +00:00
d1df0d20f9 Add integration test of SortBy criterion 2021-08-18 16:21:51 +02:00
1b7f6ea1e7 Return a new error when the sort criteria is not sortable 2021-08-18 15:04:07 +02:00
71602e0f1b Add the sortable fields into the settings and in the index 2021-08-18 15:04:07 +02:00
407f53872a Add a sort_criteria method to the Search builder struct 2021-08-18 15:04:07 +02:00
687cd2e205 Introduce the new Sort criterion and AscDesc enum 2021-08-18 15:04:07 +02:00
198c416bd8 Merge #312
312: Update milli version to v0.10.1 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-18 12:08:04 +00:00
6cb9c3b81f Update milli version to v0.10.1 2021-08-18 13:46:27 +02:00
2a67308e29 Merge #311
311: Update tokenizer version to v0.2.5 r=Kerollmops a=curquiza

Fixes panic when indexing data containing [control characters](https://en.wikipedia.org/wiki/Control_character) but continue accepting whitespace, obviously.

Related to https://github.com/meilisearch/MeiliSearch/issues/1590

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-18 11:41:52 +00:00
42cf847a63 Update tokenizer version to v0.2.5 2021-08-18 13:37:41 +02:00
c4275f0d27 Merge #310
310: Modify the README file r=Kerollmops a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-08-17 15:20:43 +00:00
ecf8abc518 Modify the README file 2021-08-17 17:18:58 +02:00
5b88df508e Use the new Asc/Desc syntax everywhere 2021-08-17 14:15:22 +02:00
fcedff95e8 Change the Asc/Desc criterion syntax to use a colon (:) 2021-08-17 14:03:21 +02:00
e9ada44509 AscDesc criterion returns documents ordered by numbers then by strings 2021-08-17 13:21:31 +02:00
110bf6b778 Make the FacetStringIter work in both, ascending and descending orders 2021-08-17 11:18:40 +02:00
22ebd2658f Introduce the EitherString/RevRange private aliases 2021-08-17 10:47:15 +02:00
7a5889bc5a Introduce the highest_reverse_iter private method 2021-08-17 10:45:26 +02:00
ad0d311f8a Introduce the FacetStringLevelZeroRevRange struct 2021-08-17 10:44:43 +02:00
6214c38da9 Introduce the FacetStringGroupRevRange struct 2021-08-17 10:44:27 +02:00
1c604de158 Introduce the highest_iter private method on the FacetStringIter struct 2021-08-17 10:41:11 +02:00
64df159057 Introduce the new_reducing constructor on the FacetStringIter struct 2021-08-17 10:35:06 +02:00
01a4052828 Move the FacetStringIter creation logic into a private new method 2021-08-17 10:29:43 +02:00
51581d14f8 Merge #307
307: Update version for the next release (v0.10.0) r=Kerollmops a=curquiza

Replaces https://github.com/meilisearch/milli/pull/304

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-16 10:33:53 +00:00
fcc520e49a Update version for the next release (v0.10.0) 2021-08-16 12:00:28 +02:00
1541bce952 Merge #303
303: Remove max values by facet limit for facet distribution r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-08-16 09:58:53 +00:00
7dbefae1e3 Make facet string iterator non reducing 2021-08-12 17:23:39 +02:00
8fdf860c17 Remove max values by facet limit for facet distribution 2021-08-12 11:29:20 +02:00
2102e0da6b Merge #302
302: Update milli to v0.9.0 r=curquiza a=curquiza

Updating the minor and not patch since #300 seems to be breaking: it involves a re-indexation to get the fix, so it involves an additional step from the users, not only downloading the latest version.

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-05 08:38:15 +00:00
89b9b61840 Merge #300
300: Fix prefix level position docids database r=curquiza a=ManyTheFish

The prefix search was inverted when we generated the DB.
Instead of searching if word had a prefix in prefix fst,
we were searching if the word was a prefix of a prefix contained in the prefix fst.
The indexer now iterates over the prefixes contained in the fst
and searches them by prefix in the word-level-position-docids database,
aggregating matches in a sorter.

Fix #299

Co-authored-by: many <maxime@meilisearch.com>
2021-08-04 16:52:09 +00:00
7f26c75610 Update milli to v0.9.0 2021-08-04 16:04:55 +02:00
cdeb07f0fd Fix prefix level position docids database
The prefix search was inverted when we generated the DB.
Instead of searching if word had a prefix in prefix fst,
we were searching if the word was a prefix of a prefix contained in the prefix fst.
The indexer now iterates over the prefixes contained in the fst
and searches them by prefix in the word-level-position-docids database,
aggregating matches in a sorter.

Fix #299
2021-08-04 14:11:49 +02:00
cb45a10bcd Merge #298
298: Rename the search benchmarks r=Kerollmops a=irevoire

And fix a bug. As always, I was not closing the env.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-29 15:33:15 +00:00
7eb2d71009 fix the benchmarks 2021-07-29 16:27:05 +02:00
976dc1f4bc prefix the search benchmarks with 'search' 2021-07-29 16:27:05 +02:00
1290edd58a Merge #297
297: Bump milli to v0.8.1 r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-29 14:19:41 +00:00
341c244965 Bump milli to v0.8.1 2021-07-29 15:56:36 +02:00
d962e46ed1 Merge #296
296: Fix invalid faceted documents ids buffer size r=Kerollmops a=Kerollmops

Fix a bug found by `@irevoire` when benchmarking the search.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-29 13:52:34 +00:00
90514e03d1 Fix invalid faceted documents ids buffer size 2021-07-29 15:49:23 +02:00
200e98c211 Merge #293
293: Make sure that the relevancy is not impacted by other settings r=Kerollmops a=Kerollmops

Fix https://github.com/meilisearch/meilisearch/issues/1505.

fix https://github.com/meilisearch/MeiliSearch/issues/1529

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-27 16:04:52 +00:00
bc845324df Merge #295
295: Update version for the next release (v0.8.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-07-27 14:42:10 +00:00
6a141694da Update version for the next release (v0.8.0) 2021-07-27 16:38:42 +02:00
dc2b63abdf Introduce an empty FilterCondition variant to support unknown fields 2021-07-27 16:34:04 +02:00
4ab7ca0e83 Merge #288
288: Stop tracking the Cargo.lock and add cache + windows to the CI r=curquiza a=irevoire

We reuse the same `~/.cargo` and `./target` directory between each run on the same OS and rust toolchain.
The `key` to decide if we can use the cache or not is: `$OS_NAME-$RUST_TOOLCHAIN-$HASH(Cargo.toml)`

We also removed the `Cargo.lock` from this repository. Indeed, milli is a library and [should not track the `Cargo.lock`](https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html)

And finally, we enabled the tests on `windows-latest`. Since `lmdb` has been updated, this is now possible.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-26 14:22:19 +00:00
0038b3848a add a simple github cache 2021-07-26 15:31:26 +02:00
88646a63a1 update bors 2021-07-26 15:31:00 +02:00
b12738cfe9 Use the right DB prefixes to store the faceted fields 2021-07-22 19:18:22 +02:00
7aa6cc9b04 Do not insert fields in the map when changing the settings 2021-07-22 18:40:12 +02:00
ee3a49cfba Merge #291
291: Fix a bug about zero bytes in the inputs r=irevoire a=Kerollmops

Ok, good news, after a little session of debugging with `@irevoire` we found out that the bug seems to be related to zeroes in the input update. The engine wasn't designed to accept those. The chosen solution is to update the tokenizer to remove those zeroes. We are waiting on https://github.com/meilisearch/tokenizer/pull/52 to be merged and a new version to be released.

It is not an undefined behavior, I repeat: it is a "normal" bug 🎉 👏

----

This PR tries to fix a bug where we use LMDB in the wrong way, leading to panic due to an undefined behavior on the Rust side. I thought [we fixed it in a previous PR](https://github.com/meilisearch/milli/pull/264) but we found out that _a similar_ bug was still present. `@bb` found a way to trigger this bug and helped us find the origin of it.

As I don't have a minimal reproducible example of this bug I bet on the unsafe `put_current` calls when we index new documents as the bug was triggered after a big indexation on a clean database, thus not triggering a deletion update. I only replaced the unsafe `put_current` with two safe calls to `get`/`put`.

I hope it helps and fixes the bug, only `@bb` can help us check that. I am not even sure how I can create a custom Docker image and expose it for testing purposes.

<details>
  <summary>The backtrace leading us to a panic in grenad.</summary>

```
meilisearch_1    | thread 'tokio-runtime-worker' panicked at 'assertion failed: key > &last_key', /root/.cargo/git/checkouts/grenad-e2cb77f65d31bb02/3adcb26/src/block_builder.rs:38:17
meilisearch_1    | stack backtrace:
meilisearch_1    |    0: rust_begin_unwind
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panicking.rs:493:5
meilisearch_1    |    1: core::panicking::panic_fmt
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/core/src/panicking.rs:92:14
meilisearch_1    |    2: core::panicking::panic
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/core/src/panicking.rs:50:5
meilisearch_1    |    3: grenad::block_builder::BlockBuilder::insert
meilisearch_1    |              at ./root/.cargo/git/checkouts/grenad-e2cb77f65d31bb02/3adcb26/src/block_builder.rs:38:17
meilisearch_1    |    4: grenad::writer::Writer<W>::insert
meilisearch_1    |              at ./root/.cargo/git/checkouts/grenad-e2cb77f65d31bb02/3adcb26/src/writer.rs:92:12
meilisearch_1    |    5: milli::update::words_level_positions::write_level_entry
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/words_level_positions.rs:262:5
meilisearch_1    |    6: milli::update::words_level_positions::compute_positions_levels
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/words_level_positions.rs:211:13
meilisearch_1    |    7: milli::update::words_level_positions::WordsLevelPositions::execute
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/words_level_positions.rs:65:23
meilisearch_1    |    8: milli::update::index_documents::IndexDocuments::execute_raw
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/index_documents/mod.rs:831:9
meilisearch_1    |    9: milli::update::index_documents::IndexDocuments::execute
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/index_documents/mod.rs:372:9
meilisearch_1    |   10: meilisearch_http::index::updates::<impl meilisearch_http::index::Index>::update_documents_txn
meilisearch_1    |              at ./meilisearch/meilisearch-http/src/index/updates.rs:225:30
meilisearch_1    |   11: meilisearch_http::index::updates::<impl meilisearch_http::index::Index>::update_documents
meilisearch_1    |              at ./meilisearch/meilisearch-http/src/index/updates.rs:183:22
meilisearch_1    |   12: meilisearch_http::index::update_handler::UpdateHandler::handle_update
meilisearch_1    |              at ./meilisearch/meilisearch-http/src/index/update_handler.rs:75:18
meilisearch_1    |   13: meilisearch_http::index_controller::index_actor::actor::IndexActor<S>::handle_update::{{closure}}::{{closure}}
meilisearch_1    |              at ./meilisearch/meilisearch-http/src/index_controller/index_actor/actor.rs:174:35
meilisearch_1    |   14: <tokio::runtime::blocking::task::BlockingTask<T> as core::future::future::Future>::poll
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/blocking/task.rs:42:21
meilisearch_1    |   15: tokio::runtime::task::core::CoreStage<T>::poll::{{closure}}
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/core.rs:243:17
meilisearch_1    |   16: tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/loom/std/unsafe_cell.rs:14:9
meilisearch_1    |   17: tokio::runtime::task::core::CoreStage<T>::poll
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/core.rs:233:13
meilisearch_1    |   18: tokio::runtime::task::harness::poll_future::{{closure}}
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/harness.rs:427:23
meilisearch_1    |   19: <std::panic::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panic.rs:344:9
meilisearch_1    |   20: std::panicking::try::do_call
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panicking.rs:379:40
meilisearch_1    |   21: std::panicking::try
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panicking.rs:343:19
meilisearch_1    |   22: std::panic::catch_unwind
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panic.rs:431:14
meilisearch_1    |   23: tokio::runtime::task::harness::poll_future
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/harness.rs:414:19
meilisearch_1    |   24: tokio::runtime::task::harness::Harness<T,S>::poll_inner
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/harness.rs:89:9
meilisearch_1    |   25: tokio::runtime::task::harness::Harness<T,S>::poll
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/harness.rs:59:15
meilisearch_1    |   26: tokio::runtime::task::raw::RawTask::poll
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/raw.rs:66:18
meilisearch_1    |   27: tokio::runtime::task::Notified<S>::run
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/mod.rs:171:9
meilisearch_1    |   28: tokio::runtime::blocking::pool::Inner::run
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/blocking/pool.rs:265:17
meilisearch_1    |   29: tokio::runtime::blocking::pool::Spawner::spawn_thread::{{closure}}
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/blocking/pool.rs:245:17
meilisearch_1    | note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
```

</details>

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-22 16:14:35 +00:00
0353fbb5df Bump the tokenizer version to v0.2.4 2021-07-22 17:14:45 +02:00
92c0a2cdc1 Add a test that triggers a panic when indexing zeroes 2021-07-22 17:14:44 +02:00
aa02a7fdd8 Add a test to check that we indeed impact the relevancy 2021-07-22 17:04:38 +02:00
77de82aaa4 Merge #254
254: Improve the facet string distribution speed r=Kerollmops a=Kerollmops

This pull request creates a data structure similar to the one we use for the faceted numbers, a tetratomic decision tree but this time for the facet strings. This PR also changes the facet distribution behavior by returning one of the original facet values, fixes #260.

This data structure defines bucket-like structures where documents ids are stored under their facet value and helps the search decide if it wants to move to a lower level under a given bucket or not, depending on if the current bucket contains interesting documents or not. The whole format, algorithm, and previous attempts are explained in the [`facet_string.rs` file](ec1cfdd42b/milli/src/search/facet/facet_string.rs).

Note that this data structure **could** be used to sort by string lexicographically, that is hypothetically possible. We need more testing, in terms of performance and quality, as we will sort on lowercased versions of the facet values.

 - [x] Implement a faster and more precise way to fetch the facet distribution.
 - [x] Store and return the original facet string value. We currently return the lowercased version.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-07-21 15:34:40 +00:00
0227254a65 Return the original string values for the inverted facet index database 2021-07-21 16:59:39 +02:00
03a01166ba Display the original facet string value from the linear facet database 2021-07-21 16:59:39 +02:00
d23c250ad5 Fix a bound error in the facet string range construction 2021-07-21 16:59:39 +02:00
081278dfd6 Use the facet string levels when computing the facet distribution 2021-07-21 16:59:39 +02:00
5676b204dd Fix the facet string levels codecs 2021-07-21 16:59:38 +02:00
8c86348119 Indexing the facet strings levels 2021-07-21 16:59:38 +02:00
a7ae552ba7 Fix the FacetStringLevelZeroRange range when unbounded 2021-07-21 16:59:38 +02:00
757b2b502a Remove the FacetValueStringCodec 2021-07-21 16:59:38 +02:00
adfd4da24c Introduce the FacetStringIter iterator 2021-07-21 16:59:38 +02:00
a79661c6dc Introduce a lot of facet string helper iterators 2021-07-21 16:59:38 +02:00
851f979039 Describe the way we want to group the facet strings 2021-07-21 16:59:38 +02:00
f858f64b1f Move the facet number iterators into their own module 2021-07-21 16:59:37 +02:00
9f8095c069 Make sure that we don't keep a reference on the LMDB key when using put_current 2021-07-21 10:35:35 +02:00
fa44e95c91 Merge #290
290: Add a $HOME to the CI r=Kerollmops a=irevoire

This should fix this issue:
https://github.com/meilisearch/milli/runs/3104228432?check_suite_focus=true

I think a real fix would be to fix the configuration of our github runner but I don't know how to do it.
@curquiza could probably help us on that once she's back from vacation 😄 

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-20 07:32:46 +00:00
0ab541627b add a $HOME var to the ci 2021-07-19 14:33:49 +02:00
16698f714b Merge #287
287: Add benchmarks for indexing r=Kerollmops a=irevoire

closes #274 
I don't really know how much time this will take on our bench machine. I'm afraid the wiki dataset will take a really long time to bench (it takes 1h30 on my computer).

If you are ok with it, I would like to merge this first PR since it introduces a first set of benchmarks and see how much time it takes in reality on our setup.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-07 15:41:15 +00:00
931021fe57 add benchmarks for indexing 2021-07-07 13:09:05 +02:00
4c9531bdf3 Merge #285
285: Support documents with at most 65536 fields r=Kerollmops a=Kerollmops

Fixes #248.

In this PR I updated the `obkv` crate, it now supports arbitrary key length and therefore I was able to use an `u16` to represent the fields instead of a single byte. It was impressively easy to update the whole codebase 🍡 🍔

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-06 16:44:51 +00:00
0a78107525 Fix the infos crate to make it read u16 field ids 2021-07-06 11:58:03 +02:00
a9553af635 Add a test to check that we can index more than 256 fields 2021-07-06 11:58:03 +02:00
838ed1cd32 Use an u16 field id instead of one byte 2021-07-06 11:58:03 +02:00
cc54c41e30 Merge #283
283: Use the AlwaysFreePages flag when opening an index r=irevoire a=Kerollmops

We introduced a new flag in our fork of LMDB, this `AlwaysFreePages` flag forces LMDB to always free the single pages it uses before writing to the disk instead of keeping them in a linked list.

Declaring this flag reduces the memory footprint (leak) we have on memory after indexing a lot of documents.

Fixes #279.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-05 16:59:16 +00:00
63db43cc7a Merge #284
284: [http-ui] Introduce the route `die` r=Kerollmops a=irevoire

This route just `exit`s the process. This can come in handy when you run `http-ui` inside of another process (a profiler for example), and you don't want to kill everything.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-07-05 15:47:53 +00:00
4562b278a8 remove a warning and add a log
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-07-05 17:46:02 +02:00
a57e522a67 introduce a die route let the program exit itself alone 2021-07-05 17:38:10 +02:00
91c5d0c042 Use the AlwaysFreePages flag when opening an index 2021-07-05 16:36:13 +02:00
007fec21fc Merge #281
281: Bump to v0.7.2 r=ManyTheFish a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-05 09:00:26 +00:00
a6b4069172 Bump to v0.7.2 2021-07-05 10:54:53 +02:00
d7bc6a6999 Merge #280
280: Fix matching length in matching_words r=Kerollmops a=ManyTheFish

related to https://github.com/meilisearch/MeiliSearch/issues/1441

Co-authored-by: many <maxime@meilisearch.com>
2021-07-01 18:50:46 +00:00
9f62149b94 Fix matching length in matching_words 2021-07-01 19:03:28 +02:00
f25f454bd4 Merge #275
275: Fix the benchmarks dependencies r=Kerollmops a=irevoire

Import exactly the same dependency as milli instead of a wildcard that can do anything

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <irevoire@protonmail.ch>
2021-07-01 11:07:01 +00:00
885f243afc Merge #276
276: Fix the fmt of the auto-generated file r=Kerollmops a=irevoire

The file generated by the `build.rs` file of the benchmark was badly formatted and that was causing an issue with the git pre-commit hook I wrote [earlier](https://github.com/meilisearch/milli/blob/main/script/pre-commit)

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-01 10:24:36 +00:00
ec87bf3dd5 Update benchmarks/Cargo.toml
Co-authored-by: Clément Renault <renault.cle@gmail.com>
2021-07-01 11:45:05 +02:00
ef965aa3f3 fix the fmt of the auto-generated file 2021-07-01 11:43:09 +02:00
fc09d77e89 fix the benchmarks dependencies 2021-07-01 11:38:30 +02:00
056180e6c8 Merge #273
273: Update tokenizer version to v0.2.3 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-07-01 09:02:16 +00:00
3c149d8a43 Update tokenizer version to v0.2.3 2021-06-30 18:41:35 +02:00
b4dcdbf00d Merge #269 #271
269: Fix bug when inserting previously deleted documents r=Kerollmops a=Kerollmops

This PR fixes #268.

The issue was in the `ExternalDocumentsIds` implementation in the specific case that an external document id was in the soft map marked as deleted.

The bug was due to a wrong assumption on my side about how the FST unions were returning the `IndexedValue`s, I thought the values returned in an array were in the same order as the FSTs given to the `OpBuilder` but in fact, [the `IndexedValue`'s `index` field was here to indicate from which FST the values were coming from](https://docs.rs/fst/0.4.7/fst/map/struct.IndexedValue.html).

271: Remove the roaring operation functions warnings r=Kerollmops a=Kerollmops

In this PR we are just replacing the usages of the roaring operations function by the new operators. This removes a lot of warnings.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-30 12:34:55 +00:00
32b7bd366f Remove the roaring operation functions warnings 2021-06-30 14:12:56 +02:00
00e2845f0f Merge #270
270: Update milli version to v0.7.1 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-30 12:12:24 +00:00
c92ef54466 Add a test for when we insert a previously deleted document 2021-06-30 14:00:01 +02:00
28782ff99d Fix ExternalDocumentsIds struct when inserting previously deleted ids 2021-06-30 14:00:01 +02:00
b489515f4d Update milli version to v0.7.1 2021-06-30 13:52:46 +02:00
54889813ce Implement some debug functions on the ExternalDocumentsIds struct 2021-06-30 11:29:41 +02:00
4bce66d5ff Make the Index::delete_* method private 2021-06-30 10:07:31 +02:00
66e6ea56b8 Merge #267
267: Highlighting r=Kerollmops a=irevoire

closes #262 
I basically rewrote a part of the damerau-levenshtein function we were using for the highlighting to accept at most two errors from the user and stop on the third mistake.
Also, now it supports utf-8, so it should fix our issue.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <irevoire@protonmail.ch>
2021-06-30 05:43:50 +00:00
6044b80362 Update milli/src/search/matching_words.rs
Co-authored-by: Clément Renault <renault.cle@gmail.com>
2021-06-30 00:35:26 +02:00
be75e738b1 add more tests 2021-06-29 16:24:58 +02:00
56fceb1928 re-implement the Damerau-Levenshtein used for the highlighting 2021-06-29 15:36:03 +02:00
9dbc8b2dd0 Merge #266
266: Bump LMDB to the latest version (v0.9.70) r=Kerollmops a=Kerollmops

By bumping to a new version of heed (from git, v0.12.0 unpublished yet), this PR fixes Windows disk reservation problems. This new version of heed changes the `del/put_current`, and `append` iterator methods signature by declaring them unsafe.

This PR also bumps milli itself into v0.7.0 as it is breaking due to the heed/LMDB bump.

This PR must be merged after #264.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-28 17:11:41 +00:00
80c6aaf1fd Bump milli to 0.7.0 2021-06-28 18:31:56 +02:00
bdc5599b73 Bump heed to use the git repo with v0.12.0 2021-06-28 18:26:20 +02:00
73384aec21 Merge pull request #264 from meilisearch/fix-heed-undefined-behavior
Fix the invalid heed usage
2021-06-28 18:23:49 +02:00
0013236e5d Fix the LMDB and heed invalid interactions.
It is undefined behavior to keep a reference to the database while
modifying it, we were keeping references in the database and also
feeding the heed put_current methods with keys referenced inside
the database itself.

https://github.com/Kerollmops/heed/pull/108
2021-06-28 16:19:02 +02:00
9e5f9a8a10 Add a test for the words level positions generation bug 2021-06-28 16:08:31 +02:00
c38b0b883d Merge #257
257: Fix unconditional facet indexing r=Kerollmops a=Kerollmops

We were indexing every searchable field as filterable, this was a mistake.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-23 15:32:46 +00:00
98285b4b18 Bump milli to 0.6.0 2021-06-23 17:30:26 +02:00
4fc8f06791 Rename faceted_fields into filterable_fields 2021-06-23 17:26:54 +02:00
c31cadb54f Do not consider the searchable field as filterable 2021-06-23 17:26:54 +02:00
41c4a5b60d Merge #246
246: Improve the ci r=Kerollmops a=irevoire

Rewrite the CI entirely:
- run the ci on Linux, macOS and Windows.
- run the ci on rust stable, beta and nightly
- add rustfmt to the CI.
- split the CI into multiple tasks, this way, the ci should be faster to fail

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-23 12:52:39 +00:00
faa3cd3b71 Update bors.toml
Don't check nightly and beta channel

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-23 14:30:33 +02:00
2ab24c4f49 Merge #256
256: Update version for the next release (v0.5.1) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-23 12:29:57 +00:00
9885fb4159 Update version for the next release (v0.5.1) 2021-06-23 14:05:20 +02:00
66f55e3e6a Merge #255
255: Fix facet distribution error r=Kerollmops a=Kerollmops

This PR fixes two invalid behaviors and fixes #253:
 - We were ignoring the list of fields for which the user wanted a facet distribution.
 - We were not raising any error for when a non-filterable field was requested a facet distribution.

~For the latter behavior I need the help of @curquiza to help me choose the right error type.~

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-23 12:03:05 +00:00
a6218a20ae Introduce a new InvalidFacetsDistribution user error 2021-06-23 13:56:19 +02:00
2364777838 Return an error for when a field distribution cannot be done 2021-06-23 11:50:49 +02:00
aeaac743ff Replace an if let some by a match 2021-06-23 11:33:30 +02:00
5099192c44 update bors.toml 2021-06-23 10:22:40 +02:00
d8695da1d1 improve the ci 2021-06-23 10:22:40 +02:00
28197b2435 Merge #252
252: Run the formatter on the whole project a second time r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-22 13:56:09 +00:00
8d2a0b43ff run the formatter on the whole project a second time 2021-06-22 15:36:22 +02:00
634201244c Merge #250 #251
250: Add the limit field to http-ui r=Kerollmops a=irevoire



251: Fix the limit r=Kerollmops a=irevoire

There was no check on the limit and thus if a user specified a very large number this line could cause a panic.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-22 13:00:52 +00:00
3d90b03d7b fix the limit
There was no check on the limit and thus, if a user specified a very large number, this line could cause a panic
2021-06-22 14:52:13 +02:00
81643e6d70 add the limit field to http-ui 2021-06-22 14:47:23 +02:00
5aea8dd75b Merge #249
249: Enable the jemallocator dependencies only when we are running on linux r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-22 12:32:44 +00:00
77eb37934f add jemalloc to http-ui and the benchmarks 2021-06-22 14:17:56 +02:00
5b6adc6d96 Merge #245
245: Warn for when a key is too large for LMDB r=Kerollmops a=Kerollmops

Closes #191, and resolves #140.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-22 12:10:52 +00:00
d53df8a002 enable the jemallocator dependencies only when we are running on linux 2021-06-22 14:04:16 +02:00
ca9fa329d1 Merge #247
247: Return a `MissingDocumentId` error when a document doesn't have one r=Kerollmops a=Kerollmops

We were wrongly returning a `MissingPrimaryKey` instead of a `MissingDocumentId` error for when a document was missing a document id. We also improved the error message for when a document id is invalid (wrong type or wrong format).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-22 10:07:54 +00:00
51dbb2e06d Warn for when a key is too large for LMDB 2021-06-22 11:51:36 +02:00
aecbd14761 Improve the error message for InvalidDocumentId 2021-06-22 11:31:58 +02:00
0cca2ea24f Return a MissingDocumentId when a document doesn't have one 2021-06-22 11:22:33 +02:00
481b0bf277 Warn for when a facet key is too large for LMDB 2021-06-22 10:57:46 +02:00
b073fd49ea Merge #244
244: Update version for the next release (v0.5.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-21 14:27:10 +00:00
be2ebdd395 Merge #243
243: Rename FieldsDistribution into FieldDistribution r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-21 14:00:35 +00:00
320670f8fe Update version for the next release (v0.5.0) 2021-06-21 15:59:17 +02:00
daef43f504 Rename FieldsDistribution into FieldDistribution 2021-06-21 15:57:41 +02:00
b120c32cad Merge #242
242: Update version for the next release (v0.4.2) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-21 09:01:42 +00:00
35fcc351a0 Update version for the next release (v0.4.2) 2021-06-20 17:37:24 +02:00
5b19dd23d9 Merge #240
240: Field distribution r=Kerollmops a=irevoire

closes #199
closes #198 


Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-19 10:14:25 +00:00
d08cfda796 convert the field_distribution to a BTreeMap and avoid counting twice the same documents 2021-06-17 18:31:54 +02:00
a9e552ab18 Merge #238
238: Integration tests on filters and distinct r=Kerollmops a=ManyTheFish

Fix #216 
Fix #120 

Co-authored-by: many <maxime@meilisearch.com>
2021-06-17 15:00:51 +00:00
6cb1102bdb Fix PR comments 2021-06-17 15:19:03 +02:00
969adaefdf rename fields_distribution in field_distribution 2021-06-17 15:16:20 +02:00
a67ccfdf3a Merge #239
239: Update version to the next release (0.4.1) r=Kerollmops a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-17 13:02:37 +00:00
ccd6f13793 Update version to the next release (0.4.1) 2021-06-17 15:01:20 +02:00
f496cd320d Add distinct integration tests 2021-06-17 14:33:18 +02:00
9f4184208e Add test on filters 2021-06-17 13:56:09 +02:00
bb89ef9fc0 Merge #237
237: change sub errors visibility r=Kerollmops a=MarinPostma

re-export sub-error types so they can be matched upon outside of milli.


Co-authored-by: marin postma <postma.marin@protonmail.com>
Co-authored-by: marin <postma.marin@protonmail.com>
2021-06-17 09:51:18 +00:00
70bee7d405 re-export remaining error types
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-17 11:49:03 +02:00
abbebad669 change sub errors visibility 2021-06-17 11:44:01 +02:00
1bcf43baac Merge #236
236: Format the whole project r=Kerollmops a=irevoire

I need to add `cargo fmt` in the CI before closing #231

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-16 18:05:40 +00:00
9716fb3b36 format the whole project 2021-06-16 18:33:33 +02:00
ba30cef987 Merge #234
234: Revert "Enable optimization in every profile" r=Kerollmops a=ManyTheFish

compiling tests in release takes too much time.

Reverts meilisearch/milli#224

Fix #233 

Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-06-16 13:38:58 +00:00
41bdc90f46 Revert "Enable optimization in every profile" 2021-06-16 14:17:02 +02:00
3bd4cf94cc Merge #235
235: Update version for the next release (v0.4.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-16 12:02:40 +00:00
f5ff3e8e19 Update version for the next release (v0.4.0) 2021-06-16 14:01:05 +02:00
02e0271e44 Merge #225
225: Introduce the error handler r=ManyTheFish a=Kerollmops

Fixes #109.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: many <maxime@meilisearch.com>
2021-06-16 09:46:23 +00:00
ce0315a10f Close write transaction in test 2021-06-16 11:03:37 +02:00
7ac441e473 Fix small typos 2021-06-16 11:03:37 +02:00
adf0c389c5 Rename FilterParsing into InvalidFilter 2021-06-16 11:03:36 +02:00
8cfe3e1ec0 Rename DatabaseSizeReached into MaxDatabaseSizeReached 2021-06-16 11:03:36 +02:00
4eda438f6f Add a new Error for when a user use a non-filtered attribute in a filter 2021-06-16 11:03:36 +02:00
713acc408b Introduce the primary key to the Settings builder structure 2021-06-16 11:03:36 +02:00
a7d6930905 Replace the panicking expect by tracked Errors 2021-06-15 11:51:32 +02:00
f0e804afd5 Rename the FieldIdMapMissingEntry from_db_name field into process 2021-06-15 11:13:04 +02:00
28c004aa2c Prefer using constant for the database names 2021-06-15 11:13:04 +02:00
78fe4259a9 Fix the http-ui crate 2021-06-14 18:06:23 +02:00
312c2d1d8e Use the Error enum everywhere in the project 2021-06-14 16:58:38 +02:00
ca78cb5aca Introduce more variants to the error module enums 2021-06-14 16:58:38 +02:00
456541e921 Implement the Display trait on the Error type 2021-06-14 16:48:51 +02:00
44c353fafd Introduce some way to construct an Error 2021-06-14 16:48:51 +02:00
23fcf7920e Introduce a basic version of the InternalError struct 2021-06-14 16:48:51 +02:00
d2b1ecc885 Remove a lot of serialization unreachable errors 2021-06-14 16:48:51 +02:00
65b1d09d55 Move the obkv merging functions into the merge_function module 2021-06-14 16:48:51 +02:00
ab727e428b Remove the docid_word_positions_merge method that must never be called 2021-06-14 16:48:51 +02:00
93a8633f18 Remove the documents_merge method that must never be called 2021-06-14 16:48:51 +02:00
cfc7314bd1 Prefer using an explicit merge function name 2021-06-14 16:48:50 +02:00
93978ec38a Serializing a RoaringBitmap into a Vec cannot fail 2021-06-14 16:48:50 +02:00
ff9414a6ba Use the out of the compute_primary_key_pair function 2021-06-14 16:48:50 +02:00
0542e2179f Merge #230
230: Update Tokenizer version to v0.2.3 r=ManyTheFish a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-10 16:28:05 +00:00
7d5395c12b Update Tokenizer version to v0.2.3 2021-06-10 17:00:04 +02:00
3e6c05fe13 Merge #227
227: Replace Consecutive by Phrase in query tree r=Kerollmops a=ManyTheFish

Replace `Consecutive` by `Phrase` in the query tree in order to remove theoretical bugs,
due to the `Consecutive` enum type.

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-06-10 09:31:39 +00:00
f4cab080a6 Update milli/src/search/query_tree.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-10 11:30:51 +02:00
36715f571c Update milli/src/search/criteria/proximity.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-10 11:30:33 +02:00
e923a3ed6a Replace Consecutive by Phrase in query tree
Replace Consecutive by Phrase in query tree in order to remove theoretical bugs,
due to the Consecutive enum type.
2021-06-10 11:16:16 +02:00
bc02031793 Merge #226
226: Update version for the next release (v0.3.1) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-09 14:13:42 +00:00
dc64e139b9 Update version for the next release (v0.3.1) 2021-06-09 14:39:21 +02:00
5cf1b0b138 Merge #224
224: Enable optimization in every profile r=Kerollmops a=irevoire



Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-09 09:13:21 +00:00
afb4133bd2 Merge #212 #222 #223
212: Introduce integration test on criteria r=Kerollmops a=ManyTheFish

- add pre-ranked dataset
- test each criterion 1 by 1
- test all criteria in several order

222: Move the `UpdateStore` into the http-ui crate r=Kerollmops a=Kerollmops

We no longer need to have the `UpdateStore` inside of the milli crate, as it is the job of the caller to stack the updates and sequentially give them to milli.

223: Update dataset links r=Kerollmops a=curquiza



Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-09 08:47:19 +00:00
86b916b008 enable optimization in every profile 2021-06-09 10:26:57 +02:00
6faa87302c Merge #220
220: Make hard separators split phrase query r=Kerollmops a=ManyTheFish

hard separators will now split a phrase query as two sequential phrases (double-quoted strings):

the query `"Radioactive (Imagine Dragons)"` would be considered equivalent to `"Radioactive" "Imagine Dragons"`, which has the small disadvantage of not keeping the order of the two (or more) separate phrases.

Fix #208

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-06-09 08:22:58 +00:00
f4ff30e99d Update milli/tests/search/mod.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-09 10:12:24 +02:00
ab696f6a23 Update milli/tests/search/query_criteria.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-09 10:12:17 +02:00
d89f5ca48e Merge pull request #219 from meilisearch/fix-criteria-fields-ids-map
Save the criteria field name in the fields ids map
2021-06-08 18:46:57 +02:00
7e93811fbc Update dataset links 2021-06-08 18:18:54 +02:00
0bf4f3f48a Modify a test to check that criteria additions change the fields ids map 2021-06-08 18:14:34 +02:00
82df524e09 Make sure that we register the field when setting criteria 2021-06-08 18:14:33 +02:00
8e2c41e7f7 Merge pull request #221 from meilisearch/fix-primary-key-delete
Use the index primary key when deleting documents
2021-06-08 18:13:42 +02:00
103dddba2f Move the UpdateStore into the http-ui crate 2021-06-08 17:59:51 +02:00
faf148d297 Update milli/src/search/query_tree.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-08 17:52:37 +02:00
133ab98260 Use the index primary key when deleting documents 2021-06-08 17:33:29 +02:00
b489d699ce Make hard separators split phrase query
hard separators will now split a phrase query as double double-quotes

Fix #208
2021-06-08 17:29:38 +02:00
afb09c914d Update milli/tests/search/query_criteria.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-08 16:53:56 +02:00
b64cd2a3e3 Resolve PR comments 2021-06-08 14:14:34 +02:00
1fcc5f73ac Factorize tests using macro_rules 2021-06-08 12:33:02 +02:00
32cf5a29ce Merge #218
218: Enable optimization for build.rs and macro r=Kerollmops a=irevoire

It speeds up the unzipping of the benchmark’s dataset a lot


Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-08 09:56:23 +00:00
e0c327bae2 Update Cargo.toml
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-08 11:39:10 +02:00
c82a382b0b compile every build.rs with optimization 2021-06-08 11:19:22 +02:00
eb149030eb Merge #215
215: Make the benchmark command more convenient in CI r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-08 09:04:26 +00:00
fd032165d7 Merge #217
217: Improve the benchmarks readme r=Kerollmops a=irevoire

- Move the Dataset part to the end of the readme so that when people just want to run the benchmarks they are not tempted to download the benchmarks by hand (which are going to be downloaded anyway by the `build.rs` script)
- Fix the links in the dataset -- wiki part


Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-08 08:44:16 +00:00
d912c94034 improve the benchmark’s readme 2021-06-08 10:38:23 +02:00
563492f1e5 update the TOC order 2021-06-07 17:29:22 +02:00
38ab541f4a Make the benchmark command more convenient in CI 2021-06-04 00:21:39 +02:00
af38196a6b Merge #214
214: Add --locked in CI tests r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-03 14:39:36 +00:00
e9104a0a32 Add --locked in CI tests 2021-06-03 16:23:59 +02:00
70229f07c8 Update Cargo.lock 2021-06-03 16:22:43 +02:00
ee7d291442 Merge #213
213: Fix the benchmarks script and names r=Kerollmops a=Kerollmops

The benchmarks compare script was not using the `--output` flag and was therefore failing the download of the JSON reports. We also modified the criterion benchmarks to use shorter names, it helps in looking at the benchmarks in the terminal.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-03 14:18:45 +00:00
29824d05ab Reduce the length of the benchmarks names 2021-06-03 15:59:43 +02:00
76a2343639 Fix the compare script of the benchmarks 2021-06-03 15:39:52 +02:00
10882bcbce Introduce integration test on criteria 2021-06-03 14:44:53 +02:00
a32236c80c Merge #211
211: Update Cargo.toml for next release v0.3.0 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-03 10:42:52 +00:00
3b2b3aeea9 Update Cargo.toml for next release v0.3.0 2021-06-03 12:24:27 +02:00
39ed133f9f Merge #193
193: Fix primary key behavior r=Kerollmops a=MarinPostma

this pr:
- Adds early returns on empty document additions, avoiding error messages to be returned when adding no documents and no primary key was set.
- Changes the primary key inference logic to match that of legacy meilisearch.

close #194 

Co-authored-by: Marin Postma <postma.marin@protonmail.com>
Co-authored-by: marin postma <postma.marin@protonmail.com>
2021-06-03 10:24:21 +00:00
fd598f060c Merge #210
210: Check the benchmarks in the CI r=Kerollmops a=Kerollmops

Fixes #209.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-03 09:16:06 +00:00
99b45d2aa0 Make sure that all the workspaces crates compile 2021-06-03 10:56:01 +02:00
57898d8a90 fix silent deserialize error 2021-06-03 10:42:55 +02:00
82fb5f0bef Fix the benchmarks compilation 2021-06-03 10:33:42 +02:00
6b7841fefc Make sure that the benchmarks always compile 2021-06-03 10:29:21 +02:00
834504aec0 Merge #204
204: Decorrelate Distinct, Asc/Desc, Filterable fields from the faceted fields r=Kerollmops a=Kerollmops

This PR decorrelates the fields that need to be stored in facet databases (big inverted indexes for fast access) from the filterable fields, the previously named faceted fields are now named filterable fields and are the union of the distinct attribute, all the Asc/Desc criteria and, the filterable fields.

I added two tests to make sure that the engine was correctly generating the faceted databases when a distinct attribute or an Asc/Desc criteria were added, and one to make sure that it was impossible to filter on a non-filterable field even if it was a faceted one.

Note that the `AttributesForFacetting` has also been renamed into `FilterableAttributes`. But it will be the Transplant's job to do that on the API, this change is only visible to the milli's library users.

- Related to https://github.com/meilisearch/transplant/issues/187.
- Fixes #161 by returning the documents that don't have the Asc/Desc field at the end of the bucket.
- Fixes #168.
- Fixes #152.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Marin Postma <postma.marin@protonmail.com>
Co-authored-by: many <maxime@meilisearch.com>
2021-06-02 15:43:39 +00:00
26a9974667 Make asc/desc criterion return resting documents
Fix #161.2
2021-06-02 17:41:48 +02:00
28962bce99 Merge #207
207: Benchmarks r=Kerollmops a=irevoire



Co-authored-by: tamo <tamo@meilisearch.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
Co-authored-by: Tamo <irevoire@hotmail.fr>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 15:29:09 +00:00
6dc08bf45e remove the nop function 2021-06-02 17:09:21 +02:00
087ae64899 add a gitignore to avoid pushing the autogenerated file 2021-06-02 17:03:30 +02:00
3db25153e5 fix the faceted_fields one last time 2021-06-02 17:00:58 +02:00
3c304c89d4 Make sure that we generate the faceted database when required 2021-06-02 16:24:58 +02:00
b0c0490e85 Make sure that we can add a Asc/Desc field without it being filterable 2021-06-02 16:24:58 +02:00
3b1cd4c4b4 Rename the FacetCondition into FilterCondition 2021-06-02 16:24:58 +02:00
c2afdbb1fb Move and comment some internal facet_condition helper functions 2021-06-02 16:24:58 +02:00
6476827d3a Fix the indexer to be sure that distinct and Asc/Desc are also faceted 2021-06-02 16:24:58 +02:00
c10469ddb6 Patch the http-ui crate to support filterable fields 2021-06-02 16:24:58 +02:00
1e366dae3e remove useless lifetime on Distinct Trait 2021-06-02 16:24:58 +02:00
187c713de5 Remove the MapDistinct struct as now distinct attributes are faceted 2021-06-02 16:24:57 +02:00
ff440c1d9d Introduce the faceted fields method to retrieve those that needs faceting 2021-06-02 16:24:57 +02:00
2a3f9b32ff Rename the faceted fields into filterable fields 2021-06-02 16:24:57 +02:00
f346805c0c Update benchmarks/Cargo.toml
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-02 15:47:03 +02:00
ef1ac8a0cb Update README 2021-06-02 11:13:22 +02:00
edfcdb171c Update benchmarks/scripts/list.sh
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 11:13:22 +02:00
3c91a9a551 Update following reviews 2021-06-02 11:13:22 +02:00
bc4f4ee829 remove s3cmd as a dependency and provide a script to list all the available benchmarks 2021-06-02 11:13:22 +02:00
61fe422a88 Update benchmarks/scripts/compare.sh
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 11:13:22 +02:00
57ed96622b Update benchmarks/scripts/compare.sh
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 11:13:22 +02:00
b3c0d43890 Update benchmarks/scripts/compare.sh
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 11:13:22 +02:00
0d0e900158 Add CI for benchmarks 2021-06-02 11:13:22 +02:00
4536dfccd0 add a way to provide primary_key or autogenerate documents ids 2021-06-02 11:13:20 +02:00
06c414a753 move the benchmarks to another crate so we can download the datasets automatically without adding overhead to the build of milli 2021-06-02 11:11:50 +02:00
3c84075d2d uses an env variable to find the datasets 2021-06-02 11:05:07 +02:00
4969abeaab update the facets for the benchmarks 2021-06-02 11:05:07 +02:00
e5dfde88fd fix the facets conditions 2021-06-02 11:05:07 +02:00
7c7fba4e57 remove the time limitation to let criterion do what it wants 2021-06-02 11:05:07 +02:00
5d5d115608 reformat all the files 2021-06-02 11:05:07 +02:00
7086009f93 improve the base search 2021-06-02 11:05:07 +02:00
d0b44c380f add benchmarks on a wiki dataset 2021-06-02 11:05:07 +02:00
beae843766 add a missing space 2021-06-02 11:05:07 +02:00
5132a106a1 refactorize everything related to the songs dataset in a songs benchmark file 2021-06-02 11:05:07 +02:00
136efd6b53 fix the benches 2021-06-02 11:05:07 +02:00
4b78ef31b6 add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-06-02 11:05:07 +02:00
ea0c6d8c40 add a bunch of queries and start the introduction of the filters and the new dataset 2021-06-02 11:05:07 +02:00
3def42abd8 merge all the criterion only benchmarks in one file 2021-06-02 11:05:07 +02:00
a2bff68c1a remove the optional words for the typo criterion 2021-06-02 11:05:07 +02:00
aee49bb3cd add the proximity criterion 2021-06-02 11:05:07 +02:00
49e4cc3daf add the words criterion to the bench 2021-06-02 11:05:07 +02:00
15cce89a45 update the README with instructions to download the dataset 2021-06-02 11:05:07 +02:00
e425f70ef9 let criterion decide how many iterations it wants to do in 10s 2021-06-02 11:05:07 +02:00
4fdbfd6048 push a first version of the benchmark for the typo 2021-06-02 11:05:07 +02:00
270da98c46 Merge #202
202: Add field id word count docids database r=Kerollmops a=LegendreM

This PR introduces a new database, `field_id_word_count_docids`, that maps the number of words in an attribute with a list of document ids. This relation is limited to attributes that contain less than 11 words.
This database is used by the exactness criterion to know if a document has an attribute that contains exactly the query without any additional word.

Fix #165 
Fix #196
Related to [specifications:#36](https://github.com/meilisearch/specifications/pull/36)

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-06-01 16:09:48 +00:00
e857ca4d7d Fix PR comments 2021-06-01 18:06:46 +02:00
ab2cf69e8d Update milli/src/update/delete_documents.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-01 17:04:10 +02:00
8e6d1ff0dc Update milli/src/update/index_documents/store.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-01 17:04:02 +02:00
168fe0aa28 Merge #206
206: Fix http-ui r=Kerollmops a=irevoire

I just noticed that `http-ui` was not compiling on `main`.
I'm not sure this is the best fix, but it works 👀

Co-authored-by: Tamo <irevoire@hotmail.fr>
2021-06-01 14:31:32 +00:00
608c5bad24 fix http-ui 2021-06-01 16:24:46 +02:00
7d36d664a7 Merge #203
203: Make the MatchingWords return the number of matching bytes r=Kerollmops a=LegendreM

Make the MatchingWords return the number of matching bytes using a custom Levenshtein algorithm.

Fix #138

Co-authored-by: many <maxime@meilisearch.com>
2021-06-01 12:00:33 +00:00
225ae6fd25 Resolve PR comments 2021-06-01 11:53:09 +02:00
2f9f6a1f21 Merge #169
169: Optimize roaring codec r=Kerollmops a=MarinPostma

Optimize the `BoRoaringBitmapCodec` by preventing it from emitting useless errors that caused allocations. On my flamegraph, the byte_decode function went from 4.13% to 1.70% (of transplant graph).

This may not be the greatest optimization ever, but hey, this was a low hanging fruit.

before:
![image](https://user-images.githubusercontent.com/28804882/116241125-17018880-a754-11eb-9f9d-a67418d100e1.png)
after:
![image](https://user-images.githubusercontent.com/28804882/116241167-21bc1d80-a754-11eb-9afc-d9d72727477c.png)



Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-06-01 06:30:25 +00:00
984dc7c1ed rewrite roaring codec without byteorder. 2021-05-31 22:15:39 +02:00
1373637da1 optimize roaring codec 2021-05-31 22:15:35 +02:00
1df68d342a Make the MatchingWords return the number of matching bytes 2021-05-31 18:22:29 +02:00
b8e6db0feb Add database in infos crate 2021-05-31 16:29:27 +02:00
c701f8bf36 Use field id word count database in exactness criterion 2021-05-31 16:27:28 +02:00
4ddf008be2 add field id word count database 2021-05-31 16:27:28 +02:00
2f5e61bacb Merge #184
184: Transfer numbers and strings facets into the appropriate facet databases r=Kerollmops a=Kerollmops

This pull request is related to https://github.com/meilisearch/milli/issues/152 and changes the layout of the facet values: numbers and strings are now in dedicated databases and the user no longer needs to define the type of the fields. No more conversion between the two types is done: numbers (floats and integers converted to f64) go to the facet float database and strings go to the strings facet database.

There is one related issue that I found regarding CSVs, the values in a CSV are always considered to be strings, [meilisearch/specifications#28](d916b57d74/text/0028-indexing-csv.md) fixes this issue by allowing the user to define the fields types using `:` in the "CSV Formatting Rules" section.

All previous tests on facets have been modified to pass again and I have also done hand-driven tests with the 115m songs dataset. Everything seems to be good!

Fixes #192.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-05-31 13:32:58 +00:00
1c0a5cd136 Resolve code modification suggestions 2021-05-31 15:22:50 +02:00
76b9178b16 Merge #200
200: Fix plane sweep algorithm r=Kerollmops a=LegendreM

Fix plane sweep algorithm after creating some tests on proximity.

Co-authored-by: many <maxime@meilisearch.com>
2021-05-26 11:36:24 +00:00
a5e98cf46d Fix plane sweep algorithm 2021-05-25 18:21:55 +02:00
5012cc3a32 Fix the http-ui crate to support split facet databases 2021-05-25 11:31:06 +02:00
28bd9e183e Fix the infos crate to support split facet databases 2021-05-25 11:31:06 +02:00
3a4a150ef0 Fix the tests and remaining warnings 2021-05-25 11:31:06 +02:00
02c655ff1a Refine the facet distribution to use both databases 2021-05-25 11:30:00 +02:00
79efded841 Refine the FacetCondition from_array constructor 2021-05-25 11:30:00 +02:00
f7efde11d9 Refine the facet condition to use both facet databases 2021-05-25 11:30:00 +02:00
e62b89a2ed Make the facet distinct work with the new split facets 2021-05-25 11:30:00 +02:00
bd7b285bae Split the update side to use the number and the strings facet databases 2021-05-25 11:30:00 +02:00
038e03a4e4 Use both facet databases in the FacetIter type 2021-05-25 11:30:00 +02:00
597144b0b9 Use both number and string facet databases in the distinct system 2021-05-25 11:29:59 +02:00
837c1041c7 Clear and delete the documents from the facet database 2021-05-25 11:28:36 +02:00
a56c46b6f1 Explode the string and f64 facet databases into two 2021-05-25 11:28:36 +02:00
df7a32e3d0 Move the creation date initialization into a function 2021-05-25 11:28:35 +02:00
49bee2ebc5 Merge #190
190: Make bucket candidates optionals r=Kerollmops a=LegendreM

Before the bucket candidates were the result of the facet filters or result of the query tree.
They will now be only the result of the query tree, making the number of candidates more consistent between the same request with or without facet filters.

Fix some clippy warnings.

Fix #186 

Co-authored-by: many <maxime@meilisearch.com>
2021-05-24 11:19:32 +00:00
a3944a7083 Introduce a filtered_candidates field 2021-05-11 11:37:40 +02:00
efba662ca6 Fix clippy warnings in criteria 2021-05-10 10:27:18 +02:00
e923d51b8f Make bucket candidates optionals 2021-05-10 10:27:04 +02:00
eeb0c70ea2 meilisearch compatible primary key inference 2021-05-06 22:42:32 +02:00
313c362461 early return on empty document addition 2021-05-06 18:14:16 +02:00
c620626515 Merge pull request #188 from meilisearch/exactness-criterion
Exactness criterion
2021-05-06 17:56:21 +02:00
44b6843de7 Fix pull request reviews
Update milli/src/fields_ids_map.rs
Update milli/src/search/criteria/exactness.rs
Update milli/src/search/criteria/mod.rs
2021-05-06 14:31:03 +02:00
c1ce4e4ca9 Introduce mocked ExactAttribute step in exactness criterion 2021-05-06 14:28:31 +02:00
a3f8686fbf Introduce exactness criterion 2021-05-06 14:28:30 +02:00
25f75d4d03 Merge #189
189: Update version for the next release (v0.2.1) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-05-05 15:28:56 +00:00
7e63e32960 Merge #187
187: Fix fields distribution after documents merge r=Kerollmops a=shekhirin

Resolves https://github.com/meilisearch/milli/issues/174

The problem was with calculation of fields distribution before the merge in `output_from_sorter()`. So if you'd import two documents with the same primary key value, fields distribution will count it as two documents, while `output_from_sorter()` will merge these documents into one.

---

```console
➜ Downloads cat short_movies.json
[
{"id":"47474","title":"The Serpent's Egg","poster":"https://image.tmdb.org/t/p/w500/n7z0doFkXHcvo8QQWHLFnkEPXRU.jpg","overview":"The Serpent's Egg follows a week in the life of Abel Rosenberg, an out-of-work American circus acrobat living in poverty-stricken Berlin following Germany's defeat in World War I.","release_date":246844800,"genres":["Thriller","Drama","Mystery"]},
{"id":"47474","title":"The Serpent's Egg","poster":"https://image.tmdb.org/t/p/w500/n7z0doFkXHcvo8QQWHLFnkEPXRU.jpg","overview":"The Serpent's Egg follows a week in the life of Abel Rosenberg, an out-of-work American circus acrobat living in poverty-stricken Berlin following Germany's defeat in World War I.","release_date":246844800,"genres":["Thriller","Drama","Mystery"]}
]
➜ Downloads curl -X POST -H "Content-Type: text/json" --data-binary @short_movies.json 127.0.0.1:7700/indexes/movies/documents
{"updateId":0}
```

## Before
```console
➜ Downloads curl -s 127.0.0.1:7700/indexes/movies/stats | jq
{
  "numberOfDocuments": 1,
  "isIndexing": false,
  "fieldsDistribution": {
    "release_date": 2,
    "poster": 2,
    "title": 2,
    "overview": 2,
    "genres": 2,
    "id": 2
  }
}
```

## After
```console
➜ Downloads curl -s 127.0.0.1:7700/indexes/movies/stats | jq
{
  "numberOfDocuments": 1,
  "isIndexing": false,
  "fieldsDistribution": {
    "poster": 1,
    "release_date": 1,
    "title": 1,
    "genres": 1,
    "id": 1,
    "overview": 1
  }
}
```

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-05-05 14:45:08 +00:00
1e11578ef0 Update version for the next release (v0.2.1) 2021-05-05 14:57:34 +02:00
f8d0f5265f fix(update): fields distribution after documents merge 2021-05-04 22:12:20 +03:00
1207a058d0 Merge #185
185: Provide an iterator over all the documents in a milli index r=Kerollmops a=irevoire



Co-authored-by: tamo <tamo@meilisearch.com>
2021-05-04 14:04:16 +00:00
d61566787e provide an iterator over all the documents in a milli index 2021-05-04 11:23:51 +02:00
c08f4599f2 Merge #183
183: remove tests on main r=Kerollmops a=MarinPostma

remove testing on main since we now use bors for merging.


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-05-03 15:06:28 +00:00
bb5823c775 remove tests on main 2021-05-03 15:21:20 +02:00
792225eaff Merge #182
182: Upgrade Milli version (v0.2.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-05-03 13:00:16 +00:00
a8680887d8 Upgrade Milli version (v0.2.0) 2021-05-03 14:50:47 +02:00
5b93d6ab91 Merge #181
181: Upgrade Tokenizer version (v0.2.2) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-05-03 11:03:25 +00:00
5c762b71dd Merge #177
177: Add bors r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-05-03 10:57:09 +00:00
c30f17fafb Add bors 2021-05-03 12:29:30 +02:00
34e02aba42 Upgrade Tokenizer version (v0.2.2) 2021-05-03 10:55:55 +02:00
03bb95539b Merge pull request #180 from shekhirin/disable-autogenerated-doc-ids
Disable autogenerate_docids by default
2021-05-01 12:22:13 +02:00
d81c0e8bba feat(update): disable autogenerate_docids by default 2021-04-30 21:41:34 +03:00
c112877a4a Merge pull request #178 from meilisearch/visible-document-nb
make document addition number visible
2021-04-29 21:54:51 +02:00
e8e32e0ba1 make document addition number visible 2021-04-29 20:05:07 +02:00
b31f36d68c Merge pull request #173 from meilisearch/enhance-distinct-attributes
Remove excluded document in criteria iterations
2021-04-29 12:14:44 +02:00
ee09e50e7f Remove excluded document in criteria iterations
- pass excluded document to criteria to remove them in higher levels of the bucket-sort
- merge already returned documents with excluded documents to avoid duplicates

Related to #125 and #112
Fix #170
2021-04-29 12:09:38 +02:00
374c2782ad Merge pull request #176 from yanns/patch-1
do not use echo that escapes newline
2021-04-29 10:50:15 +02:00
566c4a53c5 do not use echo that escapes newline
Fix https://github.com/meilisearch/milli/issues/175
2021-04-29 09:25:35 +02:00
5b9524e1ba Merge pull request #172 from meilisearch/optimize-proximity-criterion
Optimize proximity criterion
2021-04-28 15:41:57 +02:00
31607bf9cd Add a threshold on proximity when choosing between linear/set algorithm 2021-04-28 14:57:22 +02:00
5a10de1b9f Merge pull request #122 from meilisearch/attribute-criterion
Introduce the Attribute criterion
2021-04-28 14:34:50 +02:00
3b7e6afb55 Make some refacto and add documentation 2021-04-28 13:53:27 +02:00
0add4d735c Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:40:34 +02:00
3794ffc952 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:39:23 +02:00
329bd4a1bb Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:39:03 +02:00
3b1358b62f Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:32:19 +02:00
c862b1bc6b Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:32:10 +02:00
e92d137676 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:31:42 +02:00
b3d6c6a9a0 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:31:13 +02:00
498c2b298c Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:30:02 +02:00
0e4e6dfada Update milli/src/search/criteria/proximity.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:29:52 +02:00
47d780b8ce Update milli/src/search/criteria/mod.rs
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-04-27 14:39:53 +02:00
0daa0e170a Fix PR comments
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 14:39:53 +02:00
0d7d3ce802 Update roaring package 2021-04-27 14:39:53 +02:00
71740805a7 Fix forgotten typo tests 2021-04-27 14:39:53 +02:00
e77291a6f3 Optimize Attribute criterion on big requests 2021-04-27 14:39:53 +02:00
716c8e22b0 Add style and comments 2021-04-27 14:39:52 +02:00
f853790016 Use the LCM of 10 first numbers to compute attribute rank 2021-04-27 14:39:52 +02:00
2b036449be Fix the return of equal candidates in different pages 2021-04-27 14:39:52 +02:00
0efa011e09 Make a small code clean-up 2021-04-27 14:39:52 +02:00
17c8c6f945 Make set algorithm return None when nothing can be returned 2021-04-27 14:39:52 +02:00
b3e2280bb9 Debug attribute criterion
* debug folding when initializing iterators
2021-04-27 14:39:52 +02:00
1eee0029a8 Make attribute criterion typo/prefix tolerant 2021-04-27 14:39:52 +02:00
59f58c15f7 Implement attribute criterion
* Implement WordLevelIterator
* Implement QueryLevelIterator
* Implement set algorithm based on iterators

Not tested + Some TODO to fix
2021-04-27 14:39:52 +02:00
361193099f Reduce the amount of branches when query tree flattened 2021-04-27 14:39:52 +02:00
7ff4a2a708 Display the number of entries in the infos crate 2021-04-27 14:39:52 +02:00
1aad66bdaa Compute stats about the word prefix level positions database in the infos crate 2021-04-27 14:39:52 +02:00
e65bad16cc Compute the words prefixes at the end of an update 2021-04-27 14:39:52 +02:00
ab92c814c3 Fix attributes score 2021-04-27 14:35:43 +02:00
0ad9499b93 Fix an indexing bug in the words level positions 2021-04-27 14:35:43 +02:00
7aa5753ed2 Make the attribute positions range bounds to be fixed 2021-04-27 14:35:43 +02:00
658f316511 Introduce the Initial Criterion 2021-04-27 14:35:43 +02:00
89ee2cf576 Introduce the TreeLevel struct 2021-04-27 14:25:35 +02:00
bd1a371c62 Compute the WordsLevelPositions only once 2021-04-27 14:25:34 +02:00
8bd4f5d93e Compute the biggest values of the words_level_positions_docids 2021-04-27 14:25:34 +02:00
f713828406 Implement the clear and delete documents for the word-level-positions database 2021-04-27 14:25:34 +02:00
3069bf4f4a Fix and improve the words-level-positions computation 2021-04-27 14:25:34 +02:00
6b1b42b928 Introduce an infos wordsLevelPositionsDocids subcommand 2021-04-27 14:25:34 +02:00
e8cc7f9cee Expose a route in the http-ui to update the WordsLevelPositions 2021-04-27 14:25:34 +02:00
3a25137ee4 Expose and use the WordsLevelPositions update 2021-04-27 14:25:34 +02:00
c765f277a3 Introduce the WordsLevelPositions update 2021-04-27 14:25:34 +02:00
9242f2f1d4 Store the first word positions levels 2021-04-27 14:25:34 +02:00
b0a417f342 Introduce the word_level_position_docids Index database 2021-04-27 14:25:34 +02:00
75e7b1e3da Implement test Context methods 2021-04-27 14:25:34 +02:00
4ff67ec2ee Implement attribute criterion for small amounts of candidates 2021-04-27 14:25:34 +02:00
0f4c0beffd Introduce the Attribute criterion 2021-04-27 14:25:34 +02:00
3bcc1c0560 Merge pull request #164 from meilisearch/clippy-fixes
Make clippy happy
2021-04-21 13:32:29 +02:00
f8dee1b402 [makes clippy happy] search/criteria/proximity.rs 2021-04-21 12:36:45 +02:00
7fa3a1d23e makes clippy happy http-ui 2021-04-21 12:36:45 +02:00
28a8df2f0a Merge pull request #160 from shekhirin/query-words-limit
Support query words limit
2021-04-21 11:14:09 +02:00
6fa00c61d2 feat(search): support words_limit 2021-04-20 12:22:04 +03:00
726fcf015a Merge pull request #146 from meilisearch/facet-float-integer-becomes-number
Facet float-integer becomes facet number
2021-04-20 10:31:47 +02:00
c9b2d3ae1a Warn instead of returning an error when a conversion fails 2021-04-20 10:23:31 +02:00
2aeef09316 Remove debug logs while iterating through the facet levels 2021-04-20 10:23:31 +02:00
51767725b2 Simplify integer and float functions trait bounds 2021-04-20 10:23:31 +02:00
efbfa81fa7 Merge the Float and Integer enum variant into the Number one 2021-04-20 10:23:30 +02:00
f5ec14c54c Merge pull request #163 from meilisearch/next-release-v0.1.1
Update version for the next release (v0.1.1)
2021-04-19 15:52:13 +02:00
127d3d028e Update version for the next release (v0.1.1) 2021-04-19 14:48:13 +02:00
1095874e7e Merge pull request #158 from shekhirin/synonyms
Support synonyms
2021-04-18 11:00:13 +02:00
33860bc3b7 test(update, settings): set & reset synonyms
fixes after review

more fixes after review
2021-04-18 11:24:17 +03:00
e39aabbfe6 feat(search, update): synonyms 2021-04-18 11:24:17 +03:00
995d1a07d4 Merge pull request #162 from michaelchiche/patch-1 2021-04-17 09:47:08 +02:00
f6b06d6e5d typo: wrong command in example 2021-04-16 20:08:43 +02:00
19b6620a92 Merge pull request #125 from meilisearch/distinct
Implement distinct attribute
2021-04-15 16:33:49 +02:00
9c4660d3d6 add tests 2021-04-15 16:25:56 +02:00
75464a1baa review fixes 2021-04-15 16:25:56 +02:00
2f73fa55ae add documentation 2021-04-15 16:25:55 +02:00
45c45e11dd implement distinct attribute
distinct can return error

facet distinct on numbers

return distinct error

review fixes

make get_facet_value more generic

fixes
2021-04-15 16:25:55 +02:00
6e126c96a9 Merge pull request #159 from meilisearch/upd-tokenizer-v0.2.1
Update Tokenizer version to v0.2.1
2021-04-14 19:02:36 +02:00
2c5c79d68e Update Tokenizer version to v0.2.1 2021-04-14 18:54:04 +02:00
c2df51aa95 Merge pull request #156 from meilisearch/stop-words
Stop words
2021-04-14 17:33:06 +02:00
dcb00b2e54 test a new implementation of the stop_words 2021-04-12 18:35:33 +02:00
da036dcc3e Revert "Integrate the stop_words in the querytree"
This reverts commit 12fb509d84.
We revert this commit because it's causing the bug #150.
The initial algorithm we implemented for the stop_words was:

1. remove the stop_words from the dataset
2. keep the stop_words in the query to see if we can generate new words by
   integrating typos or if the word was a prefix
=> This was causing the bug since, in the case of “The hobbit”, we were
   **always** looking for something starting with “t he” or “th e”
   instead of ignoring the word completely.

For now we are going to fix the bug by completely ignoring the
stop_words in the query.
This could cause another problem where someone mistyped a normal word and
ended up typing a stop_word.

For example, imagine someone searching for the music “Won't he do it”.
If that person misplaces one space and writes “Won' the do it” then we
will lose a part of the request.

One fix would be to update our query tree to something like that:

---------------------
OR
  OR
    TOLERANT hobbit # the first option is to ignore the stop_word
    AND
      CONSECUTIVE   # the second option is to do as we are doing
        EXACT t	    # currently
        EXACT he
      TOLERANT hobbit
---------------------

This would increase drastically the size of our query tree on request
with a lot of stop_words. For example think of “The Lord Of The Rings”.

For now, however, we decided to ignore this problem and consider
that doing so doesn't reduce the relevancy of the search too much
while it improves performance.
2021-04-12 18:35:33 +02:00
f9eab6e0de Merge pull request #151 from meilisearch/release-drafter
Add release drafter files
2021-04-12 10:25:52 +02:00
6a128d4ec7 Add release drafter files 2021-04-12 10:18:39 +02:00
5efe67f375 Merge pull request #154 from shekhirin/shekhirin/fix-settings-serde-tests
test(http): fix and refactor settings assert_(ser|de)_tokens
2021-04-11 10:52:38 +02:00
3af8fa194c test(http): combine settings assert_(ser|de)_tokens into 1 test 2021-04-10 12:13:59 +03:00
0d09c64dde Merge pull request #148 from shekhirin/shekhirin/setting-enum
refactor(http, update): introduce setting enum
2021-04-09 22:48:58 +02:00
84c1dda39d test(http): setting enum serialize/deserialize 2021-04-08 17:03:40 +03:00
dc636d190d refactor(http, update): introduce setting enum 2021-04-08 17:03:40 +03:00
2bcdd8844c Merge pull request #141 from meilisearch/reorganize-criterion
reorganize criterion
2021-04-01 19:50:16 +02:00
0a4bde1f2f update the default ordering of the criterion 2021-04-01 19:45:31 +02:00
ee3f93c029 Merge pull request #136 from shekhirin/index-fields-ids-distribution-cache
feat(index): store fields distribution in index
2021-04-01 18:36:21 +02:00
2658c5c545 feat(index): update fields distribution in clear & delete operations
fixes after review

bump the version of the tokenizer

implement a first version of the stop_words

The front must provide a BTreeSet containing the stop words
The stop_words are set at None if an empty Set is provided
add the stop-words in the http-ui interface

Use maplit in the test
and remove all the useless drop(rtxn) at the end of all tests

Integrate the stop_words in the querytree

remove the stop_words from the querytree except if it was a prefix or a typo

more fixes after review
2021-04-01 19:12:35 +03:00
27c7ab6e00 feat(index): store fields distribution in index 2021-04-01 18:35:19 +03:00
67e25f8724 Merge pull request #128 from meilisearch/stop-words
Stop words
2021-04-01 14:02:37 +02:00
12fb509d84 Integrate the stop_words in the querytree
remove the stop_words from the querytree except if it was a prefix or a typo
2021-04-01 13:57:55 +02:00
a2f46029c7 implement a first version of the stop_words
The front must provide a BTreeSet containing the stop words
The stop_words are set at None if an empty Set is provided
add the stop-words in the http-ui interface

Use maplit in the test
and remove all the useless drop(rtxn) at the end of all tests
2021-04-01 13:57:55 +02:00
62a8f1d707 bump the version of the tokenizer 2021-04-01 13:49:22 +02:00
56777af8e4 Merge pull request #135 from shekhirin/index-fields-ids-distribution
feat(index): introduce fields_ids_distribution
2021-03-31 17:53:45 +02:00
9205b640a4 feat(index): introduce fields_ids_distribution 2021-03-31 18:44:47 +03:00
f2a786ecbf Merge pull request #134 from meilisearch/improve_httpui
add a button to display or hide the facets
2021-03-31 17:07:04 +02:00
13ce0ebb87 stop requesting the facets if the user has hidden them 2021-03-31 16:27:32 +02:00
bcc131e866 add a button to display or hide the facets 2021-03-31 16:18:53 +02:00
529c8f0eb1 Merge pull request #131 from shekhirin/criterion-asc-desc-regex
fix(criterion): compile asc/desc regex only once
2021-03-30 15:18:21 +02:00
2cb32edaa9 fix(criterion): compile asc/desc regex only once
use once_cell instead of lazy_static

reorder imports
2021-03-30 16:07:14 +03:00
5a1d3609a9 Merge pull request #127 from shekhirin/main
feat(search, criteria): const candidates threshold
2021-03-30 14:07:19 +02:00
1e3f05db8f use fixed number of candidates as a threshold 2021-03-30 11:57:10 +03:00
a776ec9718 fix division 2021-03-29 19:16:58 +03:00
522e79f2e0 feat(search, criteria): introduce a percentage threshold to the asc/desc 2021-03-29 19:08:31 +03:00
9ad8b74111 Merge pull request #123 from irevoire/pin_tokenizer
select a specific release of the tokenizer instead of using the latest git commit
2021-03-25 22:58:11 +01:00
73dcdb27f6 select a specific release of the tokenizer instead of using the latest git commit 2021-03-25 15:00:18 +01:00
6b7cc0022b Merge pull request #118 from meilisearch/fix-offset
fix broken offset
2021-03-15 22:15:18 +01:00
9c27183876 fix broken offset 2021-03-15 20:23:50 +01:00
25f8789aa5 Merge pull request #117 from meilisearch/update-license
Update LICENSE
2021-03-15 16:26:22 +01:00
3455082458 Update LICENSE 2021-03-15 16:15:14 +01:00
b7b23cd4a8 Merge pull request #116 from meilisearch/index-metadata
add index metadata
2021-03-15 14:20:50 +01:00
f0210453a6 add updated at on put primary key 2021-03-15 14:05:48 +01:00
615fe095e1 update index updated at on index writes 2021-03-15 14:05:47 +01:00
80d0f9c49d methods to update index time metadata 2021-03-15 14:05:47 +01:00
c9f9d39b54 Merge pull request #114 from meilisearch/github-ci-use-main
Rename master into main in the Github CI
2021-03-11 20:46:06 +01:00
0cc3132f5a Rename master into main in the Github CI 2021-03-11 14:44:47 +01:00
38b6e8decd Merge pull request #106 from meilisearch/optimize-words-typo-criteria
Optimize the words criterion
2021-03-10 11:28:46 +01:00
d48008339e Introduce two new optional_words and authorize_typos Search options 2021-03-10 11:16:30 +01:00
54b97ed8e1 Update the fetcher comments 2021-03-10 10:56:26 +01:00
d301859bbd Introduce a special word_derivations function for Proximity 2021-03-10 10:42:53 +01:00
facfb4b615 Fix the bucket candidates 2021-03-10 10:42:53 +01:00
42fd7dea78 Remove the useless typo cache 2021-03-10 10:42:53 +01:00
62a70c300d Optimize words criterion 2021-03-10 10:42:53 +01:00
c53be51460 Merge pull request #105 from meilisearch/optimize-number-of-documents
Optimize the number_of_documents function
2021-03-10 10:39:12 +01:00
f51eb46c69 Use the RoaringBitmapLenCodec to retrieve the count of documents 2021-03-09 10:25:39 +01:00
7a3ce9bb1d Merge pull request #104 from meilisearch/update-license
Update the LICENSE file to match the year 2021
2021-03-08 19:11:05 +01:00
2f9af6a707 Fix the README.md bash example 2021-03-08 18:56:22 +01:00
f204344102 Update the LICENSE file to match the year 2021 2021-03-08 18:54:06 +01:00
22f20f0c29 Merge pull request #99 from meilisearch/infos-missing-db-names
Add missing databases to the infos subcommand
2021-03-08 18:52:08 +01:00
18844d60b5 Simplify the output of database sizes in the infos crate 2021-03-08 18:47:33 +01:00
3d02b19fbd Introduce the docids-words-positions subcommand to the infos crate 2021-03-08 18:47:33 +01:00
bd63da0a0e Add missing databases to the infos subcommand 2021-03-08 18:47:33 +01:00
f9be3ad3fd Merge pull request #103 from meilisearch/plane-sweep-proximity
Plane-Sweep proximity
2021-03-08 16:58:34 +01:00
d781a6164a Rewrite some code with idiomatic Rust 2021-03-08 16:27:52 +01:00
b18ec00a7a Add a logging_timer macro to the criterion next methods 2021-03-08 16:12:06 +01:00
82a0f678fb Introduce a cache on the docid_word_positions database method 2021-03-08 16:12:03 +01:00
5fcaedb880 Introduce a WordDerivationsCache struct 2021-03-08 16:00:53 +01:00
2606c92ef9 use plane sweep in proximity criterion 2021-03-08 15:58:39 +01:00
ae47bb3594 Introduce plane_sweep function in proximity criterion 2021-03-08 15:58:38 +01:00
636a9df177 Temporarily fix the tinytemplate doc hidden issue 2021-03-08 15:57:45 +01:00
f190d5f496 Merge pull request #100 from meilisearch/improve-asc-desc-criterion
Improve the Asc/Desc criteria
2021-03-08 13:37:00 +01:00
3c76b3548d Rework the Asc/Desc criteria to be facet iterator based 2021-03-08 13:32:25 +01:00
a58d2b6137 Print the Asc/Desc criterion field name in the debug prints 2021-03-08 13:32:25 +01:00
08a0ff7091 Merge pull request #101 from meilisearch/criterion-display
implement display for criterion
2021-03-08 13:29:05 +01:00
e3095be85c Remove Debug use in Display impl 2021-03-08 12:09:09 +01:00
9e1eb25232 implement display for criterion
Update milli/src/criterion.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-03-08 11:00:30 +01:00
71b069d3e1 Merge pull request #102 from meilisearch/fix-searchable-settings-test
Fix the searchable settings test
2021-03-08 10:55:30 +01:00
e5bb96bc3b Fix the searchable settings test 2021-03-06 12:48:41 +01:00
2924ed31f3 Merge pull request #97 from meilisearch/criteria
Introduce all the criteria
2021-03-03 18:24:22 +01:00
9b6b35d9b7 Clean up some comments 2021-03-03 18:19:10 +01:00
2cc4a467a6 Change the criterion output that cannot fail 2021-03-03 18:18:33 +01:00
1fc25148da Remove useless where clauses for the criteria 2021-03-03 18:09:19 +01:00
07784c8990 Tune the words prefixes threshold to compute for 1/1000 instead 2021-03-03 15:51:28 +01:00
f376c6a728 Make sure we retrieve the docid word positions 2021-03-03 15:45:03 +01:00
5c5e51095c Fix the Asc/Desc criteria to always return the QueryTree when available 2021-03-03 15:45:03 +01:00
cdaa96df63 optimize proximity criterion 2021-03-03 15:45:03 +01:00
246286f0eb take hard separator into account 2021-03-03 15:45:03 +01:00
6bf6b40495 Remove unused files 2021-03-03 15:45:03 +01:00
f118d7e067 build criteria from settings 2021-03-03 15:45:03 +01:00
025835c5b2 Fix the criteria to avoid always returning a placeholder 2021-03-03 15:45:03 +01:00
36c1f93ceb Do an union of the bucket candidates 2021-03-03 15:45:03 +01:00
b0e0c5eba0 remove option of bucket_candidates 2021-03-03 15:45:03 +01:00
daf126a638 Introduce the final Fetcher criterion 2021-03-03 15:45:03 +01:00
7ac09d7b7c remove option of bucket_candidates 2021-03-03 15:45:03 +01:00
5af63c74e0 Speed-up the MatchingWords highlighting struct 2021-03-03 15:45:03 +01:00
4510bbccca Add a lot of debug 2021-03-03 15:43:44 +01:00
ae4a237e58 Fix the maximum_proximity function 2021-03-03 15:43:44 +01:00
9bc9b36645 Introduce the Proximity criterion 2021-03-03 15:43:44 +01:00
22b84fe543 Use the words criterion in the search module 2021-03-03 15:43:44 +01:00
3d731cc861 remove option on bucket_candidates 2021-03-03 15:43:44 +01:00
14f9f85c4b Introduce the AscDesc criterion 2021-03-03 15:43:44 +01:00
b5b7ec0162 implement initial state for words criterion 2021-03-03 15:43:44 +01:00
3415812b06 Improve the intersection speed in the words criterion 2021-03-03 15:43:43 +01:00
ef381e17bb Compute the candidates for each sub query tree 2021-03-03 15:43:43 +01:00
e174ccbd8e Use the words criterion in the search module 2021-03-03 15:43:43 +01:00
1e47f9b3ff Introduce the Words criterion 2021-03-03 15:43:43 +01:00
2d068bd45b implement Context trait for criteria 2021-03-03 15:43:43 +01:00
d92ad5640a remove option on bucket_candidates 2021-03-03 15:43:43 +01:00
64688b3786 fix query tree builder 2021-03-03 15:43:43 +01:00
fb7e6df790 add tests on typo criterion 2021-03-03 15:43:43 +01:00
c5a32fd4fa Fix the typo criterion 2021-03-03 15:43:42 +01:00
a273c46559 clean warnings 2021-03-03 15:43:42 +01:00
9e093d5ff3 add cache on alterate_query_tree function 2021-03-03 15:43:42 +01:00
41fc51ebcf optimize alterate_query_tree when number_typos is zero 2021-03-03 15:43:42 +01:00
4da6e1ea9c add cache in typo criterion 2021-03-03 15:43:42 +01:00
67c71130df Reduce the number of calls to alterate_query_tree 2021-03-03 15:43:42 +01:00
9ccaea2afc simplify criterion context 2021-03-03 15:43:42 +01:00
fea9ffc46a Use the bucket candidates in the search module 2021-03-03 15:43:42 +01:00
229130ed25 Correctly compute the bucket candidates for the Typo criterion 2021-03-03 15:43:42 +01:00
5344abc008 Introduce the CriterionResult return type 2021-03-03 15:43:41 +01:00
86bcecf840 change variable's name from distance to proximity 2021-03-03 15:43:41 +01:00
4128bdc859 reduce match possibilities in docids fetchers 2021-03-03 15:43:41 +01:00
907482c8ac clean docids fetchers 2021-03-03 15:43:41 +01:00
774a255f2e use prefix cache in criteria 2021-03-03 15:43:41 +01:00
98e69e63d2 implement Context trait for criteria 2021-03-03 15:43:41 +01:00
f091f370d0 Use the Typo criteria in the search module 2021-03-03 15:43:41 +01:00
ad20d72a39 Introduce the Typo criterion 2021-03-03 15:43:41 +01:00
f0ddea821c Introduce the Typo criterion 2021-03-03 15:43:41 +01:00
73286dc8bf Introduce the query tree data structure 2021-03-03 15:43:40 +01:00
4e84999f20 Merge pull request #80 from meilisearch/query_tree
Introduce the query tree data structure
2021-03-03 14:25:29 +01:00
411a118148 Avoid testing on nightly to fix a crate issue 2021-03-03 13:57:36 +01:00
240b02e175 Remove unused Operation constructors 2021-03-03 13:40:19 +01:00
a463ae821e Add methods optional_words and authorize_typos on the query tree 2021-03-03 13:40:19 +01:00
6d135beb21 Introduce the maximum_proximity helper function 2021-03-03 13:40:18 +01:00
6008f528d0 Introduce the maximum_typo helper function 2021-03-03 13:40:18 +01:00
1dc857a4b2 Fix the query tree optional word generation with phrases 2021-03-03 13:40:18 +01:00
4f19749252 Introduce the word_documents_count method on the Context trait 2021-03-03 13:40:18 +01:00
79a143b32f Introduce the query tree data structure 2021-03-03 13:40:18 +01:00
5f109e8589 Merge pull request #95 from meilisearch/helpers-crate
Introduce an helpers crate that export the database to stdout
2021-03-01 19:59:18 +01:00
9423310816 Introduce an helpers crate that export the database to stdout 2021-03-01 19:55:04 +01:00
68102fced8 Merge pull request #86 from meilisearch/clean-up-infos-crate
Clean up the infos crate
2021-03-01 19:54:21 +01:00
1eb7ce5cdb Improve the export-documents infos command by accepting internal ids 2021-03-01 19:48:01 +01:00
4884b324e6 Remove the useless external ids patch method in the infos crate 2021-03-01 19:48:01 +01:00
78bede1ffb Fix error displaying of the workspace members 2021-03-01 19:48:01 +01:00
b59fe77ec7 Avoid creating a default empty database in the search crate 2021-03-01 19:48:01 +01:00
45330a5e47 Avoid creating a default empty database in the infos crate 2021-03-01 19:48:00 +01:00
794fce7bff Merge pull request #91 from meilisearch/add-primary-key-to-fields-map
add primary key to fields_id_map when not present
2021-03-01 16:20:41 +01:00
e08b6b3ec7 add primary key to fields_id_map when not present 2021-03-01 16:10:16 +01:00
8dcb3e0c41 Merge pull request #90 from meilisearch/words-prefixes-update
Expose the WordsPrefixes update from the UpdateBuilder
2021-02-21 12:27:48 +01:00
c62d2f56d8 Expose an http route for the WordsPrefixes update 2021-02-21 12:16:53 +01:00
c318373b88 Expose the WordsPrefixes update on the UpdateBuilder 2021-02-21 12:15:35 +01:00
3090751dfc Merge pull request #94 from meilisearch/update-dependencies
Update dependencies
2021-02-21 12:08:18 +01:00
519b1cb5c9 Update dependencies 2021-02-21 10:26:04 +01:00
e62157e896 Merge pull request #88 from meilisearch/heed-error-word-documents-count
Return an heed error from the word_documents_count method
2021-02-18 15:05:00 +01:00
c2ffcc4bd1 Return an heed error from the word_documents_count method 2021-02-18 14:59:37 +01:00
09ca5d14c9 Merge pull request #87 from meilisearch/roaring-bitmap-length
Introduce fast methods to get roaring bitmap lengths
2021-02-18 14:52:40 +01:00
2f561c77f5 Introduce the word documents count method on the index 2021-02-18 14:35:14 +01:00
8d710c5130 Introduce heed codecs to retrieve the length of roaring bitmaps 2021-02-18 14:30:47 +01:00
fcfb39c5de Move the RoaringBitmap related codecs into a module 2021-02-18 13:56:28 +01:00
85c3d8aa52 Merge pull request #79 from meilisearch/prefix-caches
Introduce prefix databases
2021-02-17 11:27:15 +01:00
aa4d9882d2 Introduce the new words-prefixes-docids infos subcommand 2021-02-17 11:22:27 +01:00
49aee6d02c Fix the database-stats infos subcommand 2021-02-17 11:22:27 +01:00
7a0f86a04f Introduce an infos command to extract the words prefixes fst 2021-02-17 11:22:27 +01:00
a4a48be923 Run the words prefixes update inside of the indexing documents update 2021-02-17 11:22:26 +01:00
8788485924 Take the prefix databases into account in the infos subcommand 2021-02-17 11:22:26 +01:00
616ed8f73c Clean up the word prefix pair proximities when deleting documents 2021-02-17 11:22:26 +01:00
ea37fd821d Clean up the words prefixes when deleting documents and words 2021-02-17 11:22:25 +01:00
62eee9c69e Introduce the sorter_into_lmdb_database helper function 2021-02-17 11:12:39 +01:00
b5b89990eb Compute and write the word prefix pair proximities database 2021-02-17 11:12:38 +01:00
9b03b0a1b2 Introduce the word prefix pair proximity docids database 2021-02-17 11:12:38 +01:00
f365de636f Compute and write the word-prefix-docids database 2021-02-17 11:12:38 +01:00
ee5a60e1c5 Clear the words prefixes when clearing an index 2021-02-17 10:45:17 +01:00
5e7b26791b Take the words-prefixes into account while computing the biggest values 2021-02-17 10:45:17 +01:00
b3a21d5a50 Introduce the getters and setters for the words prefixes FST 2021-02-17 10:45:17 +01:00
48b470140b Merge pull request #84 from meilisearch/stringify-documents-ids
Stringify documents ids even when deleting documents
2021-02-15 21:30:51 +01:00
89ce4e74fe Do not change the primary key type when we serialize documents 2021-02-15 21:24:36 +01:00
69acdd437e Deserialize documents ids into JSON Values on deletion 2021-02-15 21:24:36 +01:00
b3776598d8 Add a test to check deletion of documents with number as primary key 2021-02-15 21:24:35 +01:00
5d0ac3e3e6 Merge pull request #81 from meilisearch/smart-workspace
Change the project to become a workspace
2021-02-14 19:02:00 +01:00
fecf3d6fc1 Move the command lines helpers into different crates 2021-02-14 18:55:15 +01:00
d8f3421608 Update the dependencies and remove the unused ones 2021-02-14 18:32:46 +01:00
e8639517da Change the project to become a workspace with milli as a default-member 2021-02-12 16:15:09 +01:00
d450b971f9 Merge pull request #78 from meilisearch/required-changes-for-transplant
Changes for transplant
2021-02-02 16:22:09 +01:00
8f43698a60 fix httpui 2021-02-01 19:49:51 +01:00
3b60432687 Use update_id in UpdateBuilder
Add the `update_id` to the updates. The rationale is the
following:
- It allows for better traceability of the update events, thus improved
  debugging and logging.
- The engine is now aware of what it has already processed, and can return
  it if asked. It may not make sense now, but in the future, the update
  store may not work the same way, and this information about the state
  of the engine will be desirable (distributed environment).
2021-02-01 19:46:34 +01:00
d487791b03 derive serde for method and format
This is nicer when working with UpdateMeta struct
2021-02-01 19:46:34 +01:00
91d8198d17 return documents number on addition 2021-02-01 19:42:10 +01:00
fa0cc2dc13 Merge pull request #66 from meilisearch/show-available-facets
Expose an API to compute facets distribution
2021-02-01 18:39:45 +01:00
14ae01a6c9 Fix some typos in error messages 2021-02-01 18:10:57 +01:00
f5f4438b43 Remove the duplicated code inside the facet_values_from_documents method 2021-01-28 11:22:18 +01:00
b6e91291fb Add a comment to explain Serialize on FacetValue is implemented by hand 2021-01-27 18:29:56 +01:00
b41bf58658 Split the FacetDistribution facet_values method into three 2021-01-27 18:29:56 +01:00
a3e3bebed7 Rework the FacetDistribution execute method to use the faceted_fields struct 2021-01-27 18:29:54 +01:00
11309ee99c Rework the FacetDistribution execute method to use the faceted_fields struct 2021-01-27 14:53:50 +01:00
9c8a654079 Add comments to help read the facet_values branchings 2021-01-27 14:49:08 +01:00
2e00740515 Make sure that we don't iterate through all string facet values 2021-01-27 14:41:36 +01:00
b52d500fbc Reorder the FacetType enum branching in the facet_value method 2021-01-27 14:36:49 +01:00
d91d321129 Introduce some constants to the FacetDistribution struct and settings 2021-01-27 14:32:30 +01:00
60480a1e2f Rework the FacetCondition from_array constructor 2021-01-27 14:25:53 +01:00
65b821b192 Rename the Index facets method into facets_distribution 2021-01-27 14:15:33 +01:00
433ac8c38a Remove the ordered-float serde feature 2021-01-27 14:11:10 +01:00
70e9b1e936 Introduce a flag to the search subcommand to display the facet distribution 2021-01-26 14:58:18 +01:00
61dbcfa44a Bump the roaring to 0.6.4 2021-01-26 14:38:43 +01:00
916dd3b7c5 Use the faceted_fields_ids method to fetch the ids 2021-01-26 14:14:38 +01:00
b0c31500fc Simplify the front page 2021-01-26 14:14:38 +01:00
7be275b692 Add the count to the facet distribution 2021-01-26 14:14:37 +01:00
4b9e81fc89 Order the facet values lexicographically 2021-01-26 14:09:09 +01:00
51a37de885 Introduce the FacetValue enum type 2021-01-26 14:09:09 +01:00
d893e83622 Speed-up facet aggregation by using a FacetIter 2021-01-26 14:09:08 +01:00
33945a3115 Introduce a new facet filters query field 2021-01-26 14:09:08 +01:00
afa86d8a45 Add a simple test to the FacetCondition from_array method 2021-01-26 14:06:29 +01:00
cb5e57e2dd FacetCondition can be created from array of facets 2021-01-26 14:06:28 +01:00
a8e3269ad6 Introduce a basic front to display facets 2021-01-26 14:06:28 +01:00
2cd8675734 Show facet values even for empty queries 2021-01-26 14:06:28 +01:00
3916c54501 Speed-up facet aggregation on low number of candidates 2021-01-26 14:06:28 +01:00
a17bb54d8f Limit the number of values by facets to a maximum of 1000 2021-01-26 14:06:28 +01:00
aa129dd7e8 Display the number of candidates instead of the returned document count 2021-01-26 14:06:28 +01:00
510df4729c Append the facet value to the facet query on click 2021-01-26 14:06:28 +01:00
d25a859985 Display the facet values on the HTML debug page 2021-01-26 14:06:28 +01:00
3b64735058 Introduce a struct to compute facets values 2021-01-26 14:06:27 +01:00
30dae0205e Merge pull request #67 from meilisearch/fix-settings
Fix displayed and searchable attributes
2021-01-26 14:03:43 +01:00
87a56d2bc9 Fix settings bug
replace ids with str in settings

This allows for better maintainability of the settings code, since
updating the searchable attributes is now straightforward.

criterion use string

fix reindexing fieldid remapping

add tests for primary_key compute

fix tests

fix http-ui

fixup! add tests for primary_key compute

code improvements settings

update deps

fixup! code improvements settings

fixup! refactor settings updates and fix bug

fixup! Fix settings bug

fixup! Fix settings bug

fixup! Fix settings bug

Update src/update/index_documents/transform.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>

fixup! Fix settings bug
2021-01-26 13:53:08 +01:00
26f060f66b Merge pull request #75 from meilisearch/fix-search-subcommand
Fix the search subcommand document display loop
2021-01-20 10:07:16 +01:00
c35befbf38 Fix the search subcommand document display loop 2021-01-18 19:06:36 +01:00
2fa5808e3f Merge pull request #71 from meilisearch/cleanup-useless-build-rs
Cleanup useless custom build file
2021-01-15 15:45:47 +01:00
44c0dd0762 Fix an fst Set related warning 2021-01-13 11:03:03 +01:00
1bb9348a90 Remove the chinese-words.txt previous tokenizer related file 2021-01-13 11:01:57 +01:00
9141f5ef94 Remove the custom build.rs file 2021-01-13 11:01:38 +01:00
51d1785576 Merge pull request #63 from meilisearch/meilisearch-tokenizer
Meilisearch tokenizer
2021-01-12 13:26:24 +01:00
4f7f7538f7 highlight with new tokenizer 2021-01-11 21:59:37 +01:00
1ae761311e integrate with meilisearch tokenizer 2021-01-07 16:14:27 +01:00
7e1c94ab9c Merge pull request #65 from meilisearch/improve-facet-value-display
Improve the facet value displaying
2021-01-07 16:12:32 +01:00
0a1beb688c Improve the facet value displaying, extracting the facet level 2021-01-07 16:05:09 +01:00
5dd4dc2862 Merge pull request #60 from meilisearch/accept-compressed-documents-updates
Accept and mirror compression of documents additions
2020-12-23 10:59:26 +01:00
a576c7ae4b Display the update meta result content on the update page 2020-12-22 13:42:43 +01:00
6c7db3d956 Display the time it took to process an update 2020-12-22 13:42:43 +01:00
9fcbc83ebc Accept and mirror compression of documents additions 2020-12-22 13:42:42 +01:00
cd158d4cde Merge pull request #61 from meilisearch/update-handler
create update handler trait
2020-12-22 13:42:00 +01:00
49a016b53d create update handler trait
fix type inference error
2020-12-22 12:59:15 +01:00
5039528b56 Merge pull request #59 from meilisearch/improve-bytes-structopt
Use the byte-unit crate to ease library usage
2020-12-20 14:52:39 +01:00
77e951e933 Use the byte-unit crate to ease library usage 2020-12-20 12:00:37 +01:00
b032ceb5d4 Merge pull request #56 from meilisearch/asc-desc-criteria-non-faceted
Return non-faceted documents to complete the requested limit
2020-12-17 14:34:36 +01:00
914eab12f7 Return non-faceted documents as remaining results 2020-12-17 13:57:07 +01:00
0dec761e21 Merge pull request #54 from meilisearch/compress-updates
Compress updates content using gzip
2020-12-17 11:06:31 +01:00
5a23417499 Compress updates content using gzip 2020-12-17 10:59:58 +01:00
cd5605bb86 Merge pull request #50 from meilisearch/fix-asc-desc-criterion
Fix the Asc/Desc criteria
2020-12-13 11:59:11 +01:00
0e5609d40e Limit the number of elements after reversing it 2020-12-12 14:21:27 +01:00
9d966a28d3 Merge pull request #47 from meilisearch/fix-grenad-write-bug
Bump grenad to fix an indexing bug
2020-12-05 17:18:49 +01:00
e7f2ab9138 Bump grenad to fix an indexing bug 2020-12-05 16:39:15 +01:00
9628da2d17 Merge pull request #40 from meilisearch/asc-desc-faceted-fields
Ascending and descending custom ranking
2020-12-04 12:08:22 +01:00
026f54dcf7 Use the field id docid facet value database when sorting documents 2020-12-04 12:03:20 +01:00
3cdf14d4c5 Introduce the field-id-docid-facet-values database 2020-12-04 12:03:20 +01:00
4ffbddf21f Introduce debug info for the time it takes to fetch candidates 2020-12-04 12:03:20 +01:00
13217f072b Use the FacetRange iterator in the facet exploring function 2020-12-04 12:03:20 +01:00
0959e1501f Introduce the FacetRevRange Iterator struct 2020-12-04 12:02:23 +01:00
58d039a70d Introduce the FacetIter Iterator 2020-12-04 12:02:23 +01:00
d8e25a0863 Order documents by the first custom criterion on basic searches 2020-12-04 12:02:23 +01:00
e0cc7faea1 Use the facet ordered to the search 2020-12-04 12:02:23 +01:00
61b383f422 Introduce the criteria update setting 2020-12-04 12:02:22 +01:00
f8f33d35e0 Add the criteria list to the index 2020-12-02 11:21:26 +01:00
57e8e5c965 Move the FacetCondition to its own module 2020-12-02 11:21:26 +01:00
ecc8bc8910 Introduce the FieldId u8 alias type 2020-12-02 11:19:45 +01:00
0a63e69e04 Merge pull request #45 from meilisearch/infos-export-documents
Infos export documents
2020-12-02 10:50:54 +01:00
16755b26e2 Make the export words FST export infos subcommand outputs to stdout 2020-12-02 10:43:22 +01:00
85d51ab228 Introduce an infos subcommand to export documents from an index 2020-12-02 10:42:48 +01:00
92f253adb2 Merge pull request #41 from meilisearch/update-store-delete-updates
Allow users to abort pending updates
2020-12-01 14:56:00 +01:00
222f2913c1 Simplify the processing_update UpdateStore method 2020-12-01 14:51:05 +01:00
878b1873cd Make sure to avoid removing the first pending update as it is processed 2020-12-01 14:51:05 +01:00
96f64c629e Move the UpdateStore out of the update module 2020-12-01 14:51:05 +01:00
58a1f9081c Allow users to abort pending updates, one by one or all at once 2020-12-01 14:51:05 +01:00
e4c2abb1d9 Merge pull request #44 from meilisearch/clippy
Fix some clippy warnings
2020-12-01 14:50:31 +01:00
d0240bd9d0 Done a big clippy pass 2020-12-01 14:45:19 +01:00
6e3f4e5e45 Merge pull request #43 from meilisearch/lowercase-facet-strings
Lowercase the facet string value
2020-12-01 14:44:39 +01:00
844a9022fb Introduce the FacetStringOperator equal and not_equal constructors 2020-12-01 14:29:44 +01:00
45877b3154 Lowercase the facet string value 2020-12-01 14:10:00 +01:00
6120f6590b Merge pull request #38 from meilisearch/facet-queries
Introduce a facet filter system
2020-11-28 17:21:07 +01:00
ba4ba685f9 Make the facet levels maps to previous level groups and don't split them 2020-11-28 12:43:43 +01:00
276c87af68 Introduce more test to the FacetCondition struct 2020-11-23 16:43:57 +01:00
a50f63840f Return spanned pest error while parsing numbers in facet filters 2020-11-23 16:43:57 +01:00
54d5cec582 Transform numbers into strings when faceted and necessary 2020-11-23 16:43:56 +01:00
fc686aaca7 Use the De Morgan law to simplify the NOT operation 2020-11-23 16:43:56 +01:00
7370ef8c5e Add two simple test to the facet FacetCondition struct construction 2020-11-23 16:43:56 +01:00
fc242f6e1f Rewrite the FacetCondition Debug impl in a defensive way 2020-11-23 16:43:56 +01:00
a0adfb5e8e Introduce a real pest parser and support every facet filter conditions 2020-11-23 16:43:55 +01:00
c52d09d5b1 Support a basic version of the string facet query system 2020-11-23 16:43:55 +01:00
498f0d8539 Output the documents count for each facet value in the infos subcommand 2020-11-23 16:43:55 +01:00
278391d961 Move the facets related system into the new search module 2020-11-23 16:43:54 +01:00
531bd6ddc7 Make the facet operator evaluation code generic 2020-11-23 16:43:54 +01:00
d40dd3e4da Reduce the amount of duplicated code to iterate over facet values 2020-11-23 16:43:54 +01:00
07a0c82790 Bump heed to 0.10.4 to be able to lazily decode roaring bitmaps 2020-11-23 16:43:53 +01:00
59ca4b9fe4 Introduce a little bit of debug when deleting documents 2020-11-23 16:43:53 +01:00
0694cc4916 Drastically speed up documents deletion updates 2020-11-23 16:43:53 +01:00
38c76754ef Make the facet level search system generic on f64 and i64 2020-11-23 16:43:52 +01:00
9e2cbe3362 Improve the FacetLevelF64 serialization 2020-11-23 16:43:52 +01:00
ced0c29c56 Simplify getting the biggest level of a facet field 2020-11-23 16:43:52 +01:00
7d67c9e2e7 Improve the facet search algorithm performances 2020-11-23 16:43:52 +01:00
67d4a1b3fc Introduce a new update for the facet levels 2020-11-23 16:43:51 +01:00
45e0feab4e Speed up the facets stats infos subcommand 2020-11-23 16:43:51 +01:00
7a6e6eb5e2 Introduce a facets stats infos subcommand 2020-11-23 16:43:51 +01:00
9ec95679e1 Introduce a function to retrieve the facet level range docids 2020-11-23 16:43:50 +01:00
57d253aeda Improve the infos biggest-value subcommand to support facets 2020-11-23 16:43:50 +01:00
fd8360deb1 Update the facet indexing facet test 2020-11-23 16:43:50 +01:00
9b7e516a56 Fix the indexing process going back in time 2020-11-23 16:43:49 +01:00
b255be93fa Bump heed to 0.10.3 2020-11-23 16:43:49 +01:00
218eb97241 Introduce an input field for the facet filters on the http-ui 2020-11-23 16:43:49 +01:00
2341b99379 Support a basic facet based query system 2020-11-23 16:43:49 +01:00
1d5795d134 Merge pull request #39 from meilisearch/speedup-documents-ids-merging
Speedup documents ids merging
2020-11-22 19:32:24 +01:00
05c95dfdc6 Introduce an infos subcommand that patches the external documents ids 2020-11-22 19:27:34 +01:00
27f3ef5f7a Use the new ExternalDocumentsIds struct in the engine 2020-11-22 19:27:34 +01:00
fe82516f9f Use the ExternalDocumentsIds in the Index struct 2020-11-22 19:27:34 +01:00
415c0b86ba Introduce the ExternalDocumentsIds struct 2020-11-22 19:27:33 +01:00
eded5558b2 Rename the users ids documents ids into external documents ids 2020-11-22 17:17:47 +01:00
f06355b0bb Display the time it takes to merge user documents ids 2020-11-22 11:28:35 +01:00
b0c5f59c07 Merge pull request #36 from meilisearch/index-facets
Index facets values and support facet numbers
2020-11-14 14:32:05 +01:00
e76558b0cc Change the settings update system to reindex only one time 2020-11-14 11:17:49 +01:00
f9cc12ae0f Do not try to parse empty faceted strings 2020-11-13 18:35:47 +01:00
23f9a22edc Update the HTTP settings route to accept the faceted fields 2020-11-13 18:35:47 +01:00
8e6efe4d87 Introduce an infos subcommand to display the facet values 2020-11-13 18:35:47 +01:00
a18d9a1f87 Parse and store the faceted fields 2020-11-13 16:13:51 +01:00
4e5e55c21a Simplify the merge functions 2020-11-13 14:50:30 +01:00
8ae9888959 Store the field id instead of the field name in the facets database 2020-11-13 14:50:30 +01:00
cf9ddd293d Simplify the facet types 2020-11-13 11:46:48 +01:00
466fb601d6 Faceted fields settings must specify the facet type 2020-11-13 11:46:48 +01:00
ebe7087bff Introduce the faceted fields setting 2020-11-11 17:08:18 +01:00
72f18759ba Introduce getters and setters for the facet fields ids facet types 2020-11-11 16:26:22 +01:00
92ec908303 Introduce the facet field id values engine database 2020-11-11 16:06:33 +01:00
e0058c1125 Introduce codecs for facet types (string, f64, u64, i64) 2020-11-11 15:48:24 +01:00
b4951c058b Merge pull request #35 from meilisearch/better-update-progress
Better update progress
2020-11-11 13:19:32 +01:00
a71a96894d Use the new indexing progress events in the http server 2020-11-11 13:14:24 +01:00
ea43080548 Make the indexing process send the new progress step events 2020-11-11 13:13:08 +01:00
e78b96a657 Introduce a more detailed progress status enum 2020-11-11 12:31:59 +01:00
8a4794fc51 Merge pull request #34 from meilisearch/speedup-indexing
Write the words pairs proximities directly into LMDB to speedup indexing
2020-11-11 11:30:28 +01:00
535f8088d7 Write the words pairs proximities directly into LMDB to speedup indexing 2020-11-11 11:25:31 +01:00
fbe8ec1fe7 Merge pull request #33 from meilisearch/speedup-CI
Avoid compiling benchmarks and speedup the CI
2020-11-11 11:20:26 +01:00
a55453e634 Avoid compiling benchmarks and speedup the CI 2020-11-11 11:14:57 +01:00
5a6b62e77c Merge pull request #32 from meilisearch/http-get-one-document
Introduce a route to get one document
2020-11-11 11:14:00 +01:00
63fab07047 Introduce a route to retrieve a document with its id 2020-11-11 11:04:11 +01:00
c00fc6f8bb Merge pull request #31 from meilisearch/improve-update-process
Improve update process
2020-11-09 17:45:19 +01:00
0cfeee13ee Reduce the number of documents limit when update progress are sent 2020-11-09 17:34:52 +01:00
cf8a6a042e Display a real progress bar when updates are processed 2020-11-09 17:33:36 +01:00
45ae086974 Make sure pending updates are processed when restarting the UpdateStore 2020-11-09 17:33:07 +01:00
8ffdfa72e3 Merge pull request #28 from meilisearch/highlight-json-value
Make the engine able to highlight any json type
2020-11-09 10:23:22 +01:00
4fb138c42e Make sure we index all kind of JSON types 2020-11-06 16:35:07 +01:00
640c7d748a Modify the highlight function to support any JSON type 2020-11-05 13:59:32 +01:00
c94bc59d7e Introduce a function to transform an obk into a JSON 2020-11-05 13:57:29 +01:00
b220885f42 Fix the milli logo in the README 2020-11-05 11:43:47 +01:00
1c2d36d8a3 Merge pull request #27 from meilisearch/split-http-ui
Move the http server into its own sub-module
2020-11-05 11:36:04 +01:00
0408c9d66a Move the http server into its own sub-module 2020-11-05 11:16:39 +01:00
749764f35b Merge pull request #26 from meilisearch/searchable-attributes
Introduce the searchable attributes
2020-11-04 09:40:03 +01:00
a31db33e93 Introduce an optimization when the searchable attributes are ordered 2020-11-03 19:59:09 +01:00
01c4f5abcd Introduce the searchable attributes setting to the settings route 2020-11-03 19:35:55 +01:00
63f65bac3e Ignore the long running UpdateStore test 2020-11-03 19:12:00 +01:00
a20c871ece Add more tests to the Settings searchable attributes operation 2020-11-03 18:58:19 +01:00
649fb6e401 Make sure that the indexing Store only index searchable fields 2020-11-03 18:58:19 +01:00
e48630da72 Introduce the searchable parameter settings to the Settings update 2020-11-03 18:58:19 +01:00
68d783145b Introduce searchable fields methods on the index 2020-11-03 18:58:19 +01:00
32486b5beb Merge pull request #25 from meilisearch/update-ci
Update the Github Actions settings
2020-11-03 18:53:04 +01:00
a716ec61b9 Remove the fmt and clippy jobs 2020-11-03 18:52:45 +01:00
c059924a8f Remove the bors config as it does not work on private repositories 2020-11-03 18:25:49 +01:00
3ef031b2fe Update the CI to work on push and PRs 2020-11-03 18:25:12 +01:00
58c07e7f8c Merge pull request #23 from meilisearch/update-builder-thread-pool
Allow library users to specify the rayon ThreadPool for UpdateBuilder
2020-11-02 19:11:50 +01:00
7e120fc441 Allow library users to specify the rayon ThreadPool for UpdateBuilder 2020-11-02 19:11:22 +01:00
87902de010 Merge pull request #22 from meilisearch/update-readme
Update the README
2020-11-02 18:28:16 +01:00
1718fe3d74 Update the README to be up to date with the recent updates 2020-11-02 18:07:24 +01:00
82322ddab6 Merge pull request #21 from meilisearch/displayed-attributes
Add the displayed attributes setting to an index
2020-11-02 15:50:29 +01:00
3d1854ab95 Introduce an HTTP route to accept settings changes 2020-11-02 15:47:21 +01:00
995d72b8c1 Introduce the Settings update operation 2020-11-02 15:31:20 +01:00
0c612f08c7 Rename the indexing warp routes 2020-11-02 15:30:29 +01:00
9b08f48dbd Construct the documents based on the displayed fields or fields ids order 2020-11-02 13:01:32 +01:00
303c3ce89e Clean up the heed imports in the index module 2020-11-02 12:49:54 +01:00
8f56753a2f Introduce displayed fields methods on the index 2020-11-02 12:49:54 +01:00
4fded5bd0e Bump heed to be able to reference a RoTxn from multiple threads 2020-11-02 12:49:23 +01:00
3abfe8aa22 Validate documents ids before accepting them 2020-11-01 20:55:21 +01:00
0ccf4cf785 Simplify the IndexDocuments builder creation from the UpdateBuilder 2020-11-01 17:31:20 +01:00
d8ff939409 Introduce bors to the project 2020-11-01 14:49:07 +01:00
9047dc8163 Add a Github actions workflows 2020-11-01 14:47:44 +01:00
600aa223c2 Fix a bug where generated docids were not saved when indexing JSON docs 2020-11-01 12:19:07 +01:00
f0e63025b0 Update the Transform struct to support JSON stream updates 2020-11-01 12:19:06 +01:00
082ad84914 Fix the benchmarks 2020-10-31 22:18:29 +01:00
6d52c5b2f0 Introduce a parameter to disable the engine to autogenerate docids 2020-10-31 21:46:55 +01:00
21b4d60101 Add replace/update csv/json from the HTTP server 2020-10-31 20:52:49 +01:00
a4f8be7811 Support numbers and boolean when indexing JSON 2020-10-31 20:52:49 +01:00
f0d028d3a4 Update the Transform struct to support JSON updates 2020-10-31 20:52:49 +01:00
9d47ee52b4 Generate a uuid v4 based document id when missing 2020-10-31 15:11:06 +01:00
ddbd336387 Introduce primary key methods on the index 2020-10-31 11:50:59 +01:00
0d01e4854b Add a test to check that merging works correctly with CSVs 2020-10-30 13:46:56 +01:00
955302fd95 Introduce an HTTP route to clear the documents 2020-10-30 13:12:55 +01:00
7cc1a358f5 Fix a documents indexing bug and add a test 2020-10-30 12:14:25 +01:00
99da69c85f Introduce the prepare_for_closing Index method 2020-10-30 11:46:14 +01:00
222063b19d Introduce the Index path method 2020-10-30 11:46:00 +01:00
085d3b9d94 Update heed to 0.10.0 2020-10-30 11:42:00 +01:00
a30206a665 Prefer using iterator put_current instead of a get put method 2020-10-30 11:13:45 +01:00
e63fdf2b22 Move the heed env into the index itself to ease the usage of the library 2020-10-30 10:56:35 +01:00
b5d52b6b45 Prefer using a smallstr instead of a real String to reduce allocations 2020-10-29 14:32:32 +01:00
40993a0d25 Fix an indexing process bug, where documents were not written in order 2020-10-29 14:20:03 +01:00
855a251489 Enable the clear documents optimization that wasn't working due to a bug 2020-10-29 13:52:48 +01:00
1228c2948d Add a comment about the ClearDocuments operation in the DeleteDocuments 2020-10-28 11:17:36 +01:00
98fc24cbdf Bump heed to fix a prefix iter bug 2020-10-28 10:55:21 +01:00
d6338af766 Improve documents deletion by iterating over all the word pair positions 2020-10-27 18:50:09 +01:00
3889d956d9 Introduce the UpdateBuilder and use it in the HTTP routes 2020-10-27 18:47:58 +01:00
5c62fbb6a8 Move the IndexDocuments update into its own module 2020-10-26 12:21:13 +01:00
8f76ec97c0 Move the DeleteDocuments update into its own module 2020-10-26 11:01:00 +01:00
92ef1faa97 Move the ClearDocuments update into its own module 2020-10-26 10:58:17 +01:00
1e1821f002 Introduce the merge_two_obkv function to merge documents on update 2020-10-26 10:55:07 +01:00
60347a5483 Move the AvailableDocumentsIds iterator into the update module 2020-10-26 10:53:23 +01:00
b14cca2ad9 Introduce the UpdateBuilder type along with some update operations 2020-10-25 18:32:01 +01:00
adacc7977d Make the Index return default values when value don't exist 2020-10-25 18:30:24 +01:00
a7a4984175 Introduce the Transform type into the indexing system 2020-10-24 17:06:09 +02:00
b44b04d25b Serialize the CSV record values as JSON strings 2020-10-24 14:43:46 +02:00
656a851830 Introduce the Transform struct transforming CSVs
This allows us to:
  - transform a CSV, a JSON or a JSON lines data type into the same
    Grenad x Obkv streamable data type and creates the new FieldsIdsMap.
  - Extract all the documents user ids in advance to be able to delete
    the existing documents before re-indexing them.
  - Keep the last documents with the same user id avoiding duplicates
    in the same request.
2020-10-24 13:37:38 +02:00
8d82e37ec0 Introduce the AvailableDocumentsIds iterator 2020-10-23 12:07:01 +02:00
2a4cd81c86 Add documentation to the Index methods 2020-10-22 15:44:12 +02:00
566a7c3039 Make the FieldsIdsMap serialization more stable by using a BTreeMap 2020-10-22 14:53:20 +02:00
9133f38138 Introduce the FieldsIdsMap type 2020-10-22 12:56:35 +02:00
802e925fd7 Switch to a JSON protocol for the front page 2020-10-21 18:26:29 +02:00
5caf523fd9 Move the Index to its own module 2020-10-21 15:55:48 +02:00
2210818114 Introduce the obkv heed codec 2020-10-21 15:51:48 +02:00
f6eecb855e Send a basic progressing status to the updates front page 2020-10-21 15:38:28 +02:00
4eeeccb9cd Change the UpdateStore to have different processed and pending meta types 2020-10-21 13:52:15 +02:00
16ab3e02a9 Change the UpdateStore internal meta serializer 2020-10-21 13:42:49 +02:00
f948a03be2 Optimise the merge functions to avoid allocations 2020-10-20 16:40:50 +02:00
cde8478388 Replace the panic in the merge function by actual errors 2020-10-20 16:19:07 +02:00
8ed8abb9df Introduce an append-only indexing system 2020-10-20 15:00:58 +02:00
a122d3d466 Export the indexing part into a module 2020-10-20 14:22:09 +02:00
eb92e72e6c Updates can send progress update status 2020-10-20 12:28:10 +02:00
341046c96c Remove the js map file from the filesize.js script 2020-10-20 12:20:42 +02:00
3a934b7020 Split the update attributes on the updates front page 2020-10-20 12:19:48 +02:00
03ca1ff634 Make the updates page interactive 2020-10-20 12:09:38 +02:00
35c9a3c558 Broadcast the updates infos to every ws clients 2020-10-20 11:19:34 +02:00
56c3a61d83 Introduce a new updates page 2020-10-19 19:57:15 +02:00
871222aebd Introduce some new routes to handle live indexing 2020-10-19 16:06:43 +02:00
d3145be744 Rename the meta UpdateStore method 2020-10-19 14:00:00 +02:00
8bfa43f9a7 Update the iter_metas UpdateStore method 2020-10-19 13:58:08 +02:00
65e32fecb1 Move the binaries into one with subcommands 2020-10-19 13:44:17 +02:00
ff389f1270 Update heed-types to 0.7.1 2020-10-19 11:52:59 +02:00
5b4eda670b Add two tests for the UpdateStore 2020-10-18 18:55:09 +02:00
edb8c99fbe Introduce a method to get the meta of an update on the UpdateStore 2020-10-18 17:19:04 +02:00
eca49e3a03 Introduce a notification channel for the UpdateStore 2020-10-18 16:37:37 +02:00
83c1db8763 Introduce the UpdateStore 2020-10-18 15:26:57 +02:00
90d4c1d153 Simplify the words pair proximity computation 2020-10-15 16:18:43 +02:00
9021b2dba6 Introduce the enable-chunk-fusing flag 2020-10-14 18:44:59 +02:00
f980422c57 Move from oxidized-mtbl to grenad 2020-10-14 12:47:32 +02:00
b342a86c15 Divide the max-memory parameter by the number of sorters in the store 2020-10-08 17:27:53 +02:00
fb2c402ae1 Split the max-memory by the number of jobs 2020-10-07 14:23:22 +02:00
38820bc75c Improve and simplify the query tokenizer 2020-10-07 14:23:22 +02:00
4e9bd1fef5 Bump oxidized-mtbl 2020-10-07 14:23:22 +02:00
a00f5850ee Add support for placeholder search for empty queries 2020-10-06 20:19:50 +02:00
433d9bbc6e Use CompressionType::from_str rather than a custom function 2020-10-06 13:50:34 +02:00
4b819457c9 Enable the structopt/clap warp help feature 2020-10-06 13:06:22 +02:00
a2182e68a6 Rewrite the parallel merge indexing part 2020-10-05 20:54:06 +02:00
e9e03259c1 Improve the mDFS performance and return the proximity 2020-10-05 18:13:56 +02:00
bb15f16d8c Merge other databases content while writing into LMDB at the same time 2020-10-05 16:35:10 +02:00
9af946a306 Merging the main, word docids and words pairs proximity docids in parallel 2020-10-04 18:40:34 +02:00
99705deb7d Directly use a writer for the docid word positions 2020-10-04 18:17:53 +02:00
67577a3760 It is an error to merge docid word positions 2020-10-04 17:31:12 +02:00
ce8e56ee18 Rewrite the indexer to use one MTBL by database
This allows us to avoid prefixing keys and appending into LMDB databases
2020-10-04 17:04:33 +02:00
770f29fd05 Bump the oxidized-mtbl dependency 2020-10-04 17:04:33 +02:00
acd2a63879 Introduce a simple FST based chinese word segmenter 2020-10-04 17:04:33 +02:00
6cc6addc2f Increase the CboRoaringBitmapCodec threshold 2020-10-02 17:06:17 +02:00
e41a3822a6 Add a simple test for the CboRoaringBitmapCodec 2020-10-02 16:52:36 +02:00
c4b0c57059 Reduce the default indexer max-memory parameter 2020-10-02 16:47:41 +02:00
007e647462 Introduce the Mdfs Iterator that explore the proximity graph using a mana DFS 2020-10-02 16:46:07 +02:00
d4e80407e5 Introduce the mana depth first search algorithm 2020-10-02 16:46:07 +02:00
f6a8096720 Rename the quartile as percentiles 25th, 50th and 75th 2020-10-02 16:46:07 +02:00
891e0188dd Introduce the database-stats infos subcommand 2020-10-02 16:46:07 +02:00
079742b4d3 Clean up the stats and size of database infos subcommands 2020-10-02 16:46:06 +02:00
d0c73564b1 Use the CboRoaringBitmapCodec for the word pair proximity docids 2020-10-02 16:46:06 +02:00
5a6a698e1d Introduce the CboRoaringBitmapCodec 2020-10-02 16:46:06 +02:00
4eda149ffa Rename the BoRoaringBitmap codec 2020-10-02 16:46:06 +02:00
ac84db2506 Move the words pairs proximities average into the stats infos subcommand 2020-10-02 16:46:06 +02:00
30755e31e7 Introduce the words pairs proximities stats info subcommand 2020-10-02 16:46:06 +02:00
bc35c9a598 Introduce the size_of_database infos subcommand 2020-10-02 16:46:05 +02:00
c6b883289c Remove the unused fetch_keywords function 2020-09-30 15:41:23 +02:00
58237bd67f Introduce the average-number-of-document-by-word-pair-proximity infos subcommand 2020-09-29 18:32:48 +02:00
991be8950e Rename the subcommand into average-number-of-positions-by-word-by-doc 2020-09-29 18:15:44 +02:00
54370e228a Search for documents with longer proximities until we find enough 2020-09-29 17:37:14 +02:00
f277ea134f Simplify some search function by reducing the number of parameters 2020-09-29 16:08:58 +02:00
68f4af7d2e Improve the display of the number of processed documents 2020-09-29 16:08:58 +02:00
59a127d022 Improve the indexing process
We now store the words pairs proximity in a cache and only compute the
shortest proximity between pairs of words in a document.
2020-09-29 15:09:18 +02:00
6ddb3e722c Depth-first search cache the docids unions 2020-09-28 16:55:21 +02:00
a3821a0b33 Introduce the depth_first_search path resolution function 2020-09-28 16:34:12 +02:00
51c237f9d8 Fix the benchmarks compilation 2020-09-28 13:39:17 +02:00
d8354f6f02 Fix the word_docids capacity limit detection 2020-09-27 11:52:05 +02:00
25b2853b70 Move the words pairs proximities compute into the write document function 2020-09-23 15:02:40 +02:00
ed05999f63 Replace the arc cache by a simple linked hash map 2020-09-23 14:50:52 +02:00
4d22d80281 Display only the key on heed error 2020-09-23 14:13:51 +02:00
5178b3d59d Make the search system be aware of query words typos 2020-09-23 12:01:39 +02:00
b597a92487 Add a default max-memory value to the indexer 2020-09-23 12:00:36 +02:00
1f6e00878d Use the words pair proximities in the search algorithm 2020-09-22 18:47:55 +02:00
31224a8425 Index the word pair proximities for both orders of the pair 2020-09-22 14:49:22 +02:00
a58ae5eb2a Introduce the word-pair-proximities-docids infos subcommand 2020-09-22 14:04:34 +02:00
d6fa9c0414 Index the intra documents word pair proximities 2020-09-22 14:04:33 +02:00
7b67ae6972 Introduce the StrStrU8 heed codec 2020-09-22 12:44:17 +02:00
e34437b2d7 Move the proximity function to a module 2020-09-22 10:54:59 +02:00
15208c7d3d Simplify the indexer record loop 2020-09-22 10:33:30 +02:00
e5adfaade0 Replace the token filter by a filter mapper 2020-09-22 10:24:31 +02:00
d21c80b865 Apply the chunk compression parameters on all the MTBL writers 2020-09-21 18:30:54 +02:00
944df52e2a Simplify the indexer main loop 2020-09-21 14:59:48 +02:00
3ded98e5fa Bump the roaring version that fixes a deserialization bug 2020-09-10 22:37:51 +02:00
d5e5baa20f Bump the oxidized-mtbl dependency 2020-09-10 13:29:12 +02:00
0fb086f241 Use the crates.io roaring library 2020-09-08 15:16:04 +02:00
aed0704404 Remove the temporary optimisation 2020-09-08 14:48:33 +02:00
072382fa61 Sort the word docids to make intersections much faster 2020-09-07 22:38:49 +02:00
ad11c5fb3f Introduce the words-docids command for the infos binary 2020-09-07 22:36:35 +02:00
5664c37539 Introduce an heed codec that reduce the size of small amount of serialized integers 2020-09-07 20:06:23 +02:00
3e2250423c Introduce the average-number-of-positions infos subcommand 2020-09-07 15:26:42 +02:00
ea605b499c Introduce two new infos subcommands 2020-09-07 14:56:48 +02:00
bb1ab428db Use another function to define the proximity 2020-09-06 17:55:07 +02:00
f928b91e9d Specify the exact rev for the near-proximity dep 2020-09-06 17:21:38 +02:00
dec460ce52 Fix the infos binary and add commands 2020-09-06 17:14:20 +02:00
daa3673c1c Invert the word docid positions key order 2020-09-06 10:30:53 +02:00
c2405bcae2 Prefer using the word_docids db to create the words-fst 2020-09-06 10:23:56 +02:00
4ca9472e02 Fix the minimum proximity len 2020-09-06 10:19:34 +02:00
1c504471d3 Introduce the plane-sweep algorithm 2020-09-05 18:25:27 +02:00
dc88a86259 Store the word positions under the documents 2020-09-05 18:03:06 +02:00
580ed1119a Make the engine to return csv string records as documents and headers 2020-08-31 19:02:00 +02:00
bad0663138 Come back to the old tokenizer 2020-08-31 13:34:38 +02:00
220ba0785c Make the front-end to throttle the request by 100ms 2020-08-31 13:34:35 +02:00
4afc4d0751 Use the groups of four positions to speed up disjunctions tests 2020-08-30 16:25:11 +02:00
605f75b56f Add the words grouped by four positions in the infos binary 2020-08-29 18:23:33 +02:00
ad5cafbfed Introduce a database to store docids in groups of four positions 2020-08-29 17:42:55 +02:00
3db517548d Move the documents back into the LMDB database 2020-08-29 15:14:04 +02:00
816db7a0aa Improve the RoaringBitmap codec to reserve enough vector space 2020-08-29 11:21:30 +02:00
3fe497e129 Improve the Mtbl heed codec to only encode MTBL databases 2020-08-29 11:20:39 +02:00
21aafd603c Make sure the first document is associated to the document id 0 2020-08-29 10:56:40 +02:00
0a44ff86ab Put the documents MTBL back into LMDB
We make sure to write the documents into a file before
memory mapping it and putting it into LMDB, this way we avoid
moving it to RAM
2020-08-28 15:43:24 +02:00
d784d87880 Remove the prefix LMDB databases 2020-08-28 14:41:43 +02:00
7cde312f14 Introduce the StrBEU32Codec heed codec 2020-08-28 14:16:37 +02:00
34db376ae5 Rename the RoaringBitmapCodec module 2020-08-28 13:31:16 +02:00
38ddc71b83 Simplify the search algorithm 2020-08-26 15:16:41 +02:00
ba2eb0d7ad Take the words-fst into account when retrieving the biggest values 2020-08-26 14:36:22 +02:00
32da07ccee Introduce the word-positions-doc-ids and words-positions infos commands 2020-08-23 10:52:47 +02:00
d19f394630 Make the indexer support gzipped CSV as input 2020-08-21 18:10:24 +02:00
ff479c865d Replace pipe by ringtail to improve stdin read performances 2020-08-21 17:45:52 +02:00
ada30c2789 Introducing more arguments to specify the different compression algorithms 2020-08-21 16:41:26 +02:00
02335ee72d Introduce the biggest-value-sizes command on the infos binary 2020-08-21 14:44:42 +02:00
1e3e756c19 Introduce the words-frequencies command on the infos binary 2020-08-21 14:44:42 +02:00
6a230fe803 Move the contains_documents logic to a function 2020-08-21 14:44:42 +02:00
e55a569629 Compress much more the documents database 2020-08-21 14:44:42 +02:00
962bad3cea Introduce an infos binary to fetch stats 2020-08-17 19:41:49 +02:00
8806fcd545 Introduce a better query and document lexer 2020-08-16 14:36:54 +02:00
1e358e3ae8 Introduce the AstarBagIter that iterates through best paths 2020-08-15 16:24:06 +02:00
7dc594ba4d Introduce the Search builder struct 2020-08-13 14:27:51 +02:00
bfb46cbfbe Introduce the Criterion enum 2020-08-12 10:43:02 +02:00
6d04a285dc Retrieve and display the distances of the words found 2020-08-11 15:18:02 +02:00
1bd37d213a Lowercase quoted words 2020-08-10 14:49:09 +02:00
883a8109c8 Show both database and documents database sizes 2020-08-10 14:37:18 +02:00
a4e0f3f724 Remove the useless TransitiveArc from the serve binary 2020-08-10 14:06:27 +02:00
edc06a97d6 Remove the useless stats binary 2020-08-10 13:55:02 +02:00
ae77fe5a69 Introduce an option to specify the maximum database size 2020-08-10 13:53:53 +02:00
394844062f Move the documents MTBL database inside the Index 2020-08-10 13:47:19 +02:00
ecd2b2f217 Make the final merge done in parallel 2020-08-07 15:44:04 +02:00
91282c8b6a Move the documents into another file 2020-08-07 13:11:31 +02:00
fae694a102 Put the documents into an MTBL database 2020-08-07 12:14:40 +02:00
d5a356902a Update oxidized-mtbl 2020-08-07 12:14:03 +02:00
405a71d3a4 Accept csv from stdin 2020-08-06 13:38:21 +02:00
d3b1096510 Compute the word attribute postings lists on each threads 2020-08-06 11:50:27 +02:00
8d734941af Clean up some lines 2020-08-06 10:20:26 +02:00
a4e3c7c37c Force the Papa parse delimiter 2020-08-05 14:11:46 +02:00
6508d497ce Replace the regex highlighting by a simple algorithm 2020-08-05 13:52:27 +02:00
4873abe145 Introduce option flags to toggle the indexing engine 2020-08-05 12:10:41 +02:00
bd4b18541c Introduce a new indexer which uses an MTBL sorter 2020-08-04 15:44:37 +02:00
3f21760d56 Update README.md 2020-08-04 15:40:37 +02:00
bc3a0ac6a3 Display the milli logo and update the description 2020-08-04 15:40:02 +02:00
d7d8f38fb7 Update bulma to spread the logo more 2020-07-16 23:45:02 +02:00
ee305c9284 Replace the title by the milli logo 2020-07-15 23:55:28 +02:00
9ade00e27b Highlight all the matching words 2020-07-14 11:53:21 +02:00
085c376655 Use the regex crate to highlight "hello" 2020-07-14 11:28:40 +02:00
dd385ad05b Customize the mark tag css 2020-07-14 11:03:21 +02:00
aa92311d4e Add a dark theme to the dashboard 2020-07-13 23:51:41 +02:00
3d144e62c4 Search for best proximities in multiple attributes 2020-07-13 19:06:56 +02:00
576dd011a1 Compute the candidates but not by attribute 2020-07-13 18:16:05 +02:00
6b14b20369 Introduce a method to retrieve the number of attributes of the documents 2020-07-13 17:50:16 +02:00
54afec58a3 Add a fade in out animation when the server process 2020-07-12 11:34:48 +02:00
92c2b1dd2d Refine the help message of the binaries 2020-07-12 11:06:45 +02:00
f757df5dfd Introduce the stderr logger to the project 2020-07-12 11:04:35 +02:00
12358476da Use the log crate instead of stderr 2020-07-12 10:55:09 +02:00
2c62eeea3c Rename the project milli 2020-07-12 00:16:41 +02:00
d31da26a51 Avoid cloning RoraringBitmaps when unecessary 2020-07-11 23:51:32 +02:00
b8a1fc0126 Clean up the CSS style custom bulma rules 2020-07-11 14:51:59 +02:00
f6eae91c7d Pretty print the new dashboard numbers 2020-07-11 14:17:37 +02:00
d44428fa90 Display more informations on the dashboard 2020-07-11 11:51:56 +02:00
11c7fef80a Implement a memory dumper
It moves the in memory HashMaps used when indexing to a disk based MTBL file
2020-07-07 16:48:49 +02:00
b12bfcb03b Reduce the deepness of the word position document ids
This helps reduce the number of allocations.
2020-07-07 12:30:05 +02:00
7178b6c2c4 First basic version using MTBL again 2020-07-07 11:32:33 +02:00
45d0d7c3d4 Clean up the README 2020-07-06 17:38:22 +02:00
adb1038b26 Add a jobs parameter to set the number of threads the indexer uses 2020-07-06 12:17:17 +02:00
2a3b03138b Use heed 0.8.1 with the RwIter append method 2020-07-05 19:50:28 +02:00
ec1023e790 Intersect document ids by inverse popularity of the words
This reduces the worst request we had which took 56s to now took 3s ("the best of the do").
2020-07-05 19:33:51 +02:00
cd7e64b2b3 Allow users to set the arc cache size when indexing 2020-07-04 18:12:41 +02:00
ac8353a64f Merge pre-computed word attribute documents ids 2020-07-04 17:02:27 +02:00
fea7cac206 Display the time it took to compute the word attribute documents ids 2020-07-04 15:18:38 +02:00
46ced5c828 Introduce the RwIter append heed API 2020-07-04 12:34:10 +02:00
7e7440c431 Finalize the LMDB indexing design 2020-07-01 22:45:43 +02:00
2ae3f40971 Make the indexer ignore certain words
This is a preparation for making the indexing fully parallel by making the
indexer only be aware of certain words for each threads to avoid postings lists
conflicts for each words
2020-07-01 17:49:46 +02:00
a3ac2623d5 Introduce multiple functions to clean up the code 2020-07-01 17:24:55 +02:00
ac5cc7ddad Introduce an Iterator yielding owned entries for the LruCache 2020-07-01 17:21:52 +02:00
014a25697d Use only one ARC cache based on the words 2020-07-01 12:03:18 +02:00
fc4013a43f Fix the ARC cache 2020-07-01 10:35:07 +02:00
2fcae719ad Use another LRU impl which uses hashbrown 2020-06-29 22:26:06 +02:00
f98b615bf3 Replace the LRU by an Arc cache 2020-06-29 20:48:57 +02:00
07abebfc46 Introduce a (too big) LRU cache 2020-06-29 18:15:03 +02:00
5f0088594b Index by writing directly into LMDB 2020-06-29 13:54:47 +02:00
8453828a65 Update the README 2020-06-28 12:40:08 +02:00
63cbeca64e Skip all derived words when too short 2020-06-28 12:13:12 +02:00
736f0f7560 Use the proximity instead of the attributes when searching for <= 7 proximities 2020-06-28 12:13:12 +02:00
fe3be8f18a Replace the HashMap by a Vec for attributes documents ids 2020-06-28 12:13:12 +02:00
6a2834f2b0 Add a jobs parameter to set the number of threads the indexer uses 2020-06-28 12:13:10 +02:00
7e16afbdce Ignore documents which are not part of the candidates when exploring with A* 2020-06-24 15:06:45 +02:00
1c7a9a4132 Remove the found documents from the candidates list 2020-06-24 15:00:26 +02:00
50169b9798 Compute the full list of ids we are willing to find by attribute 2020-06-24 14:48:04 +02:00
374ec6773f Introduce a database to store all docids for a word and attribute 2020-06-22 19:24:20 +02:00
a044cb6cc8 Clean up the warnings for prefix postings 2020-06-22 18:10:31 +02:00
ba3e805981 Document the Index types and the internal LMDB databases 2020-06-22 18:09:22 +02:00
2f0e1afd16 Introduce the roaring bitmap heed codec 2020-06-22 17:56:07 +02:00
8148210860 Use the cache when retrieving the documents at the end 2020-06-21 12:25:19 +02:00
1628a31efa Cache the unions of the derived words positions 2020-06-20 15:38:10 +02:00
115e0142d9 Add a feature flags to enable the export of stats 2020-06-20 13:25:42 +02:00
beb49b24f6 Skip looking at connections for proximity 0 2020-06-20 13:19:03 +02:00
c84012d655 Accept queries from standard input when not given as argument 2020-06-20 12:01:15 +02:00
d6705d5529 Introduce the criterion dependency to bench the engine 2020-06-19 18:32:25 +02:00
55a8941922 Optimize things 2020-06-19 17:48:17 +02:00
a3ca80d20d Ignore every proximities bigger or equal to 8 2020-06-18 15:42:46 +02:00
3577de04b8 Reduce the number of KV lookups to the sucessfulls only 2020-06-16 12:58:29 +02:00
e974e6b3c9 Acquire search intersections metrics 2020-06-16 12:10:23 +02:00
8db16ff306 Add a cache to the contains_documents success function 2020-06-14 13:39:39 +02:00
a8cda248b4 Introduce a customized A* algorithm.
This custom algo lazily compute the intersections between words, to avoid too much set operations and database reads
2020-06-14 12:51:57 +02:00
69285b22d3 Check that an edges combination contains results 2020-06-13 11:16:02 +02:00
b9cc6c10af Introduce a function to ignore useless paths 2020-06-13 00:17:43 +02:00
d02c5cb023 Fix node skipping by computing the accumulated proximity 2020-06-12 14:08:46 +02:00
37a48489da Reworked the best proximity algo a little bit 2020-06-12 12:53:08 +02:00
302866ad73 Make the algo don't work with an astar 2020-06-11 17:43:06 +02:00
0a83a86e65 Fix multiple bugs 2020-06-11 11:55:03 +02:00
4e86ecf807 Retrieve the words before the intersect loops 2020-06-10 22:05:01 +02:00
6ca3579cc0 Add more time debug measurements 2020-06-10 21:35:01 +02:00
66a4b26811 Introduce a proximity based documents retriever 2020-06-10 16:54:28 +02:00
78f27c0465 squash-me: Remove debugs 2020-06-10 16:29:46 +02:00
3ad883d7c7 squash-me: Make the dijkstra work even with different attributes 2020-06-10 16:27:02 +02:00
fecd8ca54a squash-me: It works! we must remove the debug after having added more tests 2020-06-10 14:20:35 +02:00
13977d9338 squash-me 2020-06-09 23:06:59 +02:00
5d5b827f1a Squash-me 2020-06-09 17:32:25 +02:00
2a6d6a7f69 Introduce a first draft of the best_proximity algorithm 2020-06-09 10:11:43 +02:00
dfdaceb410 Introduce a first basic working positions-based engine 2020-06-05 20:13:19 +02:00
f51a63e4ef Store documents ids under attribute ids 2020-06-05 16:32:14 +02:00
ce86a43779 Make the query tokenizer a real Iterator 2020-06-05 09:49:28 +02:00
f55f4cb02a Not fetch the cached prefix postings when prefix is disabled 2020-06-04 21:22:45 +02:00
06bf03f075 Add an help message on the front page
aaa
2020-06-04 21:22:45 +02:00
eefc6d7c44 Add support for quoted query phrases 2020-06-04 20:25:51 +02:00
1f7035f18f Just do a little clean-up 2020-06-04 19:13:28 +02:00
71dc6a3828 Disable prefix search when query is ended by a whitespace 2020-06-04 18:37:20 +02:00
5d1c625b74 Change the page index texts 2020-06-04 18:20:57 +02:00
c42d3c19e2 Merge the whole list of generated MTBL in one go 2020-06-04 17:38:43 +02:00
3a23dc242e More efficiently merge MTBLs, more than two at a time 2020-06-04 16:17:24 +02:00
1df1f88fe1 Directly write to LMDB without intermediate final MTBL 2020-06-01 21:30:39 +02:00
2174042994 Merge only 3 MTBL at the same time 2020-06-01 19:49:58 +02:00
5cc81a0179 Merge many MTBL into one a the same time 2020-06-01 18:39:58 +02:00
6a047519f6 Do a merge two by two 2020-06-01 18:27:26 +02:00
5404776f7a Add a little bit more debug 2020-06-01 17:52:43 +02:00
dff68a339a Use OnceCell to cache levenshtein builders 2020-05-31 19:27:11 +02:00
dde3e01a59 Introduce prefix postings ids for better perfs 2020-05-31 18:20:49 +02:00
a26553c90a Reintroduce a simple HTTP server 2020-05-31 17:48:13 +02:00
2a10b2275e Support prefix typo tolerant search 2020-05-31 17:18:13 +02:00
ba9527abc0 Support typos with a levenshtein automata 2020-05-31 17:01:11 +02:00
6c726df9b9 Support multiple space seperated words 2020-05-31 16:09:34 +02:00
24587148fd Introduce MTBL parallel merging before LMDB writing 2020-05-31 14:22:57 +02:00
6762c2d08f Clean up a little bit 2020-05-31 14:22:57 +02:00
3a998cf39c Far better usage of rayon to fold indexed data 2020-05-31 14:22:57 +02:00
1237306ca8 Introduce a thread that write to heed 2020-05-31 14:22:57 +02:00
3668627e03 Use zerocopy without bitpacking as a first step 2020-05-31 14:22:07 +02:00
a81f201fad Inroduce the use of RocksDB instead of sled (RAM) 2020-05-31 14:22:06 +02:00
91ba938953 Initial commit 2020-05-31 14:22:06 +02:00
4573f00a0d Initial commit 2020-05-31 14:21:56 +02:00
1620 changed files with 233094 additions and 31015 deletions

2
.cargo/config.toml Normal file
View File

@ -0,0 +1,2 @@
[alias]
xtask = "run --release --package xtask --"

View File

@ -23,7 +23,8 @@ A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Meilisearch version:** [e.g. v0.20.0]
**Meilisearch version:**
[e.g. v0.20.0]
**Additional context**
Additional information that may be relevant to the issue.

58
.github/ISSUE_TEMPLATE/sprint_issue.md vendored Normal file
View File

@ -0,0 +1,58 @@
---
name: New sprint issue
about: ⚠️ Should only be used by the engine team ⚠️
title: ''
labels: 'missing usage in PRD, impacts docs'
assignees: ''
---
Related product team resources: [PRD]() (_internal only_)
Related product discussion:
## Motivation
<!---Copy/paste the information from the PRD or briefly detail the product motivation. Ask the product team if in doubt.-->
## Usage
<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->
## TODO
<!---If necessary, create a list with technical/product steps-->
### Are you modifying a database?
- [ ] If not, add the `no db change` label to your PR, and you're good to merge.
- [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining what to do.
### Reminders when modifying the API
- [ ] Update the openAPI file with utoipa:
- [ ] If a new module has been introduced, create a new structure deriving [the OpenAPI proc-macro](https://docs.rs/utoipa/latest/utoipa/derive.OpenApi.html) and nest it in the main [openAPI structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L64-L78).
- [ ] If a new route has been introduced, add the [path decorator](https://docs.rs/utoipa/latest/utoipa/attr.path.html) to it and add the route at the top of the file in its openAPI structure.
- [ ] If a structure which is deserialized or serialized in the API has been introduced or modified, it must derive the [`schema`](https://docs.rs/utoipa/latest/utoipa/macro.schema.html) or the [`IntoParams`](https://docs.rs/utoipa/latest/utoipa/derive.IntoParams.html) proc-macro.
If it's a **new** structure you must also add it to the big list of structures [in the main `OpenApi` structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L88).
- [ ] Once everything is done, start Meilisearch with the swagger flag: `cargo run --features swagger`, open `http://localhost:7700/scalar` on your browser, and ensure everything works as expected.
- For more info, refer to [this presentation](https://pitch.com/v/generating-the-openapi-file-jrn3nh).
### Reminders when modifying the Setting API
<!--- Special steps to remind when adding a new index setting -->
- [ ] Ensure the new setting route is at least tested by the [`test_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/tests/settings/get_settings.rs#L276)
- [ ] Ensure Analytics are fully implemented
- [ ] `/settings/my-new-setting` configured in the [`make_setting_routes` macro](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L141-L165)
- [ ] global `/settings` route configured in the [`update_all` function](https://github.com/meilisearch/meilisearch/blob/5204c0b60b384cbc79621b6b2176fca086069e8e/meilisearch/src/routes/indexes/settings.rs#L655-L751)
- [ ] Ensure the dump serializing is consistent with the `/settings` route serializing, e.g., enums case can be different (`camelCase` in route and `PascalCase` in the dump)
#### Special cases when adding a setting for an experimental feature
- [ ] ⚠️ API stability: The setting does not appear on the main settings route when the feature has never been enabled (e.g. mark it `Unset` when returned from the index in this situation. See [an example](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch-types/src/settings.rs#L608))
- [ ] The setting cannot be set when the feature is disabled, either by the main settings route or the subroute (see [`validate_settings` function](https://github.com/meilisearch/meilisearch/blob/7a89abd2a025606a42f8b219e539117eb2eb029f/meilisearch/src/routes/indexes/settings.rs#L811))
- [ ] If possible, the setting is reset when the feature is disabled (hard if it requires reindexing)
## Impacted teams
<!---Ping the related teams. Ask the engine manager if in doubt.-->
<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->

View File

@ -2,7 +2,6 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:

View File

@ -1,28 +1,41 @@
#!/bin/bash
#!/usr/bin/env bash
set -eu -o pipefail
# check_tag $current_tag $file_tag $file_name
function check_tag {
if [[ "$1" != "$2" ]]; then
echo "Error: the current tag does not match the version in $3: found $2 - expected $1"
ret=1
fi
check_tag() {
local expected=$1
local actual=$2
local filename=$3
if [[ $actual != $expected ]]; then
echo >&2 "Error: the current tag does not match the version in $filename: found $actual, expected $expected"
return 1
fi
}
read_version() {
grep '^version = ' | cut -d \" -f 2
}
if [[ -z "${GITHUB_REF:-}" ]]; then
echo >&2 "Error: GITHUB_REF is not set"
exit 1
fi
if [[ ! "$GITHUB_REF" =~ ^refs/tags/v[0-9]+\.[0-9]+\.[0-9]+(-[a-z0-9]+)?$ ]]; then
echo >&2 "Error: GITHUB_REF is not a valid tag: $GITHUB_REF"
exit 1
fi
current_tag=${GITHUB_REF#refs/tags/v}
ret=0
current_tag=${GITHUB_REF#'refs/tags/v'}
toml_files='*/Cargo.toml'
for toml_file in $toml_files;
do
file_tag="$(grep '^version = ' $toml_file | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
check_tag $current_tag $file_tag $toml_file
done
toml_tag="$(cat Cargo.toml | read_version)"
check_tag "$current_tag" "$toml_tag" Cargo.toml || ret=1
lock_file='Cargo.lock'
lock_tag=$(grep -A 1 'name = "meilisearch-auth"' $lock_file | grep version | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')
check_tag $current_tag $lock_tag $lock_file
lock_tag=$(grep -A 1 '^name = "meilisearch-auth"' Cargo.lock | read_version)
check_tag "$current_tag" "$lock_tag" Cargo.lock || ret=1
if [[ "$ret" -eq 0 ]] ; then
echo 'OK'
if (( ret == 0 )); then
echo 'OK'
fi
exit $ret

27
.github/workflows/bench-manual.yml vendored Normal file
View File

@ -0,0 +1,27 @@
name: Bench (manual)
on:
workflow_dispatch:
inputs:
workload:
description: "The path to the workloads to execute (workloads/...)"
required: true
default: "workloads/movies.json"
env:
WORKLOAD_NAME: ${{ github.event.inputs.workload }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
- name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Manual [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- ${WORKLOAD_NAME}

82
.github/workflows/bench-pr.yml vendored Normal file
View File

@ -0,0 +1,82 @@
name: Bench (PR)
on:
issue_comment:
types: [created]
permissions:
issues: write
env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/bench')
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- name: Check permissions
id: permission
env:
PR_AUTHOR: ${{github.event.issue.user.login }}
COMMENT_AUTHOR: ${{github.event.comment.user.login }}
REPOSITORY: ${{github.repository}}
PR_ID: ${{github.event.issue.number}}
run: |
PR_REPOSITORY=$(gh api /repos/"$REPOSITORY"/pulls/"$PR_ID" --jq .head.repo.full_name)
if $(gh api /repos/"$REPOSITORY"/collaborators/"$PR_AUTHOR"/permission --jq .user.permissions.push)
then
echo "::notice title=Authentication success::PR author authenticated"
else
echo "::error title=Authentication error::PR author doesn't have push permission on this repository"
exit 1
fi
if $(gh api /repos/"$REPOSITORY"/collaborators/"$COMMENT_AUTHOR"/permission --jq .user.permissions.push)
then
echo "::notice title=Authentication success::Comment author authenticated"
else
echo "::error title=Authentication error::Comment author doesn't have push permission on this repository"
exit 1
fi
if [ "$PR_REPOSITORY" = "$REPOSITORY" ]
then
echo "::notice title=Authentication success::PR started from main repository"
else
echo "::error title=Authentication error::PR started from a fork"
exit 1
fi
- name: Check for Command
id: command
uses: xt0rted/slash-command-action@v2
with:
command: bench
reaction-type: "rocket"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v3
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" \
--dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" \
--reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" \
-- ${{ steps.command.outputs.command-arguments }} > benchlinks.txt
- name: Send comment in PR
run: |
gh pr comment ${{github.event.issue.number}} --body-file benchlinks.txt

View File

@ -0,0 +1,22 @@
name: Indexing bench (push)
on:
push:
branches:
- main
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Push on `main` [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- workloads/*.json

75
.github/workflows/benchmarks-manual.yml vendored Normal file
View File

@ -0,0 +1,75 @@
name: Benchmarks (manual)
on:
workflow_dispatch:
inputs:
dataset_name:
description: "The name of the dataset used to benchmark (search_songs, search_wiki, search_geo or indexing)"
required: false
default: "search_songs"
env:
BENCH_NAME: ${{ github.event.inputs.dataset_name }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd crates/benchmarks
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Helper
- name: "README: compare with another benchmark"
run: |
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

127
.github/workflows/benchmarks-pr.yml vendored Normal file
View File

@ -0,0 +1,127 @@
name: Benchmarks (PR)
on: issue_comment
permissions:
issues: write
env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/benchmark')
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- name: Check permissions
id: permission
env:
PR_AUTHOR: ${{github.event.issue.user.login }}
COMMENT_AUTHOR: ${{github.event.comment.user.login }}
REPOSITORY: ${{github.repository}}
PR_ID: ${{github.event.issue.number}}
run: |
PR_REPOSITORY=$(gh api /repos/"$REPOSITORY"/pulls/"$PR_ID" --jq .head.repo.full_name)
if $(gh api /repos/"$REPOSITORY"/collaborators/"$PR_AUTHOR"/permission --jq .user.permissions.push)
then
echo "::notice title=Authentication success::PR author authenticated"
else
echo "::error title=Authentication error::PR author doesn't have push permission on this repository"
exit 1
fi
if $(gh api /repos/"$REPOSITORY"/collaborators/"$COMMENT_AUTHOR"/permission --jq .user.permissions.push)
then
echo "::notice title=Authentication success::Comment author authenticated"
else
echo "::error title=Authentication error::Comment author doesn't have push permission on this repository"
exit 1
fi
if [ "$PR_REPOSITORY" = "$REPOSITORY" ]
then
echo "::notice title=Authentication success::PR started from main repository"
else
echo "::error title=Authentication error::PR started from a fork"
exit 1
fi
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
- name: Check for Command
id: command
uses: xt0rted/slash-command-action@v2
with:
command: benchmark
reaction-type: "eyes"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v3
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${{ steps.command.outputs.command-arguments }}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd crates/benchmarks
cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Compute the diff of the benchmarks and send a message on the GitHub PR
- name: Compute and send a message in the PR
env:
GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
run: |
set -x
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt
gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt

View File

@ -0,0 +1,77 @@
name: Benchmarks of indexing (push)
on:
push:
branches:
- main
env:
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
BENCH_NAME: "indexing"
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd crates/benchmarks
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Upload benchmarks to influxdb
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
# Helper
- name: "README: compare with another benchmark"
run: |
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@ -0,0 +1,76 @@
name: Benchmarks of search for geo (push)
on:
push:
branches:
- main
env:
BENCH_NAME: "search_geo"
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd crates/benchmarks
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Upload benchmarks to influxdb
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
# Helper
- name: "README: compare with another benchmark"
run: |
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@ -0,0 +1,76 @@
# Runs the `search_songs` benchmark on every push to `main`, exports the saved
# baseline with critcmp, uploads the JSON to DO Spaces and then runs telegraf.
name: Benchmarks of search for songs (push)

on:
  push:
    branches:
      - main

env:
  BENCH_NAME: "search_songs"
  # NOTE(review): presumably read by the telegraf configuration; confirm.
  INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}

jobs:
  benchmarks:
    name: Run and upload benchmarks
    runs-on: benchmarks
    steps:
      - uses: actions/checkout@v3
      # `dtolnay/rust-toolchain` has no `profile` input (it always installs the
      # minimal profile), so the previously passed `profile: minimal` was dropped.
      - uses: dtolnay/rust-toolchain@1.85

      # Set variables
      - name: Set current branch name
        shell: bash
        run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
        id: current_branch
      - name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
        shell: bash
        run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
        id: normalized_current_branch
      - name: Set shorter commit SHA
        shell: bash
        run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
        id: commit_sha
      - name: Set file basename with format "dataset_branch_commitSHA"
        shell: bash
        run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
        id: file

      # Run benchmarks
      # Step names only expand `${{ }}` expressions, not shell variables, hence `env.BENCH_NAME`.
      - name: Run benchmarks - Dataset ${{ env.BENCH_NAME }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
        run: |
          cd crates/benchmarks
          cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}

      # Generate critcmp files
      - name: Install critcmp
        uses: taiki-e/install-action@v2
        with:
          tool: critcmp
      - name: Export critcmp file
        run: |
          critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json

      # Upload benchmarks
      - name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
        uses: BetaHuhn/do-spaces-action@v2
        with:
          access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
          secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
          space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
          space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
          source: ${{ steps.file.outputs.basename }}.json
          out_dir: critcmp_results

      # Upload benchmarks to influxdb
      - name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
        run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug

      # Helper
      - name: "README: compare with another benchmark"
        run: |
          echo "${{ steps.file.outputs.basename }}.json has just been pushed."
          echo 'How to compare this benchmark with another one?'
          echo '  - Check the available files with: ./benchmarks/scripts/list.sh'
          echo "  - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@ -0,0 +1,76 @@
# Runs the `search_wiki` benchmark on every push to `main`, exports the saved
# baseline with critcmp, uploads the JSON to DO Spaces and then runs telegraf.
name: Benchmarks of search for Wikipedia articles (push)

on:
  push:
    branches:
      - main

env:
  BENCH_NAME: "search_wiki"
  # NOTE(review): presumably read by the telegraf configuration; confirm.
  INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}

jobs:
  benchmarks:
    name: Run and upload benchmarks
    runs-on: benchmarks
    steps:
      - uses: actions/checkout@v3
      # `dtolnay/rust-toolchain` has no `profile` input (it always installs the
      # minimal profile), so the previously passed `profile: minimal` was dropped.
      - uses: dtolnay/rust-toolchain@1.85

      # Set variables
      - name: Set current branch name
        shell: bash
        run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
        id: current_branch
      - name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
        shell: bash
        run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
        id: normalized_current_branch
      - name: Set shorter commit SHA
        shell: bash
        run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
        id: commit_sha
      - name: Set file basename with format "dataset_branch_commitSHA"
        shell: bash
        run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
        id: file

      # Run benchmarks
      # Step names only expand `${{ }}` expressions, not shell variables, hence `env.BENCH_NAME`.
      - name: Run benchmarks - Dataset ${{ env.BENCH_NAME }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
        run: |
          cd crates/benchmarks
          cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}

      # Generate critcmp files
      - name: Install critcmp
        uses: taiki-e/install-action@v2
        with:
          tool: critcmp
      - name: Export critcmp file
        run: |
          critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json

      # Upload benchmarks
      - name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
        uses: BetaHuhn/do-spaces-action@v2
        with:
          access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
          secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
          space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
          space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
          source: ${{ steps.file.outputs.basename }}.json
          out_dir: critcmp_results

      # Upload benchmarks to influxdb
      - name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
        run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug

      # Helper
      - name: "README: compare with another benchmark"
        run: |
          echo "${{ steps.file.outputs.basename }}.json has just been pushed."
          echo 'How to compare this benchmark with another one?'
          echo '  - Check the available files with: ./benchmarks/scripts/list.sh'
          echo "  - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@ -0,0 +1,100 @@
# Fails a pull request whose milestone is missing, malformed, or does not match
# its base branch:
#   - PRs targeting `main` must use the highest open `vX.Y.Z` milestone;
#   - PRs targeting `release-vX.Y.Z` must use the milestone `vX.Y.Z`.
name: PR Milestone Check

on:
  pull_request:
    types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
    branches:
      - "main"
      - "release-v*.*.*"

jobs:
  check-milestone:
    name: Check PR Milestone
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Validate PR milestone
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Query the repository the workflow runs in rather than a hard-coded
            // owner/repo, so the PR number always refers to the right repository.
            const { owner, repo } = context.repo;

            // Get PR number directly from the event payload
            const prNumber = context.payload.pull_request.number;

            // Get PR details
            const { data: prData } = await github.rest.pulls.get({
              owner,
              repo,
              pull_number: prNumber
            });

            // Get base branch name
            const baseBranch = prData.base.ref;
            console.log(`Base branch: ${baseBranch}`);

            // Get PR milestone
            const prMilestone = prData.milestone;
            if (!prMilestone) {
              core.setFailed('PR must have a milestone assigned');
              return;
            }
            console.log(`PR milestone: ${prMilestone.title}`);

            // Validate milestone format: vx.y.z
            const milestoneRegex = /^v\d+\.\d+\.\d+$/;
            if (!milestoneRegex.test(prMilestone.title)) {
              core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
              return;
            }

            // For main branch PRs, check if the milestone is the highest one
            if (baseBranch === 'main') {
              // Get all open milestones; paginate so that none is missed when
              // there are more than one page of them.
              const milestones = await github.paginate(github.rest.issues.listMilestones, {
                owner,
                repo,
                state: 'open',
                sort: 'due_on',
                direction: 'desc',
                per_page: 100
              });

              // "vX.Y.Z" -> [X, Y, Z]
              const parseVersion = title => title.substring(1).split('.').map(Number);

              // Sort milestones by version number (vx.y.z), highest first
              const sortedMilestones = milestones
                .filter(m => milestoneRegex.test(m.title))
                .sort((a, b) => {
                  const versionA = parseVersion(a.title);
                  const versionB = parseVersion(b.title);
                  // Compare major, then minor, then patch
                  for (let i = 0; i < 3; i++) {
                    if (versionA[i] !== versionB[i]) return versionB[i] - versionA[i];
                  }
                  return 0;
                });

              if (sortedMilestones.length === 0) {
                core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
                return;
              }

              const highestMilestone = sortedMilestones[0];
              console.log(`Highest milestone: ${highestMilestone.title}`);

              if (prMilestone.title !== highestMilestone.title) {
                core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
                return;
              }
            } else {
              // For release branches, the milestone should match the branch version
              const branchVersion = baseBranch.substring('release-'.length);
              if (prMilestone.title !== branchVersion) {
                core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
                return;
              }
            }

            console.log('PR milestone validation passed!');

View File

@ -1,23 +0,0 @@
name: Create issue to upgrade dependencies
on:
schedule:
- cron: '0 0 1 */3 *'
workflow_dispatch:
jobs:
create-issue:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Create an issue
uses: actions-ecosystem/action-create-issue@v1
with:
github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
title: Upgrade dependencies
body: |
We need to update the dependencies of the Meilisearch repository, and, if possible, the dependencies of all the engine-team repositories that Meilisearch depends on (milli, charabia, heed...).
⚠️ This issue should only be done at the beginning of the sprint!
labels: |
dependencies
maintenance

View File

@ -0,0 +1,57 @@
name: Comment when db change labels are added
on:
pull_request:
types: [labeled]
env:
MESSAGE: |
### Hello, I'm a bot 🤖
You are receiving this message because you declared that this PR make changes to the Meilisearch database.
Depending on the nature of the change, additional actions might be required on your part. The following sections detail the additional actions depending on the nature of the change, please copy the relevant section in the description of your PR, and make sure to perform the required actions.
Thank you for contributing to Meilisearch :heart:
## This PR makes forward-compatible changes
*Forward-compatible changes are changes to the database such that databases created in an older version of Meilisearch are still valid in the new version of Meilisearch. They usually represent additive changes, like adding a new optional attribute or setting.*
- [ ] Detail the change to the DB format and why they are forward compatible
- [ ] Forward-compatibility: A database created before this PR and using the features touched by this PR was able to be opened by a Meilisearch produced by the code of this PR.
## This PR makes breaking changes
*Breaking changes are changes to the database such that databases created in an older version of Meilisearch need changes to remain valid in the new version of Meilisearch. This typically happens when the way to store the data changed (change of database, new required key, etc). This can also happen due to breaking changes in the API of an experimental feature. ⚠️ This kind of changes are more difficult to achieve safely, so proceed with caution and test dumpless upgrade right before merging the PR.*
- [ ] Detail the changes to the DB format,
- [ ] which are compatible, and why
- [ ] which are not compatible, why, and how they will be fixed up in the upgrade
- [ ] /!\ Ensure all the read operations still work!
- If the change happened in milli, you may need to check the version of the database before doing any read operation
- If the change happened in the index-scheduler, make sure the new code can immediately read the old database
- If the change happened in the meilisearch-auth database, reach out to the team; we don't know yet how to handle these changes
- [ ] Write the code to go from the old database to the new one
- If the change happened in milli, the upgrade function should be written and called [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/milli/src/update/upgrade/mod.rs#L24-L47)
- If the change happened in the index-scheduler, we've never done it yet, but the right place to do it should be [here](https://github.com/meilisearch/meilisearch/blob/3fd86e8d76d7d468b0095d679adb09211ca3b6c0/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs#L13)
- [ ] Write an integration test [here](https://github.com/meilisearch/meilisearch/blob/main/crates/meilisearch/tests/upgrade/mod.rs) ensuring you can read the old database, upgrade to the new database, and read the new database as expected
# Posts the workflow-level MESSAGE as a comment on the pull request.
jobs:
add-comment:
runs-on: ubuntu-latest
# Only react when the label that was just added is exactly 'db change'.
if: github.event.label.name == 'db change'
steps:
- name: Add comment
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
// MESSAGE is read from the environment variable of the same name.
const message = process.env.MESSAGE;
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: message
})

28
.github/workflows/db-change-missing.yml vendored Normal file
View File

@ -0,0 +1,28 @@
# Fails the check unless the pull request carries either the `db change` or
# the `no db change` label.
name: Check db change labels

on:
  pull_request:
    types: [opened, synchronize, reopened, labeled, unlabeled]

jobs:
  check-labels:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Check db change labels
        id: check_labels
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          URL=/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/labels
          echo "Fetching labels from $URL"
          # One label name per line
          LABELS=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "$URL" -q '.[].name')
          echo "Labels: $LABELS"
          # Match whole label names (-x) literally (-F): a substring match would
          # accept any label that merely contains "db change".
          if ! grep -Fxq -e 'db change' -e 'no db change' <<< "$LABELS"; then
            echo "::error::Pull request must contain either the 'db change' or 'no db change' label."
            exit 1
          else
            echo "The label is set"
          fi

24
.github/workflows/dependency-issue.yml vendored Normal file
View File

@ -0,0 +1,24 @@
# Periodically opens an "Upgrade dependencies" issue whose body is downloaded
# from the engine-team issue templates.
name: Create issue to upgrade dependencies

on:
  schedule:
    # Run on the first day of the month, every 6 months
    - cron: '0 0 1 */6 *'
  workflow_dispatch:

jobs:
  create-issue:
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
      GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        # --fail: abort on an HTTP error instead of saving the error page as the issue body
        run: curl -fsS https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > "$ISSUE_TEMPLATE"
      - name: Create issue
        run: |
          gh issue create \
            --title 'Upgrade dependencies' \
            --label 'dependencies,maintenance' \
            --body-file "$ISSUE_TEMPLATE"

30
.github/workflows/flaky-tests.yml vendored Normal file
View File

@ -0,0 +1,30 @@
# Runs `cargo flaky` (100 iterations, release mode) in several crates, weekly
# or on manual dispatch.
name: Look for flaky tests

on:
  workflow_dispatch:
  schedule:
    - cron: "0 12 * * FRI" # Every Friday at 12:00PM

jobs:
  flaky:
    runs-on: ubuntu-latest
    container:
      # Use ubuntu-22.04 to compile with glibc 2.35
      image: ubuntu:22.04
    steps:
      - uses: actions/checkout@v3
      - name: Install needed dependencies
        run: |
          apt-get update
          apt-get install -y curl
          apt-get install -y build-essential
      - uses: dtolnay/rust-toolchain@1.85
      - name: Install cargo-flaky
        run: cargo install cargo-flaky
      - name: Run cargo flaky in the dumps
        working-directory: crates/dump
        run: cargo flaky -i 100 --release
      - name: Run cargo flaky in the index-scheduler
        working-directory: crates/index-scheduler
        run: cargo flaky -i 100 --release
      - name: Run cargo flaky in the auth
        working-directory: crates/meilisearch-auth
        run: cargo flaky -i 100 --release
      - name: Run cargo flaky in meilisearch
        working-directory: crates/meilisearch
        run: cargo flaky -i 100 --release

View File

@ -1,26 +0,0 @@
name: Look for flaky tests
on:
workflow_dispatch:
schedule:
- cron: "0 12 * * FRI" # Every Friday at 12:00PM
jobs:
flaky:
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky 100 times
run: cargo flaky -i 100 --release

22
.github/workflows/fuzzer-indexing.yml vendored Normal file
View File

@ -0,0 +1,22 @@
# Builds and runs the `fuzz-indexing` binary on every push to `main`.
name: Run the indexing fuzzer

on:
  push:
    branches:
      - main

jobs:
  fuzz:
    name: Setup the action
    runs-on: ubuntu-latest
    # NOTE(review): GitHub-hosted runners cap job duration well below 72h;
    # confirm this timeout is effective on `ubuntu-latest`.
    timeout-minutes: 4320 # 72h
    steps:
      - uses: actions/checkout@v3
      # `dtolnay/rust-toolchain` has no `profile` input (it always installs the
      # minimal profile), so the previously passed `profile: minimal` was dropped.
      - uses: dtolnay/rust-toolchain@1.85

      # Run the fuzzer
      - name: Run the fuzzer
        run: |
          cargo run --release --bin fuzz-indexing

View File

@ -3,7 +3,7 @@ name: Update latest git tag
on:
workflow_dispatch:
release:
types: [published]
types: [released]
jobs:
check-version:
@ -17,6 +17,7 @@ jobs:
update-latest-tag:
runs-on: ubuntu-latest
needs: check-version
steps:
- uses: actions/checkout@v3
- uses: rickstaa/action-create-tag@v1

View File

@ -5,6 +5,7 @@ name: Milestone's workflow
# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed)
# - the roadmap issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/roadmap-issue.md
# - the changelog issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/changelog-issue.md
# - update the ruleset to add the current release version to the list of allowed versions and be able to use the merge queue.
# For each Milestone closed
# - the `release_version` label is created
@ -21,10 +22,9 @@ env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
# -----------------
# MILESTONE CREATED
# -----------------
# -----------------
# MILESTONE CREATED
# -----------------
get-release-version:
if: github.event.action == 'created'
@ -110,9 +110,79 @@ jobs:
--milestone $MILESTONE_VERSION \
--assignee curquiza
# ----------------
# MILESTONE CLOSED
# ----------------
create-update-version-issue:
needs: get-release-version
# Create the update-version issue even if the release is a patch release
if: github.event.action == 'created'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Update version in Cargo.toml for $MILESTONE_VERSION" \
--label 'maintenance' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
create-update-openapi-issue:
needs: get-release-version
# Create the openAPI issue if the release is not only a patch release
if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-openapi-issue.md > $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Update Open API file for $MILESTONE_VERSION" \
--label 'maintenance' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
update-ruleset:
runs-on: ubuntu-latest
if: github.event.action == 'created'
steps:
- uses: actions/checkout@v3
- name: Install jq
run: |
sudo apt-get update
sudo apt-get install -y jq
- name: Update ruleset
env:
# gh api repos/meilisearch/meilisearch/rulesets --jq '.[] | {name: .name, id: .id}'
RULESET_ID: 4253297
BRANCH_NAME: ${{ github.event.inputs.branch_name }}
run: |
echo "RULESET_ID: ${{ env.RULESET_ID }}"
echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
# Get current ruleset conditions
CONDITIONS=$(gh api repos/meilisearch/meilisearch/rulesets/${{ env.RULESET_ID }} --jq '{ conditions: .conditions }')
# Update the conditions by appending the milestone version
UPDATED_CONDITIONS=$(echo $CONDITIONS | jq '.conditions.ref_name.include += ["refs/heads/release-'${{ env.MILESTONE_VERSION }}'"]')
# Update the ruleset from stdin (-)
echo $UPDATED_CONDITIONS |
gh api repos/meilisearch/meilisearch/rulesets/${{ env.RULESET_ID }} \
--method PUT \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
--input -
# ----------------
# MILESTONE CLOSED
# ----------------
create-release-label:
if: github.event.action == 'closed'

View File

@ -0,0 +1,55 @@
# On a full release: validates the release, builds and publishes the Debian
# package (GitHub release asset + apt repository) and bumps the Homebrew formula.
name: Publish to APT & Homebrew

on:
  release:
    types: [released]

jobs:
  check-version:
    name: Check the version validity
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Check release validity
        run: bash .github/scripts/check-release.sh

  debian:
    name: Publish debian package
    runs-on: ubuntu-latest
    needs: check-version
    container:
      # Use ubuntu-22.04 to compile with glibc 2.35
      image: ubuntu:22.04
    steps:
      - name: Install needed dependencies
        run: |
          apt-get update && apt-get install -y curl
          apt-get install build-essential -y
      - uses: dtolnay/rust-toolchain@1.85
      - name: Install cargo-deb
        run: cargo install cargo-deb
      - uses: actions/checkout@v3
      - name: Build deb package
        run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
      - name: Upload debian pkg to release
        uses: svenstaro/upload-release-action@2.11.1
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/debian/meilisearch.deb
          asset_name: meilisearch.deb
          tag: ${{ github.ref }}
      - name: Upload debian pkg to apt repository
        # --fail: make the step fail when the server rejects the upload,
        # instead of exiting 0 on an HTTP error response.
        run: curl --fail -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/

  homebrew:
    name: Bump Homebrew formula
    runs-on: ubuntu-latest
    needs: check-version
    steps:
      - name: Create PR to Homebrew
        uses: mislav/bump-homebrew-formula-action@v3
        with:
          formula-name: meilisearch
          formula-path: Formula/m/meilisearch.rb
        env:
          COMMITTER_TOKEN: ${{ secrets.HOMEBREW_COMMITTER_TOKEN }}

View File

@ -1,12 +1,12 @@
name: Publish binaries to GitHub release
on:
workflow_dispatch:
schedule:
- cron: '0 2 * * *' # Every day at 2:00am
- cron: "0 2 * * *" # Every day at 2:00am
release:
types: [published]
name: Publish binaries to release
jobs:
check-version:
name: Check the version validity
@ -37,29 +37,26 @@ jobs:
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.3.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
asset_name: meilisearch-linux-amd64
tag: ${{ github.ref }}
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.85
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
asset_name: meilisearch-linux-amd64
tag: ${{ github.ref }}
publish-macos-windows:
name: Publish binary for ${{ matrix.os }}
@ -68,53 +65,46 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-12, windows-2022]
os: [macos-13, windows-2022]
include:
- os: macos-12
- os: macos-13
artifact_name: meilisearch
asset_name: meilisearch-macos-amd64
- os: windows-2022
artifact_name: meilisearch.exe
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.3.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.85
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-macos-apple-silicon:
name: Publish binary for macOS silicon
runs-on: ${{ matrix.os }}
runs-on: macos-13
needs: check-version
strategy:
fail-fast: false
matrix:
include:
- os: macos-12
target: aarch64-apple-darwin
- target: aarch64-apple-darwin
asset_name: meilisearch-macos-apple-silicon
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Installing Rust toolchain
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@1.85
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true
- name: Cargo build
uses: actions-rs/cargo@v1
with:
@ -123,7 +113,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.3.0
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
@ -132,37 +122,37 @@ jobs:
publish-aarch64:
name: Publish binary for aarch64
runs-on: ${{ matrix.os }}
runs-on: ubuntu-latest
needs: check-version
env:
DEBIAN_FRONTEND: noninteractive
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
strategy:
fail-fast: false
matrix:
include:
- build: aarch64
os: ubuntu-18.04
target: aarch64-unknown-linux-gnu
linker: gcc-aarch64-linux-gnu
use-cross: true
- target: aarch64-unknown-linux-gnu
asset_name: meilisearch-linux-aarch64
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update -y && apt upgrade -y
apt-get install -y curl build-essential gcc-aarch64-linux-gnu
- name: Set up Docker for cross compilation
run: |
apt-get install -y curl apt-transport-https ca-certificates software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@1.85
with:
toolchain: stable
profile: minimal
target: ${{ matrix.target }}
override: true
- name: APT update
run: |
sudo apt update
- name: Install target specific tools
if: matrix.use-cross
run: |
sudo apt-get install -y ${{ matrix.linker }}
- name: Configure target aarch64 GNU
if: matrix.target == 'aarch64-unknown-linux-gnu'
## Environment variable is not passed using env:
## LD gold won't work with MUSL
# env:
@ -172,18 +162,23 @@ jobs:
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
- name: Install a default toolchain that will be used to build cargo cross
run: |
rustup default stable
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
use-cross: ${{ matrix.use-cross }}
use-cross: true
args: --release --target ${{ matrix.target }}
env:
CROSS_DOCKER_IN_DOCKER: true
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.3.0
uses: svenstaro/upload-release-action@2.11.1
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch

View File

@ -1,57 +0,0 @@
name: Publish to APT repository & Homebrew
on:
release:
types: [published]
jobs:
check-version:
name: Check the version validity
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Check release validity
run: bash .github/scripts/check-release.sh
debian:
name: Publish debian packagge
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04
steps:
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v3
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@2.3.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb
asset_name: meilisearch.deb
tag: ${{ github.ref }}
- name: Upload debian pkg to apt repository
run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/
homebrew:
name: Bump Homebrew formula
runs-on: ubuntu-latest
needs: check-version
steps:
- name: Create PR to Homebrew
uses: mislav/bump-homebrew-formula-action@v2
with:
formula-name: meilisearch
env:
COMMITTER_TOKEN: ${{ secrets.HOMEBREW_COMMITTER_TOKEN }}

View File

@ -1,4 +1,5 @@
---
name: Publish images to Docker Hub
on:
push:
# Will run for every tag pushed except `latest`
@ -12,8 +13,6 @@ on:
- cron: '0 23 * * *' # Every day at 11:00pm
workflow_dispatch:
name: Publish tagged images to Docker Hub
jobs:
docker:
runs-on: docker
@ -58,20 +57,20 @@ jobs:
echo "date=$commit_date" >> $GITHUB_OUTPUT
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v4
uses: docker/metadata-action@v5
with:
images: getmeili/meilisearch
# Prevent `latest` to be updated for each new tag pushed.
@ -81,10 +80,11 @@ jobs:
type=ref,event=tag
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push
uses: docker/build-push-action@v3
uses: docker/build-push-action@v6
with:
push: true
platforms: linux/amd64,linux/arm64
@ -92,14 +92,34 @@ jobs:
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@v2
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}
repository: meilisearch/meilisearch-cloud
event-type: cloud-docker-build
client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
# Send notification to Swarmia to notify of a deployment: https://app.swarmia.com
# - name: 'Setup jq'
# uses: dcarbone/install-jq-action
# - name: Send deployment to Swarmia
# if: github.event_name == 'push' && success()
# run: |
# JSON_STRING=$( jq --null-input --compact-output \
# --arg version "${{ github.ref_name }}" \
# --arg appName "meilisearch" \
# --arg environment "production" \
# --arg commitSha "${{ github.sha }}" \
# --arg repositoryFullName "${{ github.repository }}" \
# '{"version": $version, "appName": $appName, "environment": $environment, "commitSha": $commitSha, "repositoryFullName": $repositoryFullName}' )
# curl -H "Authorization: ${{ secrets.SWARMIA_DEPLOYMENTS_AUTHORIZATION }}" \
# -H "Content-Type: application/json" \
# -d "$JSON_STRING" \
# https://hook.swarmia.com/deployments

View File

@ -1,127 +0,0 @@
name: Rust
on:
workflow_dispatch:
pull_request:
push:
# trying and staging branches are for Bors config
branches:
- trying
- staging
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
RUSTFLAGS: "-D warnings"
jobs:
test-linux:
name: Tests on ubuntu-18.04
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release
test-others:
name: Tests on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-12, windows-2022]
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
command: test
args: --locked
clippy:
name: Run Clippy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-targets -- --deny warnings
fmt:
name: Run Rustfmt
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: nightly
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo fmt
run: cargo fmt --all -- --check

386
.github/workflows/sdks-tests.yml vendored Normal file
View File

@ -0,0 +1,386 @@
# If any test fails, the engine team should ensure the "breaking" changes are expected and contact the integration team
name: SDKs tests
on:
workflow_dispatch:
inputs:
docker_image:
description: 'The Meilisearch Docker image used'
required: false
default: nightly
schedule:
- cron: "0 6 * * MON" # Every Monday at 6:00AM
env:
MEILI_MASTER_KEY: 'masterKey'
MEILI_NO_ANALYTICS: 'true'
DISABLE_COVERAGE: 'true'
jobs:
define-docker-image:
runs-on: ubuntu-latest
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
event=${{ github.event_name }}
echo "docker-image=nightly" >> $GITHUB_OUTPUT
if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
fi
- name: Docker image is ${{ steps.define-image.outputs.docker-image }}
run: echo "Docker image is ${{ steps.define-image.outputs.docker-image }}"
##########
## SDKs ##
##########
meilisearch-dotnet-tests:
needs: define-docker-image
name: .NET SDK tests
runs-on: ubuntu-latest
env:
MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
uses: actions/setup-dotnet@v4
with:
dotnet-version: "8.0.x"
- name: Install dependencies
run: dotnet restore
- name: Build
run: dotnet build --configuration Release --no-restore
- name: Meilisearch (latest version) setup with Docker
run: docker compose up -d
- name: Run tests
run: dotnet test --no-restore --verbosity normal
meilisearch-dart-tests:
needs: define-docker-image
name: Dart SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dart
- uses: dart-lang/setup-dart@v1
with:
sdk: 'latest'
- name: Install dependencies
run: dart pub get
- name: Run integration tests
run: dart test --concurrency=4
meilisearch-go-tests:
needs: define-docker-image
name: Go SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: stable
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-go
- name: Get dependencies
run: |
go get -v -t -d ./...
if [ -f Gopkg.toml ]; then
curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
dep ensure
fi
- name: Run integration tests
run: go test -v ./...
meilisearch-java-tests:
needs: define-docker-image
name: Java SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-java
- name: Set up Java
uses: actions/setup-java@v4
with:
java-version: 8
distribution: 'zulu'
cache: gradle
- name: Grant execute permission for gradlew
run: chmod +x gradlew
- name: Build and run unit and integration tests
run: ./gradlew build integrationTest
meilisearch-js-tests:
needs: define-docker-image
name: JS SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js
- name: Setup node
uses: actions/setup-node@v4
with:
cache: 'yarn'
- name: Install dependencies
run: yarn --dev
- name: Run tests
run: yarn test
- name: Build project
run: yarn build
- name: Run ESM env
run: yarn test:env:esm
- name: Run Node.js env
run: yarn test:env:nodejs
- name: Run node typescript env
run: yarn test:env:node-ts
- name: Run Browser env
run: yarn test:env:browser
meilisearch-php-tests:
needs: define-docker-image
name: PHP SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-php
- name: Install PHP
uses: shivammathur/setup-php@v2
- name: Validate composer.json and composer.lock
run: composer validate
- name: Install dependencies
run: |
composer remove --dev friendsofphp/php-cs-fixer --no-update --no-interaction
composer update --prefer-dist --no-progress
- name: Run test suite - default HTTP client (Guzzle 7)
run: |
sh scripts/tests.sh
composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle
meilisearch-python-tests:
needs: define-docker-image
name: Python SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
uses: actions/setup-python@v5
- name: Install pipenv
uses: dschep/install-pipenv-action@v1
- name: Install dependencies
run: pipenv install --dev --python=${{ matrix.python-version }}
- name: Test with pytest
run: pipenv run pytest
meilisearch-ruby-tests:
needs: define-docker-image
name: Ruby SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-ruby
- name: Set up Ruby 3
uses: ruby/setup-ruby@v1
with:
ruby-version: 3
- name: Install ruby dependencies
run: bundle install --with test
- name: Run test suite
run: bundle exec rspec
meilisearch-rust-tests:
needs: define-docker-image
name: Rust SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rust
- name: Build
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
meilisearch-swift-tests:
needs: define-docker-image
name: Swift SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-swift
- name: Run tests
run: swift test
########################
## FRONT-END PLUGINS ##
########################
meilisearch-js-plugins-tests:
needs: define-docker-image
name: meilisearch-js-plugins tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
uses: actions/setup-node@v4
with:
cache: yarn
- name: Install dependencies
run: yarn install
- name: Run tests
run: yarn test
- name: Build all the playgrounds and the packages
run: yarn build
########################
## BACK-END PLUGINS ###
########################
meilisearch-rails-tests:
needs: define-docker-image
name: meilisearch-rails tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rails
- name: Set up Ruby 3
uses: ruby/setup-ruby@v1
with:
ruby-version: 3
bundler-cache: true
- name: Run tests
run: bundle exec rspec
meilisearch-symfony-tests:
needs: define-docker-image
name: meilisearch-symfony tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-symfony
- name: Install PHP
uses: shivammathur/setup-php@v2
with:
tools: composer:v2, flex
- name: Validate composer.json and composer.lock
run: composer validate
- name: Install dependencies
run: composer install --prefer-dist --no-progress --quiet
- name: Remove doctrine/annotations
run: composer remove --dev doctrine/annotations
- name: Run test suite
run: composer test:unit

201
.github/workflows/test-suite.yml vendored Normal file
View File

@ -0,0 +1,201 @@
name: Test suite
on:
workflow_dispatch:
schedule:
# Everyday at 5:00am
- cron: "0 5 * * *"
pull_request:
merge_group:
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
RUSTFLAGS: "-D warnings"
jobs:
test-linux:
name: Tests on ubuntu-22.04
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.85
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
test-others:
name: Tests on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-2022]
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- uses: dtolnay/rust-toolchain@1.85
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
test-all-features:
name: Tests almost all features
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.85
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
ollama-ubuntu:
name: Test with Ollama
runs-on: ubuntu-latest
env:
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
steps:
- uses: actions/checkout@v3
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh
- name: Start serving
run: |
# Run it in the background, there is no way to daemonise at the moment
ollama serve &
# A short pause is required before the HTTP port is opened
sleep 5
# This endpoint blocks until ready
time curl -i http://localhost:11434
- name: Pull nomic-embed-text & all-minilm
run: |
ollama pull nomic-embed-text
ollama pull all-minilm
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all --features test-ollama ollama
test-disabled-tokenization:
name: Test disabled tokenization
runs-on: ubuntu-latest
container:
image: ubuntu:22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.85
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
echo "lindera has been found in the sources and it shouldn't"
exit 1
fi
- name: Run cargo tree with default features and check lindera is pressent
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
runs-on: ubuntu-latest
container:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.85
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --all
clippy:
name: Run Clippy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-targets -- --deny warnings
fmt:
name: Run Rustfmt
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
toolchain: nightly-2024-07-09
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
# we are going to create an empty file where rustfmt expects it.
run: |
echo -ne "\n" > crates/benchmarks/benches/datasets_paths.rs
cargo fmt --all -- --check

View File

@ -1,10 +1,10 @@
name: Update Meilisearch version in all Cargo.toml files
name: Update Meilisearch version in Cargo.toml
on:
workflow_dispatch:
inputs:
new_version:
description: 'The new version (vX.Y.Z)'
description: "The new version (vX.Y.Z)"
required: true
env:
@ -13,36 +13,33 @@ env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
update-version-cargo-toml:
name: Update version in Cargo.toml files
name: Update version in Cargo.toml
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
- uses: dtolnay/rust-toolchain@1.85
with:
profile: minimal
toolchain: stable
override: true
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml files
- name: Update Cargo.toml file
run: |
raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2)
new_string="version = \"$raw_new_version\""
sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml
sd '^version = "\d+.\d+.\w+"$' "$new_string" Cargo.toml
- name: Build Meilisearch to update Cargo.lock
run: cargo build
- name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch
uses: EndBug/add-and-commit@v9
with:
message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files"
message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml"
new_branch: ${{ env.NEW_BRANCH }}
- name: Create the PR pointing to ${{ github.ref_name }}
run: |
gh pr create \
--title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \
--body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' \
--title "Update version for the next release ($NEW_VERSION) in Cargo.toml" \
--body '⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.' \
--label 'skip changelog' \
--milestone $NEW_VERSION
--milestone $NEW_VERSION \
--base $GITHUB_REF_NAME

13
.gitignore vendored
View File

@ -1,16 +1,25 @@
.idea/
.vscode/
/target
**/*.csv
**/*.json_lines
**/*.rs.bk
/*.mdb
/query-history.txt
/data.ms
/snapshots
/dumps
/bench
/_xtask_benchmark.ms
/benchmarks
# Snapshots
## ... large
*.full.snap
## ... unreviewed
*.snap.new
# Database snapshot
crates/meilisearch/db.snapshot
# Fuzzcheck data for the facet indexing fuzz test
crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/

392
BENCHMARKS.md Normal file
View File

@ -0,0 +1,392 @@
# Benchmarks
Currently this repository hosts two kinds of benchmarks:
1. The older "milli benchmarks", that use [criterion](https://github.com/bheisler/criterion.rs) and live in the "benchmarks" directory.
2. The newer "bench" that are workload-based and so split between the [`workloads`](./workloads/) directory and the [`xtask::bench`](./xtask/src/bench/) module.
This document describes the newer "bench" benchmarks. For more details on the "milli benchmarks", see [benchmarks/README.md](./benchmarks/README.md).
## Design philosophy for the benchmarks
The newer "bench" benchmarks are **integration** benchmarks, in the sense that they spawn an actual Meilisearch server and measure its performance end-to-end, including HTTP request overhead.
Since this is prone to fluctuating, the benchmarks regain a bit of precision by measuring the runtime of the individual spans using the [logging machinery](./CONTRIBUTING.md#logging) of Meilisearch.
A span roughly translates to a function call. The benchmark runner collects all the spans by name using the [logs route](https://github.com/orgs/meilisearch/discussions/721) and sums their runtime. The processed results are then sent to the [benchmark dashboard](https://bench.meilisearch.dev), which is in charge of storing and presenting the data.
## Running the benchmarks
Benchmarks can run locally or in CI.
### Locally
#### With a local benchmark dashboard
The benchmarks dashboard lives in its [own repository](https://github.com/meilisearch/benchboard). We provide binaries for Ubuntu/Debian, but you can build from source for other platforms (MacOS should work as it was developed under that platform).
Run the `benchboard` binary to create a fresh database of results. By default it will serve the results and the API to gather results on `http://localhost:9001`.
From the Meilisearch repository, you can then run benchmarks with:
```sh
cargo xtask bench -- workloads/my_workload_1.json ..
```
This command will build and run Meilisearch locally on port 7700, so make sure that this port is available.
To run benchmarks on a different commit, just use the usual git command to get back to the desired commit.
#### Without a local benchmark dashboard
To work with the raw results, you can also skip using a local benchmark dashboard.
Run:
```sh
cargo xtask bench --no-dashboard -- workloads/my_workload_1.json workloads/my_workload_2.json ..
```
For processing the results, look at [Looking at benchmark results/Without dashboard](#without-dashboard).
#### Sending a workload by hand
Sometimes you want to visualize the metrics of a workload that comes from a custom report.
It is not easy to trick the benchboard into thinking that your report is legitimate, but here are the commands you can run to upload your Firefox report to a running benchboard.
```bash
# Name this hostname whatever you want
echo '{ "hostname": "the-best-place" }' | xh PUT 'http://127.0.0.1:9001/api/v1/machine'
# You'll receive an UUID from this command that we will call $invocation_uuid
echo '{ "commit": { "sha1": "1234567", "commit_date": "2024-09-05 12:00:12.0 +00:00:00", "message": "A cool message" }, "machine_hostname": "the-best-place", "max_workloads": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/invocation'
# Just use UUID from the previous command
# and you'll receive another UUID that we will call $workload_uuid
echo '{ "invocation_uuid": "$invocation_uuid", "name": "toto", "max_runs": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/workload'
# And now use your $workload_uuid and the content of your firefox report
# but don't forget to convert your firefox report from JSONLines into an object
echo '{ "workload_uuid": "$workload_uuid", "data": $REPORT_JSON_DATA }' | xh PUT 'http://127.0.0.1:9001/api/v1/run'
```
### In CI
We have dedicated runners to run workloads on CI. Currently, there are three ways of running the CI:
1. Automatically, on every push to `main`.
2. Manually, by clicking the [`Run workflow`](https://github.com/meilisearch/meilisearch/actions/workflows/bench-manual.yml) button and specifying the target reference (tag, commit or branch) as well as one or multiple workloads to run. The workloads must exist in the Meilisearch repository (conventionally, in the [`workloads`](./workloads/) directory) on the target reference. Globbing (e.g., `workloads/*.json`) works.
3. Manually on a PR, by posting a comment containing a `/bench` command, followed by one or multiple workloads to run. Globbing works. The workloads must exist in the Meilisearch repository in the branch of the PR.
```
/bench workloads/movies*.json /hackernews_1M.json
```
## Looking at benchmark results
### On the dashboard
Results are available on the global dashboard used by CI at <https://bench.meilisearch.dev> or on your [local dashboard](#with-a-local-benchmark-dashboard).
The dashboard homepage presents three sections:
1. The latest invocations (a call to `cargo xtask bench`, either local or by CI) with their reason (generally set to some helpful link in CI) and their status.
2. The latest workloads run on `main`.
3. The latest workloads run on other references.
By default, the workload shows the total runtime delta with the latest applicable commit on `main`. The latest applicable commit is the latest commit for workload invocations that do not originate on `main`, and the latest previous commit for workload invocations that originate on `main`.
You can explicitly request a detailed comparison by span with the `main` branch, the branch of origin, or any previous commit, by clicking the links at the bottom of the workload invocation.
In the detailed comparison view, the spans are sorted by improvements, regressions, stable (no statistically significant change) and unstable (the span runtime is comparable to its standard deviation).
You can click on the name of any span to get a box plot comparing the target commit with multiple commits of the selected branch.
### Without dashboard
After the workloads are done running, the reports will live in the Meilisearch repository, in the `bench/reports` directory (by default).
You can then convert these reports into other formats.
- To [Firefox profiler](https://profiler.firefox.com) format. Run:
```sh
cd bench/reports
cargo run --release --bin trace-to-firefox -- my_workload_1-0-trace.json
```
You can then upload the resulting `firefox-my_workload_1-0-trace.json` file to the online profiler.
## Designing benchmark workloads
Benchmark workloads conventionally live in the `workloads` directory of the Meilisearch repository.
They are JSON files with the following structure (comments are not actually supported, to make your own, remove them or copy some existing workload file):
```jsonc
{
// Name of the workload. Must be unique to the workload, as it will be used to group results on the dashboard.
"name": "hackernews.ndjson_1M,no-threads",
// Number of consecutive runs of the commands that should be performed.
// Each run uses a fresh instance of Meilisearch and a fresh database.
// Each run produces its own report file.
"run_count": 3,
// List of arguments to add to the Meilisearch command line.
"extra_cli_args": ["--max-indexing-threads=1"],
// An expression that can be parsed as a comma-separated list of targets and levels
// as described in [tracing_subscriber's documentation](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/targets/struct.Targets.html#examples).
// The expression is used to filter the spans that are measured for profiling purposes.
// Optional, defaults to "indexing::=trace" (for indexing workloads); another common value is
// "search::=trace"
"target": "indexing::=trace",
// List of named assets that can be used in the commands.
"assets": {
// name of the asset.
// Must be unique at the workload level.
// For better results, the same asset (same sha256) should have the same name across workloads.
// Having multiple assets with the same name and distinct hashes is supported across workloads,
// but will lead to superfluous downloads.
//
// Assets are stored in the `bench/assets/` directory by default.
"hackernews-100_000.ndjson": {
// If the asset exists in the local filesystem (Meilisearch repository or for your local workloads),
// its file path can be specified here.
// `null` if the asset should be downloaded from a remote location.
"local_location": null,
// URL of the remote location where the asset can be downloaded.
// Use the `--assets-key` of the runner to pass an API key in the `Authorization: Bearer` header of the download requests.
// `null` if the asset should be imported from a local location.
// if both local and remote locations are specified, then the local one is tried first, then the remote one
// if the file is locally missing or its hash differs.
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson",
// SHA256 of the asset.
// Optional, the `sha256` of the asset will be displayed during a run of the workload if it is missing.
// If present, the hash of the asset in the `bench/assets/` directory will be compared against this hash before
// running the workload. If the hashes differ, the asset will be downloaded anew.
"sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213",
// Optional, one of "Auto", "Json", "NdJson" or "Raw".
// If missing, assumed to be "Auto".
// If "Auto", the format will be determined from the extension in the asset name.
"format": "NdJson"
},
"hackernews-200_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson",
"sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685"
},
"hackernews-300_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson",
"sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2"
},
"hackernews-400_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson",
"sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7"
},
"hackernews-500_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson",
"sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083"
},
"hackernews-600_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson",
"sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe"
},
"hackernews-700_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson",
"sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b"
},
"hackernews-800_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson",
"sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546"
},
"hackernews-900_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson",
"sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9"
},
"hackernews-1_000_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson",
"sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
}
},
// Core of the workload.
// A list of commands to run sequentially.
// Optional: A precommand is a request to the Meilisearch instance that is executed before the profiling runs.
"precommands": [
{
// Meilisearch route to call. `http://localhost:7700/` will be prepended.
"route": "indexes/movies/settings",
// HTTP method to call.
"method": "PATCH",
// If applicable, body of the request.
// Optional, if missing, the body will be empty.
"body": {
// One of "empty", "inline" or "asset".
// If using "empty", you can skip the entire "body" key.
"inline": {
// when "inline" is used, the body is the JSON object that is the value of the `"inline"` key.
"displayedAttributes": [
"title",
"by",
"score",
"time"
],
"searchableAttributes": [
"title"
],
"filterableAttributes": [
"by"
],
"sortableAttributes": [
"score",
"time"
]
}
},
// Whether to wait before running the next request.
// One of:
// - DontWait: run the next command without waiting for the response to this one.
// - WaitForResponse: run the next command as soon as the response from the server is received.
// - WaitForTask: run the next command once **all** the Meilisearch tasks created up to now have finished processing.
"synchronous": "WaitForTask"
}
],
// A command is a request to the Meilisearch instance that is executed while the profiling runs.
"commands": [
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
// When using "asset", use the name of an asset as value to use the content of that asset as body.
// The content type is derived from the format of the asset:
// "NdJson" => "application/x-ndjson"
// "Json" => "application/json"
// "Raw" => "application/octet-stream"
// See [AssetFormat::to_content_type](https://github.com/meilisearch/meilisearch/blob/7b670a4afadb132ac4a01b6403108700501a391d/xtask/src/bench/assets.rs#L30)
// for details and up-to-date list.
"asset": "hackernews-100_000.ndjson"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-200_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-300_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-400_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-500_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-600_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-700_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-800_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-900_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-1_000_000.ndjson"
},
"synchronous": "WaitForTask"
}
]
}
```
### Adding new assets
Assets reside in our DigitalOcean S3 space. Assuming you have team access to the DigitalOcean S3 space:
1. go to <https://cloud.digitalocean.com/spaces/milli-benchmarks?i=d1c552&path=bench%2Fdatasets%2F>
2. upload your dataset:
1. if your dataset is a single file, upload that single file using the "upload" button,
2. otherwise, create a folder using the "create folder" button, then inside that folder upload your individual files.
## Upgrading `https://bench.meilisearch.dev`
The URL of the server is in our password manager (look for "benchboard").
1. Make the needed modifications on the [benchboard repository](https://github.com/meilisearch/benchboard) and merge them to main.
2. Publish a new release to produce the Ubuntu/Debian binary.
3. Download the binary locally, send it to the server:
```
scp -6 ~/Downloads/benchboard root@\[<ipv6-address>\]:/bench/new-benchboard
```
Note that the ipv6 must be between escaped square brackets for SCP.
4. SSH to the server:
```
ssh root@<ipv6-address>
```
Note the ipv6 must **NOT** be between escaped square brackets for SSH 🥲
5. On the server, set the correct permissions for the new binary:
```
chown bench:bench /bench/new-benchboard
chmod 700 /bench/new-benchboard
```
6. On the server, move the new binary to the location of the running binary (if unsure, start by making a backup of the running binary):
```
mv /bench/{new-,}benchboard
```
7. Restart the benchboard service.
```
systemctl restart benchboard
```
8. Check that the service runs correctly.
```
systemctl status benchboard
```
9. Check the availability of the service by going to <https://bench.meilisearch.dev> on your browser.

View File

@ -4,7 +4,7 @@ First, thank you for contributing to Meilisearch! The goal of this document is t
Remember that there are many ways to contribute other than writing code: writing [tutorials or blog posts](https://github.com/meilisearch/awesome-meilisearch), improving [the documentation](https://github.com/meilisearch/documentation), submitting [bug reports](https://github.com/meilisearch/meilisearch/issues/new?assignees=&labels=&template=bug_report.md&title=) and [feature requests](https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal)...
The code in this repository is only concerned with managing multiple indexes, handling the update store, and exposing an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/).
Meilisearch can manage multiple indexes, handle the update store, and expose an HTTP API. Search and indexation are the domain of our core engine, [`milli`](https://github.com/meilisearch/meilisearch/tree/main/milli), while tokenization is handled by [our `charabia` library](https://github.com/meilisearch/charabia/).
If Meilisearch does not offer optimized support for your language, please consider contributing to `charabia` by following the [CONTRIBUTING.md file](https://github.com/meilisearch/charabia/blob/main/CONTRIBUTING.md) and integrating your intended normalizer/segmenter.
@ -18,9 +18,9 @@ If Meilisearch does not offer optimized support for your language, please consid
## Assumptions
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests (PR)](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) workflow.**
2. **You've read the Meilisearch [documentation](https://www.meilisearch.com/docs).**
3. **You know about the [Meilisearch community on Discord](https://discord.meilisearch.com).
Please use this for help.**
## How to Contribute
@ -52,12 +52,86 @@ cargo test
This command will be triggered on each PR as a requirement for merging it.
#### Faster build
You can set the `LINDERA_CACHE` environment variable to speed up your successive builds by up to 2 minutes.
It'll store some built artifacts in the directory of your choice.
We recommend using the `$HOME/.cache/meili/lindera` directory:
```sh
export LINDERA_CACHE=$HOME/.cache/meili/lindera
```
You can set the `MILLI_BENCH_DATASETS_PATH` environment variable to further speed up your builds.
It'll store some big files used for the benchmarks in the directory of your choice.
We recommend using the `$HOME/.cache/meili/benches` directory:
```sh
export MILLI_BENCH_DATASETS_PATH=$HOME/.cache/meili/benches
```
Furthermore, you can improve incremental compilation by setting the `MEILI_NO_VERGEN` environment variable.
Setting this variable will prevent the Meilisearch binary from being rebuilt each time the directory that hosts the Meilisearch repository changes.
Do not enable this environment variable for production builds (as it will break the `version` route, among other things).
#### Snapshot-based tests
We are using [insta](https://insta.rs) to perform snapshot-based testing.
We recommend using the insta tooling (such as `cargo-insta`) to update the snapshots if they change following a PR.
New tests should use insta where possible rather than manual `assert` statements.
Furthermore, we provide some macros on top of insta, notably a way to use snapshot hashes instead of inline snapshots, saving a lot of space in the repository.
To effectively debug snapshot-based hashes, we recommend you export the `MEILI_TEST_FULL_SNAPS` environment variable so that snapshots are fully created locally:
```sh
export MEILI_TEST_FULL_SNAPS=true # add this to your .bashrc, .zshrc, ...
```
#### Test troubleshooting
If you get a "Too many open files" error you might want to increase the open file limit using this command:
```bash
ulimit -Sn 3000
```
#### Build tools
Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) workflow to provide some build tools.
Run `cargo xtask --help` from the root of the repository to find out what is available.
#### Update the OpenAPI file if the API changed
To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).
### Logging
Meilisearch uses [`tracing`](https://lib.rs/crates/tracing) for logging purposes. Tracing logs are structured and can be displayed as JSON to the end user, so prefer passing arguments as fields rather than interpolating them in the message.
Refer to the [documentation](https://docs.rs/tracing/0.1.40/tracing/index.html#using-the-macros) for the syntax of the spans and events.
Logging spans are used for 3 distinct purposes:
1. Regular logging
2. Profiling
3. Benchmarking
As a result, the spans should follow some rules:
- They should not be put on functions that are called too often. That is because opening and closing a span causes some overhead. For regular logging, avoid putting spans on functions that are taking less than a few hundred nanoseconds. For profiling or benchmarking, avoid putting spans on functions that are taking less than a few microseconds.
- For profiling and benchmarking, use the `TRACE` level.
- For profiling and benchmarking, use the following `target` prefixes:
- `indexing::` for spans meant for profiling the indexing operations.
- `search::` for spans meant for profiling the search operations.
### Benchmarking
See [BENCHMARKS.md](./BENCHMARKS.md)
## Git Guidelines
### Git Branches
@ -84,7 +158,7 @@ Some notes on GitHub PRs:
- The PR title should be accurate and descriptive of the changes.
- [Convert your PR as a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you pass your PR as ready for review.<br>
The draft PRs are recommended when you want to show that you are working on something and make your work visible.
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
## Release Process (for internal team only)
@ -92,13 +166,20 @@ Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org
### Automation to rebase and Merge the PRs
This project integrates a bot that helps us manage pull requests merging.<br>
_[Read more about this](https://github.com/meilisearch/integration-guides/blob/main/resources/bors.md)._
This project uses GitHub Merge Queues to help us manage the merging of pull requests.
### How to Publish a new Release
The full Meilisearch release process is described in [this guide](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md). Please follow it carefully before doing any release.
### How to publish a prototype
Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
This happens in two steps:
- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)
### Release assets
For each release, the following assets are created:

6232
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,21 +1,51 @@
[workspace]
resolver = "2"
members = [
"meilisearch",
"meilisearch-types",
"meilisearch-auth",
"meili-snap",
"index-scheduler",
"dump",
"file-store",
"permissive-json-pointer",
"crates/meilisearch",
"crates/meilitool",
"crates/meilisearch-types",
"crates/meilisearch-auth",
"crates/meili-snap",
"crates/index-scheduler",
"crates/dump",
"crates/file-store",
"crates/permissive-json-pointer",
"crates/milli",
"crates/filter-parser",
"crates/flatten-serde-json",
"crates/json-depth-checker",
"crates/benchmarks",
"crates/fuzzers",
"crates/tracing-trace",
"crates/xtask",
"crates/build-info",
]
[workspace.package]
version = "1.16.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
readme = "README.md"
edition = "2021"
license = "MIT"
[profile.release]
codegen-units = 1
# We now compile heed without the NDEBUG define for better performance.
# However, we still enable debug assertions for a better detection of
# disk corruption on the cloud or in OSS.
[profile.release.package.heed]
debug-assertions = true
[profile.dev.package.flate2]
opt-level = 3
[profile.dev.package.milli]
[profile.dev.package.grenad]
opt-level = 3
[profile.dev.package.roaring]
opt-level = 3

View File

@ -1,13 +1,14 @@
# Compile
FROM rust:alpine3.16 AS compiler
FROM rust:1.85-alpine3.20 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
RUN apk add -q --no-cache build-base openssl-dev
WORKDIR /meilisearch
WORKDIR /
ARG COMMIT_SHA
ARG COMMIT_DATE
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE}
ARG GIT_TAG
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
@ -16,20 +17,21 @@ RUN set -eux; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
cargo build --release -p meilisearch -p meilitool
# Run
FROM alpine:3.16
FROM alpine:3.20
LABEL org.opencontainers.image.source="https://github.com/meilisearch/meilisearch"
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
RUN apk add -q --no-cache libgcc tini curl
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.
COPY --from=compiler /target/release/meilisearch /bin/meilisearch
COPY --from=compiler /target/release/meilitool /bin/meilitool
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2022 Meili SAS
Copyright (c) 2019-2025 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

19
PROFILING.md Normal file
View File

@ -0,0 +1,19 @@
# Profiling Meilisearch
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process
When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
Another piece of advice on the Puffin Viewer UI is to be mindful of the _Merge children with same ID_ option. It can hide the exact timings at which events were sent, so please turn it off when you see strange gaps on the flamegraph.
## Profiling the Search Process
We still need to take the time to profile the search side of the engine with Puffin. It would require time to profile the filtering phase, query parsing, creation, and execution. We could even profile the Actix HTTP server.
The only issue we see is the framing system. Puffin requires a global frame-based profiling phase, which collides with Meilisearch's ability to accept and answer multiple requests on different threads simultaneously.

103
README.md
View File

@ -1,95 +1,105 @@
<p align="center">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
</a>
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
</a>
</p>
<h4 align="center">
<a href="https://www.meilisearch.com">Website</a> |
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Website</a> |
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
<a href="https://blog.meilisearch.com">Blog</a> |
<a href="https://docs.meilisearch.com">Documentation</a> |
<a href="https://docs.meilisearch.com/faq/">FAQ</a> |
<a href="https://discord.meilisearch.com">Discord</a>
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Meilisearch Cloud</a> |
<a href="https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Blog</a> |
<a href="https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Documentation</a> |
<a href="https://www.meilisearch.com/docs/faq?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">FAQ</a> |
<a href="https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Discord</a>
</h4>
<p align="center">
<a href="https://github.com/meilisearch/meilisearch/actions"><img src="https://github.com/meilisearch/meilisearch/workflows/Cargo%20test/badge.svg" alt="Build Status"></a>
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://app.bors.tech/repositories/26457"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
<a href="https://github.com/meilisearch/meilisearch/queue"><img alt="Merge Queues enabled" src="https://img.shields.io/badge/Merge_Queues-enabled-%2357cf60?logo=github"></a>
</p>
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/#gh-light-mode-only" target="_blank">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
<img src="assets/demo-light.gif#gh-light-mode-only" alt="A bright colored application for finding movies screening near the user">
</a>
<a href="https://where2watch.meilisearch.com/#gh-dark-mode-only" target="_blank">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-dark-mode-only" target="_blank">
<img src="assets/demo-dark.gif#gh-dark-mode-only" alt="A dark colored application for finding movies screening near the user">
</a>
</p>
🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
## 🖥 Examples
- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million songs.
- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos).
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering and faceted search](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://docs.meilisearch.com/learn/advanced/sorting.html):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html):** filter and sort documents based on geographic data
- **[Extensive language support](https://docs.meilisearch.com/learn/what_is_meilisearch/language.html):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://docs.meilisearch.com/learn/security/master_api_keys.html):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://docs.meilisearch.com/learn/security/tenant_tokens.html):** personalize search results for any number of application tenants
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://docs.meilisearch.com/reference/api/overview.html):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **AI-ready:** works out of the box with [langchain](https://www.meilisearch.com/with/langchain) and the [model context protocol](https://github.com/meilisearch/meilisearch-mcp)
- **Easy to install, deploy, and maintain**
## 📖 Documentation
You can consult Meilisearch's documentation at [https://docs.meilisearch.com](https://docs.meilisearch.com/).
You can consult Meilisearch's documentation at [meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://docs.meilisearch.com/learn/getting_started/quick_start.html) guide.
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
You may also want to check out [Meilisearch 101](https://docs.meilisearch.com/learn/getting_started/filtering_and_sorting.html) for an introduction to some of Meilisearch's most popular features.
## 🌍 Supercharge your Meilisearch experience
## ☁️ Meilisearch cloud
Let us manage your infrastructure so you can focus on integrating a great search experience. Try [Meilisearch Cloud](https://meilisearch.com/pricing) today.
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Additional features include analytics & monitoring in many regions around the world. No credit card is required.
## 🧰 SDKs & integration tools
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
Take a look at the complete [Meilisearch integration list](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html).
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-link).
![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)
[![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-logos)
## ⚙️ Advanced usage
Experienced users will want to keep our [API Reference](https://docs.meilisearch.com/reference/api) close at hand.
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) close at hand.
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html), [sorting](https://docs.meilisearch.com/learn/advanced/sorting.html), [geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html), [API keys](https://docs.meilisearch.com/learn/security/master_api_keys.html), and [tenant tokens](https://docs.meilisearch.com/learn/security/tenant_tokens.html).
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://docs.meilisearch.com/learn/core_concepts/documents.html) and [indexes](https://docs.meilisearch.com/learn/core_concepts/indexes.html).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
## 📊 Telemetry
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html#how-to-disable-data-collection) whenever you want.
Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html) of our documentation.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
## 📫 Get in touch!
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/)
Meilisearch is a search engine created by [Meili](https://www.meilisearch.com/careers), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
@ -97,7 +107,18 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
- For everything else, please check [this page listing some of the other places where you can find us](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html)
- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
Thank you for your support!
## 👩‍💻 Contributing
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md).
## 📦 Versioning
Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).
Unlike the binaries, crates in this repository are not currently available on [crates.io](https://crates.io/) and do not follow [SemVer conventions](https://semver.org).

File diff suppressed because it is too large Load Diff

6
assets/milli-logo.svg Normal file
View File

@ -0,0 +1,6 @@
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.085 190L242.907 86H276.196L246.375 190H213.085Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 190L29.8215 86H63.1111L33.2896 190H0Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M124.986 0L57.5772 235.083L60.7752 236H90.6038L158.276 0H124.986Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.273 0L127.601 236H160.891L228.563 0H195.273Z" fill="#494949"/>
</svg>

After

Width:  |  Height:  |  Size: 585 B

BIN
assets/ph-banner.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 578 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

View File

@ -0,0 +1,19 @@
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'codelab-monitor'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's the local Meilisearch instance.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'meilisearch'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
- targets: ['localhost:7700']

View File

@ -1,11 +0,0 @@
status = [
'Tests on ubuntu-18.04',
'Tests on macos-12',
'Tests on windows-2022',
'Run Clippy',
'Run Rustfmt',
'Run tests in debug',
]
pr_status = ['Milestone Check']
# 3 hours timeout
timeout-sec = 10800

View File

@ -1,135 +1,134 @@
# This file shows the default configuration of Meilisearch.
# All variables are defined here: https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
# All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables
db_path = "./data.ms"
# Designates the location where database files will be created and retrieved.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#database-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
db_path = "./data.ms"
env = "development"
# Configures the instance's environment. Value must be either `production` or `development`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#environment
# https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
env = "development"
http_addr = "localhost:7700"
# The address on which the HTTP server will listen.
http_addr = "localhost:7700"
# master_key = "YOUR_MASTER_KEY_VALUE"
# Sets the instance's master key, automatically protecting all routes except GET /health.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#master-key
# https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
# master_key = "YOUR_MASTER_KEY_VALUE"
# no_analytics = true
# Deactivates Meilisearch's built-in telemetry when provided.
# Meilisearch automatically collects data from all instances that do not opt out using this flag.
# All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-analytics
# https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
# no_analytics = true
http_payload_size_limit = "100 MB"
# Sets the maximum size of accepted payloads.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#payload-limit-size
# https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
http_payload_size_limit = "100 MB"
log_level = "INFO"
# Defines how much detail should be present in Meilisearch's logs.
# Meilisearch currently supports five log levels, listed in order of increasing verbosity: `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# https://docs.meilisearch.com/learn/configuration/instance_options.html#log-level
# Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
log_level = "INFO"
max_index_size = "100 GiB"
# Sets the maximum size of the index.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-index-size
max_task_db_size = "100 GiB"
# Sets the maximum size of the task database.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-task-db-size
# max_indexing_memory = "2 GiB"
# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-memory
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
# max_indexing_memory = "2 GiB"
# max_indexing_threads = 4
# Sets the maximum number of threads Meilisearch can use during indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-threads
disable_auto_batching = false
# Deactivates auto-batching when provided.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-auto-batching
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
# max_indexing_threads = 4
#############
### DUMPS ###
#############
dump_dir = "dumps/"
# Sets the directory where Meilisearch will create dump files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#dumps-destination
# https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
dump_dir = "dumps/"
# import_dump = "./path/to/my/file.dump"
# Imports the dump file located at the specified path. Path must point to a .dump file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-dump
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
# import_dump = "./path/to/my/file.dump"
ignore_missing_dump = false
# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-dump
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
ignore_missing_dump = false
ignore_dump_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-dump-if-db-exists
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
ignore_dump_if_db_exists = false
#################
### SNAPSHOTS ###
#################
# Enables scheduled snapshots when true, disables them when false (the default).
# If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
# between each snapshot, in seconds.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
schedule_snapshot = false
# Activates scheduled snapshots when provided.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#schedule-snapshot-creation
snapshot_dir = "snapshots/"
# Sets the directory where Meilisearch will store snapshots.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-destination
# https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
snapshot_dir = "snapshots/"
snapshot_interval_sec = 86400
# Defines the interval between each snapshot. Value must be given in seconds.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-interval
# import_snapshot = "./path/to/my/snapshot"
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-snapshot
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
# import_snapshot = "./path/to/my/snapshot"
ignore_missing_snapshot = false
# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-snapshot
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
ignore_missing_snapshot = false
ignore_snapshot_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-snapshot-if-db-exists
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
ignore_snapshot_if_db_exists = false
###########
### SSL ###
###########
# ssl_auth_path = "./path/to/root"
# Enables client authentication in the specified path.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-authentication-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
# ssl_auth_path = "./path/to/root"
# ssl_cert_path = "./path/to/certfile"
# Sets the server's SSL certificates.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-certificates-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
# ssl_cert_path = "./path/to/certfile"
# ssl_key_path = "./path/to/private-key"
# Sets the server's SSL key files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-key-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
# ssl_key_path = "./path/to/private-key"
# ssl_ocsp_path = "./path/to/ocsp-file"
# Sets the server's OCSP file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-ocsp-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
# ssl_ocsp_path = "./path/to/ocsp-file"
ssl_require_auth = false
# Makes SSL authentication mandatory.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-require-auth
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
ssl_require_auth = false
ssl_resumption = false
# Activates SSL session resumption.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-resumption
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
ssl_resumption = false
ssl_tickets = false
# Activates SSL tickets.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
ssl_tickets = false
#############################
### Experimental features ###
#############################
# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
# Enables the Prometheus metrics on the `GET /metrics` endpoint.
experimental_enable_metrics = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
experimental_reduce_indexing_memory_usage = false
# Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
# experimental_max_number_of_batched_tasks = 100

1
crates/benchmarks/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
benches/datasets_paths.rs

View File

@ -0,0 +1,53 @@
[package]
name = "benchmarks"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.98"
bumpalo = "3.18.1"
csv = "1.3.1"
memmap2 = "0.9.5"
milli = { path = "../milli" }
mimalloc = { version = "0.1.47", default-features = false }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.20.0"
[dev-dependencies]
criterion = { version = "0.6.0", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.12"
[build-dependencies]
anyhow = "1.0.98"
bytes = "1.10.1"
convert_case = "0.8.0"
flate2 = "1.1.2"
reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]
[[bench]]
name = "search_songs"
harness = false
[[bench]]
name = "search_wiki"
harness = false
[[bench]]
name = "search_geo"
harness = false
[[bench]]
name = "indexing"
harness = false

138
crates/benchmarks/README.md Normal file
View File

@ -0,0 +1,138 @@
Benchmarks
==========
## TOC
- [Run the benchmarks](#run-the-benchmarks)
- [Comparison between benchmarks](#comparison-between-benchmarks)
- [Datasets](#datasets)
## Run the benchmarks
### On our private server
The Meili team self-hosts its own GitHub runner to run benchmarks on our dedicated bare metal server.
To trigger the benchmark workflow:
- Go to the `Actions` tab of this repository.
- Select the `Benchmarks` workflow on the left.
- Click on `Run workflow` in the blue banner.
- Select the branch on which you want to run the benchmarks and select the dataset you want (default: `songs`).
- Finally, click on `Run workflow`.
This GitHub workflow will run the benchmarks and push the `critcmp` report to a DigitalOcean Space (= S3).
The name of the uploaded file is displayed in the workflow.
_[More about critcmp](https://github.com/BurntSushi/critcmp)._
💡 To compare the just-uploaded benchmark with another one, check out the [next section](#comparison-between-benchmarks).
### On your machine
To run all the benchmarks (~5h):
```bash
cargo bench
```
To run only the `search_songs` (~1h), `search_wiki` (~3h), `search_geo` (~20m) or `indexing` (~2h) benchmark:
```bash
cargo bench --bench <benchmark name>
```
By default, the datasets will be downloaded and uncompressed automatically in the target directory.<br>
If you don't want to download the datasets every time you update something in the code, you can specify a custom directory with the environment variable `MILLI_BENCH_DATASETS_PATH`:
```bash
mkdir ~/datasets
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench search_songs # the four datasets are downloaded
touch build.rs
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench search_songs # the code is compiled again but the datasets are not downloaded
```
## Comparison between benchmarks
The benchmark reports we push are generated with `critcmp`. Thus, we use `critcmp` to show the result of a benchmark, or compare results between multiple benchmarks.
We provide a script to download and display the comparison report.
Requirements:
- `grep`
- `curl`
- [`critcmp`](https://github.com/BurntSushi/critcmp)
List the available files in the DO Space:
```bash
./benchmarks/scripts/list.sh
```
```bash
songs_main_09a4321.json
songs_geosearch_24ec456.json
search_songs_main_cb45a10b.json
```
Run the comparison script:
```bash
# we get the result of ONE benchmark, this gives you an idea of how much time an operation took
./benchmarks/scripts/compare.sh songs_geosearch_24ec456.json
# we compare two benchmarks
./benchmarks/scripts/compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json
# we compare three benchmarks
./benchmarks/scripts/compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json search_songs_main_cb45a10b.json
```
## Datasets
The benchmarks use the following datasets:
- `smol-songs`
- `smol-wiki`
- `movies`
- `smol-all-countries`
### Songs
`smol-songs` is a subset of the [`songs.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/songs.csv.gz).
It was generated with this command:
```bash
xsv sample --seed 42 1000000 songs.csv -o smol-songs.csv
```
_[Download the generated `smol-songs` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-songs.csv.gz)._
### Wiki
`smol-wiki` is a subset of the [`wikipedia-articles.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/wiki-articles.csv.gz).
It was generated with the following command:
```bash
xsv sample --seed 42 500000 wiki-articles.csv -o smol-wiki-articles.csv
```
_[Download the `smol-wiki` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-wiki-articles.csv.gz)._
### Movies
`movies` is a really small dataset we use as our example in the [getting started guide](https://www.meilisearch.com/docs/learn/getting_started/quick_start).
_[Download the `movies` dataset](https://www.meilisearch.com/movies.json)._
### All Countries
`smol-all-countries` is a subset of the [`all-countries.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/all-countries.csv.gz).
It has been converted to jsonlines and then edited so it matches our format for the `_geo` field.
It was generated with the following command:
```bash
bat all-countries.csv.gz | gunzip | xsv sample --seed 42 1000000 | csv2json-lite | sd '"latitude":"(.*?)","longitude":"(.*?)"' '"_geo": { "lat": $1, "lng": $2 }' | sd '\[|\]|,$' '' | gzip > smol-all-countries.jsonl.gz
```
_[Download the `smol-all-countries` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-all-countries.jsonl.gz)._

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,126 @@
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use milli::FilterableAttributesRule;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// Applies the index settings shared by every geo benchmark to `builder`.
///
/// Sets, in this order, the displayed, searchable, filterable and sortable
/// fields. The filterable and sortable sets contain the same three
/// attributes (`_geo`, `population`, `elevation`).
fn base_conf(builder: &mut Settings) {
    // Attribute lists, kept together so the whole configuration is visible at a glance.
    const DISPLAYED: [&str; 6] =
        ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"];
    const SEARCHABLE: [&str; 3] = ["name", "alternatenames", "elevation"];
    const FILTERABLE: [&str; 3] = ["_geo", "population", "elevation"];
    const SORTABLE: [&str; 3] = ["_geo", "population", "elevation"];

    builder.set_displayed_fields(DISPLAYED.iter().map(|&name| name.to_owned()).collect());
    builder.set_searchable_fields(SEARCHABLE.iter().map(|&name| name.to_owned()).collect());

    // Filterable attributes are wrapped into plain field rules.
    builder.set_filterable_fields(
        FILTERABLE.iter().map(|&name| FilterableAttributesRule::Field(name.to_owned())).collect(),
    );

    builder.set_sortable_fields(SORTABLE.iter().map(|&name| name.to_owned()).collect());
}
/// Base configuration that every geo benchmark group below extends through
/// struct-update syntax (`..BASE_CONF`).
///
/// It targets the `SMOL_ALL_COUNTRIES` dataset in `jsonl` format, runs a single
/// empty query, configures the index with `base_conf` and uses `geonameid` as
/// primary key. Every other field comes from `Conf::BASE`.
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
dataset_format: "jsonl",
// Only the empty query is used: the groups differ by their sort or filter instead.
queries: &[
"",
],
configure: base_conf,
primary_key: Some("geonameid"),
..Conf::BASE
};
/// Declares every geo benchmark group and hands them to `utils::run_benches`.
///
/// The groups are, in order: one search with neither sort nor filter, ascending
/// and descending `_geoPoint` sorts from three reference points (Lille, Tokyo,
/// Point Nemo), then `_geoRadius` filters of 100km and 1km around the same
/// three points. Each group only overrides `group_name` plus its `sort` or
/// `filter`; everything else is inherited from `BASE_CONF`.
fn bench_geo(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
// A basic placeholder with no geo
utils::Conf {
group_name: "placeholder with no geo",
..BASE_CONF
},
// Medium agglomeration: probably the most common use case
utils::Conf {
group_name: "asc sort from Lille",
sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Lille",
sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):desc"]),
..BASE_CONF
},
// Big agglomeration: a lot of documents close to our point
utils::Conf {
group_name: "asc sort from Tokyo",
sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Tokyo",
sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):desc"]),
..BASE_CONF
},
// The furthest point from any civilization
utils::Conf {
group_name: "asc sort from Point Nemo",
sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Point Nemo",
sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):desc"]),
..BASE_CONF
},
// Filters: same three reference points as the sorts above, with the two
// radii named in each group (100km and 1km)
utils::Conf {
group_name: "filter of 100km from Lille",
filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Lille",
filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 1000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 100km from Tokyo",
filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Tokyo",
filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 1000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 100km from Point Nemo",
filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Point Nemo",
filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 1000)"),
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
// Registers `bench_geo` in the criterion group `benches` and passes that group
// to `criterion_main!`.
criterion_group!(benches, bench_geo);
criterion_main!(benches);

View File

@ -0,0 +1,198 @@
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use milli::FilterableAttributesRule;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// Applies the index settings shared by every songs benchmark to `builder`.
///
/// Sets, in this order, the displayed fields, the searchable fields and the
/// filterable fields (each wrapped into a plain field rule).
fn base_conf(builder: &mut Settings) {
    // Attribute lists, kept together so the whole configuration is visible at a glance.
    const DISPLAYED: [&str; 8] =
        ["id", "title", "album", "artist", "genre", "country", "released", "duration"];
    const SEARCHABLE: [&str; 3] = ["title", "album", "artist"];
    const FACETED: [&str; 5] =
        ["released-timestamp", "duration-float", "genre", "country", "artist"];

    builder.set_displayed_fields(DISPLAYED.iter().map(|&name| name.to_owned()).collect());
    builder.set_searchable_fields(SEARCHABLE.iter().map(|&name| name.to_owned()).collect());
    builder.set_filterable_fields(
        FACETED.iter().map(|&name| FilterableAttributesRule::Field(name.to_owned())).collect(),
    );
}
/// Base configuration that every songs benchmark group extends through
/// struct-update syntax (`..BASE_CONF`).
///
/// It targets the `SMOL_SONGS` dataset, configures the index with `base_conf`
/// and uses `id` as primary key. Every other field comes from `Conf::BASE`.
///
/// Every default query ends with a trailing space.
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_SONGS,
// NOTE(review): the trailing numbers are presumably the number of matching
// documents for each query — confirm against the dataset.
queries: &[
"john ", // 9097
"david ", // 4794
"charles ", // 1957
"david bowie ", // 1200
"michael jackson ", // 600
"thelonious monk ", // 303
"charles mingus ", // 142
"marcus miller ", // 60
"tamo ", // 13
"Notstandskomitee ", // 4
],
configure: base_conf,
primary_key: Some("id"),
..Conf::BASE
};
/// Declares every songs benchmark group and hands them to `utils::run_benches`.
///
/// The groups are, in order:
/// 1. each criterion alone (`proximity`, `typo`, `words`, then the
///    `released-timestamp` ascending and descending criteria);
/// 2. the ascending / descending criterion placed in front of the default
///    criteria returned by `milli::default_criteria()`;
/// 3. filters applied to the default queries of `BASE_CONF`;
/// 4. plain searches with the default criteria: placeholder, default queries
///    with and without quotes, and one-letter prefix searches.
///
/// Every group only overrides the fields it needs; the rest is inherited from
/// `BASE_CONF`.
fn bench_songs(c: &mut criterion::Criterion) {
    // The default criteria are turned into owned strings first so that `&str`
    // views over them can be stored in the configurations below.
    let default_criterion: Vec<String> =
        milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
    let default_criterion = default_criterion.iter().map(|s| s.as_str());
    let asc_default: Vec<&str> =
        std::iter::once("released-timestamp:asc").chain(default_criterion.clone()).collect();
    let desc_default: Vec<&str> =
        std::iter::once("released-timestamp:desc").chain(default_criterion).collect();

    // Same queries as `BASE_CONF`, but with every word wrapped in double quotes.
    let basic_with_quote: Vec<String> = BASE_CONF
        .queries
        .iter()
        .map(|s| {
            s.trim().split(' ').map(|s| format!(r#""{}""#, s)).collect::<Vec<String>>().join(" ")
        })
        .collect();
    let basic_with_quote: &[&str] =
        &basic_with_quote.iter().map(|s| s.as_str()).collect::<Vec<&str>>();

    #[rustfmt::skip]
    let confs = &[
        /* first we bench each criterion alone */
        utils::Conf {
            group_name: "proximity",
            queries: &[
                "black saint sinner lady ",
                "les dangeureuses 1960 ",
                "The Disneyland Sing-Along Chorus ",
                "Under Great Northern Lights ",
                "7000 Danses Un Jour Dans Notre Vie ",
            ],
            criterion: Some(&["proximity"]),
            optional_words: false,
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "typo",
            queries: &[
                "mongus ",
                "thelonius monk ",
                "Disnaylande ",
                "the white striper ",
                "indochie ",
                "indochien ",
                "klub des loopers ",
                "fear of the duck ",
                "michel depech ",
                "stromal ",
                "dire straights ",
                "Arethla Franklin ",
            ],
            criterion: Some(&["typo"]),
            optional_words: false,
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "words",
            queries: &[
                "the black saint and the sinner lady and the good doggo ", // four words to pop
                "les liaisons dangeureuses 1793 ",                         // one word to pop
                "The Disneyland Children's Sing-Alone song ",              // two words to pop
                "seven nation mummy ",                                     // one word to pop
                "7000 Danses / Le Baiser / je me trompe de mots ",         // four words to pop
                "Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop
                "whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 13
            ],
            criterion: Some(&["words"]),
            ..BASE_CONF
        },
        // The "asc" group must sort in ascending order: it previously used the
        // `:desc` criterion, which made it a duplicate of the "desc" group.
        utils::Conf {
            group_name: "asc",
            criterion: Some(&["released-timestamp:asc"]),
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "desc",
            criterion: Some(&["released-timestamp:desc"]),
            ..BASE_CONF
        },

        /* then we bench the asc and desc criterion on top of the default criterion */
        utils::Conf {
            group_name: "asc + default",
            criterion: Some(&asc_default[..]),
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "desc + default",
            criterion: Some(&desc_default[..]),
            ..BASE_CONF
        },

        /* we bench the filters with the default request */
        utils::Conf {
            group_name: "basic filter: <=",
            filter: Some("released-timestamp <= 946728000"), // year 2000
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "basic filter: TO",
            filter: Some("released-timestamp 946728000 TO 1262347200"), // year 2000 to 2010
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "big filter",
            filter: Some("released-timestamp != 1262347200 AND (NOT (released-timestamp = 946728000)) AND (duration-float = 1 OR (duration-float 1.1 TO 1.5 AND released-timestamp > 315576000))"),
            ..BASE_CONF
        },

        /* then we bench some global / normal search with all the default criterion in the default
         * order */
        utils::Conf {
            group_name: "basic placeholder",
            queries: &[""],
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "basic without quote",
            queries: &BASE_CONF
                .queries
                .iter()
                .map(|s| s.trim())  // we remove the space at the end of each request
                .collect::<Vec<&str>>(),
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "basic with quote",
            queries: basic_with_quote,
            ..BASE_CONF
        },
        utils::Conf {
            group_name: "prefix search",
            queries: &[
                "s", // 500k+ results
                "a", //
                "b", //
                "i", //
                "x", // only 7k results
            ],
            ..BASE_CONF
        },
    ];

    utils::run_benches(c, confs);
}
// Registers `bench_songs` in the criterion group `benches` and passes that
// group to `criterion_main!`.
criterion_group!(benches, bench_songs);
criterion_main!(benches);

View File

@ -0,0 +1,130 @@
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// Applies the index settings shared by every wiki benchmark to `builder`:
/// first the displayed fields, then the searchable fields.
fn base_conf(builder: &mut Settings) {
    const DISPLAYED: [&str; 3] = ["title", "body", "url"];
    const SEARCHABLE: [&str; 2] = ["title", "body"];

    builder.set_displayed_fields(DISPLAYED.iter().map(|&name| name.to_owned()).collect());
    builder.set_searchable_fields(SEARCHABLE.iter().map(|&name| name.to_owned()).collect());
}
/// Configuration every benchmark group of this file starts from: the `SMOL_WIKI_ARTICLES`
/// dataset, `base_conf` as settings hook, and a default list of queries.
///
/// Each query ends with a space and is annotated with its number of candidates.
/// Fields that are not listed here come from `Conf::BASE`.
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_WIKI_ARTICLES,
queries: &[
"mingus ", // 46 candidates
"miles davis ", // 159
"rock and roll ", // 1007
"machine ", // 3448
"spain ", // 7002
"japan ", // 10.593
"france ", // 17.616
"film ", // 24.959
],
configure: base_conf,
..Conf::BASE
};
/// Declares every benchmark group run against the wiki dataset and hands them to
/// `utils::run_benches`.
///
/// NOTE(review): the function is named `bench_songs` although `BASE_CONF` points at the
/// wiki articles dataset; the name is referenced by `criterion_group!` below.
fn bench_songs(c: &mut criterion::Criterion) {
// Same queries as `BASE_CONF`, with every word wrapped in double quotes.
let basic_with_quote: Vec<String> = BASE_CONF
.queries
.iter()
.map(|s| {
s.trim().split(' ').map(|s| format!(r#""{}""#, s)).collect::<Vec<String>>().join(" ")
})
.collect();
// `Conf::queries` expects `&[&str]`, so borrow the owned strings built above.
let basic_with_quote: &[&str] =
&basic_with_quote.iter().map(|s| s.as_str()).collect::<Vec<&str>>();
#[rustfmt::skip]
let confs = &[
/* first we bench each criterion alone */
utils::Conf {
group_name: "proximity",
queries: &[
"herald sings ",
"april paris ",
"tea two ",
"diesel engine ",
],
criterion: Some(&["proximity"]),
optional_words: false,
..BASE_CONF
},
utils::Conf {
group_name: "typo",
queries: &[
"migrosoft ",
"linax ",
"Disnaylande ",
"phytogropher ",
"nympalidea ",
"aritmetric ",
"the fronce ",
"sisan ",
],
criterion: Some(&["typo"]),
optional_words: false,
..BASE_CONF
},
utils::Conf {
group_name: "words",
queries: &[
"the black saint and the sinner lady and the good doggo ", // four words to pop, 27 results
"Kameya Tokujirō mingus monk ", // two words to pop, 55
"Ulrich Hensel meilisearch milli ", // two words to pop, 306
"Idaho Bellevue pizza ", // one word to pop, 800
"Abraham machin ", // one word to pop, 1141
],
criterion: Some(&["words"]),
..BASE_CONF
},
/* then we bench some global / normal search with all the default criterion in the default
* order */
utils::Conf {
group_name: "basic placeholder",
queries: &[""],
..BASE_CONF
},
utils::Conf {
group_name: "basic without quote",
queries: &BASE_CONF
.queries
.iter()
.map(|s| s.trim()) // we remove the space at the end of each request
.collect::<Vec<&str>>(),
..BASE_CONF
},
utils::Conf {
group_name: "basic with quote",
queries: basic_with_quote,
..BASE_CONF
},
utils::Conf {
group_name: "prefix search",
queries: &[
"t", // 453k results
"c", // 405k
"g", // 318k
"j", // 227k
"q", // 71k
"x", // 17k
],
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_songs);
criterion_main!(benches);

View File

@ -0,0 +1,337 @@
#![allow(dead_code)]
use std::fs::{create_dir_all, remove_dir_all, File};
use std::io::{self, BufReader, BufWriter, Read};
use std::path::Path;
use std::str::FromStr as _;
use anyhow::Context;
use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings};
use milli::vector::RuntimeEmbedders;
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
use serde_json::Value;
/// Description of one benchmark group: which dataset to index, how to configure the
/// index, and which queries to run against it.
pub struct Conf<'a> {
/// where we are going to create our database directory (`benches.mmdb` in `Conf::BASE`)
/// each benchmark will first try to delete it and then recreate it
pub database_name: &'a str,
/// path of the dataset to be used; it is read as-is, in the format named by `dataset_format`
pub dataset: &'a str,
/// The format of the dataset: `"csv"`, `"json"` or `"jsonl"`
pub dataset_format: &'a str,
/// name of the group, displayed after the dataset file name in the benchmark results
pub group_name: &'a str,
/// the queries to benchmark, one measurement per query
pub queries: &'a [&'a str],
/// here you can change which criterion are used and in which order.
/// - if you specify something all the base configuration will be thrown out
/// - if you don't specify anything (None) the default configuration will be kept
pub criterion: Option<&'a [&'a str]>,
/// the last chance to configure your database as you want
pub configure: fn(&mut Settings),
/// filter expression applied to every query of the group, if any
pub filter: Option<&'a str>,
/// sort rules applied to every query of the group, if any
pub sort: Option<Vec<&'a str>>,
/// enable or disable the optional words on the query
pub optional_words: bool,
/// primary key, if there is None we'll auto-generate docids for every documents
pub primary_key: Option<&'a str>,
}
impl Conf<'_> {
pub const BASE: Self = Conf {
database_name: "benches.mmdb",
dataset_format: "csv",
dataset: "",
group_name: "",
queries: &[],
criterion: None,
configure: |_| (),
filter: None,
sort: None,
optional_words: true,
primary_key: None,
};
}
/// Creates a fresh index at `conf.database_name`, applies the settings described by
/// `conf`, then indexes the whole dataset and returns the ready-to-query index.
///
/// # Panics
///
/// Panics on any failure: the directory cannot be removed or created, the settings
/// cannot be applied, the dataset cannot be read, or the indexing fails.
pub fn base_setup(conf: &Conf) -> Index {
// Start from an empty directory; a missing directory is not an error.
match remove_dir_all(conf.database_name) {
Ok(_) => (),
Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
Err(e) => panic!("{}", e),
}
create_dir_all(conf.database_name).unwrap();
let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100);
let index = Index::new(options, conf.database_name, true).unwrap();
// First write transaction: settings only.
let config = IndexerConfig::default();
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, &config);
if let Some(primary_key) = conf.primary_key {
builder.set_primary_key(primary_key.to_string());
}
// An explicit criterion list replaces the criteria, and also resets the filterable
// fields and the stop words.
if let Some(criterion) = conf.criterion {
builder.reset_filterable_fields();
builder.reset_criteria();
builder.reset_stop_words();
let criterion = criterion.iter().map(|s| Criterion::from_str(s).unwrap()).collect();
builder.set_criteria(criterion);
}
// The user hook runs last so that it can override everything set above.
(conf.configure)(&mut builder);
// NOTE(review): `&|| false` is presumably the "must stop processing" callback — confirm.
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
wtxn.commit().unwrap();
// Second write transaction: documents.
let config = IndexerConfig::default();
let mut wtxn = index.write_txn().unwrap();
let rtxn = index.read_txn().unwrap();
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
// The dataset is converted (when needed) and memory-mapped by `documents_from`.
let documents = documents_from(conf.dataset, conf.dataset_format);
let mut indexer = indexer::DocumentOperation::new();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
.into_changes(
&indexer_alloc,
&index,
&rtxn,
None,
&mut new_fields_ids_map,
&|| false,
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index,
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
drop(rtxn);
index
}
/// Builds one index per configuration and benchmarks every query of that configuration
/// against it. Measurements are grouped under `<dataset file name>: <group name>`.
///
/// Everything inside the measured closure is part of the timing: opening the read
/// transaction, parsing the filter and the sort rules, and executing the search.
///
/// # Panics
///
/// Panics if the dataset path has no valid UTF-8 file name, if the index cannot be
/// built, or if a filter, a sort rule or a search fails.
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
    for conf in confs {
        let index = base_setup(conf);

        let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
        let name = format!("{}: {}", file_name, conf.group_name);
        let mut group = c.benchmark_group(&name);

        for &query in conf.queries {
            group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
                b.iter(|| {
                    let rtxn = index.read_txn().unwrap();
                    let mut search = index.search(&rtxn);

                    // Honor `Conf::optional_words`: when optional words are disabled,
                    // every term of the query must match.
                    let strategy = if conf.optional_words {
                        TermsMatchingStrategy::default()
                    } else {
                        TermsMatchingStrategy::All
                    };
                    search.query(query).terms_matching_strategy(strategy);

                    if let Some(filter) = conf.filter {
                        let filter = Filter::from_str(filter).unwrap().unwrap();
                        search.filter(filter);
                    }
                    if let Some(sort) = &conf.sort {
                        let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
                        search.sort_criteria(sort);
                    }
                    let _ids = search.execute().unwrap();
                });
            });
        }

        group.finish();
        // Wait for the index to be fully closed before the next configuration
        // recreates a database at the same path.
        index.prepare_for_closing().wait();
    }
}
/// Opens the dataset at `filename` and returns a memory map of its documents,
/// converting them first when `filetype` requires it.
///
/// # Panics
///
/// Panics if the file cannot be opened, if `filetype` is not one of `csv`, `json` or
/// `jsonl`, or if the conversion fails.
pub fn documents_from(filename: &str, filetype: &str) -> Mmap {
    let file = match File::open(filename) {
        Ok(file) => file,
        Err(_) => panic!("could not find the dataset in: {filename}"),
    };
    let documents = match filetype {
        "csv" => documents_from_csv(file),
        "json" => documents_from_json(file),
        "jsonl" => documents_from_jsonl(file),
        otherwise => panic!("invalid update format {otherwise:?}"),
    };
    documents.unwrap()
}
/// Memory-maps a JSON Lines dataset as-is, without any conversion.
fn documents_from_jsonl(file: File) -> anyhow::Result<Mmap> {
    // NOTE(review): mapping a file is unsafe because the mapped content must not change
    // while the map is alive; presumably nothing modifies the dataset during a run.
    let documents = unsafe { Mmap::map(&file) }?;
    Ok(documents)
}
/// Reads a JSON array of documents and rewrites them back to back, one JSON object
/// after the other, into a temporary file that is then memory-mapped.
fn documents_from_json(file: File) -> anyhow::Result<Mmap> {
    let documents: Vec<milli::Object> = serde_json::from_reader(BufReader::new(file))?;

    let mut output = BufWriter::new(tempfile::tempfile()?);
    documents.iter().try_for_each(|document| serde_json::to_writer(&mut output, document))?;

    // `into_inner` flushes the buffered bytes before handing the file back.
    let file = output.into_inner()?;
    unsafe { Mmap::map(&file).map_err(Into::into) }
}
/// Converts a CSV dataset into JSON objects written back to back into a temporary
/// file, then memory-maps that file.
///
/// Column types come from the headers (see `parse_csv_header`). Empty cells become
/// `null`; numbers are parsed as integers first and as floats otherwise.
///
/// # Errors
///
/// Fails when the headers or a record cannot be read, when a number or boolean cell
/// cannot be parsed, or when writing to the temporary file fails.
fn documents_from_csv(file: File) -> anyhow::Result<Mmap> {
let output = tempfile::tempfile()?;
let mut output = BufWriter::new(output);
let mut reader = csv::ReaderBuilder::new().from_reader(file);
let headers = reader.headers().context("while retrieving headers")?.clone();
let typed_fields: Vec<_> = headers.iter().map(parse_csv_header).collect();
// A single object is reused for every record: its keys are the field names.
let mut object: serde_json::Map<_, _> =
typed_fields.iter().map(|(k, _)| (k.to_string(), Value::Null)).collect();
let mut line = 0;
let mut record = csv::StringRecord::new();
while reader.read_record(&mut record).context("while reading a record")? {
// We increment here and not at the end of the loop
// to take the header offset into account.
line += 1;
// Reset the document values
object.iter_mut().for_each(|(_, v)| *v = Value::Null);
for (i, (name, atype)) in typed_fields.iter().enumerate() {
let value = &record[i];
// Numbers and booleans are parsed from the trimmed cell; strings are kept untouched.
let trimmed_value = value.trim();
let value = match atype {
AllowedType::Number if trimmed_value.is_empty() => Value::Null,
AllowedType::Number => {
match trimmed_value.parse::<i64>() {
Ok(integer) => Value::from(integer),
Err(_) => match trimmed_value.parse::<f64>() {
Ok(float) => Value::from(float),
Err(error) => {
anyhow::bail!("document format error on line {line}: {error}. For value: {value}")
}
},
}
}
AllowedType::Boolean if trimmed_value.is_empty() => Value::Null,
AllowedType::Boolean => match trimmed_value.parse::<bool>() {
Ok(bool) => Value::from(bool),
Err(error) => {
anyhow::bail!(
"document format error on line {line}: {error}. For value: {value}"
)
}
},
AllowedType::String if value.is_empty() => Value::Null,
AllowedType::String => Value::from(value),
};
*object.get_mut(name).expect("encountered an unknown field") = value;
}
serde_json::to_writer(&mut output, &object).context("while writing to disk")?;
}
// `into_inner` flushes the buffered bytes before handing the file back.
let output = output.into_inner()?;
unsafe { Mmap::map(&output).map_err(Into::into) }
}
/// Types a CSV column can be declared with through a `name:type` header.
enum AllowedType {
    /// `name:string`, also used when no recognized type is given.
    String,
    /// `name:boolean`
    Boolean,
    /// `name:number`
    Number,
}

/// Splits a CSV header into the field name and the type of its values.
///
/// The type is whatever follows the last `:` of the header. When there is no `:` or
/// when the suffix is not a recognized type, the whole header is the field name and
/// the values are strings.
fn parse_csv_header(header: &str) -> (String, AllowedType) {
    // if there are several separators we only split on the last one.
    let typed = header.rsplit_once(':').and_then(|(field_name, field_type)| {
        let field_type = match field_type {
            "string" => AllowedType::String,
            "boolean" => AllowedType::Boolean,
            "number" => AllowedType::Number,
            // if the pattern isn't recognized, we keep the whole field.
            _ => return None,
        };
        Some((field_name, field_type))
    });

    match typed {
        Some((field_name, field_type)) => (field_name.to_string(), field_type),
        None => (header.to_string(), AllowedType::String),
    }
}
/// Iterator turning the records of a CSV reader into JSON objects, typed according to
/// the CSV headers.
struct CSVDocumentDeserializer<R>
where
R: Read,
{
// Remaining records of the CSV, headers excluded.
documents: csv::StringRecordsIntoIter<R>,
// Field name and value type of each column, in column order.
headers: Vec<(String, AllowedType)>,
}
impl<R: Read> CSVDocumentDeserializer<R> {
    /// Reads and types the CSV headers, then wraps the remaining records.
    ///
    /// Fails when the headers cannot be read.
    fn from_reader(reader: R) -> io::Result<Self> {
        let mut csv_reader = csv::Reader::from_reader(reader);
        let headers: Vec<_> = csv_reader.headers()?.iter().map(parse_csv_header).collect();
        let documents = csv_reader.into_records();
        Ok(Self { documents, headers })
    }
}
impl<R: Read> Iterator for CSVDocumentDeserializer<R> {
type Item = anyhow::Result<Object>;
fn next(&mut self) -> Option<Self::Item> {
let csv_document = self.documents.next()?;
match csv_document {
Ok(csv_document) => {
let mut document = Object::new();
for ((field_name, field_type), value) in
self.headers.iter().zip(csv_document.into_iter())
{
let parsed_value: anyhow::Result<Value> = match field_type {
AllowedType::Number => {
value.parse::<f64>().map(Value::from).map_err(Into::into)
}
AllowedType::Boolean => {
value.parse::<bool>().map(Value::from).map_err(Into::into)
}
AllowedType::String => Ok(Value::String(value.to_string())),
};
match parsed_value {
Ok(value) => drop(document.insert(field_name.to_string(), value)),
Err(_e) => {
return Some(Err(anyhow::anyhow!(
"Value '{}' is not a valid number",
value
)))
}
}
}
Some(Ok(document))
}
Err(e) => Some(Err(anyhow::anyhow!("Error parsing csv document: {}", e))),
}
}
}

115
crates/benchmarks/build.rs Normal file
View File

@ -0,0 +1,115 @@
use std::fs::File;
use std::io::{Cursor, Read, Seek, Write};
use std::path::{Path, PathBuf};
use std::{env, fs};
use bytes::Bytes;
use convert_case::{Case, Casing};
use flate2::read::GzDecoder;
use reqwest::IntoUrl;
/// Location of the gzipped datasets; each one is fetched from
/// `{BASE_URL}/{name}.{extension}.gz`.
const BASE_URL: &str = "https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets";
// Every dataset is described by its name and the extension of the uncompressed file.
const DATASET_SONGS: (&str, &str) = ("smol-songs", "csv");
const DATASET_SONGS_1_2: (&str, &str) = ("smol-songs-1_2", "csv");
const DATASET_SONGS_3_4: (&str, &str) = ("smol-songs-3_4", "csv");
const DATASET_SONGS_4_4: (&str, &str) = ("smol-songs-4_4", "csv");
const DATASET_WIKI: (&str, &str) = ("smol-wiki-articles", "csv");
const DATASET_WIKI_1_2: (&str, &str) = ("smol-wiki-articles-1_2", "csv");
const DATASET_WIKI_3_4: (&str, &str) = ("smol-wiki-articles-3_4", "csv");
const DATASET_WIKI_4_4: (&str, &str) = ("smol-wiki-articles-4_4", "csv");
const DATASET_MOVIES: (&str, &str) = ("movies", "json");
const DATASET_MOVIES_1_2: (&str, &str) = ("movies-1_2", "json");
const DATASET_MOVIES_3_4: (&str, &str) = ("movies-3_4", "json");
const DATASET_MOVIES_4_4: (&str, &str) = ("movies-4_4", "json");
const DATASET_NESTED_MOVIES: (&str, &str) = ("nested_movies", "json");
const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl");
/// Every dataset the build script downloads and exposes as a path constant.
const ALL_DATASETS: &[(&str, &str)] = &[
DATASET_SONGS,
DATASET_SONGS_1_2,
DATASET_SONGS_3_4,
DATASET_SONGS_4_4,
DATASET_WIKI,
DATASET_WIKI_1_2,
DATASET_WIKI_3_4,
DATASET_WIKI_4_4,
DATASET_MOVIES,
DATASET_MOVIES_1_2,
DATASET_MOVIES_3_4,
DATASET_MOVIES_4_4,
DATASET_NESTED_MOVIES,
DATASET_GEO,
];
/// The name of the environment variable used to select the path
/// of the directory containing the datasets
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
/// Generates `benches/datasets_paths.rs`, with one path constant per dataset, and
/// downloads then uncompresses every dataset that is not on disk yet.
///
/// Datasets are stored in the directory named by `MILLI_BENCH_DATASETS_PATH` when it is
/// set, and in `OUT_DIR` otherwise.
fn main() -> anyhow::Result<()> {
    // `OUT_DIR` is only read (and required) when no explicit directory is given.
    let out_dir = match env::var(BASE_DATASETS_PATH_KEY) {
        Ok(path) => PathBuf::from(path),
        Err(_) => PathBuf::from(env::var("OUT_DIR")?),
    };

    let benches_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?).join("benches");
    let mut manifest_paths_file = File::create(benches_dir.join("datasets_paths.rs"))?;
    write!(
        manifest_paths_file,
        r#"//! This file is generated by the build script.
//! Do not modify by hand, use the build.rs file.
#![allow(dead_code)]
"#
    )?;
    writeln!(manifest_paths_file)?;

    for (dataset, extension) in ALL_DATASETS {
        let out_path = out_dir.join(dataset);
        let out_file = out_path.with_extension(extension);

        // The constant is written even when the download is skipped below.
        writeln!(
            &mut manifest_paths_file,
            r#"pub const {}: &str = {:?};"#,
            dataset.to_case(Case::UpperSnake),
            out_file.display(),
        )?;

        if out_file.exists() {
            // Report the file that was actually checked, extension included.
            eprintln!(
                "The dataset {} already exists on the file system and will not be downloaded again",
                out_file.display(),
            );
            continue;
        }

        let url = format!("{}/{}.{}.gz", BASE_URL, dataset, extension);
        eprintln!("downloading: {}", url);
        let bytes = retry(|| download_dataset(url.clone()), 10)?;
        eprintln!("{} downloaded successfully", url);
        eprintln!("uncompressing in {}", out_file.display());
        uncompress_in_file(bytes, &out_file)?;
    }

    Ok(())
}
/// Calls `fun` until it succeeds, retrying at most `times` times after the first
/// attempt, and returns the first success or the error of the last attempt.
fn retry<Ok, Err>(fun: impl Fn() -> Result<Ok, Err>, times: usize) -> Result<Ok, Err> {
    let mut retries_left = times;
    loop {
        let outcome = fun();
        if outcome.is_ok() || retries_left == 0 {
            return outcome;
        }
        retries_left -= 1;
    }
}
/// Downloads `url` entirely in memory, without any timeout.
///
/// # Errors
///
/// Fails when the request cannot be sent, when the server answers with an error
/// status (4xx or 5xx), or when the body cannot be read.
fn download_dataset<U: IntoUrl>(url: U) -> anyhow::Result<Cursor<Bytes>> {
    let client = reqwest::blocking::Client::builder().timeout(None).build()?;
    // Without the status check, the body of an error page would be handed to the gzip
    // decoder and the download would never be retried.
    let response = client.get(url).send()?.error_for_status()?;
    Ok(Cursor::new(response.bytes()?))
}
/// Gunzips `bytes` and writes the result to `path`.
///
/// The data is fully uncompressed in memory before anything is written, so a corrupt
/// archive fails before the destination file is touched.
fn uncompress_in_file<R: Read + Seek, P: AsRef<Path>>(bytes: R, path: P) -> anyhow::Result<()> {
    let mut uncompressed = Vec::new();
    GzDecoder::new(bytes).read_to_end(&mut uncompressed)?;
    fs::write(path.as_ref(), uncompressed)?;
    Ok(())
}

View File

@ -0,0 +1,38 @@
#!/usr/bin/env bash

# Requirements:
# - critcmp. See: https://github.com/BurntSushi/critcmp
# - curl

# Usage
# $ bash compare.sh json_file1 json_file2
# ex: bash compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json

# Checking that critcmp is installed
if ! command -v critcmp > /dev/null 2>&1; then
    echo 'You must install critcmp to make this script work.'
    echo 'See: https://github.com/BurntSushi/critcmp'
    echo ' $ cargo install critcmp'
    exit 1
fi

s3_url='https://milli-benchmarks.fra1.digitaloceanspaces.com/critcmp_results'

# Local paths of the result files, in the order they were given
local_paths=()

for file in "$@"
do
    file_s3_url="$s3_url/$file"
    file_local_path="/tmp/$file"

    # Result files are cached in /tmp: only download the missing ones
    if [[ ! -f "$file_local_path" ]]; then
        # --fail makes curl exit with an error on HTTP errors, instead of saving the
        # error page as if it were a result file
        if ! curl "$file_s3_url" --output "$file_local_path" --silent --fail; then
            # Never leave a partial file behind: it would be reused by the next run
            rm -f "$file_local_path"
            echo 'curl command failed.'
            exit 1
        fi
    fi

    local_paths+=("$file_local_path")
done

critcmp "${local_paths[@]}"

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash

# Requirements:
# - curl
# - grep

# Lists the files available under critcmp_results/ in the benchmarks bucket.
# Every argument is a grep pattern; only the files matching all of them are kept.

# Extract the <Key> entries of the bucket listing, keep the ones under
# critcmp_results/ and strip that prefix
res=$(curl -s https://milli-benchmarks.fra1.digitaloceanspaces.com | grep -o '<Key>[^<]\+' | cut -c 5- | grep critcmp_results/ | cut -c 18-)

for pattern in "$@"
do
    # Quoted so that a pattern containing spaces or glob characters is passed
    # untouched; `--` lets a pattern start with a dash
    res=$(echo "$res" | grep -- "$pattern")
done

echo "$res"

View File

@ -0,0 +1,5 @@
//! This library is only used to isolate the benchmarks
//! from the original milli library.
//!
//! It does not contain anything of interest to users of the milli library;
//! it is only useful to milli contributors.

View File

@ -0,0 +1,18 @@
[package]
name = "build-info"
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.41", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.98"
vergen-git2 = "1.0.7"

View File

@ -0,0 +1,29 @@
/// Emits the git build variables; a failure is only reported as a cargo warning and
/// never fails the build.
fn main() {
    match emit_git_variables() {
        Ok(()) => {}
        Err(err) => println!("cargo:warning=vergen: {}", err),
    }
}
/// Asks vergen to emit the `VERGEN_GIT_*` variables: branch, commit timestamp, commit
/// message, describe and sha.
///
/// # Errors
///
/// Fails when `MEILI_NO_VERGEN` is set to anything other than `false` or `0`, or when
/// vergen cannot build or emit its instructions.
fn emit_git_variables() -> anyhow::Result<()> {
    println!("cargo::rerun-if-env-changed=MEILI_NO_VERGEN");

    // Unset, `false` and `0` all keep vergen enabled.
    let disabled = match std::env::var_os("MEILI_NO_VERGEN") {
        Some(value) => value != "false" && value != "0",
        None => false,
    };
    anyhow::ensure!(!disabled, "disabled via `MEILI_NO_VERGEN`");

    // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
    // in the corresponding GitHub workflow (publish_docker.yml).
    // This is due to the Dockerfile building the binary outside of the git directory.
    let mut git_options = vergen_git2::Git2Builder::default();
    git_options.branch(true);
    git_options.commit_timestamp(true);
    git_options.commit_message(true);
    git_options.describe(true, true, None);
    git_options.sha(false);
    let git_instructions = git_options.build()?;

    vergen_git2::Emitter::default().fail_on_error().add_instructions(&git_instructions)?.emit()
}

View File

@ -0,0 +1,203 @@
use time::format_description::well_known::Iso8601;
/// Git information captured when the binary was compiled.
///
/// Every field is `None` when the matching `VERGEN_GIT_*` variable was not defined at
/// compile time.
#[derive(Debug, Clone)]
pub struct BuildInfo {
/// Value of `VERGEN_GIT_BRANCH`.
pub branch: Option<&'static str>,
/// Parsed value of `VERGEN_GIT_DESCRIBE`.
pub describe: Option<DescribeResult>,
/// Value of `VERGEN_GIT_SHA`.
pub commit_sha1: Option<&'static str>,
/// Value of `VERGEN_GIT_COMMIT_MESSAGE`.
pub commit_msg: Option<&'static str>,
/// Value of `VERGEN_GIT_COMMIT_TIMESTAMP`; also `None` when it is not valid ISO 8601.
pub commit_timestamp: Option<time::OffsetDateTime>,
}
impl BuildInfo {
    /// Collects the `VERGEN_GIT_*` variables that were defined at compile time.
    ///
    /// A commit timestamp that is not valid ISO 8601 is silently dropped.
    pub fn from_build() -> Self {
        let commit_timestamp = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP")
            .and_then(|raw| time::OffsetDateTime::parse(raw, &Iso8601::DEFAULT).ok());

        Self {
            branch: option_env!("VERGEN_GIT_BRANCH"),
            describe: DescribeResult::from_build(),
            commit_sha1: option_env!("VERGEN_GIT_SHA"),
            commit_msg: option_env!("VERGEN_GIT_COMMIT_MESSAGE"),
            commit_timestamp,
        }
    }
}
/// Classification of a `git describe` output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DescribeResult {
    /// A prototype tag, in the shape `prototype-<name>-<number>`.
    Prototype { name: &'static str },
    /// A release tag, in the shape `vM.N.P`.
    Release { version: &'static str, major: u64, minor: u64, patch: u64 },
    /// A release candidate tag, in the shape `vM.N.P-rc.C`.
    Prerelease { version: &'static str, major: u64, minor: u64, patch: u64, rc: u64 },
    /// Anything else, including commits that come after a tag or a dirty tree.
    NotATag { describe: &'static str },
}

impl DescribeResult {
    /// Classifies `describe`, trying prototype, release and prerelease in that order.
    pub fn new(describe: &'static str) -> Self {
        if let Some(name) = prototype_name(describe) {
            Self::Prototype { name }
        } else if let Some(release) = release_version(describe) {
            release
        } else if let Some(prerelease) = prerelease_version(describe) {
            prerelease
        } else {
            Self::NotATag { describe }
        }
    }

    /// Classifies the `VERGEN_GIT_DESCRIBE` variable captured at compile time.
    ///
    /// Returns `None` when the variable was not defined.
    pub fn from_build() -> Option<Self> {
        let describe: &'static str = option_env!("VERGEN_GIT_DESCRIBE")?;
        Some(Self::new(describe))
    }

    /// Returns the full tag for prototypes, releases and prereleases.
    pub fn as_tag(&self) -> Option<&'static str> {
        match self {
            DescribeResult::Prototype { name } => Some(name),
            DescribeResult::Release { version, .. } => Some(version),
            DescribeResult::Prerelease { version, .. } => Some(version),
            DescribeResult::NotATag { describe: _ } => None,
        }
    }

    /// Returns the full tag for prototypes only.
    pub fn as_prototype(&self) -> Option<&'static str> {
        match self {
            DescribeResult::Prototype { name } => Some(name),
            DescribeResult::Release { .. }
            | DescribeResult::Prerelease { .. }
            | DescribeResult::NotATag { .. } => None,
        }
    }
}

/// Parses the input as a prototype name.
///
/// Returns `Some(prototype_name)` if the following conditions are met on this value:
///
/// 1. starts with `prototype-`,
/// 2. ends with `-<some_number>`,
/// 3. does not end with `<some_number>-<some_number>`.
///
/// Otherwise, returns `None`.
fn prototype_name(describe: &'static str) -> Option<&'static str> {
    if !describe.starts_with("prototype-") {
        return None;
    }

    let mut rsplit_prototype = describe.rsplit('-');
    // last component MUST be a number
    rsplit_prototype.next()?.parse::<u64>().ok()?;
    // before than last component SHALL NOT be a number
    rsplit_prototype.next()?.parse::<u64>().err()?;

    Some(describe)
}

/// Parses a version component made of ASCII digits only.
///
/// `u64::from_str` alone also accepts a leading `+`, which is never part of a version.
fn version_component(component: &str) -> Option<u64> {
    if !component.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    component.parse().ok()
}

/// Parses the input as a full release version, in the shape `vX.Y.Z` with X, Y and Z
/// being numbers. Returns `None` for any other shape.
fn release_version(describe: &'static str) -> Option<DescribeResult> {
    // full release version don't contain a `-`
    if describe.contains('-') {
        return None;
    }

    // full release version parse as vX.Y.Z, with X, Y, Z numbers.
    let mut dots = describe.strip_prefix('v')?.split('.');
    let major = version_component(dots.next()?)?;
    let minor = version_component(dots.next()?)?;
    let patch = version_component(dots.next()?)?;

    if dots.next().is_some() {
        return None;
    }

    Some(DescribeResult::Release { version: describe, major, minor, patch })
}

/// Parses the input as a prerelease version, in the shape `vM.N.P-rc.C`.
/// Returns `None` for any other shape.
fn prerelease_version(describe: &'static str) -> Option<DescribeResult> {
    // Only the last `-` is a separator: `release_version` rejects any other `-`, so
    // nothing can come before the `vM.N.P` part.
    let (release, prerelease) = describe.rsplit_once('-')?;
    let rc = version_component(prerelease.strip_prefix("rc.")?)?;

    let DescribeResult::Release { version: _, major, minor, patch } = release_version(release)?
    else {
        return None;
    };

    Some(DescribeResult::Prerelease { version: describe, major, minor, patch, rc })
}
#[cfg(test)]
mod test {
    use super::DescribeResult;

    // Each helper checks that `describe` is classified as the given variant.

    fn assert_not_a_tag(describe: &'static str) {
        assert_eq!(DescribeResult::NotATag { describe }, DescribeResult::new(describe))
    }

    fn assert_proto(describe: &'static str) {
        assert_eq!(DescribeResult::Prototype { name: describe }, DescribeResult::new(describe))
    }

    fn assert_release(describe: &'static str, major: u64, minor: u64, patch: u64) {
        assert_eq!(
            DescribeResult::Release { version: describe, major, minor, patch },
            DescribeResult::new(describe)
        )
    }

    fn assert_prerelease(describe: &'static str, major: u64, minor: u64, patch: u64, rc: u64) {
        assert_eq!(
            DescribeResult::Prerelease { version: describe, major, minor, patch, rc },
            DescribeResult::new(describe)
        )
    }

    #[test]
    fn not_a_tag() {
        assert_not_a_tag("whatever-fuzzy");
        assert_not_a_tag("whatever-fuzzy-5-ggg-dirty");
        assert_not_a_tag("whatever-fuzzy-120-ggg-dirty");

        // technically a tag, but not a proto nor a version, so not parsed as a tag
        assert_not_a_tag("whatever");

        // dirty version
        assert_not_a_tag("v1.7.0-1-ggga-dirty");
        assert_not_a_tag("v1.7.0-rc.1-1-ggga-dirty");

        // after version
        assert_not_a_tag("v1.7.0-1-ggga");
        assert_not_a_tag("v1.7.0-rc.1-1-ggga");

        // after proto: the `prototype-` prefix must be spelled correctly, otherwise
        // the input never reaches the prototype parser
        assert_not_a_tag("prototype-tag-0-1-ggga");
        assert_not_a_tag("prototype-tag-0-1-ggga-dirty");
    }

    #[test]
    fn prototype() {
        assert_proto("prototype-tag-0");
        assert_proto("prototype-tag-10");
        assert_proto("prototype-long-name-tag-10");
    }

    #[test]
    fn release() {
        assert_release("v1.7.2", 1, 7, 2);
    }

    #[test]
    fn prerelease() {
        assert_prerelease("v1.7.2-rc.3", 1, 7, 2, 3);
    }
}

34
crates/dump/Cargo.toml Normal file
View File

@ -0,0 +1,34 @@
[package]
name = "dump"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
edition.workspace = true
homepage.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.98"
flate2 = "1.1.2"
http = "1.3.1"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.21.3"
regex = "1.11.1"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.41"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
meilisearch-types = { path = "../meilisearch-types" }

View File

@ -10,8 +10,10 @@ dump
├── instance-uid.uuid
├── keys.jsonl
├── metadata.json
── tasks
├── update_files
│ └── [task_id].jsonl
── tasks
├── update_files
│ └── [task_id].jsonl
│ └── queue.jsonl
└── batches
└── queue.jsonl
```
```

View File

@ -1,11 +1,17 @@
#![allow(clippy::type_complexity)]
#![allow(clippy::wrong_self_convention)]
use std::collections::BTreeMap;
use meilisearch_types::batches::BatchId;
use meilisearch_types::byte_unit::Byte;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
use meilisearch_types::tasks::{
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
@ -57,6 +63,9 @@ pub enum Version {
#[serde(rename_all = "camelCase")]
pub struct TaskDump {
pub uid: TaskId,
// The batch ID were introduced in v1.12, everything prior to this version will be `None`.
#[serde(default)]
pub batch_uid: Option<BatchId>,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
@ -101,6 +110,14 @@ pub enum KindDump {
documents_ids: Vec<String>,
},
DocumentClear,
DocumentDeletionByFilter {
filter: serde_json::Value,
},
DocumentEdition {
filter: Option<serde_json::Value>,
context: Option<serde_json::Map<String, serde_json::Value>>,
function: String,
},
Settings {
settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
is_deletion: bool,
@ -129,12 +146,22 @@ pub enum KindDump {
instance_uid: Option<InstanceUid>,
},
SnapshotCreation,
Export {
url: String,
api_key: Option<String>,
payload_size: Option<Byte>,
indexes: BTreeMap<String, ExportIndexSettings>,
},
UpgradeDatabase {
from: (u32, u32, u32),
},
}
impl From<Task> for TaskDump {
fn from(task: Task) -> Self {
TaskDump {
uid: task.uid,
batch_uid: task.batch_uid,
index_uid: task.index_uid().map(|uid| uid.to_string()),
status: task.status,
kind: task.kind.into(),
@ -166,6 +193,12 @@ impl From<KindWithContent> for KindDump {
KindWithContent::DocumentDeletion { documents_ids, .. } => {
KindDump::DocumentDeletion { documents_ids }
}
KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
KindDump::DocumentDeletionByFilter { filter: filter_expr }
}
KindWithContent::DocumentEdition { filter_expr, context, function, .. } => {
KindDump::DocumentEdition { filter: filter_expr, context, function }
}
KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
KindWithContent::SettingsUpdate {
new_settings,
@ -191,6 +224,18 @@ impl From<KindWithContent> for KindDump {
KindDump::DumpCreation { keys, instance_uid }
}
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
KindWithContent::Export { url, api_key, payload_size, indexes } => KindDump::Export {
url,
api_key,
payload_size,
indexes: indexes
.into_iter()
.map(|(pattern, settings)| (pattern.to_string(), settings))
.collect(),
},
KindWithContent::UpgradeDatabase { from: version } => {
KindDump::UpgradeDatabase { from: version }
}
}
}
}
@ -198,18 +243,21 @@ impl From<KindWithContent> for KindDump {
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::io::Seek;
use std::str::FromStr;
use big_s::S;
use maplit::btreeset;
use meilisearch_types::index_uid::IndexUid;
use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self};
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::{Details, Status};
use meilisearch_types::milli::{self, FilterableAttributesRule};
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
use uuid::Uuid;
@ -249,27 +297,75 @@ pub(crate) mod test {
pub fn create_test_settings() -> Settings<Checked> {
let settings = Settings {
displayed_attributes: Setting::Set(vec![S("race"), S("name")]),
searchable_attributes: Setting::Set(vec![S("name"), S("race")]),
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
filterable_attributes: Setting::Set(vec![
FilterableAttributesRule::Field(S("race")),
FilterableAttributesRule::Field(S("age")),
]),
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
proximity_precision: Setting::NotSet,
typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
faceting: Setting::Set(FacetingSettings {
max_values_per_facet: Setting::Set(111),
sort_facet_values_by: Setting::Set(
btreemap! { S("age") => FacetValuesSort::Count },
),
}),
pagination: Setting::NotSet,
embedders: Setting::NotSet,
search_cutoff_ms: Setting::NotSet,
localized_attributes: Setting::NotSet,
facet_search: Setting::NotSet,
prefix_search: Setting::NotSet,
chat: Setting::NotSet,
_kind: std::marker::PhantomData,
};
settings.check()
}
/// Builds the batches pushed into the test dump: a single batch with uid 0 made of one
/// succeeded `DocumentAdditionOrUpdate` task on the `doggo` index.
pub fn create_test_batches() -> Vec<Batch> {
vec![Batch {
uid: 0,
details: DetailsView {
received_documents: Some(12),
indexed_documents: Some(Some(10)),
..DetailsView::default()
},
progress: None,
stats: BatchStats {
total_nb_tasks: 1,
status: maplit::btreemap! { Status::Succeeded => 1 },
types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
progress_trace: Default::default(),
write_channel_congestion: None,
internal_database_sizes: Default::default(),
},
embedder_stats: Default::default(),
// Both bounds of the enqueued range are the same instant.
enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC),
oldest: datetime!(2022-11-11 0:00 UTC),
}),
started_at: datetime!(2022-11-20 0:00 UTC),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
stop_reason: BatchStopReason::Unspecified.to_string(),
}]
}
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
vec![
(
TaskDump {
uid: 0,
batch_uid: Some(0),
index_uid: Some(S("doggo")),
status: Status::Succeeded,
kind: KindDump::DocumentImport {
@ -293,6 +389,7 @@ pub(crate) mod test {
(
TaskDump {
uid: 1,
batch_uid: None,
index_uid: Some(S("doggo")),
status: Status::Enqueued,
kind: KindDump::DocumentImport {
@ -319,6 +416,7 @@ pub(crate) mod test {
(
TaskDump {
uid: 5,
batch_uid: None,
index_uid: Some(S("catto")),
status: Status::Enqueued,
kind: KindDump::IndexDeletion,
@ -341,7 +439,7 @@ pub(crate) mod test {
name: Some(S("doggos_key")),
uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(),
actions: vec![Action::DocumentsAll],
indexes: vec![StarOr::Other(IndexUid::from_str("doggos").unwrap())],
indexes: vec![IndexUidPattern::from_str("doggos").unwrap()],
expires_at: Some(datetime!(4130-03-14 12:21 UTC)),
created_at: datetime!(1960-11-15 0:00 UTC),
updated_at: datetime!(2022-11-10 0:00 UTC),
@ -351,7 +449,7 @@ pub(crate) mod test {
name: Some(S("master_key")),
uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(),
actions: vec![Action::All],
indexes: vec![StarOr::Star],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: datetime!(0000-01-01 00:01 UTC),
updated_at: datetime!(1964-05-04 17:25 UTC),
@ -384,6 +482,15 @@ pub(crate) mod test {
index.flush().unwrap();
index.settings(&settings).unwrap();
// ========== pushing the batch queue
let batches = create_test_batches();
let mut batch_queue = dump.create_batches_queue().unwrap();
for batch in &batches {
batch_queue.push_batch(batch).unwrap();
}
batch_queue.flush().unwrap();
// ========== pushing the task queue
let tasks = create_test_tasks();
@ -407,14 +514,34 @@ pub(crate) mod test {
}
keys.flush().unwrap();
// ========== experimental features
let features = create_test_features();
dump.create_experimental_features(features).unwrap();
// ========== network
let network = create_test_network();
dump.create_network(network).unwrap();
// create the dump
let mut file = tempfile::tempfile().unwrap();
dump.persist_to(&mut file).unwrap();
file.seek(SeekFrom::Start(0)).unwrap();
file.rewind().unwrap();
file
}
/// Returns the experimental-features fixture written into the test dump:
/// the default value of `RuntimeTogglableFeatures`.
fn create_test_features() -> RuntimeTogglableFeatures {
RuntimeTogglableFeatures::default()
}
fn create_test_network() -> Network {
Network {
local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
}
}
#[test]
fn test_creating_and_read_dump() {
let mut file = create_test_dump();
@ -459,5 +586,13 @@ pub(crate) mod test {
for (key, expected) in dump.keys().unwrap().zip(create_test_api_keys()) {
assert_eq!(key.unwrap(), expected);
}
// ==== checking the features
let expected = create_test_features();
assert_eq!(dump.features().unwrap().unwrap(), expected);
// ==== checking the network
let expected = create_test_network();
assert_eq!(&expected, dump.network().unwrap().unwrap());
}
}

View File

@ -10,6 +10,7 @@ expression: products.settings().unwrap()
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",

View File

@ -13,13 +13,17 @@ expression: movies.settings().unwrap()
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"asc(release_date)"
"release_date:asc"
],
"stopWords": [],
"synonyms": {},

View File

@ -10,6 +10,7 @@ expression: spells.settings().unwrap()
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",

View File

@ -1,4 +1,3 @@
use std::collections::BTreeSet;
use std::str::FromStr;
use super::v2_to_v3::CompatV2ToV3;
@ -102,14 +101,15 @@ impl CompatIndexV1ToV2 {
impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
fn from(source: v1::settings::Settings) -> Self {
let displayed_attributes = source
.displayed_attributes
.map(|opt| opt.map(|displayed_attributes| displayed_attributes.into_iter().collect()));
let attributes_for_faceting = source.attributes_for_faceting.map(|opt| {
opt.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect())
});
let ranking_rules = source.ranking_rules.map(|opt| {
opt.map(|ranking_rules| {
Self {
displayed_attributes: option_to_setting(source.displayed_attributes)
.map(|displayed| displayed.into_iter().collect()),
searchable_attributes: option_to_setting(source.searchable_attributes),
filterable_attributes: option_to_setting(source.attributes_for_faceting.clone())
.map(|filterable| filterable.into_iter().collect()),
sortable_attributes: option_to_setting(source.attributes_for_faceting)
.map(|sortable| sortable.into_iter().collect()),
ranking_rules: option_to_setting(source.ranking_rules).map(|ranking_rules| {
ranking_rules
.into_iter()
.filter_map(|ranking_rule| {
@ -119,37 +119,44 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
ranking_rule.into();
criterion.as_ref().map(ToString::to_string)
}
Err(()) => Some(ranking_rule),
Err(()) => {
tracing::warn!(
"Could not import the following ranking rule: `{}`.",
ranking_rule
);
None
}
}
})
.collect()
})
});
Self {
displayed_attributes,
searchable_attributes: source.searchable_attributes,
filterable_attributes: attributes_for_faceting,
ranking_rules,
stop_words: source.stop_words,
synonyms: source.synonyms,
distinct_attribute: source.distinct_attribute,
}),
stop_words: option_to_setting(source.stop_words),
synonyms: option_to_setting(source.synonyms),
distinct_attribute: option_to_setting(source.distinct_attribute),
_kind: std::marker::PhantomData,
}
}
}
/// Converts a doubly-optional value into the three-state `v2::Setting`.
///
/// - `None` becomes `NotSet`,
/// - `Some(None)` becomes `Reset`,
/// - `Some(Some(value))` becomes `Set(value)`.
fn option_to_setting<T>(opt: Option<Option<T>>) -> v2::Setting<T> {
    match opt {
        None => v2::Setting::NotSet,
        // An explicit inner `None` asks for a reset, an inner value is kept as-is.
        Some(inner) => inner.map_or(v2::Setting::Reset, v2::Setting::Set),
    }
}
impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
fn from(source: v1::update::UpdateStatus) -> Self {
use v1::update::UpdateStatus as UpdateStatusV1;
use v2::updates::UpdateStatus as UpdateStatusV2;
Some(match source {
UpdateStatusV1::Enqueued { content } => {
log::warn!(
tracing::warn!(
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
content.update_id
);
log::warn!("Task will be skipped in the queue of imported tasks.");
tracing::warn!("Task will be skipped in the queue of imported tasks.");
return None;
}
@ -222,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
Some(match source {
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
v1::update::UpdateType::Customs => {
log::warn!("Ignoring task with type 'Customs' that is no longer supported");
tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
return None;
}
v1::update::UpdateType::DocumentsAddition { .. } => {
@ -251,38 +258,27 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
impl From<v1::settings::SettingsUpdate> for v2::Settings<v2::Unchecked> {
fn from(source: v1::settings::SettingsUpdate) -> Self {
let displayed_attributes: Option<Option<BTreeSet<String>>> =
source.displayed_attributes.into();
let attributes_for_faceting: Option<Option<Vec<String>>> =
source.attributes_for_faceting.into();
let ranking_rules: Option<Option<Vec<v1::settings::RankingRule>>> =
source.ranking_rules.into();
let ranking_rules = v2::Setting::from(source.ranking_rules);
// go from the concrete types of v1 (RankingRule) to the concrete type of v2 (Criterion),
// and then back to string as this is what the settings manipulate
let ranking_rules = ranking_rules.map(|opt| {
opt.map(|ranking_rules| {
ranking_rules
.into_iter()
// filter out the WordsPosition ranking rule that exists in v1 but not v2
.filter_map(|ranking_rule| {
Option::<v2::settings::Criterion>::from(ranking_rule)
})
.map(|criterion| criterion.to_string())
.collect()
})
let ranking_rules = ranking_rules.map(|ranking_rules| {
ranking_rules
.into_iter()
// filter out the WordsPosition ranking rule that exists in v1 but not v2
.filter_map(Option::<v2::settings::Criterion>::from)
.map(|criterion| criterion.to_string())
.collect()
});
Self {
displayed_attributes: displayed_attributes.map(|opt| {
opt.map(|displayed_attributes| displayed_attributes.into_iter().collect())
}),
displayed_attributes: v2::Setting::from(source.displayed_attributes)
.map(|displayed_attributes| displayed_attributes.into_iter().collect()),
searchable_attributes: source.searchable_attributes.into(),
filterable_attributes: attributes_for_faceting.map(|opt| {
opt.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect())
}),
filterable_attributes: v2::Setting::from(source.attributes_for_faceting.clone())
.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()),
sortable_attributes: v2::Setting::from(source.attributes_for_faceting)
.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()),
ranking_rules,
stop_words: source.stop_words.into(),
synonyms: source.synonyms.into(),
@ -300,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
v1::settings::RankingRule::WordsPosition => {
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
None
}
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),
@ -314,12 +310,12 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
}
}
impl<T> From<v1::settings::UpdateState<T>> for Option<Option<T>> {
impl<T> From<v1::settings::UpdateState<T>> for v2::Setting<T> {
fn from(source: v1::settings::UpdateState<T>) -> Self {
match source {
v1::settings::UpdateState::Update(new_value) => Some(Some(new_value)),
v1::settings::UpdateState::Clear => Some(None),
v1::settings::UpdateState::Nothing => None,
v1::settings::UpdateState::Update(new_value) => v2::Setting::Set(new_value),
v1::settings::UpdateState::Clear => v2::Setting::Reset,
v1::settings::UpdateState::Nothing => v2::Setting::NotSet,
}
}
}
@ -352,7 +348,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ad6245d98d1a8e30535f3339a9a8d223");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"2298010973ee98cf4670787314176a3a");
assert_eq!(update_files.len(), 9);
assert!(update_files[..].iter().all(|u| u.is_none())); // no update file in dumps v1

View File

@ -1,4 +1,3 @@
use std::convert::TryInto;
use std::str::FromStr;
use time::OffsetDateTime;
@ -25,7 +24,6 @@ impl CompatV2ToV3 {
CompatV2ToV3::Compat(compat) => compat.index_uuid(),
};
v2_uuids
.into_iter()
.into_iter()
.map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid })
.collect()
@ -147,8 +145,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
started_processing_at: processing.started_processing_at,
}),
Err(e) => {
log::warn!("Error with task {}: {}", processing.from.update_id, e);
log::warn!("Task will be marked as `Failed`.");
tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@ -173,8 +171,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
enqueued_at: enqueued.enqueued_at,
}),
Err(e) => {
log::warn!("Error with task {}: {}", enqueued.update_id, e);
log::warn!("Task will be marked as `Failed`.");
tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
tracing::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing {
from: v3::updates::Enqueued {
@ -354,35 +352,36 @@ impl From<String> for v3::Code {
"malformed_payload" => v3::Code::MalformedPayload,
"missing_payload" => v3::Code::MissingPayload,
other => {
log::warn!("Unknown error code {}", other);
tracing::warn!("Unknown error code {}", other);
v3::Code::UnretrievableErrorCode
}
}
}
}
fn option_to_setting<T>(opt: Option<Option<T>>) -> v3::Setting<T> {
match opt {
Some(Some(t)) => v3::Setting::Set(t),
None => v3::Setting::NotSet,
Some(None) => v3::Setting::Reset,
impl<A> From<v2::Setting<A>> for v3::Setting<A> {
fn from(setting: v2::Setting<A>) -> Self {
match setting {
v2::settings::Setting::Set(a) => v3::settings::Setting::Set(a),
v2::settings::Setting::Reset => v3::settings::Setting::Reset,
v2::settings::Setting::NotSet => v3::settings::Setting::NotSet,
}
}
}
impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
fn from(settings: v2::Settings<T>) -> Self {
v3::Settings {
displayed_attributes: option_to_setting(settings.displayed_attributes),
searchable_attributes: option_to_setting(settings.searchable_attributes),
filterable_attributes: option_to_setting(settings.filterable_attributes)
.map(|f| f.into_iter().collect()),
sortable_attributes: v3::Setting::NotSet,
ranking_rules: option_to_setting(settings.ranking_rules).map(|criteria| {
displayed_attributes: settings.displayed_attributes.into(),
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: v3::Setting::from(settings.ranking_rules).map(|criteria| {
criteria.into_iter().map(|criterion| patch_ranking_rules(&criterion)).collect()
}),
stop_words: option_to_setting(settings.stop_words),
synonyms: option_to_setting(settings.synonyms),
distinct_attribute: option_to_setting(settings.distinct_attribute),
stop_words: settings.stop_words.into(),
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
_kind: std::marker::PhantomData,
}
}
@ -394,6 +393,7 @@ fn patch_ranking_rules(ranking_rule: &str) -> String {
Ok(v2::settings::Criterion::Typo) => String::from("typo"),
Ok(v2::settings::Criterion::Proximity) => String::from("proximity"),
Ok(v2::settings::Criterion::Attribute) => String::from("attribute"),
Ok(v2::settings::Criterion::Sort) => String::from("sort"),
Ok(v2::settings::Criterion::Exactness) => String::from("exactness"),
Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"),
Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"),
@ -425,7 +425,7 @@ pub(crate) mod test {
let mut dump = v2::V2Reader::open(dir).unwrap().to_v3();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@ -76,20 +76,20 @@ impl CompatV3ToV4 {
let index_uid = match index_uid {
Some(uid) => uid,
None => {
log::warn!(
tracing::warn!(
"Error while importing the update {}.",
task.update.id()
);
log::warn!(
tracing::warn!(
"The index associated to the uuid `{}` could not be retrieved.",
task.uuid.to_string()
);
if task.update.is_finished() {
// we're tampering with the user's task history but not their data, that's ok-ish.
log::warn!("The index-uuid will be set as `unknown`.");
tracing::warn!("The index-uuid will be set as `unknown`.");
String::from("unknown")
} else {
log::warn!("The task will be ignored.");
tracing::warn!("The task will be ignored.");
return None;
}
}
@ -358,7 +358,7 @@ pub(crate) mod test {
let mut dump = v3::V3Reader::open(dir).unwrap().to_v4();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
other => {
log::warn!("Unknown error code {}", other);
tracing::warn!("Unknown error code {}", other);
v5::Code::UnretrievableErrorCode
}
};
@ -394,8 +394,8 @@ pub(crate) mod test {
let mut dump = v4::V4Reader::open(dir).unwrap().to_v5();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();

View File

@ -1,3 +1,6 @@
use std::num::NonZeroUsize;
use std::str::FromStr;
use super::v4_to_v5::{CompatIndexV4ToV5, CompatV4ToV5};
use crate::reader::{v5, v6, Document, UpdateFile};
use crate::Result;
@ -68,6 +71,7 @@ impl CompatV5ToV6 {
let task = v6::Task {
uid: task_view.uid,
batch_uid: None,
index_uid: task_view.index_uid,
status: match task_view.status {
v5::Status::Enqueued => v6::Status::Enqueued,
@ -179,10 +183,8 @@ impl CompatV5ToV6 {
.indexes
.into_iter()
.map(|index| match index {
v5::StarOr::Star => v6::StarOr::Star,
v5::StarOr::Other(uid) => {
v6::StarOr::Other(v6::IndexUid::new_unchecked(uid.as_str()))
}
v5::StarOr::Star => v6::IndexUidPattern::all(),
v5::StarOr::Other(uid) => v6::IndexUidPattern::new_unchecked(uid.as_str()),
})
.collect(),
expires_at: key.expires_at,
@ -191,6 +193,14 @@ impl CompatV5ToV6 {
})
})))
}
/// Returns the experimental features stored in the dump.
///
/// This compatibility reader always returns `Ok(None)`.
// NOTE(review): presumably v5 dumps never contain experimental features — confirm.
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
Ok(None)
}
/// Returns the network configuration stored in the dump.
///
/// This compatibility reader always returns `Ok(None)`.
// NOTE(review): presumably v5 dumps never contain a network configuration — confirm.
pub fn network(&self) -> Result<Option<&v6::Network>> {
Ok(None)
}
}
pub enum CompatIndexV5ToV6 {
@ -254,54 +264,53 @@ impl<T> From<v5::Setting<T>> for v6::Setting<T> {
impl From<v5::ResponseError> for v6::ResponseError {
fn from(error: v5::ResponseError) -> Self {
let code = match error.error_code.as_ref() {
"index_creation_failed" => v6::Code::CreateIndex,
"index_creation_failed" => v6::Code::IndexCreationFailed,
"index_already_exists" => v6::Code::IndexAlreadyExists,
"index_not_found" => v6::Code::IndexNotFound,
"invalid_index_uid" => v6::Code::InvalidIndexUid,
"invalid_min_word_length_for_typo" => v6::Code::InvalidMinWordLengthForTypo,
"invalid_min_word_length_for_typo" => v6::Code::InvalidSettingsTypoTolerance,
"invalid_state" => v6::Code::InvalidState,
"primary_key_inference_failed" => v6::Code::MissingPrimaryKey,
"index_primary_key_already_exists" => v6::Code::PrimaryKeyAlreadyPresent,
"primary_key_inference_failed" => v6::Code::IndexPrimaryKeyNoCandidateFound,
"index_primary_key_already_exists" => v6::Code::IndexPrimaryKeyAlreadyExists,
"max_fields_limit_exceeded" => v6::Code::MaxFieldsLimitExceeded,
"missing_document_id" => v6::Code::MissingDocumentId,
"invalid_document_id" => v6::Code::InvalidDocumentId,
"invalid_filter" => v6::Code::Filter,
"invalid_sort" => v6::Code::Sort,
"invalid_filter" => v6::Code::InvalidSettingsFilterableAttributes,
"invalid_sort" => v6::Code::InvalidSettingsSortableAttributes,
"bad_parameter" => v6::Code::BadParameter,
"bad_request" => v6::Code::BadRequest,
"database_size_limit_reached" => v6::Code::DatabaseSizeLimitReached,
"document_not_found" => v6::Code::DocumentNotFound,
"internal" => v6::Code::Internal,
"invalid_geo_field" => v6::Code::InvalidGeoField,
"invalid_ranking_rule" => v6::Code::InvalidRankingRule,
"invalid_store_file" => v6::Code::InvalidStore,
"invalid_api_key" => v6::Code::InvalidToken,
"invalid_geo_field" => v6::Code::InvalidDocumentGeoField,
"invalid_ranking_rule" => v6::Code::InvalidSettingsRankingRules,
"invalid_store_file" => v6::Code::InvalidStoreFile,
"invalid_api_key" => v6::Code::InvalidApiKey,
"missing_authorization_header" => v6::Code::MissingAuthorizationHeader,
"no_space_left_on_device" => v6::Code::NoSpaceLeftOnDevice,
"dump_not_found" => v6::Code::DumpNotFound,
"task_not_found" => v6::Code::TaskNotFound,
"payload_too_large" => v6::Code::PayloadTooLarge,
"unretrievable_document" => v6::Code::RetrieveDocument,
"search_error" => v6::Code::SearchDocuments,
"unretrievable_document" => v6::Code::UnretrievableDocument,
"unsupported_media_type" => v6::Code::UnsupportedMediaType,
"dump_already_processing" => v6::Code::DumpAlreadyInProgress,
"dump_already_processing" => v6::Code::DumpAlreadyProcessing,
"dump_process_failed" => v6::Code::DumpProcessFailed,
"invalid_content_type" => v6::Code::InvalidContentType,
"missing_content_type" => v6::Code::MissingContentType,
"malformed_payload" => v6::Code::MalformedPayload,
"missing_payload" => v6::Code::MissingPayload,
"api_key_not_found" => v6::Code::ApiKeyNotFound,
"missing_parameter" => v6::Code::MissingParameter,
"missing_parameter" => v6::Code::BadRequest,
"invalid_api_key_actions" => v6::Code::InvalidApiKeyActions,
"invalid_api_key_indexes" => v6::Code::InvalidApiKeyIndexes,
"invalid_api_key_expires_at" => v6::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v6::Code::InvalidApiKeyDescription,
"invalid_api_key_name" => v6::Code::InvalidApiKeyName,
"invalid_api_key_uid" => v6::Code::InvalidApiKeyUid,
"immutable_field" => v6::Code::ImmutableField,
"immutable_field" => v6::Code::BadRequest,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => {
log::warn!("Unknown error code {}", other);
tracing::warn!("Unknown error code {}", other);
v6::Code::UnretrievableErrorCode
}
};
@ -312,14 +321,46 @@ impl From<v5::ResponseError> for v6::ResponseError {
impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
fn from(settings: v5::Settings<T>) -> Self {
v6::Settings {
displayed_attributes: settings.displayed_attributes.into(),
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
filterable_attributes: match settings.filterable_attributes {
v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
filterable_attributes
.into_iter()
.map(v6::FilterableAttributesRule::Field)
.collect(),
),
v5::settings::Setting::Reset => v6::Setting::Reset,
v5::settings::Setting::NotSet => v6::Setting::NotSet,
},
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: settings.ranking_rules.into(),
ranking_rules: {
match settings.ranking_rules {
v5::settings::Setting::Set(ranking_rules) => {
let mut new_ranking_rules = vec![];
for rule in ranking_rules {
match v6::RankingRuleView::from_str(&rule) {
Ok(new_rule) => {
new_ranking_rules.push(new_rule);
}
Err(_) => {
tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
}
}
}
v6::Setting::Set(new_ranking_rules)
}
v5::settings::Setting::Reset => v6::Setting::Reset,
v5::settings::Setting::NotSet => v6::Setting::NotSet,
}
},
stop_words: settings.stop_words.into(),
non_separator_tokens: v6::Setting::NotSet,
separator_tokens: v6::Setting::NotSet,
dictionary: v6::Setting::NotSet,
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
proximity_precision: v6::Setting::NotSet,
typo_tolerance: match settings.typo_tolerance {
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
enabled: typo.enabled.into(),
@ -333,6 +374,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
},
disable_on_words: typo.disable_on_words.into(),
disable_on_attributes: typo.disable_on_attributes.into(),
disable_on_numbers: v6::Setting::NotSet,
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
@ -340,17 +382,30 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
faceting: match settings.faceting {
v5::Setting::Set(faceting) => v6::Setting::Set(v6::FacetingSettings {
max_values_per_facet: faceting.max_values_per_facet.into(),
sort_facet_values_by: v6::Setting::NotSet,
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
pagination: match settings.pagination {
v5::Setting::Set(pagination) => v6::Setting::Set(v6::PaginationSettings {
max_total_hits: pagination.max_total_hits.into(),
max_total_hits: match pagination.max_total_hits {
v5::Setting::Set(max_total_hits) => v6::Setting::Set(
max_total_hits.try_into().unwrap_or(NonZeroUsize::new(1).unwrap()),
),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
}),
v5::Setting::Reset => v6::Setting::Reset,
v5::Setting::NotSet => v6::Setting::NotSet,
},
embedders: v6::Setting::NotSet,
localized_attributes: v6::Setting::NotSet,
search_cutoff_ms: v6::Setting::NotSet,
facet_search: v6::Setting::NotSet,
prefix_search: v6::Setting::NotSet,
chat: v6::Setting::NotSet,
_kind: std::marker::PhantomData,
}
}
@ -413,13 +468,13 @@ pub(crate) mod test {
let mut dump = v5::V5Reader::open(dir).unwrap().to_v6();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6519f7064c45d2196dd59b71350a9bf5");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"4b03e23e740b27bfb9d2a1faffe512e2");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition

View File

@ -13,16 +13,17 @@ use crate::{Result, Version};
mod compat;
pub(self) mod v1;
pub(self) mod v2;
pub(self) mod v3;
pub(self) mod v4;
pub(self) mod v5;
pub(self) mod v6;
mod v1;
mod v2;
mod v3;
mod v4;
mod v5;
mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
#[allow(clippy::large_enum_variant)]
pub enum DumpReader {
Current(V6Reader),
Compat(CompatV5ToV6),
@ -101,12 +102,42 @@ impl DumpReader {
}
}
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.batches()),
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
}
}
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.keys()),
DumpReader::Compat(compat) => compat.keys(),
}
}
pub fn chat_completions_settings(
&mut self,
) -> Result<Box<dyn Iterator<Item = Result<(String, v6::ChatCompletionSettings)>> + '_>> {
match self {
DumpReader::Current(current) => current.chat_completions_settings(),
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
}
}
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
match self {
DumpReader::Current(current) => Ok(current.features()),
DumpReader::Compat(compat) => compat.features(),
}
}
pub fn network(&self) -> Result<Option<&v6::Network>> {
match self {
DumpReader::Current(current) => Ok(current.network()),
DumpReader::Compat(compat) => compat.network(),
}
}
}
impl From<V6Reader> for DumpReader {
@ -188,6 +219,213 @@ pub(crate) mod test {
use meili_snap::insta;
use super::*;
use crate::reader::v6::RuntimeTogglableFeatures;
/// Opens `tests/assets/v6-with-vectors.dump` and checks its top-level infos,
/// batches, tasks, both indexes (metadata, settings, documents), features and network.
#[test]
fn import_dump_v6_with_vectors() {
// dump containing two indexes
//
// "vector", configured with an embedder
// contains:
// - one document with an overridden vector,
// - one document with a natural vector
// - one document with a _vectors map containing one additional embedder name and a natural vector
// - one document with a _vectors map containing one additional embedder name and an overridden vector
//
// "novector", no embedder
// contains:
// - a document without vector
// - a document with a random _vectors field
let dump = File::open("tests/assets/v6-with-vectors.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"2b8a72d6bc6ba79980491966437daaf9");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_none());
assert!(update_files[2].is_none());
assert!(update_files[3].is_none());
assert!(update_files[4].is_none());
assert!(update_files[5].is_none());
assert!(update_files[6].is_none());
assert!(update_files[7].is_none());
assert!(update_files[8].is_none());
assert!(update_files[9].is_none());
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
// sorted by uid and popped from the end: "vector" comes out first, then "novector"
let mut vector_index = indexes.pop().unwrap();
let mut novector_index = indexes.pop().unwrap();
assert!(indexes.is_empty());
// vector
insta::assert_json_snapshot!(vector_index.metadata(), @r###"
{
"uid": "vector",
"primaryKey": "id",
"createdAt": "2024-05-16T15:33:17.240962Z",
"updatedAt": "2024-05-16T15:40:55.723052Z"
}
"###);
insta::assert_json_snapshot!(vector_index.settings().unwrap());
{
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();
assert_eq!(documents.len(), 4);
// sort by the string form of `id`; documents are then popped from the end
documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document);
}
}
// novector
insta::assert_json_snapshot!(novector_index.metadata(), @r###"
{
"uid": "novector",
"primaryKey": "id",
"createdAt": "2024-05-16T15:33:03.568055Z",
"updatedAt": "2024-05-16T15:33:07.530217Z"
}
"###);
insta::assert_json_snapshot!(novector_index.settings().unwrap().embedders, @"null");
{
let documents: Result<Vec<_>> = novector_index.documents().unwrap().collect();
let mut documents = documents.unwrap();
assert_eq!(documents.len(), 2);
// sort by the string form of `id`; popping from the end yields "e1" before "e0"
documents.sort_by_key(|doc| doc.get("id").unwrap().to_string());
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document, @r###"
{
"id": "e1",
"other": "random1",
"_vectors": "toto"
}
"###);
}
{
let document = documents.pop().unwrap();
insta::assert_json_snapshot!(document, @r###"
{
"id": "e0",
"other": "random0"
}
"###);
}
}
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
assert_eq!(dump.network().unwrap(), None);
}
/// Opens `tests/assets/v6-with-experimental.dump` and checks its top-level infos,
/// batches, tasks, keys, its single `test` index and its features.
#[test]
fn import_dump_v6_experimental() {
let dump = File::open("tests/assets/v6-with-experimental.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"3ddf6169b0a3703c5d770971f036fc5d");
assert_eq!(update_files.len(), 2);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_none()); // the processed document addition
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"13c2da155e9729c2344688cab29af71d");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
// the dump is expected to contain exactly one index
let mut test = indexes.pop().unwrap();
assert!(indexes.is_empty());
insta::assert_json_snapshot!(test.metadata(), @r###"
{
"uid": "test",
"primaryKey": "id",
"createdAt": "2023-07-06T07:07:41.364694Z",
"updatedAt": "2023-07-06T07:07:41.396114Z"
}
"###);
assert_eq!(test.documents().unwrap().count(), 1);
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
}
/// Opens `tests/assets/v6-with-network.dump` and pins the top-level
/// information plus the network section: the local name and the URL and
/// search API key of each of the three remotes.
#[test]
fn import_dump_v6_network() {
    let archive = File::open("tests/assets/v6-with-network.dump").unwrap();
    let dump = DumpReader::open(archive).unwrap();

    // Top-level information.
    insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00");
    insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");

    // Network section: must be present in this dump.
    let network = dump.network().unwrap().unwrap();
    insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");

    // Look each remote up once, then check its URL and search API key.
    let ms0 = network.remotes.get("ms-0").unwrap();
    insta::assert_snapshot!(ms0.url, @"http://localhost:7700");
    insta::assert_snapshot!(ms0.search_api_key.is_none(), @"true");

    let ms1 = network.remotes.get("ms-1").unwrap();
    insta::assert_snapshot!(ms1.url, @"http://localhost:7701");
    insta::assert_snapshot!(ms1.search_api_key.is_none(), @"true");

    // Only `ms-2` carries a search API key.
    let ms2 = network.remotes.get("ms-2").unwrap();
    insta::assert_snapshot!(ms2.url, @"http://ms-5679.example.meilisearch.io");
    insta::assert_snapshot!(ms2.search_api_key.as_ref().unwrap(), @"foo");
}
#[test]
fn import_dump_v5() {
@ -195,13 +433,17 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6519f7064c45d2196dd59b71350a9bf5");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"4b03e23e740b27bfb9d2a1faffe512e2");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition
@ -222,12 +464,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-04T15:51:35.939396731Z",
"updatedAt": "2022-10-04T15:55:01.897325373Z"
}
"###);
@ -237,12 +479,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-04T15:51:35.291992167Z",
"updatedAt": "2022-10-04T15:55:10.33561842Z"
}
"###);
@ -252,12 +494,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-04T15:51:37.381094632Z",
"updatedAt": "2022-10-04T15:55:02.394503431Z"
}
"###);
@ -265,6 +507,8 @@ pub(crate) mod test {
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
assert_eq!(dump.features().unwrap(), None);
}
#[test]
@ -273,13 +517,17 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"491e244a80a19fe2a900b809d310c24a");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"c1b06a5ca60d5805483c16c5b3ff61ef");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -350,13 +598,17 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"7cacce2e21702be696b866808c726946");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"0e203b6095f7c68dbdf788321dcc8215");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -443,13 +695,17 @@ pub(crate) mod test {
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6cabec4e252b74c8f3a2c8517622e85f");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"d216c7f90f538ffbb2a059531d7ac89a");
assert_eq!(update_files.len(), 9);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -470,12 +726,12 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:22.688964637Z",
"updatedAt": "2022-10-09T20:27:23.951017769Z"
}
"###);
@ -485,12 +741,12 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:22.197788495Z",
"updatedAt": "2022-10-09T20:28:01.93111053Z"
}
"###);
@ -515,12 +771,92 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-09T20:27:24.242683494Z",
"updatedAt": "2022-10-09T20:27:24.312809641Z"
}
"###);
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
fn import_dump_v2_from_meilisearch_v0_22_0_issue_3435() {
let dump = File::open("tests/assets/v2-v0.22.0.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"e27999f1112632222cb84f6cffff7c5f");
assert_eq!(update_files.len(), 8);
assert!(update_files[0..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2023-01-30T16:25:56.595257Z",
"updatedAt": "2023-01-30T16:25:58.70348Z"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2023-01-30T16:25:56.192178Z",
"updatedAt": "2023-01-30T16:25:56.455714Z"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2023-01-30T16:25:58.876405Z",
"updatedAt": "2023-01-30T16:25:59.079906Z"
}
"###);
@ -539,10 +875,14 @@ pub(crate) mod test {
assert_eq!(dump.date(), None);
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"b3e3652bfc10a76670be157d2507d761");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"0155a664b0cf62aae23db5138b6b03d7");
assert_eq!(update_files.len(), 9);
assert!(update_files[..].iter().all(|u| u.is_none())); // no update file in dump v1

View File

@ -10,6 +10,7 @@ expression: spells.settings().unwrap()
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",

View File

@ -10,6 +10,7 @@ expression: products.settings().unwrap()
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",

View File

@ -13,6 +13,10 @@ expression: movies.settings().unwrap()
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",

Some files were not shown because too many files have changed in this diff Show More