Compare commits

...

249 Commits

Author SHA1 Message Date
fc8182d7d3 Merge pull request #363 from meilisearch/bump-version
Bump meilisearch crates to v0.8.4
2019-12-03 17:30:31 +01:00
4f87465f18 Bump meilisearch crates to v0.8.4 2019-12-03 17:22:45 +01:00
5f1586ae85 Merge pull request #360 from meilisearch/fix-readme-broken-links
Fix README broken links
2019-12-02 19:10:40 +01:00
8d3161a2cf Reorder README parts 2019-12-02 18:29:53 +01:00
8bc8214279 Fix README broken links
Thanks to @baptistejamin!
2019-12-02 16:45:27 +01:00
3ea5aa18a2 Merge pull request #359 from bidoubiwa/fix_wording_in_readme
Fix bad wording in readme file
2019-12-02 14:06:49 +01:00
c4845b78a9 Fix bad wording in readme file 2019-12-02 11:15:39 +01:00
530e913e2f Merge pull request #356 from tpayet/fix-port-readme
Fix port in README & Dockerfile
2019-11-29 19:21:55 +01:00
5917f212ba Fix port in README & Dockerfile 2019-11-29 18:03:54 +01:00
d2b1690191 Merge pull request #355 from tpayet/master
Update binary default settings
2019-11-29 15:47:04 +01:00
710b7ea091 Update default listening port to 7700 2019-11-29 15:25:26 +01:00
089579d835 Update default database directory to working directory 2019-11-29 15:25:26 +01:00
7780293ddb Merge pull request #354 from meilisearch/camelcase-updates-result
Fix updates formattings and namings
2019-11-29 15:19:45 +01:00
773a51e7d0 Rename 'update_type' to 'type' on EnqueuedUpdateResult 2019-11-29 15:09:48 +01:00
7923752513 Serialize updates results to camelCase 2019-11-29 15:05:54 +01:00
9a48091b21 Merge pull request #353 from meilisearch/bump-version
Bump meilisearch crates to v0.8.3
2019-11-29 14:13:37 +01:00
30cb60f679 Bump meilisearch crates to v0.8.3 2019-11-29 14:06:17 +01:00
08687d8dab Merge pull request #351 from meilisearch/status-failed-updates-status
Add status failed on UpdateStatus
2019-11-28 18:53:31 +01:00
3a90233a3d Add status failed on UpdateStatus 2019-11-28 18:41:11 +01:00
32483cae2d Merge pull request #347 from curquiza/installation-script
Add script for binary installation
2019-11-28 18:34:58 +01:00
d7f28e0260 Add script for binary installation 2019-11-28 18:34:12 +01:00
9640c2aaa6 Merge pull request #349 from meilisearch/bump-version
Bump meilisearch crates to v0.8.2
2019-11-28 17:23:40 +01:00
9a2b4d08e1 Bump meilisearch crates to v0.8.2 2019-11-28 17:15:13 +01:00
e91615fe59 Merge pull request #348 from meilisearch/replace-isahc-by-ureq
Replace isahc by ureq
2019-11-28 17:14:32 +01:00
aed02b2e19 Remove many dependencies from the Dockerfile 2019-11-28 17:04:01 +01:00
83ad80d9db Replace isahc by ureq 2019-11-28 16:41:42 +01:00
abdb7793fb Merge pull request #345 from tpayet/readme_changes
Clarification of readme file
2019-11-28 16:35:44 +01:00
387eb3fde3 Clarification of readme file 2019-11-28 16:28:25 +01:00
e640bc90b4 Merge pull request #343 from meilisearch/explicit-index-clear
Change the update loop to be more explicit on index clear
2019-11-28 14:48:37 +01:00
3978378152 Merge pull request #344 from tpayet/patch-1
Update README license badge
2019-11-28 14:35:50 +01:00
61e3e4f0b9 Update README license badge 2019-11-28 14:28:30 +01:00
1def56ea11 Change the update loop to be more explicit on index clear 2019-11-27 13:43:28 +01:00
6d686ac14f Merge pull request #342 from meilisearch/update-lock
Update the lock file
2019-11-27 12:49:47 +01:00
641e0d15f5 Make sure the lock file is up to date 2019-11-27 12:06:14 +01:00
71b39426c0 Update the lock file 2019-11-27 12:01:22 +01:00
57584eaccc Merge pull request #341 from meilisearch/bump-version
Bump meilisearch crates to v0.8.1
2019-11-27 11:54:39 +01:00
f6fb31c531 Bump meilisearch crates to v0.8.1 2019-11-27 11:47:27 +01:00
0cea8ce5b5 Merge pull request #340 from meilisearch/separate-updates-kvstore
Separate the update and main databases
2019-11-27 11:39:14 +01:00
d08b76a323 Separate the update and main databases
We used the heed typed transaction to make it safe (https://github.com/Kerollmops/heed/pull/27).
2019-11-27 11:29:06 +01:00
86a87d6032 Merge pull request #339 from tpayet/action-docker-tag
Update action workflow for docker tagged image
2019-11-26 19:17:19 +01:00
e534929f80 Update action workflow for docker tagged image 2019-11-26 18:18:51 +01:00
fcc154da1c Merge pull request #336 from meilisearch/rename-to-meilisearch
Rename MeiliDB into MeiliSearch
2019-11-26 14:06:01 +01:00
00d1200704 Rename the meilisearch-http binary into meilisearch 2019-11-26 11:17:30 +01:00
7cc096e0a2 Rename MeiliDB into MeiliSearch 2019-11-26 11:12:30 +01:00
58eaf78dc4 Merge pull request #335 from tpayet/github-release-action
GitHub release action
2019-11-25 19:19:08 +01:00
3be2281483 Update workflows README 2019-11-25 18:14:21 +01:00
cc06d96993 Add gh actions to release binaries 2019-11-25 17:27:15 +01:00
93c7e700bc Merge pull request #333 from tpayet/update-dockerfile
Add meilihttp_addr env variable in docker build
2019-11-25 16:41:52 +01:00
97c6757fc7 Add meilihttp_addr env variable in docker build 2019-11-25 16:30:07 +01:00
276d3f8e22 Merge pull request #332 from meilisearch/jemalloc-only-on-linux
Make jemalloc only used on linux
2019-11-25 16:13:54 +01:00
4869a88ae2 Make jemalloc only used on linux 2019-11-25 15:35:13 +01:00
ae88bc31bc Merge pull request #331 from meilisearch/enable-jemalloc-linux-only
Enable jemalloc only on linux OSs
2019-11-25 14:59:56 +01:00
8aed1d96c5 Enable jemalloc only on linux OSs 2019-11-25 14:51:47 +01:00
c93949474c Merge pull request #330 from tpayet/fix-actions-badge-link
Update action badge link
2019-11-25 13:51:07 +01:00
8cf19f1c6b Update action badge link 2019-11-25 13:44:20 +01:00
a82ecb3cef Merge pull request #324 from tpayet/gh-actions
Replace Azure CI by Github Actions
2019-11-25 13:31:15 +01:00
04c2b37d82 Remove Azure CI
Add gh actions for cargo check using rust nightly

Add readme about actions workflows

Add basic Dockerfile

Add action workflow for docker publish

Change check action to test action

Update workflow readme without rust nightly

Rename test action file

Add gh actions to push latest docker image from master

Update github action for publish docker image

Add 2 steps dockerfile based on alpine

Update readme badges to match new CI
2019-11-25 13:20:54 +01:00
ab3e8d6537 Merge pull request #314 from meilisearch/fix-number-ord
Fix the ordering functions of the Number type
2019-11-22 15:14:05 +01:00
fd185a5e6b Add a test for the SorByAttr criterion 2019-11-22 15:04:23 +01:00
d9678f0040 Fix the ordering functions of the Number type 2019-11-22 14:44:02 +01:00
840217b111 Merge pull request #321 from meilisearch/fix-create-index
Fix index creation
2019-11-22 14:10:05 +01:00
9605a2cd88 Make possible to use a custom uid and simplify the usage 2019-11-22 14:01:00 +01:00
0f86ccc035 Index UID generation makes sure to not generate the same number 2019-11-22 14:01:00 +01:00
b3b73e2276 Merge pull request #323 from meilisearch/fix-index-deletion
Fix index deletion once again
2019-11-22 14:00:19 +01:00
f241c999ad Make the CI use rust stable 2019-11-22 13:47:29 +01:00
d4d2a2303a Fix a typo on timeout_ms used for multi index search 2019-11-22 13:47:29 +01:00
c8832409ad Fix the dead lock on index deletion once again 2019-11-22 13:47:29 +01:00
98f76aa952 Merge pull request #320 from meilisearch/send-amplitude-events
Add an Amplitude analysis loop tick
2019-11-22 10:52:29 +01:00
4236632af6 Add an amplitude analysis loop tick 2019-11-21 20:28:58 +01:00
e2c98244ec Merge pull request #313 from meilisearch/fix-dead-lock
Fix dead locks when deleting indexes
2019-11-21 12:42:40 +01:00
c1cf67c008 Join updates threads after dropping the indexes lock and avoid deadlocks 2019-11-21 12:01:46 +01:00
4abea919b2 Merge pull request #311 from meilisearch/add-index-name-and-id
Add index name and change some routes request body & response
2019-11-21 11:59:14 +01:00
d60aa722c0 Allow to update expireAt and revoked on token 2019-11-21 11:49:49 +01:00
055368acd8 Fix for review 2019-11-21 11:49:49 +01:00
7f2e5d091a Rename routes /synonym to /synonyms 2019-11-20 15:33:42 +01:00
c69ae8154f Allow to receive schema update formated as SchemaBuilder 2019-11-20 15:25:34 +01:00
cd95b243bb Add the update index route 2019-11-20 15:00:06 +01:00
1f1cb1f501 Rename browse_documents into get_all_documents and always respond HTTP Ok 2019-11-20 14:18:21 +01:00
530738cfe9 Format code 2019-11-20 14:12:12 +01:00
878dd6912e Return a HTTP 401 instead of 404 if token is not found 2019-11-20 14:06:56 +01:00
5f0f699f37 Move route to clear all synonyms on DELETE /synonyms 2019-11-20 14:03:55 +01:00
ca13900699 Add async routes should return ACCEPTED status code response 2019-11-20 14:03:19 +01:00
cc97889b37 Add stop-word is now PATCH method 2019-11-20 13:56:43 +01:00
45ded0498b Format code with cargo fmt 2019-11-20 11:45:23 +01:00
d01a3944c1 Add last_update information on global /stats route 2019-11-20 11:45:22 +01:00
a0caf0d6d7 Remove unused result response on indexes_uids function 2019-11-20 11:45:22 +01:00
e22debb994 Update index updated_at information at each update callback 2019-11-20 11:45:22 +01:00
1b8df0ed8b Remove last_update from stats 2019-11-20 11:45:22 +01:00
3286a5213c Move fields frequency from common store to index main store 2019-11-20 11:45:22 +01:00
394976d330 Update list_index route to return all index information, not only list of uid 2019-11-20 11:45:22 +01:00
b95acbece0 Function generate_uid return now lowercased uid 2019-11-20 11:45:22 +01:00
c94f4dff71 Do not return update_id on IndexCreateRespnse if it's none 2019-11-20 11:45:22 +01:00
e6465f4ea1 Create a new specific route for schema 2019-11-20 11:45:22 +01:00
2b3c91aabd Update get_index_schema to allow raw response 2019-11-20 11:45:22 +01:00
e97e13ce9f Rename index_name to index_uids 2019-11-20 11:45:22 +01:00
39e2b73718 Add updatedAt on main index store 2019-11-20 11:45:22 +01:00
a90facaa41 Rename index_name by index_uid 2019-11-20 11:45:22 +01:00
5527457655 Rewrite create_index route new path, body request and response 2019-11-20 11:45:21 +01:00
076e781810 Add name, created_at and updated_at informations into main index 2019-11-20 11:45:21 +01:00
750d336018 Bump Cargo.lock meili versions 2019-11-20 11:45:21 +01:00
e8251ad45b Merge pull request #310 from meilisearch/unify-crates-version
Unify the crates versions to 0.8.0
2019-11-20 11:05:54 +01:00
963ca1e2c7 Unify the crates versions to 0.8.0 2019-11-20 10:47:32 +01:00
12a6c7d54d Merge pull request #298 from bidoubiwa/add_ranked_movies_dataset
Create a dataset where the release_date is a numeric timestamp
2019-11-20 10:46:24 +01:00
2d0fc3f9d3 Create a dataset where the release_date is a numeric timestamp 2019-11-20 10:44:32 +01:00
e554784527 Merge pull request #309 from bidoubiwa/remove_stop_words_from_settings
Removed stop words from settings route
2019-11-19 18:35:27 +01:00
2cb43fa638 Removed stop words from settings route 2019-11-19 18:21:44 +01:00
66d5309a51 Merge pull request #308 from meilisearch/improve-structopt
Introduce better argument names
2019-11-19 18:09:44 +01:00
7eeedec7eb Bump meilidb-http to v0.3.0 2019-11-19 17:50:01 +01:00
4b798c71ae Introduce new arguments and understand env vars 2019-11-19 17:50:01 +01:00
685016bfec Bump meilidb-core to v0.7.0 and meilidb-http to v0.2.0 2019-11-18 15:49:23 +01:00
d30e5f6231 Merge pull request #299 from meilisearch/default-update-callbacks
Prefer using a global update callback common to all indexes
2019-11-18 15:05:21 +01:00
e854d67a55 Remove useless routes and checks 2019-11-18 14:41:49 +01:00
23a89732a5 Prefer using a global update callback common to all indexes 2019-11-18 14:41:49 +01:00
3a1f41ebdb Merge pull request #305 from meilisearch/fix-example
Make easier to interact with compacted databases
2019-11-17 20:31:06 +01:00
f873761a27 Make easier to interact with compacted databases 2019-11-17 20:01:02 +01:00
ebf620c7f9 Merge pull request #302 from meilisearch/fix-dataset-schema
Rename the movies dataset schema file
2019-11-17 17:17:33 +01:00
8b92bc3421 Rename the movies dataset schema file 2019-11-17 16:45:13 +01:00
70a5aa61e9 Merge pull request #301 from meilisearch/separate-types
Move the main types to a separate library
2019-11-17 12:45:25 +01:00
a76169042f Make the serde and zerocopy meilidb-types dependencies optional 2019-11-17 12:30:39 +01:00
c9c3cfcee9 Move the main types to a separate library 2019-11-17 12:19:36 +01:00
2e60ac5359 Merge pull request #300 from meilisearch/update-dependencies
Do not use a forked fst dependency
2019-11-17 12:19:08 +01:00
2dd7751e09 Disable the fst MemMap feature 2019-11-17 11:43:00 +01:00
26bdabcdec Do not use a forked fst dependency 2019-11-17 11:14:01 +01:00
fc8c7ed77e Merge pull request #297 from meilisearch/improve-highlights
Improve the highlight formatted outputs
2019-11-15 14:28:27 +01:00
521c96354f Improve the highlight formatted outputs 2019-11-15 14:16:21 +01:00
9788779894 Merge pull request #296 from meilisearch/update-readme
Update the README
2019-11-14 21:32:32 +01:00
9b965764ab Update the README 2019-11-14 19:09:04 +01:00
9a5a543311 Merge pull request #290 from curquiza/deploy-doc
Add information in documentation in Deploy Server part
2019-11-13 16:06:27 +01:00
b18fb868e8 Add information in documentation in Deploy Server part 2019-11-13 15:37:21 +01:00
c734af55c0 Merge pull request #289 from curquiza/status204-delete-index
Change the HTTP status code on index deletion
2019-11-13 15:33:27 +01:00
810b328ad2 Change the HTTP status code on index deletion 2019-11-13 15:14:23 +01:00
0a8039d8d8 Merge pull request #285 from bidoubiwa/remove_catching_same_index_creation
Change the error catching on the index creation route
2019-11-13 15:13:51 +01:00
e51704c09a Remove the error catching on the index creation route when the index already exist 2019-11-13 14:42:59 +01:00
623a9012d5 Merge pull request #279 from bidoubiwa/new_slogan_and_resume
Slogan and Resume proposition
2019-11-13 14:41:21 +01:00
b9a185634f Slogan and Resume proposition 2019-11-13 14:31:22 +01:00
b46889b5f0 Merge pull request #282 from meilisearch/fix-ci-artifacts
Add the meilidb-http binary to the artifacts
2019-11-13 11:39:00 +01:00
ef9a0c07db Add the meilidb-http binary to the artifacts 2019-11-13 11:15:39 +01:00
3a6f3947c9 Merge pull request #281 from meilisearch/fix-attributes-to-search-in
Take attributes to search in into account
2019-11-12 18:45:40 +01:00
5c5f41d755 Take attributes to search in into account 2019-11-12 18:35:58 +01:00
6803a8fad0 Merge pull request #280 from meilisearch/format-updates-json
Format updates json
2019-11-12 18:35:25 +01:00
8e4b362e4d Fixed the display of enqueued updates 2019-11-12 18:21:59 +01:00
acb5e624c6 Add enqueued and processed datetimes 2019-11-12 18:21:59 +01:00
a98949ff1d Improve updates JSON format 2019-11-12 16:57:22 +01:00
f355280250 Merge pull request #278 from meilisearch/mit-license
Change the license to an MIT one
2019-11-12 14:35:32 +01:00
cee8d6a8d9 Change the license to an MIT one 2019-11-12 14:24:28 +01:00
27326ea069 Merge pull request #277 from bidoubiwa/add_cmd_to_compile
Add cmd line to compile binary
2019-11-12 13:55:54 +01:00
7bbe5aca5b Add cmd line to compile binary 2019-11-12 10:57:03 +01:00
1c4afe6d0f Merge pull request #276 from meilisearch/support-slash-tokenizer
Add support for back/slashes
2019-11-11 21:46:14 +01:00
2d8f9a9849 Add support for back/slashes 2019-11-11 21:23:08 +01:00
3f41681b18 Merge pull request #274 from meilisearch/enable-env-logger
Add env logger to enable logging
2019-11-11 19:13:33 +01:00
64791815fa Add env logger to enable logging 2019-11-11 19:03:38 +01:00
8a36571a74 Merge pull request #272 from meilisearch/fix-long-words
Ignore words that are too long
2019-11-10 20:07:22 +01:00
d18e775bec Ignore words that are too long 2019-11-10 17:44:27 +01:00
78381f1818 Merge pull request #271 from meilisearch/update-dependencies
Update Dependencies
2019-11-10 11:17:09 +01:00
7f33a01ae1 Update dependencies 2019-11-10 11:04:56 +01:00
d07d14d33a Update crossbeam-channel to 0.4.0 2019-11-10 11:03:22 +01:00
540d7886ab Merge pull request #266 from meilisearch/update-readme
Update the readme and add a Quick Start section
2019-11-09 13:21:22 +01:00
5a5d10af52 Add an image description of the gif 2019-11-09 13:12:01 +01:00
f95d077ef8 Improve the README a little bit by adding a quick start section 2019-11-09 13:12:01 +01:00
05dd99936f Add a gif to show a demo using crates.io 2019-11-09 12:59:39 +01:00
c086625773 Merge pull request #269 from meilisearch/repo-became-binary
Make the repository be a binary and version the Cargo.lock
2019-11-09 12:58:52 +01:00
dc17bebf4a Make the repository be a binary and version the Cargo.lock 2019-11-09 12:13:28 +01:00
026464b2e4 Bump meilidb-core to v0.6.5 2019-11-06 11:52:34 +01:00
bd42158a70 Merge pull request #264 from meilisearch/index-soft-deletion
Index soft deletion
2019-11-06 11:51:50 +01:00
df066f4321 Introduce a new add or update documents PUT route 2019-11-06 11:42:41 +01:00
69832e8c70 Update the http index deletion route 2019-11-06 11:42:41 +01:00
95eb6ad09a Add a test to check index soft deletion works correctly 2019-11-06 11:02:30 +01:00
f3fc0bed45 Introduce index soft deletion 2019-11-06 11:02:30 +01:00
5dd6b697b9 Bump meilidb-core to v0.6.4 2019-11-05 18:46:16 +01:00
b7d170c7d1 Merge pull request #262 from meilisearch/fix-unidecoded-emojis
Fix an highlighting problem
2019-11-05 17:04:35 +01:00
7541172d12 Make the example show highlighted areas more explicitly 2019-11-05 16:40:48 +01:00
85bf5d113c Fix an highlighting problem when query was longer than original text 2019-11-05 16:40:34 +01:00
89fd397903 Bump meilidb-core to v0.6.3 2019-11-05 15:40:04 +01:00
d8392f2f18 Merge pull request #261 from meilisearch/partial-updates
Introduce the support of partial updates
2019-11-05 15:39:02 +01:00
36b74f0efe Introduce partial updates to the update system 2019-11-05 15:23:41 +01:00
68c0a36b00 Make the deserialization support correctly optional documents 2019-11-05 15:03:18 +01:00
a127b72a74 Merge pull request #259 from meilisearch/allow-add-schema-attributes-at-end
Allow to introduce attributes only at the end of a schema
2019-11-05 12:34:11 +01:00
5782fb9e52 Test the add of attributes only at the end of a schema 2019-11-05 12:09:52 +01:00
20319f7974 Allow to introduce attributes only at the end of a schema 2019-11-05 12:09:52 +01:00
c4087e2ec2 Merge pull request #258 from meilisearch/debug-schema
Implement a better debug for the schema
2019-11-05 11:35:02 +01:00
b1d1f2f627 Implement a better debug system for the schema 2019-11-05 11:21:07 +01:00
62fe6a8263 Merge pull request #257 from meilisearch/bump-version
Bump meilidb-core/tokenizer versions
2019-11-04 17:26:01 +01:00
d88c10f3b4 Bump meilidb-tokenizer to v0.6.1 2019-11-04 17:17:06 +01:00
00f49990c7 Bump meilidb-core to v0.6.2 2019-11-04 17:16:50 +01:00
89f30ad47e Merge pull request #256 from meilisearch/fix-tokenizer
Fix the tokenizer to make it work with unicode chars
2019-11-04 17:15:17 +01:00
3b1cbed238 Check that the unidecoded words are not empty 2019-11-04 17:03:11 +01:00
4571b80a49 Update the tests 2019-11-04 16:41:58 +01:00
de2b8672d4 Make the tokenizer understand strange whitespaces/quotes 2019-11-04 16:41:58 +01:00
ccded7b429 Improve the indexer to not not deunicode before indexing
Revert of #179
2019-11-04 16:41:58 +01:00
1d4e98410a Merge pull request #255 from meilisearch/bump-version
Bump meilidb-core to v0.6.1
2019-11-04 14:47:53 +01:00
e493b27ef1 Bump meilidb-core to v0.6.1 2019-11-04 14:22:08 +01:00
70589c136f Merge pull request #253 from meilisearch/fix-updates-system
Fix the updates system
2019-11-04 13:46:37 +01:00
1c3620a7d4 Add tests to the update system 2019-11-04 13:18:07 +01:00
c2cc0704d7 Clean up the update_awaiter function 2019-11-04 11:11:58 +01:00
2a50e08bb8 Moving to heed v0.5.0 2019-11-04 10:49:27 +01:00
6b326a45d7 Fix the update system to always consume updates even if failing 2019-10-31 17:44:13 +01:00
b73874bf24 Merge pull request #252 from meilisearch/examples-specify-index-name
Allow users to specify the index name to use with examples bins
2019-10-31 17:02:00 +01:00
95c8ad0f80 Allow users to specify the index name to use with examples bins 2019-10-31 16:20:31 +01:00
996763cc52 Merge pull request #251 from meilisearch/update-heed
Moving to heed 0.3.0
2019-10-31 16:20:07 +01:00
6a8171d335 Moving to heed 0.3.0 2019-10-31 16:11:02 +01:00
2f32586dab Merge pull request #250 from meilisearch/new-http-server
Introduce a brand new HTTP server
2019-10-31 16:07:52 +01:00
db898001eb Get rid of rust-crypto and uuid 2019-10-31 15:28:37 +01:00
c2a12b661a Make it a runnable server 2019-10-31 15:27:21 +01:00
f51c49db93 Introduce the HTTP tide based library 2019-10-31 15:02:34 +01:00
1be5b0f327 Bump the meili-core/schema/tokenizer crates to 0.6.0 2019-10-31 14:05:59 +01:00
a136c62208 Merge pull request #249 from meilisearch/display-all-updates
Display enqueued along with processed updates
2019-10-31 13:53:46 +01:00
cc461b1331 Display enqueued along with processed updates 2019-10-31 12:25:52 +01:00
dbe5363672 Merge pull request #248 from meilisearch/fix-highlight-too-long
Correctly highlight when query string is too long
2019-10-30 18:19:06 +01:00
45d4361e7d Correctly highlight when query string is longer 2019-10-30 17:49:50 +01:00
b28c44cc6b Merge pull request #247 from meilisearch/bump-meilidb
Bump the meili-core/schema/tokenizer crates to 0.5.11
2019-10-30 17:48:26 +01:00
b709a7a30a Bump the meili-core/schema/tokenizer crates to 0.5.11 2019-10-30 17:40:31 +01:00
64c25bdb40 Merge pull request #246 from meilisearch/better-highlighting-area
Make the highlight system much better
2019-10-30 17:39:12 +01:00
c230f244be Make the highlight system much better 2019-10-30 17:32:29 +01:00
02af4ff113 Merge pull request #245 from meilisearch/reindex-all-documents-reduce-memory-usage
Reduce the ram consumption when re-indexing all the documents
2019-10-29 17:54:47 +01:00
4dff8a215e Reduce the ram consumption when re-indexing all the documents 2019-10-29 17:46:23 +01:00
41065305aa Merge pull request #244 from meilisearch/reintroduce-stop-words
Reintroduce stop words
2019-10-29 16:35:03 +01:00
e9dce3ce81 Add a test to ensure that the indexer support stop words 2019-10-29 16:18:06 +01:00
ff7dde7522 Make the RawIndexer support stop words 2019-10-29 16:18:06 +01:00
a226fd23c3 Introduce the stop words deletion update type 2019-10-29 16:18:06 +01:00
776673ebae Introduce the stop words addition update type 2019-10-29 15:24:09 +01:00
32d2cc3aea Merge pull request #243 from meilisearch/all-updates-results
Introduce a function to get all updates results
2019-10-29 11:45:55 +01:00
8a17fcdda5 Introduce a function to get all updates results 2019-10-29 11:37:40 +01:00
9602d7a960 Merge pull request #242 from meilisearch/accept-dup-documents
Make documents additions accept only the last duplicate document
2019-10-28 20:52:40 +01:00
ac12a4b9c9 Make documents additions accept only the last duplicate document 2019-10-28 20:40:33 +01:00
af96050944 Merge pull request #241 from meilisearch/fix-dead-locks
Fix dead locks
2019-10-28 18:20:01 +01:00
a43b37dfc1 Send channel notification when clearing documents 2019-10-28 17:58:22 +01:00
c08dcac1d4 Abort the update transaction before calling the update callback 2019-10-28 17:55:43 +01:00
a17dccd84e Merge pull request #237 from meilisearch/fix-exactness-criterion
Fix the exactness criterion algorithm
2019-10-26 18:43:10 +02:00
9a57cab3ee Fix the exactness criterion algorithm 2019-10-26 18:34:40 +02:00
751b060320 Merge pull request #238 from meilisearch/improve-highlighting
Only highlight query words areas not the whole words
2019-10-26 18:23:19 +02:00
4111b99a6d Only highlight query words areas not the whole words 2019-10-26 15:56:34 +02:00
d6fb2b56d1 Merge pull request #236 from meilisearch/reorder-automatons
Make sure that automatons group with more automatons are better
2019-10-24 15:29:16 +02:00
cb5c77e536 Make sure that automatons group with more automatons are better 2019-10-24 15:18:53 +02:00
44c89b1ea2 Merge pull request #235 from meilisearch/readme-concat-split-query-words
Add information about search concat and split query words support
2019-10-23 18:20:59 +02:00
26a285053b Add information about search concat and split query words support 2019-10-23 18:19:15 +02:00
1446a6a2d2 Merge pull request #234 from meilisearch/clear-all-update-variant
Introduce a clear all documents update
2019-10-23 16:45:37 +02:00
047eba3ff3 Introduce a clear all documents update 2019-10-23 16:39:10 +02:00
8d9d183ce6 Merge pull request #233 from meilisearch/commit-when-update-ok
Commit an update only when it is Ok
2019-10-23 16:07:48 +02:00
eb67195840 Commit an update only when it is Ok 2019-10-23 15:52:40 +02:00
93306c2326 Merge pull request #232 from meilisearch/support-splitted-words
Support splitted words
2019-10-23 13:38:16 +02:00
7d9cf8d713 Clean up the fetch algorithm 2019-10-23 12:06:21 +02:00
03eb7898e7 Introduce a basic working version of phrase query for splitting words 2019-10-23 11:40:13 +02:00
0fbd4cd632 Merge pull request #231 from meilisearch/recursive-object-indexing
Make possible to convert recursive object into strings
2019-10-22 16:20:10 +02:00
858bf359b8 Make possible to convert recursive object into strings 2019-10-22 16:02:02 +02:00
5dc8465ebd Merge pull request #181 from meilisearch/diff-schema
Make possible to update an index schema
2019-10-22 14:23:43 +02:00
0f30a221fa Introduce the reindex_all_documents indexing function 2019-10-22 14:07:27 +02:00
e86a547e93 Introduce a basic schema diff function 2019-10-21 17:57:32 +02:00
32d8b4b83f Merge pull request #230 from meilisearch/moving-to-heed
Move to heed 0.1.0
2019-10-21 13:34:06 +02:00
78535b3e33 Move to heed 0.1.0 2019-10-21 12:05:53 +02:00
111 changed files with 30433 additions and 1658 deletions

.dockerignore (new file, 5 lines)

@ -0,0 +1,5 @@
target
Dockerfile
.dockerignore
.git
.gitignore

.github/workflows/README.md (new file, 11 lines)

@ -0,0 +1,11 @@
# GitHub actions workflow for MeiliDB
> **Note:**
> - We do not use [cache](https://github.com/actions/cache) yet but we could use it to speed up CI
## Workflow
- On each pull request, we are triggering `cargo test`.
- On each commit on master, we are building the latest docker image.
- On each tag, we are building the tagged docker image and the binaries for MacOS & Ubuntu.

.github/workflows/publish-binaries.yml (new file, 36 lines)

@ -0,0 +1,36 @@
name: Publish binaries to GitHub release

on:
  push:
    tags:
      - '*'

jobs:
  publish:
    name: Publish for ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
        include:
          - os: ubuntu-latest
            artifact_name: meilisearch
            asset_name: meilisearch-linux-amd64
          - os: macos-latest
            artifact_name: meilisearch
            asset_name: meilisearch-macos-amd64
    steps:
      - uses: hecrj/setup-rust-action@master
        with:
          rust-version: stable
      - uses: actions/checkout@v1
      - name: Build
        run: cargo build --release --locked
      - name: Upload binaries to release
        uses: svenstaro/upload-release-action@v1-release
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: target/release/${{ matrix.artifact_name }}
          asset_name: ${{ matrix.asset_name }}
          tag: ${{ github.ref }}

Workflow: Publish latest image to Docker Hub (new file, 19 lines)

@ -0,0 +1,19 @@
---
on:
  push:
    branches:
      - master

name: Publish latest image to Docker Hub
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - name: Publish to Registry
        uses: elgohr/Publish-Docker-Github-Action@master
        with:
          name: getmeili/meilisearch
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

Workflow: Publish tagged image to Docker Hub (new file, 20 lines)

@ -0,0 +1,20 @@
---
on:
  push:
    tags:
      - '*'

name: Publish tagged image to Docker Hub
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - name: Publish to Registry
        uses: elgohr/Publish-Docker-Github-Action@master
        with:
          name: getmeili/meilisearch
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
          tag_names: true

.github/workflows/test.yml (new file, 21 lines)

@ -0,0 +1,21 @@
---
on: [pull_request]

name: Cargo test
jobs:
  check:
    name: MeiliSearch
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - name: Run cargo test
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --locked

.gitignore (2 changed lines)

@ -1,7 +1,7 @@
/target
-Cargo.lock
**/*.csv
**/*.json_lines
**/*.rs.bk
/*.mdb
/query-history.txt
+/data.ms

Cargo.lock (new generated file, 2,818 lines; diff suppressed because it is too large)

Cargo.toml (workspace members updated)

@ -1,8 +1,10 @@
[workspace]
members = [
-    "meilidb-core",
-    "meilidb-schema",
-    "meilidb-tokenizer",
+    "meilisearch-core",
+    "meilisearch-http",
+    "meilisearch-schema",
+    "meilisearch-tokenizer",
+    "meilisearch-types",
]

[profile.release]

Dockerfile (new file, 27 lines)

@ -0,0 +1,27 @@
# Compile
FROM alpine:3.10 AS compiler
RUN apk update --quiet
RUN apk add curl
RUN apk add build-base
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
WORKDIR /meilisearch
COPY . .
ENV RUSTFLAGS="-C target-feature=-crt-static"
RUN $HOME/.cargo/bin/cargo build --release
# Run
FROM alpine:3.10
RUN apk update --quiet
RUN apk add libgcc
COPY --from=compiler /meilisearch/target/release/meilisearch .
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
CMD ./meilisearch

LICENSE (26 changed lines)

@ -1,13 +1,21 @@
-“Commons Clause” License Condition v1.0
-The Software is provided to you by the Licensor under the License, as defined below, subject to the following condition.
-Without limiting other conditions in the License, the grant of rights under the License will not include, and the License does not grant to you, the right to Sell the Software.
-For purposes of the foregoing, “Sell” means practicing any or all of the rights granted to you under the License to provide to third parties, for a fee or other consideration (including without limitation fees for hosting or consulting/ support services related to the Software), a product or service whose value derives, entirely or substantially, from the functionality of the Software. Any license notice or attribution required by the License must also include this Commons Clause License Condition notice.
-Software: MeiliDB
-License: MIT
-Licensor: MEILI SAS
+MIT License
+Copyright (c) 2019-2020 Meili SAS
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md (159 changed lines)

@ -1,43 +1,133 @@
-# MeiliDB
+# MeiliSearch
-[![Build Status](https://dev.azure.com/thomas0884/thomas/_apis/build/status/meilisearch.MeiliDB?branchName=master)](https://dev.azure.com/thomas0884/thomas/_build/latest?definitionId=1&branchName=master)
+[![Build Status](https://github.com/meilisearch/MeiliSearch/workflows/Cargo%20test/badge.svg)](https://github.com/meilisearch/MeiliSearch/actions)
-[![dependency status](https://deps.rs/repo/github/meilisearch/MeiliDB/status.svg)](https://deps.rs/repo/github/meilisearch/MeiliDB)
+[![dependency status](https://deps.rs/repo/github/meilisearch/MeiliSearch/status.svg)](https://deps.rs/repo/github/meilisearch/MeiliSearch)
-[![License](https://img.shields.io/badge/license-commons%20clause-lightgrey)](https://commonsclause.com/)
+[![License](https://img.shields.io/badge/license-MIT-informational)](https://github.com/meilisearch/MeiliSearch/blob/master/LICENSE)
-A _full-text search database_ based on the fast [LMDB key-value store](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database).
+⚡ Ultra relevant and instant full-text search API 🔍
+MeiliSearch is a powerful, fast, open-source, easy to use, and deploy search engine. The search and indexation are fully customizable and handles features like typo-tolerance, filters, and synonyms.
+For more [details about those features, go to our documentation](https://docs.meilisearch.com/).
+[![crates.io demo gif](misc/crates-io-demo.gif)](https://crates.meilisearch.com)
+> Meili helps the Rust community find crates on [crates.meilisearch.com](https://crates.meilisearch.com)
## Features
-- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L107-L113) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
-- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
-- Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L283), useful for paginating results
-- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L265-L270) and [filter](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L246-L259) returned documents based on context defined rules
-- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-schema/src/lib.rs#L265-L279)
-- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-tokenizer/src/lib.rs) can index latin and kanji based languages
-- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/lib.rs#L66-L88), useful to highlight matched words in results
-- Accepts query time search config like the [searchable attributes](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L272-L275)
-- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/store/mod.rs#L143-L173)
-It uses [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performances.
-You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries or you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
-We will be proud if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
-The project is only a library yet. It means that there is no binary provided yet. To get started, you can check the examples wich are made to work with the data located in the `datasets/` folder.
-MeiliDB will be a binary in a near future so you will be able to use it as a database out-of-the-box. We should be able to query it using HTTP. This is our current goal, [see the milestones](https://github.com/meilisearch/MeiliDB/milestones). In the end, the binary will be a bunch of network protocols and wrappers around the library - which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
+* Search as-you-type experience (answers < 50ms)
+* Full-text search
+* Typo tolerant (understands typos and spelling mistakes)
+* Supports Kanji
+* Supports Synonym
+* Easy to install, deploy, and maintain
+* Whole documents returned
+* Highly customizable
+* RESTfull API
+## Quick Start
+### Deploy the Server
+```bash
+# If you have the Rust toolchain already installed, you can compile from the source
+git clone https://github.com/meilisearch/MeiliSearch.git
+cd MeiliSearch
+cargo run --release
+# You can also use Docker
+docker run -it -p 7700:7700 --rm getmeili/MeiliSearch
+# You can also download the binary
+curl -L https://install.meilisearch.com | sh
+./meilisearch
+```
+### Create an Index and Upload Some Documents
+We provide a movie dataset that you can use for testing purposes.
+```bash
+curl -L 'https://bit.ly/33MKvk4' -o movies.json
+```
+MeiliSearch can serve multiple indexes, with different kinds of documents,
+therefore, it is required to create the index before sending documents to it.
+```bash
+curl -i -X POST 'http://127.0.0.1:7700/indexes' --data '{ "name": "Movies", "uid": "movies" }'
+```
+Now that the server knows about our brand new index, we can send it data.
+We provided you a small dataset that is available in the `datasets/` directory.
+```bash
+curl -i -X POST 'http://127.0.0.1:7700/indexes/movies/documents' \
+  --header 'content-type: application/json' \
+  --data-binary @movies.json
+```
+### Search for Documents
+The search engine is now aware of our documents and can serve those via our HTTP server again.
+The [`jq` command-line tool](https://stedolan.github.io/jq/) can significantly help you read the server responses.
+```bash
+curl 'http://127.0.0.1:7700/indexes/movies/search?q=botman+robin&limit=2' | jq
+```
+```json
+{
+  "hits": [
+    {
+      "id": "415",
+      "title": "Batman & Robin",
+      "poster": "https://image.tmdb.org/t/p/w1280/79AYCcxw3kSKbhGpx1LiqaCAbwo.jpg",
+      "overview": "Along with crime-fighting partner Robin and new recruit Batgirl...",
+      "release_date": "1997-06-20",
+    },
+    {
+      "id": "411736",
+      "title": "Batman: Return of the Caped Crusaders",
+      "poster": "https://image.tmdb.org/t/p/w1280/GW3IyMW5Xgl0cgCN8wu96IlNpD.jpg",
+      "overview": "Adam West and Burt Ward returns to their iconic roles of Batman and Robin...",
+      "release_date": "2016-10-08",
+    }
+  ],
+  "offset": 0,
+  "limit": 2,
+  "processingTimeMs": 1,
+  "query": "botman robin"
+}
+```
+### Documentation
+Now, that you have a running MeiliSearch, you can learn more and tune your search engine using [the documentation](https://docs.meilisearch.com).
+## How it works
+MeiliSearch uses [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliSearch/issues/82) and provides great performances.
+You can [read the deep dive](deep-dive.md) if you want more information on the engine; it describes the whole process of generating updates and handling queries. Also, you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
+### Technical features
+- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/criterion/mod.rs#L106-L111) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
+- Accepts [custom criteria](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/criterion/mod.rs#L20-L29) and can apply them in any custom order
+- Support [ranged queries](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/query_builder.rs#L342), useful for paginating results
+- Can [distinct](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/query_builder.rs#L324-L329) and [filter](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/query_builder.rs#L313-L318) returned documents based on context defined rules
+- Searches for [concatenated](https://github.com/meilisearch/MeiliSearch/pull/164) and [splitted query words](https://github.com/meilisearch/MeiliSearch/pull/232) to improve the search quality.
+- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/datasets/movies/schema.toml)
+- The [default tokenizer](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-tokenizer/src/lib.rs) can index latin and kanji based languages
+- Returns [the matching text areas](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-types/src/lib.rs#L49-L65), useful to highlight matched words in results
+- Accepts query time search config like the [searchable attributes](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/query_builder.rs#L331-L336)
+- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/store/mod.rs#L143-L212)
## Performances
-With a database composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
+With a dataset composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
So more than _300 000_ fields indexed for _35 million_ stored we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
-Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real users queries.
+Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real users' queries.
```
Running 10s test @ http://localhost:2230
@ -50,29 +140,20 @@ Requests/sec: 2806.46
Transfer/sec: 759.17KB
```
+We also indexed a dataset containing something like _12 millions_ cities names in _24 minutes_ on a machine with _8 cores_, _64 GB of RAM_, and a _300 GB NMVe_ SSD.<br/>
+The resulting database was _16 GB_ and search results were between _30 ms_ and _4 seconds_ for short prefix queries.
### Notes
With Rust 1.32 the allocator has been [changed to use the system allocator](https://blog.rust-lang.org/2019/01/17/Rust-1.32.0.html#jemalloc-is-removed-by-default).
We have seen much better performances when [using jemalloc as the global allocator](https://github.com/alexcrichton/jemallocator#documentation).
-## Usage and examples
-Currently MeiliDB do not provide an http server but you can run the example binary.
-The _index_ subcommand has been made to create an index and inject documents into it. Using the command line below, the index will be named _movies_ and the _19 700_ movies of the `datasets/` will be injected in MeiliDB.
-```bash
-cargo run --release --example from_file -- \
-    index example.mdb datasets/movies/data.csv \
-    --schema datasets/movies/schema.toml
-```
-Once the first command is done, you can query the freshly created _movies_ index using the _search_ subcomand. In this example we filtered the dataset to only show _non-adult_ movies using the non-definitive `!adult` syntax filter.
-```bash
-cargo run --release --example from_file -- \
-    search example.mdb
-    --number 4 \
-    --filter '!adult' \
-    id popularity adult original_title
-```
+## Contributing
+We will be glad if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliSearch/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
+### Analytic Events
+We send events to our Amplitude instance to be aware of the number of people who use MeiliSearch.<br/>
+We only send the platform on which the server runs once by day. No other information is sent.<br/>
+If you do not want us to send events, you can disable these analytics by using the `MEILI_NO_ANALYTICS` env variable.

Azure Pipelines CI configuration (deleted file, 52 lines)

@ -1,52 +0,0 @@
---
trigger:
  branches:
    include: [ master ]
pr: [ master ]

jobs:
  - job: test
    pool:
      vmImage: 'Ubuntu 16.04'
    container: tpayet/chiquitita:latest
    steps:
      - script: |
          curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
          $HOME/.cargo/bin/rustup component add rustfmt
        displayName: 'Install rustc and components'
      - script: |
          $HOME/.cargo/bin/cargo check
        displayName: 'Check MeiliDB'
      - script: |
          $HOME/.cargo/bin/cargo test
        displayName: 'Test MeiliDB'
      - script: |
          $HOME/.cargo/bin/cargo fmt --all -- --check
        displayName: 'Fmt MeiliDB'

  - job: build
    dependsOn:
      - test
    condition: succeeded()
    pool:
      vmImage: 'Ubuntu 16.04'
    container: tpayet/chiquitita:latest
    steps:
      - script: |
          curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
          $HOME/.cargo/bin/rustup component add rustfmt
        displayName: 'Install rustc and components'
      - script: |
          $HOME/.cargo/bin/cargo build --release
        displayName: 'Build MeiliDB'
      - task: CopyFiles@2
        inputs:
          contents: '$(System.DefaultWorkingDirectory)/target/release/libmeilidb.rlib'
          targetFolder: $(Build.ArtifactStagingDirectory)
        displayName: 'Copy build'
      - task: PublishBuildArtifacts@1
        inputs:
          artifactName: libmeilidb.rlib
        displayName: 'Upload artifacts'

Dataset attribution note (1 changed line)

@ -1 +1 @@
_datas in movies.csv are from https://www.themoviedb.org/_

datasets/movies/movies.json (new file, 19,655 lines; diff suppressed because it is too large)

deep-dive.md (updated)

@ -1,8 +1,8 @@
-# A deep dive in MeiliDB
+# A deep dive in MeiliSearch
On the 15 of May 2019.
-MeiliDB is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
+MeiliSearch is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
<!-- MarkdownTOC autolink="true" -->
@ -22,7 +22,7 @@ MeiliDB is a full text search engine based on a final state transducer named [fs
## Where is the data stored?
-MeiliDB is entirely backed by a key-value store like any good database (i.e. Postgres, MySQL). This brings a great flexibility in the way documents can be stored and updates handled along time.
+MeiliSearch is entirely backed by a key-value store like any good database (i.e. Postgres, MySQL). This brings a great flexibility in the way documents can be stored and updates handled along time.
[sled will brings some](https://github.com/spacejam/sled/tree/434533332a3f485e6d2e467023be0a0b55d3a1af#plans) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent.
@ -34,7 +34,7 @@ It contain the inverted word index, the schema and the documents fields.
### The inverted word index
-[The inverted word index](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs) is a sled Tree dedicated to store and give access to all documents that contains a specific word. The information stored under the word is simply a big ordered array of where in the document the word has been found. In other word, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L35-L51).
+[The inverted word index](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/words_index.rs) is a sled Tree dedicated to store and give access to all documents that contains a specific word. The information stored under the word is simply a big ordered array of where in the document the word has been found. In other word, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/lib.rs#L35-L51).
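For a feel of what that list contains, here is a minimal Rust sketch of an inverted word index; the field names and integer widths only approximate the real `DocIndex` type linked above and are not the actual meilisearch-core definitions.

```rust
use std::collections::BTreeMap;

/// Illustrative stand-in for `DocIndex`: where a word was found.
#[derive(Debug, Clone, Copy)]
struct DocIndex {
    document_id: u64, // which document
    attribute: u16,   // which field of the document
    word_index: u16,  // position of the word among the field's words
    char_index: u32,  // offset of the word inside the field
    char_length: u16, // length of the matched word
}

/// A toy inverted index: one ordered list of `DocIndex` per word.
#[derive(Default)]
struct WordsIndex {
    words: BTreeMap<String, Vec<DocIndex>>,
}

impl WordsIndex {
    fn insert(&mut self, word: &str, index: DocIndex) {
        self.words.entry(word.to_string()).or_default().push(index);
    }

    fn doc_indexes(&self, word: &str) -> &[DocIndex] {
        self.words.get(word).map(|list| list.as_slice()).unwrap_or(&[])
    }
}

fn main() {
    let mut index = WordsIndex::default();
    index.insert("batman", DocIndex { document_id: 415, attribute: 1, word_index: 0, char_index: 0, char_length: 6 });
    println!("{:?}", index.doc_indexes("batman"));
}
```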
#### A final state transducer #### A final state transducer
@ -42,27 +42,27 @@ _...also abbreviated fst_
This is the first entry point of the engine, you can read more about how it work with the beautiful blog post of @BurntSushi, [Index 1,600,000,000 Keys with Automata and Rust](https://blog.burntsushi.net/transducers/).
-To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want, in MeiliDB [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
+To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want, in MeiliSearch [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
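A small sketch of that idea using the `fst` crate and its bundled Levenshtein automaton; the engine builds its DFAs with the separate levenshtein-automata crate and more settings, so treat this as an approximation.

```rust
// The `fst` crate ships a Levenshtein automaton (behind a Cargo feature in
// recent versions) that can be streamed against a set of indexed words.
use fst::automaton::Levenshtein;
use fst::{IntoStreamer, Set};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The words of the indexed documents, in lexicographic order.
    let words = Set::from_iter(vec!["batgirl", "batman", "robin"])?;

    // Accept every word within one edit of the misspelled query word.
    let lev = Levenshtein::new("botman", 1)?;

    // Stream all matching words out of the fst.
    let matches = words.search(lev).into_stream().into_strs()?;
    println!("{:?}", matches); // ["batman"]
    Ok(())
}
```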
#### Document indexes
The `fst` will only return the words that match with the search automaton but the goal of the search engine is to retrieve all matches in all the documents when a query is made. You want it to return some sort of position in an attribute in a document, an information about where the given word matched.
-To make it possible we retrieve all of the `DocIndex` corresponding to all the matching words in the fst, we use the [`WordsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding the words.
+To make it possible we retrieve all of the `DocIndex` corresponding to all the matching words in the fst, we use the [`WordsIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding the words.
### The schema
-The schema is a data structure that represents which documents attributes should be stored and which should be indexed. It is stored under a the [`MainIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/main_index.rs#L12) Tree and given to MeiliDB only at the creation of an index.
+The schema is a data structure that represents which documents attributes should be stored and which should be indexed. It is stored under a the [`MainIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/main_index.rs#L12) Tree and given to MeiliSearch only at the creation of an index.
-Each document attribute is associated to a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/schema.rs#L186).
+Each document attribute is associated to a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/schema.rs#L186).
In the future, this schema type could be given along with updates, the database could be able to handled a new schema and reindex the database according to the new one.
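As a purely illustrative sketch (made-up names, not the meilisearch-schema API), such a schema could map attribute names to 16 bit identifiers together with stored/indexed flags:

```rust
use std::collections::HashMap;

/// Illustrative equivalent of `SchemaAttr`: a unique 16 bit attribute id.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct SchemaAttr(u16);

#[derive(Debug, Clone, Copy)]
struct FieldProps {
    stored: bool,
    indexed: bool,
}

#[derive(Default)]
struct Schema {
    attrs: HashMap<String, (SchemaAttr, FieldProps)>,
    next_id: u16,
}

impl Schema {
    fn insert(&mut self, name: &str, props: FieldProps) -> SchemaAttr {
        let attr = SchemaAttr(self.next_id);
        self.next_id += 1;
        self.attrs.insert(name.to_string(), (attr, props));
        attr
    }

    fn attr(&self, name: &str) -> Option<SchemaAttr> {
        self.attrs.get(name).map(|(attr, _)| *attr)
    }
}

fn main() {
    let mut schema = Schema::default();
    schema.insert("title", FieldProps { stored: true, indexed: true });
    schema.insert("poster", FieldProps { stored: true, indexed: false });
    println!("{:?}", schema.attr("title")); // Some(SchemaAttr(0))
}
```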
### Document attributes
-When the engine handle a query the result that the requester want is a document, not only the [`Matches`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L62-L88) associated to it, fields of the original document must be returned too.
+When the engine handle a query the result that the requester want is a document, not only the [`Matches`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/lib.rs#L62-L88) associated to it, fields of the original document must be returned too.
-So MeiliDB again uses the power of the underlying key-value store and save the documents attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/documents_index.rs#L11).
+So MeiliSearch again uses the power of the underlying key-value store and save the documents attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/documents_index.rs#L11).
When a document field is saved in the key-value store its value is binary encoded using [message pack](https://github.com/3Hren/msgpack-rust), so a document must be serializable using serde.
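A minimal sketch of that round trip with serde and the rmp-serde MessagePack crate; the exact helpers the engine uses may differ.

```rust
// [dependencies]
// serde = { version = "1", features = ["derive"] }
// rmp-serde = "1"
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Movie {
    id: u64,
    title: String,
    overview: String,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let movie = Movie {
        id: 415,
        title: "Batman & Robin".to_string(),
        overview: "Along with crime-fighting partner Robin...".to_string(),
    };

    // Encode the document to MessagePack before writing it to the key-value store...
    let bytes = rmp_serde::to_vec(&movie)?;
    // ...and decode it back when the document is read.
    let decoded: Movie = rmp_serde::from_slice(&bytes)?;
    assert_eq!(movie, decoded);
    Ok(())
}
```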
@ -70,26 +70,26 @@ When a document field is saved in the key-value store its value is binary encode
## How is a request processed?
-Now that we have our inverted index we are able to return results based on a query. In the MeiliDB universe a query is a simple string containing words.
+Now that we have our inverted index we are able to return results based on a query. In the MeiliSearch universe a query is a simple string containing words.
### Query lexemes
-The first step to be able to call the underlying structures is to split the query in words, for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language, this is the hard part.
+The first step to be able to call the underlying structures is to split the query in words, for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language, this is the hard part.
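As a deliberately simplified sketch of that step (the real tokenizer linked above handles scripts, separators and word positions far more carefully), splitting a query into lowercased words could look like this:

```rust
/// Very naive word splitter: keep alphanumeric runs, lowercase them.
fn tokenize(query: &str) -> Vec<String> {
    query
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(|word| word.to_lowercase())
        .collect()
}

fn main() {
    let words = tokenize("Botman + robin!");
    assert_eq!(words, vec!["botman", "robin"]);
    println!("{:?}", words);
}
```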
### Automatons and query index ### Automatons and query index
So to query the fst we need an automaton. In MeiliSearch we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), which is constructed from a string and a maximum distance. Following [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/automaton.rs#L59-L78) with different settings.
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst set. The `Stream` is able to return all the matching words. We use these words to find the whole list of associated `DocIndexes`.
With all this information it is possible [to reconstruct a list of all the `DocIndexes` associated](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/query_builder.rs#L103-L130) with the words queried.
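Here is a small, self-contained sketch of that union of automaton searches over a single fst set. It uses the plain `fst` crate with its `levenshtein` feature enabled, which may differ from the exact fst version and features the engine pins, so take it as an illustration of the idea rather than the engine's actual code:

```rust
use fst::automaton::Levenshtein;
use fst::{Set, Streamer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The words fst, as stored in the Main tree (built in memory here).
    // `from_iter` requires the keys to be in lexicographic order.
    let words = Set::from_iter(vec!["hallo", "hello", "help", "world"])?;

    // One levenshtein automaton per query word, each accepting
    // stored words within a maximum edit distance of 1.
    let first = Levenshtein::new("hello", 1)?;
    let second = Levenshtein::new("wrld", 1)?;

    // Union both searches on the same set: a single stream yields
    // every stored word matched by at least one automaton.
    let mut stream = fst::set::OpBuilder::new()
        .add(words.search(first))
        .add(words.search(second))
        .union();

    let mut matches = Vec::new();
    while let Some(key) = stream.next() {
        matches.push(String::from_utf8(key.to_vec())?);
    }

    assert_eq!(matches, vec!["hallo", "hello", "world"]);
    Ok(())
}
```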
### Sort by criteria
Now that we are able to get a big list of [DocIndexes](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L21-L36), it is not enough to sort them by criteria: we need more information, like the levenshtein distance or the fact that a query word exactly matches the word stored in the fst. So [we stuff it a little bit](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L86-L93), and aggregate all these [Matches](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L47-L74) for each document. This way it is easy to sort a simple vector of documents using a bunch of functions.
With this big list of documents and associated matches [we are able to sort only the part of the slice that we want](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/query_builder.rs#L160-L188) using bucket sorting. [Each criterion](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/criterion/mod.rs#L95-L101) is evaluated on each subslice without copy, thanks to [GroupByMut](https://docs.rs/slice-group-by/0.2.4/slice_group_by/), which I hope [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
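A toy version of that bucket sort, using the same slice-group-by crate, could look like this; the real criteria are of course richer than the two fields shown here:

```rust
use slice_group_by::GroupByMut;

#[derive(Debug)]
struct Doc {
    id: u64,
    exact_matches: usize,
    typos: u8,
}

fn main() {
    let mut docs = vec![
        Doc { id: 1, exact_matches: 1, typos: 2 },
        Doc { id: 2, exact_matches: 2, typos: 1 },
        Doc { id: 3, exact_matches: 2, typos: 0 },
        Doc { id: 4, exact_matches: 1, typos: 0 },
    ];

    // First criterion: more exact matches ranks higher.
    docs.sort_unstable_by(|a, b| b.exact_matches.cmp(&a.exact_matches));

    // Second criterion: only documents tied on the first criterion are
    // reordered, group by group, without copying the slice.
    for group in docs.linear_group_by_mut(|a, b| a.exact_matches == b.exact_matches) {
        group.sort_unstable_by_key(|doc| doc.typos);
    }

    let ids: Vec<u64> = docs.iter().map(|doc| doc.id).collect();
    assert_eq!(ids, vec![3, 2, 4, 1]);
}
```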
Note that it is possible to customize the criteria by using the `QueryBuilder::with_criteria` constructor; this way you can implement some custom ranking based on the document attributes using the appropriate structure and the [`document` method](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/index.rs#L86).
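As a hypothetical illustration of such a custom criterion (the trait below is a stand-in, not the exact signature exposed by the engine), a ranking rule is essentially a named comparison between two candidate documents, here based on a stored `popularity` attribute:

```rust
use std::cmp::Ordering;

// Stand-in candidate type: in the real engine this would be a document
// handle from which stored attributes can be fetched.
struct Candidate {
    id: u64,
    popularity: u32,
}

trait Criterion {
    fn name(&self) -> &str;
    fn evaluate(&self, lhs: &Candidate, rhs: &Candidate) -> Ordering;
}

struct SortByPopularity;

impl Criterion for SortByPopularity {
    fn name(&self) -> &str {
        "SortByPopularity"
    }

    // Higher popularity ranks first, hence the reversed comparison.
    fn evaluate(&self, lhs: &Candidate, rhs: &Candidate) -> Ordering {
        rhs.popularity.cmp(&lhs.popularity)
    }
}

fn main() {
    let mut candidates = vec![
        Candidate { id: 1, popularity: 10 },
        Candidate { id: 2, popularity: 42 },
    ];

    let criterion = SortByPopularity;
    candidates.sort_by(|a, b| criterion.evaluate(a, b));

    assert_eq!(candidates[0].id, 2);
}
```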
At this point, MeiliSearch's work is over 🎉

download-latest.sh Normal file

@ -0,0 +1,129 @@
#!/bin/sh
# COLORS
RED="\033[31m"
GREEN="\033[32m"
DEFAULT="\033[0m"
# GLOBALS
REGEXP_SEMVER='v[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)'
BINARY_NAME='meilisearch'
# semverParseInto and semverLT from https://github.com/cloudflare/semver_bash/blob/master/semver.sh
# usage: semverParseInto version major minor patch special
# version: the string version
# major, minor, patch, special: will be assigned by the function
semverParseInto() {
local RE='[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)'
#MAJOR
eval $2=`echo $1 | sed -e "s#$RE#\1#"`
#MINOR
eval $3=`echo $1 | sed -e "s#$RE#\2#"`
#PATCH
eval $4=`echo $1 | sed -e "s#$RE#\3#"`
#SPECIAL
eval $5=`echo $1 | sed -e "s#$RE#\4#"`
}
# usage: semverLT version1 version2
semverLT() {
local MAJOR_A=0
local MINOR_A=0
local PATCH_A=0
local SPECIAL_A=0
local MAJOR_B=0
local MINOR_B=0
local PATCH_B=0
local SPECIAL_B=0
semverParseInto $1 MAJOR_A MINOR_A PATCH_A SPECIAL_A
semverParseInto $2 MAJOR_B MINOR_B PATCH_B SPECIAL_B
if [ $MAJOR_A -lt $MAJOR_B ]; then
return 0
fi
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -lt $MINOR_B ]; then
return 0
fi
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -le $MINOR_B ] && [ $PATCH_A -lt $PATCH_B ]; then
return 0
fi
if [ "_$SPECIAL_A" == "_" ] && [ "_$SPECIAL_B" == "_" ] ; then
return 1
fi
if [ "_$SPECIAL_A" == "_" ] && [ "_$SPECIAL_B" != "_" ] ; then
return 1
fi
if [ "_$SPECIAL_A" != "_" ] && [ "_$SPECIAL_B" == "_" ] ; then
return 0
fi
if [ "_$SPECIAL_A" < "_$SPECIAL_B" ]; then
return 0
fi
return 1
}
success_usage() {
printf "$GREEN%s\n$DEFAULT" "MeiliSearch binary successfully downloaded as '$BINARY_NAME' file."
echo ''
echo 'Run it:'
echo ' $ ./meilisearch'
echo 'Usage:'
echo ' $ ./meilisearch --help'
}
failure_usage() {
printf "$RED%s\n$DEFAULT" 'ERROR: MeiliSearch binary is not available for your OS distribution yet.'
echo ''
echo 'However, you can easily compile the binary from the source files.'
echo 'Follow the steps on the docs: https://docs.meilisearch.com/advanced_guides/binary.html#how-to-compile-meilisearch'
}
# OS DETECTION
echo 'Detecting OS distribution...'
os_name=$(uname -s)
if [ "$os_name" != "Darwin" ]; then
os_name=$(cat /etc/os-release | grep '^ID=' | tr -d '"' | cut -d '=' -f 2)
fi
echo "OS distribution detected: $os_name"
case "$os_name" in
'Darwin')
os='macos'
;;
'ubuntu' | 'debian')
os='linux'
;;
*)
failure_usage
exit 1
esac
# GET LATEST VERSION
tags=$(curl -s 'https://api.github.com/repos/meilisearch/MeiliSearch/tags' \
| grep "$REGEXP_SEMVER" \
| grep 'name' \
| tr -d '"' | tr -d ',' | cut -d 'v' -f 2)
latest=""
for tag in $tags; do
if [ "$latest" = "" ]; then
latest="$tag"
else
semverLT $tag $latest
if [ $? -eq 1 ]; then
latest="$tag"
fi
fi
done
# DOWNLOAD THE LATEST
echo "Downloading MeiliSearch binary v$latest for $os..."
release_file="meilisearch-$os-amd64"
link="https://github.com/meilisearch/MeiliSearch/releases/download/v$latest/$release_file"
curl -OL "$link"
mv "$release_file" "$BINARY_NAME"
chmod 744 "$BINARY_NAME"
success_usage


@ -1,205 +0,0 @@
use std::collections::hash_map::{Entry, HashMap};
use std::fs::File;
use std::path::Path;
use std::sync::{Arc, RwLock};
use std::{fs, thread};
use crossbeam_channel::Receiver;
use log::{debug, error};
use zlmdb::types::{Str, Unit};
use zlmdb::{CompactionOption, Result as ZResult};
use crate::{store, update, Index, MResult};
pub type BoxUpdateFn = Box<dyn Fn(update::UpdateResult) + Send + Sync + 'static>;
type ArcSwapFn = arc_swap::ArcSwapOption<BoxUpdateFn>;
pub struct Database {
pub env: zlmdb::Env,
common_store: zlmdb::DynDatabase,
indexes_store: zlmdb::Database<Str, Unit>,
indexes: RwLock<HashMap<String, (Index, Arc<ArcSwapFn>, thread::JoinHandle<()>)>>,
}
fn update_awaiter(
receiver: Receiver<()>,
env: zlmdb::Env,
update_fn: Arc<ArcSwapFn>,
index: Index,
) {
for () in receiver {
// consume all updates in order (oldest first)
loop {
let mut writer = match env.write_txn() {
Ok(writer) => writer,
Err(e) => {
error!("LMDB writer transaction begin failed: {}", e);
break;
}
};
match update::update_task(&mut writer, index.clone()) {
Ok(Some(status)) => {
if let Err(e) = writer.commit() {
error!("update transaction failed: {}", e)
}
if let Some(ref callback) = *update_fn.load() {
(callback)(status);
}
}
// no more updates to handle for now
Ok(None) => {
debug!("no more updates");
writer.abort();
break;
}
Err(e) => {
error!("update task failed: {}", e);
writer.abort()
}
}
}
}
}
impl Database {
pub fn open_or_create(path: impl AsRef<Path>) -> MResult<Database> {
fs::create_dir_all(path.as_ref())?;
let env = zlmdb::EnvOpenOptions::new()
.map_size(10 * 1024 * 1024 * 1024) // 10GB
.max_dbs(3000)
.open(path)?;
let common_store = env.create_dyn_database(Some("common"))?;
let indexes_store = env.create_database::<Str, Unit>(Some("indexes"))?;
// list all indexes that needs to be opened
let mut must_open = Vec::new();
let reader = env.read_txn()?;
for result in indexes_store.iter(&reader)? {
let (index_name, _) = result?;
must_open.push(index_name.to_owned());
}
reader.abort();
// open the previously aggregated indexes
let mut indexes = HashMap::new();
for index_name in must_open {
let (sender, receiver) = crossbeam_channel::bounded(100);
let index = match store::open(&env, &index_name, sender.clone())? {
Some(index) => index,
None => {
log::warn!(
"the index {} doesn't exist or has not all the databases",
index_name
);
continue;
}
};
let update_fn = Arc::new(ArcSwapFn::empty());
let env_clone = env.clone();
let index_clone = index.clone();
let update_fn_clone = update_fn.clone();
let handle = thread::spawn(move || {
update_awaiter(receiver, env_clone, update_fn_clone, index_clone)
});
// send an update notification to make sure that
// possible pre-boot updates are consumed
sender.send(()).unwrap();
let result = indexes.insert(index_name, (index, update_fn, handle));
assert!(
result.is_none(),
"The index should not have been already open"
);
}
Ok(Database {
env,
common_store,
indexes_store,
indexes: RwLock::new(indexes),
})
}
pub fn open_index(&self, name: impl AsRef<str>) -> Option<Index> {
let indexes_lock = self.indexes.read().unwrap();
match indexes_lock.get(name.as_ref()) {
Some((index, ..)) => Some(index.clone()),
None => None,
}
}
pub fn create_index(&self, name: impl AsRef<str>) -> MResult<Index> {
let name = name.as_ref();
let mut indexes_lock = self.indexes.write().unwrap();
match indexes_lock.entry(name.to_owned()) {
Entry::Occupied(_) => Err(crate::Error::IndexAlreadyExists),
Entry::Vacant(entry) => {
let (sender, receiver) = crossbeam_channel::bounded(100);
let index = store::create(&self.env, name, sender)?;
let mut writer = self.env.write_txn()?;
self.indexes_store.put(&mut writer, name, &())?;
let env_clone = self.env.clone();
let index_clone = index.clone();
let no_update_fn = Arc::new(ArcSwapFn::empty());
let no_update_fn_clone = no_update_fn.clone();
let handle = thread::spawn(move || {
update_awaiter(receiver, env_clone, no_update_fn_clone, index_clone)
});
writer.commit()?;
entry.insert((index.clone(), no_update_fn, handle));
Ok(index)
}
}
}
pub fn set_update_callback(&self, name: impl AsRef<str>, update_fn: BoxUpdateFn) -> bool {
let indexes_lock = self.indexes.read().unwrap();
match indexes_lock.get(name.as_ref()) {
Some((_, current_update_fn, _)) => {
let update_fn = Some(Arc::new(update_fn));
current_update_fn.swap(update_fn);
true
}
None => false,
}
}
pub fn unset_update_callback(&self, name: impl AsRef<str>) -> bool {
let indexes_lock = self.indexes.read().unwrap();
match indexes_lock.get(name.as_ref()) {
Some((_, current_update_fn, _)) => {
current_update_fn.swap(None);
true
}
None => false,
}
}
pub fn copy_and_compact_to_path<P: AsRef<Path>>(&self, path: P) -> ZResult<File> {
self.env.copy_to_path(path, CompactionOption::Enabled)
}
pub fn indexes_names(&self) -> MResult<Vec<String>> {
let indexes = self.indexes.read().unwrap();
Ok(indexes.keys().cloned().collect())
}
pub fn common_store(&self) -> zlmdb::DynDatabase {
self.common_store
}
}


@ -1,168 +0,0 @@
#[cfg(test)]
#[macro_use]
extern crate assert_matches;
mod automaton;
pub mod criterion;
mod database;
mod distinct_map;
mod error;
mod number;
mod query_builder;
mod ranked_map;
mod raw_document;
pub mod raw_indexer;
mod reordered_attrs;
pub mod serde;
pub mod store;
mod update;
pub use self::database::{BoxUpdateFn, Database};
pub use self::error::{Error, MResult};
pub use self::number::{Number, ParseNumberError};
pub use self::ranked_map::RankedMap;
pub use self::raw_document::RawDocument;
pub use self::store::Index;
pub use self::update::{UpdateResult, UpdateStatus, UpdateType};
use ::serde::{Deserialize, Serialize};
use zerocopy::{AsBytes, FromBytes};
/// Represent an internally generated document unique identifier.
///
/// It is used to inform the database the document you want to deserialize.
/// Helpful for custom ranking.
#[derive(
Debug,
Copy,
Clone,
Eq,
PartialEq,
PartialOrd,
Ord,
Hash,
Serialize,
Deserialize,
AsBytes,
FromBytes,
)]
#[repr(C)]
pub struct DocumentId(pub u64);
/// This structure represent the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocIndex {
/// The document identifier where the word was found.
pub document_id: DocumentId,
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
pub word_index: u16,
/// The position in bytes where the word was found
/// along with the length of it.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
pub char_length: u16,
}
/// This structure represent a matching word with informations
/// on the location of the word in the document.
///
/// The order of the field is important because it defines
/// the way these structures are ordered between themselves.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Highlight {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
/// The position in bytes where the word was found.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
/// The length in bytes of the found word.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_length: u16,
}
#[doc(hidden)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TmpMatch {
pub query_index: u32,
pub distance: u8,
pub attribute: u16,
pub word_index: u16,
pub is_exact: bool,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Document {
pub id: DocumentId,
pub highlights: Vec<Highlight>,
#[cfg(test)]
pub matches: Vec<TmpMatch>,
}
impl Document {
#[cfg(not(test))]
fn from_raw(raw: RawDocument) -> Document {
Document {
id: raw.id,
highlights: raw.highlights,
}
}
#[cfg(test)]
fn from_raw(raw: RawDocument) -> Document {
let len = raw.query_index().len();
let mut matches = Vec::with_capacity(len);
let query_index = raw.query_index();
let distance = raw.distance();
let attribute = raw.attribute();
let word_index = raw.word_index();
let is_exact = raw.is_exact();
for i in 0..len {
let match_ = TmpMatch {
query_index: query_index[i],
distance: distance[i],
attribute: attribute[i],
word_index: word_index[i],
is_exact: is_exact[i],
};
matches.push(match_);
}
Document {
id: raw.id,
matches,
highlights: raw.highlights,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::mem;
#[test]
fn docindex_mem_size() {
assert_eq!(mem::size_of::<DocIndex>(), 16);
}
}


@ -1,101 +0,0 @@
use crate::RankedMap;
use meilidb_schema::Schema;
use std::sync::Arc;
use zlmdb::types::{ByteSlice, OwnedType, Serde, Str};
use zlmdb::Result as ZResult;
const CUSTOMS_KEY: &str = "customs-key";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const SCHEMA_KEY: &str = "schema";
const SYNONYMS_KEY: &str = "synonyms";
const WORDS_KEY: &str = "words";
#[derive(Copy, Clone)]
pub struct Main {
pub(crate) main: zlmdb::DynDatabase,
}
impl Main {
pub fn put_words_fst(self, writer: &mut zlmdb::RwTxn, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<Str, ByteSlice>(writer, WORDS_KEY, bytes)
}
pub fn words_fst(self, reader: &zlmdb::RoTxn) -> ZResult<Option<fst::Set>> {
match self.main.get::<Str, ByteSlice>(reader, WORDS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
}
None => Ok(None),
}
}
pub fn put_schema(self, writer: &mut zlmdb::RwTxn, schema: &Schema) -> ZResult<()> {
self.main
.put::<Str, Serde<Schema>>(writer, SCHEMA_KEY, schema)
}
pub fn schema(self, reader: &zlmdb::RoTxn) -> ZResult<Option<Schema>> {
self.main.get::<Str, Serde<Schema>>(reader, SCHEMA_KEY)
}
pub fn put_ranked_map(self, writer: &mut zlmdb::RwTxn, ranked_map: &RankedMap) -> ZResult<()> {
self.main
.put::<Str, Serde<RankedMap>>(writer, RANKED_MAP_KEY, &ranked_map)
}
pub fn ranked_map(self, reader: &zlmdb::RoTxn) -> ZResult<Option<RankedMap>> {
self.main
.get::<Str, Serde<RankedMap>>(reader, RANKED_MAP_KEY)
}
pub fn put_synonyms_fst(self, writer: &mut zlmdb::RwTxn, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)
}
pub fn synonyms_fst(self, reader: &zlmdb::RoTxn) -> ZResult<Option<fst::Set>> {
match self.main.get::<Str, ByteSlice>(reader, SYNONYMS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
}
None => Ok(None),
}
}
pub fn put_number_of_documents<F>(self, writer: &mut zlmdb::RwTxn, f: F) -> ZResult<u64>
where
F: Fn(u64) -> u64,
{
let new = self.number_of_documents(writer).map(f)?;
self.main
.put::<Str, OwnedType<u64>>(writer, NUMBER_OF_DOCUMENTS_KEY, &new)?;
Ok(new)
}
pub fn number_of_documents(self, reader: &zlmdb::RoTxn) -> ZResult<u64> {
match self
.main
.get::<Str, OwnedType<u64>>(reader, NUMBER_OF_DOCUMENTS_KEY)?
{
Some(value) => Ok(value),
None => Ok(0),
}
}
pub fn put_customs(self, writer: &mut zlmdb::RwTxn, customs: &[u8]) -> ZResult<()> {
self.main
.put::<Str, ByteSlice>(writer, CUSTOMS_KEY, customs)
}
pub fn customs<'txn>(self, reader: &'txn zlmdb::RoTxn) -> ZResult<Option<&'txn [u8]>> {
self.main.get::<Str, ByteSlice>(reader, CUSTOMS_KEY)
}
}


@ -1,81 +0,0 @@
use super::BEU64;
use crate::update::Update;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use zlmdb::types::OwnedType;
use zlmdb::{BytesDecode, BytesEncode, Result as ZResult};
pub struct SerdeJson<T>(std::marker::PhantomData<T>);
impl<T> BytesEncode for SerdeJson<T>
where
T: Serialize,
{
type EItem = T;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
serde_json::to_vec(item).map(Cow::Owned).ok()
}
}
impl<'a, T: 'a> BytesDecode<'a> for SerdeJson<T>
where
T: Deserialize<'a> + Clone,
{
type DItem = T;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
serde_json::from_slice(bytes).ok()
}
}
#[derive(Copy, Clone)]
pub struct Updates {
pub(crate) updates: zlmdb::Database<OwnedType<BEU64>, SerdeJson<Update>>,
}
impl Updates {
// TODO do not trigger deserialize if possible
pub fn last_update_id(self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, Update)>> {
match self.updates.last(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
}
// TODO do not trigger deserialize if possible
fn first_update_id(self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, Update)>> {
match self.updates.first(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
}
// TODO do not trigger deserialize if possible
pub fn contains(self, reader: &zlmdb::RoTxn, update_id: u64) -> ZResult<bool> {
let update_id = BEU64::new(update_id);
self.updates.get(reader, &update_id).map(|v| v.is_some())
}
pub fn put_update(
self,
writer: &mut zlmdb::RwTxn,
update_id: u64,
update: &Update,
) -> ZResult<()> {
// TODO prefer using serde_json?
let update_id = BEU64::new(update_id);
self.updates.put(writer, &update_id, update)
}
pub fn pop_front(self, writer: &mut zlmdb::RwTxn) -> ZResult<Option<(u64, Update)>> {
match self.first_update_id(writer)? {
Some((update_id, update)) => {
let key = BEU64::new(update_id);
self.updates.delete(writer, &key)?;
Ok(Some((update_id, update)))
}
None => Ok(None),
}
}
}


@ -1,37 +0,0 @@
use super::BEU64;
use crate::update::UpdateResult;
use zlmdb::types::{OwnedType, Serde};
use zlmdb::Result as ZResult;
#[derive(Copy, Clone)]
pub struct UpdatesResults {
pub(crate) updates_results: zlmdb::Database<OwnedType<BEU64>, Serde<UpdateResult>>,
}
impl UpdatesResults {
pub fn last_update_id(self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, UpdateResult)>> {
match self.updates_results.last(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
}
pub fn put_update_result(
self,
writer: &mut zlmdb::RwTxn,
update_id: u64,
update_result: &UpdateResult,
) -> ZResult<()> {
let update_id = BEU64::new(update_id);
self.updates_results.put(writer, &update_id, update_result)
}
pub fn update_result(
self,
reader: &zlmdb::RoTxn,
update_id: u64,
) -> ZResult<Option<UpdateResult>> {
let update_id = BEU64::new(update_id);
self.updates_results.get(reader, &update_id)
}
}


@ -1,194 +0,0 @@
use std::collections::{HashMap, HashSet};
use fst::{set::OpBuilder, SetBuilder};
use sdset::{duo::Union, SetOperation};
use serde::Serialize;
use crate::raw_indexer::RawIndexer;
use crate::serde::{extract_document_id, RamDocumentStore, Serializer};
use crate::store;
use crate::update::{apply_documents_deletion, next_update_id, Update};
use crate::{Error, MResult, RankedMap};
pub struct DocumentsAddition<D> {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: crossbeam_channel::Sender<()>,
documents: Vec<D>,
}
impl<D> DocumentsAddition<D> {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: crossbeam_channel::Sender<()>,
) -> DocumentsAddition<D> {
DocumentsAddition {
updates_store,
updates_results_store,
updates_notifier,
documents: Vec::new(),
}
}
pub fn update_document(&mut self, document: D) {
self.documents.push(document);
}
pub fn finalize(self, writer: &mut zlmdb::RwTxn) -> MResult<u64>
where
D: serde::Serialize,
{
let _ = self.updates_notifier.send(());
let update_id = push_documents_addition(
writer,
self.updates_store,
self.updates_results_store,
self.documents,
)?;
Ok(update_id)
}
}
impl<D> Extend<D> for DocumentsAddition<D> {
fn extend<T: IntoIterator<Item = D>>(&mut self, iter: T) {
self.documents.extend(iter)
}
}
pub fn push_documents_addition<D: serde::Serialize>(
writer: &mut zlmdb::RwTxn,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
addition: Vec<D>,
) -> MResult<u64> {
let mut values = Vec::with_capacity(addition.len());
for add in addition {
let vec = serde_json::to_vec(&add)?;
let add = serde_json::from_slice(&vec)?;
values.push(add);
}
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::DocumentsAddition(values);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
pub fn apply_documents_addition(
writer: &mut zlmdb::RwTxn,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
mut ranked_map: RankedMap,
addition: Vec<serde_json::Value>,
) -> MResult<()> {
let mut document_ids = HashSet::new();
let mut document_store = RamDocumentStore::new();
let mut document_fields_counts = HashMap::new();
let mut indexer = RawIndexer::new();
let schema = match main_store.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
let identifier = schema.identifier_name();
for document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
// 1. store the document id for future deletion
document_ids.insert(document_id);
// 2. index the document fields in ram stores
let serializer = Serializer {
schema: &schema,
document_store: &mut document_store,
document_fields_counts: &mut document_fields_counts,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
// 1. remove the previous documents match indexes
let documents_to_insert = document_ids.iter().cloned().collect();
apply_documents_deletion(
writer,
main_store,
documents_fields_store,
documents_fields_counts_store,
postings_lists_store,
docs_words_store,
ranked_map.clone(),
documents_to_insert,
)?;
// 2. insert new document attributes in the database
for ((id, attr), value) in document_store.into_inner() {
documents_fields_store.put_document_field(writer, id, attr, &value)?;
}
// 3. insert new document attributes counts
for ((id, attr), count) in document_fields_counts {
documents_fields_counts_store.put_document_field_count(writer, id, attr, count)?;
}
let indexed = indexer.build();
let mut delta_words_builder = SetBuilder::memory();
for (word, delta_set) in indexed.words_doc_indexes {
delta_words_builder.insert(&word).unwrap();
let set = match postings_lists_store.postings_list(writer, &word)? {
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
None => delta_set,
};
postings_lists_store.put_postings_list(writer, &word, &set)?;
}
for (id, words) in indexed.docs_words {
docs_words_store.put_doc_words(writer, id, &words)?;
}
let delta_words = delta_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let words = match main_store.words_fst(writer)? {
Some(words) => {
let op = OpBuilder::new()
.add(words.stream())
.add(delta_words.stream())
.r#union();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
}
None => delta_words,
};
main_store.put_words_fst(writer, &words)?;
main_store.put_ranked_map(writer, &ranked_map)?;
let inserted_documents_len = document_ids.len() as u64;
main_store.put_number_of_documents(writer, |old| old + inserted_documents_len)?;
Ok(())
}


@ -1,223 +0,0 @@
mod customs_update;
mod documents_addition;
mod documents_deletion;
mod schema_update;
mod synonyms_addition;
mod synonyms_deletion;
pub use self::customs_update::{apply_customs_update, push_customs_update};
pub use self::documents_addition::{apply_documents_addition, DocumentsAddition};
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
pub use self::schema_update::{apply_schema_update, push_schema_update};
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
use std::cmp;
use std::collections::BTreeMap;
use std::time::{Duration, Instant};
use log::debug;
use serde::{Deserialize, Serialize};
use zlmdb::Result as ZResult;
use crate::{store, DocumentId, MResult, RankedMap};
use meilidb_schema::Schema;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Update {
Schema(Schema),
Customs(Vec<u8>),
DocumentsAddition(Vec<serde_json::Value>),
DocumentsDeletion(Vec<DocumentId>),
SynonymsAddition(BTreeMap<String, Vec<String>>),
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateType {
Schema { schema: Schema },
Customs,
DocumentsAddition { number: usize },
DocumentsDeletion { number: usize },
SynonymsAddition { number: usize },
SynonymsDeletion { number: usize },
}
#[derive(Clone, Serialize, Deserialize)]
pub struct DetailedDuration {
pub main: Duration,
}
#[derive(Clone, Serialize, Deserialize)]
pub struct UpdateResult {
pub update_id: u64,
pub update_type: UpdateType,
pub result: Result<(), String>,
pub detailed_duration: DetailedDuration,
}
#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateStatus {
Enqueued,
Processed(UpdateResult),
Unknown,
}
pub fn update_status(
reader: &zlmdb::RoTxn,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
update_id: u64,
) -> MResult<UpdateStatus> {
match updates_results_store.update_result(reader, update_id)? {
Some(result) => Ok(UpdateStatus::Processed(result)),
None => {
if updates_store.contains(reader, update_id)? {
Ok(UpdateStatus::Enqueued)
} else {
Ok(UpdateStatus::Unknown)
}
}
}
}
pub fn next_update_id(
writer: &mut zlmdb::RwTxn,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
) -> ZResult<u64> {
let last_update_id = updates_store.last_update_id(writer)?;
let last_update_id = last_update_id.map(|(n, _)| n);
let last_update_results_id = updates_results_store.last_update_id(writer)?;
let last_update_results_id = last_update_results_id.map(|(n, _)| n);
let max_update_id = cmp::max(last_update_id, last_update_results_id);
let new_update_id = max_update_id.map_or(0, |n| n + 1);
Ok(new_update_id)
}
pub fn update_task(
writer: &mut zlmdb::RwTxn,
index: store::Index,
) -> MResult<Option<UpdateResult>> {
let (update_id, update) = match index.updates.pop_front(writer)? {
Some(value) => value,
None => return Ok(None),
};
debug!("Processing update number {}", update_id);
let (update_type, result, duration) = match update {
Update::Schema(schema) => {
let start = Instant::now();
let update_type = UpdateType::Schema {
schema: schema.clone(),
};
let result = apply_schema_update(writer, index.main, &schema);
(update_type, result, start.elapsed())
}
Update::Customs(customs) => {
let start = Instant::now();
let update_type = UpdateType::Customs;
let result = apply_customs_update(writer, index.main, &customs).map_err(Into::into);
(update_type, result, start.elapsed())
}
Update::DocumentsAddition(documents) => {
let start = Instant::now();
let ranked_map = match index.main.ranked_map(writer)? {
Some(ranked_map) => ranked_map,
None => RankedMap::default(),
};
let update_type = UpdateType::DocumentsAddition {
number: documents.len(),
};
let result = apply_documents_addition(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
ranked_map,
documents,
);
(update_type, result, start.elapsed())
}
Update::DocumentsDeletion(documents) => {
let start = Instant::now();
let ranked_map = match index.main.ranked_map(writer)? {
Some(ranked_map) => ranked_map,
None => RankedMap::default(),
};
let update_type = UpdateType::DocumentsDeletion {
number: documents.len(),
};
let result = apply_documents_deletion(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
ranked_map,
documents,
);
(update_type, result, start.elapsed())
}
Update::SynonymsAddition(synonyms) => {
let start = Instant::now();
let update_type = UpdateType::SynonymsAddition {
number: synonyms.len(),
};
let result = apply_synonyms_addition(writer, index.main, index.synonyms, synonyms);
(update_type, result, start.elapsed())
}
Update::SynonymsDeletion(synonyms) => {
let start = Instant::now();
let update_type = UpdateType::SynonymsDeletion {
number: synonyms.len(),
};
let result = apply_synonyms_deletion(writer, index.main, index.synonyms, synonyms);
(update_type, result, start.elapsed())
}
};
debug!(
"Processed update number {} {:?} {:?}",
update_id, update_type, result
);
let detailed_duration = DetailedDuration { main: duration };
let status = UpdateResult {
update_id,
update_type,
result: result.map_err(|e| e.to_string()),
detailed_duration,
};
index
.updates_results
.put_update_result(writer, update_id, &status)?;
Ok(Some(status))
}


@ -1,31 +0,0 @@
use crate::update::{next_update_id, Update};
use crate::{error::UnsupportedOperation, store, MResult};
use meilidb_schema::Schema;
pub fn apply_schema_update(
writer: &mut zlmdb::RwTxn,
main_store: store::Main,
new_schema: &Schema,
) -> MResult<()> {
if main_store.schema(writer)?.is_some() {
return Err(UnsupportedOperation::SchemaAlreadyExists.into());
}
main_store
.put_schema(writer, new_schema)
.map_err(Into::into)
}
pub fn push_schema_update(
writer: &mut zlmdb::RwTxn,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
schema: Schema,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::Schema(schema);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}


@ -1,6 +1,6 @@
[package] [package]
name = "meilidb-core" name = "meilisearch-core"
version = "0.1.0" version = "0.8.4"
authors = ["Kerollmops <clement@meilisearch.com>"] authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018" edition = "2018"
@ -8,36 +8,27 @@ edition = "2018"
arc-swap = "0.4.3" arc-swap = "0.4.3"
bincode = "1.1.4" bincode = "1.1.4"
byteorder = "1.3.2" byteorder = "1.3.2"
crossbeam-channel = "0.3.9" chrono = { version = "0.4.9", features = ["serde"] }
crossbeam-channel = "0.4.0"
deunicode = "1.0.0" deunicode = "1.0.0"
env_logger = "0.7.0" env_logger = "0.7.0"
fst = { version = "0.3.5", default-features = false }
hashbrown = { version = "0.6.0", features = ["serde"] } hashbrown = { version = "0.6.0", features = ["serde"] }
heed = "0.6.0"
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
log = "0.4.8" log = "0.4.8"
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" } meilisearch-schema = { path = "../meilisearch-schema", version = "0.8.4" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.8.4" }
meilisearch-types = { path = "../meilisearch-types", version = "0.8.4" }
once_cell = "1.2.0" once_cell = "1.2.0"
ordered-float = { version = "1.0.2", features = ["serde"] } ordered-float = { version = "1.0.2", features = ["serde"] }
sdset = "0.3.3" sdset = "0.3.3"
serde = { version = "1.0.101", features = ["derive"] } serde = { version = "1.0.101", features = ["derive"] }
serde_json = "1.0.41" serde_json = "1.0.41"
siphasher = "0.3.0" siphasher = "0.3.1"
slice-group-by = "0.2.6" slice-group-by = "0.2.6"
zerocopy = "0.2.8" zerocopy = "0.2.8"
[dependencies.zlmdb]
package = "zerocopy-lmdb"
git = "https://github.com/Kerollmops/zerocopy-lmdb.git"
branch = "master"
[dependencies.levenshtein_automata]
git = "https://github.com/Kerollmops/levenshtein-automata.git"
branch = "arc-byte-slice"
features = ["fst_automaton"]
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dev-dependencies] [dev-dependencies]
assert_matches = "1.3" assert_matches = "1.3"
csv = "1.0.7" csv = "1.0.7"


@ -12,10 +12,8 @@ use serde::{Deserialize, Serialize};
use structopt::StructOpt; use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilidb_core::{Database, Highlight, UpdateResult}; use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
use meilidb_schema::SchemaAttr; use meilisearch_schema::SchemaAttr;
const INDEX_NAME: &str = "default";
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
struct IndexCommand { struct IndexCommand {
@ -23,6 +21,9 @@ struct IndexCommand {
#[structopt(parse(from_os_str))] #[structopt(parse(from_os_str))]
database_path: PathBuf, database_path: PathBuf,
#[structopt(long, default_value = "default")]
index_uid: String,
/// The csv file to index. /// The csv file to index.
#[structopt(parse(from_os_str))] #[structopt(parse(from_os_str))]
csv_data_path: PathBuf, csv_data_path: PathBuf,
@ -40,10 +41,13 @@ struct IndexCommand {
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
struct SearchCommand { struct SearchCommand {
/// The destination where the database must be created. /// The path of the database to work with.
#[structopt(parse(from_os_str))] #[structopt(parse(from_os_str))]
database_path: PathBuf, database_path: PathBuf,
#[structopt(long, default_value = "default")]
index_uid: String,
/// Timeout after which the search will return results. /// Timeout after which the search will return results.
#[structopt(long)] #[structopt(long)]
fetch_timeout_ms: Option<u64>, fetch_timeout_ms: Option<u64>,
@ -65,10 +69,21 @@ struct SearchCommand {
displayed_fields: Vec<String>, displayed_fields: Vec<String>,
} }
#[derive(Debug, StructOpt)]
struct ShowUpdatesCommand {
/// The path of the database to work with.
#[structopt(parse(from_os_str))]
database_path: PathBuf,
#[structopt(long, default_value = "default")]
index_uid: String,
}
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
enum Command { enum Command {
Index(IndexCommand), Index(IndexCommand),
Search(SearchCommand), Search(SearchCommand),
ShowUpdates(ShowUpdatesCommand),
} }
impl Command { impl Command {
@ -76,6 +91,7 @@ impl Command {
match self { match self {
Command::Index(command) => &command.database_path, Command::Index(command) => &command.database_path,
Command::Search(command) => &command.database_path, Command::Search(command) => &command.database_path,
Command::ShowUpdates(command) => &command.database_path,
} }
} }
} }
@ -88,33 +104,34 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
let start = Instant::now(); let start = Instant::now();
let (sender, receiver) = mpsc::sync_channel(100); let (sender, receiver) = mpsc::sync_channel(100);
let update_fn = move |update: UpdateResult| sender.send(update.update_id).unwrap(); let update_fn =
let index = match database.open_index(INDEX_NAME) { move |_name: &str, update: ProcessedUpdateResult| sender.send(update.update_id).unwrap();
let index = match database.open_index(&command.index_uid) {
Some(index) => index, Some(index) => index,
None => database.create_index(INDEX_NAME).unwrap(), None => database.create_index(&command.index_uid).unwrap(),
}; };
let done = database.set_update_callback(INDEX_NAME, Box::new(update_fn)); database.set_update_callback(Box::new(update_fn));
assert!(done, "could not set the index update function");
let env = &database.env; let db = &database;
let schema = { let schema = {
let string = fs::read_to_string(&command.schema)?; let string = fs::read_to_string(&command.schema)?;
toml::from_str(&string).unwrap() toml::from_str(&string).unwrap()
}; };
let mut writer = env.write_txn().unwrap(); let reader = db.main_read_txn().unwrap();
match index.main.schema(&writer)? { let mut update_writer = db.update_write_txn().unwrap();
match index.main.schema(&reader)? {
Some(current_schema) => { Some(current_schema) => {
if current_schema != schema { if current_schema != schema {
return Err(meilidb_core::Error::SchemaDiffer.into()); return Err(meilisearch_core::Error::SchemaDiffer.into());
} }
writer.abort(); update_writer.abort();
} }
None => { None => {
index.schema_update(&mut writer, schema)?; index.schema_update(&mut update_writer, schema)?;
writer.commit().unwrap(); update_writer.commit().unwrap();
} }
} }
@ -157,10 +174,10 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
println!(); println!();
let mut writer = env.write_txn().unwrap(); let mut update_writer = db.update_write_txn().unwrap();
println!("committing update..."); println!("committing update...");
let update_id = additions.finalize(&mut writer)?; let update_id = additions.finalize(&mut update_writer)?;
writer.commit().unwrap(); update_writer.commit().unwrap();
max_update_id = max_update_id.max(update_id); max_update_id = max_update_id.max(update_id);
println!("committed update {}", update_id); println!("committed update {}", update_id);
} }
@ -179,8 +196,9 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
); );
if let Some(path) = command.compact_to_path { if let Some(path) = command.compact_to_path {
fs::create_dir_all(&path)?;
let start = Instant::now(); let start = Instant::now();
let _file = database.copy_and_compact_to_path(&path)?; let _file = database.copy_and_compact_to_path(path.join("data.mdb"))?;
println!( println!(
"database compacted in {:.2?} at: {:?}", "database compacted in {:.2?} at: {:?}",
start.elapsed(), start.elapsed(),
@ -201,7 +219,11 @@ fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
_ => unreachable!(), _ => unreachable!(),
}; };
if highlighted { if highlighted {
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?; stdout.set_color(
ColorSpec::new()
.set_fg(Some(Color::Yellow))
.set_underline(true),
)?;
} }
write!(&mut stdout, "{}", &text[start..end])?; write!(&mut stdout, "{}", &text[start..end])?;
stdout.reset()?; stdout.reset()?;
@ -295,15 +317,16 @@ fn crop_text(
} }
fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<dyn Error>> { fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<dyn Error>> {
let env = &database.env; let db = &database;
let index = database let index = database
.open_index(INDEX_NAME) .open_index(&command.index_uid)
.expect("Could not find index"); .expect("Could not find index");
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let schema = index.main.schema(&reader)?; let schema = index.main.schema(&reader)?;
reader.abort(); reader.abort();
let schema = schema.ok_or(meilidb_core::Error::SchemaMissing)?;
let schema = schema.ok_or(meilisearch_core::Error::SchemaMissing)?;
let fields = command.displayed_fields.iter().map(String::as_str); let fields = command.displayed_fields.iter().map(String::as_str);
let fields = HashSet::from_iter(fields); let fields = HashSet::from_iter(fields);
@ -317,7 +340,7 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
Ok(query) => { Ok(query) => {
let start_total = Instant::now(); let start_total = Instant::now();
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let ref_index = &index; let ref_index = &index;
let ref_reader = &reader; let ref_reader = &reader;
@ -418,6 +441,23 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
Ok(()) Ok(())
} }
fn show_updates_command(
command: ShowUpdatesCommand,
database: Database,
) -> Result<(), Box<dyn Error>> {
let db = &database;
let index = database
.open_index(&command.index_uid)
.expect("Could not find index");
let reader = db.update_read_txn().unwrap();
let updates = index.all_updates_status(&reader)?;
println!("{:#?}", updates);
reader.abort();
Ok(())
}
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
env_logger::init(); env_logger::init();
@ -427,5 +467,6 @@ fn main() -> Result<(), Box<dyn Error>> {
match opt { match opt {
Command::Index(command) => index_command(command, database), Command::Index(command) => index_command(command, database),
Command::Search(command) => search_command(command, database), Command::Search(command) => search_command(command, database),
Command::ShowUpdates(command) => show_updates_command(command, database),
} }
} }


@ -2,12 +2,13 @@ mod dfa;
mod query_enhancer; mod query_enhancer;
use std::cmp::Reverse; use std::cmp::Reverse;
use std::vec; use std::{cmp, vec};
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
use levenshtein_automata::DFA; use levenshtein_automata::DFA;
use meilidb_tokenizer::{is_cjk, split_query_string}; use meilisearch_tokenizer::{is_cjk, split_query_string};
use crate::database::MainT;
use crate::error::MResult; use crate::error::MResult;
use crate::store; use crate::store;
@ -18,27 +19,55 @@ use self::query_enhancer::QueryEnhancerBuilder;
const NGRAMS: usize = 3; const NGRAMS: usize = 3;
pub struct AutomatonProducer { pub struct AutomatonProducer {
automatons: Vec<Vec<Automaton>>, automatons: Vec<AutomatonGroup>,
} }
impl AutomatonProducer { impl AutomatonProducer {
pub fn new( pub fn new(
reader: &zlmdb::RoTxn, reader: &heed::RoTxn<MainT>,
query: &str, query: &str,
main_store: store::Main, main_store: store::Main,
postings_list_store: store::PostingsLists,
synonyms_store: store::Synonyms, synonyms_store: store::Synonyms,
) -> MResult<(AutomatonProducer, QueryEnhancer)> { ) -> MResult<(AutomatonProducer, QueryEnhancer)> {
let (automatons, query_enhancer) = let (automatons, query_enhancer) = generate_automatons(
generate_automatons(reader, query, main_store, synonyms_store)?; reader,
query,
main_store,
postings_list_store,
synonyms_store,
)?;
Ok((AutomatonProducer { automatons }, query_enhancer)) Ok((AutomatonProducer { automatons }, query_enhancer))
} }
pub fn into_iter(self) -> vec::IntoIter<Vec<Automaton>> { pub fn into_iter(self) -> vec::IntoIter<AutomatonGroup> {
self.automatons.into_iter() self.automatons.into_iter()
} }
} }
#[derive(Debug)]
pub struct AutomatonGroup {
pub is_phrase_query: bool,
pub automatons: Vec<Automaton>,
}
impl AutomatonGroup {
fn normal(automatons: Vec<Automaton>) -> AutomatonGroup {
AutomatonGroup {
is_phrase_query: false,
automatons,
}
}
fn phrase_query(automatons: Vec<Automaton>) -> AutomatonGroup {
AutomatonGroup {
is_phrase_query: true,
automatons,
}
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct Automaton { pub struct Automaton {
pub index: usize, pub index: usize,
@ -102,12 +131,41 @@ pub fn normalize_str(string: &str) -> String {
string string
} }
fn split_best_frequency<'a>(
reader: &heed::RoTxn<MainT>,
word: &'a str,
postings_lists_store: store::PostingsLists,
) -> MResult<Option<(&'a str, &'a str)>> {
let chars = word.char_indices().skip(1);
let mut best = None;
for (i, _) in chars {
let (left, right) = word.split_at(i);
let left_freq = postings_lists_store
.postings_list(reader, left.as_ref())?
.map_or(0, |i| i.len());
let right_freq = postings_lists_store
.postings_list(reader, right.as_ref())?
.map_or(0, |i| i.len());
let min_freq = cmp::min(left_freq, right_freq);
if min_freq != 0 && best.map_or(true, |(old, _, _)| min_freq > old) {
best = Some((min_freq, left, right));
}
}
Ok(best.map(|(_, l, r)| (l, r)))
}
fn generate_automatons( fn generate_automatons(
reader: &zlmdb::RoTxn, reader: &heed::RoTxn<MainT>,
query: &str, query: &str,
main_store: store::Main, main_store: store::Main,
postings_lists_store: store::PostingsLists,
synonym_store: store::Synonyms, synonym_store: store::Synonyms,
) -> MResult<(Vec<Vec<Automaton>>, QueryEnhancer)> { ) -> MResult<(Vec<AutomatonGroup>, QueryEnhancer)> {
let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace); let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect(); let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
let synonyms = match main_store.synonyms_fst(reader)? { let synonyms = match main_store.synonyms_fst(reader)? {
@ -136,7 +194,7 @@ fn generate_automatons(
original_automatons.push(automaton); original_automatons.push(automaton);
} }
automatons.push(original_automatons); automatons.push(AutomatonGroup::normal(original_automatons));
for n in 1..=NGRAMS { for n in 1..=NGRAMS {
let mut ngrams = query_words.windows(n).enumerate().peekable(); let mut ngrams = query_words.windows(n).enumerate().peekable();
@ -188,13 +246,27 @@ fn generate_automatons(
Automaton::non_exact(automaton_index, n, synonym) Automaton::non_exact(automaton_index, n, synonym)
}; };
automaton_index += 1; automaton_index += 1;
automatons.push(vec![automaton]); automatons.push(AutomatonGroup::normal(vec![automaton]));
} }
} }
} }
} }
if n != 1 { if n == 1 {
if let Some((left, right)) =
split_best_frequency(reader, &normalized, postings_lists_store)?
{
let a = Automaton::exact(automaton_index, 1, left);
enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
automaton_index += 1;
let b = Automaton::exact(automaton_index, 1, right);
enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
automaton_index += 1;
automatons.push(AutomatonGroup::phrase_query(vec![a, b]));
}
} else {
// automaton of concatenation of query words // automaton of concatenation of query words
let concat = ngram_slice.concat(); let concat = ngram_slice.concat();
let normalized = normalize_str(&concat); let normalized = normalize_str(&concat);
@ -204,16 +276,20 @@ fn generate_automatons(
let automaton = Automaton::exact(automaton_index, n, &normalized); let automaton = Automaton::exact(automaton_index, n, &normalized);
automaton_index += 1; automaton_index += 1;
automatons.push(vec![automaton]); automatons.push(AutomatonGroup::normal(vec![automaton]));
} }
} }
} }
// order automatons, the most important first, // order automatons, the most important first,
// we keep the original automatons at the front. // we keep the original automatons at the front.
automatons[1..].sort_by_key(|a| { automatons[1..].sort_by_key(|group| {
let a = a.first().unwrap(); let a = group.automatons.first().unwrap();
(Reverse(a.is_exact), a.ngram) (
Reverse(a.is_exact),
a.ngram,
Reverse(group.automatons.len()),
)
}); });
Ok((automatons, enhancer_builder.build())) Ok((automatons, enhancer_builder.build()))


@ -1,6 +1,6 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use meilidb_schema::SchemaAttr; use meilisearch_schema::SchemaAttr;
use sdset::Set; use sdset::Set;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
@ -21,16 +21,15 @@ fn number_exact_matches(
let len = group.len(); let len = group.len();
let mut found_exact = false; let mut found_exact = false;
for (pos, _) in is_exact[index..index + len] for (pos, is_exact) in is_exact[index..index + len].iter().enumerate() {
.iter() if *is_exact {
.filter(|x| **x) found_exact = true;
.enumerate() let attr = &attribute[index + pos];
{ if let Ok(pos) = fields_counts.binary_search_by_key(attr, |(a, _)| a.0) {
found_exact = true; let (_, count) = fields_counts[pos];
if let Ok(pos) = fields_counts.binary_search_by_key(&attribute[pos], |(a, _)| a.0) { if count == 1 {
let (_, count) = fields_counts[pos]; return usize::max_value();
if count == 1 { }
return usize::max_value();
} }
} }
} }


@ -4,7 +4,7 @@ use std::fmt;
use crate::criterion::Criterion; use crate::criterion::Criterion;
use crate::{RankedMap, RawDocument}; use crate::{RankedMap, RawDocument};
use meilidb_schema::{Schema, SchemaAttr}; use meilisearch_schema::{Schema, SchemaAttr};
/// An helper struct that permit to sort documents by /// An helper struct that permit to sort documents by
/// some of their stored attributes. /// some of their stored attributes.
@ -23,7 +23,7 @@ use meilidb_schema::{Schema, SchemaAttr};
/// ///
/// ```ignore /// ```ignore
/// use serde_derive::Deserialize; /// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*; /// use meilisearch::rank::criterion::*;
/// ///
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?; /// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
/// ///

File diff suppressed because it is too large


@ -12,7 +12,7 @@ pub enum Error {
SchemaMissing, SchemaMissing,
WordIndexMissing, WordIndexMissing,
MissingDocumentId, MissingDocumentId,
Zlmdb(zlmdb::Error), Zlmdb(heed::Error),
Fst(fst::Error), Fst(fst::Error),
SerdeJson(SerdeJsonError), SerdeJson(SerdeJsonError),
Bincode(bincode::Error), Bincode(bincode::Error),
@ -27,8 +27,8 @@ impl From<io::Error> for Error {
} }
} }
impl From<zlmdb::Error> for Error { impl From<heed::Error> for Error {
fn from(error: zlmdb::Error) -> Error { fn from(error: heed::Error) -> Error {
Error::Zlmdb(error) Error::Zlmdb(error)
} }
} }
@ -79,7 +79,7 @@ impl fmt::Display for Error {
SchemaMissing => write!(f, "this index does not have a schema"), SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"), WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"), MissingDocumentId => write!(f, "document id is missing"),
Zlmdb(e) => write!(f, "zlmdb error; {}", e), Zlmdb(e) => write!(f, "heed error; {}", e),
Fst(e) => write!(f, "fst error; {}", e), Fst(e) => write!(f, "fst error; {}", e),
SerdeJson(e) => write!(f, "serde json error; {}", e), SerdeJson(e) => write!(f, "serde json error; {}", e),
Bincode(e) => write!(f, "bincode error; {}", e), Bincode(e) => write!(f, "bincode error; {}", e),
@ -95,6 +95,10 @@ impl error::Error for Error {}
#[derive(Debug)] #[derive(Debug)]
pub enum UnsupportedOperation { pub enum UnsupportedOperation {
SchemaAlreadyExists, SchemaAlreadyExists,
CannotUpdateSchemaIdentifier,
CannotReorderSchemaAttribute,
CanOnlyIntroduceNewSchemaAttributesAtEnd,
CannotRemoveSchemaAttribute,
} }
impl fmt::Display for UnsupportedOperation { impl fmt::Display for UnsupportedOperation {
@ -102,6 +106,12 @@ impl fmt::Display for UnsupportedOperation {
use self::UnsupportedOperation::*; use self::UnsupportedOperation::*;
match self { match self {
SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"), SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"),
CannotUpdateSchemaIdentifier => write!(f, "Cannot update the identifier of a schema"),
CannotReorderSchemaAttribute => write!(f, "Cannot reorder the attributes of a schema"),
CanOnlyIntroduceNewSchemaAttributesAtEnd => {
write!(f, "Can only introduce new attributes at end of a schema")
}
CannotRemoveSchemaAttribute => write!(f, "Cannot remove attributes from a schema"),
} }
} }
} }


@ -0,0 +1,134 @@
use std::cmp::min;
use std::collections::BTreeMap;
use std::ops::{Index, IndexMut};
// A simple wrapper around vec so we can get contiguous but index it like it's 2D array.
struct N2Array<T> {
y_size: usize,
buf: Vec<T>,
}
impl<T: Clone> N2Array<T> {
fn new(x: usize, y: usize, value: T) -> N2Array<T> {
N2Array {
y_size: y,
buf: vec![value; x * y],
}
}
}
impl<T> Index<(usize, usize)> for N2Array<T> {
type Output = T;
#[inline]
fn index(&self, (x, y): (usize, usize)) -> &T {
&self.buf[(x * self.y_size) + y]
}
}
impl<T> IndexMut<(usize, usize)> for N2Array<T> {
#[inline]
fn index_mut(&mut self, (x, y): (usize, usize)) -> &mut T {
&mut self.buf[(x * self.y_size) + y]
}
}
pub fn prefix_damerau_levenshtein(source: &[u8], target: &[u8]) -> (u32, usize) {
let (n, m) = (source.len(), target.len());
assert!(
n <= m,
"the source string must be shorter than the target one"
);
if n == 0 {
return (m as u32, 0);
}
if m == 0 {
return (n as u32, 0);
}
if n == m && source == target {
return (0, m);
}
let inf = n + m;
let mut matrix = N2Array::new(n + 2, m + 2, 0);
matrix[(0, 0)] = inf;
for i in 0..n + 1 {
matrix[(i + 1, 0)] = inf;
matrix[(i + 1, 1)] = i;
}
for j in 0..m + 1 {
matrix[(0, j + 1)] = inf;
matrix[(1, j + 1)] = j;
}
let mut last_row = BTreeMap::new();
for (row, char_s) in source.iter().enumerate() {
let mut last_match_col = 0;
let row = row + 1;
for (col, char_t) in target.iter().enumerate() {
let col = col + 1;
let last_match_row = *last_row.get(&char_t).unwrap_or(&0);
let cost = if char_s == char_t { 0 } else { 1 };
let dist_add = matrix[(row, col + 1)] + 1;
let dist_del = matrix[(row + 1, col)] + 1;
let dist_sub = matrix[(row, col)] + cost;
let dist_trans = matrix[(last_match_row, last_match_col)]
+ (row - last_match_row - 1)
+ 1
+ (col - last_match_col - 1);
let dist = min(min(dist_add, dist_del), min(dist_sub, dist_trans));
matrix[(row + 1, col + 1)] = dist;
if cost == 0 {
last_match_col = col;
}
}
last_row.insert(char_s, row);
}
let mut minimum = (u32::max_value(), 0);
for x in n..=m {
let dist = matrix[(n + 1, x + 1)] as u32;
if dist < minimum.0 {
minimum = (dist, x)
}
}
minimum
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn matched_length() {
let query = "Levenste";
let text = "Levenshtein";
let (dist, length) = prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());
assert_eq!(dist, 1);
assert_eq!(&text[..length], "Levenshte");
}
#[test]
#[should_panic]
fn matched_length_panic() {
let query = "Levenshtein";
let text = "Levenste";
// this function will panic if the source is longer than the target
prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());
}
}
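
The pair returned by `prefix_damerau_levenshtein` is the edit distance together with the length of the target prefix that the source actually covered; the query builder later uses that length to bound highlight sizes. A minimal in-crate sketch of consuming the return value (the `covered_len` helper is hypothetical and only illustrates the contract shown in the tests above):

```rust
// Hypothetical in-crate helper; the `levenshtein` module is private to the crate.
use crate::levenshtein::prefix_damerau_levenshtein;

fn covered_len(query: &str, word: &str) -> usize {
    // The source (query) must not be longer than the target (word),
    // otherwise the function asserts.
    let (_distance, length) = prefix_damerau_levenshtein(query.as_bytes(), word.as_bytes());
    // For ("Levenste", "Levenshtein") this yields distance 1 and length 9,
    // i.e. the "Levenshte" prefix of the indexed word.
    length
}
```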

View File

@ -0,0 +1,97 @@
#[cfg(test)]
#[macro_use]
extern crate assert_matches;
mod automaton;
pub mod criterion;
mod database;
mod distinct_map;
mod error;
mod levenshtein;
mod number;
mod query_builder;
mod ranked_map;
mod raw_document;
pub mod raw_indexer;
mod reordered_attrs;
pub mod serde;
pub mod store;
mod update;
pub use self::database::{BoxUpdateFn, Database, MainT, UpdateT};
pub use self::error::{Error, MResult};
pub use self::number::{Number, ParseNumberError};
pub use self::ranked_map::RankedMap;
pub use self::raw_document::RawDocument;
pub use self::store::Index;
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
#[doc(hidden)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TmpMatch {
pub query_index: u32,
pub distance: u8,
pub attribute: u16,
pub word_index: u16,
pub is_exact: bool,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Document {
pub id: DocumentId,
pub highlights: Vec<Highlight>,
#[cfg(test)]
pub matches: Vec<TmpMatch>,
}
impl Document {
#[cfg(not(test))]
fn from_raw(raw: RawDocument) -> Document {
Document {
id: raw.id,
highlights: raw.highlights,
}
}
#[cfg(test)]
fn from_raw(raw: RawDocument) -> Document {
let len = raw.query_index().len();
let mut matches = Vec::with_capacity(len);
let query_index = raw.query_index();
let distance = raw.distance();
let attribute = raw.attribute();
let word_index = raw.word_index();
let is_exact = raw.is_exact();
for i in 0..len {
let match_ = TmpMatch {
query_index: query_index[i],
distance: distance[i],
attribute: attribute[i],
word_index: word_index[i],
is_exact: is_exact[i],
};
matches.push(match_);
}
Document {
id: raw.id,
matches,
highlights: raw.highlights,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::mem;
#[test]
fn docindex_mem_size() {
assert_eq!(mem::size_of::<DocIndex>(), 16);
}
}

View File

@ -1,3 +1,4 @@
use std::cmp::Ordering;
use std::fmt; use std::fmt;
use std::num::{ParseFloatError, ParseIntError}; use std::num::{ParseFloatError, ParseIntError};
use std::str::FromStr; use std::str::FromStr;
@ -5,7 +6,7 @@ use std::str::FromStr;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Serialize, Deserialize, Debug, Copy, Clone, Hash)]
pub enum Number { pub enum Number {
Unsigned(u64), Unsigned(u64),
Signed(i64), Signed(i64),
@ -39,6 +40,50 @@ impl FromStr for Number {
} }
} }
impl PartialEq for Number {
fn eq(&self, other: &Number) -> bool {
self.cmp(other) == Ordering::Equal
}
}
impl Eq for Number {}
impl PartialOrd for Number {
fn partial_cmp(&self, other: &Number) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for Number {
fn cmp(&self, other: &Self) -> Ordering {
use Number::{Float, Signed, Unsigned};
match (*self, *other) {
(Unsigned(a), Unsigned(b)) => a.cmp(&b),
(Unsigned(a), Signed(b)) => {
if b < 0 {
Ordering::Greater
} else {
a.cmp(&(b as u64))
}
}
(Unsigned(a), Float(b)) => (OrderedFloat(a as f64)).cmp(&b),
(Signed(a), Unsigned(b)) => {
if a < 0 {
Ordering::Less
} else {
(a as u64).cmp(&b)
}
}
(Signed(a), Signed(b)) => a.cmp(&b),
(Signed(a), Float(b)) => OrderedFloat(a as f64).cmp(&b),
(Float(a), Unsigned(b)) => a.cmp(&OrderedFloat(b as f64)),
(Float(a), Signed(b)) => a.cmp(&OrderedFloat(b as f64)),
(Float(a), Float(b)) => a.cmp(&b),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseNumberError { pub struct ParseNumberError {
uint_error: ParseIntError, uint_error: ParseIntError,
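
The hand-written `PartialEq`/`Ord` implementations give `Number` a single total order across its three variants, so values are compared numerically rather than by variant. A small sketch of the resulting behaviour, assuming the `Float` variant wraps an `OrderedFloat<f64>` as the comparison arms above imply:

```rust
use std::cmp::Ordering;

use ordered_float::OrderedFloat;

use crate::Number;

fn ordering_examples() {
    // A negative signed value sorts below any unsigned value.
    assert_eq!(Number::Unsigned(3).cmp(&Number::Signed(-1)), Ordering::Greater);
    // Floats go through OrderedFloat, so cross-variant equality is by numeric value.
    assert_eq!(Number::Unsigned(2).cmp(&Number::Float(OrderedFloat(2.0))), Ordering::Equal);
    assert!(Number::Signed(-2) < Number::Float(OrderedFloat(0.5)));
}
```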

View File

@ -1,15 +1,18 @@
use hashbrown::HashMap; use hashbrown::HashMap;
use std::mem; use std::convert::TryFrom;
use std::ops::Range; use std::ops::Range;
use std::rc::Rc; use std::rc::Rc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use std::{cmp, mem};
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
use sdset::SetBuf; use sdset::SetBuf;
use slice_group_by::{GroupBy, GroupByMut}; use slice_group_by::{GroupBy, GroupByMut};
use crate::automaton::{Automaton, AutomatonProducer, QueryEnhancer}; use crate::database::MainT;
use crate::automaton::{Automaton, AutomatonGroup, AutomatonProducer, QueryEnhancer};
use crate::distinct_map::{BufferedDistinctMap, DistinctMap}; use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
use crate::levenshtein::prefix_damerau_levenshtein;
use crate::raw_document::{raw_documents_from, RawDocument}; use crate::raw_document::{raw_documents_from, RawDocument};
use crate::{criterion::Criteria, Document, DocumentId, Highlight, TmpMatch}; use crate::{criterion::Criteria, Document, DocumentId, Highlight, TmpMatch};
use crate::{reordered_attrs::ReorderedAttrs, store, MResult}; use crate::{reordered_attrs::ReorderedAttrs, store, MResult};
@ -137,8 +140,8 @@ fn multiword_rewrite_matches(
} }
fn fetch_raw_documents( fn fetch_raw_documents(
reader: &zlmdb::RoTxn, reader: &heed::RoTxn<MainT>,
automatons: &[Automaton], automatons_groups: &[AutomatonGroup],
query_enhancer: &QueryEnhancer, query_enhancer: &QueryEnhancer,
searchables: Option<&ReorderedAttrs>, searchables: Option<&ReorderedAttrs>,
main_store: store::Main, main_store: store::Main,
@ -148,55 +151,104 @@ fn fetch_raw_documents(
     let mut matches = Vec::new();
     let mut highlights = Vec::new();
 
-    for automaton in automatons {
-        let Automaton {
-            index,
-            is_exact,
-            query_len,
-            ..
-        } = automaton;
-        let dfa = automaton.dfa();
-
-        let words = match main_store.words_fst(reader)? {
-            Some(words) => words,
-            None => return Ok(Vec::new()),
-        };
-
-        let mut stream = words.search(&dfa).into_stream();
-        while let Some(input) = stream.next() {
-            let distance = dfa.eval(input).to_u8();
-            let is_exact = *is_exact && distance == 0 && input.len() == *query_len;
-
-            let doc_indexes = match postings_lists_store.postings_list(reader, input)? {
-                Some(doc_indexes) => doc_indexes,
-                None => continue,
-            };
-
-            matches.reserve(doc_indexes.len());
-            highlights.reserve(doc_indexes.len());
-
-            for di in doc_indexes.as_ref() {
-                let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
-                if let Some(attribute) = attribute {
-                    let match_ = TmpMatch {
-                        query_index: *index as u32,
-                        distance,
-                        attribute,
-                        word_index: di.word_index,
-                        is_exact,
-                    };
-
-                    let highlight = Highlight {
-                        attribute: di.attribute,
-                        char_index: di.char_index,
-                        char_length: di.char_length,
-                    };
-
-                    matches.push((di.document_id, match_));
-                    highlights.push((di.document_id, highlight));
-                }
-            }
-        }
-    }
+    for group in automatons_groups {
+        let AutomatonGroup {
+            is_phrase_query,
+            automatons,
+        } = group;
+        let phrase_query_len = automatons.len();
+
+        let mut tmp_matches = Vec::new();
+        for (id, automaton) in automatons.into_iter().enumerate() {
+            let Automaton {
+                index,
+                is_exact,
+                query_len,
+                query,
+                ..
+            } = automaton;
+            let dfa = automaton.dfa();
+
+            let words = match main_store.words_fst(reader)? {
+                Some(words) => words,
+                None => return Ok(Vec::new()),
+            };
+
+            let mut stream = words.search(&dfa).into_stream();
+            while let Some(input) = stream.next() {
+                let distance = dfa.eval(input).to_u8();
+                let is_exact = *is_exact && distance == 0 && input.len() == *query_len;
+
+                let covered_area = if *query_len > input.len() {
+                    input.len()
+                } else {
+                    prefix_damerau_levenshtein(query.as_bytes(), input).1
+                };
+
+                let doc_indexes = match postings_lists_store.postings_list(reader, input)? {
+                    Some(doc_indexes) => doc_indexes,
+                    None => continue,
+                };
+
+                tmp_matches.reserve(doc_indexes.len());
+
+                for di in doc_indexes.as_ref() {
+                    let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
+                    if let Some(attribute) = attribute {
+                        let match_ = TmpMatch {
+                            query_index: *index as u32,
+                            distance,
+                            attribute,
+                            word_index: di.word_index,
+                            is_exact,
+                        };
+
+                        let covered_area = u16::try_from(covered_area).unwrap_or(u16::max_value());
+                        let covered_area = cmp::min(covered_area, di.char_length);
+                        let highlight = Highlight {
+                            attribute: di.attribute,
+                            char_index: di.char_index,
+                            char_length: covered_area,
+                        };
+
+                        tmp_matches.push((di.document_id, id, match_, highlight));
+                    }
+                }
+            }
+        }
+
+        if *is_phrase_query {
+            tmp_matches.sort_unstable_by_key(|(id, _, m, _)| (*id, m.attribute, m.word_index));
+            for group in tmp_matches.linear_group_by_key(|(id, _, m, _)| (*id, m.attribute)) {
+                for window in group.windows(2) {
+                    let (ida, ia, ma, ha) = window[0];
+                    let (idb, ib, mb, hb) = window[1];
+
+                    debug_assert_eq!(ida, idb);
+
+                    // if matches must follow and actually follows themselves
+                    if ia + 1 == ib && ma.word_index + 1 == mb.word_index {
+                        // TODO we must make it work for phrase query longer than 2
+                        // if the second match is the last phrase query word
+                        if ib + 1 == phrase_query_len {
+                            // insert first match
+                            matches.push((ida, ma));
+                            highlights.push((ida, ha));
+
+                            // insert second match
+                            matches.push((idb, mb));
+                            highlights.push((idb, hb));
+                        }
+                    }
+                }
+            }
+        } else {
+            for (id, _, match_, highlight) in tmp_matches {
+                matches.push((id, match_));
+                highlights.push((id, highlight));
+            }
+        }
+    }
 
     let matches = multiword_rewrite_matches(matches, &query_enhancer);
@ -285,7 +337,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
pub fn query( pub fn query(
self, self,
reader: &zlmdb::RoTxn, reader: &heed::RoTxn<MainT>,
query: &str, query: &str,
range: Range<usize>, range: Range<usize>,
) -> MResult<Vec<Document>> { ) -> MResult<Vec<Document>> {
@ -323,7 +375,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
} }
fn raw_query<'c, FI>( fn raw_query<'c, FI>(
reader: &zlmdb::RoTxn, reader: &heed::RoTxn<MainT>,
query: &str, query: &str,
range: Range<usize>, range: Range<usize>,
@ -367,15 +419,20 @@ where
let start_processing = Instant::now(); let start_processing = Instant::now();
let mut raw_documents_processed = Vec::with_capacity(range.len()); let mut raw_documents_processed = Vec::with_capacity(range.len());
let (automaton_producer, query_enhancer) = let (automaton_producer, query_enhancer) = AutomatonProducer::new(
AutomatonProducer::new(reader, query, main_store, synonyms_store)?; reader,
query,
main_store,
postings_lists_store,
synonyms_store,
)?;
let automaton_producer = automaton_producer.into_iter(); let automaton_producer = automaton_producer.into_iter();
let mut automatons = Vec::new(); let mut automatons = Vec::new();
// aggregate automatons groups by groups after time // aggregate automatons groups by groups after time
for auts in automaton_producer { for auts in automaton_producer {
automatons.extend(auts); automatons.push(auts);
// we must retrieve the documents associated // we must retrieve the documents associated
// with the current automatons // with the current automatons
@ -454,7 +511,7 @@ where
} }
fn raw_query_with_distinct<'c, FI, FD>( fn raw_query_with_distinct<'c, FI, FD>(
reader: &zlmdb::RoTxn, reader: &heed::RoTxn<MainT>,
query: &str, query: &str,
range: Range<usize>, range: Range<usize>,
@ -480,15 +537,20 @@ where
let start_processing = Instant::now(); let start_processing = Instant::now();
let mut raw_documents_processed = Vec::new(); let mut raw_documents_processed = Vec::new();
let (automaton_producer, query_enhancer) = let (automaton_producer, query_enhancer) = AutomatonProducer::new(
AutomatonProducer::new(reader, query, main_store, synonyms_store)?; reader,
query,
main_store,
postings_lists_store,
synonyms_store,
)?;
let automaton_producer = automaton_producer.into_iter(); let automaton_producer = automaton_producer.into_iter();
let mut automatons = Vec::new(); let mut automatons = Vec::new();
// aggregate automatons groups by groups after time // aggregate automatons groups by groups after time
for auts in automaton_producer { for auts in automaton_producer {
automatons.extend(auts); automatons.push(auts);
// we must retrieve the documents associated // we must retrieve the documents associated
// with the current automatons // with the current automatons
@ -634,7 +696,7 @@ mod tests {
use std::iter::FromIterator; use std::iter::FromIterator;
use fst::{IntoStreamer, Set}; use fst::{IntoStreamer, Set};
use meilidb_schema::SchemaAttr; use meilisearch_schema::SchemaAttr;
use sdset::SetBuf; use sdset::SetBuf;
use tempfile::TempDir; use tempfile::TempDir;
@ -704,8 +766,8 @@ mod tests {
} }
pub fn add_synonym(&mut self, word: &str, new: SetBuf<&str>) { pub fn add_synonym(&mut self, word: &str, new: SetBuf<&str>) {
let env = &self.database.env; let db = &self.database;
let mut writer = env.write_txn().unwrap(); let mut writer = db.main_write_txn().unwrap();
let word = word.to_lowercase(); let word = word.to_lowercase();
@ -748,8 +810,8 @@ mod tests {
let database = Database::open_or_create(&tempdir).unwrap(); let database = Database::open_or_create(&tempdir).unwrap();
let index = database.create_index("default").unwrap(); let index = database.create_index("default").unwrap();
let env = &database.env; let db = &database;
let mut writer = env.write_txn().unwrap(); let mut writer = db.main_write_txn().unwrap();
let mut words_fst = BTreeSet::new(); let mut words_fst = BTreeSet::new();
let mut postings_lists = HashMap::new(); let mut postings_lists = HashMap::new();
@ -811,8 +873,8 @@ mod tests {
("apple", &[doc_char_index(0, 2, 2)][..]), ("apple", &[doc_char_index(0, 2, 2)][..]),
]); ]);
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "iphone from apple", 0..20).unwrap(); let results = builder.query(&reader, "iphone from apple", 0..20).unwrap();
@ -834,8 +896,8 @@ mod tests {
store.add_synonym("bonjour", SetBuf::from_dirty(vec!["hello"])); store.add_synonym("bonjour", SetBuf::from_dirty(vec!["hello"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "hello", 0..20).unwrap(); let results = builder.query(&reader, "hello", 0..20).unwrap();
@ -867,8 +929,8 @@ mod tests {
store.add_synonym("bonjour", SetBuf::from_dirty(vec!["hello"])); store.add_synonym("bonjour", SetBuf::from_dirty(vec!["hello"]));
store.add_synonym("salut", SetBuf::from_dirty(vec!["hello"])); store.add_synonym("salut", SetBuf::from_dirty(vec!["hello"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "sal", 0..20).unwrap(); let results = builder.query(&reader, "sal", 0..20).unwrap();
@ -911,8 +973,8 @@ mod tests {
store.add_synonym("salutation", SetBuf::from_dirty(vec!["hello"])); store.add_synonym("salutation", SetBuf::from_dirty(vec!["hello"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "salutution", 0..20).unwrap(); let results = builder.query(&reader, "salutution", 0..20).unwrap();
@ -949,8 +1011,8 @@ mod tests {
store.add_synonym("bonjour", SetBuf::from_dirty(vec!["hello", "salut"])); store.add_synonym("bonjour", SetBuf::from_dirty(vec!["hello", "salut"]));
store.add_synonym("salut", SetBuf::from_dirty(vec!["hello", "bonjour"])); store.add_synonym("salut", SetBuf::from_dirty(vec!["hello", "bonjour"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "hello", 0..20).unwrap(); let results = builder.query(&reader, "hello", 0..20).unwrap();
@ -1037,8 +1099,8 @@ mod tests {
SetBuf::from_dirty(vec!["NY", "new york", "new york city"]), SetBuf::from_dirty(vec!["NY", "new york", "new york city"]),
); );
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "NY subway", 0..20).unwrap(); let results = builder.query(&reader, "NY subway", 0..20).unwrap();
@ -1107,8 +1169,8 @@ mod tests {
store.add_synonym("NY", SetBuf::from_dirty(vec!["york new"])); store.add_synonym("NY", SetBuf::from_dirty(vec!["york new"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "NY", 0..20).unwrap(); let results = builder.query(&reader, "NY", 0..20).unwrap();
@ -1165,8 +1227,8 @@ mod tests {
store.add_synonym("new york", SetBuf::from_dirty(vec!["NY"])); store.add_synonym("new york", SetBuf::from_dirty(vec!["NY"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "NY subway", 0..20).unwrap(); let results = builder.query(&reader, "NY subway", 0..20).unwrap();
@ -1230,8 +1292,8 @@ mod tests {
SetBuf::from_dirty(vec!["NY", "new york", "new york city"]), SetBuf::from_dirty(vec!["NY", "new york", "new york city"]),
); );
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "NY subway", 0..20).unwrap(); let results = builder.query(&reader, "NY subway", 0..20).unwrap();
@ -1311,8 +1373,8 @@ mod tests {
); );
store.add_synonym("subway", SetBuf::from_dirty(vec!["underground train"])); store.add_synonym("subway", SetBuf::from_dirty(vec!["underground train"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "NY subway broken", 0..20).unwrap(); let results = builder.query(&reader, "NY subway broken", 0..20).unwrap();
@ -1398,8 +1460,8 @@ mod tests {
); );
store.add_synonym("underground train", SetBuf::from_dirty(vec!["subway"])); store.add_synonym("underground train", SetBuf::from_dirty(vec!["subway"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder let results = builder
@ -1498,8 +1560,8 @@ mod tests {
store.add_synonym("new york", SetBuf::from_dirty(vec!["new york city"])); store.add_synonym("new york", SetBuf::from_dirty(vec!["new york city"]));
store.add_synonym("new york city", SetBuf::from_dirty(vec!["new york"])); store.add_synonym("new york city", SetBuf::from_dirty(vec!["new york"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "new york big ", 0..20).unwrap(); let results = builder.query(&reader, "new york big ", 0..20).unwrap();
@ -1535,8 +1597,8 @@ mod tests {
store.add_synonym("NY", SetBuf::from_dirty(vec!["new york city story"])); store.add_synonym("NY", SetBuf::from_dirty(vec!["new york city story"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "NY subway ", 0..20).unwrap(); let results = builder.query(&reader, "NY subway ", 0..20).unwrap();
@ -1585,8 +1647,8 @@ mod tests {
store.add_synonym("new york city", SetBuf::from_dirty(vec!["NYC"])); store.add_synonym("new york city", SetBuf::from_dirty(vec!["NYC"]));
store.add_synonym("subway", SetBuf::from_dirty(vec!["underground train"])); store.add_synonym("subway", SetBuf::from_dirty(vec!["underground train"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder let results = builder
@ -1611,15 +1673,15 @@ mod tests {
#[test] #[test]
fn deunicoded_synonyms() { fn deunicoded_synonyms() {
let mut store = TempDatabase::from_iter(vec![ let mut store = TempDatabase::from_iter(vec![
("telephone", &[doc_index(0, 0)][..]), // meilidb indexes the unidecoded ("telephone", &[doc_index(0, 0)][..]), // meilisearch indexes the unidecoded
("téléphone", &[doc_index(0, 0)][..]), // and the original words on the same DocIndex ("téléphone", &[doc_index(0, 0)][..]), // and the original words on the same DocIndex
("iphone", &[doc_index(1, 0)][..]), ("iphone", &[doc_index(1, 0)][..]),
]); ]);
store.add_synonym("téléphone", SetBuf::from_dirty(vec!["iphone"])); store.add_synonym("téléphone", SetBuf::from_dirty(vec!["iphone"]));
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "telephone", 0..20).unwrap(); let results = builder.query(&reader, "telephone", 0..20).unwrap();
@ -1680,8 +1742,8 @@ mod tests {
("case", &[doc_index(0, 1)][..]), ("case", &[doc_index(0, 1)][..]),
]); ]);
let env = &store.database.env; let db = &store.database;
let reader = env.read_txn().unwrap(); let reader = db.main_read_txn().unwrap();
let builder = store.query_builder(); let builder = store.query_builder();
let results = builder.query(&reader, "i phone case", 0..20).unwrap(); let results = builder.query(&reader, "i phone case", 0..20).unwrap();
@ -1697,4 +1759,68 @@ mod tests {
}); });
assert_matches!(iter.next(), None); assert_matches!(iter.next(), None);
} }
#[test]
fn simple_phrase_query_splitting() {
let store = TempDatabase::from_iter(vec![
("search", &[doc_index(0, 0)][..]),
("engine", &[doc_index(0, 1)][..]),
("search", &[doc_index(1, 0)][..]),
("slow", &[doc_index(1, 1)][..]),
("engine", &[doc_index(1, 2)][..]),
]);
let db = &store.database;
let reader = db.main_read_txn().unwrap();
let builder = store.query_builder();
let results = builder.query(&reader, "searchengine", 0..20).unwrap();
let mut iter = results.into_iter();
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
let mut iter = matches.into_iter();
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, distance: 0, .. })); // search
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 1, distance: 0, .. })); // engine
assert_matches!(iter.next(), None);
});
assert_matches!(iter.next(), None);
}
#[test]
fn harder_phrase_query_splitting() {
let store = TempDatabase::from_iter(vec![
("search", &[doc_index(0, 0)][..]),
("search", &[doc_index(0, 1)][..]),
("engine", &[doc_index(0, 2)][..]),
("search", &[doc_index(1, 0)][..]),
("slow", &[doc_index(1, 1)][..]),
("search", &[doc_index(1, 2)][..]),
("engine", &[doc_index(1, 3)][..]),
("search", &[doc_index(1, 0)][..]),
("search", &[doc_index(1, 1)][..]),
("slow", &[doc_index(1, 2)][..]),
("engine", &[doc_index(1, 3)][..]),
]);
let db = &store.database;
let reader = db.main_read_txn().unwrap();
let builder = store.query_builder();
let results = builder.query(&reader, "searchengine", 0..20).unwrap();
let mut iter = results.into_iter();
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
let mut iter = matches.into_iter();
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 1, distance: 0, .. })); // search
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 2, distance: 0, .. })); // engine
assert_matches!(iter.next(), None);
});
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
let mut iter = matches.into_iter();
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 2, distance: 0, .. })); // search
assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 3, distance: 0, .. })); // engine
assert_matches!(iter.next(), None);
});
assert_matches!(iter.next(), None);
}
} }
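
Outside the tests, the pattern for issuing a search is the same: the read transaction now comes from the `Database` itself (`main_read_txn`, typed against `MainT`) instead of a raw LMDB environment. A hedged sketch, assuming `Index::query_builder()` exists as the test helper suggests and that the transaction error converts into `MResult`:

```rust
// Sketch only: index creation, schema and document insertion are elided.
use crate::{Database, Document, Index, MResult};

fn first_page(database: &Database, index: &Index, text: &str) -> MResult<Vec<Document>> {
    // Read transactions are obtained from the database, not from `env.read_txn()` anymore.
    let reader = database.main_read_txn()?;
    let builder = index.query_builder();
    // Ask for the first twenty matching documents.
    builder.query(&reader, text, 0..20)
}
```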

View File

@ -1,7 +1,7 @@
use std::io::{Read, Write}; use std::io::{Read, Write};
use hashbrown::HashMap; use hashbrown::HashMap;
use meilidb_schema::SchemaAttr; use meilisearch_schema::SchemaAttr;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::{DocumentId, Number}; use crate::{DocumentId, Number};

View File

@ -1,7 +1,7 @@
use std::fmt; use std::fmt;
use std::sync::Arc; use std::sync::Arc;
use meilidb_schema::SchemaAttr; use meilisearch_schema::SchemaAttr;
use sdset::SetBuf; use sdset::SetBuf;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;

View File

@ -3,14 +3,17 @@ use std::convert::TryFrom;
use crate::{DocIndex, DocumentId}; use crate::{DocIndex, DocumentId};
use deunicode::deunicode_with_tofu; use deunicode::deunicode_with_tofu;
use meilidb_schema::SchemaAttr; use meilisearch_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer}; use meilisearch_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
use sdset::SetBuf; use sdset::SetBuf;
const WORD_LENGTH_LIMIT: usize = 80;
type Word = Vec<u8>; // TODO make it be a SmallVec type Word = Vec<u8>; // TODO make it be a SmallVec
pub struct RawIndexer { pub struct RawIndexer {
word_limit: usize, // the maximum number of indexed words word_limit: usize, // the maximum number of indexed words
stop_words: fst::Set,
words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>, words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
docs_words: HashMap<DocumentId, Vec<Word>>, docs_words: HashMap<DocumentId, Vec<Word>>,
} }
@ -21,13 +24,14 @@ pub struct Indexed {
} }
impl RawIndexer { impl RawIndexer {
pub fn new() -> RawIndexer { pub fn new(stop_words: fst::Set) -> RawIndexer {
RawIndexer::with_word_limit(1000) RawIndexer::with_word_limit(stop_words, 1000)
} }
pub fn with_word_limit(limit: usize) -> RawIndexer { pub fn with_word_limit(stop_words: fst::Set, limit: usize) -> RawIndexer {
RawIndexer { RawIndexer {
word_limit: limit, word_limit: limit,
stop_words,
words_doc_indexes: BTreeMap::new(), words_doc_indexes: BTreeMap::new(),
docs_words: HashMap::new(), docs_words: HashMap::new(),
} }
@ -35,89 +39,40 @@ impl RawIndexer {
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) -> usize { pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) -> usize {
let mut number_of_words = 0; let mut number_of_words = 0;
let lowercase_text = text.to_lowercase();
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
// TODO compute the deunicoded version after the cjk check for token in Tokenizer::new(text) {
let next = if !lowercase_text.contains(is_cjk) && lowercase_text != deunicoded {
Some(deunicoded)
} else {
None
};
let iter = Some(lowercase_text).into_iter().chain(next);
for text in iter {
// we must not count 2 times the same words
number_of_words = 0;
for token in Tokenizer::new(&text) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue {
break;
}
number_of_words += 1;
}
}
number_of_words
}
pub fn index_text_seq<'a, I, IT>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
where
I: IntoIterator<Item = &'a str, IntoIter = IT>,
IT: Iterator<Item = &'a str> + Clone,
{
// TODO serialize this to one call to the SeqTokenizer loop
let lowercased: Vec<_> = iter.into_iter().map(str::to_lowercase).collect();
let iter = lowercased.iter().map(|t| t.as_str());
for token in SeqTokenizer::new(iter) {
let must_continue = index_token( let must_continue = index_token(
token, token,
id, id,
attr, attr,
self.word_limit, self.word_limit,
&self.stop_words,
&mut self.words_doc_indexes, &mut self.words_doc_indexes,
&mut self.docs_words, &mut self.docs_words,
); );
number_of_words += 1;
if !must_continue { if !must_continue {
break; break;
} }
} }
let deunicoded: Vec<_> = lowercased number_of_words
.into_iter() }
.map(|lowercase_text| {
if lowercase_text.contains(is_cjk) {
return lowercase_text;
}
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
if lowercase_text != deunicoded {
deunicoded
} else {
lowercase_text
}
})
.collect();
let iter = deunicoded.iter().map(|t| t.as_str());
pub fn index_text_seq<'a, I>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
where
I: IntoIterator<Item = &'a str>,
{
let iter = iter.into_iter();
for token in SeqTokenizer::new(iter) { for token in SeqTokenizer::new(iter) {
let must_continue = index_token( let must_continue = index_token(
token, token,
id, id,
attr, attr,
self.word_limit, self.word_limit,
&self.stop_words,
&mut self.words_doc_indexes, &mut self.words_doc_indexes,
&mut self.docs_words, &mut self.docs_words,
); );
@ -152,17 +107,12 @@ impl RawIndexer {
} }
} }
impl Default for RawIndexer {
fn default() -> Self {
Self::new()
}
}
fn index_token( fn index_token(
token: Token, token: Token,
id: DocumentId, id: DocumentId,
attr: SchemaAttr, attr: SchemaAttr,
word_limit: usize, word_limit: usize,
stop_words: &fst::Set,
words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>, words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
docs_words: &mut HashMap<DocumentId, Vec<Word>>, docs_words: &mut HashMap<DocumentId, Vec<Word>>,
) -> bool { ) -> bool {
@ -170,16 +120,41 @@ fn index_token(
         return false;
     }
 
-    match token_to_docindex(id, attr, token) {
-        Some(docindex) => {
-            let word = Vec::from(token.word);
-            words_doc_indexes
-                .entry(word.clone())
-                .or_insert_with(Vec::new)
-                .push(docindex);
-            docs_words.entry(id).or_insert_with(Vec::new).push(word);
-        }
-        None => return false,
-    }
+    let lower = token.word.to_lowercase();
+    let token = Token {
+        word: &lower,
+        ..token
+    };
+
+    if !stop_words.contains(&token.word) {
+        match token_to_docindex(id, attr, token) {
+            Some(docindex) => {
+                let word = Vec::from(token.word);
+                if word.len() <= WORD_LENGTH_LIMIT {
+                    words_doc_indexes
+                        .entry(word.clone())
+                        .or_insert_with(Vec::new)
+                        .push(docindex);
+                    docs_words.entry(id).or_insert_with(Vec::new).push(word);
+
+                    if !lower.contains(is_cjk) {
+                        let unidecoded = deunicode_with_tofu(&lower, "");
+                        if unidecoded != lower && !unidecoded.is_empty() {
+                            let word = Vec::from(unidecoded);
+                            if word.len() <= WORD_LENGTH_LIMIT {
+                                words_doc_indexes
+                                    .entry(word.clone())
+                                    .or_insert_with(Vec::new)
+                                    .push(docindex);
+                                docs_words.entry(id).or_insert_with(Vec::new).push(word);
+                            }
+                        }
+                    }
+                }
+            }
+            None => return false,
+        }
+    }
 
     true
@ -207,7 +182,7 @@ mod tests {
#[test] #[test]
fn strange_apostrophe() { fn strange_apostrophe() {
let mut indexer = RawIndexer::new(); let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0); let docid = DocumentId(0);
let attr = SchemaAttr(0); let attr = SchemaAttr(0);
@ -222,16 +197,14 @@ mod tests {
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some()); assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some()); assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some()); assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
// with the ugly apostrophe...
assert!(words_doc_indexes assert!(words_doc_indexes
.get(&"léteindre".to_owned().into_bytes()) .get(&"éteindre".to_owned().into_bytes())
.is_some()); .is_some());
} }
#[test] #[test]
fn strange_apostrophe_in_sequence() { fn strange_apostrophe_in_sequence() {
let mut indexer = RawIndexer::new(); let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0); let docid = DocumentId(0);
let attr = SchemaAttr(0); let attr = SchemaAttr(0);
@ -246,10 +219,53 @@ mod tests {
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some()); assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some()); assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some()); assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
// with the ugly apostrophe...
assert!(words_doc_indexes assert!(words_doc_indexes
.get(&"léteindre".to_owned().into_bytes()) .get(&"éteindre".to_owned().into_bytes())
.is_some());
}
#[test]
fn basic_stop_words() {
let stop_words = sdset::SetBuf::from_dirty(vec!["l", "j", "ai", "de"]);
let stop_words = fst::Set::from_iter(stop_words).unwrap();
let mut indexer = RawIndexer::new(stop_words);
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text);
let Indexed {
words_doc_indexes, ..
} = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_none());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"j"[..]).is_none());
assert!(words_doc_indexes.get(&b"ai"[..]).is_none());
assert!(words_doc_indexes.get(&b"de"[..]).is_none());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
assert!(words_doc_indexes
.get(&"éteindre".to_owned().into_bytes())
.is_some());
}
#[test]
fn no_empty_unidecode() {
let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = "🇯🇵";
indexer.index_text(docid, attr, text);
let Indexed {
words_doc_indexes, ..
} = indexer.build();
assert!(words_doc_indexes
.get(&"🇯🇵".to_owned().into_bytes())
.is_some()); .is_some());
} }
} }
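
For callers, the visible change is that `RawIndexer::new` and `RawIndexer::with_word_limit` now take the stop-word set up front. A short sketch of the construction path used by the tests above (the word list itself is only an example):

```rust
use crate::raw_indexer::RawIndexer;

fn build_indexer() -> Result<RawIndexer, fst::Error> {
    // Stop words must be sorted and deduplicated before building the fst set,
    // which is what sdset::SetBuf::from_dirty takes care of.
    let stop_words = sdset::SetBuf::from_dirty(vec!["a", "of", "the"]);
    let stop_words = fst::Set::from_iter(stop_words)?;

    // Keeps the default limit of 1000 indexed words per text.
    Ok(RawIndexer::new(stop_words))
}
```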

View File

@ -12,8 +12,8 @@ impl ser::Serializer for ConvertToString {
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>; type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>; type SerializeMap = MapConvertToString;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>; type SerializeStruct = StructConvertToString;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> { fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
@ -169,7 +169,9 @@ impl ser::Serializer for ConvertToString {
} }
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "map" }) Ok(MapConvertToString {
text: String::new(),
})
} }
fn serialize_struct( fn serialize_struct(
@ -177,8 +179,8 @@ impl ser::Serializer for ConvertToString {
_name: &'static str, _name: &'static str,
_len: usize, _len: usize,
) -> Result<Self::SerializeStruct, Self::Error> { ) -> Result<Self::SerializeStruct, Self::Error> {
Err(SerializerError::UnserializableType { Ok(StructConvertToString {
type_name: "struct", text: String::new(),
}) })
} }
@ -194,3 +196,63 @@ impl ser::Serializer for ConvertToString {
}) })
} }
} }
pub struct MapConvertToString {
text: String,
}
impl ser::SerializeMap for MapConvertToString {
type Ok = String;
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where
T: ser::Serialize,
{
let text = key.serialize(ConvertToString)?;
self.text.push_str(&text);
self.text.push_str(" ");
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where
T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.text.push_str(&text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(self.text)
}
}
pub struct StructConvertToString {
text: String,
}
impl ser::SerializeStruct for StructConvertToString {
type Ok = String;
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where
T: ser::Serialize,
{
let value = value.serialize(ConvertToString)?;
self.text.push_str(key);
self.text.push_str(" ");
self.text.push_str(&value);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(self.text)
}
}
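
With `MapConvertToString` and `StructConvertToString`, nested maps and structs no longer abort with `UnserializableType` when a field is flattened to text: keys and values are concatenated into a single string instead. A rough in-crate sketch of the effect (the `Tag` type is an example, not part of the codebase):

```rust
use serde::Serialize;

use crate::serde::{ConvertToString, SerializerError};

#[derive(Serialize)]
struct Tag {
    name: String,
}

fn flatten_to_text() -> Result<String, SerializerError> {
    let tag = Tag { name: "science".to_string() };
    // Previously this returned UnserializableType { type_name: "struct" };
    // now the field name and value are joined into one searchable string,
    // "name science" for this single-field struct.
    tag.serialize(ConvertToString)
}
```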

View File

@ -2,19 +2,20 @@ use std::collections::HashSet;
use std::io::Cursor; use std::io::Cursor;
use std::{error::Error, fmt}; use std::{error::Error, fmt};
use meilidb_schema::{Schema, SchemaAttr}; use meilisearch_schema::{Schema, SchemaAttr};
use serde::{de, forward_to_deserialize_any}; use serde::{de, forward_to_deserialize_any};
use serde_json::de::IoRead as SerdeJsonIoRead; use serde_json::de::IoRead as SerdeJsonIoRead;
use serde_json::Deserializer as SerdeJsonDeserializer; use serde_json::Deserializer as SerdeJsonDeserializer;
use serde_json::Error as SerdeJsonError; use serde_json::Error as SerdeJsonError;
use crate::database::MainT;
use crate::store::DocumentsFields; use crate::store::DocumentsFields;
use crate::DocumentId; use crate::DocumentId;
#[derive(Debug)] #[derive(Debug)]
pub enum DeserializerError { pub enum DeserializerError {
SerdeJson(SerdeJsonError), SerdeJson(SerdeJsonError),
Zlmdb(zlmdb::Error), Zlmdb(heed::Error),
Custom(String), Custom(String),
} }
@ -28,7 +29,7 @@ impl fmt::Display for DeserializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self { match self {
DeserializerError::SerdeJson(e) => write!(f, "serde json related error: {}", e), DeserializerError::SerdeJson(e) => write!(f, "serde json related error: {}", e),
DeserializerError::Zlmdb(e) => write!(f, "zlmdb related error: {}", e), DeserializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),
DeserializerError::Custom(s) => f.write_str(s), DeserializerError::Custom(s) => f.write_str(s),
} }
} }
@ -42,15 +43,15 @@ impl From<SerdeJsonError> for DeserializerError {
} }
} }
impl From<zlmdb::Error> for DeserializerError { impl From<heed::Error> for DeserializerError {
fn from(error: zlmdb::Error) -> DeserializerError { fn from(error: heed::Error) -> DeserializerError {
DeserializerError::Zlmdb(error) DeserializerError::Zlmdb(error)
} }
} }
pub struct Deserializer<'a> { pub struct Deserializer<'a> {
pub document_id: DocumentId, pub document_id: DocumentId,
pub reader: &'a zlmdb::RoTxn, pub reader: &'a heed::RoTxn<MainT>,
pub documents_fields: DocumentsFields, pub documents_fields: DocumentsFields,
pub schema: &'a Schema, pub schema: &'a Schema,
pub attributes: Option<&'a HashSet<SchemaAttr>>, pub attributes: Option<&'a HashSet<SchemaAttr>>,
@ -63,13 +64,14 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
where where
V: de::Visitor<'de>, V: de::Visitor<'de>,
{ {
self.deserialize_map(visitor) self.deserialize_option(visitor)
} }
forward_to_deserialize_any! { fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string where
bytes byte_buf option unit unit_struct newtype_struct seq tuple V: de::Visitor<'de>,
tuple_struct struct enum identifier ignored_any {
self.deserialize_map(visitor)
} }
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error> fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
@ -104,16 +106,29 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
} }
}); });
let map_deserializer = de::value::MapDeserializer::new(iter); let mut iter = iter.peekable();
let result = visitor
.visit_map(map_deserializer) let result = match iter.peek() {
.map_err(DeserializerError::from); Some(_) => {
let map_deserializer = de::value::MapDeserializer::new(iter);
visitor
.visit_some(map_deserializer)
.map_err(DeserializerError::from)
}
None => visitor.visit_none(),
};
match error.take() { match error.take() {
Some(error) => Err(error.into()), Some(error) => Err(error.into()),
None => result, None => result,
} }
} }
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf unit unit_struct newtype_struct seq tuple
tuple_struct struct enum identifier ignored_any
}
} }
struct Value(SerdeJsonDeserializer<SerdeJsonIoRead<Cursor<Vec<u8>>>>); struct Value(SerdeJsonDeserializer<SerdeJsonIoRead<Cursor<Vec<u8>>>>);
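
Routing `deserialize_any` through `deserialize_option` means a document with no stored fields now surfaces as `None` rather than as an empty map. A hedged sketch of what that enables for callers (the `Movie` type is an example, and the error type of the `Deserializer` impl is assumed to be `DeserializerError`):

```rust
use serde::Deserialize;

use crate::serde::{Deserializer, DeserializerError};

#[derive(Deserialize)]
struct Movie {
    title: String,
}

fn document_if_any(deserializer: &mut Deserializer<'_>) -> Result<Option<Movie>, DeserializerError> {
    // visit_none is used when the peeked field iterator is empty,
    // so a missing document becomes Ok(None) instead of an empty struct or an error.
    Option::<Movie>::deserialize(deserializer)
}
```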

View File

@ -1,4 +1,4 @@
use meilidb_schema::SchemaAttr; use meilisearch_schema::SchemaAttr;
use serde::ser; use serde::ser;
use serde::Serialize; use serde::Serialize;
@ -20,7 +20,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapIndexer<'a>; type SerializeMap = MapIndexer<'a>;
type SerializeStruct = StructSerializer<'a>; type SerializeStruct = StructIndexer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> { fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
@ -302,14 +302,14 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {
} }
} }
pub struct StructSerializer<'a> { pub struct StructIndexer<'a> {
attribute: SchemaAttr, attribute: SchemaAttr,
document_id: DocumentId, document_id: DocumentId,
indexer: &'a mut RawIndexer, indexer: &'a mut RawIndexer,
texts: Vec<String>, texts: Vec<String>,
} }
impl<'a> ser::SerializeStruct for StructSerializer<'a> { impl<'a> ser::SerializeStruct for StructIndexer<'a> {
type Ok = Option<usize>; type Ok = Option<usize>;
type Error = SerializerError; type Error = SerializerError;

View File

@ -20,22 +20,20 @@ pub use self::convert_to_string::ConvertToString;
pub use self::deserializer::{Deserializer, DeserializerError}; pub use self::deserializer::{Deserializer, DeserializerError};
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string}; pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
pub use self::indexer::Indexer; pub use self::indexer::Indexer;
pub use self::serializer::Serializer; pub use self::serializer::{serialize_value, Serializer};
use std::collections::BTreeMap;
use std::{error::Error, fmt}; use std::{error::Error, fmt};
use meilidb_schema::SchemaAttr;
use serde::ser; use serde::ser;
use serde_json::Error as SerdeJsonError; use serde_json::Error as SerdeJsonError;
use crate::{DocumentId, ParseNumberError}; use crate::ParseNumberError;
#[derive(Debug)] #[derive(Debug)]
pub enum SerializerError { pub enum SerializerError {
DocumentIdNotFound, DocumentIdNotFound,
InvalidDocumentIdType, InvalidDocumentIdType,
Zlmdb(zlmdb::Error), Zlmdb(heed::Error),
SerdeJson(SerdeJsonError), SerdeJson(SerdeJsonError),
ParseNumber(ParseNumberError), ParseNumber(ParseNumberError),
UnserializableType { type_name: &'static str }, UnserializableType { type_name: &'static str },
@ -59,7 +57,7 @@ impl fmt::Display for SerializerError {
SerializerError::InvalidDocumentIdType => { SerializerError::InvalidDocumentIdType => {
f.write_str("document identifier can only be of type string or number") f.write_str("document identifier can only be of type string or number")
} }
SerializerError::Zlmdb(e) => write!(f, "zlmdb related error: {}", e), SerializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),
SerializerError::SerdeJson(e) => write!(f, "serde json error: {}", e), SerializerError::SerdeJson(e) => write!(f, "serde json error: {}", e),
SerializerError::ParseNumber(e) => { SerializerError::ParseNumber(e) => {
write!(f, "error while trying to parse a number: {}", e) write!(f, "error while trying to parse a number: {}", e)
@ -92,8 +90,8 @@ impl From<SerdeJsonError> for SerializerError {
} }
} }
impl From<zlmdb::Error> for SerializerError { impl From<heed::Error> for SerializerError {
fn from(error: zlmdb::Error) -> SerializerError { fn from(error: heed::Error) -> SerializerError {
SerializerError::Zlmdb(error) SerializerError::Zlmdb(error)
} }
} }
@ -103,25 +101,3 @@ impl From<ParseNumberError> for SerializerError {
SerializerError::ParseNumber(error) SerializerError::ParseNumber(error)
} }
} }
pub struct RamDocumentStore(BTreeMap<(DocumentId, SchemaAttr), Vec<u8>>);
impl RamDocumentStore {
pub fn new() -> RamDocumentStore {
RamDocumentStore(BTreeMap::new())
}
pub fn set_document_field(&mut self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) {
self.0.insert((id, attr), value);
}
pub fn into_inner(self) -> BTreeMap<(DocumentId, SchemaAttr), Vec<u8>> {
self.0
}
}
impl Default for RamDocumentStore {
fn default() -> Self {
Self::new()
}
}

View File

@ -1,31 +1,32 @@
use meilidb_schema::{Schema, SchemaAttr}; use meilisearch_schema::{Schema, SchemaAttr, SchemaProps};
use serde::ser; use serde::ser;
use std::collections::HashMap;
use crate::database::MainT;
use crate::raw_indexer::RawIndexer; use crate::raw_indexer::RawIndexer;
use crate::serde::RamDocumentStore; use crate::store::{DocumentsFields, DocumentsFieldsCounts};
use crate::{DocumentId, RankedMap}; use crate::{DocumentId, RankedMap};
use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError}; use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};
pub struct Serializer<'a> { pub struct Serializer<'a, 'b> {
pub txn: &'a mut heed::RwTxn<'b, MainT>,
pub schema: &'a Schema, pub schema: &'a Schema,
pub document_store: &'a mut RamDocumentStore, pub document_store: DocumentsFields,
pub document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>, pub document_fields_counts: DocumentsFieldsCounts,
pub indexer: &'a mut RawIndexer, pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap, pub ranked_map: &'a mut RankedMap,
pub document_id: DocumentId, pub document_id: DocumentId,
} }
impl<'a> ser::Serializer for Serializer<'a> { impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
type Ok = (); type Ok = ();
type Error = SerializerError; type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>; type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>; type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>; type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>; type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapSerializer<'a>; type SerializeMap = MapSerializer<'a, 'b>;
type SerializeStruct = StructSerializer<'a>; type SerializeStruct = StructSerializer<'a, 'b>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>; type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! { forward_to_unserializable_type! {
@ -150,6 +151,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Ok(MapSerializer { Ok(MapSerializer {
txn: self.txn,
schema: self.schema, schema: self.schema,
document_id: self.document_id, document_id: self.document_id,
document_store: self.document_store, document_store: self.document_store,
@ -166,6 +168,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
_len: usize, _len: usize,
) -> Result<Self::SerializeStruct, Self::Error> { ) -> Result<Self::SerializeStruct, Self::Error> {
Ok(StructSerializer { Ok(StructSerializer {
txn: self.txn,
schema: self.schema, schema: self.schema,
document_id: self.document_id, document_id: self.document_id,
document_store: self.document_store, document_store: self.document_store,
@ -188,17 +191,18 @@ impl<'a> ser::Serializer for Serializer<'a> {
} }
} }
pub struct MapSerializer<'a> { pub struct MapSerializer<'a, 'b> {
txn: &'a mut heed::RwTxn<'b, MainT>,
schema: &'a Schema, schema: &'a Schema,
document_id: DocumentId, document_id: DocumentId,
document_store: &'a mut RamDocumentStore, document_store: DocumentsFields,
document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>, document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer, indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap, ranked_map: &'a mut RankedMap,
current_key_name: Option<String>, current_key_name: Option<String>,
} }
impl<'a> ser::SerializeMap for MapSerializer<'a> { impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
type Ok = (); type Ok = ();
type Error = SerializerError; type Error = SerializerError;
@ -229,17 +233,20 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
V: ser::Serialize, V: ser::Serialize,
{ {
let key = key.serialize(ConvertToString)?; let key = key.serialize(ConvertToString)?;
match self.schema.attribute(&key) {
serialize_value( Some(attribute) => serialize_value(
self.schema, self.txn,
self.document_id, attribute,
self.document_store, self.schema.props(attribute),
self.document_fields_counts, self.document_id,
self.indexer, self.document_store,
self.ranked_map, self.document_fields_counts,
&key, self.indexer,
value, self.ranked_map,
) value,
),
None => Ok(()),
}
} }
fn end(self) -> Result<Self::Ok, Self::Error> { fn end(self) -> Result<Self::Ok, Self::Error> {
@ -247,16 +254,17 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
} }
} }
pub struct StructSerializer<'a> { pub struct StructSerializer<'a, 'b> {
txn: &'a mut heed::RwTxn<'b, MainT>,
schema: &'a Schema, schema: &'a Schema,
document_id: DocumentId, document_id: DocumentId,
document_store: &'a mut RamDocumentStore, document_store: DocumentsFields,
document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>, document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer, indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap, ranked_map: &'a mut RankedMap,
} }
impl<'a> ser::SerializeStruct for StructSerializer<'a> { impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
type Ok = (); type Ok = ();
type Error = SerializerError; type Error = SerializerError;
@ -268,16 +276,20 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
where where
T: ser::Serialize, T: ser::Serialize,
{ {
serialize_value( match self.schema.attribute(key) {
self.schema, Some(attribute) => serialize_value(
self.document_id, self.txn,
self.document_store, attribute,
self.document_fields_counts, self.schema.props(attribute),
self.indexer, self.document_id,
self.ranked_map, self.document_store,
key, self.document_fields_counts,
value, self.indexer,
) self.ranked_map,
value,
),
None => Ok(()),
}
} }
fn end(self) -> Result<Self::Ok, Self::Error> { fn end(self) -> Result<Self::Ok, Self::Error> {
@ -285,40 +297,42 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
} }
} }
fn serialize_value<T: ?Sized>( pub fn serialize_value<T: ?Sized>(
schema: &Schema, txn: &mut heed::RwTxn<MainT>,
attribute: SchemaAttr,
props: SchemaProps,
document_id: DocumentId, document_id: DocumentId,
document_store: &mut RamDocumentStore, document_store: DocumentsFields,
documents_fields_counts: &mut HashMap<(DocumentId, SchemaAttr), u64>, documents_fields_counts: DocumentsFieldsCounts,
indexer: &mut RawIndexer, indexer: &mut RawIndexer,
ranked_map: &mut RankedMap, ranked_map: &mut RankedMap,
key: &str,
value: &T, value: &T,
) -> Result<(), SerializerError> ) -> Result<(), SerializerError>
where where
T: ser::Serialize, T: ser::Serialize,
{ {
if let Some(attribute) = schema.attribute(key) { let serialized = serde_json::to_vec(value)?;
let props = schema.props(attribute); document_store.put_document_field(txn, document_id, attribute, &serialized)?;
let serialized = serde_json::to_vec(value)?; if props.is_indexed() {
document_store.set_document_field(document_id, attribute, serialized); let indexer = Indexer {
attribute,
if props.is_indexed() { indexer,
let indexer = Indexer { document_id,
attribute, };
indexer, if let Some(number_of_words) = value.serialize(indexer)? {
documents_fields_counts.put_document_field_count(
txn,
document_id, document_id,
}; attribute,
if let Some(number_of_words) = value.serialize(indexer)? { number_of_words as u64,
documents_fields_counts.insert((document_id, attribute), number_of_words as u64); )?;
}
} }
}
if props.is_ranked() { if props.is_ranked() {
let number = value.serialize(ConvertToNumber)?; let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, attribute, number); ranked_map.insert(document_id, attribute, number);
}
} }
Ok(()) Ok(())

View File

@ -1,18 +1,19 @@
use super::BEU64; use super::BEU64;
use crate::database::MainT;
use crate::DocumentId; use crate::DocumentId;
use heed::types::{ByteSlice, OwnedType};
use heed::Result as ZResult;
use std::sync::Arc; use std::sync::Arc;
use zlmdb::types::{ByteSlice, OwnedType};
use zlmdb::Result as ZResult;
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
pub struct DocsWords { pub struct DocsWords {
pub(crate) docs_words: zlmdb::Database<OwnedType<BEU64>, ByteSlice>, pub(crate) docs_words: heed::Database<OwnedType<BEU64>, ByteSlice>,
} }
impl DocsWords { impl DocsWords {
pub fn put_doc_words( pub fn put_doc_words(
self, self,
writer: &mut zlmdb::RwTxn, writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId, document_id: DocumentId,
words: &fst::Set, words: &fst::Set,
) -> ZResult<()> { ) -> ZResult<()> {
@ -21,25 +22,25 @@ impl DocsWords {
         self.docs_words.put(writer, &document_id, bytes)
     }
 
-    pub fn del_doc_words(
-        self,
-        writer: &mut zlmdb::RwTxn,
-        document_id: DocumentId,
-    ) -> ZResult<bool> {
+    pub fn del_doc_words(self, writer: &mut heed::RwTxn<MainT>, document_id: DocumentId) -> ZResult<bool> {
         let document_id = BEU64::new(document_id.0);
         self.docs_words.delete(writer, &document_id)
     }
 
+    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
+        self.docs_words.clear(writer)
+    }
+
     pub fn doc_words(
         self,
-        reader: &zlmdb::RoTxn,
+        reader: &heed::RoTxn<MainT>,
         document_id: DocumentId,
     ) -> ZResult<Option<fst::Set>> {
         let document_id = BEU64::new(document_id.0);
         match self.docs_words.get(reader, &document_id)? {
             Some(bytes) => {
                 let len = bytes.len();
-                let bytes = Arc::from(bytes);
+                let bytes = Arc::new(bytes.to_owned());
                 let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
                 Ok(Some(fst::Set::from(fst)))
             }

View File

@@ -1,19 +1,20 @@
-use meilidb_schema::SchemaAttr;
-use zlmdb::types::{ByteSlice, OwnedType};
-use zlmdb::Result as ZResult;
+use heed::types::{ByteSlice, OwnedType};
+use crate::database::MainT;
+use heed::Result as ZResult;
+use meilisearch_schema::SchemaAttr;
 use super::DocumentAttrKey;
 use crate::DocumentId;
 #[derive(Copy, Clone)]
 pub struct DocumentsFields {
-    pub(crate) documents_fields: zlmdb::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
+    pub(crate) documents_fields: heed::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
 }
 impl DocumentsFields {
     pub fn put_document_field(
         self,
-        writer: &mut zlmdb::RwTxn,
+        writer: &mut heed::RwTxn<MainT>,
         document_id: DocumentId,
         attribute: SchemaAttr,
         value: &[u8],
@@ -24,17 +25,21 @@ impl DocumentsFields {
     pub fn del_all_document_fields(
         self,
-        writer: &mut zlmdb::RwTxn,
+        writer: &mut heed::RwTxn<MainT>,
         document_id: DocumentId,
     ) -> ZResult<usize> {
         let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
         let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
-        self.documents_fields.delete_range(writer, start..=end)
+        self.documents_fields.delete_range(writer, &(start..=end))
     }
+    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
+        self.documents_fields.clear(writer)
+    }
     pub fn document_attribute<'txn>(
         self,
-        reader: &'txn zlmdb::RoTxn,
+        reader: &'txn heed::RoTxn<MainT>,
         document_id: DocumentId,
         attribute: SchemaAttr,
     ) -> ZResult<Option<&'txn [u8]>> {
@@ -44,18 +49,18 @@ impl DocumentsFields {
     pub fn document_fields<'txn>(
         self,
-        reader: &'txn zlmdb::RoTxn,
+        reader: &'txn heed::RoTxn<MainT>,
         document_id: DocumentId,
     ) -> ZResult<DocumentFieldsIter<'txn>> {
         let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
         let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
-        let iter = self.documents_fields.range(reader, start..=end)?;
+        let iter = self.documents_fields.range(reader, &(start..=end))?;
         Ok(DocumentFieldsIter { iter })
     }
 }
 pub struct DocumentFieldsIter<'txn> {
-    iter: zlmdb::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
+    iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
 }
 impl<'txn> Iterator for DocumentFieldsIter<'txn> {
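The DocumentsFields store now goes through an explicit heed transaction for every read and write. A minimal in-crate sketch of that round trip (the wrapper function and the sample value are illustrative, not part of the changeset):

use crate::database::MainT;
use crate::store::DocumentsFields;
use crate::DocumentId;
use meilisearch_schema::SchemaAttr;

// Store one serialized field of a document, then read it back through the same write transaction.
fn roundtrip_field(
    fields: DocumentsFields,
    writer: &mut heed::RwTxn<MainT>,
    document_id: DocumentId,
    attribute: SchemaAttr,
) -> heed::Result<Option<Vec<u8>>> {
    let value = serde_json::to_vec(&"an example value").unwrap();
    fields.put_document_field(writer, document_id, attribute, &value)?;
    let stored = fields.document_attribute(writer, document_id, attribute)?;
    Ok(stored.map(|bytes| bytes.to_vec()))
}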

View File

@@ -1,18 +1,19 @@
 use super::DocumentAttrKey;
+use crate::database::MainT;
 use crate::DocumentId;
-use meilidb_schema::SchemaAttr;
-use zlmdb::types::OwnedType;
-use zlmdb::Result as ZResult;
+use heed::types::OwnedType;
+use heed::Result as ZResult;
+use meilisearch_schema::SchemaAttr;
 #[derive(Copy, Clone)]
 pub struct DocumentsFieldsCounts {
-    pub(crate) documents_fields_counts: zlmdb::Database<OwnedType<DocumentAttrKey>, OwnedType<u64>>,
+    pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u64>>,
 }
 impl DocumentsFieldsCounts {
     pub fn put_document_field_count(
         self,
-        writer: &mut zlmdb::RwTxn,
+        writer: &mut heed::RwTxn<MainT>,
         document_id: DocumentId,
         attribute: SchemaAttr,
         value: u64,
@@ -23,18 +24,22 @@ impl DocumentsFieldsCounts {
     pub fn del_all_document_fields_counts(
         self,
-        writer: &mut zlmdb::RwTxn,
+        writer: &mut heed::RwTxn<MainT>,
         document_id: DocumentId,
     ) -> ZResult<usize> {
         let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
         let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
         self.documents_fields_counts
-            .delete_range(writer, start..=end)
+            .delete_range(writer, &(start..=end))
     }
+    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
+        self.documents_fields_counts.clear(writer)
+    }
     pub fn document_field_count(
         self,
-        reader: &zlmdb::RoTxn,
+        reader: &heed::RoTxn<MainT>,
         document_id: DocumentId,
         attribute: SchemaAttr,
     ) -> ZResult<Option<u64>> {
@@ -47,19 +52,16 @@ impl DocumentsFieldsCounts {
     pub fn document_fields_counts<'txn>(
         self,
-        reader: &'txn zlmdb::RoTxn,
+        reader: &'txn heed::RoTxn<MainT>,
         document_id: DocumentId,
     ) -> ZResult<DocumentFieldsCountsIter<'txn>> {
         let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
         let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
-        let iter = self.documents_fields_counts.range(reader, start..=end)?;
+        let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
         Ok(DocumentFieldsCountsIter { iter })
     }
-    pub fn documents_ids<'txn>(
-        self,
-        reader: &'txn zlmdb::RoTxn,
-    ) -> ZResult<DocumentsIdsIter<'txn>> {
+    pub fn documents_ids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<DocumentsIdsIter<'txn>> {
         let iter = self.documents_fields_counts.iter(reader)?;
         Ok(DocumentsIdsIter {
             last_seen_id: None,
@@ -69,7 +71,7 @@ impl DocumentsFieldsCounts {
     pub fn all_documents_fields_counts<'txn>(
         self,
-        reader: &'txn zlmdb::RoTxn,
+        reader: &'txn heed::RoTxn<MainT>,
     ) -> ZResult<AllDocumentsFieldsCountsIter<'txn>> {
         let iter = self.documents_fields_counts.iter(reader)?;
         Ok(AllDocumentsFieldsCountsIter { iter })
@@ -77,7 +79,7 @@ impl DocumentsFieldsCounts {
 }
 pub struct DocumentFieldsCountsIter<'txn> {
-    iter: zlmdb::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
+    iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
 }
 impl Iterator for DocumentFieldsCountsIter<'_> {
@@ -97,7 +99,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> {
 pub struct DocumentsIdsIter<'txn> {
     last_seen_id: Option<DocumentId>,
-    iter: zlmdb::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
+    iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
 }
 impl Iterator for DocumentsIdsIter<'_> {
@@ -121,10 +123,10 @@ impl Iterator for DocumentsIdsIter<'_> {
 }
 pub struct AllDocumentsFieldsCountsIter<'txn> {
-    iter: zlmdb::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
+    iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
 }
-impl<'r> Iterator for AllDocumentsFieldsCountsIter<'r> {
+impl Iterator for AllDocumentsFieldsCountsIter<'_> {
     type Item = ZResult<(DocumentId, SchemaAttr, u64)>;
     fn next(&mut self) -> Option<Self::Item> {
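DocumentsFieldsCounts keys its entries by DocumentAttrKey, so all counts of one document sit in a contiguous range. A small in-crate sketch that sums them, assuming the per-document iterator yields (SchemaAttr, u64) items like its all-documents counterpart:

use crate::database::MainT;
use crate::store::DocumentsFieldsCounts;
use crate::DocumentId;

// Sum the per-attribute word counts of a single document (the read transaction comes from the caller).
fn total_words(
    counts: DocumentsFieldsCounts,
    reader: &heed::RoTxn<MainT>,
    document_id: DocumentId,
) -> heed::Result<u64> {
    let mut total = 0;
    for result in counts.document_fields_counts(reader, document_id)? {
        let (_attr, count) = result?;
        total += count;
    }
    Ok(total)
}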

View File

@@ -0,0 +1,184 @@
use crate::database::MainT;
use crate::RankedMap;
use chrono::{DateTime, Utc};
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
use heed::Result as ZResult;
use meilisearch_schema::Schema;
use std::collections::HashMap;
use std::sync::Arc;
const CREATED_AT_KEY: &str = "created-at";
const CUSTOMS_KEY: &str = "customs-key";
const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
const NAME_KEY: &str = "name";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const SCHEMA_KEY: &str = "schema";
const STOP_WORDS_KEY: &str = "stop-words";
const SYNONYMS_KEY: &str = "synonyms";
const UPDATED_AT_KEY: &str = "updated-at";
const WORDS_KEY: &str = "words";
pub type FreqsMap = HashMap<String, usize>;
type SerdeFreqsMap = SerdeBincode<FreqsMap>;
type SerdeDatetime = SerdeBincode<DateTime<Utc>>;
#[derive(Copy, Clone)]
pub struct Main {
pub(crate) main: heed::PolyDatabase,
}
impl Main {
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
self.main.clear(writer)
}
pub fn put_name(self, writer: &mut heed::RwTxn<MainT>, name: &str) -> ZResult<()> {
self.main.put::<_, Str, Str>(writer, NAME_KEY, name)
}
pub fn name(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<String>> {
Ok(self
.main
.get::<_, Str, Str>(reader, NAME_KEY)?
.map(|name| name.to_owned()))
}
pub fn put_created_at(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
self.main
.put::<_, Str, SerdeDatetime>(writer, CREATED_AT_KEY, &Utc::now())
}
pub fn created_at(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<DateTime<Utc>>> {
self.main.get::<_, Str, SerdeDatetime>(reader, CREATED_AT_KEY)
}
pub fn put_updated_at(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
self.main
.put::<_, Str, SerdeDatetime>(writer, UPDATED_AT_KEY, &Utc::now())
}
pub fn updated_at(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<DateTime<Utc>>> {
self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY)
}
pub fn put_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<_, Str, ByteSlice>(writer, WORDS_KEY, bytes)
}
pub fn words_fst(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<fst::Set>> {
match self.main.get::<_, Str, ByteSlice>(reader, WORDS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::new(bytes.to_owned());
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
}
None => Ok(None),
}
}
pub fn put_schema(self, writer: &mut heed::RwTxn<MainT>, schema: &Schema) -> ZResult<()> {
self.main
.put::<_, Str, SerdeBincode<Schema>>(writer, SCHEMA_KEY, schema)
}
pub fn schema(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<Schema>> {
self.main
.get::<_, Str, SerdeBincode<Schema>>(reader, SCHEMA_KEY)
}
pub fn put_ranked_map(self, writer: &mut heed::RwTxn<MainT>, ranked_map: &RankedMap) -> ZResult<()> {
self.main
.put::<_, Str, SerdeBincode<RankedMap>>(writer, RANKED_MAP_KEY, &ranked_map)
}
pub fn ranked_map(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<RankedMap>> {
self.main
.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
}
pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)
}
pub fn synonyms_fst(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<fst::Set>> {
match self.main.get::<_, Str, ByteSlice>(reader, SYNONYMS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::new(bytes.to_owned());
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
}
None => Ok(None),
}
}
pub fn put_stop_words_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main
.put::<_, Str, ByteSlice>(writer, STOP_WORDS_KEY, bytes)
}
pub fn stop_words_fst(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<fst::Set>> {
match self.main.get::<_, Str, ByteSlice>(reader, STOP_WORDS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let bytes = Arc::new(bytes.to_owned());
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
Ok(Some(fst::Set::from(fst)))
}
None => Ok(None),
}
}
pub fn put_number_of_documents<F>(self, writer: &mut heed::RwTxn<MainT>, f: F) -> ZResult<u64>
where
F: Fn(u64) -> u64,
{
let new = self.number_of_documents(&*writer).map(f)?;
self.main
.put::<_, Str, OwnedType<u64>>(writer, NUMBER_OF_DOCUMENTS_KEY, &new)?;
Ok(new)
}
pub fn number_of_documents(self, reader: &heed::RoTxn<MainT>) -> ZResult<u64> {
match self
.main
.get::<_, Str, OwnedType<u64>>(reader, NUMBER_OF_DOCUMENTS_KEY)?
{
Some(value) => Ok(value),
None => Ok(0),
}
}
pub fn put_fields_frequency(
self,
writer: &mut heed::RwTxn<MainT>,
fields_frequency: &FreqsMap,
) -> ZResult<()> {
self.main
.put::<_, Str, SerdeFreqsMap>(writer, FIELDS_FREQUENCY_KEY, fields_frequency)
}
pub fn fields_frequency(&self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<FreqsMap>> {
match self
.main
.get::<_, Str, SerdeFreqsMap>(reader, FIELDS_FREQUENCY_KEY)?
{
Some(freqs) => Ok(Some(freqs)),
None => Ok(None),
}
}
pub fn put_customs(self, writer: &mut heed::RwTxn<MainT>, customs: &[u8]) -> ZResult<()> {
self.main
.put::<_, Str, ByteSlice>(writer, CUSTOMS_KEY, customs)
}
pub fn customs<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Option<&'txn [u8]>> {
self.main.get::<_, Str, ByteSlice>(reader, CUSTOMS_KEY)
}
}
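The Main poly-database gathers the index-level metadata under the string keys listed above. A short read-only sketch (the open read transaction is assumed to be provided by the caller):

use crate::database::MainT;
use crate::store::Main;

// Read a few of the metadata entries stored under the string keys above.
fn print_metadata(main: Main, reader: &heed::RoTxn<MainT>) -> heed::Result<()> {
    let name = main.name(reader)?;
    let number_of_documents = main.number_of_documents(reader)?;
    let schema = main.schema(reader)?;
    println!(
        "index {:?} holds {} documents (schema defined: {})",
        name,
        number_of_documents,
        schema.is_some(),
    );
    Ok(())
}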

View File

@@ -20,12 +20,14 @@ pub use self::updates_results::UpdatesResults;
 use std::collections::HashSet;
-use meilidb_schema::{Schema, SchemaAttr};
-use serde::de;
+use heed::Result as ZResult;
+use meilisearch_schema::{Schema, SchemaAttr};
+use serde::de::{self, Deserialize};
 use zerocopy::{AsBytes, FromBytes};
-use zlmdb::Result as ZResult;
 use crate::criterion::Criteria;
+use crate::database::{UpdateEvent, UpdateEventsEmitter};
+use crate::database::{MainT, UpdateT};
 use crate::serde::Deserializer;
 use crate::{query_builder::QueryBuilder, update, DocumentId, Error, MResult};
@@ -91,13 +93,13 @@ pub struct Index {
     pub updates: Updates,
     pub updates_results: UpdatesResults,
-    updates_notifier: crossbeam_channel::Sender<()>,
+    pub(crate) updates_notifier: UpdateEventsEmitter,
 }
 impl Index {
     pub fn document<T: de::DeserializeOwned>(
         &self,
-        reader: &zlmdb::RoTxn,
+        reader: &heed::RoTxn<MainT>,
         attributes: Option<&HashSet<&str>>,
         document_id: DocumentId,
     ) -> MResult<Option<T>> {
@@ -120,14 +122,12 @@ impl Index {
             attributes: attributes.as_ref(),
         };
-        // TODO: currently we return an error if all document fields are missing,
-        //       returning None would have been better
-        Ok(T::deserialize(&mut deserializer).map(Some)?)
+        Ok(Option::<T>::deserialize(&mut deserializer)?)
     }
     pub fn document_attribute<T: de::DeserializeOwned>(
         &self,
-        reader: &zlmdb::RoTxn,
+        reader: &heed::RoTxn<MainT>,
         document_id: DocumentId,
         attribute: SchemaAttr,
     ) -> MResult<Option<T>> {
@@ -140,13 +140,13 @@ impl Index {
         }
     }
-    pub fn schema_update(&self, writer: &mut zlmdb::RwTxn, schema: Schema) -> MResult<u64> {
-        let _ = self.updates_notifier.send(());
+    pub fn schema_update(&self, writer: &mut heed::RwTxn<UpdateT>, schema: Schema) -> MResult<u64> {
+        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
         update::push_schema_update(writer, self.updates, self.updates_results, schema)
     }
-    pub fn customs_update(&self, writer: &mut zlmdb::RwTxn, customs: Vec<u8>) -> ZResult<u64> {
-        let _ = self.updates_notifier.send(());
+    pub fn customs_update(&self, writer: &mut heed::RwTxn<UpdateT>, customs: Vec<u8>) -> ZResult<u64> {
+        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
         update::push_customs_update(writer, self.updates, self.updates_results, customs)
     }
@@ -158,6 +158,14 @@ impl Index {
         )
     }
+    pub fn documents_partial_addition<D>(&self) -> update::DocumentsAddition<D> {
+        update::DocumentsAddition::new_partial(
+            self.updates,
+            self.updates_results,
+            self.updates_notifier.clone(),
+        )
+    }
     pub fn documents_deletion(&self) -> update::DocumentsDeletion {
         update::DocumentsDeletion::new(
             self.updates,
@@ -166,6 +174,11 @@ impl Index {
         )
     }
+    pub fn clear_all(&self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
+        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
+        update::push_clear_all(writer, self.updates, self.updates_results)
+    }
     pub fn synonyms_addition(&self) -> update::SynonymsAddition {
         update::SynonymsAddition::new(
             self.updates,
@@ -182,8 +195,24 @@ impl Index {
         )
     }
-    pub fn current_update_id(&self, reader: &zlmdb::RoTxn) -> MResult<Option<u64>> {
-        match self.updates.last_update_id(reader)? {
+    pub fn stop_words_addition(&self) -> update::StopWordsAddition {
+        update::StopWordsAddition::new(
+            self.updates,
+            self.updates_results,
+            self.updates_notifier.clone(),
+        )
+    }
+    pub fn stop_words_deletion(&self) -> update::StopWordsDeletion {
+        update::StopWordsDeletion::new(
+            self.updates,
+            self.updates_results,
+            self.updates_notifier.clone(),
+        )
+    }
+    pub fn current_update_id(&self, reader: &heed::RoTxn<UpdateT>) -> MResult<Option<u64>> {
+        match self.updates.last_update(reader)? {
             Some((id, _)) => Ok(Some(id)),
             None => Ok(None),
         }
@@ -191,12 +220,40 @@ impl Index {
     pub fn update_status(
         &self,
-        reader: &zlmdb::RoTxn,
+        reader: &heed::RoTxn<UpdateT>,
         update_id: u64,
-    ) -> MResult<update::UpdateStatus> {
+    ) -> MResult<Option<update::UpdateStatus>> {
         update::update_status(reader, self.updates, self.updates_results, update_id)
     }
+    pub fn all_updates_status(&self, reader: &heed::RoTxn<UpdateT>) -> MResult<Vec<update::UpdateStatus>> {
+        let mut updates = Vec::new();
+        let mut last_update_result_id = 0;
+        // retrieve all updates results
+        if let Some((last_id, _)) = self.updates_results.last_update(reader)? {
+            updates.reserve(last_id as usize);
+            for id in 0..=last_id {
+                if let Some(update) = self.update_status(reader, id)? {
+                    updates.push(update);
+                    last_update_result_id = id;
+                }
+            }
+        }
+        // retrieve all enqueued updates
+        if let Some((last_id, _)) = self.updates.last_update(reader)? {
+            for id in last_update_result_id + 1..=last_id {
+                if let Some(update) = self.update_status(reader, id)? {
+                    updates.push(update);
+                }
+            }
+        }
+        Ok(updates)
+    }
     pub fn query_builder(&self) -> QueryBuilder {
         QueryBuilder::new(
             self.main,
@@ -221,9 +278,10 @@ impl Index {
 }
 pub fn create(
-    env: &zlmdb::Env,
+    env: &heed::Env,
+    update_env: &heed::Env,
     name: &str,
-    updates_notifier: crossbeam_channel::Sender<()>,
+    updates_notifier: UpdateEventsEmitter,
 ) -> MResult<Index> {
     // create all the store names
     let main_name = main_name(name);
@@ -236,14 +294,14 @@ pub fn create(
     let updates_results_name = updates_results_name(name);
     // open all the stores
-    let main = env.create_dyn_database(Some(&main_name))?;
+    let main = env.create_poly_database(Some(&main_name))?;
     let postings_lists = env.create_database(Some(&postings_lists_name))?;
     let documents_fields = env.create_database(Some(&documents_fields_name))?;
     let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
     let synonyms = env.create_database(Some(&synonyms_name))?;
     let docs_words = env.create_database(Some(&docs_words_name))?;
-    let updates = env.create_database(Some(&updates_name))?;
-    let updates_results = env.create_database(Some(&updates_results_name))?;
+    let updates = update_env.create_database(Some(&updates_name))?;
+    let updates_results = update_env.create_database(Some(&updates_results_name))?;
     Ok(Index {
         main: Main { main },
@@ -261,9 +319,10 @@ pub fn create(
 }
 pub fn open(
-    env: &zlmdb::Env,
+    env: &heed::Env,
+    update_env: &heed::Env,
     name: &str,
-    updates_notifier: crossbeam_channel::Sender<()>,
+    updates_notifier: UpdateEventsEmitter,
 ) -> MResult<Option<Index>> {
     // create all the store names
     let main_name = main_name(name);
@@ -276,7 +335,7 @@ pub fn open(
     let updates_results_name = updates_results_name(name);
     // open all the stores
-    let main = match env.open_dyn_database(Some(&main_name))? {
+    let main = match env.open_poly_database(Some(&main_name))? {
         Some(main) => main,
         None => return Ok(None),
     };
@@ -300,11 +359,11 @@ pub fn open(
         Some(docs_words) => docs_words,
         None => return Ok(None),
     };
-    let updates = match env.open_database(Some(&updates_name))? {
+    let updates = match update_env.open_database(Some(&updates_name))? {
         Some(updates) => updates,
        None => return Ok(None),
     };
-    let updates_results = match env.open_database(Some(&updates_results_name))? {
+    let updates_results = match update_env.open_database(Some(&updates_results_name))? {
         Some(updates_results) => updates_results,
         None => return Ok(None),
     };
@@ -323,3 +382,20 @@ pub fn open(
         updates_notifier,
     }))
 }
pub fn clear(
writer: &mut heed::RwTxn<MainT>,
update_writer: &mut heed::RwTxn<UpdateT>,
index: &Index,
) -> MResult<()> {
// clear all the stores
index.main.clear(writer)?;
index.postings_lists.clear(writer)?;
index.documents_fields.clear(writer)?;
index.documents_fields_counts.clear(writer)?;
index.synonyms.clear(writer)?;
index.docs_words.clear(writer)?;
index.updates.clear(update_writer)?;
index.updates_results.clear(update_writer)?;
Ok(())
}
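With the split between the main and update environments, enqueueing work and inspecting it both go through the Index handle. A sketch using the new clear_all and the now-optional update_status (the transactions are assumed to be opened by the caller on the proper environments):

use crate::database::UpdateT;
use crate::store::Index;
use crate::MResult;

// Enqueue a "clear all" update, then look at its status right away.
fn clear_index(index: &Index, update_writer: &mut heed::RwTxn<UpdateT>) -> MResult<()> {
    let update_id = index.clear_all(update_writer)?;
    match index.update_status(update_writer, update_id)? {
        Some(status) => println!("update {} is {:?}", update_id, status),
        None => println!("update {} is unknown", update_id),
    }
    Ok(())
}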

View File

@@ -1,31 +1,36 @@
 use crate::DocIndex;
+use crate::database::MainT;
+use heed::types::{ByteSlice, CowSlice};
+use heed::Result as ZResult;
 use sdset::{Set, SetBuf};
 use std::borrow::Cow;
-use zlmdb::types::{ByteSlice, CowSlice};
-use zlmdb::Result as ZResult;
 #[derive(Copy, Clone)]
 pub struct PostingsLists {
-    pub(crate) postings_lists: zlmdb::Database<ByteSlice, CowSlice<DocIndex>>,
+    pub(crate) postings_lists: heed::Database<ByteSlice, CowSlice<DocIndex>>,
 }
 impl PostingsLists {
     pub fn put_postings_list(
         self,
-        writer: &mut zlmdb::RwTxn,
+        writer: &mut heed::RwTxn<MainT>,
         word: &[u8],
         words_indexes: &Set<DocIndex>,
     ) -> ZResult<()> {
         self.postings_lists.put(writer, word, words_indexes)
     }
-    pub fn del_postings_list(self, writer: &mut zlmdb::RwTxn, word: &[u8]) -> ZResult<bool> {
+    pub fn del_postings_list(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> {
         self.postings_lists.delete(writer, word)
     }
+    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
+        self.postings_lists.clear(writer)
+    }
     pub fn postings_list<'txn>(
         self,
-        reader: &'txn zlmdb::RoTxn,
+        reader: &'txn heed::RoTxn<MainT>,
         word: &[u8],
     ) -> ZResult<Option<Cow<'txn, Set<DocIndex>>>> {
         match self.postings_lists.get(reader, word)? {

View File

@@ -1,16 +1,17 @@
+use heed::types::ByteSlice;
+use crate::database::MainT;
+use heed::Result as ZResult;
 use std::sync::Arc;
-use zlmdb::types::ByteSlice;
-use zlmdb::Result as ZResult;
 #[derive(Copy, Clone)]
 pub struct Synonyms {
-    pub(crate) synonyms: zlmdb::Database<ByteSlice, ByteSlice>,
+    pub(crate) synonyms: heed::Database<ByteSlice, ByteSlice>,
 }
 impl Synonyms {
     pub fn put_synonyms(
         self,
-        writer: &mut zlmdb::RwTxn,
+        writer: &mut heed::RwTxn<MainT>,
         word: &[u8],
         synonyms: &fst::Set,
     ) -> ZResult<()> {
@@ -18,15 +19,19 @@ impl Synonyms {
         self.synonyms.put(writer, word, bytes)
     }
-    pub fn del_synonyms(self, writer: &mut zlmdb::RwTxn, word: &[u8]) -> ZResult<bool> {
+    pub fn del_synonyms(self, writer: &mut heed::RwTxn<MainT>, word: &[u8]) -> ZResult<bool> {
         self.synonyms.delete(writer, word)
     }
-    pub fn synonyms(self, reader: &zlmdb::RoTxn, word: &[u8]) -> ZResult<Option<fst::Set>> {
+    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
+        self.synonyms.clear(writer)
+    }
+    pub fn synonyms(self, reader: &heed::RoTxn<MainT>, word: &[u8]) -> ZResult<Option<fst::Set>> {
         match self.synonyms.get(reader, word)? {
             Some(bytes) => {
                 let len = bytes.len();
-                let bytes = Arc::from(bytes);
+                let bytes = Arc::new(bytes.to_owned());
                 let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
                 Ok(Some(fst::Set::from(fst)))
             }

View File

@@ -0,0 +1,65 @@
use super::BEU64;
use crate::database::UpdateT;
use crate::update::Update;
use heed::types::{OwnedType, SerdeJson};
use heed::Result as ZResult;
#[derive(Copy, Clone)]
pub struct Updates {
pub(crate) updates: heed::Database<OwnedType<BEU64>, SerdeJson<Update>>,
}
impl Updates {
// TODO do not trigger deserialize if possible
pub fn last_update(self, reader: &heed::RoTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
match self.updates.last(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
}
// TODO do not trigger deserialize if possible
pub fn first_update(self, reader: &heed::RoTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
match self.updates.first(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
}
// TODO do not trigger deserialize if possible
pub fn get(self, reader: &heed::RoTxn<UpdateT>, update_id: u64) -> ZResult<Option<Update>> {
let update_id = BEU64::new(update_id);
self.updates.get(reader, &update_id)
}
pub fn put_update(
self,
writer: &mut heed::RwTxn<UpdateT>,
update_id: u64,
update: &Update,
) -> ZResult<()> {
// TODO prefer using serde_json?
let update_id = BEU64::new(update_id);
self.updates.put(writer, &update_id, update)
}
pub fn del_update(self, writer: &mut heed::RwTxn<UpdateT>, update_id: u64) -> ZResult<bool> {
let update_id = BEU64::new(update_id);
self.updates.delete(writer, &update_id)
}
pub fn pop_front(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<Option<(u64, Update)>> {
match self.first_update(writer)? {
Some((update_id, update)) => {
let key = BEU64::new(update_id);
self.updates.delete(writer, &key)?;
Ok(Some((update_id, update)))
}
None => Ok(None),
}
}
pub fn clear(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<()> {
self.updates.clear(writer)
}
}
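Updates behaves as a FIFO queue keyed by a big-endian u64, which is what the update loop drains. A sketch of that consumption pattern (the real loop applies each popped update to the main environment instead of printing it):

use crate::database::UpdateT;
use crate::store::Updates;

// Drain every enqueued update in insertion order.
fn drain_updates(updates: Updates, writer: &mut heed::RwTxn<UpdateT>) -> heed::Result<()> {
    while let Some((update_id, _update)) = updates.pop_front(writer)? {
        println!("popped update {}", update_id);
    }
    Ok(())
}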

View File

@@ -0,0 +1,45 @@
use super::BEU64;
use crate::database::UpdateT;
use crate::update::ProcessedUpdateResult;
use heed::types::{OwnedType, SerdeJson};
use heed::Result as ZResult;
#[derive(Copy, Clone)]
pub struct UpdatesResults {
pub(crate) updates_results: heed::Database<OwnedType<BEU64>, SerdeJson<ProcessedUpdateResult>>,
}
impl UpdatesResults {
pub fn last_update(
self,
reader: &heed::RoTxn<UpdateT>,
) -> ZResult<Option<(u64, ProcessedUpdateResult)>> {
match self.updates_results.last(reader)? {
Some((key, data)) => Ok(Some((key.get(), data))),
None => Ok(None),
}
}
pub fn put_update_result(
self,
writer: &mut heed::RwTxn<UpdateT>,
update_id: u64,
update_result: &ProcessedUpdateResult,
) -> ZResult<()> {
let update_id = BEU64::new(update_id);
self.updates_results.put(writer, &update_id, update_result)
}
pub fn update_result(
self,
reader: &heed::RoTxn<UpdateT>,
update_id: u64,
) -> ZResult<Option<ProcessedUpdateResult>> {
let update_id = BEU64::new(update_id);
self.updates_results.get(reader, &update_id)
}
pub fn clear(self, writer: &mut heed::RwTxn<UpdateT>) -> ZResult<()> {
self.updates_results.clear(writer)
}
}

View File

@@ -0,0 +1,34 @@
use crate::database::{MainT, UpdateT};
use crate::update::{next_update_id, Update};
use crate::{store, MResult, RankedMap};
pub fn apply_clear_all(
writer: &mut heed::RwTxn<MainT>,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
) -> MResult<()> {
main_store.put_words_fst(writer, &fst::Set::default())?;
main_store.put_ranked_map(writer, &RankedMap::default())?;
main_store.put_number_of_documents(writer, |_| 0)?;
documents_fields_store.clear(writer)?;
documents_fields_counts_store.clear(writer)?;
postings_lists_store.clear(writer)?;
docs_words_store.clear(writer)?;
Ok(())
}
pub fn push_clear_all(
writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::clear_all();
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
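push_clear_all only enqueues the operation in the update environment; the wipe itself happens when apply_clear_all later runs against the main environment. A condensed sketch of the two halves (in practice they run in separate transactions on separate threads):

use crate::database::{MainT, UpdateT};
use crate::store;
use crate::update::{apply_clear_all, push_clear_all};
use crate::MResult;

// Enqueue the clear-all, then apply it the way the update loop would.
fn clear_now(
    index: &store::Index,
    update_writer: &mut heed::RwTxn<UpdateT>,
    main_writer: &mut heed::RwTxn<MainT>,
) -> MResult<()> {
    let update_id = push_clear_all(update_writer, index.updates, index.updates_results)?;
    apply_clear_all(
        main_writer,
        index.main,
        index.documents_fields,
        index.documents_fields_counts,
        index.postings_lists,
        index.docs_words,
    )?;
    println!("update {} applied", update_id);
    Ok(())
}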

View File

@@ -1,9 +1,11 @@
+use heed::Result as ZResult;
+use crate::database::{MainT, UpdateT};
 use crate::store;
 use crate::update::{next_update_id, Update};
-use zlmdb::Result as ZResult;
 pub fn apply_customs_update(
-    writer: &mut zlmdb::RwTxn,
+    writer: &mut heed::RwTxn<MainT>,
     main_store: store::Main,
     customs: &[u8],
 ) -> ZResult<()> {
@@ -11,14 +13,14 @@ pub fn apply_customs_update(
 }
 pub fn push_customs_update(
-    writer: &mut zlmdb::RwTxn,
+    writer: &mut heed::RwTxn<UpdateT>,
     updates_store: store::Updates,
     updates_results_store: store::UpdatesResults,
     customs: Vec<u8>,
 ) -> ZResult<u64> {
     let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
-    let update = Update::Customs(customs);
+    let update = Update::customs(customs);
     updates_store.put_update(writer, last_update_id, &update)?;
     Ok(last_update_id)
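Customs are an opaque byte payload: pushing goes through the update queue while reading comes straight from the main store. An in-crate sketch (the conversion from heed::Error into the crate error type is assumed to exist, as it is used elsewhere in the crate):

use crate::database::{MainT, UpdateT};
use crate::store::Index;
use crate::MResult;

// Store an arbitrary "customs" payload and read back whatever is currently applied.
fn roundtrip_customs(
    index: &Index,
    update_writer: &mut heed::RwTxn<UpdateT>,
    main_reader: &heed::RoTxn<MainT>,
) -> MResult<Option<Vec<u8>>> {
    index.customs_update(update_writer, b"settings-v1".to_vec())?;
    let current = index.main.customs(main_reader)?;
    Ok(current.map(|bytes| bytes.to_vec()))
}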

View File

@@ -0,0 +1,411 @@
use std::collections::HashMap;
use fst::{set::OpBuilder, SetBuilder};
use sdset::{duo::Union, SetOperation};
use serde::{Deserialize, Serialize};
use crate::database::{MainT, UpdateT};
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::raw_indexer::RawIndexer;
use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer};
use crate::store;
use crate::update::{apply_documents_deletion, next_update_id, Update};
use crate::{Error, MResult, RankedMap};
pub struct DocumentsAddition<D> {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
documents: Vec<D>,
is_partial: bool,
}
impl<D> DocumentsAddition<D> {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
) -> DocumentsAddition<D> {
DocumentsAddition {
updates_store,
updates_results_store,
updates_notifier,
documents: Vec::new(),
is_partial: false,
}
}
pub fn new_partial(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
) -> DocumentsAddition<D> {
DocumentsAddition {
updates_store,
updates_results_store,
updates_notifier,
documents: Vec::new(),
is_partial: true,
}
}
pub fn update_document(&mut self, document: D) {
self.documents.push(document);
}
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64>
where
D: serde::Serialize,
{
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_documents_addition(
writer,
self.updates_store,
self.updates_results_store,
self.documents,
self.is_partial,
)?;
Ok(update_id)
}
}
impl<D> Extend<D> for DocumentsAddition<D> {
fn extend<T: IntoIterator<Item = D>>(&mut self, iter: T) {
self.documents.extend(iter)
}
}
pub fn push_documents_addition<D: serde::Serialize>(
writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
addition: Vec<D>,
is_partial: bool,
) -> MResult<u64> {
let mut values = Vec::with_capacity(addition.len());
for add in addition {
let vec = serde_json::to_vec(&add)?;
let add = serde_json::from_slice(&vec)?;
values.push(add);
}
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = if is_partial {
Update::documents_partial(values)
} else {
Update::documents_addition(values)
};
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
pub fn apply_documents_addition<'a, 'b>(
writer: &'a mut heed::RwTxn<'b, MainT>,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
addition: Vec<HashMap<String, serde_json::Value>>,
) -> MResult<()> {
let mut documents_additions = HashMap::new();
let schema = match main_store.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
let identifier = schema.identifier_name();
// 1. store documents ids for future deletion
for document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
documents_additions.insert(document_id, document);
}
// 2. remove the documents posting lists
let number_of_inserted_documents = documents_additions.len();
let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
apply_documents_deletion(
writer,
main_store,
documents_fields_store,
documents_fields_counts_store,
postings_lists_store,
docs_words_store,
documents_ids,
)?;
let mut ranked_map = match main_store.ranked_map(writer)? {
Some(ranked_map) => ranked_map,
None => RankedMap::default(),
};
let stop_words = match main_store.stop_words_fst(writer)? {
Some(stop_words) => stop_words,
None => fst::Set::default(),
};
// 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words);
for (document_id, document) in documents_additions {
let serializer = Serializer {
txn: writer,
schema: &schema,
document_store: documents_fields_store,
document_fields_counts: documents_fields_counts_store,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
write_documents_addition_index(
writer,
main_store,
postings_lists_store,
docs_words_store,
&ranked_map,
number_of_inserted_documents,
indexer,
)
}
pub fn apply_documents_partial_addition<'a, 'b>(
writer: &'a mut heed::RwTxn<'b, MainT>,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
addition: Vec<HashMap<String, serde_json::Value>>,
) -> MResult<()> {
let mut documents_additions = HashMap::new();
let schema = match main_store.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
let identifier = schema.identifier_name();
// 1. store documents ids for future deletion
for mut document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
let mut deserializer = Deserializer {
document_id,
reader: writer,
documents_fields: documents_fields_store,
schema: &schema,
attributes: None,
};
// retrieve the old document and
// update the new one with missing keys found in the old one
let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?;
if let Some(old_document) = result {
for (key, value) in old_document {
document.entry(key).or_insert(value);
}
}
documents_additions.insert(document_id, document);
}
// 2. remove the documents posting lists
let number_of_inserted_documents = documents_additions.len();
let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
apply_documents_deletion(
writer,
main_store,
documents_fields_store,
documents_fields_counts_store,
postings_lists_store,
docs_words_store,
documents_ids,
)?;
let mut ranked_map = match main_store.ranked_map(writer)? {
Some(ranked_map) => ranked_map,
None => RankedMap::default(),
};
let stop_words = match main_store.stop_words_fst(writer)? {
Some(stop_words) => stop_words,
None => fst::Set::default(),
};
// 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words);
for (document_id, document) in documents_additions {
let serializer = Serializer {
txn: writer,
schema: &schema,
document_store: documents_fields_store,
document_fields_counts: documents_fields_counts_store,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
write_documents_addition_index(
writer,
main_store,
postings_lists_store,
docs_words_store,
&ranked_map,
number_of_inserted_documents,
indexer,
)
}
pub fn reindex_all_documents(
writer: &mut heed::RwTxn<MainT>,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
) -> MResult<()> {
let schema = match main_store.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
let mut ranked_map = RankedMap::default();
// 1. retrieve all documents ids
let mut documents_ids_to_reindex = Vec::new();
for result in documents_fields_counts_store.documents_ids(writer)? {
let document_id = result?;
documents_ids_to_reindex.push(document_id);
}
// 2. remove the documents posting lists
main_store.put_words_fst(writer, &fst::Set::default())?;
main_store.put_ranked_map(writer, &ranked_map)?;
main_store.put_number_of_documents(writer, |_| 0)?;
postings_lists_store.clear(writer)?;
docs_words_store.clear(writer)?;
// 3. re-index chunks of documents (otherwise we make the borrow checker unhappy)
for documents_ids in documents_ids_to_reindex.chunks(100) {
let stop_words = match main_store.stop_words_fst(writer)? {
Some(stop_words) => stop_words,
None => fst::Set::default(),
};
let number_of_inserted_documents = documents_ids.len();
let mut indexer = RawIndexer::new(stop_words);
let mut ram_store = HashMap::new();
for document_id in documents_ids {
for result in documents_fields_store.document_fields(writer, *document_id)? {
let (attr, bytes) = result?;
let value: serde_json::Value = serde_json::from_slice(bytes)?;
ram_store.insert((document_id, attr), value);
}
for ((docid, attr), value) in ram_store.drain() {
serialize_value(
writer,
attr,
schema.props(attr),
*docid,
documents_fields_store,
documents_fields_counts_store,
&mut indexer,
&mut ranked_map,
&value,
)?;
}
}
// 4. write the new index in the main store
write_documents_addition_index(
writer,
main_store,
postings_lists_store,
docs_words_store,
&ranked_map,
number_of_inserted_documents,
indexer,
)?;
}
Ok(())
}
pub fn write_documents_addition_index(
writer: &mut heed::RwTxn<MainT>,
main_store: store::Main,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
ranked_map: &RankedMap,
number_of_inserted_documents: usize,
indexer: RawIndexer,
) -> MResult<()> {
let indexed = indexer.build();
let mut delta_words_builder = SetBuilder::memory();
for (word, delta_set) in indexed.words_doc_indexes {
delta_words_builder.insert(&word).unwrap();
let set = match postings_lists_store.postings_list(writer, &word)? {
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
None => delta_set,
};
postings_lists_store.put_postings_list(writer, &word, &set)?;
}
for (id, words) in indexed.docs_words {
docs_words_store.put_doc_words(writer, id, &words)?;
}
let delta_words = delta_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let words = match main_store.words_fst(writer)? {
Some(words) => {
let op = OpBuilder::new()
.add(words.stream())
.add(delta_words.stream())
.r#union();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
}
None => delta_words,
};
main_store.put_words_fst(writer, &words)?;
main_store.put_ranked_map(writer, ranked_map)?;
main_store.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;
Ok(())
}
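From the caller's side a documents addition is created from the Index, filled with any serde::Serialize documents and finalized inside an update write transaction, assuming the existing documents_addition() constructor on Index. A sketch (the Movie type is made up and the schema identifier is assumed to be id):

use serde::Serialize;

use crate::database::UpdateT;
use crate::store::Index;
use crate::MResult;

#[derive(Serialize)]
struct Movie {
    id: u64,       // must match the schema identifier attribute
    title: String,
}

// Enqueue a full documents addition; the update loop indexes it later.
fn add_movies(index: &Index, update_writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
    let mut addition = index.documents_addition();
    addition.update_document(Movie { id: 1, title: "Carol".to_string() });
    addition.update_document(Movie { id: 2, title: "Wonder Woman".to_string() });
    addition.finalize(update_writer)
}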

View File

@@ -1,9 +1,11 @@
 use std::collections::{BTreeSet, HashMap, HashSet};
 use fst::{SetBuilder, Streamer};
-use meilidb_schema::Schema;
+use meilisearch_schema::Schema;
 use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
+use crate::database::{MainT, UpdateT};
+use crate::database::{UpdateEvent, UpdateEventsEmitter};
 use crate::serde::extract_document_id;
 use crate::store;
 use crate::update::{next_update_id, Update};
@@ -12,7 +14,7 @@ use crate::{DocumentId, Error, MResult, RankedMap};
 pub struct DocumentsDeletion {
     updates_store: store::Updates,
     updates_results_store: store::UpdatesResults,
-    updates_notifier: crossbeam_channel::Sender<()>,
+    updates_notifier: UpdateEventsEmitter,
     documents: Vec<DocumentId>,
 }
@@ -20,7 +22,7 @@ impl DocumentsDeletion {
     pub fn new(
         updates_store: store::Updates,
         updates_results_store: store::UpdatesResults,
-        updates_notifier: crossbeam_channel::Sender<()>,
+        updates_notifier: UpdateEventsEmitter,
     ) -> DocumentsDeletion {
         DocumentsDeletion {
             updates_store,
@@ -49,8 +51,8 @@ impl DocumentsDeletion {
         Ok(())
     }
-    pub fn finalize(self, writer: &mut zlmdb::RwTxn) -> MResult<u64> {
-        let _ = self.updates_notifier.send(());
+    pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
+        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
         let update_id = push_documents_deletion(
             writer,
             self.updates_store,
@@ -68,27 +70,26 @@ impl Extend<DocumentId> for DocumentsDeletion {
 }
 pub fn push_documents_deletion(
-    writer: &mut zlmdb::RwTxn,
+    writer: &mut heed::RwTxn<UpdateT>,
     updates_store: store::Updates,
     updates_results_store: store::UpdatesResults,
     deletion: Vec<DocumentId>,
 ) -> MResult<u64> {
     let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
-    let update = Update::DocumentsDeletion(deletion);
+    let update = Update::documents_deletion(deletion);
     updates_store.put_update(writer, last_update_id, &update)?;
     Ok(last_update_id)
 }
 pub fn apply_documents_deletion(
-    writer: &mut zlmdb::RwTxn,
+    writer: &mut heed::RwTxn<MainT>,
     main_store: store::Main,
     documents_fields_store: store::DocumentsFields,
     documents_fields_counts_store: store::DocumentsFieldsCounts,
     postings_lists_store: store::PostingsLists,
     docs_words_store: store::DocsWords,
-    mut ranked_map: RankedMap,
     deletion: Vec<DocumentId>,
 ) -> MResult<()> {
     let idset = SetBuf::from_dirty(deletion);
@@ -98,6 +99,11 @@ pub fn apply_documents_deletion(
         None => return Err(Error::SchemaMissing),
     };
+    let mut ranked_map = match main_store.ranked_map(writer)? {
+        Some(ranked_map) => ranked_map,
+        None => RankedMap::default(),
+    };
     // collect the ranked attributes according to the schema
     let ranked_attrs: Vec<_> = schema
         .iter()
@@ -181,7 +187,6 @@ pub fn apply_documents_deletion(
     main_store.put_words_fst(writer, &words)?;
     main_store.put_ranked_map(writer, &ranked_map)?;
     main_store.put_number_of_documents(writer, |old| old - deleted_documents_len)?;
     Ok(())
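A documents deletion is filled through its Extend implementation and finalized the same way as an addition. A sketch (internal DocumentIds are used directly here; resolving external identifiers through the schema is out of scope):

use crate::database::UpdateT;
use crate::store::Index;
use crate::{DocumentId, MResult};

// Enqueue the deletion of two documents by their internal ids.
fn delete_documents(index: &Index, update_writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
    let mut deletion = index.documents_deletion();
    deletion.extend(vec![DocumentId(1), DocumentId(2)]);
    deletion.finalize(update_writer)
}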

View File

@@ -0,0 +1,434 @@
mod clear_all;
mod customs_update;
mod documents_addition;
mod documents_deletion;
mod schema_update;
mod stop_words_addition;
mod stop_words_deletion;
mod synonyms_addition;
mod synonyms_deletion;
pub use self::clear_all::{apply_clear_all, push_clear_all};
pub use self::customs_update::{apply_customs_update, push_customs_update};
pub use self::documents_addition::{
apply_documents_addition, apply_documents_partial_addition, DocumentsAddition,
};
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
pub use self::schema_update::{apply_schema_update, push_schema_update};
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
use std::cmp;
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::time::Instant;
use chrono::{DateTime, Utc};
use heed::Result as ZResult;
use log::debug;
use serde::{Deserialize, Serialize};
use crate::{store, DocumentId, MResult};
use crate::database::{MainT, UpdateT};
use meilisearch_schema::Schema;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Update {
data: UpdateData,
enqueued_at: DateTime<Utc>,
}
impl Update {
fn clear_all() -> Update {
Update {
data: UpdateData::ClearAll,
enqueued_at: Utc::now(),
}
}
fn schema(data: Schema) -> Update {
Update {
data: UpdateData::Schema(data),
enqueued_at: Utc::now(),
}
}
fn customs(data: Vec<u8>) -> Update {
Update {
data: UpdateData::Customs(data),
enqueued_at: Utc::now(),
}
}
fn documents_addition(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
Update {
data: UpdateData::DocumentsAddition(data),
enqueued_at: Utc::now(),
}
}
fn documents_partial(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
Update {
data: UpdateData::DocumentsPartial(data),
enqueued_at: Utc::now(),
}
}
fn documents_deletion(data: Vec<DocumentId>) -> Update {
Update {
data: UpdateData::DocumentsDeletion(data),
enqueued_at: Utc::now(),
}
}
fn synonyms_addition(data: BTreeMap<String, Vec<String>>) -> Update {
Update {
data: UpdateData::SynonymsAddition(data),
enqueued_at: Utc::now(),
}
}
fn synonyms_deletion(data: BTreeMap<String, Option<Vec<String>>>) -> Update {
Update {
data: UpdateData::SynonymsDeletion(data),
enqueued_at: Utc::now(),
}
}
fn stop_words_addition(data: BTreeSet<String>) -> Update {
Update {
data: UpdateData::StopWordsAddition(data),
enqueued_at: Utc::now(),
}
}
fn stop_words_deletion(data: BTreeSet<String>) -> Update {
Update {
data: UpdateData::StopWordsDeletion(data),
enqueued_at: Utc::now(),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateData {
ClearAll,
Schema(Schema),
Customs(Vec<u8>),
DocumentsAddition(Vec<HashMap<String, serde_json::Value>>),
DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
DocumentsDeletion(Vec<DocumentId>),
SynonymsAddition(BTreeMap<String, Vec<String>>),
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
StopWordsAddition(BTreeSet<String>),
StopWordsDeletion(BTreeSet<String>),
}
impl UpdateData {
pub fn update_type(&self) -> UpdateType {
match self {
UpdateData::ClearAll => UpdateType::ClearAll,
UpdateData::Schema(_) => UpdateType::Schema,
UpdateData::Customs(_) => UpdateType::Customs,
UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition {
number: addition.len(),
},
UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial {
number: addition.len(),
},
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
number: deletion.len(),
},
UpdateData::SynonymsAddition(addition) => UpdateType::SynonymsAddition {
number: addition.len(),
},
UpdateData::SynonymsDeletion(deletion) => UpdateType::SynonymsDeletion {
number: deletion.len(),
},
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
number: addition.len(),
},
UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion {
number: deletion.len(),
},
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "name")]
pub enum UpdateType {
ClearAll,
Schema,
Customs,
DocumentsAddition { number: usize },
DocumentsPartial { number: usize },
DocumentsDeletion { number: usize },
SynonymsAddition { number: usize },
SynonymsDeletion { number: usize },
StopWordsAddition { number: usize },
StopWordsDeletion { number: usize },
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
pub duration: f64, // in seconds
pub enqueued_at: DateTime<Utc>,
pub processed_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
pub enqueued_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatus {
Enqueued {
#[serde(flatten)]
content: EnqueuedUpdateResult,
},
Failed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
Processed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
}
pub fn update_status(
update_reader: &heed::RoTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
update_id: u64,
) -> MResult<Option<UpdateStatus>> {
match updates_results_store.update_result(update_reader, update_id)? {
Some(result) => {
if result.error.is_some() {
Ok(Some(UpdateStatus::Failed { content: result }))
} else {
Ok(Some(UpdateStatus::Processed { content: result }))
}
},
None => match updates_store.get(update_reader, update_id)? {
Some(update) => Ok(Some(UpdateStatus::Enqueued {
content: EnqueuedUpdateResult {
update_id,
update_type: update.data.update_type(),
enqueued_at: update.enqueued_at,
},
})),
None => Ok(None),
},
}
}
pub fn next_update_id(
update_writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
) -> ZResult<u64> {
let last_update = updates_store.last_update(update_writer)?;
let last_update = last_update.map(|(n, _)| n);
let last_update_results_id = updates_results_store.last_update(update_writer)?;
let last_update_results_id = last_update_results_id.map(|(n, _)| n);
let max_update_id = cmp::max(last_update, last_update_results_id);
let new_update_id = max_update_id.map_or(0, |n| n + 1);
Ok(new_update_id)
}
pub fn update_task<'a, 'b>(
writer: &'a mut heed::RwTxn<'b, MainT>,
index: &store::Index,
update_id: u64,
update: Update,
) -> MResult<ProcessedUpdateResult> {
debug!("Processing update number {}", update_id);
let Update { enqueued_at, data } = update;
let (update_type, result, duration) = match data {
UpdateData::ClearAll => {
let start = Instant::now();
let update_type = UpdateType::ClearAll;
let result = apply_clear_all(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
);
(update_type, result, start.elapsed())
}
UpdateData::Schema(schema) => {
let start = Instant::now();
let update_type = UpdateType::Schema;
let result = apply_schema_update(
writer,
&schema,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
);
(update_type, result, start.elapsed())
}
UpdateData::Customs(customs) => {
let start = Instant::now();
let update_type = UpdateType::Customs;
let result = apply_customs_update(writer, index.main, &customs).map_err(Into::into);
(update_type, result, start.elapsed())
}
UpdateData::DocumentsAddition(documents) => {
let start = Instant::now();
let update_type = UpdateType::DocumentsAddition {
number: documents.len(),
};
let result = apply_documents_addition(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
documents,
);
(update_type, result, start.elapsed())
}
UpdateData::DocumentsPartial(documents) => {
let start = Instant::now();
let update_type = UpdateType::DocumentsPartial {
number: documents.len(),
};
let result = apply_documents_partial_addition(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
documents,
);
(update_type, result, start.elapsed())
}
UpdateData::DocumentsDeletion(documents) => {
let start = Instant::now();
let update_type = UpdateType::DocumentsDeletion {
number: documents.len(),
};
let result = apply_documents_deletion(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
documents,
);
(update_type, result, start.elapsed())
}
UpdateData::SynonymsAddition(synonyms) => {
let start = Instant::now();
let update_type = UpdateType::SynonymsAddition {
number: synonyms.len(),
};
let result = apply_synonyms_addition(writer, index.main, index.synonyms, synonyms);
(update_type, result, start.elapsed())
}
UpdateData::SynonymsDeletion(synonyms) => {
let start = Instant::now();
let update_type = UpdateType::SynonymsDeletion {
number: synonyms.len(),
};
let result = apply_synonyms_deletion(writer, index.main, index.synonyms, synonyms);
(update_type, result, start.elapsed())
}
UpdateData::StopWordsAddition(stop_words) => {
let start = Instant::now();
let update_type = UpdateType::StopWordsAddition {
number: stop_words.len(),
};
let result =
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
(update_type, result, start.elapsed())
}
UpdateData::StopWordsDeletion(stop_words) => {
let start = Instant::now();
let update_type = UpdateType::StopWordsDeletion {
number: stop_words.len(),
};
let result = apply_stop_words_deletion(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
stop_words,
);
(update_type, result, start.elapsed())
}
};
debug!(
"Processed update number {} {:?} {:?}",
update_id, update_type, result
);
let status = ProcessedUpdateResult {
update_id,
update_type,
error: result.map_err(|e| e.to_string()).err(),
duration: duration.as_secs_f64(),
enqueued_at,
processed_at: Utc::now(),
};
Ok(status)
}

View File

@ -0,0 +1,76 @@
use meilisearch_schema::{Diff, Schema};
use crate::database::{MainT, UpdateT};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{error::UnsupportedOperation, store, MResult};
pub fn apply_schema_update(
writer: &mut heed::RwTxn<MainT>,
new_schema: &Schema,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
) -> MResult<()> {
use UnsupportedOperation::{
CanOnlyIntroduceNewSchemaAttributesAtEnd, CannotRemoveSchemaAttribute,
CannotReorderSchemaAttribute, CannotUpdateSchemaIdentifier,
};
let mut need_full_reindexing = false;
if let Some(old_schema) = main_store.schema(writer)? {
for diff in meilisearch_schema::diff(&old_schema, new_schema) {
match diff {
Diff::IdentChange { .. } => return Err(CannotUpdateSchemaIdentifier.into()),
Diff::AttrMove { .. } => return Err(CannotReorderSchemaAttribute.into()),
Diff::AttrPropsChange { old, new, .. } => {
if new.indexed != old.indexed {
need_full_reindexing = true;
}
if new.ranked != old.ranked {
need_full_reindexing = true;
}
}
Diff::NewAttr { pos, .. } => {
// new attribute not at the end of the schema
if pos < old_schema.number_of_attributes() {
return Err(CanOnlyIntroduceNewSchemaAttributesAtEnd.into());
}
}
Diff::RemovedAttr { .. } => return Err(CannotRemoveSchemaAttribute.into()),
}
}
}
main_store.put_schema(writer, new_schema)?;
if need_full_reindexing {
reindex_all_documents(
writer,
main_store,
documents_fields_store,
documents_fields_counts_store,
postings_lists_store,
docs_words_store,
)?
}
Ok(())
}
pub fn push_schema_update(
writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
schema: Schema,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::schema(schema);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}

View File

@ -0,0 +1,118 @@
use std::collections::BTreeSet;
use fst::{set::OpBuilder, SetBuilder};
use crate::database::{MainT, UpdateT};
use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
pub struct StopWordsAddition {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
stop_words: BTreeSet<String>,
}
impl StopWordsAddition {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
) -> StopWordsAddition {
StopWordsAddition {
updates_store,
updates_results_store,
updates_notifier,
stop_words: BTreeSet::new(),
}
}
pub fn add_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
let stop_word = normalize_str(stop_word.as_ref());
self.stop_words.insert(stop_word);
}
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_stop_words_addition(
writer,
self.updates_store,
self.updates_results_store,
self.stop_words,
)?;
Ok(update_id)
}
}
pub fn push_stop_words_addition(
writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
addition: BTreeSet<String>,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::stop_words_addition(addition);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
pub fn apply_stop_words_addition(
writer: &mut heed::RwTxn<MainT>,
main_store: store::Main,
postings_lists_store: store::PostingsLists,
addition: BTreeSet<String>,
) -> MResult<()> {
let mut stop_words_builder = SetBuilder::memory();
for word in addition {
stop_words_builder.insert(&word).unwrap();
// we remove every posting list associated with a new stop word
postings_lists_store.del_postings_list(writer, word.as_bytes())?;
}
// create the new delta stop words fst
let delta_stop_words = stop_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
// we also need to remove all the stop words from the main fst
if let Some(word_fst) = main_store.words_fst(writer)? {
let op = OpBuilder::new()
.add(&word_fst)
.add(&delta_stop_words)
.difference();
let mut word_fst_builder = SetBuilder::memory();
word_fst_builder.extend_stream(op).unwrap();
let word_fst = word_fst_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
main_store.put_words_fst(writer, &word_fst)?;
}
// now we add all of these stop words to the stop words fst of the main store
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
let op = OpBuilder::new()
.add(&stop_words_fst)
.add(&delta_stop_words)
.r#union();
let mut stop_words_builder = SetBuilder::memory();
stop_words_builder.extend_stream(op).unwrap();
let stop_words_fst = stop_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
Ok(())
}
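A minimal, hypothetical driver for the StopWordsAddition builder above. Where the two store handles and the UpdateEventsEmitter come from is assumed here (they are normally owned by an opened index); only the builder's own methods are taken from the code above.

let mut addition = StopWordsAddition::new(
    updates_store,          // store::Updates, assumed to be in scope
    updates_results_store,  // store::UpdatesResults, assumed to be in scope
    updates_notifier,       // UpdateEventsEmitter, assumed to be in scope
);
addition.add_stop_word("the");
addition.add_stop_word("of");

// finalize() enqueues the update inside an UpdateT write transaction
// (assumed to be open as `update_writer`) and returns the new update id
let update_id = addition.finalize(&mut update_writer)?;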

View File

@ -0,0 +1,114 @@
use std::collections::BTreeSet;
use fst::{set::OpBuilder, SetBuilder};
use crate::database::{MainT, UpdateT};
use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
pub struct StopWordsDeletion {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
stop_words: BTreeSet<String>,
}
impl StopWordsDeletion {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
) -> StopWordsDeletion {
StopWordsDeletion {
updates_store,
updates_results_store,
updates_notifier,
stop_words: BTreeSet::new(),
}
}
pub fn delete_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
let stop_word = normalize_str(stop_word.as_ref());
self.stop_words.insert(stop_word);
}
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_stop_words_deletion(
writer,
self.updates_store,
self.updates_results_store,
self.stop_words,
)?;
Ok(update_id)
}
}
pub fn push_stop_words_deletion(
writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
deletion: BTreeSet<String>,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::stop_words_deletion(deletion);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
pub fn apply_stop_words_deletion(
writer: &mut heed::RwTxn<MainT>,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
deletion: BTreeSet<String>,
) -> MResult<()> {
let mut stop_words_builder = SetBuilder::memory();
for word in deletion {
stop_words_builder.insert(&word).unwrap();
}
// create the new delta stop words fst
let delta_stop_words = stop_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
// now we delete all of these stop words from the main store
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
let op = OpBuilder::new()
.add(&stop_words_fst)
.add(&delta_stop_words)
.difference();
let mut stop_words_builder = SetBuilder::memory();
stop_words_builder.extend_stream(op).unwrap();
let stop_words_fst = stop_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
// now that we have set up the stop words
// let's reindex everything...
reindex_all_documents(
writer,
main_store,
documents_fields_store,
documents_fields_counts_store,
postings_lists_store,
docs_words_store,
)?;
Ok(())
}

View File

@ -3,14 +3,16 @@ use std::collections::BTreeMap;
use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf;
+use crate::database::{MainT, UpdateT};
use crate::automaton::normalize_str;
+use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
pub struct SynonymsAddition {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
-updates_notifier: crossbeam_channel::Sender<()>,
+updates_notifier: UpdateEventsEmitter,
synonyms: BTreeMap<String, Vec<String>>,
}
@ -18,7 +20,7 @@ impl SynonymsAddition {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
-updates_notifier: crossbeam_channel::Sender<()>,
+updates_notifier: UpdateEventsEmitter,
) -> SynonymsAddition {
SynonymsAddition {
updates_store,
@ -42,8 +44,8 @@ impl SynonymsAddition {
.extend(alternatives);
}
-pub fn finalize(self, writer: &mut zlmdb::RwTxn) -> MResult<u64> {
+pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
-let _ = self.updates_notifier.send(());
+let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_synonyms_addition(
writer,
self.updates_store,
@ -55,21 +57,21 @@ impl SynonymsAddition {
}
pub fn push_synonyms_addition(
-writer: &mut zlmdb::RwTxn,
+writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
addition: BTreeMap<String, Vec<String>>,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
-let update = Update::SynonymsAddition(addition);
+let update = Update::synonyms_addition(addition);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
pub fn apply_synonyms_addition(
-writer: &mut zlmdb::RwTxn,
+writer: &mut heed::RwTxn<MainT>,
main_store: store::Main,
synonyms_store: store::Synonyms,
addition: BTreeMap<String, Vec<String>>,

View File

@ -4,14 +4,16 @@ use std::iter::FromIterator;
use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf;
+use crate::database::{MainT, UpdateT};
use crate::automaton::normalize_str;
+use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
pub struct SynonymsDeletion {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
-updates_notifier: crossbeam_channel::Sender<()>,
+updates_notifier: UpdateEventsEmitter,
synonyms: BTreeMap<String, Option<Vec<String>>>,
}
@ -19,7 +21,7 @@ impl SynonymsDeletion {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
-updates_notifier: crossbeam_channel::Sender<()>,
+updates_notifier: UpdateEventsEmitter,
) -> SynonymsDeletion {
SynonymsDeletion {
updates_store,
@ -49,8 +51,8 @@ impl SynonymsDeletion {
}
}
-pub fn finalize(self, writer: &mut zlmdb::RwTxn) -> MResult<u64> {
+pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
-let _ = self.updates_notifier.send(());
+let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_synonyms_deletion(
writer,
self.updates_store,
@ -62,21 +64,21 @@ impl SynonymsDeletion {
}
pub fn push_synonyms_deletion(
-writer: &mut zlmdb::RwTxn,
+writer: &mut heed::RwTxn<UpdateT>,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
-let update = Update::SynonymsDeletion(deletion);
+let update = Update::synonyms_deletion(deletion);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
pub fn apply_synonyms_deletion(
-writer: &mut zlmdb::RwTxn,
+writer: &mut heed::RwTxn<MainT>,
main_store: store::Main,
synonyms_store: store::Synonyms,
deletion: BTreeMap<String, Option<Vec<String>>>,

View File

@ -0,0 +1,64 @@
[package]
name = "meilisearch-http"
version = "0.8.4"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
edition = "2018"
[[bin]]
name = "meilisearch"
path = "src/main.rs"
[dependencies]
bincode = "1.2.0"
chrono = { version = "0.4.9", features = ["serde"] }
crossbeam-channel = "0.4.0"
env_logger = "0.7.1"
heed = "0.6.0"
http = "0.1.19"
indexmap = { version = "1.3.0", features = ["serde-1"] }
log = "0.4.8"
main_error = "0.1.0"
meilisearch-core = { path = "../meilisearch-core", version = "0.8.4" }
meilisearch-schema = { path = "../meilisearch-schema", version = "0.8.4" }
pretty-bytes = "0.2.2"
rand = "0.7.2"
rayon = "1.2.0"
serde = { version = "1.0.101", features = ["derive"] }
serde_json = { version = "1.0.41", features = ["preserve_order"] }
serde_qs = "0.5.1"
siphasher = "0.3.1"
structopt = "0.3.3"
sysinfo = "0.9.5"
ureq = { version = "0.11.2", features = ["tls"], default-features = false }
walkdir = "2.2.9"
whoami = "0.6"
[dependencies.async-compression]
default-features = false
features = ["stream", "gzip", "zlib", "brotli", "zstd"]
version = "=0.1.0-alpha.7"
[dependencies.tide]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"
[dependencies.tide-log]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"
[dependencies.tide-slog]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"
[dependencies.tide-compression]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"
[build-dependencies]
vergen = "3.0.4"
[target.'cfg(unix)'.dependencies]
jemallocator = "0.3.2"

10
meilisearch-http/build.rs Normal file
View File

@ -0,0 +1,10 @@
use vergen::{generate_cargo_keys, ConstantsFlags};
fn main() {
// Set up the flags, toggling off the 'SEMVER_FROM_CARGO_PKG' flag
let mut flags = ConstantsFlags::all();
flags.toggle(ConstantsFlags::SEMVER_FROM_CARGO_PKG);
// Generate the 'cargo:' key output
generate_cargo_keys(flags).expect("Unable to generate the cargo keys!");
}

View File

@ -0,0 +1,69 @@
use std::hash::{Hash, Hasher};
use std::thread;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use log::error;
use serde::Serialize;
use serde_qs as qs;
use siphasher::sip::SipHasher;
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
#[derive(Debug, Serialize)]
struct Event<'a> {
user_id: &'a str,
event_type: &'a str,
device_id: &'a str,
time: u64,
}
#[derive(Debug, Serialize)]
struct AmplitudeRequest<'a> {
api_key: &'a str,
event: &'a str,
}
pub fn analytics_sender() {
let username = whoami::username();
let hostname = whoami::hostname();
let platform = whoami::platform();
let uid = username + &hostname + &platform.to_string();
let mut hasher = SipHasher::new();
uid.hash(&mut hasher);
let hash = hasher.finish();
let uid = format!("{:X}", hash);
let platform = platform.to_string();
loop {
let n = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
let user_id = &uid;
let device_id = &platform;
let time = n.as_secs();
let event_type = "runtime_tick";
let event = Event {
user_id,
event_type,
device_id,
time,
};
let event = serde_json::to_string(&event).unwrap();
let request = AmplitudeRequest {
api_key: AMPLITUDE_API_KEY,
event: &event,
};
let body = qs::to_string(&request).unwrap();
let response = ureq::post("https://api.amplitude.com/httpapi").send_string(&body);
if !response.ok() {
let body = response.into_string().unwrap();
error!("Unsuccessful call to Amplitude: {}", body);
}
thread::sleep(Duration::from_secs(86_400)) // one day
}
}

View File

@ -0,0 +1,129 @@
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use heed::types::{SerdeBincode, Str};
use log::error;
use meilisearch_core::{Database, MainT, UpdateT, Error as MError, MResult};
use sysinfo::Pid;
use crate::option::Opt;
use crate::routes::index::index_update_callback;
const LAST_UPDATE_KEY: &str = "last-update";
type SerdeDatetime = SerdeBincode<DateTime<Utc>>;
#[derive(Clone)]
pub struct Data {
inner: Arc<DataInner>,
}
impl Deref for Data {
type Target = DataInner;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
#[derive(Clone)]
pub struct DataInner {
pub db: Arc<Database>,
pub db_path: String,
pub api_key: Option<String>,
pub server_pid: Pid,
}
impl DataInner {
pub fn is_indexing(&self, reader: &heed::RoTxn<UpdateT>, index: &str) -> MResult<Option<bool>> {
match self.db.open_index(&index) {
Some(index) => index.current_update_id(&reader).map(|u| Some(u.is_some())),
None => Ok(None),
}
}
pub fn last_update(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<DateTime<Utc>>> {
match self
.db
.common_store()
.get::<_, Str, SerdeDatetime>(reader, LAST_UPDATE_KEY)?
{
Some(datetime) => Ok(Some(datetime)),
None => Ok(None),
}
}
pub fn set_last_update(&self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
self.db
.common_store()
.put::<_, Str, SerdeDatetime>(writer, LAST_UPDATE_KEY, &Utc::now())
.map_err(Into::into)
}
pub fn compute_stats(&self, writer: &mut heed::RwTxn<MainT>, index_uid: &str) -> MResult<()> {
let index = match self.db.open_index(&index_uid) {
Some(index) => index,
None => {
error!("Impossible to retrieve index {}", index_uid);
return Ok(());
}
};
let schema = match index.main.schema(&writer)? {
Some(schema) => schema,
None => return Ok(()),
};
let all_documents_fields = index
.documents_fields_counts
.all_documents_fields_counts(&writer)?;
// count fields frequencies
let mut fields_frequency = HashMap::<_, usize>::new();
for result in all_documents_fields {
let (_, attr, _) = result?;
*fields_frequency.entry(attr).or_default() += 1;
}
// convert attributes to their names
let frequency: HashMap<_, _> = fields_frequency
.into_iter()
.map(|(a, c)| (schema.attribute_name(a).to_owned(), c))
.collect();
index
.main
.put_fields_frequency(writer, &frequency)
.map_err(MError::Zlmdb)
}
}
impl Data {
pub fn new(opt: Opt) -> Data {
let db_path = opt.db_path.clone();
let api_key = opt.api_key.clone();
let server_pid = sysinfo::get_current_pid().unwrap();
let db = Arc::new(Database::open_or_create(opt.db_path.clone()).unwrap());
let inner_data = DataInner {
db: db.clone(),
db_path,
api_key,
server_pid,
};
let data = Data {
inner: Arc::new(inner_data),
};
let callback_context = data.clone();
db.set_update_callback(Box::new(move |index_uid, status| {
index_update_callback(&index_uid, &callback_context, status);
}));
data
}
}

View File

@ -0,0 +1,126 @@
use std::fmt::Display;
use http::status::StatusCode;
use log::{error, warn};
use serde::{Deserialize, Serialize};
use tide::response::IntoResponse;
use tide::Response;
pub type SResult<T> = Result<T, ResponseError>;
pub enum ResponseError {
Internal(String),
BadRequest(String),
InvalidToken(String),
NotFound(String),
IndexNotFound(String),
DocumentNotFound(String),
MissingHeader(String),
BadParameter(String, String),
OpenIndex(String),
CreateIndex(String),
Maintenance,
}
impl ResponseError {
pub fn internal(message: impl Display) -> ResponseError {
ResponseError::Internal(message.to_string())
}
pub fn bad_request(message: impl Display) -> ResponseError {
ResponseError::BadRequest(message.to_string())
}
pub fn invalid_token(message: impl Display) -> ResponseError {
ResponseError::InvalidToken(message.to_string())
}
pub fn not_found(message: impl Display) -> ResponseError {
ResponseError::NotFound(message.to_string())
}
pub fn index_not_found(message: impl Display) -> ResponseError {
ResponseError::IndexNotFound(message.to_string())
}
pub fn document_not_found(message: impl Display) -> ResponseError {
ResponseError::DocumentNotFound(message.to_string())
}
pub fn missing_header(message: impl Display) -> ResponseError {
ResponseError::MissingHeader(message.to_string())
}
pub fn bad_parameter(name: impl Display, message: impl Display) -> ResponseError {
ResponseError::BadParameter(name.to_string(), message.to_string())
}
pub fn open_index(message: impl Display) -> ResponseError {
ResponseError::OpenIndex(message.to_string())
}
pub fn create_index(message: impl Display) -> ResponseError {
ResponseError::CreateIndex(message.to_string())
}
}
impl IntoResponse for ResponseError {
fn into_response(self) -> Response {
match self {
ResponseError::Internal(err) => {
error!("internal server error: {}", err);
error(
String::from("Internal server error"),
StatusCode::INTERNAL_SERVER_ERROR,
)
}
ResponseError::BadRequest(err) => {
warn!("bad request: {}", err);
error(err, StatusCode::BAD_REQUEST)
}
ResponseError::InvalidToken(err) => {
error(format!("Invalid Token: {}", err), StatusCode::FORBIDDEN)
}
ResponseError::NotFound(err) => error(err, StatusCode::NOT_FOUND),
ResponseError::IndexNotFound(index) => {
error(format!("Index {} not found", index), StatusCode::NOT_FOUND)
}
ResponseError::DocumentNotFound(id) => error(
format!("Document with id {} not found", id),
StatusCode::NOT_FOUND,
),
ResponseError::MissingHeader(header) => error(
format!("Header {} is missing", header),
StatusCode::UNAUTHORIZED,
),
ResponseError::BadParameter(param, e) => error(
format!("Url parameter {} error: {}", param, e),
StatusCode::BAD_REQUEST,
),
ResponseError::CreateIndex(err) => error(
format!("Impossible to create index; {}", err),
StatusCode::BAD_REQUEST,
),
ResponseError::OpenIndex(err) => error(
format!("Impossible to open index; {}", err),
StatusCode::BAD_REQUEST,
),
ResponseError::Maintenance => error(
String::from("Server is in maintenance, please try again later"),
StatusCode::SERVICE_UNAVAILABLE,
),
}
}
}
#[derive(Serialize, Deserialize)]
struct ErrorMessage {
message: String,
}
fn error(message: String, status: StatusCode) -> Response {
let message = ErrorMessage { message };
tide::response::json(message)
.with_status(status)
.into_response()
}

View File

@ -0,0 +1,571 @@
use crate::routes::setting::{RankingOrdering, SettingBody};
use indexmap::IndexMap;
use log::error;
use meilisearch_core::criterion::*;
use meilisearch_core::Highlight;
use meilisearch_core::{Index, RankedMap};
use meilisearch_core::MainT;
use meilisearch_schema::{Schema, SchemaAttr};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::convert::From;
use std::error;
use std::fmt;
use std::time::{Duration, Instant};
#[derive(Debug)]
pub enum Error {
SearchDocuments(String),
RetrieveDocument(u64, String),
DocumentNotFound(u64),
CropFieldWrongType(String),
AttributeNotFoundOnDocument(String),
AttributeNotFoundOnSchema(String),
MissingFilterValue,
UnknownFilteredAttribute,
Internal(String),
}
impl error::Error for Error {}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use Error::*;
match self {
SearchDocuments(err) => write!(f, "impossible to search documents; {}", err),
RetrieveDocument(id, err) => write!(
f,
"impossible to retrieve the document with id: {}; {}",
id, err
),
DocumentNotFound(id) => write!(f, "document {} not found", id),
CropFieldWrongType(field) => {
write!(f, "the field {} cannot be cropped it's not a string", field)
}
AttributeNotFoundOnDocument(field) => {
write!(f, "field {} is not found on document", field)
}
AttributeNotFoundOnSchema(field) => write!(f, "field {} is not found on schema", field),
MissingFilterValue => f.write_str("a filter doesn't have a value to compare it with"),
UnknownFilteredAttribute => {
f.write_str("a filter is specifying an unknown schema attribute")
}
Internal(err) => write!(f, "internal error; {}", err),
}
}
}
impl From<meilisearch_core::Error> for Error {
fn from(error: meilisearch_core::Error) -> Self {
Error::Internal(error.to_string())
}
}
pub trait IndexSearchExt {
fn new_search(&self, query: String) -> SearchBuilder;
}
impl IndexSearchExt for Index {
fn new_search(&self, query: String) -> SearchBuilder {
SearchBuilder {
index: self,
query,
offset: 0,
limit: 20,
attributes_to_crop: None,
attributes_to_retrieve: None,
attributes_to_search_in: None,
attributes_to_highlight: None,
filters: None,
timeout: Duration::from_millis(30),
matches: false,
}
}
}
pub struct SearchBuilder<'a> {
index: &'a Index,
query: String,
offset: usize,
limit: usize,
attributes_to_crop: Option<HashMap<String, usize>>,
attributes_to_retrieve: Option<HashSet<String>>,
attributes_to_search_in: Option<HashSet<String>>,
attributes_to_highlight: Option<HashSet<String>>,
filters: Option<String>,
timeout: Duration,
matches: bool,
}
impl<'a> SearchBuilder<'a> {
pub fn offset(&mut self, value: usize) -> &SearchBuilder {
self.offset = value;
self
}
pub fn limit(&mut self, value: usize) -> &SearchBuilder {
self.limit = value;
self
}
pub fn attributes_to_crop(&mut self, value: HashMap<String, usize>) -> &SearchBuilder {
self.attributes_to_crop = Some(value);
self
}
pub fn attributes_to_retrieve(&mut self, value: HashSet<String>) -> &SearchBuilder {
self.attributes_to_retrieve = Some(value);
self
}
pub fn add_retrievable_field(&mut self, value: String) -> &SearchBuilder {
let attributes_to_retrieve = self.attributes_to_retrieve.get_or_insert(HashSet::new());
attributes_to_retrieve.insert(value);
self
}
pub fn attributes_to_search_in(&mut self, value: HashSet<String>) -> &SearchBuilder {
self.attributes_to_search_in = Some(value);
self
}
pub fn add_attribute_to_search_in(&mut self, value: String) -> &SearchBuilder {
let attributes_to_search_in = self.attributes_to_search_in.get_or_insert(HashSet::new());
attributes_to_search_in.insert(value);
self
}
pub fn attributes_to_highlight(&mut self, value: HashSet<String>) -> &SearchBuilder {
self.attributes_to_highlight = Some(value);
self
}
pub fn filters(&mut self, value: String) -> &SearchBuilder {
self.filters = Some(value);
self
}
pub fn timeout(&mut self, value: Duration) -> &SearchBuilder {
self.timeout = value;
self
}
pub fn get_matches(&mut self) -> &SearchBuilder {
self.matches = true;
self
}
pub fn search(&self, reader: &heed::RoTxn<MainT>) -> Result<SearchResult, Error> {
let schema = self.index.main.schema(reader);
let schema = schema.map_err(|e| Error::Internal(e.to_string()))?;
let schema = match schema {
Some(schema) => schema,
None => return Err(Error::Internal(String::from("missing schema"))),
};
let ranked_map = self.index.main.ranked_map(reader);
let ranked_map = ranked_map.map_err(|e| Error::Internal(e.to_string()))?;
let ranked_map = ranked_map.unwrap_or_default();
let start = Instant::now();
// Change criteria
let mut query_builder = match self.get_criteria(reader, &ranked_map, &schema)? {
Some(criteria) => self.index.query_builder_with_criteria(criteria),
None => self.index.query_builder(),
};
// Filter searchable fields
if let Some(fields) = &self.attributes_to_search_in {
for attribute in fields.iter().filter_map(|f| schema.attribute(f)) {
query_builder.add_searchable_attribute(attribute.0);
}
}
if let Some(filters) = &self.filters {
let mut split = filters.split(':');
match (split.next(), split.next()) {
(Some(_), None) | (Some(_), Some("")) => return Err(Error::MissingFilterValue),
(Some(attr), Some(value)) => {
let ref_reader = reader;
let ref_index = &self.index;
let value = value.trim().to_lowercase();
let attr = match schema.attribute(attr) {
Some(attr) => attr,
None => return Err(Error::UnknownFilteredAttribute),
};
query_builder.with_filter(move |id| {
let attr = attr;
let index = ref_index;
let reader = ref_reader;
match index.document_attribute::<Value>(reader, id, attr) {
Ok(Some(Value::String(s))) => s.to_lowercase() == value,
Ok(Some(Value::Bool(b))) => {
(value == "true" && b) || (value == "false" && !b)
}
Ok(Some(Value::Array(a))) => {
a.into_iter().any(|s| s.as_str() == Some(&value))
}
_ => false,
}
});
}
(_, _) => (),
}
}
query_builder.with_fetch_timeout(self.timeout);
let docs =
query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));
let mut hits = Vec::with_capacity(self.limit);
for doc in docs.map_err(|e| Error::SearchDocuments(e.to_string()))? {
// retrieve the content of the document from the kv store
let mut fields: Option<HashSet<&str>> = None;
if let Some(attributes_to_retrieve) = &self.attributes_to_retrieve {
let mut set = HashSet::new();
for field in attributes_to_retrieve {
set.insert(field.as_str());
}
fields = Some(set);
}
let document: IndexMap<String, Value> = self
.index
.document(reader, fields.as_ref(), doc.id)
.map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))?
.ok_or(Error::DocumentNotFound(doc.id.0))?;
let mut formatted = document.clone();
let mut matches = doc.highlights.clone();
// Crops fields if needed
if let Some(fields) = &self.attributes_to_crop {
crop_document(&mut formatted, &mut matches, &schema, fields);
}
// Transform to readable matches
let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema);
if !self.matches {
if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight);
}
}
let matches_info = if self.matches { Some(matches) } else { None };
let hit = SearchHit {
document,
formatted,
matches_info,
};
hits.push(hit);
}
let time_ms = start.elapsed().as_millis() as usize;
let results = SearchResult {
hits,
offset: self.offset,
limit: self.limit,
processing_time_ms: time_ms,
query: self.query.to_string(),
};
Ok(results)
}
pub fn get_criteria(
&self,
reader: &heed::RoTxn<MainT>,
ranked_map: &'a RankedMap,
schema: &Schema,
) -> Result<Option<Criteria<'a>>, Error> {
let current_settings = match self.index.main.customs(reader).unwrap() {
Some(bytes) => bincode::deserialize(bytes).unwrap(),
None => SettingBody::default(),
};
let ranking_rules = &current_settings.ranking_rules;
let ranking_order = &current_settings.ranking_order;
if let Some(ranking_rules) = ranking_rules {
let mut builder = CriteriaBuilder::with_capacity(7 + ranking_rules.len());
if let Some(ranking_rules_order) = ranking_order {
for rule in ranking_rules_order {
match rule.as_str() {
"_sum_of_typos" => builder.push(SumOfTypos),
"_number_of_words" => builder.push(NumberOfWords),
"_word_proximity" => builder.push(WordsProximity),
"_sum_of_words_attribute" => builder.push(SumOfWordsAttribute),
"_sum_of_words_position" => builder.push(SumOfWordsPosition),
"_exact" => builder.push(Exact),
_ => {
let order = match ranking_rules.get(rule.as_str()) {
Some(o) => o,
None => continue,
};
let custom_ranking = match order {
RankingOrdering::Asc => {
SortByAttr::lower_is_better(&ranked_map, &schema, &rule)
.unwrap()
}
RankingOrdering::Dsc => {
SortByAttr::higher_is_better(&ranked_map, &schema, &rule)
.unwrap()
}
};
builder.push(custom_ranking);
}
}
}
builder.push(DocumentId);
return Ok(Some(builder.build()));
} else {
builder.push(SumOfTypos);
builder.push(NumberOfWords);
builder.push(WordsProximity);
builder.push(SumOfWordsAttribute);
builder.push(SumOfWordsPosition);
builder.push(Exact);
for (rule, order) in ranking_rules.iter() {
let custom_ranking = match order {
RankingOrdering::Asc => {
SortByAttr::lower_is_better(&ranked_map, &schema, &rule).unwrap()
}
RankingOrdering::Dsc => {
SortByAttr::higher_is_better(&ranked_map, &schema, &rule).unwrap()
}
};
builder.push(custom_ranking);
}
builder.push(DocumentId);
return Ok(Some(builder.build()));
}
}
Ok(None)
}
}
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct MatchPosition {
pub start: usize,
pub length: usize,
}
impl Ord for MatchPosition {
fn cmp(&self, other: &Self) -> Ordering {
match self.start.cmp(&other.start) {
Ordering::Equal => self.length.cmp(&other.length),
_ => self.start.cmp(&other.start),
}
}
}
pub type HighlightInfos = HashMap<String, Value>;
pub type MatchesInfos = HashMap<String, Vec<MatchPosition>>;
// pub type RankingInfos = HashMap<String, u64>;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchHit {
#[serde(flatten)]
pub document: IndexMap<String, Value>,
#[serde(rename = "_formatted", skip_serializing_if = "IndexMap::is_empty")]
pub formatted: IndexMap<String, Value>,
#[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")]
pub matches_info: Option<MatchesInfos>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub hits: Vec<SearchHit>,
pub offset: usize,
pub limit: usize,
pub processing_time_ms: usize,
pub query: String,
// pub parsed_query: String,
// pub params: Option<String>,
}
fn crop_text(
text: &str,
matches: impl IntoIterator<Item = Highlight>,
context: usize,
) -> (String, Vec<Highlight>) {
let mut matches = matches.into_iter().peekable();
let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0);
let start = char_index.saturating_sub(context);
let text = text.chars().skip(start).take(context * 2).collect();
let matches = matches
.take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2))
.map(|match_| Highlight {
char_index: match_.char_index - start as u16,
..match_
})
.collect();
(text, matches)
}
fn crop_document(
document: &mut IndexMap<String, Value>,
matches: &mut Vec<Highlight>,
schema: &Schema,
fields: &HashMap<String, usize>,
) {
matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
for (field, length) in fields {
let attribute = match schema.attribute(field) {
Some(attribute) => attribute,
None => continue,
};
let selected_matches = matches
.iter()
.filter(|m| SchemaAttr::new(m.attribute) == attribute)
.cloned();
if let Some(Value::String(ref mut original_text)) = document.get_mut(field) {
let (cropped_text, cropped_matches) =
crop_text(original_text, selected_matches, *length);
*original_text = cropped_text;
matches.retain(|m| SchemaAttr::new(m.attribute) != attribute);
matches.extend_from_slice(&cropped_matches);
}
}
}
fn calculate_matches(
matches: Vec<Highlight>,
attributes_to_retrieve: Option<HashSet<String>>,
schema: &Schema,
) -> MatchesInfos {
let mut matches_result: HashMap<String, Vec<MatchPosition>> = HashMap::new();
for m in matches.iter() {
let attribute = schema
.attribute_name(SchemaAttr::new(m.attribute))
.to_string();
if let Some(attributes_to_retrieve) = attributes_to_retrieve.clone() {
if !attributes_to_retrieve.contains(attribute.as_str()) {
continue;
}
};
if let Some(pos) = matches_result.get_mut(&attribute) {
pos.push(MatchPosition {
start: m.char_index as usize,
length: m.char_length as usize,
});
} else {
let mut positions = Vec::new();
positions.push(MatchPosition {
start: m.char_index as usize,
length: m.char_length as usize,
});
matches_result.insert(attribute, positions);
}
}
for (_, val) in matches_result.iter_mut() {
val.sort_unstable();
val.dedup();
}
matches_result
}
fn calculate_highlights(
document: &IndexMap<String, Value>,
matches: &MatchesInfos,
attributes_to_highlight: &HashSet<String>,
) -> IndexMap<String, Value> {
let mut highlight_result = IndexMap::new();
for (attribute, matches) in matches.iter() {
if attributes_to_highlight.contains(attribute) {
if let Some(Value::String(value)) = document.get(attribute) {
let value: Vec<_> = value.chars().collect();
let mut highlighted_value = String::new();
let mut index = 0;
for m in matches {
if m.start >= index {
let before = value.get(index..m.start);
let highlighted = value.get(m.start..(m.start + m.length));
if let (Some(before), Some(highlighted)) = (before, highlighted) {
highlighted_value.extend(before);
highlighted_value.push_str("<em>");
highlighted_value.extend(highlighted);
highlighted_value.push_str("</em>");
index = m.start + m.length;
} else {
error!("value: {:?}; index: {:?}, match: {:?}", value, index, m);
}
}
}
highlighted_value.extend(value[index..].iter());
highlight_result.insert(attribute.to_string(), Value::String(highlighted_value));
};
}
}
highlight_result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn calculate_highlights() {
let data = r#"{
"title": "Fondation (Isaac ASIMOV)",
"description": "En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la Fondation."
}"#;
let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
let mut attributes_to_highlight = HashSet::new();
attributes_to_highlight.insert("title".to_string());
attributes_to_highlight.insert("description".to_string());
let mut matches = HashMap::new();
let mut m = Vec::new();
m.push(MatchPosition {
start: 0,
length: 9,
});
matches.insert("title".to_string(), m);
let mut m = Vec::new();
m.push(MatchPosition {
start: 510,
length: 9,
});
matches.insert("description".to_string(), m);
let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);
let mut result_expected = IndexMap::new();
result_expected.insert(
"title".to_string(),
Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()),
);
result_expected.insert("description".to_string(), Value::String("En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la <em>Fondation</em>.".to_string()));
assert_eq!(result, result_expected);
}
}
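A hedged usage sketch of the SearchBuilder defined earlier in this file; `index` (a meilisearch_core::Index) and `reader` (a main read transaction) are assumed to already be in scope, and the query string is made up.

let mut builder = index.new_search("fondation".to_string());
builder.limit(5);

let mut to_highlight = HashSet::new();
to_highlight.insert("title".to_string());
builder.attributes_to_highlight(to_highlight);

// search() resolves criteria, applies filters and crops/highlights,
// then returns the hits together with the processing time
let results = builder.search(&reader)?;
for hit in results.hits {
    println!("{:?}", hit.document.get("title"));
}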

View File

@ -0,0 +1,2 @@
pub mod meilisearch;
pub mod tide;

View File

@ -0,0 +1,117 @@
use crate::error::{ResponseError, SResult};
use crate::models::token::*;
use crate::Data;
use chrono::Utc;
use heed::types::{SerdeBincode, Str};
use meilisearch_core::Index;
use serde_json::Value;
use tide::Context;
pub trait ContextExt {
fn is_allowed(&self, acl: ACL) -> SResult<()>;
fn header(&self, name: &str) -> Result<String, ResponseError>;
fn url_param(&self, name: &str) -> Result<String, ResponseError>;
fn index(&self) -> Result<Index, ResponseError>;
fn identifier(&self) -> Result<String, ResponseError>;
}
impl ContextExt for Context<Data> {
fn is_allowed(&self, acl: ACL) -> SResult<()> {
let api_key = match &self.state().api_key {
Some(api_key) => api_key,
None => return Ok(()),
};
let user_api_key = self.header("X-Meili-API-Key")?;
if user_api_key == *api_key {
return Ok(());
}
let request_index: Option<String> = None; //self.param::<String>("index").ok();
let db = &self.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, user_api_key);
let token_config = db
.common_store()
.get::<_, Str, SerdeBincode<Token>>(&reader, &token_key)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::invalid_token(format!(
"Api key does not exist: {}",
user_api_key
)))?;
if token_config.revoked {
return Err(ResponseError::invalid_token("token revoked"));
}
if let Some(index) = request_index {
if !token_config
.indexes
.iter()
.any(|r| match_wildcard(&r, &index))
{
return Err(ResponseError::invalid_token(
"token is not allowed to access to this index",
));
}
}
if token_config.expires_at < Utc::now() {
return Err(ResponseError::invalid_token("token expired"));
}
if token_config.acl.contains(&ACL::All) {
return Ok(());
}
if !token_config.acl.contains(&acl) {
return Err(ResponseError::invalid_token("token does not have this ACL"));
}
Ok(())
}
fn header(&self, name: &str) -> Result<String, ResponseError> {
let header = self
.headers()
.get(name)
.ok_or(ResponseError::missing_header(name))?
.to_str()
.map_err(|_| ResponseError::missing_header("X-Meili-API-Key"))?
.to_string();
Ok(header)
}
fn url_param(&self, name: &str) -> Result<String, ResponseError> {
let param = self
.param::<String>(name)
.map_err(|e| ResponseError::bad_parameter(name, e))?;
Ok(param)
}
fn index(&self) -> Result<Index, ResponseError> {
let index_uid = self.url_param("index")?;
let index = self
.state()
.db
.open_index(&index_uid)
.ok_or(ResponseError::index_not_found(index_uid))?;
Ok(index)
}
fn identifier(&self) -> Result<String, ResponseError> {
let name = self
.param::<Value>("identifier")
.as_ref()
.map(meilisearch_core::serde::value_to_string)
.map_err(|e| ResponseError::bad_parameter("identifier", e))?
.ok_or(ResponseError::bad_parameter(
"identifier",
"missing parameter",
))?;
Ok(name)
}
}

View File

@ -0,0 +1,8 @@
pub mod data;
pub mod error;
pub mod helpers;
pub mod models;
pub mod option;
pub mod routes;
pub use self::data::Data;

View File

@ -0,0 +1,54 @@
use std::env::VarError::NotPresent;
use std::{env, thread};
use http::header::HeaderValue;
use log::info;
use main_error::MainError;
use structopt::StructOpt;
use tide::middleware::{CorsMiddleware, CorsOrigin};
use tide_log::RequestLogger;
use meilisearch_http::data::Data;
use meilisearch_http::option::Opt;
use meilisearch_http::routes;
use meilisearch_http::routes::index::index_update_callback;
mod analytics;
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
pub fn main() -> Result<(), MainError> {
env_logger::init();
let opt = Opt::from_args();
let data = Data::new(opt.clone());
if env::var("MEILI_NO_ANALYTICS") == Err(NotPresent) {
thread::spawn(|| analytics::analytics_sender());
}
let data_cloned = data.clone();
data.db.set_update_callback(Box::new(move |name, status| {
index_update_callback(name, &data_cloned, status);
}));
let mut app = tide::App::with_state(data);
app.middleware(
CorsMiddleware::new()
.allow_origin(CorsOrigin::from("*"))
.allow_methods(HeaderValue::from_static("GET, POST, OPTIONS")),
);
app.middleware(RequestLogger::new());
app.middleware(tide_compression::Compression::new());
app.middleware(tide_compression::Decompression::new());
routes::load_routes(&mut app);
info!("Server HTTP enabled");
app.run(opt.http_addr)?;
Ok(())
}

View File

@ -0,0 +1,3 @@
pub mod schema;
pub mod token;
pub mod update_operation;

View File

@ -0,0 +1,118 @@
use std::collections::HashSet;
use indexmap::IndexMap;
use meilisearch_schema::{Schema, SchemaBuilder, SchemaProps};
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum FieldProperties {
Identifier,
Indexed,
Displayed,
Ranked,
}
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct SchemaBody(IndexMap<String, HashSet<FieldProperties>>);
impl From<Schema> for SchemaBody {
fn from(value: Schema) -> SchemaBody {
let mut map = IndexMap::new();
for (name, _attr, props) in value.iter() {
let old_properties = map.entry(name.to_owned()).or_insert(HashSet::new());
if props.is_indexed() {
old_properties.insert(FieldProperties::Indexed);
}
if props.is_displayed() {
old_properties.insert(FieldProperties::Displayed);
}
if props.is_ranked() {
old_properties.insert(FieldProperties::Ranked);
}
}
let old_properties = map
.entry(value.identifier_name().to_string())
.or_insert(HashSet::new());
old_properties.insert(FieldProperties::Identifier);
old_properties.insert(FieldProperties::Displayed);
SchemaBody(map)
}
}
impl Into<Schema> for SchemaBody {
fn into(self) -> Schema {
let mut identifier = "documentId".to_string();
let mut attributes = IndexMap::new();
for (field, properties) in self.0 {
let mut indexed = false;
let mut displayed = false;
let mut ranked = false;
for property in properties {
match property {
FieldProperties::Indexed => indexed = true,
FieldProperties::Displayed => displayed = true,
FieldProperties::Ranked => ranked = true,
FieldProperties::Identifier => identifier = field.clone(),
}
}
attributes.insert(
field,
SchemaProps {
indexed,
displayed,
ranked,
},
);
}
let mut builder = SchemaBuilder::with_identifier(identifier);
for (field, props) in attributes {
builder.new_attribute(field, props);
}
builder.build()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_schema_body_conversion() {
let schema_body = r#"
{
"id": ["identifier", "indexed", "displayed"],
"title": ["indexed", "displayed"],
"date": ["displayed"]
}
"#;
let schema_builder = r#"
{
"identifier": "id",
"attributes": {
"id": {
"indexed": true,
"displayed": true
},
"title": {
"indexed": true,
"displayed": true
},
"date": {
"displayed": true
}
}
}
"#;
let schema_body: SchemaBody = serde_json::from_str(schema_body).unwrap();
let schema_builder: SchemaBuilder = serde_json::from_str(schema_builder).unwrap();
let schema_from_body: Schema = schema_body.into();
let schema_from_builder: Schema = schema_builder.build();
assert_eq!(schema_from_body, schema_from_builder);
}
}

View File

@ -0,0 +1,72 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
pub const TOKEN_PREFIX_KEY: &str = "_token_";
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum ACL {
IndexesRead,
IndexesWrite,
DocumentsRead,
DocumentsWrite,
SettingsRead,
SettingsWrite,
Admin,
#[serde(rename = "*")]
All,
}
pub type Wildcard = String;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Token {
pub key: String,
pub description: String,
pub acl: Vec<ACL>,
pub indexes: Vec<Wildcard>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub expires_at: DateTime<Utc>,
pub revoked: bool,
}
// strip a single leading and/or trailing '*' and report which of the two were present
fn cleanup_wildcard(input: &str) -> (bool, &str, bool) {
let first = input.chars().next().filter(|&c| c == '*').is_some();
let last = input.chars().last().filter(|&c| c == '*').is_some();
let bound_last = std::cmp::max(input.len().saturating_sub(last as usize), first as usize);
let output = input.get(first as usize..bound_last).unwrap();
(first, output, last)
}
pub fn match_wildcard(pattern: &str, input: &str) -> bool {
let (first, pattern, last) = cleanup_wildcard(pattern);
match (first, last) {
(false, false) => pattern == input,
(true, false) => input.ends_with(pattern),
(false, true) => input.starts_with(pattern),
(true, true) => input.contains(pattern),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_match_wildcard() {
assert!(match_wildcard("*", "qqq"));
assert!(match_wildcard("*", ""));
assert!(match_wildcard("*ab", "qqqab"));
assert!(match_wildcard("*ab*", "qqqabqq"));
assert!(match_wildcard("ab*", "abqqq"));
assert!(match_wildcard("**", "ab"));
assert!(match_wildcard("ab", "ab"));
assert!(match_wildcard("ab*", "ab"));
assert!(match_wildcard("*ab", "ab"));
assert!(match_wildcard("*ab*", "ab"));
assert!(match_wildcard("*😆*", "ab😆dsa"));
}
}
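A compact, illustrative restatement of the checks that the ContextExt::is_allowed implementation shown earlier performs against a stored Token; every value in the token below is made up.

use chrono::{Duration, Utc};

let token = Token {
    key: "sample-key".to_string(),
    description: "read-only movies key".to_string(),
    acl: vec![ACL::DocumentsRead],
    indexes: vec!["movies*".to_string()],
    created_at: Utc::now(),
    updated_at: Utc::now(),
    expires_at: Utc::now() + Duration::days(30),
    revoked: false,
};

let index = "movies_fr";
let allowed = !token.revoked
    && token.indexes.iter().any(|pattern| match_wildcard(pattern, index))
    && token.expires_at > Utc::now()
    && (token.acl.contains(&ACL::All) || token.acl.contains(&ACL::DocumentsRead));
assert!(allowed);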

View File

@ -0,0 +1,33 @@
use std::fmt;
#[allow(dead_code)]
#[derive(Debug)]
pub enum UpdateOperation {
ClearAllDocuments,
DocumentsAddition,
DocumentsDeletion,
SynonymsAddition,
SynonymsDeletion,
StopWordsAddition,
StopWordsDeletion,
Schema,
Config,
}
impl fmt::Display for UpdateOperation {
fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
use UpdateOperation::*;
match self {
ClearAllDocuments => write!(f, "ClearAllDocuments"),
DocumentsAddition => write!(f, "DocumentsAddition"),
DocumentsDeletion => write!(f, "DocumentsDeletion"),
SynonymsAddition => write!(f, "SynonymsAddition"),
SynonymsDeletion => write!(f, "SynonymsDeletion"),
StopWordsAddition => write!(f, "StopWordsAddition"),
StopWordsDeletion => write!(f, "StopWordsDeletion"),
Schema => write!(f, "Schema"),
Config => write!(f, "Config"),
}
}
}

View File

@ -0,0 +1,20 @@
use structopt::StructOpt;
#[derive(Debug, Clone, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[structopt(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
pub db_path: String,
/// The address on which the http server will listen.
#[structopt(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
pub http_addr: String,
/// The master key allowing you to do everything on the server.
#[structopt(long, env = "MEILI_API_KEY")]
pub api_key: Option<String>,
/// Do not send analytics to Meili.
#[structopt(long, env = "MEILI_NO_ANALYTICS")]
pub no_analytics: bool,
}

View File

@ -0,0 +1,245 @@
use std::collections::{BTreeSet, HashSet};
use http::StatusCode;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use tide::querystring::ContextExt as QSContextExt;
use tide::response::IntoResponse;
use tide::{Context, Response};
use crate::error::{ResponseError, SResult};
use crate::helpers::tide::ContextExt;
use crate::models::token::ACL::*;
use crate::Data;
pub async fn get_document(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(DocumentsRead)?;
let index = ctx.index()?;
let identifier = ctx.identifier()?;
let document_id = meilisearch_core::serde::compute_document_id(identifier.clone());
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let response = index
.document::<IndexMap<String, Value>>(&reader, None, document_id)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::document_not_found(&identifier))?;
if response.is_empty() {
return Err(ResponseError::document_not_found(identifier));
}
Ok(tide::response::json(response))
}
#[derive(Default, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexUpdateResponse {
pub update_id: u64,
}
pub async fn delete_document(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(DocumentsWrite)?;
let index = ctx.index()?;
let identifier = ctx.identifier()?;
let document_id = meilisearch_core::serde::compute_document_id(identifier.clone());
let db = &ctx.state().db;
let mut update_writer = db.update_write_txn().map_err(ResponseError::internal)?;
let mut documents_deletion = index.documents_deletion();
documents_deletion.delete_document_by_id(document_id);
let update_id = documents_deletion
.finalize(&mut update_writer)
.map_err(ResponseError::internal)?;
update_writer.commit().map_err(ResponseError::internal)?;
let response_body = IndexUpdateResponse { update_id };
Ok(tide::response::json(response_body)
.with_status(StatusCode::ACCEPTED)
.into_response())
}
#[derive(Default, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct BrowseQuery {
offset: Option<usize>,
limit: Option<usize>,
attributes_to_retrieve: Option<String>,
}
pub async fn get_all_documents(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(DocumentsRead)?;
let index = ctx.index()?;
let query: BrowseQuery = ctx.url_query().unwrap_or(BrowseQuery::default());
let offset = query.offset.unwrap_or(0);
let limit = query.limit.unwrap_or(20);
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let documents_ids: Result<BTreeSet<_>, _> =
match index.documents_fields_counts.documents_ids(&reader) {
Ok(documents_ids) => documents_ids.skip(offset).take(limit).collect(),
Err(e) => return Err(ResponseError::internal(e)),
};
let documents_ids = match documents_ids {
Ok(documents_ids) => documents_ids,
Err(e) => return Err(ResponseError::internal(e)),
};
let mut response_body = Vec::<IndexMap<String, Value>>::new();
if let Some(attributes) = query.attributes_to_retrieve {
let attributes = attributes.split(',').collect::<HashSet<&str>>();
for document_id in documents_ids {
if let Ok(Some(document)) = index.document(&reader, Some(&attributes), document_id) {
response_body.push(document);
}
}
} else {
for document_id in documents_ids {
if let Ok(Some(document)) = index.document(&reader, None, document_id) {
response_body.push(document);
}
}
}
Ok(tide::response::json(response_body))
}
fn inferred_schema(document: &IndexMap<String, Value>) -> Option<meilisearch_schema::Schema> {
use meilisearch_schema::{SchemaBuilder, DISPLAYED, INDEXED};
let mut identifier = None;
for key in document.keys() {
if identifier.is_none() && key.to_lowercase().contains("id") {
identifier = Some(key);
}
}
match identifier {
Some(identifier) => {
let mut builder = SchemaBuilder::with_identifier(identifier);
for key in document.keys() {
builder.new_attribute(key, DISPLAYED | INDEXED);
}
Some(builder.build())
}
None => None,
}
}
async fn update_multiple_documents(mut ctx: Context<Data>, is_partial: bool) -> SResult<Response> {
ctx.is_allowed(DocumentsWrite)?;
let data: Vec<IndexMap<String, Value>> =
ctx.body_json().await.map_err(ResponseError::bad_request)?;
let index = ctx.index()?;
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let mut update_writer = db.update_write_txn().map_err(ResponseError::internal)?;
let current_schema = index
.main
.schema(&reader)
.map_err(ResponseError::internal)?;
if current_schema.is_none() {
match data.first().and_then(inferred_schema) {
Some(schema) => {
index
.schema_update(&mut update_writer, schema)
.map_err(ResponseError::internal)?;
}
None => return Err(ResponseError::bad_request("Could not infer a schema")),
}
}
let mut document_addition = if is_partial {
index.documents_partial_addition()
} else {
index.documents_addition()
};
for document in data {
document_addition.update_document(document);
}
let update_id = document_addition
.finalize(&mut update_writer)
.map_err(ResponseError::internal)?;
update_writer.commit().map_err(ResponseError::internal)?;
let response_body = IndexUpdateResponse { update_id };
Ok(tide::response::json(response_body)
.with_status(StatusCode::ACCEPTED)
.into_response())
}
pub async fn add_or_replace_multiple_documents(ctx: Context<Data>) -> SResult<Response> {
update_multiple_documents(ctx, false).await
}
pub async fn add_or_update_multiple_documents(ctx: Context<Data>) -> SResult<Response> {
update_multiple_documents(ctx, true).await
}
pub async fn delete_multiple_documents(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(DocumentsWrite)?;
let data: Vec<Value> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let index = ctx.index()?;
let db = &ctx.state().db;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let mut documents_deletion = index.documents_deletion();
for identifier in data {
if let Some(identifier) = meilisearch_core::serde::value_to_string(&identifier) {
documents_deletion
.delete_document_by_id(meilisearch_core::serde::compute_document_id(identifier));
}
}
let update_id = documents_deletion
.finalize(&mut writer)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
let response_body = IndexUpdateResponse { update_id };
Ok(tide::response::json(response_body)
.with_status(StatusCode::ACCEPTED)
.into_response())
}
pub async fn clear_all_documents(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(DocumentsWrite)?;
let index = ctx.index()?;
let db = &ctx.state().db;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let update_id = index
.clear_all(&mut writer)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
let response_body = IndexUpdateResponse { update_id };
Ok(tide::response::json(response_body)
.with_status(StatusCode::ACCEPTED)
.into_response())
}
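
A note on the query parameters handled by get_all_documents above: offset defaults to 0, limit to 20, and attributesToRetrieve is a plain comma-separated list. The following standalone sketch (not part of this changeset) shows how such a query string maps onto the struct; it re-declares BrowseQuery locally and assumes the serde and serde_urlencoded crates rather than whatever parser Tide uses behind ctx.url_query().

use serde::Deserialize;

#[derive(Debug, Default, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct BrowseQuery {
    offset: Option<usize>,
    limit: Option<usize>,
    attributes_to_retrieve: Option<String>,
}

fn main() {
    // "attributesToRetrieve" maps to attributes_to_retrieve through rename_all = "camelCase".
    let query: BrowseQuery =
        serde_urlencoded::from_str("offset=10&limit=5&attributesToRetrieve=title,overview")
            .expect("a valid query string");
    // The handler falls back to offset 0 and limit 20 when the parameters are absent.
    let (offset, limit) = (query.offset.unwrap_or(0), query.limit.unwrap_or(20));
    assert_eq!((offset, limit), (10, 5));
    // The attribute list is split on ',' before being handed to index.document().
    let attributes: Vec<&str> = query
        .attributes_to_retrieve
        .as_deref()
        .unwrap_or("")
        .split(',')
        .collect();
    assert_eq!(attributes, vec!["title", "overview"]);
}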

View File

@@ -0,0 +1,76 @@
use crate::error::{ResponseError, SResult};
use crate::helpers::tide::ContextExt;
use crate::models::token::ACL::*;
use crate::Data;
use heed::types::{Str, Unit};
use serde::Deserialize;
use tide::Context;
const UNHEALTHY_KEY: &str = "_is_unhealthy";
pub async fn get_health(ctx: Context<Data>) -> SResult<()> {
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let common_store = ctx.state().db.common_store();
if let Ok(Some(_)) = common_store.get::<_, Str, Unit>(&reader, UNHEALTHY_KEY) {
return Err(ResponseError::Maintenance);
}
Ok(())
}
pub async fn set_healthy(ctx: Context<Data>) -> SResult<()> {
ctx.is_allowed(Admin)?;
let db = &ctx.state().db;
let mut writer = db.main_write_txn().map_err(ResponseError::internal)?;
let common_store = ctx.state().db.common_store();
match common_store.delete::<_, Str>(&mut writer, UNHEALTHY_KEY) {
Ok(_) => (),
Err(e) => return Err(ResponseError::internal(e)),
}
if let Err(e) = writer.commit() {
return Err(ResponseError::internal(e));
}
Ok(())
}
pub async fn set_unhealthy(ctx: Context<Data>) -> SResult<()> {
ctx.is_allowed(Admin)?;
let db = &ctx.state().db;
let mut writer = db.main_write_txn().map_err(ResponseError::internal)?;
let common_store = ctx.state().db.common_store();
if let Err(e) = common_store.put::<_, Str, Unit>(&mut writer, UNHEALTHY_KEY, &()) {
return Err(ResponseError::internal(e));
}
if let Err(e) = writer.commit() {
return Err(ResponseError::internal(e));
}
Ok(())
}
#[derive(Deserialize, Clone)]
struct HealthBody {
health: bool,
}
pub async fn change_healthiness(mut ctx: Context<Data>) -> SResult<()> {
let body: HealthBody = ctx.body_json().await.map_err(ResponseError::bad_request)?;
if body.health {
set_healthy(ctx).await
} else {
set_unhealthy(ctx).await
}
}
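
The change_healthiness handler above only dispatches on the request body: {"health": true} routes to set_healthy, {"health": false} to set_unhealthy, which writes the _is_unhealthy marker that get_health checks. A minimal standalone sketch of that body, assuming serde_json (not a dependency of this file) for illustration:

use serde::Deserialize;

#[derive(Deserialize)]
struct HealthBody {
    health: bool,
}

fn main() {
    let body: HealthBody = serde_json::from_str(r#"{ "health": false }"#).unwrap();
    let target = if body.health { "set_healthy" } else { "set_unhealthy" };
    assert_eq!(target, "set_unhealthy");
}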

View File

@@ -0,0 +1,439 @@
use chrono::{DateTime, Utc};
use http::StatusCode;
use log::error;
use meilisearch_core::ProcessedUpdateResult;
use meilisearch_schema::{Schema, SchemaBuilder};
use rand::seq::SliceRandom;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tide::querystring::ContextExt as QSContextExt;
use tide::response::IntoResponse;
use tide::{Context, Response};
use crate::error::{ResponseError, SResult};
use crate::helpers::tide::ContextExt;
use crate::models::schema::SchemaBody;
use crate::models::token::ACL::*;
use crate::routes::document::IndexUpdateResponse;
use crate::Data;
fn generate_uid() -> String {
let mut rng = rand::thread_rng();
let sample = b"abcdefghijklmnopqrstuvwxyz0123456789";
sample
.choose_multiple(&mut rng, 8)
.map(|c| *c as char)
.collect()
}
pub async fn list_indexes(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(IndexesRead)?;
let indexes_uids = ctx.state().db.indexes_uids();
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let mut response_body = Vec::new();
for index_uid in indexes_uids {
let index = ctx.state().db.open_index(&index_uid);
match index {
Some(index) => {
let name = index
.main
.name(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::internal("'name' not found"))?;
let created_at = index
.main
.created_at(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::internal("'created_at' date not found"))?;
let updated_at = index
.main
.updated_at(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::internal("'updated_at' date not found"))?;
let index_response = IndexResponse {
name,
uid: index_uid,
created_at,
updated_at,
};
response_body.push(index_response);
}
None => error!(
"Index {} is referenced in the indexes list but cannot be found",
index_uid
),
}
}
Ok(tide::response::json(response_body))
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct IndexResponse {
name: String,
uid: String,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
}
pub async fn get_index(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(IndexesRead)?;
let index = ctx.index()?;
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let uid = ctx.url_param("index")?;
let name = index
.main
.name(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::internal("'name' not found"))?;
let created_at = index
.main
.created_at(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::internal("'created_at' date not found"))?;
let updated_at = index
.main
.updated_at(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::internal("'updated_at' date not found"))?;
let response_body = IndexResponse {
name,
uid,
created_at,
updated_at,
};
Ok(tide::response::json(response_body))
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct IndexCreateRequest {
name: String,
uid: Option<String>,
schema: Option<SchemaBody>,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct IndexCreateResponse {
name: String,
uid: String,
schema: Option<SchemaBody>,
#[serde(skip_serializing_if = "Option::is_none")]
update_id: Option<u64>,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
}
pub async fn create_index(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(IndexesWrite)?;
let body = ctx
.body_json::<IndexCreateRequest>()
.await
.map_err(ResponseError::bad_request)?;
let db = &ctx.state().db;
let uid = match body.uid {
Some(uid) => uid,
None => loop {
let uid = generate_uid();
if db.open_index(&uid).is_none() {
break uid;
}
},
};
let created_index = match db.create_index(&uid) {
Ok(index) => index,
Err(e) => return Err(ResponseError::create_index(e)),
};
let mut writer = db.main_write_txn().map_err(ResponseError::internal)?;
let mut update_writer = db.update_write_txn().map_err(ResponseError::internal)?;
created_index
.main
.put_name(&mut writer, &body.name)
.map_err(ResponseError::internal)?;
created_index
.main
.put_created_at(&mut writer)
.map_err(ResponseError::internal)?;
created_index
.main
.put_updated_at(&mut writer)
.map_err(ResponseError::internal)?;
let schema: Option<Schema> = body.schema.clone().map(Into::into);
let mut response_update_id = None;
if let Some(schema) = schema {
let update_id = created_index
.schema_update(&mut update_writer, schema)
.map_err(ResponseError::internal)?;
response_update_id = Some(update_id)
}
writer.commit().map_err(ResponseError::internal)?;
update_writer.commit().map_err(ResponseError::internal)?;
let response_body = IndexCreateResponse {
name: body.name,
uid,
schema: body.schema,
update_id: response_update_id,
created_at: Utc::now(),
updated_at: Utc::now(),
};
Ok(tide::response::json(response_body)
.with_status(StatusCode::CREATED)
.into_response())
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct UpdateIndexRequest {
name: String,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct UpdateIndexResponse {
name: String,
uid: String,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
}
pub async fn update_index(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(IndexesWrite)?;
let body = ctx
.body_json::<UpdateIndexRequest>()
.await
.map_err(ResponseError::bad_request)?;
let index_uid = ctx.url_param("index")?;
let index = ctx.index()?;
let db = &ctx.state().db;
let mut writer = db.main_write_txn().map_err(ResponseError::internal)?;
index
.main
.put_name(&mut writer, &body.name)
.map_err(ResponseError::internal)?;
index
.main
.put_updated_at(&mut writer)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let created_at = index
.main
.created_at(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::internal("'created_at' date not found"))?;
let updated_at = index
.main
.updated_at(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::internal("'updated_at' date not found"))?;
let response_body = UpdateIndexResponse {
name: body.name,
uid: index_uid,
created_at,
updated_at,
};
Ok(tide::response::json(response_body)
.with_status(StatusCode::ACCEPTED)
.into_response())
}
#[derive(Default, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct SchemaParams {
raw: bool,
}
pub async fn get_index_schema(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(IndexesRead)?;
let index = ctx.index()?;
// Tide doesn't support "no query param"
let params: SchemaParams = ctx.url_query().unwrap_or_default();
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let schema = index
.main
.schema(&reader)
.map_err(ResponseError::open_index)?;
match schema {
Some(schema) => {
if params.raw {
Ok(tide::response::json(schema))
} else {
Ok(tide::response::json(SchemaBody::from(schema)))
}
}
None => Err(ResponseError::not_found("missing index schema")),
}
}
pub async fn update_schema(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(IndexesWrite)?;
let index_uid = ctx.url_param("index")?;
let params: SchemaParams = ctx.url_query().unwrap_or_default();
let schema = if params.raw {
ctx.body_json::<SchemaBuilder>()
.await
.map_err(ResponseError::bad_request)?
.build()
} else {
ctx.body_json::<SchemaBody>()
.await
.map_err(ResponseError::bad_request)?
.into()
};
let db = &ctx.state().db;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let index = db
.open_index(&index_uid)
.ok_or(ResponseError::index_not_found(index_uid))?;
let update_id = index
.schema_update(&mut writer, schema.clone())
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
let response_body = IndexUpdateResponse { update_id };
Ok(tide::response::json(response_body)
.with_status(StatusCode::ACCEPTED)
.into_response())
}
pub async fn get_update_status(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(IndexesRead)?;
let db = &ctx.state().db;
let reader = db.update_read_txn().map_err(ResponseError::internal)?;
let update_id = ctx
.param::<u64>("update_id")
.map_err(|e| ResponseError::bad_parameter("update_id", e))?;
let index = ctx.index()?;
let status = index
.update_status(&reader, update_id)
.map_err(ResponseError::internal)?;
let response = match status {
Some(status) => tide::response::json(status)
.with_status(StatusCode::OK)
.into_response(),
None => tide::response::json(json!({ "message": "unknown update id" }))
.with_status(StatusCode::NOT_FOUND)
.into_response(),
};
Ok(response)
}
pub async fn get_all_updates_status(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(IndexesRead)?;
let db = &ctx.state().db;
let reader = db.update_read_txn().map_err(ResponseError::internal)?;
let index = ctx.index()?;
let all_status = index
.all_updates_status(&reader)
.map_err(ResponseError::internal)?;
let response = tide::response::json(all_status)
.with_status(StatusCode::OK)
.into_response();
Ok(response)
}
pub async fn delete_index(ctx: Context<Data>) -> SResult<StatusCode> {
ctx.is_allowed(IndexesWrite)?;
let index_uid = ctx.url_param("index")?;
let found = ctx
.state()
.db
.delete_index(&index_uid)
.map_err(ResponseError::internal)?;
if found {
Ok(StatusCode::NO_CONTENT)
} else {
Ok(StatusCode::NOT_FOUND)
}
}
pub fn index_update_callback(index_uid: &str, data: &Data, status: ProcessedUpdateResult) {
if status.error.is_some() {
return;
}
if let Some(index) = data.db.open_index(&index_uid) {
let db = &data.db;
let mut writer = match db.main_write_txn() {
Ok(writer) => writer,
Err(e) => {
error!("Impossible to get write_txn; {}", e);
return;
}
};
if let Err(e) = data.compute_stats(&mut writer, &index_uid) {
error!("Impossible to compute stats; {}", e)
}
if let Err(e) = data.set_last_update(&mut writer) {
error!("Impossible to update last_update; {}", e)
}
if let Err(e) = index.main.put_updated_at(&mut writer) {
error!("Impossible to update updated_at; {}", e)
}
if let Err(e) = writer.commit() {
error!("Impossible to get write_txn; {}", e);
}
}
}
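
When create_index receives no uid, generate_uid above draws eight characters from [a-z0-9] and the surrounding loop retries until db.open_index reports the candidate as unused. A standalone copy of the generator, with only a main function added for illustration (assumes the rand crate, as in the file above):

use rand::seq::SliceRandom;

fn generate_uid() -> String {
    let mut rng = rand::thread_rng();
    let sample = b"abcdefghijklmnopqrstuvwxyz0123456789";
    sample
        .choose_multiple(&mut rng, 8)
        .map(|c| *c as char)
        .collect()
}

fn main() {
    let uid = generate_uid();
    assert_eq!(uid.chars().count(), 8);
    assert!(uid.chars().all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()));
    println!("candidate index uid: {}", uid);
}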

View File

@@ -0,0 +1,194 @@
use chrono::serde::ts_seconds;
use chrono::{DateTime, Utc};
use heed::types::{SerdeBincode, Str};
use http::StatusCode;
use rand::seq::SliceRandom;
use serde::{Deserialize, Serialize};
use tide::response::IntoResponse;
use tide::{Context, Response};
use crate::error::{ResponseError, SResult};
use crate::helpers::tide::ContextExt;
use crate::models::token::ACL::*;
use crate::models::token::*;
use crate::Data;
fn generate_api_key() -> String {
let mut rng = rand::thread_rng();
let sample = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
sample
.choose_multiple(&mut rng, 40)
.map(|c| *c as char)
.collect()
}
pub async fn list(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(Admin)?;
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let common_store = db.common_store();
let mut response: Vec<Token> = Vec::new();
let iter = common_store
.prefix_iter::<_, Str, SerdeBincode<Token>>(&reader, TOKEN_PREFIX_KEY)
.map_err(ResponseError::internal)?;
for result in iter {
let (_, token) = result.map_err(ResponseError::internal)?;
response.push(token);
}
Ok(tide::response::json(response))
}
pub async fn get(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(Admin)?;
let request_key = ctx.url_param("key")?;
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, request_key);
let token_config = db
.common_store()
.get::<_, Str, SerdeBincode<Token>>(&reader, &token_key)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::not_found(format!(
"token key: {}",
token_key
)))?;
Ok(tide::response::json(token_config))
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct CreatedRequest {
description: String,
acl: Vec<ACL>,
indexes: Vec<Wildcard>,
#[serde(with = "ts_seconds")]
expires_at: DateTime<Utc>,
}
pub async fn create(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(Admin)?;
let data: CreatedRequest = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let key = generate_api_key();
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, key);
let token_definition = Token {
key,
description: data.description,
acl: data.acl,
indexes: data.indexes,
expires_at: data.expires_at,
created_at: Utc::now(),
updated_at: Utc::now(),
revoked: false,
};
let db = &ctx.state().db;
let mut writer = db.main_write_txn().map_err(ResponseError::internal)?;
db.common_store()
.put::<_, Str, SerdeBincode<Token>>(&mut writer, &token_key, &token_definition)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
Ok(tide::response::json(token_definition)
.with_status(StatusCode::CREATED)
.into_response())
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct UpdatedRequest {
description: Option<String>,
acl: Option<Vec<ACL>>,
indexes: Option<Vec<Wildcard>>,
expires_at: Option<DateTime<Utc>>,
revoked: Option<bool>,
}
pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(Admin)?;
let request_key = ctx.url_param("key")?;
let data: UpdatedRequest = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let db = &ctx.state().db;
let mut writer = db.main_write_txn().map_err(ResponseError::internal)?;
let common_store = db.common_store();
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, request_key);
let mut token_config = common_store
.get::<_, Str, SerdeBincode<Token>>(&writer, &token_key)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::not_found(format!(
"token key: {}",
token_key
)))?;
// apply the modifications
if let Some(description) = data.description {
token_config.description = description;
}
if let Some(acl) = data.acl {
token_config.acl = acl;
}
if let Some(indexes) = data.indexes {
token_config.indexes = indexes;
}
if let Some(expires_at) = data.expires_at {
token_config.expires_at = expires_at;
}
if let Some(revoked) = data.revoked {
token_config.revoked = revoked;
}
token_config.updated_at = Utc::now();
common_store
.put::<_, Str, SerdeBincode<Token>>(&mut writer, &token_key, &token_config)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
Ok(tide::response::json(token_config)
.with_status(StatusCode::OK)
.into_response())
}
pub async fn delete(ctx: Context<Data>) -> SResult<StatusCode> {
ctx.is_allowed(Admin)?;
let request_key = ctx.url_param("key")?;
let db = &ctx.state().db;
let mut writer = db.main_write_txn().map_err(ResponseError::internal)?;
let common_store = db.common_store();
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, request_key);
common_store
.delete::<_, Str>(&mut writer, &token_key)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
Ok(StatusCode::NO_CONTENT)
}
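
The least obvious part of CreatedRequest above is expiresAt: the ts_seconds adapter means the API expects a Unix timestamp in seconds rather than an RFC 3339 string. A standalone sketch of just that field (the ExpiryOnly name is illustrative; chrono with its serde feature, serde, and serde_json are assumed):

use chrono::serde::ts_seconds;
use chrono::{DateTime, TimeZone, Utc};
use serde::Deserialize;

#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ExpiryOnly {
    #[serde(with = "ts_seconds")]
    expires_at: DateTime<Utc>,
}

fn main() {
    // 1577836800 is 2020-01-01T00:00:00Z.
    let body: ExpiryOnly = serde_json::from_str(r#"{ "expiresAt": 1577836800 }"#).unwrap();
    assert_eq!(body.expires_at, Utc.timestamp(1577836800, 0));
}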

View File

@@ -0,0 +1,122 @@
use crate::data::Data;
pub mod document;
pub mod health;
pub mod index;
pub mod key;
pub mod search;
pub mod setting;
pub mod stats;
pub mod stop_words;
pub mod synonym;
pub fn load_routes(app: &mut tide::App<Data>) {
app.at("").nest(|router| {
router.at("/indexes").nest(|router| {
router
.at("/")
.get(index::list_indexes)
.post(index::create_index);
router.at("/search").post(search::search_multi_index);
router.at("/:index").nest(|router| {
router.at("/search").get(search::search_with_url_query);
router.at("/updates").nest(|router| {
router.at("/").get(index::get_all_updates_status);
router.at("/:update_id").get(index::get_update_status);
});
router
.at("/")
.get(index::get_index)
.put(index::update_index)
.delete(index::delete_index);
router
.at("/schema")
.get(index::get_index_schema)
.put(index::update_schema);
router.at("/documents").nest(|router| {
router
.at("/")
.get(document::get_all_documents)
.post(document::add_or_replace_multiple_documents)
.put(document::add_or_update_multiple_documents)
.delete(document::clear_all_documents);
router.at("/:identifier").nest(|router| {
router
.at("/")
.get(document::get_document)
.delete(document::delete_document);
});
router
.at("/delete")
.post(document::delete_multiple_documents);
});
router.at("/synonyms").nest(|router| {
router
.at("/")
.get(synonym::list)
.post(synonym::create)
.delete(synonym::clear);
router
.at("/:synonym")
.get(synonym::get)
.put(synonym::update)
.delete(synonym::delete);
router.at("/batch").post(synonym::batch_write);
});
router.at("/stop-words").nest(|router| {
router
.at("/")
.get(stop_words::list)
.patch(stop_words::add)
.delete(stop_words::delete);
});
router
.at("/settings")
.get(setting::get)
.post(setting::update);
});
});
router.at("/keys").nest(|router| {
router.at("/").get(key::list).post(key::create);
router
.at("/:key")
.get(key::get)
.put(key::update)
.delete(key::delete);
});
});
// Private
app.at("").nest(|router| {
router
.at("/health")
.get(health::get_health)
.post(health::set_healthy)
.put(health::change_healthiness)
.delete(health::set_unhealthy);
router.at("/stats").get(stats::get_stats);
router.at("/stats/:index").get(stats::index_stat);
router.at("/version").get(stats::get_version);
router.at("/sys-info").get(stats::get_sys_info);
router
.at("/sys-info/pretty")
.get(stats::get_sys_info_pretty);
});
}
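
Flattened by hand from the nested registrations above (a summary, not generated output), the route table is roughly:

GET|POST            /indexes                              -> index::list_indexes / index::create_index
POST                /indexes/search                       -> search::search_multi_index
GET|PUT|DELETE      /indexes/:index                       -> index::get_index / update_index / delete_index
GET                 /indexes/:index/search                -> search::search_with_url_query
GET                 /indexes/:index/updates               -> index::get_all_updates_status
GET                 /indexes/:index/updates/:update_id    -> index::get_update_status
GET|PUT             /indexes/:index/schema                -> index::get_index_schema / update_schema
GET|POST|PUT|DELETE /indexes/:index/documents             -> document list / replace / update / clear
GET|DELETE          /indexes/:index/documents/:identifier -> document::get_document / delete_document
POST                /indexes/:index/documents/delete      -> document::delete_multiple_documents
GET|POST|DELETE     /indexes/:index/synonyms              -> synonym::list / create / clear
GET|PUT|DELETE      /indexes/:index/synonyms/:synonym     -> synonym::get / update / delete
POST                /indexes/:index/synonyms/batch        -> synonym::batch_write
GET|PATCH|DELETE    /indexes/:index/stop-words            -> stop_words::list / add / delete
GET|POST            /indexes/:index/settings              -> setting::get / setting::update
GET|POST            /keys ; GET|PUT|DELETE /keys/:key     -> key handlers
GET|POST|PUT|DELETE /health                               -> health handlers
GET                 /stats, /stats/:index, /version, /sys-info, /sys-info/pretty -> stats handlers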

View File

@@ -0,0 +1,243 @@
use std::collections::HashMap;
use std::collections::HashSet;
use std::time::Duration;
use meilisearch_core::Index;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use serde::{Deserialize, Serialize};
use tide::querystring::ContextExt as QSContextExt;
use tide::{Context, Response};
use crate::error::{ResponseError, SResult};
use crate::helpers::meilisearch::{Error, IndexSearchExt, SearchHit};
use crate::helpers::tide::ContextExt;
use crate::Data;
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct SearchQuery {
q: String,
offset: Option<usize>,
limit: Option<usize>,
attributes_to_retrieve: Option<String>,
attributes_to_search_in: Option<String>,
attributes_to_crop: Option<String>,
crop_length: Option<usize>,
attributes_to_highlight: Option<String>,
filters: Option<String>,
timeout_ms: Option<u64>,
matches: Option<bool>,
}
pub async fn search_with_url_query(ctx: Context<Data>) -> SResult<Response> {
// ctx.is_allowed(DocumentsRead)?;
let index = ctx.index()?;
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let schema = index
.main
.schema(&reader)
.map_err(ResponseError::internal)?
.ok_or(ResponseError::open_index("No Schema found"))?;
let query: SearchQuery = ctx
.url_query()
.map_err(|_| ResponseError::bad_request("invalid query parameter"))?;
let mut search_builder = index.new_search(query.q.clone());
if let Some(offset) = query.offset {
search_builder.offset(offset);
}
if let Some(limit) = query.limit {
search_builder.limit(limit);
}
if let Some(attributes_to_retrieve) = query.attributes_to_retrieve {
for attr in attributes_to_retrieve.split(',') {
search_builder.add_retrievable_field(attr.to_string());
}
}
if let Some(attributes_to_search_in) = query.attributes_to_search_in {
for attr in attributes_to_search_in.split(',') {
search_builder.add_attribute_to_search_in(attr.to_string());
}
}
if let Some(attributes_to_crop) = query.attributes_to_crop {
let crop_length = query.crop_length.unwrap_or(200);
if attributes_to_crop == "*" {
let attributes_to_crop = schema
.iter()
.map(|(attr, ..)| (attr.to_string(), crop_length))
.collect();
search_builder.attributes_to_crop(attributes_to_crop);
} else {
let attributes_to_crop = attributes_to_crop
.split(',')
.map(|r| (r.to_string(), crop_length))
.collect();
search_builder.attributes_to_crop(attributes_to_crop);
}
}
if let Some(attributes_to_highlight) = query.attributes_to_highlight {
let attributes_to_highlight = if attributes_to_highlight == "*" {
schema.iter().map(|(attr, ..)| attr.to_string()).collect()
} else {
attributes_to_highlight
.split(',')
.map(ToString::to_string)
.collect()
};
search_builder.attributes_to_highlight(attributes_to_highlight);
}
if let Some(filters) = query.filters {
search_builder.filters(filters);
}
if let Some(timeout_ms) = query.timeout_ms {
search_builder.timeout(Duration::from_millis(timeout_ms));
}
if let Some(matches) = query.matches {
if matches {
search_builder.get_matches();
}
}
let response = match search_builder.search(&reader) {
Ok(response) => response,
Err(Error::Internal(message)) => return Err(ResponseError::Internal(message)),
Err(others) => return Err(ResponseError::bad_request(others)),
};
Ok(tide::response::json(response))
}
#[derive(Clone, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct SearchMultiBody {
indexes: HashSet<String>,
query: String,
offset: Option<usize>,
limit: Option<usize>,
attributes_to_retrieve: Option<HashSet<String>>,
attributes_to_search_in: Option<HashSet<String>>,
attributes_to_crop: Option<HashMap<String, usize>>,
attributes_to_highlight: Option<HashSet<String>>,
filters: Option<String>,
timeout_ms: Option<u64>,
matches: Option<bool>,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct SearchMultiBodyResponse {
hits: HashMap<String, Vec<SearchHit>>,
offset: usize,
hits_per_page: usize,
processing_time_ms: usize,
query: String,
}
pub async fn search_multi_index(mut ctx: Context<Data>) -> SResult<Response> {
// ctx.is_allowed(DocumentsRead)?;
let body = ctx
.body_json::<SearchMultiBody>()
.await
.map_err(ResponseError::bad_request)?;
let mut index_list = body.clone().indexes;
for index in index_list.clone() {
if index == "*" {
index_list = ctx.state().db.indexes_uids().into_iter().collect();
break;
}
}
let mut offset = 0;
let mut count = 20;
if let Some(body_offset) = body.offset {
if let Some(limit) = body.limit {
offset = body_offset;
count = limit;
}
}
let offset = offset;
let count = count;
let db = &ctx.state().db;
let par_body = body.clone();
let responses_per_index: Vec<SResult<_>> = index_list
.into_par_iter()
.map(move |index_uid| {
let index: Index = db
.open_index(&index_uid)
.ok_or(ResponseError::index_not_found(&index_uid))?;
let mut search_builder = index.new_search(par_body.query.clone());
search_builder.offset(offset);
search_builder.limit(count);
if let Some(attributes_to_retrieve) = par_body.attributes_to_retrieve.clone() {
search_builder.attributes_to_retrieve(attributes_to_retrieve);
}
if let Some(attributes_to_search_in) = par_body.attributes_to_search_in.clone() {
search_builder.attributes_to_search_in(attributes_to_search_in);
}
if let Some(attributes_to_crop) = par_body.attributes_to_crop.clone() {
search_builder.attributes_to_crop(attributes_to_crop);
}
if let Some(attributes_to_highlight) = par_body.attributes_to_highlight.clone() {
search_builder.attributes_to_highlight(attributes_to_highlight);
}
if let Some(filters) = par_body.filters.clone() {
search_builder.filters(filters);
}
if let Some(timeout_ms) = par_body.timeout_ms {
search_builder.timeout(Duration::from_millis(timeout_ms));
}
if let Some(matches) = par_body.matches {
if matches {
search_builder.get_matches();
}
}
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let response = search_builder
.search(&reader)
.map_err(ResponseError::internal)?;
Ok((index_uid, response))
})
.collect();
let mut hits_map = HashMap::new();
let mut max_query_time = 0;
for response in responses_per_index {
if let Ok((index_uid, response)) = response {
if response.processing_time_ms > max_query_time {
max_query_time = response.processing_time_ms;
}
hits_map.insert(index_uid, response.hits);
}
}
let response = SearchMultiBodyResponse {
hits: hits_map,
offset,
hits_per_page: count,
processing_time_ms: max_query_time,
query: body.query,
};
Ok(tide::response::json(response))
}
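
Both attributesToCrop and attributesToHighlight above accept either a comma-separated list or "*", which expands to every attribute of the schema (cropLength falls back to 200 for cropping). A standalone sketch of that expansion rule, with a plain slice standing in for the schema iterator:

fn expand_attributes(raw: &str, schema_attrs: &[&str]) -> Vec<String> {
    if raw == "*" {
        // "*" means every attribute declared in the schema.
        schema_attrs.iter().map(|attr| attr.to_string()).collect()
    } else {
        raw.split(',').map(ToString::to_string).collect()
    }
}

fn main() {
    let schema = ["title", "overview", "poster"];
    assert_eq!(expand_attributes("*", &schema), vec!["title", "overview", "poster"]);
    assert_eq!(expand_attributes("title,overview", &schema), vec!["title", "overview"]);
}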

View File

@@ -0,0 +1,88 @@
use std::collections::HashMap;
use http::StatusCode;
use serde::{Deserialize, Serialize};
use tide::response::IntoResponse;
use tide::{Context, Response};
use crate::error::{ResponseError, SResult};
use crate::helpers::tide::ContextExt;
use crate::models::token::ACL::*;
use crate::routes::document::IndexUpdateResponse;
use crate::Data;
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SettingBody {
pub ranking_order: Option<RankingOrder>,
pub distinct_field: Option<DistinctField>,
pub ranking_rules: Option<RankingRules>,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RankingOrdering {
Asc,
Dsc,
}
pub type RankingOrder = Vec<String>;
pub type DistinctField = String;
pub type RankingRules = HashMap<String, RankingOrdering>;
pub async fn get(ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(SettingsRead)?;
let index = ctx.index()?;
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let settings = match index.main.customs(&reader).unwrap() {
Some(bytes) => bincode::deserialize(bytes).unwrap(),
None => SettingBody::default(),
};
Ok(tide::response::json(settings))
}
pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(SettingsWrite)?;
let settings: SettingBody = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let index = ctx.index()?;
let db = &ctx.state().db;
let reader = db.main_read_txn().map_err(ResponseError::internal)?;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let mut current_settings = match index.main.customs(&reader).unwrap() {
Some(bytes) => bincode::deserialize(bytes).unwrap(),
None => SettingBody::default(),
};
if let Some(ranking_order) = settings.ranking_order {
current_settings.ranking_order = Some(ranking_order);
}
if let Some(distinct_field) = settings.distinct_field {
current_settings.distinct_field = Some(distinct_field);
}
if let Some(ranking_rules) = settings.ranking_rules {
current_settings.ranking_rules = Some(ranking_rules);
}
let bytes = bincode::serialize(&current_settings).unwrap();
let update_id = index
.customs_update(&mut writer, bytes)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
let response_body = IndexUpdateResponse { update_id };
Ok(tide::response::json(response_body)
.with_status(StatusCode::ACCEPTED)
.into_response())
}
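
The update handler above applies a partial update: only the fields present in the request body overwrite the stored SettingBody before it is re-serialized with bincode into the index customs. A standalone sketch of that merge rule, with a trimmed-down struct and purely illustrative values:

#[derive(Clone, Debug, Default, PartialEq)]
struct Settings {
    ranking_order: Option<Vec<String>>,
    distinct_field: Option<String>,
}

fn merge(mut current: Settings, patch: Settings) -> Settings {
    if let Some(ranking_order) = patch.ranking_order {
        current.ranking_order = Some(ranking_order);
    }
    if let Some(distinct_field) = patch.distinct_field {
        current.distinct_field = Some(distinct_field);
    }
    current
}

fn main() {
    let stored = Settings {
        ranking_order: Some(vec!["typo".to_string(), "words".to_string()]),
        distinct_field: None,
    };
    let patch = Settings {
        ranking_order: None,
        distinct_field: Some("product_id".to_string()),
    };
    let merged = merge(stored.clone(), patch);
    assert_eq!(merged.ranking_order, stored.ranking_order); // absent fields stay untouched
    assert_eq!(merged.distinct_field, Some("product_id".to_string()));
}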
