Compare commits

...

754 Commits

Author SHA1 Message Date
b7c287ffb7 Merge pull request #604 from meilisearch/personal-token-binaries
Use a personal access token to publish release binaries
2020-04-10 22:51:30 +02:00
457b645f3c Use a personal access token to publish bins
The default GITHUB_TOKEN expires after 1h
2020-04-10 18:28:28 +02:00
0185ffad89 Merge pull request #603 from meilisearch/bump-version
Bump version to v0.10
2020-04-10 15:56:56 +02:00
08edc9d5d0 Update the changelog to refer to the v0.10 2020-04-10 15:43:20 +02:00
979bea0327 Bump MeiliSearch version to v0.10 2020-04-10 15:43:03 +02:00
c7ea9f4cf3 Merge pull request #580 from meilisearch/rework-highlight-crop
Rework query highlight/crop parameters
2020-04-10 13:27:35 +02:00
233651bef8 update changelog 2020-04-10 12:26:53 +02:00
c6fb591348 add * on attributesToRetrieve 2020-04-10 12:26:34 +02:00
644e78df89 Add some tests 2020-04-10 12:26:34 +02:00
500eeca3fb Rework query highlight/crop parameters 2020-04-10 11:12:58 +02:00
c418abe92d Merge pull request #602 from meilisearch/fix-tide-cors
fix tide cors
2020-04-10 10:29:55 +02:00
2fdf33a006 update changelog 2020-04-10 10:13:43 +02:00
c3cf0cade9 fix tide cors 2020-04-10 10:13:43 +02:00
210bc68ced Merge pull request #592 from MarinPostma/query-filters
Implements query filters
2020-04-09 18:43:11 +02:00
193bded4b7 fixes broken tests 2020-04-09 18:26:48 +02:00
8f4d090f34 update changelog 2020-04-09 17:20:37 +02:00
a0a481697b replace lazy_static with once_cell 2020-04-09 17:13:34 +02:00
c3d5778aae allows to get names from schema 2020-04-09 17:13:34 +02:00
3e031d8297 adds error handling and integration 2020-04-09 17:13:34 +02:00
83f50914ec tests 2020-04-09 17:13:34 +02:00
d3916f28aa implements filter logic 2020-04-09 17:13:34 +02:00
dcf1096ac3 implements parser 2020-04-09 17:13:31 +02:00
66568a913c logic skeleton for filter and parser 2020-04-09 16:08:05 +02:00
6db6b40659 Merge pull request #594 from meilisearch/fix-stop-words
Fixes the stop words and words fst generation
2020-04-07 11:06:39 +02:00
780ac5cfd3 Update the CHANGELOG.md 2020-04-06 19:47:57 +02:00
d24209f5a7 Adds a test to check that stop word ar correctly handled 2020-04-06 19:47:57 +02:00
29d021ad4d Fixes the stop words and words fst generation 2020-04-06 18:53:02 +02:00
eb28276923 Merge pull request #589 from meilisearch/change-logo
change logo format
2020-04-05 12:18:36 +02:00
0679ec4f41 change logo format 2020-04-05 11:09:38 +02:00
1b5b71869f Merge pull request #588 from techieshark/patch-1
Fix typo in README
2020-04-05 10:35:30 +02:00
6681681a76 Merge branch 'master' into patch-1 2020-04-05 10:34:10 +02:00
83d8dc0d2b Merge pull request #587 from sgummaluri/fix_first_all_updates_call_after_indexing
Fix for 'Update Status after the first update comes up to be empty (#542)'
2020-04-05 10:32:27 +02:00
49499ca54d Fix typo in README
Non-plural would be more usual in English. I assume "performances" was a typo.
2020-04-05 17:34:12 +10:00
16a63c74ea Modifying the test name for better readability 2020-04-05 00:26:09 +05:30
b4df54197b Slight grammar modification to the changelog message 2020-04-05 00:17:47 +05:30
a28b428074 Update changelog to make the message more readable 2020-04-05 00:14:58 +05:30
e5a336a042 Fix for 'First update does not appear before being processed' #542 2020-04-04 23:18:43 +05:30
5e5702833c Merge pull request #583 from meilisearch/gha-ignore-changelog
Ignores the CHANGELOG when a specific label is set
2020-04-03 15:47:20 +02:00
03063cf349 Ingores the CHANGELOG when label asks for 2020-04-03 15:06:25 +02:00
241b842ef7 Merge pull request #581 from meilisearch/publish-armv8-binary
Publish an aarch64 binary on releases
2020-04-03 11:56:35 +02:00
184c290773 Update the CHANGELOG 2020-04-03 10:42:19 +02:00
5c638184e9 Publish an aarch64 (aka ARMv8) binary on releases 2020-04-03 10:39:28 +02:00
3a88910a24 Merge pull request #579 from meilisearch/update-deps
Update dependencies
2020-04-02 20:24:23 +02:00
eddd453564 Makes http-service a dev-dependency 2020-04-02 18:36:35 +02:00
38c43759bb Update most of the dependencies 2020-04-02 18:36:04 +02:00
26225a2fdf Merge pull request #576 from ppamorim/fix-bench
Fix benchmark
2020-04-02 12:23:31 +02:00
9950fffb6f Simplify imports of std::fs and std::io, remove space not needed, Remove UpdateState 2020-04-02 11:02:19 +01:00
f5d57c9dce Replace the toml reader with the JSON settings reader, directly parse the data to SettingsUpdate, Update CHANGELOG 2020-04-02 11:01:56 +01:00
bc9c80a5ee Merge pull request #577 from meilisearch/change-slogan
Change the slogan
2020-04-01 16:35:59 +02:00
702f7445ec Change the slogan 2020-04-01 16:34:24 +02:00
dcb93e3166 Merge pull request #575 from ppamorim/nested-seq
Support nested-seq
2020-04-01 14:16:47 +02:00
02b79e0040 Modified JSON to add move conditions 2020-04-01 12:59:40 +01:00
88b71fb6c4 Update CHANGELOG to add seq support 2020-04-01 12:59:40 +01:00
95bb443430 Add empty seq 2020-04-01 12:59:40 +01:00
1b47a10e89 Add support for seq values 2020-04-01 12:59:40 +01:00
006e54109b Merge pull request #570 from tpayet/clean-readme-heroku
Removing Heroku deployment from README
2020-04-01 11:35:29 +02:00
7eb6333933 Removing Heroku deployment from README 2020-04-01 11:04:16 +02:00
065da3d613 Merge pull request #572 from ppamorim/ignore-null-nested-obj
Add support of nested null
2020-03-31 16:33:16 +02:00
e698fa0b63 Add issue index in the CHANGELOG 2020-03-31 15:06:04 +01:00
8b662be42b Update CHANGELOG.md
Co-Authored-By: Clément Renault <renault.cle@gmail.com>
2020-03-31 15:03:35 +01:00
52a4f7cd23 Update readme 2020-03-31 14:41:22 +01:00
690b8e0dd0 Replace .toString to String::new() 2020-03-31 14:01:44 +01:00
bc6d86c8ce serialize_unit returns a empty string 2020-03-31 13:51:12 +01:00
fbf7117d6a Rename function, add trailing line, replace JSON string with macro 2020-03-31 13:13:09 +01:00
51472142c6 Add test to check if nested null will be ignored 2020-03-31 12:00:13 +01:00
91d1bd5903 Merge pull request #569 from meilisearch/ignore-bool-nested-obj
Make the engine index booleans
2020-03-31 11:01:26 +02:00
69aee870da Make the engine index booleans
The engine will see the values like text "true" and "false"
2020-03-31 10:39:58 +02:00
3b25bd71ab Merge pull request #567 from meilisearch/fix-not-dedup-matches
Construct a Set using the from_dirty method
2020-03-31 10:15:03 +02:00
c18e907f96 Construct a Set using the from_dirty method
This commit fixes #566 by ensuring that the slice of matches is
ordered and deduplicated.
2020-03-30 20:56:30 +02:00
e3808b8694 Merge pull request #558 from matboivin/update-readme
Update readme
2020-03-28 10:46:00 +01:00
116b301359 Add Slack 2020-03-28 10:28:48 +01:00
3ed510b78e Minor fix 2020-03-28 10:28:30 +01:00
565c46fdd4 Merge pull request #548 from tendant/master
Stringify nested JSON object
2020-03-27 19:57:34 +01:00
b0255076de Merge branch 'master' into master 2020-03-27 19:43:02 +01:00
67348f2251 Merge pull request #555 from meilisearch/add-changelog
Add a CHANGELOG.md file
2020-03-27 19:33:39 +01:00
227bc716d8 Add a Github Action to ensure the CHANGELOG is updated in PRs 2020-03-27 19:12:50 +01:00
c3467313e5 Add a CHANGELOG to help the documentation follow the engine udpates 2020-03-27 19:01:46 +01:00
c82eed010a Merge pull request #543 from MarinPostma/aligned-search-crops
adds support for aligned crop in search result
2020-03-27 18:58:45 +01:00
158c2b5382 tests aligned crop 2020-03-27 18:38:41 +01:00
2d1d59acb7 adds support for aligned cropping with cjk 2020-03-27 18:38:41 +01:00
0088de9802 adds support for aligned crop in search result 2020-03-27 18:38:41 +01:00
f49d2bca64 Merge branch 'master' into master 2020-03-27 17:07:06 +01:00
b7273c450f Merge pull request #545 from matboivin/update-readme
Update readme
2020-03-27 11:49:11 +01:00
4130fddcc8 Center-align crates demo gif 2020-03-27 11:28:57 +01:00
4f05045acb Center-align web interface gif 2020-03-27 11:20:30 +01:00
bc16c9beb7 Update gif links 2020-03-27 11:17:31 +01:00
0af9f6cf6e Add movies gif and move crates demo gif 2020-03-27 11:17:17 +01:00
022aeac808 Stringify nested JSON object 2020-03-26 18:45:57 -07:00
20461ccf36 Add gif
Co-Authored-By: cvermand <33010418+bidoubiwa@users.noreply.github.com>
2020-03-26 21:56:27 +01:00
7297396162 Update performance 2020-03-26 19:22:59 +01:00
c15deb41b0 Remove How it works (deep dive) section 2020-03-26 16:26:43 +01:00
cb2a08db7e Center-align badges 2020-03-26 16:24:03 +01:00
67703b5ea2 Remove Notes about system allocator 2020-03-26 16:17:47 +01:00
c445abb982 Replace a by an
Co-Authored-By: Clément Renault <renault.cle@gmail.com>
2020-03-26 16:14:52 +01:00
38d97fa339 Change phrasing 2020-03-26 13:48:08 +01:00
d45f0819be Remove repetitive word 2020-03-26 13:25:57 +01:00
9375d0efbe Fix details 2020-03-26 13:23:20 +01:00
2291c33074 Align with quick start guide 2020-03-26 13:18:11 +01:00
0a216066f4 Split commands 2020-03-26 13:13:02 +01:00
eea2a9cfc3 Add contact 2020-03-26 13:10:44 +01:00
33c2b9c5ff Add social 2020-03-26 13:04:23 +01:00
1129812e6e Update link formatting 2020-03-26 12:42:41 +01:00
b1b0c6b4b3 Add useful links 2020-03-26 12:31:58 +01:00
6ae3f2f8b9 Remove line under logo 2020-03-26 12:24:02 +01:00
f8d594e7ea Update formatting and add logo 2020-03-26 12:23:09 +01:00
38c3aa542f Add logo image 2020-03-26 12:05:53 +01:00
f3382125e1 Merge branch 'master' of git://github.com/meilisearch/MeiliSearch into update-readme 2020-03-26 12:01:40 +01:00
592a438ae8 Rephrase the readme 2020-03-26 11:59:40 +01:00
d84a86897c Merge pull request #540 from meilisearch/publish-arm-binaries
Publish an ARMv7 binary for the releases
2020-03-26 11:14:48 +01:00
88c063e887 Publish an ARMv7 binary for the releases 2020-03-26 10:51:47 +01:00
ba8a410d4c Merge pull request #539 from emresaglam/html-sanitize
html sanitize
2020-03-25 21:33:03 +01:00
451061f4b8 Merge branch 'master' into html-sanitize 2020-03-25 13:06:18 -07:00
ae17aa4955 Update meilisearch-http/public/interface.html
bypassing <em> tag after encoding the "<>"

Co-Authored-By: Clément Renault <renault.cle@gmail.com>
2020-03-25 12:48:59 -07:00
f589d07706 Merge pull request #544 from meilisearch/add-slack-link
Add a slack badge on readme
2020-03-25 20:29:00 +01:00
3f343ebfdb Update README.md 2020-03-25 20:22:04 +01:00
95ea3e39d2 Merge pull request #541 from MarinPostma/search-result-count
Adds number of hits in search result
2020-03-25 15:34:06 +01:00
a6dcd7a421 fixes tests
fixes tests impacted by sifnature change of query
2020-03-25 15:17:20 +01:00
fa9b7dd29f removes useless deserializer for SearchResult 2020-03-25 13:59:15 +01:00
fd65cf9dcb populates exhaustive number of hits 2020-03-25 12:44:38 +01:00
6e9d7f94d4 adds exhaustive number hits to search result 2020-03-25 12:11:37 +01:00
6151bc262f Added the missing function call 2020-03-24 11:03:16 -07:00
b62f9fabf2 Update meilisearch-http/public/interface.html
Co-Authored-By: Clément Renault <renault.cle@gmail.com>
2020-03-24 10:39:53 -07:00
86e1ba871f html sanitize
Added a function to sanitize the html
This is for browser side only.
2020-03-24 08:37:56 -07:00
a6ac902bf4 Merge pull request #534 from curquiza/homebrew-automatization
Automate homebrew publish
2020-03-20 16:14:41 +01:00
4cdb67c249 Automate homebrew publish 2020-03-20 12:14:08 +01:00
29622e11f5 Merge pull request #533 from meilisearch/bump-to-v0.9.0
Bump the workspace crates to 0.9.0
2020-03-19 13:50:55 +01:00
3ca8db2cc1 Bump the workspace crates to 0.9.0 2020-03-19 11:56:23 +01:00
cc5eb885ea Merge pull request #531 from meilisearch/bump-rc
Bump the workspace crates to 0.9.0-rc.1
2020-03-16 18:09:11 +01:00
f6972ec682 Bump the workspace crates to 0.9.0-rc.1 2020-03-16 16:58:20 +01:00
cfe21f7b02 Merge pull request #530 from meilisearch/fix-ranking-rules-inference
Ranking fields should be stored and indexed by default
2020-03-16 16:53:06 +01:00
2d82f1b655 ranking fields should be stored and indexed by default; fix #521 2020-03-16 16:19:23 +01:00
cf6e481c14 Merge pull request #520 from meilisearch/fix-http-issues
Fix http issues
2020-03-11 15:21:50 +01:00
7be376721c global settings update make partial update; fix #516 2020-03-11 14:42:58 +01:00
ce0e8415d5 adding primary-key when adding documents does not work; fix #519 2020-03-11 14:12:38 +01:00
4ccf1d10bd error message when impossible to infer the primary-key; fix #517 2020-03-11 12:27:42 +01:00
c25641ff2d fix that AcceptNewFields does not take into account the primary-key; fix #518 2020-03-11 12:00:40 +01:00
14c1aba6c7 Merge pull request #509 from meilisearch/fix-internal-schema
Fix internal schema
2020-03-10 16:25:36 +01:00
8204d961de allow api key in header when no master-key is set; fix #515 2020-03-10 15:59:16 +01:00
ef3bcd65ab fix comments from review 2020-03-10 15:59:11 +01:00
b06e33f3d3 fix errors on http parameter naming 2020-03-10 12:08:10 +01:00
179969a9e2 fix tests + fmt 2020-03-10 11:29:56 +01:00
c984d8d5a5 rename identifier into primaryKey; fix #514 2020-03-09 18:45:29 +01:00
8ffa80883a remove the unused function 2020-03-09 18:45:29 +01:00
86c3482cbd review the internal schema to allow to create schema without identifier; fix #513 2020-03-09 18:45:20 +01:00
16a99aa95e update to infer identifier; fix #498 2020-03-06 10:55:25 +01:00
6d86968c4c Merge pull request #496 from meilisearch/small-fixes-before-0.9
Fix some issues before v0.9
2020-03-06 10:28:45 +01:00
8df6d6e954 fix error 500 when sending bad rankingRules; fix #500 2020-03-06 10:15:19 +01:00
8aeddec982 remove the route to get identifier on settings; fix #502 2020-03-06 10:15:19 +01:00
f4ae0844ab replace index-new-field route to accept-new-fields; fix #503 2020-03-06 10:15:19 +01:00
d56968cb23 default values of synonyms and stop-words; fix #499 fix #504 2020-03-06 10:15:19 +01:00
c5b6e641a4 index UID format; fix #497 2020-03-06 10:15:19 +01:00
041eed2a06 no id returned; fix #492 2020-03-06 10:15:19 +01:00
54c675e195 fix delete-batch route; #493 2020-03-06 10:15:19 +01:00
81ce90e57f update test 2020-03-06 10:15:19 +01:00
6016f2e941 change wording of custom ranking rules dsc -> desc; #490 2020-03-06 10:15:19 +01:00
4d27318b72 remove unnecessary comment on env Opt; #491 2020-03-06 10:15:11 +01:00
decce4d8e4 change route /keys/ -> /keys; #495 2020-03-05 15:33:02 +01:00
1cb9f75026 Merge pull request #507 from meilisearch/fix-documents-fields-order-inference
Fix the inference of the documents searchable fields
2020-03-04 14:16:36 +01:00
5e31d28759 Fix the inference of the documents searchable fields 2020-03-03 20:54:17 +01:00
2b780ab2c5 Merge pull request #489 from meilisearch/fix-rank-distinct
Use distinct on search
2020-03-02 16:34:27 +01:00
a2f0f95337 use distinct on search 2020-03-02 16:19:41 +01:00
72450c765d Merge pull request #484 from meilisearch/fix-reindex-by-chunk
Stop reindexing by chunk during complete reindexing
2020-02-28 18:29:25 +01:00
250aeaa86c stop reindexing by chunk during complete reindexing 2020-02-28 11:49:12 +01:00
06ace88901 Merge pull request #482 from meilisearch/review-settings-endpoint
Review settings endpoint
2020-02-28 11:39:38 +01:00
47009615ee rename words_position to wordsPosition; fix #483 2020-02-27 16:24:49 +01:00
dda08d60d2 cargo fmt 2020-02-27 14:33:57 +01:00
f182afc50b update tests 2020-02-27 11:30:23 +01:00
bb5d931f16 rename criterions on settings route; fix #480 2020-02-27 11:30:22 +01:00
3c74e71d4f show default ranking rules if user reset them; fix #476 2020-02-27 11:30:17 +01:00
79e07fa852 reset value of searchable and displayed attributes; fix #473 2020-02-27 11:04:39 +01:00
aa95c26e07 update tests 2020-02-27 11:04:39 +01:00
2eb6f81c58 rename ranking_distinct to distinct_attribute; fix #474 2020-02-27 11:04:39 +01:00
a067a1b16b replace index_new_fields to accept_new_fields; fix #475 2020-02-27 11:04:38 +01:00
1df51c52e0 Merge pull request #458 from meilisearch/rename-exactness-criterion
Rename the Exact criterion into Exactness
2020-02-25 16:23:57 +01:00
96248d9bfa Change the exactness criterion in the tests 2020-02-25 14:24:15 +01:00
9d167c08f4 Rename the Exact criterion into Exactness 2020-02-25 14:16:55 +01:00
8e6560d102 Merge pull request #464 from meilisearch/simplify-keys
Simplify keys & add launcher resume
2020-02-17 13:59:41 +01:00
ad83c3ab5a add launch resume & environment 2020-02-17 10:13:08 +01:00
257b7b4df4 introduce new key management 2020-02-14 12:54:07 +01:00
5ac757a5fd Merge pull request #465 from meilisearch/fix-un-rankable-fields
fix un-rankable fields errors
2020-02-14 11:27:12 +01:00
2d7a1bfce0 fix un-rankable fields errors; fix #463 2020-02-14 10:34:33 +01:00
3845b89a16 Merge pull request #441 from meilisearch/issues-0.9.0
Stabilize http endpoint
2020-02-13 15:57:37 +01:00
ce8e12c7c5 update tests 2020-02-13 12:24:30 +01:00
4986adc186 move identifier from settings to index; fix #470 2020-02-12 17:00:14 +01:00
dc9ca2ebc9 fixes for review 2020-02-12 16:51:14 +01:00
40d7396d90 update tests for settings 2020-02-11 15:28:01 +01:00
559c2f8907 Add stop words on query 2020-02-11 15:28:00 +01:00
dc6907e748 rebase from master 2020-02-11 15:28:00 +01:00
2143226f04 setup clippy and make a pass on code 2020-02-11 15:28:00 +01:00
ea2a64a504 remove unecessary settings routes 2020-02-11 15:28:00 +01:00
a5b0e468ee fix for review 2020-02-11 15:28:00 +01:00
14b5fc4d6c cargo fmt 2020-02-11 15:28:00 +01:00
f498bfed51 add test on /settings/ranking 2020-02-11 15:27:59 +01:00
50a9825a0f fix some uses cases on settings 2020-02-11 15:27:59 +01:00
5c49f08bb2 update settings routes 2020-02-11 15:27:59 +01:00
bbf9f41a04 add cors 2020-02-11 15:27:59 +01:00
6a32432b01 add /settings/index-new-fields routes 2020-02-11 15:27:59 +01:00
037724576e update tests 2020-02-11 15:27:59 +01:00
10b8a0ab00 add request middleware 2020-02-11 15:27:59 +01:00
faf0dd2f44 do not show matches on undesired fields 2020-02-11 15:27:58 +01:00
585bba43a0 set new attributes indexed if needed 2020-02-11 15:27:58 +01:00
b1528f9466 allow to see highlihts with matches and crop; fix #450 #449 2020-02-11 15:27:58 +01:00
7a491a64c0 add test 2020-02-11 15:27:58 +01:00
57503ad9bf add test on search 2020-02-11 15:27:58 +01:00
c276dda305 run cargo fmt 2020-02-11 15:27:58 +01:00
9c0497c419 change the way settings are show in updates 2020-02-11 15:27:58 +01:00
b33dac9faa add test for search + update ci for test in release 2020-02-11 15:27:57 +01:00
f77f38dfa0 fix update system 2020-02-11 15:27:57 +01:00
58fe87067b finish settings 2020-02-11 15:27:57 +01:00
dbba310770 squash me 2020-02-11 15:27:57 +01:00
6deb481589 definitely remove attributes_ranked on settings; auto create it with ranking_rules 2020-02-11 15:27:57 +01:00
036977bfe4 add the possibility to totally clear the schema 2020-02-11 15:27:57 +01:00
d280848ff6 add test for settings 2020-02-11 15:27:56 +01:00
7a6f583b1f fix issue on ranking rules 2020-02-11 15:27:56 +01:00
e078eafb1f clean unused functions 2020-02-11 15:27:56 +01:00
6f534540a6 fix error on stop words fst 2020-02-11 15:27:56 +01:00
38d57d213f expose api for new settings 2020-02-11 15:27:56 +01:00
7c14769226 add test for index creation 2020-02-11 15:27:56 +01:00
b71bbcffaa simplify error handling 2020-02-11 15:27:56 +01:00
f83e874e35 return the good created_at and updated_at on index creation 2020-02-11 15:27:55 +01:00
ae0a11e422 fix schema & fix tests 2020-02-11 15:27:55 +01:00
116a637cfd set test for healthyness 2020-02-11 15:27:55 +01:00
83cf683db4 introduce test for meilisearch-http 2020-02-11 15:27:55 +01:00
1b3312871e set name optional during index creation 2020-02-11 15:27:55 +01:00
0e12920910 bump tide version 2020-02-11 15:27:55 +01:00
a35eb16a2a store the schema after each document updates 2020-02-11 15:27:54 +01:00
4f0ead625b adapt meilisearch-http to the new schemaless option 2020-02-11 15:27:54 +01:00
21d122a870 rewrite indexed_pos -> field_id for hightligths 2020-02-11 15:27:54 +01:00
130fb74928 introduce a new schemaless way 2020-02-11 15:27:54 +01:00
bbe1845f66 squash-me 2020-02-11 15:27:54 +01:00
2ee90a891c introduce a new settings update system 2020-02-11 15:27:54 +01:00
203c83bdb4 Remove SearchableAttributes; fix #429 2020-02-11 15:27:53 +01:00
73918d803c Rename AttributesToSearchIn into SearchableAttributes; fix #428 2020-02-11 15:27:53 +01:00
110adcae85 Remove the schema; fix #422 2020-02-11 15:27:53 +01:00
c536ea64c3 Change the indexes stats HTTP route; fix #423 2020-02-11 15:27:53 +01:00
aa7a6d5f8c Rewrite the synonyms endpoint; fix #418 2020-02-11 15:27:53 +01:00
91c6539baf Rewrite the stop-words endpoint; fix #417 2020-02-11 15:27:53 +01:00
f0590d3301 Change documents routes; fix #416 2020-02-11 15:27:53 +01:00
a5c5df0290 Merge pull request #443 from curquiza/brew
Add Brew installation in README
2020-02-10 16:36:33 +01:00
f0c2913dcf Add Brew installation in README 2020-02-10 16:26:50 +01:00
9c6d590950 Merge pull request #442 from curquiza/docker-github-action
Change github action for docker latest image
2020-02-10 16:26:14 +01:00
ab3339f5a1 Change github action for docker latest image 2020-02-10 16:11:45 +01:00
43ce45f62b Merge pull request #456 from djKooks/update/cjk-filter-ko-ja
Update CJK filter
2020-01-30 09:46:08 +01:00
2b5d153361 Update cjk filter 2020-01-30 09:55:16 +09:00
cde8845143 Merge pull request #454 from meilisearch/fix-db-compaction
Support compaction with the new split database
2020-01-24 17:45:34 +01:00
7c0d8f073b Support compaction with multi database 2020-01-24 17:38:14 +01:00
69adb1d771 Merge pull request #453 from meilisearch/introduce-query-tree
Introduce a query tree structure
2020-01-23 10:40:53 +01:00
a2bc689b92 Fix the tests a little bit 2020-01-22 18:12:56 +01:00
a9adbda2cd Make the engine support non-exact multi-words synonyms 2020-01-22 18:11:58 +01:00
0b9fe2c072 Introduce the new Query Tree creation supporting more operations 2020-01-22 17:46:46 +01:00
789e05304c Replace prints by debug logs 2020-01-21 11:05:34 +01:00
7604387701 Clean up the dependencies 2020-01-21 11:04:25 +01:00
daffcaf4c6 Make the docids OR operation method conditional 2020-01-19 12:29:06 +01:00
ff1ec599e0 Try a better version of sdset 2020-01-19 12:01:24 +01:00
e44d498c94 Display more debug info for prefix tolerant fetches 2020-01-19 11:07:32 +01:00
c334d6b7fe Avoid sorting sorted sequences, prefer using set operations 2020-01-19 10:58:01 +01:00
5465e401bb Catch query tree related errors 2020-01-17 10:41:27 +01:00
9cc3c56c9c Fix the prefix system 2020-01-16 18:41:27 +01:00
d7a7560220 Use an union instead of a sort for prefix fetching 2020-01-16 17:09:27 +01:00
70a529d197 Reduce the number of args of update functions 2020-01-16 16:29:50 +01:00
be31a14326 Make the clear all operation clear caches 2020-01-16 16:19:04 +01:00
96139da0d2 Reintroduce the distinct search system 2020-01-16 15:55:55 +01:00
74fa9ee4df Introduce a better higlighting system 2020-01-16 14:56:16 +01:00
00336c5154 Reintroduce a basic highlight display 2020-01-16 14:24:45 +01:00
3912d1ec4b Improve query parsing and interpretation 2020-01-16 14:11:17 +01:00
70d4f47f37 Differentiate short words as prefix or exact matches 2020-01-16 12:01:51 +01:00
9809ded23d Implement synonym fetching 2020-01-16 11:38:23 +01:00
5f9a3546e0 Use an union instead of a sort for OR ops 2020-01-15 15:14:24 +01:00
db625a08f7 Update lock file 2020-01-15 12:25:14 +01:00
44fec1b6c9 Cache prefixes of a length of 2 2020-01-14 18:17:52 +01:00
54dacb362d Use different algorithms for different documents ratios 2020-01-14 17:51:08 +01:00
6edb460bea Try with an exponential search 2020-01-14 16:52:24 +01:00
40dab80dfa Change the way we filter the documents 2020-01-14 14:18:01 +01:00
681711fced Fix query ids to be usize 2020-01-14 13:12:42 +01:00
21c1473e0c Introduce the distance data 2020-01-14 11:38:04 +01:00
8acbdcbbad wip: Make the new query tree work with the criteria 2020-01-13 14:36:06 +01:00
da8abebfa2 Introduce the query words mapping along with the query tree 2020-01-13 13:29:47 +01:00
4f7a7ea0bb Faster intersection group by 2020-01-09 16:30:03 +01:00
d6c9ba8f08 Store the postings lists 2020-01-09 15:04:53 +01:00
ec8916bf54 Change the debug outputs 2020-01-09 12:05:39 +01:00
81c573ec92 Add the raw document IDs to the postings lists 2020-01-08 15:30:43 +01:00
9420edadf4 Introduce the Postings type to decorrelate the DocumentIds 2020-01-08 14:48:23 +01:00
d724a7659e Introduce a query tree context struct 2020-01-08 13:37:22 +01:00
887c212b49 Add more logs about the docids construction 2020-01-08 13:22:42 +01:00
07937ed6d7 Use the prefix caches 2020-01-08 13:14:07 +01:00
a262c67ec3 limit the search in the FST 2020-01-08 13:06:12 +01:00
13ca30c4d8 WIP: Made the query tree traversing support prefix search 2020-01-08 12:02:58 +01:00
fbcec2975d wip: Impl a basic tree traversing 2020-01-07 18:24:13 +01:00
6e1f4af833 wip: Create a tree from query but need to show synonyms 2020-01-07 18:24:13 +01:00
856c5c4214 Fix group offset computing 2019-12-31 14:24:10 +01:00
670e80c151 Use the cached postings lists in the query system 2019-12-31 13:32:36 +01:00
eed07c724f Add more logging for postings lists fetching by word 2019-12-31 13:32:36 +01:00
99d35fb940 Introduce a first version of a number of candidates reducer
It works by ignoring the postings lists associated to documents that the previous words did not returned
2019-12-31 13:32:36 +01:00
106b886873 Cache the prefix postings lists 2019-12-30 18:01:32 +01:00
928876b553 Introduce the postings lists caching stores
Currently not used
2019-12-30 18:01:27 +01:00
58836d89aa Rename the PrefixCache into PrefixDocumentsCache 2019-12-30 15:42:09 +01:00
1a5a104f13 Display proximity evaluation number of calls 2019-12-30 15:42:09 +01:00
9790c393a0 Change the time measurement of the query 2019-12-30 15:42:08 +01:00
064cfa4755 Add more debug, where are those 100ms 2019-12-30 15:42:08 +01:00
ed6172aa94 Add a time measurement of the criterion loop 2019-12-30 15:42:08 +01:00
8c140f6bcd Increase the disk usage limit 2019-12-30 15:42:08 +01:00
1e1f0fcaf5 Introduce a basic cache system for first letters 2019-12-30 15:42:08 +01:00
d21352a109 Change the time measurement of the FST 2019-12-30 15:42:08 +01:00
4be11f961b Use an ugly trick to avoid cloning the FST 2019-12-30 15:42:07 +01:00
1163f390b3 Restrict FST search to the first letter of the word 2019-12-30 15:42:07 +01:00
534143e91d Merge pull request #439 from meilisearch/fix-update-deadlock
Fix a blocking channel, appearing like a deadlock
2019-12-30 15:41:26 +01:00
691e2a3c1d Fix a blocking channel, appearing like a deadlock 2019-12-30 15:28:28 +01:00
20b92fcb4c Merge pull request #435 from meilisearch/debug-missing-measurements
Add more debug timings
2019-12-20 18:04:21 +01:00
04bb49989f Add more debug timings 2019-12-20 14:18:48 +01:00
2aa7cb9d20 Merge pull request #433 from meilisearch/fix-index-creation
Set the indexes info in the create_index function
2019-12-19 10:59:47 +01:00
d12ff15ee3 Set the indexes info in the create_index function 2019-12-19 10:38:56 +01:00
11b684114d Merge pull request #431 from curquiza/web-interface-readme
Update REAME with the Web Interface introduction
2019-12-18 13:50:12 +01:00
1bf177f81a Update REAME with the Web Interface introduction
Co-Authored-By: cvermand <33010418+bidoubiwa@users.noreply.github.com>
2019-12-18 13:41:15 +01:00
df7dc54409 Merge pull request #415 from meilisearch/fix-blocking-settings
Use a main read transaction instead of a write one
2019-12-17 16:21:41 +01:00
7e86056a27 Use a main read transaction instead of a write one 2019-12-17 15:48:06 +01:00
59f74dabe7 Merge pull request #407 from meilisearch/friendly-web-interface
Friendly web interface
2019-12-17 14:47:24 +01:00
4610198ba2 Introduce a Bulma based web interface 2019-12-17 14:36:26 +01:00
3d19f566b6 Merge pull request #406 from bidoubiwa/remove_nsfw_movie
Removed nsfw movie from movies.json dataset
2019-12-13 17:56:09 +01:00
8d90cd8e35 Removed nsfw movie from movies.json dataset 2019-12-13 17:21:46 +01:00
610d44e703 Merge pull request #401 from tpayet/feat/heroku-button
Add heroku one-click deploy
2019-12-13 16:26:31 +01:00
0272b44d7e Add heroku one-click deploy 2019-12-13 16:03:00 +01:00
3eccf2fd76 Merge pull request #405 from meilisearch/disable-bench-workflow
Disable the benchmarks github workflow
2019-12-13 15:56:16 +01:00
736f285092 Disable the benchmarks github workflow 2019-12-13 15:37:24 +01:00
020cd7f9e8 Merge pull request #403 from meilisearch/lazy-data-fetching
Criteria lazy data preparation
2019-12-13 14:57:19 +01:00
40c0b14d1c Reintroduce searchable attributes and reordering 2019-12-13 14:38:25 +01:00
a4dd033ccf Rename raw_matches into bare_matches 2019-12-13 14:38:25 +01:00
48e8778881 Clean up the modules declarations 2019-12-13 14:38:25 +01:00
4be23efe66 Remove the AttrCount type
Could probably be reintroduced later
2019-12-13 14:38:25 +01:00
7d67750865 Reintroduce exacteness for one word document field 2019-12-13 14:38:25 +01:00
746e6e170c Make the test pass again 2019-12-13 14:38:24 +01:00
d93e35cace Introduce ContextMut and Context structs 2019-12-13 14:38:24 +01:00
d75339a271 Prefer summing the attribute 2019-12-13 14:38:24 +01:00
86ee0cbd6e Introduce bucket_sort_with_distinct function 2019-12-13 14:38:24 +01:00
248ccfc0d8 Update the criteria to the new ones 2019-12-13 14:38:24 +01:00
ea148575cf Remove the raw_query functions 2019-12-13 14:38:23 +01:00
efc2be0b7b Bump the sdset dependency to 0.3.6 2019-12-13 14:38:23 +01:00
8d71112dcb Rewrite the phrase query postings lists
This simplified the multiword_rewrite_matches function a little bit.
2019-12-13 14:38:23 +01:00
dd03a6256a Debug pre filtered number of documents 2019-12-13 14:38:23 +01:00
9c03bb3428 First probably working phrase query doc filtering 2019-12-13 14:38:23 +01:00
22b19c0d93 Fix the processed distance algorithm 2019-12-13 14:38:22 +01:00
0f698d6bd9 Work in progress: Bad Typo detection
I have an issue where "speakers" is split into "speaker" and "s",
when I compute the distances for the Typo criterion,
it takes "s" into account and put a distance of zero in the bucket 0
(the "speakers" bucket), therefore it reports any document matching "s"
without typos as best results.

I need to make sure to ignore "s" when its associated part "speaker"
doesn't even exist in the document and is not in the place
it should be ("speaker" followed by "s").

This is hard to think that it will had much computation time to
the Typo criterion like in the previous algorithm where I computed
the real query/words indexes based and removed the invalid ones
before sending the documents to the bucket sort.
2019-12-13 14:38:22 +01:00
4e91b31b1f Make the Typo and Words work with synonyms 2019-12-13 14:38:22 +01:00
f87c67fcad Improve the QueryEnhancer by doing a single lookup 2019-12-13 14:38:22 +01:00
902625601a Work in progress: It seems like we support synonyms, split and concat words 2019-12-13 14:38:22 +01:00
d17d4dc5ec Add more debug infos 2019-12-13 14:38:21 +01:00
ef6a4db182 Before improving fields AttrCount
Removing the fields_count fetching reduced by 2 times the serach time, we should look at lazily pulling them form the criterions in needs

ugly-test: Make the fields_count fetching lazy

Just before running the exactness criterion
2019-12-13 14:38:21 +01:00
11f3d7782d Introduce the AttrCount type 2019-12-13 14:38:21 +01:00
5b9fff6636 Merge pull request #352 from meilisearch/add-search-benchmarks
Add some criterion benchmarks to help detect regressions
2019-12-13 14:37:48 +01:00
a8272f0eef Add a benchmark github workflow 2019-12-13 14:17:40 +01:00
951f0bcb10 sqaush-me: Improve benchmarks naming 2019-12-13 14:17:40 +01:00
d8ba405baf Add some criterion benchmarks to help mesure improvements 2019-12-13 14:17:40 +01:00
70f18a8086 Merge pull request #400 from meilisearch/fix-issues
Close multiples issues on HTTP behavior
2019-12-13 10:30:42 +01:00
0b5db77511 Fix erase setting option 2019-12-13 10:22:35 +01:00
3a4130f344 Allow to index files with null or boolean 2019-12-12 19:25:05 +01:00
1ea29bb92e Fix unwrap if schema does not contain ranked attributes on a custom ranking setting 2019-12-12 16:37:46 +01:00
04d34cb8aa Search; return formated section only if it's necessary 2019-12-12 16:36:42 +01:00
bf80729e17 Update message on access forbidden 2019-12-12 15:39:32 +01:00
88b3c05155 Stop words; Do not reindex all documents if there is no documents 2019-12-12 15:31:39 +01:00
6edef07e29 HTTP delete index route; Fix error on index not found 2019-12-12 14:06:16 +01:00
5ad73fe08b Merge pull request #399 from meilisearch/rewrite-synonym-endpoint
Rewrite the synonym endpoint
2019-12-12 12:58:14 +01:00
a4f26e8e48 Rewrite the synonym endpoint 2019-12-12 12:47:02 +01:00
cc10804607 Merge pull request #395 from meilisearch/update-bitly-link
Update the bit.ly movies.json link
2019-12-10 18:13:52 +01:00
f959cd76ae Update the bit.ly movies.json link 2019-12-10 18:07:14 +01:00
dcd332e2e4 Merge pull request #396 from meilisearch/disable-windows-tests
Disable windows tests
2019-12-10 18:03:13 +01:00
f3a276d1e1 Update the workflow README.md 2019-12-10 17:56:24 +01:00
640d21a7d2 Disable the Windows tests workflow 2019-12-10 17:53:26 +01:00
216cccbfba Merge pull request #391 from meilisearch/fix-one-document-route
Do not expect a JSON value as a document indentifer
2019-12-09 21:53:04 +01:00
04d1da11f7 Do not expect a JSON value as a document indentifer 2019-12-09 21:34:40 +01:00
ee4e9dcc74 Merge pull request #388 from meilisearch/remove-synonyms-unwraps
Remove unsound unwraps from the synonym routes
2019-12-09 17:06:02 +01:00
6fef04be20 Remove unsound unwraps from the synonym routes 2019-12-09 16:54:54 +01:00
86347bff3a Merge pull request #384 from curquiza/install-script-prereleases
Change regexp in install script
2019-12-09 15:28:19 +01:00
e291d9954a Change regexp in install script to not take into acccount pre-releases 2019-12-09 15:14:25 +01:00
7a548467b9 Merge pull request #382 from curquiza/health-routes
Keep only useful routes for /health
2019-12-08 18:11:19 +01:00
06d8e00ff3 Keep only useful routes for /health 2019-12-08 17:56:33 +01:00
225f5a172d Merge pull request #381 from curquiza/update-index-httpstatus
Change HTTP status of update index route
2019-12-08 17:53:01 +01:00
e531ff2e98 Change HTTP status of update index route 2019-12-08 17:10:21 +01:00
8c8040884e Merge pull request #376 from meilisearch/windows-support
Update the actions to support Windows
2019-12-07 12:07:27 +01:00
e3611ad0e4 Update the action to test on more platforms 2019-12-07 11:57:33 +01:00
289bc6570b Update the action to publish windows binaries 2019-12-07 11:52:14 +01:00
dc1849d291 Bump heed to 0.6.1 2019-12-07 11:49:45 +01:00
17a66227f4 Merge pull request #375 from nithinkashyapn/master
Docker command updated
2019-12-06 12:11:56 +01:00
0e8b95f4bf Docker command updated
Docker does not allow Uppercase letters, throws this error 

`docker: invalid reference format: repository name must be lowercase.`
2019-12-06 16:30:37 +05:30
5b8344cfc3 Merge pull request #373 from curquiza/stop-words-deletion
Use POST instead of DELETE method to delete stops-word
2019-12-05 23:06:15 +01:00
075f4034d9 Use POST instead of DELETE method to delete stops-word 2019-12-05 18:07:56 +01:00
c616ce99a8 Merge pull request #368 from tpayet/add-push-debpkg
Add publish action to gemfury for apt pkg
2019-12-05 15:35:12 +01:00
6b9b5fda7e Add publish action to gemfury for apt pkg 2019-12-05 14:54:57 +01:00
b756fc382a Merge pull request #367 from meilisearch/support-stdin-example
Allow users to send csv files from stdin in examples
2019-12-05 12:33:18 +01:00
29fd54dcfa Allow users to send csv files from stdin in examples 2019-12-05 12:23:56 +01:00
d664e97104 Merge pull request #365 from meilisearch/update-readme
Reorder "Deploy the server" options on the README
2019-12-04 18:37:40 +01:00
4466097d44 Update readme.md; Deploy part 2019-12-04 18:16:56 +01:00
60b94d2dc1 Merge pull request #366 from tpayet/cargo-deb
Add debian package in CI
2019-12-04 18:14:10 +01:00
51636402c2 Add debian package in CI 2019-12-04 18:02:30 +01:00
fc8182d7d3 Merge pull request #363 from meilisearch/bump-version
Bump meilisearch crates to v0.8.4
2019-12-03 17:30:31 +01:00
4f87465f18 Bump meilisearch crates to v0.8.4 2019-12-03 17:22:45 +01:00
5f1586ae85 Merge pull request #360 from meilisearch/fix-readme-broken-links
Fix README broken links
2019-12-02 19:10:40 +01:00
8d3161a2cf Reorder README parts 2019-12-02 18:29:53 +01:00
8bc8214279 Fix README broken links
Thanks to @baptistejamin!
2019-12-02 16:45:27 +01:00
3ea5aa18a2 Merge pull request #359 from bidoubiwa/fix_wording_in_readme
Fix bad wording in readme file
2019-12-02 14:06:49 +01:00
c4845b78a9 Fix bad wording in readme file 2019-12-02 11:15:39 +01:00
530e913e2f Merge pull request #356 from tpayet/fix-port-readme
Fix port in README & Dockerfile
2019-11-29 19:21:55 +01:00
5917f212ba Fix port in README & Dockerfile 2019-11-29 18:03:54 +01:00
d2b1690191 Merge pull request #355 from tpayet/master
Update binary default settings
2019-11-29 15:47:04 +01:00
710b7ea091 Update default listening port to 7700 2019-11-29 15:25:26 +01:00
089579d835 Update default database directory to working directory 2019-11-29 15:25:26 +01:00
7780293ddb Merge pull request #354 from meilisearch/camelcase-updates-result
Fix updates formattings and namings
2019-11-29 15:19:45 +01:00
773a51e7d0 Rename 'update_type' to 'type' on EnqueuedUpdateResult 2019-11-29 15:09:48 +01:00
7923752513 Serialize updates results to camelCase 2019-11-29 15:05:54 +01:00
9a48091b21 Merge pull request #353 from meilisearch/bump-version
Bump meilisearch crates to v0.8.3
2019-11-29 14:13:37 +01:00
30cb60f679 Bump meilisearch crates to v0.8.3 2019-11-29 14:06:17 +01:00
08687d8dab Merge pull request #351 from meilisearch/status-failed-updates-status
Add status failed on UpdateStatus
2019-11-28 18:53:31 +01:00
3a90233a3d Add status failed on UpdateStatus 2019-11-28 18:41:11 +01:00
32483cae2d Merge pull request #347 from curquiza/installation-script
Add script for binary installation
2019-11-28 18:34:58 +01:00
d7f28e0260 Add script for binary installation 2019-11-28 18:34:12 +01:00
9640c2aaa6 Merge pull request #349 from meilisearch/bump-version
Bump meilisearch crates to v0.8.2
2019-11-28 17:23:40 +01:00
9a2b4d08e1 Bump meilisearch crates to v0.8.2 2019-11-28 17:15:13 +01:00
e91615fe59 Merge pull request #348 from meilisearch/replace-isahc-by-ureq
Replace isahc by ureq
2019-11-28 17:14:32 +01:00
aed02b2e19 Remove many dependencies from the Dockerfile 2019-11-28 17:04:01 +01:00
83ad80d9db Replace isahc by ureq 2019-11-28 16:41:42 +01:00
abdb7793fb Merge pull request #345 from tpayet/readme_changes
Clarification of readme file
2019-11-28 16:35:44 +01:00
387eb3fde3 Clarification of readme file 2019-11-28 16:28:25 +01:00
e640bc90b4 Merge pull request #343 from meilisearch/explicit-index-clear
Change the update loop to be more explicit on index clear
2019-11-28 14:48:37 +01:00
3978378152 Merge pull request #344 from tpayet/patch-1
Update README license badge
2019-11-28 14:35:50 +01:00
61e3e4f0b9 Update README license badge 2019-11-28 14:28:30 +01:00
1def56ea11 Change the update loop to be more explicit on index clear 2019-11-27 13:43:28 +01:00
6d686ac14f Merge pull request #342 from meilisearch/update-lock
Update the lock file
2019-11-27 12:49:47 +01:00
641e0d15f5 Make sure the lock file is up to date 2019-11-27 12:06:14 +01:00
71b39426c0 Update the lock file 2019-11-27 12:01:22 +01:00
57584eaccc Merge pull request #341 from meilisearch/bump-version
Bump meilisearch crates to v0.8.1
2019-11-27 11:54:39 +01:00
f6fb31c531 Bump meilisearch crates to v0.8.1 2019-11-27 11:47:27 +01:00
0cea8ce5b5 Merge pull request #340 from meilisearch/separate-updates-kvstore
Separate the update and main databases
2019-11-27 11:39:14 +01:00
d08b76a323 Separate the update and main databases
We used the heed typed transaction to make it safe (https://github.com/Kerollmops/heed/pull/27).
2019-11-27 11:29:06 +01:00
86a87d6032 Merge pull request #339 from tpayet/action-docker-tag
Update action workflow for docker tagged image
2019-11-26 19:17:19 +01:00
e534929f80 Update action workflow for docker tagged image 2019-11-26 18:18:51 +01:00
fcc154da1c Merge pull request #336 from meilisearch/rename-to-meilisearch
Rename MeiliDB into MeiliSearch
2019-11-26 14:06:01 +01:00
00d1200704 Rename the meilisearch-http binary into meilisearch 2019-11-26 11:17:30 +01:00
7cc096e0a2 Rename MeiliDB into MeiliSearch 2019-11-26 11:12:30 +01:00
58eaf78dc4 Merge pull request #335 from tpayet/github-release-action
GitHub release action
2019-11-25 19:19:08 +01:00
3be2281483 Update workflows README 2019-11-25 18:14:21 +01:00
cc06d96993 Add gh actions to release binaries 2019-11-25 17:27:15 +01:00
93c7e700bc Merge pull request #333 from tpayet/update-dockerfile
Add meilihttp_addr env variable in docker build
2019-11-25 16:41:52 +01:00
97c6757fc7 Add meilihttp_addr env variable in docker build 2019-11-25 16:30:07 +01:00
276d3f8e22 Merge pull request #332 from meilisearch/jemalloc-only-on-linux
Make jemalloc only used on linux
2019-11-25 16:13:54 +01:00
4869a88ae2 Make jemalloc only used on linux 2019-11-25 15:35:13 +01:00
ae88bc31bc Merge pull request #331 from meilisearch/enable-jemalloc-linux-only
Enable jemalloc only on linux OSs
2019-11-25 14:59:56 +01:00
8aed1d96c5 Enable jemalloc only on linux OSs 2019-11-25 14:51:47 +01:00
c93949474c Merge pull request #330 from tpayet/fix-actions-badge-link
Update action badge link
2019-11-25 13:51:07 +01:00
8cf19f1c6b Update action badge link 2019-11-25 13:44:20 +01:00
a82ecb3cef Merge pull request #324 from tpayet/gh-actions
Replace Azure CI by Github Actions
2019-11-25 13:31:15 +01:00
04c2b37d82 Remove Azure CI
Add gh actions for cargo check using rust nightly

Add readme about actions workflows

Add basic Dockerfile

Add action workflow for docker publish

Change check action to test action

Update workflow readme without rust nightly

Rename test action file

Add gh actions to push latest docker image from master

Update github action for publish docker image

Add 2 steps dockerfile based on alpine

Update readme badges to match new CI
2019-11-25 13:20:54 +01:00
ab3e8d6537 Merge pull request #314 from meilisearch/fix-number-ord
Fix the ordering functions of the Number type
2019-11-22 15:14:05 +01:00
fd185a5e6b Add a test for the SorByAttr criterion 2019-11-22 15:04:23 +01:00
d9678f0040 Fix the ordering functions of the Number type 2019-11-22 14:44:02 +01:00
840217b111 Merge pull request #321 from meilisearch/fix-create-index
Fix index creation
2019-11-22 14:10:05 +01:00
9605a2cd88 Make possible to use a custom uid and simplify the usage 2019-11-22 14:01:00 +01:00
0f86ccc035 Index UID generation makes sure to not generate the same number 2019-11-22 14:01:00 +01:00
b3b73e2276 Merge pull request #323 from meilisearch/fix-index-deletion
Fix index deletion once again
2019-11-22 14:00:19 +01:00
f241c999ad Make the CI use rust stable 2019-11-22 13:47:29 +01:00
d4d2a2303a Fix a typo on timeout_ms used for multi index search 2019-11-22 13:47:29 +01:00
c8832409ad Fix the dead lock on index deletion once again 2019-11-22 13:47:29 +01:00
98f76aa952 Merge pull request #320 from meilisearch/send-amplitude-events
Add an Amplitude analysis loop tick
2019-11-22 10:52:29 +01:00
4236632af6 Add an amplitude analysis loop tick 2019-11-21 20:28:58 +01:00
e2c98244ec Merge pull request #313 from meilisearch/fix-dead-lock
Fix dead locks when deleting indexes
2019-11-21 12:42:40 +01:00
c1cf67c008 Join updates threads after dropping the indexes lock and avoid deadlocks 2019-11-21 12:01:46 +01:00
4abea919b2 Merge pull request #311 from meilisearch/add-index-name-and-id
Add index name and change some routes request body & response
2019-11-21 11:59:14 +01:00
d60aa722c0 Allow to update expireAt and revoked on token 2019-11-21 11:49:49 +01:00
055368acd8 Fix for review 2019-11-21 11:49:49 +01:00
7f2e5d091a Rename routes /synonym to /synonyms 2019-11-20 15:33:42 +01:00
c69ae8154f Allow to receive schema update formated as SchemaBuilder 2019-11-20 15:25:34 +01:00
cd95b243bb Add the update index route 2019-11-20 15:00:06 +01:00
1f1cb1f501 Rename browse_documents into get_all_documents and always respond HTTP Ok 2019-11-20 14:18:21 +01:00
530738cfe9 Format code 2019-11-20 14:12:12 +01:00
878dd6912e Return a HTTP 401 instead of 404 if token is not found 2019-11-20 14:06:56 +01:00
5f0f699f37 Move route to clear all synonyms on DELETE /synonyms 2019-11-20 14:03:55 +01:00
ca13900699 Add async routes should return ACCEPTED status code response 2019-11-20 14:03:19 +01:00
cc97889b37 Add stop-word is now PATCH method 2019-11-20 13:56:43 +01:00
45ded0498b Format code with cargo fmt 2019-11-20 11:45:23 +01:00
d01a3944c1 Add last_update information on global /stats route 2019-11-20 11:45:22 +01:00
a0caf0d6d7 Remove unused result response on indexes_uids function 2019-11-20 11:45:22 +01:00
e22debb994 Update index updated_at information at each update callback 2019-11-20 11:45:22 +01:00
1b8df0ed8b Remove last_update from stats 2019-11-20 11:45:22 +01:00
3286a5213c Move fields frequency from common store to index main store 2019-11-20 11:45:22 +01:00
394976d330 Update list_index route to return all index information, not only list of uid 2019-11-20 11:45:22 +01:00
b95acbece0 Function generate_uid return now lowercased uid 2019-11-20 11:45:22 +01:00
c94f4dff71 Do not return update_id on IndexCreateRespnse if it's none 2019-11-20 11:45:22 +01:00
e6465f4ea1 Create a new specific route for schema 2019-11-20 11:45:22 +01:00
2b3c91aabd Update get_index_schema to allow raw response 2019-11-20 11:45:22 +01:00
e97e13ce9f Rename index_name to index_uids 2019-11-20 11:45:22 +01:00
39e2b73718 Add updatedAt on main index store 2019-11-20 11:45:22 +01:00
a90facaa41 Rename index_name by index_uid 2019-11-20 11:45:22 +01:00
5527457655 Rewrite create_index route new path, body request and response 2019-11-20 11:45:21 +01:00
076e781810 Add name, created_at and updated_at informations into main index 2019-11-20 11:45:21 +01:00
750d336018 Bump Cargo.lock meili versions 2019-11-20 11:45:21 +01:00
e8251ad45b Merge pull request #310 from meilisearch/unify-crates-version
Unify the crates versions to 0.8.0
2019-11-20 11:05:54 +01:00
963ca1e2c7 Unify the crates versions to 0.8.0 2019-11-20 10:47:32 +01:00
12a6c7d54d Merge pull request #298 from bidoubiwa/add_ranked_movies_dataset
Create a dataset where the release_date is a numeric timestamp
2019-11-20 10:46:24 +01:00
2d0fc3f9d3 Create a dataset where the release_date is a numeric timestamp 2019-11-20 10:44:32 +01:00
e554784527 Merge pull request #309 from bidoubiwa/remove_stop_words_from_settings
Removed stop words from settings route
2019-11-19 18:35:27 +01:00
2cb43fa638 Removed stop words from settings route 2019-11-19 18:21:44 +01:00
66d5309a51 Merge pull request #308 from meilisearch/improve-structopt
Introduce better argument names
2019-11-19 18:09:44 +01:00
7eeedec7eb Bump meilidb-http to v0.3.0 2019-11-19 17:50:01 +01:00
4b798c71ae Introduce new arguments and understand env vars 2019-11-19 17:50:01 +01:00
685016bfec Bump meilidb-core to v0.7.0 and meilidb-http to v0.2.0 2019-11-18 15:49:23 +01:00
d30e5f6231 Merge pull request #299 from meilisearch/default-update-callbacks
Prefer using a global update callback common to all indexes
2019-11-18 15:05:21 +01:00
e854d67a55 Remove useless routes and checks 2019-11-18 14:41:49 +01:00
23a89732a5 Prefer using a global update callback common to all indexes 2019-11-18 14:41:49 +01:00
3a1f41ebdb Merge pull request #305 from meilisearch/fix-example
Make easier to interact with compacted databases
2019-11-17 20:31:06 +01:00
f873761a27 Make easier to interact with compacted databases 2019-11-17 20:01:02 +01:00
ebf620c7f9 Merge pull request #302 from meilisearch/fix-dataset-schema
Rename the movies dataset schema file
2019-11-17 17:17:33 +01:00
8b92bc3421 Rename the movies dataset schema file 2019-11-17 16:45:13 +01:00
70a5aa61e9 Merge pull request #301 from meilisearch/separate-types
Move the main types to a separate library
2019-11-17 12:45:25 +01:00
a76169042f Make the serde and zerocopy meilidb-types dependencies optional 2019-11-17 12:30:39 +01:00
c9c3cfcee9 Move the main types to a separate library 2019-11-17 12:19:36 +01:00
2e60ac5359 Merge pull request #300 from meilisearch/update-dependencies
Do not use a forked fst dependency
2019-11-17 12:19:08 +01:00
2dd7751e09 Disable the fst MemMap feature 2019-11-17 11:43:00 +01:00
26bdabcdec Do not use a forked fst dependency 2019-11-17 11:14:01 +01:00
fc8c7ed77e Merge pull request #297 from meilisearch/improve-highlights
Improve the highlight formatted outputs
2019-11-15 14:28:27 +01:00
521c96354f Improve the highlight formatted outputs 2019-11-15 14:16:21 +01:00
9788779894 Merge pull request #296 from meilisearch/update-readme
Update the README
2019-11-14 21:32:32 +01:00
9b965764ab Update the README 2019-11-14 19:09:04 +01:00
9a5a543311 Merge pull request #290 from curquiza/deploy-doc
Add information in documentation in Deploy Server part
2019-11-13 16:06:27 +01:00
b18fb868e8 Add information in documentation in Deploy Server part 2019-11-13 15:37:21 +01:00
c734af55c0 Merge pull request #289 from curquiza/status204-delete-index
Change the HTTP status code on index deletion
2019-11-13 15:33:27 +01:00
810b328ad2 Change the HTTP status code on index deletion 2019-11-13 15:14:23 +01:00
0a8039d8d8 Merge pull request #285 from bidoubiwa/remove_catching_same_index_creation
Change the error catching on the index creation route
2019-11-13 15:13:51 +01:00
e51704c09a Remove the error catching on the index creation route when the index already exist 2019-11-13 14:42:59 +01:00
623a9012d5 Merge pull request #279 from bidoubiwa/new_slogan_and_resume
Slogan and Resume proposition
2019-11-13 14:41:21 +01:00
b9a185634f Slogan and Resume proposition 2019-11-13 14:31:22 +01:00
b46889b5f0 Merge pull request #282 from meilisearch/fix-ci-artifacts
Add the meilidb-http binary to the artifacts
2019-11-13 11:39:00 +01:00
ef9a0c07db Add the meilidb-http binary to the artifacts 2019-11-13 11:15:39 +01:00
3a6f3947c9 Merge pull request #281 from meilisearch/fix-attributes-to-search-in
Take attributes to search in into account
2019-11-12 18:45:40 +01:00
5c5f41d755 Take attributes to search in into account 2019-11-12 18:35:58 +01:00
6803a8fad0 Merge pull request #280 from meilisearch/format-updates-json
Format updates json
2019-11-12 18:35:25 +01:00
8e4b362e4d Fixed the display of enqueued updates 2019-11-12 18:21:59 +01:00
acb5e624c6 Add enqueued and processed datetimes 2019-11-12 18:21:59 +01:00
a98949ff1d Improve updates JSON format 2019-11-12 16:57:22 +01:00
f355280250 Merge pull request #278 from meilisearch/mit-license
Change the license to an MIT one
2019-11-12 14:35:32 +01:00
cee8d6a8d9 Change the license to an MIT one 2019-11-12 14:24:28 +01:00
27326ea069 Merge pull request #277 from bidoubiwa/add_cmd_to_compile
Add cmd line to compile binary
2019-11-12 13:55:54 +01:00
7bbe5aca5b Add cmd line to compile binary 2019-11-12 10:57:03 +01:00
1c4afe6d0f Merge pull request #276 from meilisearch/support-slash-tokenizer
Add support for back/slashes
2019-11-11 21:46:14 +01:00
2d8f9a9849 Add support for back/slashes 2019-11-11 21:23:08 +01:00
3f41681b18 Merge pull request #274 from meilisearch/enable-env-logger
Add env logger to enable logging
2019-11-11 19:13:33 +01:00
64791815fa Add env logger to enable logging 2019-11-11 19:03:38 +01:00
8a36571a74 Merge pull request #272 from meilisearch/fix-long-words
Ignore words that are too long
2019-11-10 20:07:22 +01:00
d18e775bec Ignore words that are too long 2019-11-10 17:44:27 +01:00
78381f1818 Merge pull request #271 from meilisearch/update-dependencies
Update Dependencies
2019-11-10 11:17:09 +01:00
7f33a01ae1 Update dependencies 2019-11-10 11:04:56 +01:00
d07d14d33a Update crossbeam-channel to 0.4.0 2019-11-10 11:03:22 +01:00
540d7886ab Merge pull request #266 from meilisearch/update-readme
Update the readme and add a Quick Start section
2019-11-09 13:21:22 +01:00
5a5d10af52 Add an image description of the gif 2019-11-09 13:12:01 +01:00
f95d077ef8 Improve the README a little bit by adding a quick start section 2019-11-09 13:12:01 +01:00
05dd99936f Add a gif to show a demo using crates.io 2019-11-09 12:59:39 +01:00
c086625773 Merge pull request #269 from meilisearch/repo-became-binary
Make the repository be a binary and version the Cargo.lock
2019-11-09 12:58:52 +01:00
dc17bebf4a Make the repository be a binary and version the Cargo.lock 2019-11-09 12:13:28 +01:00
026464b2e4 Bump meilidb-core to v0.6.5 2019-11-06 11:52:34 +01:00
bd42158a70 Merge pull request #264 from meilisearch/index-soft-deletion
Index soft deletion
2019-11-06 11:51:50 +01:00
df066f4321 Introduce a new add or update documents PUT route 2019-11-06 11:42:41 +01:00
69832e8c70 Update the http index deletion route 2019-11-06 11:42:41 +01:00
95eb6ad09a Add a test to check index soft deletion works correctly 2019-11-06 11:02:30 +01:00
f3fc0bed45 Introduce index soft deletion 2019-11-06 11:02:30 +01:00
5dd6b697b9 Bump meilidb-core to v0.6.4 2019-11-05 18:46:16 +01:00
b7d170c7d1 Merge pull request #262 from meilisearch/fix-unidecoded-emojis
Fix an highlighting problem
2019-11-05 17:04:35 +01:00
7541172d12 Make the example show highlighted areas more explicitly 2019-11-05 16:40:48 +01:00
85bf5d113c Fix an highlighting problem when query was longer than original text 2019-11-05 16:40:34 +01:00
89fd397903 Bump meilidb-core to v0.6.3 2019-11-05 15:40:04 +01:00
d8392f2f18 Merge pull request #261 from meilisearch/partial-updates
Introduce the support of partial updates
2019-11-05 15:39:02 +01:00
36b74f0efe Introduce partial updates to the update system 2019-11-05 15:23:41 +01:00
68c0a36b00 Make the deserialization support correctly optional documents 2019-11-05 15:03:18 +01:00
a127b72a74 Merge pull request #259 from meilisearch/allow-add-schema-attributes-at-end
Allow to introduce attributes only at the end of a schema
2019-11-05 12:34:11 +01:00
5782fb9e52 Test the add of attributes only at the end of a schema 2019-11-05 12:09:52 +01:00
20319f7974 Allow to introduce attributes only at the end of a schema 2019-11-05 12:09:52 +01:00
c4087e2ec2 Merge pull request #258 from meilisearch/debug-schema
Implement a better debug for the schema
2019-11-05 11:35:02 +01:00
b1d1f2f627 Implement a better debug system for the schema 2019-11-05 11:21:07 +01:00
62fe6a8263 Merge pull request #257 from meilisearch/bump-version
Bump meilidb-core/tokenizer versions
2019-11-04 17:26:01 +01:00
d88c10f3b4 Bump meilidb-tokenizer to v0.6.1 2019-11-04 17:17:06 +01:00
00f49990c7 Bump meilidb-core to v0.6.2 2019-11-04 17:16:50 +01:00
89f30ad47e Merge pull request #256 from meilisearch/fix-tokenizer
Fix the tokenizer to make it work with unicode chars
2019-11-04 17:15:17 +01:00
3b1cbed238 Check that the unidecoded words are not empty 2019-11-04 17:03:11 +01:00
4571b80a49 Update the tests 2019-11-04 16:41:58 +01:00
de2b8672d4 Make the tokenizer understand strange whitespaces/quotes 2019-11-04 16:41:58 +01:00
ccded7b429 Improve the indexer to not not deunicode before indexing
Revert of #179
2019-11-04 16:41:58 +01:00
1d4e98410a Merge pull request #255 from meilisearch/bump-version
Bump meilidb-core to v0.6.1
2019-11-04 14:47:53 +01:00
e493b27ef1 Bump meilidb-core to v0.6.1 2019-11-04 14:22:08 +01:00
70589c136f Merge pull request #253 from meilisearch/fix-updates-system
Fix the updates system
2019-11-04 13:46:37 +01:00
1c3620a7d4 Add tests to the update system 2019-11-04 13:18:07 +01:00
c2cc0704d7 Clean up the update_awaiter function 2019-11-04 11:11:58 +01:00
2a50e08bb8 Moving to heed v0.5.0 2019-11-04 10:49:27 +01:00
6b326a45d7 Fix the update system to always consume updates even if failing 2019-10-31 17:44:13 +01:00
b73874bf24 Merge pull request #252 from meilisearch/examples-specify-index-name
Allow users to specify the index name to use with examples bins
2019-10-31 17:02:00 +01:00
95c8ad0f80 Allow users to specify the index name to use with examples bins 2019-10-31 16:20:31 +01:00
996763cc52 Merge pull request #251 from meilisearch/update-heed
Moving to heed 0.3.0
2019-10-31 16:20:07 +01:00
6a8171d335 Moving to heed 0.3.0 2019-10-31 16:11:02 +01:00
2f32586dab Merge pull request #250 from meilisearch/new-http-server
Introduce a brand new HTTP server
2019-10-31 16:07:52 +01:00
db898001eb Get rid of rust-crypto and uuid 2019-10-31 15:28:37 +01:00
c2a12b661a Make it a runnable server 2019-10-31 15:27:21 +01:00
f51c49db93 Introduce the HTTP tide based library 2019-10-31 15:02:34 +01:00
1be5b0f327 Bump the meili-core/schema/tokenizer crates to 0.6.0 2019-10-31 14:05:59 +01:00
a136c62208 Merge pull request #249 from meilisearch/display-all-updates
Display enqueued along with processed updates
2019-10-31 13:53:46 +01:00
cc461b1331 Display enqueued along with processed updates 2019-10-31 12:25:52 +01:00
dbe5363672 Merge pull request #248 from meilisearch/fix-highlight-too-long
Correctly highlight when query string is too long
2019-10-30 18:19:06 +01:00
45d4361e7d Correctly highlight when query string is longer 2019-10-30 17:49:50 +01:00
b28c44cc6b Merge pull request #247 from meilisearch/bump-meilidb
Bump the meili-core/schema/tokenizer crates to 0.5.11
2019-10-30 17:48:26 +01:00
b709a7a30a Bump the meili-core/schema/tokenizer crates to 0.5.11 2019-10-30 17:40:31 +01:00
64c25bdb40 Merge pull request #246 from meilisearch/better-highlighting-area
Make the highlight system much better
2019-10-30 17:39:12 +01:00
c230f244be Make the highlight system much better 2019-10-30 17:32:29 +01:00
02af4ff113 Merge pull request #245 from meilisearch/reindex-all-documents-reduce-memory-usage
Reduce the ram consumption when re-indexing all the documents
2019-10-29 17:54:47 +01:00
4dff8a215e Reduce the ram consumption when re-indexing all the documents 2019-10-29 17:46:23 +01:00
41065305aa Merge pull request #244 from meilisearch/reintroduce-stop-words
Reintroduce stop words
2019-10-29 16:35:03 +01:00
e9dce3ce81 Add a test to ensure that the indexer support stop words 2019-10-29 16:18:06 +01:00
ff7dde7522 Make the RawIndexer support stop words 2019-10-29 16:18:06 +01:00
a226fd23c3 Introduce the stop words deletion update type 2019-10-29 16:18:06 +01:00
776673ebae Introduce the stop words addition update type 2019-10-29 15:24:09 +01:00
32d2cc3aea Merge pull request #243 from meilisearch/all-updates-results
Introduce a function to get all updates results
2019-10-29 11:45:55 +01:00
8a17fcdda5 Introduce a function to get all updates results 2019-10-29 11:37:40 +01:00
9602d7a960 Merge pull request #242 from meilisearch/accept-dup-documents
Make documents additions accept only the last duplicate document
2019-10-28 20:52:40 +01:00
ac12a4b9c9 Make documents additions accept only the last duplicate document 2019-10-28 20:40:33 +01:00
af96050944 Merge pull request #241 from meilisearch/fix-dead-locks
Fix dead locks
2019-10-28 18:20:01 +01:00
a43b37dfc1 Send channel notification when clearing documents 2019-10-28 17:58:22 +01:00
c08dcac1d4 Abort the update transaction before calling the update callback 2019-10-28 17:55:43 +01:00
a17dccd84e Merge pull request #237 from meilisearch/fix-exactness-criterion
Fix the exactness criterion algorithm
2019-10-26 18:43:10 +02:00
9a57cab3ee Fix the exactness criterion algorithm 2019-10-26 18:34:40 +02:00
751b060320 Merge pull request #238 from meilisearch/improve-highlighting
Only highlight query words areas not the whole words
2019-10-26 18:23:19 +02:00
4111b99a6d Only highlight query words areas not the whole words 2019-10-26 15:56:34 +02:00
d6fb2b56d1 Merge pull request #236 from meilisearch/reorder-automatons
Make sure that automatons group with more automatons are better
2019-10-24 15:29:16 +02:00
cb5c77e536 Make sure that automatons group with more automatons are better 2019-10-24 15:18:53 +02:00
44c89b1ea2 Merge pull request #235 from meilisearch/readme-concat-split-query-words
Add information about search concat and split query words support
2019-10-23 18:20:59 +02:00
26a285053b Add information about search concat and split query words support 2019-10-23 18:19:15 +02:00
1446a6a2d2 Merge pull request #234 from meilisearch/clear-all-update-variant
Introduce a clear all documents update
2019-10-23 16:45:37 +02:00
047eba3ff3 Introduce a clear all documents update 2019-10-23 16:39:10 +02:00
8d9d183ce6 Merge pull request #233 from meilisearch/commit-when-update-ok
Commit an update only when it is Ok
2019-10-23 16:07:48 +02:00
eb67195840 Commit an update only when it is Ok 2019-10-23 15:52:40 +02:00
93306c2326 Merge pull request #232 from meilisearch/support-splitted-words
Support splitted words
2019-10-23 13:38:16 +02:00
7d9cf8d713 Clean up the fetch algorithm 2019-10-23 12:06:21 +02:00
03eb7898e7 Introduce a basic working version of phrase query for splitting words 2019-10-23 11:40:13 +02:00
0fbd4cd632 Merge pull request #231 from meilisearch/recursive-object-indexing
Make possible to convert recursive object into strings
2019-10-22 16:20:10 +02:00
858bf359b8 Make possible to convert recursive object into strings 2019-10-22 16:02:02 +02:00
5dc8465ebd Merge pull request #181 from meilisearch/diff-schema
Make possible to update an index schema
2019-10-22 14:23:43 +02:00
0f30a221fa Introduce the reindex_all_documents indexing function 2019-10-22 14:07:27 +02:00
e86a547e93 Introduce a basic schema diff function 2019-10-21 17:57:32 +02:00
32d8b4b83f Merge pull request #230 from meilisearch/moving-to-heed
Move to heed 0.1.0
2019-10-21 13:34:06 +02:00
78535b3e33 Move to heed 0.1.0 2019-10-21 12:05:53 +02:00
6c9a238973 Merge pull request #229 from meilisearch/cargo-fmt-clippy
Cargo pass of fmt and clippy
2019-10-18 13:50:30 +02:00
cf5e228288 Update the CI to check the fmt and clippy 2019-10-18 13:33:38 +02:00
9dce41ed6b Cargo clippy pass 2019-10-18 13:30:06 +02:00
ca26a0f2e4 Cargo fmt pass 2019-10-18 13:30:06 +02:00
47d777c8f7 Merge pull request #228 from meilisearch/copy-and-compact-db
Introduce a function to copy and compact a database env
2019-10-18 13:21:55 +02:00
2ef51f7df9 Introduce a function to copy and compact a database env 2019-10-18 12:56:56 +02:00
2d7db2a80f Merge pull request #227 from meilisearch/damerau-distance-cost-1
Make the levenshtein algorithm consider transpositions to cost 1
2019-10-18 10:46:42 +02:00
526202ec8b Make the levenshtein algorithm consider transpositions to cost 1 2019-10-17 18:07:15 +02:00
86ab729356 Merge pull request #226 from meilisearch/fix-rotxn-number-documents
Use a read-only transaction to retrieve the number of documents
2019-10-17 17:39:56 +02:00
dd74af4c70 Use an RoTxn to retrieve the number of documents 2019-10-17 17:30:54 +02:00
b79a8457f9 Merge pull request #225 from meilisearch/improve-query-builder-pattern
Rework the QueryBuilder to make it easier to construct and use
2019-10-17 15:59:38 +02:00
d941c512db Rework the QueryBuilder to make it easier to construct and use 2019-10-17 14:45:21 +02:00
0ff73039e5 Merge pull request #224 from meilisearch/improve-automaton-producer
Improve the automaton producer
2019-10-17 13:51:44 +02:00
2ea3e9b081 Improve the automaton producer quality by changing the production order 2019-10-17 13:19:08 +02:00
da71821204 Make the example take the fetch-timeout-ms argument into account 2019-10-17 13:19:08 +02:00
16f0914f09 Merge pull request #223 from meilisearch/fix-update-serialization
Fix updates serialization to use serde_json instead of bincode
2019-10-17 13:05:25 +02:00
1cf6afad9a Fix updates serialization to use serde_json instead of bincode 2019-10-17 12:31:46 +02:00
261c21b057 Merge pull request #222 from meilisearch/update-readme
Update the README
2019-10-16 18:22:09 +02:00
925a22b644 Update the README 2019-10-16 18:04:45 +02:00
dc5c42821e Merge pull request #221 from meilisearch/zerocopy-lmdb
Moving to zerocopy-lmdb
2019-10-16 17:27:21 +02:00
1667e1b32f Move to zerocopy-lmdb 2019-10-16 17:12:55 +02:00
c332c7bc70 Merge pull request #220 from meilisearch/all-documents-fields-iter
Introduce an Iterator to visit all documents attributes counts
2019-10-15 15:42:30 +02:00
5e8d432614 Introduce an Iterator to visit all documents attributes counts 2019-10-15 15:27:18 +02:00
f6282ca031 Merge pull request #219 from meilisearch/current-update-id
Introduce an Index mathod to retrieve the currently processed update
2019-10-15 15:26:22 +02:00
3278d22279 Introduce an Index mathod to retrieve the currently processed update 2019-10-15 14:54:52 +02:00
c9618793e3 Merge pull request #218 from meilisearch/update-readme
Change the README to refer to LMDB instead of RocksDB
2019-10-15 11:40:10 +02:00
1ef785a9ef Change the README to refer to LMDB instead of RocksDB 2019-10-15 11:39:49 +02:00
fdc98f9ef3 Merge pull request #217 from meilisearch/improve-exactness-criterion
Improve the exactness criterion
2019-10-15 11:37:33 +02:00
0de37819b4 Simplify the document fields counts deletion 2019-10-15 11:17:23 +02:00
9ff92c5d15 Update the exact criterion to use the documents fields counts 2019-10-14 18:48:54 +02:00
e629f51af4 Use the documents_fileds_count store in the QueryBuilder 2019-10-14 18:48:32 +02:00
b377003192 Compute and store the number of words in documents fields 2019-10-14 14:07:10 +02:00
a7e40a78c1 Introduce the DocumentsFieldsCounts store 2019-10-14 14:06:34 +02:00
9cdda8c46a Make the RawIndexer index_text method return the number of words 2019-10-14 13:56:52 +02:00
b7ea812dcc Merge pull request #216 from meilisearch/get-ride-of-messagepack
Get ride of rust messagepack (rmp)
2019-10-11 16:41:37 +02:00
710ab2386c Get ride of rust messagepack (rmp) 2019-10-11 16:17:37 +02:00
81bf6d583d Merge pull request #214 from meilisearch/add-customs-updates
Add customs updates
2019-10-11 15:42:08 +02:00
02575a2ef6 Introduce customs updates 2019-10-11 15:33:35 +02:00
da6ab2753e Rename Update/Type SchemaUpdate into Schema 2019-10-11 13:49:17 +02:00
97de72de83 Merge pull request #213 from meilisearch/do-not-commit-ourselves
Do not commit updates, let the user do
2019-10-11 11:51:51 +02:00
12b80e08be Do not commit updates, let the user do 2019-10-11 11:29:47 +02:00
4b130fa2e5 Merge pull request #212 from meilisearch/fix-documents-ids-iter
Fix the DocumentsIdsIter and do not iter on an Option
2019-10-10 18:43:01 +02:00
9dca18f966 Fix the DocumentsIdsIter and do not iter on an Option 2019-10-10 18:32:22 +02:00
543b65b09b Merge pull request #211 from meilisearch/fix-documents-deletion-generic-param
Reemove the useless generic documents_deletion parameter
2019-10-10 17:09:49 +02:00
9eb27811b1 Remove the useless generic documents_deletion parameter 2019-10-10 16:16:53 +02:00
7c3d93e5da Merge pull request #210 from meilisearch/query-builder-with-criteria
Rename main_store into common_store
2019-10-10 15:40:56 +02:00
485480560a Add method to create a query builder along with criterion 2019-10-10 15:32:08 +02:00
0ac927794a Merge pull request #209 from meilisearch/rename-main-to-common-index
Rename main_store into common_store
2019-10-10 15:31:25 +02:00
e09d3b654d Rename main_store into common_store 2019-10-10 15:22:23 +02:00
c5af5de4f0 Merge pull request #208 from meilisearch/improve-open-or-create-index
Create two open and create index functions
2019-10-10 13:59:08 +02:00
19c22a8c5e Create two open and create index functions 2019-10-10 13:48:30 +02:00
0103c7bfd9 Merge pull request #207 from meilisearch/improve-documents-ids-iter
Improve the DocumentsIdsIter internal
2019-10-10 13:48:13 +02:00
7b26bd88c0 Improve the DocumentsIdsIter internal 2019-10-10 13:40:18 +02:00
da0168bd82 Merge pull request #206 from meilisearch/get-documents-ids
Introduce the DocumentsIds iterator
2019-10-10 10:54:21 +02:00
d1e59be46b Introduce the DocumentsIds iterator 2019-10-10 10:35:57 +02:00
9774db6011 Merge pull request #205 from meilisearch/expose-types
Expose the UpdateType
2019-10-10 10:35:42 +02:00
46c19dfc5a Expose the UpdateType 2019-10-10 10:24:41 +02:00
9ed6752573 Merge pull request #204 from meilisearch/optional-query-builder-timeout
Make the timeout QueryBuilder setting optional to and pass the tests
2019-10-09 18:17:52 +02:00
d8fdad1455 Make the timeout QueryBuilder setting optional to and pass the tests 2019-10-09 17:59:31 +02:00
f56636e1e9 Merge branch 'moving-to-lmdb' 2019-10-09 17:23:48 +02:00
03599f1fc9 Reintroduce the deep-dive and typos-ranking-rules explanations documents 2019-10-09 16:57:27 +02:00
be78ecbf9a Update the README to recall about LMDB 2019-10-09 16:55:07 +02:00
ba2b04ca89 Update ci with rust nightly only 2019-10-09 16:47:25 +02:00
121399f336 Add a movies example dataset to the repository 2019-10-09 16:46:11 +02:00
3fded51534 Update the README file to reflect the current repository 2019-10-09 16:46:11 +02:00
8f63ec39da Unrestrict static lifetime of Criterion names 2019-10-09 16:15:31 +02:00
5a1c1aeb02 Reintroduce the sort-by-attr criterion 2019-10-09 16:08:30 +02:00
6ec575f8de Use a buffered sync channel to avoid blocking the update system 2019-10-09 15:49:35 +02:00
683b6afbfb Introduce a way to filter documents with a basic syntax 2019-10-09 14:20:37 +02:00
663714bb6d Make the example return documents field in a consistent order 2019-10-09 13:48:33 +02:00
bb35ca0d40 Reintroduce the distinct and filtering of documents 2019-10-09 13:44:18 +02:00
5f3072e67e Support a basic update callback system 2019-10-09 11:45:19 +02:00
2a4707d51e Expose a function to be able to now the status of an update 2019-10-08 17:35:47 +02:00
6534a9ec1d Clean up many warning messages 2019-10-08 17:31:07 +02:00
0a5ad4db06 Move the push update functions to their related modules 2019-10-08 17:24:11 +02:00
6ee0d72c7b Expose the synonyms operation updates on the Index 2019-10-08 17:18:22 +02:00
ba32ce21d0 Introduce synonyms deletions updates 2019-10-08 17:16:48 +02:00
0e224efa46 Introduce synonyms additions updates 2019-10-08 17:06:56 +02:00
175461c13a Port all tests to the TempDatabase struct 2019-10-08 16:16:30 +02:00
c514692233 Introduce the TempDatabase in the QueryBuilder tests 2019-10-08 15:22:36 +02:00
d8d0442d63 Fix many indexing and searching related bugs 2019-10-08 14:56:14 +02:00
2236ebbd42 Introduce an example file to test indexing and searching csv documents 2019-10-08 14:48:48 +02:00
0bfba3e4ba Introduce a query_builder method on Index 2019-10-07 17:55:46 +02:00
a57a64823e Make possible to create an index and add a schema later on 2019-10-07 17:48:26 +02:00
aa05459e4f Introduce a background thread that manage updates to do 2019-10-07 16:16:04 +02:00
0615c5c52d Consume updates in the order of insertion 2019-10-07 15:00:28 +02:00
487411340a Prefix all the store names to avoid colliding with main stores 2019-10-07 10:56:55 +02:00
5139dc7f3e Let the caller commit/abort the operation 2019-10-07 10:52:45 +02:00
88d0d3931c Store the schema in the main index 2019-10-04 17:49:13 +02:00
df2ef8d2e1 Introduce update_task, popping an update and pushing the result of it 2019-10-04 17:49:13 +02:00
29229b2137 Remove the update from the database when popped out 2019-10-04 17:16:34 +02:00
851cc38216 Introduce the Database struct to manage indexes 2019-10-04 16:49:17 +02:00
effbbc7370 Load the indexes at startup 2019-10-04 13:26:33 +02:00
08e3f23408 Add the meilidb-schema/tokenizer projects 2019-10-04 10:29:44 +02:00
62a0aefe44 Make the project be a workspace 2019-10-04 10:26:32 +02:00
3476939b7e Prefer using the impl syntax 2019-10-04 10:21:09 +02:00
38e474deaf Introduce the MResult type 2019-10-03 17:33:15 +02:00
00c70d3cb5 Make the UpdatesResults store work 2019-10-03 16:54:37 +02:00
af9fd9f552 Make the Updates store work 2019-10-03 16:39:30 +02:00
0a731973b9 Made many stores do their jobs 2019-10-03 16:13:14 +02:00
c4bd13bcdf Introduce many SingleStore wrappers 2019-10-03 15:04:11 +02:00
a5bfbf244c Introduce the documents Deserializer 2019-10-03 11:49:13 +02:00
39e0d9fc4a Introduce a basically working rkv based MeiliDB 2019-10-02 17:35:18 +02:00
905bc5c1a6 Initial commit 2019-10-02 17:35:05 +02:00
0f395d43a0 Merge pull request #201 from meilisearch/updates-ids-api
Add more methods for updates process
2019-09-26 16:08:22 +02:00
0b5b7b0bf1 feat: add a method to get the current processed update id & next updates in queue 2019-09-26 15:50:16 +02:00
57dd679026 Merge pull request #199 from meilisearch/fix-soft-hard-separator
Do not consider underscores and middle dash hard separators
2019-09-24 23:09:38 +02:00
cdd69290c3 test: Make the tests work with new separator limits 2019-09-24 20:49:42 +02:00
175b3dcb75 fix: Do not consider underscores and middle dash hard 2019-09-24 20:14:20 +02:00
ca818e12a9 Merge pull request #198 from meilisearch/split-by-underscore
Support underscores and colon as split characters
2019-09-24 14:16:02 +02:00
6b9426a051 feat: Support underscore as a split character 2019-09-24 13:56:32 +02:00
cee5e50857 Merge pull request #197 from meilisearch/log-info-to-trace
Change logs in query_builder from info! to trace!
2019-09-24 13:48:46 +02:00
3fe346101b chore: change logs in query_builder from info! to trace! 2019-09-24 13:35:46 +02:00
185 changed files with 141656 additions and 7603 deletions

5
.dockerignore Normal file
View File

@ -0,0 +1,5 @@
target
Dockerfile
.dockerignore
.git
.gitignore

18
.github/workflows/README.md vendored Normal file
View File

@ -0,0 +1,18 @@
# GitHub actions workflow for MeiliDB
> **Note:**
> - We do not use [cache](https://github.com/actions/cache) yet but we could use it to speed up CI
## Workflow
- On each pull request, we are triggering `cargo test`.
- On each tag, we are building:
- the tagged docker image
- the binaries for MacOS, Ubuntu, and Windows
- the debian package
- On each stable release, we are build the latest docker image.
## Problems
- We do not test on Windows because we are unable to make it work, there is a disk space problem.

View File

@ -0,0 +1,16 @@
name: Check if the CHANGELOG.md has been updated
on: [pull_request]
jobs:
check:
name: Test on ${{ matrix.os }}
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ignore-changelog') }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Checking the CHANGELOG.md has been updated in this PR
run: |
set -e
git fetch origin ${{ github.base_ref }}
git diff --name-only origin/${{ github.base_ref }} | grep -q CHANGELOG.md

87
.github/workflows/publish-binaries.yml vendored Normal file
View File

@ -0,0 +1,87 @@
name: Publish binaries to GitHub release
on:
push:
tags:
- '*'
jobs:
publish:
name: Publish for ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
include:
- os: ubuntu-latest
artifact_name: meilisearch
asset_name: meilisearch-linux-amd64
- os: macos-latest
artifact_name: meilisearch
asset_name: meilisearch-macos-amd64
- os: windows-latest
artifact_name: meilisearch
asset_name: meilisearch-windows-amd64
steps:
- uses: hecrj/setup-rust-action@master
with:
rust-version: stable
- uses: actions/checkout@v1
- name: Build
run: cargo build --release --locked
- name: Upload binaries to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-armv7:
name: Publish for ARMv7
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v1.0.0
- uses: uraimo/run-on-arch-action@v1.0.7
id: runcmd
with:
architecture: armv7
distribution: ubuntu18.04
run: |
apt update
apt install -y curl gcc make
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable
source $HOME/.cargo/env
cargo build --release --locked
- name: Upload the binary to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
file: target/release/meilisearch
asset_name: meilisearch-linux-armv7
tag: ${{ github.ref }}
publish-armv8:
name: Publish for ARMv8
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v1.0.0
- uses: uraimo/run-on-arch-action@v1.0.7
id: runcmd
with:
architecture: aarch64 # aka ARMv8
distribution: ubuntu18.04
run: |
apt update
apt install -y curl gcc make
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable
source $HOME/.cargo/env
cargo build --release --locked
- name: Upload the binary to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
file: target/release/meilisearch
asset_name: meilisearch-linux-armv8
tag: ${{ github.ref }}

View File

@ -0,0 +1,39 @@
name: Publish deb pkg to GitHub release & apt repository & Homebrew
on:
push:
tags:
- 'v[0-9]+.[0-9]+.[0-9]+'
jobs:
debian:
name: Publish debian packagge
runs-on: ubuntu-latest
steps:
- uses: hecrj/setup-rust-action@master
with:
rust-version: stable
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v1
- name: Build deb package
run: cargo deb -p meilisearch-http -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: target/debian/meilisearch.deb
asset_name: meilisearch.deb
tag: ${{ github.ref }}
- name: Upload debian pkg to apt repository
run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/
homebrew:
name: Bump Homebrew formula
runs-on: ubuntu-latest
steps:
- uses: mislav/bump-homebrew-formula-action@v1
with:
formula-name: meilisearch
env:
COMMITTER_TOKEN: ${{ secrets.HOMEBREW_COMMITTER_TOKEN }}

View File

@ -0,0 +1,19 @@
---
on:
push:
tags:
- 'v[0-9]+.[0-9]+.[0-9]+'
name: Publish latest image to Docker Hub
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Publish to Registry
uses: elgohr/Publish-Docker-Github-Action@master
with:
name: getmeili/meilisearch
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

View File

@ -0,0 +1,20 @@
---
on:
push:
tags:
- '*'
name: Publish tagged image to Docker Hub
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Publish to Registry
uses: elgohr/Publish-Docker-Github-Action@master
with:
name: getmeili/meilisearch
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
tag_names: true

25
.github/workflows/test.yml vendored Normal file
View File

@ -0,0 +1,25 @@
---
on: [pull_request]
name: Test binaries with cargo test
jobs:
check:
name: Test on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v1
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release

10
.gitignore vendored
View File

@ -1,8 +1,8 @@
/target
/Cargo.lock
meilidb/Cargo.lock
meilidb-core/Cargo.lock
**/*.rs.bk
meilisearch-core/target
**/*.csv
**/*.json_lines
**/*.rdb
**/*.rs.bk
/*.mdb
/query-history.txt
/data.ms

13
CHANGELOG.md Normal file
View File

@ -0,0 +1,13 @@
## v0.10
- Refined filtering (#592)
- Add the number of hits in search result (#541)
- Add support for aligned crop in search result (#543)
- Sanitize the content displayed in the web interface (#539)
- Add support of nested null, boolean and seq values (#571 and #568, #574)
- Fixed the core benchmark (#576)
- Publish an ARMv7 and ARMv8 binaries on releases (#540 and #581)
- Fixed a bug where the result of the update status after the first update was empty (#542)
- Fixed a bug where stop words were not handled correctly (#594)
- Fix CORS issues (#602)
- Support wildcard on attributes to retrieve, highlight, and crop (#549, #565, and #598)

2596
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
[workspace]
members = [
"meilidb",
"meilidb-core",
"meilidb-data",
"meilidb-schema",
"meilidb-tokenizer",
"meilisearch-core",
"meilisearch-http",
"meilisearch-schema",
"meilisearch-tokenizer",
"meilisearch-types",
]
[profile.release]

27
Dockerfile Normal file
View File

@ -0,0 +1,27 @@
# Compile
FROM alpine:3.10 AS compiler
RUN apk update --quiet
RUN apk add curl
RUN apk add build-base
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
WORKDIR /meilisearch
COPY . .
ENV RUSTFLAGS="-C target-feature=-crt-static"
RUN $HOME/.cargo/bin/cargo build --release
# Run
FROM alpine:3.10
RUN apk update --quiet
RUN apk add libgcc
COPY --from=compiler /meilisearch/target/release/meilisearch .
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
CMD ./meilisearch

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2018 Clément Renault
Copyright (c) 2019-2020 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

241
README.md
View File

@ -1,82 +1,217 @@
# MeiliDB
<p align="center">
<img src="assets/logo.svg" alt="MeiliSearch" width="200" height="200" />
</p>
[![Build Status](https://dev.azure.com/thomas0884/thomas/_apis/build/status/meilisearch.MeiliDB?branchName=master)](https://dev.azure.com/thomas0884/thomas/_build/latest?definitionId=1&branchName=master)
[![dependency status](https://deps.rs/repo/github/Kerollmops/MeiliDB/status.svg)](https://deps.rs/repo/github/Kerollmops/MeiliDB)
[![License](https://img.shields.io/github/license/Kerollmops/MeiliDB.svg)](https://github.com/Kerollmops/MeiliDB)
[![Rust 1.31+](https://img.shields.io/badge/rust-1.31+-lightgray.svg)](
https://www.rust-lang.org)
<h1 align="center">MeiliSearch</h1>
A _full-text search database_ using a key-value store internally.
<h4 align="center">
<a href="https://www.meilisearch.com">Website</a> |
<a href="https://blog.meilisearch.com">Blog</a> |
<a href="https://fr.linkedin.com/company/meilisearch">LinkedIn</a> |
<a href="https://twitter.com/meilisearch">Twitter</a> |
<a href="https://docs.meilisearch.com">Documentation</a> |
<a href="https://docs.meilisearch.com/resources/faq.html">FAQ</a>
</h4>
<p align="center">
<a href="https://github.com/meilisearch/MeiliSearch/actions"><img src="https://github.com/meilisearch/MeiliSearch/workflows/Cargo%20test/badge.svg" alt="Build Status"></a>
<a href="https://deps.rs/repo/github/meilisearch/MeiliSearch"><img src="https://deps.rs/repo/github/meilisearch/MeiliSearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/MeiliSearch/blob/master/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://slack.meilisearch.com"><img src="https://img.shields.io/badge/slack-MeiliSearch-blue.svg?logo=slack" alt="Slack"></a>
</p>
<p align="center">⚡ Lightning Fast, Ultra Relevant, and Typo-Tolerant Search Engine 🔍</p>
**MeiliSearch** is a powerful, fast, open-source, easy to use and deploy search engine. Both searching and indexing are highly customizable. Features such as typo-tolerance, filters, and synonyms are provided out-of-the-box.
For more information about features go to [our documentation](https://docs.meilisearch.com/).
<p align="center">
<a href="https://crates.meilisearch.com"><img src="assets/crates-io-demo.gif" alt="crates.io demo gif" /></a>
</p>
> Meili helps the Rust community find crates on [crates.meilisearch.com](https://crates.meilisearch.com)
## Features
* Search as-you-type experience (answers < 50 milliseconds)
* Full-text search
* Typo tolerant (understands typos and miss-spelling)
* Supports Kanji
* Supports Synonym
* Easy to install, deploy, and maintain
* Whole documents are returned
* Highly customizable
* RESTful API
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L95-L101) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L22-L29) and can apply them in any custom order
- Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L146), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L68) and [filter](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L57) returned documents based on context defined rules
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/examples/movies/schema-movies.toml)
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-tokenizer/src/lib.rs#L99) can index latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/lib.rs#L117-L120), useful to highlight matched words in results
- Accepts query time search config like the [searchable fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L79)
- Supports run time indexing (incremental indexing)
## Get started
### Deploy the Server
#### Run it using Docker
It uses [RocksDB](https://github.com/facebook/rocksdb) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performances.
```bash
docker run -p 7700:7700 -v $(pwd)/data.ms:/data.ms getmeili/meilisearch
```
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries or you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
#### Installing with Homebrew
We will be proud if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
```bash
brew update && brew install meilisearch
meilisearch
```
The project is only a library yet. It means that there is no binary provided yet. To get started, you can check the examples wich are made to work with the data located in the `misc/` folder.
#### Installing with APT
MeiliDB will be a binary in a near future so you will be able to use it as a database out-of-the-box. We should be able to query it using a [to-be-defined](https://github.com/meilisearch/MeiliDB/issues/38) protocol. This is our current goal, [see the milestones](https://github.com/meilisearch/MeiliDB/milestones). In the end, the binary will be a bunch of network protocols and wrappers around the library - which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
```bash
echo "deb [trusted=yes] https://apt.fury.io/meilisearch/ /" > /etc/apt/sources.list.d/fury.list
apt update && apt install meilisearch-http
meilisearch
```
#### Download the binary
```bash
curl -L https://install.meilisearch.com | sh
./meilisearch
```
## Performances
#### Compile and run it from sources
With a database composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
So more than _300 000_ fields indexed for _35 million_ stored we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
If you have the Rust toolchain already installed on your local system, clone the repository and change it to your working directory.
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real users queries.
```bash
git clone https://github.com/meilisearch/MeiliSearch.git
cd MeiliSearch
```
In the cloned repository, compile MeiliSearch.
```bash
cargo run --release
```
### Create an Index and Upload Some Documents
Let's create an index! If you need a sample dataset, use [this movie database](https://www.notion.so/meilisearch/A-movies-dataset-to-test-Meili-1cbf7c9cfa4247249c40edfa22d7ca87#b5ae399b81834705ba5420ac70358a65). You can also find it in the `datasets/` directory.
```bash
curl -L 'https://bit.ly/2PAcw9l' -o movies.json
```
MeiliSearch can serve multiple indexes, with different kinds of documents.
It is required to create an index before sending documents to it.
```bash
curl -i -X POST 'http://127.0.0.1:7700/indexes' --data '{ "name": "Movies", "uid": "movies" }'
```
Now that the server knows about your brand new index, you're ready to send it some data.
```bash
curl -i -X POST 'http://127.0.0.1:7700/indexes/movies/documents' \
--header 'content-type: application/json' \
--data-binary @movies.json
```
### Search for Documents
#### In command line
The search engine is now aware of your documents and can serve those via an HTTP server.
The [`jq` command-line tool](https://stedolan.github.io/jq/) can greatly help you read the server responses.
```bash
curl 'http://127.0.0.1:7700/indexes/movies/search?q=botman+robin&limit=2' | jq
```
```json
{
"hits": [
{
"id": "415",
"title": "Batman & Robin",
"poster": "https://image.tmdb.org/t/p/w1280/79AYCcxw3kSKbhGpx1LiqaCAbwo.jpg",
"overview": "Along with crime-fighting partner Robin and new recruit Batgirl...",
"release_date": "1997-06-20",
},
{
"id": "411736",
"title": "Batman: Return of the Caped Crusaders",
"poster": "https://image.tmdb.org/t/p/w1280/GW3IyMW5Xgl0cgCN8wu96IlNpD.jpg",
"overview": "Adam West and Burt Ward returns to their iconic roles of Batman and Robin...",
"release_date": "2016-10-08",
}
],
"offset": 0,
"limit": 2,
"processingTimeMs": 1,
"query": "botman robin"
}
```
#### Use the Web Interface
We also deliver an **out-of-the-box web interface** in which you can test MeiliSearch interactively.
You can access the web interface in your web browser at the root of the server. The default URL is [http://127.0.0.1:7700](http://127.0.0.1:7700). All you need to do is open your web browser and enter MeiliSearchs address to visit it. This will lead you to a web page with a search bar that will allow you to search in the selected index.
<p align="center">
<img src="assets/movies-web-demo.gif" alt="Web interface gif" />
</p>
### Documentation
Now that your MeiliSearch server is up and running, you can learn more about how to tune your search engine in [the documentation](https://docs.meilisearch.com).
### Technical features
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/criterion/mod.rs#L106-L111) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/criterion/mod.rs#L20-L29) and can apply them in any custom order
- Supports [ranged queries](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/query_builder.rs#L342), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/query_builder.rs#L324-L329) and [filter](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/query_builder.rs#L313-L318) returned documents based on context defined rules
- Searches for [concatenated](https://github.com/meilisearch/MeiliSearch/pull/164) and [splitted query words](https://github.com/meilisearch/MeiliSearch/pull/232) to improve the search quality.
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/datasets/movies/schema.toml)
- The [default tokenizer](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-tokenizer/src/lib.rs) can index Latin and Kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-types/src/lib.rs#L49-L65), useful to highlight matched words in results
- Accepts query time search config like the [searchable attributes](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/query_builder.rs#L331-L336)
- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliSearch/blob/3ea5aa18a209b6973b921542d46a79e1c753c163/meilisearch-core/src/store/mod.rs#L143-L212)
## Performance
When processing a dataset composed of 5M books, each with their own titles and authors, MeiliSearch is able to carry out more than 553 req/sec with an average response time of 21 ms on an Intel i7-7700 (8) @ 4.2GHz.
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real users' queries.
```
Running 10s test @ http://localhost:2230
2 threads and 25 connections
Running 10s test @ http://1.2.3.4:7700
2 threads and 10 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 9.52ms 7.61ms 99.25ms 84.58%
Req/Sec 1.41k 119.11 1.78k 64.50%
28080 requests in 10.01s, 7.42MB read
Requests/sec: 2806.46
Transfer/sec: 759.17KB
Latency 21.45ms 15.64ms 214.10ms 85.95%
Req/Sec 256.48 37.66 330.00 69.50%
5132 requests in 10.05s, 2.31MB read
Requests/sec: 510.46
Transfer/sec: 234.77KB
```
### Notes
We also indexed a dataset containing about _12 millions_ cities names in _24 minutes_ on a _8 cores_, _64 GB of RAM_, and a _300 GB NMVe_ SSD machine.<br/>
The size of the resulting database reached _16 GB_ and search results were presented between _30 ms_ and _4 seconds_ for short prefix queries.
The default Rust allocator has recently been [changed to use the system allocator](https://github.com/rust-lang/rust/pull/51241/).
We have seen much better performances when [using jemalloc as the global allocator](https://github.com/alexcrichton/jemallocator#documentation).
## Contributing
## Usage and examples
Hey! We're glad you're thinking about contributing to MeiliSearch! If you think something is missing or could be improved, please open issues and pull requests. If you'd like to help this project grow, we'd love to have you! To start contributing, checking [issues tagged as "good-first-issue"](https://github.com/meilisearch/MeiliSearch/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) is a good start!
Currently MeiliDB do not provide an http server but you can run these two examples to try it out.
### Analytic Events
It creates an index named _movies_ and insert _19 700_ (in batches of _1000_) movies into it.
Once a day, events are being sent to our Amplitude instance so we can know how many people are using MeiliSearch.<br/>
Only information about the platform on which the server runs is stored. No other information is being sent.<br/>
If this doesn't suit you, you can disable these analytics by using the `MEILI_NO_ANALYTICS` env variable.
```bash
cargo run --release --example create-database -- \
--schema examples/movies/schema-movies.toml \
--update-group-size 1000 \
movies.mdb \
examples/movies/movies.csv
```
## Contact
Once this is done, you can query this database using the second binary example.
Feel free to contact us about any questions you may have:
* At [bonjour@meilisearch.com](mailto:bonjour@meilisearch.com): English or French is welcome! 🇬🇧 🇫🇷
* Via the chat box available on every page of [our documentation](https://docs.meilisearch.com/) and on [our landing page](https://www.meilisearch.com/).
* Join our [Slack community](https://slack.meilisearch.com/).
* By opening an issue.
```bash
cargo run --release --example query-database -- \
movies.mdb \
--fetch-timeout-ms 50 \
-n 4 \
id title overview release_date poster
```
Any suggestion or feedback is highly appreciated. Thank you for your support!

BIN
assets/crates-io-demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 MiB

17
assets/logo.svg Normal file
View File

@ -0,0 +1,17 @@
<svg width="360" height="360" viewBox="0 0 360 360" fill="none" xmlns="http://www.w3.org/2000/svg">
<g id="logo_main">
<rect id="Rectangle" x="107.333" y="0.150146" width="274.315" height="274.315" rx="98.8334" transform="rotate(23 107.333 0.150146)" fill="url(#paint0_linear)"/>
<path id="Rectangle_2" fill-rule="evenodd" clip-rule="evenodd" d="M61.3296 230.199C46.2224 194.608 38.6688 176.813 38.208 160.329C37.5286 136.025 47.0175 112.539 64.3891 95.5282C76.1718 83.9904 93.9669 76.4368 129.557 61.3296C165.147 46.2224 182.943 38.6688 199.427 38.208C223.731 37.5286 247.217 47.0175 264.228 64.3891C275.766 76.1718 283.319 93.9669 298.426 129.557C313.534 165.147 321.087 182.943 321.548 199.427C322.227 223.731 312.738 247.217 295.367 264.228C283.584 275.766 265.789 283.319 230.199 298.426C194.608 313.534 176.813 321.087 160.329 321.548C136.025 322.227 112.539 312.738 95.5282 295.367C83.9903 283.584 76.4368 265.789 61.3296 230.199Z" fill="url(#paint1_linear)"/>
<path id="m" fill-rule="evenodd" clip-rule="evenodd" d="M219.568 130.748C242.363 130.748 259.263 147.451 259.263 174.569V229.001H227.232V179.678C227.232 166.119 220.747 159.634 210.136 159.634C205.223 159.634 200.311 161.796 195.595 167.494C195.791 169.852 195.988 172.21 195.988 174.569V229.001H164.154V179.678C164.154 166.119 157.472 159.634 147.057 159.634C142.145 159.634 137.429 161.992 132.712 168.084V229.001H100.878V133.695H132.712V139.394C139.197 133.892 145.878 130.748 156.49 130.748C168.477 130.748 178.695 135.267 185.769 143.52C195.791 134.678 205.42 130.748 219.568 130.748Z" fill="white"/>
</g>
<defs>
<linearGradient id="paint0_linear" x1="-13.6248" y1="129.208" x2="244.49" y2="403.522" gradientUnits="userSpaceOnUse">
<stop stop-color="#E41359"/>
<stop offset="1" stop-color="#F23C79"/>
</linearGradient>
<linearGradient id="paint1_linear" x1="11.0088" y1="111.65" x2="111.65" y2="348.747" gradientUnits="userSpaceOnUse">
<stop stop-color="#24222F"/>
<stop offset="1" stop-color="#2B2937"/>
</linearGradient>
</defs>
</svg>

After

Width:  |  Height:  |  Size: 2.0 KiB

BIN
assets/movies-web-demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 MiB

View File

@ -1,47 +0,0 @@
---
trigger:
branches:
include: [ master ]
pr: [ master ]
jobs:
- job: test
pool:
vmImage: 'Ubuntu 16.04'
container: tpayet/chiquitita:latest
steps:
- script: |
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
displayName: 'Install rustc'
- script: |
$HOME/.cargo/bin/cargo check
displayName: 'Check MeiliDB'
- script: |
$HOME/.cargo/bin/cargo test
displayName: 'Test MeiliDB'
- job: build
dependsOn:
- test
condition: succeeded()
pool:
vmImage: 'Ubuntu 16.04'
container: tpayet/chiquitita:latest
steps:
- script: |
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
displayName: 'Install rustc'
- script: |
$HOME/.cargo/bin/cargo build --release
displayName: 'Build MeiliDB'
- task: CopyFiles@2
inputs:
contents: '$(System.DefaultWorkingDirectory)/target/release/libmeilidb.rlib'
targetFolder: $(Build.ArtifactStagingDirectory)
displayName: 'Copy build'
- task: PublishBuildArtifacts@1
inputs:
artifactName: libmeilidb.rlib
displayName: 'Upload artifacts'

View File

@ -1,15 +0,0 @@
#!/bin/bash
cd "$(dirname "$0")"/..
set -ex
export RUSTFLAGS="-D warnings"
cargo check --no-default-features
cargo check --bins --examples --tests
cargo test
if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then
cargo check --no-default-features --features nightly
cargo test --features nightly
fi

View File

@ -0,0 +1 @@
_datas in movies.csv are from https://www.themoviedb.org/_

View File

Can't render this file because it is too large.

19654
datasets/movies/movies.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
{
"searchableAttributes": ["title", "overview"],
"displayedAttributes": [
"id",
"title",
"overview",
"release_date",
"poster"
]
}

View File

@ -1,95 +0,0 @@
# A deep dive in MeiliDB
On the 15 of May 2019.
MeiliDB is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
<!-- MarkdownTOC autolink="true" -->
- [Where is the data stored?](#where-is-the-data-stored)
- [What does the key-value store contains?](#what-does-the-key-value-store-contains)
- [The inverted word index](#the-inverted-word-index)
- [A final state transducer](#a-final-state-transducer)
- [Document indexes](#document-indexes)
- [The schema](#the-schema)
- [Document attributes](#document-attributes)
- [How is a request processed?](#how-is-a-request-processed)
- [Query lexemes](#query-lexemes)
- [Automatons and query index](#automatons-and-query-index)
- [Sort by criteria](#sort-by-criteria)
<!-- /MarkdownTOC -->
## Where is the data stored?
MeiliDB is entirely backed by a key-value store like any good database (i.e. Postgres, MySQL). This brings a great flexibility in the way documents can be stored and updates handled along time.
[sled will brings some](https://github.com/spacejam/sled/tree/434533332a3f485e6d2e467023be0a0b55d3a1af#plans) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent.
## What does the key-value store contains?
It contain the inverted word index, the schema and the documents fields.
### The inverted word index
[The inverted word index](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs) is a sled Tree dedicated to store and give access to all documents that contains a specific word. The information stored under the word is simply a big ordered array of where in the document the word has been found. In other word, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L35-L51).
#### A final state transducer
_...also abbreviated fst_
This is the first entry point of the engine, you can read more about how it work with the beautiful blog post of @BurntSushi, [Index 1,600,000,000 Keys with Automata and Rust](https://blog.burntsushi.net/transducers/).
To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want, in MeiliDB [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
#### Document indexes
The `fst` will only return the words that match with the search automaton but the goal of the search engine is to retrieve all matches in all the documents when a query is made. You want it to return some sort of position in an attribute in a document, an information about where the given word matched.
To make it possible we retrieve all of the `DocIndex` corresponding to all the matching words in the fst, we use the [`WordsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding the words.
### The schema
The schema is a data structure that represents which documents attributes should be stored and which should be indexed. It is stored under a the [`MainIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/main_index.rs#L12) Tree and given to MeiliDB only at the creation of an index.
Each document attribute is associated to a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/schema.rs#L186).
In the future, this schema type could be given along with updates, the database could be able to handled a new schema and reindex the database according to the new one.
### Document attributes
When the engine handle a query the result that the requester want is a document, not only the [`Matches`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L62-L88) associated to it, fields of the original document must be returned too.
So MeiliDB again uses the power of the underlying key-value store and save the documents attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/documents_index.rs#L11).
When a document field is saved in the key-value store its value is binary encoded using [message pack](https://github.com/3Hren/msgpack-rust), so a document must be serializable using serde.
## How is a request processed?
Now that we have our inverted index we are able to return results based on a query. In the MeiliDB universe a query is a simple string containing words.
### Query lexemes
The first step to be able to call the underlying structures is to split the query in words, for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language, this is the hard part.
### Automatons and query index
So to query the fst we need an automaton, in MeiliDB we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), this automaton is constructed using a string and a maximum distance. According to the [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/automaton.rs#L59-L78) with different settings.
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst set. The `Stream` is able to return all the matching words. We use these words to find the whole list of `DocIndexes` associated.
With all these informations it is possible [to reconstruct a list of all the `DocIndexes` associated](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L103-L130) with the words queried.
### Sort by criteria
Now that we are able to get a big list of [DocIndexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L21-L36) it is not enough to sort them by criteria, we need more informations like the levenshtein distance or the fact that a query word match exactly the word stored in the fst. So [we stuff it a little bit](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L86-L93), and aggregate all these [Matches](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L47-L74) for each document. This way it will be easy to sort a simple vector of document using a bunch of functions.
With this big list of documents and associated matches [we are able to sort only the part of the slice that we want](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L160-L188) using bucket sorting. [Each criterion](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/criterion/mod.rs#L95-L101) is evaluated on each subslice without copy, thanks to [GroupByMut](https://docs.rs/slice-group-by/0.2.4/slice_group_by/) which, I hope [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
Note that it is possible to customize the criteria used by using the `QueryBuilder::with_criteria` constructor, this way you can implement some custom ranking based on the document attributes using the appropriate structure and the [`document` method](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/index.rs#L86).
At this point, MeiliDB work is over 🎉

129
download-latest.sh Normal file
View File

@ -0,0 +1,129 @@
#!/bin/sh
# COLORS
RED='\033[31m'
GREEN='\033[32m'
DEFAULT='\033[0m'
# GLOBALS
GREP_SEMVER_REGEXP='\"v\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\"' # i.e. "v[number].[number].[number]"
BINARY_NAME='meilisearch'
# semverParseInto and semverLT from https://github.com/cloudflare/semver_bash/blob/master/semver.sh
# usage: semverParseInto version major minor patch special
# version: the string version
# major, minor, patch, special: will be assigned by the function
semverParseInto() {
local RE='[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)'
#MAJOR
eval $2=`echo $1 | sed -e "s#$RE#\1#"`
#MINOR
eval $3=`echo $1 | sed -e "s#$RE#\2#"`
#MINOR
eval $4=`echo $1 | sed -e "s#$RE#\3#"`
#SPECIAL
eval $5=`echo $1 | sed -e "s#$RE#\4#"`
}
# usage: semverLT version1 version2
semverLT() {
local MAJOR_A=0
local MINOR_A=0
local PATCH_A=0
local SPECIAL_A=0
local MAJOR_B=0
local MINOR_B=0
local PATCH_B=0
local SPECIAL_B=0
semverParseInto $1 MAJOR_A MINOR_A PATCH_A SPECIAL_A
semverParseInto $2 MAJOR_B MINOR_B PATCH_B SPECIAL_B
if [ $MAJOR_A -lt $MAJOR_B ]; then
return 0
fi
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -lt $MINOR_B ]; then
return 0
fi
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -le $MINOR_B ] && [ $PATCH_A -lt $PATCH_B ]; then
return 0
fi
if [ "_$SPECIAL_A" == "_" ] && [ "_$SPECIAL_B" == "_" ] ; then
return 1
fi
if [ "_$SPECIAL_A" == "_" ] && [ "_$SPECIAL_B" != "_" ] ; then
return 1
fi
if [ "_$SPECIAL_A" != "_" ] && [ "_$SPECIAL_B" == "_" ] ; then
return 0
fi
if [ "_$SPECIAL_A" < "_$SPECIAL_B" ]; then
return 0
fi
return 1
}
success_usage() {
printf "$GREEN%s\n$DEFAULT" "MeiliSearch binary successfully downloaded as '$BINARY_NAME' file."
echo ''
echo 'Run it:'
echo ' $ ./meilisearch'
echo 'Usage:'
echo ' $ ./meilisearch --help'
}
failure_usage() {
printf "$RED%s\n$DEFAULT" 'ERROR: MeiliSearch binary is not available for your OS distribution yet.'
echo ''
echo 'However, you can easily compile the binary from the source files.'
echo 'Follow the steps on the docs: https://docs.meilisearch.com/advanced_guides/binary.html#how-to-compile-meilisearch'
}
# OS DETECTION
echo 'Detecting OS distribution...'
os_name=$(uname -s)
if [ "$os_name" != "Darwin" ]; then
os_name=$(cat /etc/os-release | grep '^ID=' | tr -d '"' | cut -d '=' -f 2)
fi
echo "OS distribution detected: $os_name"
case "$os_name" in
'Darwin')
os='macos'
;;
'ubuntu' | 'debian')
os='linux'
;;
*)
failure_usage
exit 1
esac
# GET LATEST VERSION
tags=$(curl -s 'https://api.github.com/repos/meilisearch/MeiliSearch/tags' \
| grep "$GREP_SEMVER_REGEXP" \
| grep 'name' \
| tr -d '"' | tr -d ',' | cut -d 'v' -f 2)
latest=""
for tag in $tags; do
if [ "$latest" = "" ]; then
latest="$tag"
else
semverLT $tag $latest
if [ $? -eq 1 ]; then
latest="$tag"
fi
fi
done
# DOWNLOAD THE LATEST
echo "Downloading MeiliSearch binary v$latest for $os..."
release_file="meilisearch-$os-amd64"
link="https://github.com/meilisearch/MeiliSearch/releases/download/v$latest/$release_file"
curl -OL "$link"
mv "$release_file" "$BINARY_NAME"
chmod 744 "$BINARY_NAME"
success_usage

View File

@ -1,122 +0,0 @@
id,title,description,image
711158459,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs2.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
711158460,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs3.ebaystatic.com/d/l225/m/mJNDmSyIS3vUasKIJEBy4Cw.jpg
711158461,Sony PlayStation 4 PS4 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs4.ebaystatic.com/d/l225/m/m10NZXArmiIkpkTDDkAUVvA.jpg
711158462,Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black,,http://thumbs2.ebaystatic.com/d/l225/m/mZZXTmAE8WZDH1l_E_PPAkg.jpg
711158463,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs3.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
711158464,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs4.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
711158465,BRAND NEW Sony PlayStation 4 BUNDLE 500gb,,http://thumbs4.ebaystatic.com/d/l225/m/m9TQTiWcWig7SeQh9algLZg.jpg
711158466,"Sony PlayStation 4 500GB, Dualshock Wireless Control, HDMI Gaming Console Refurb","The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs4.ebaystatic.com/d/l225/m/mTZYG5N6xWfBi4Ok03HmpMw.jpg
711158467,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console w/ 2 Controllers,,http://thumbs2.ebaystatic.com/d/l225/m/mX5Qphrygqeoi7tAH5eku2A.jpg
711158468,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console *NEW*,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs2.ebaystatic.com/d/l225/m/mGjN4IrJ0O8kKD_TYMWgGgQ.jpg
711158469,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console..wth Mortal Kombat X,,http://thumbs2.ebaystatic.com/d/l225/m/mrpqSNXwlnUVKnEscE4348w.jpg
711158470,Genuine SONY PS4 Playstation 4 500GB Gaming Console - Black,,http://thumbs4.ebaystatic.com/d/l225/m/myrPBFCpb4H5rHI8NyiS2zA.jpg
711158471,[Sony] Playstation 4 PS4 Video Game Console Black - Latest Model,,http://thumbs4.ebaystatic.com/d/l225/m/mce0c7mCuv3xpjllJXx093w.jpg
711158472,Sony PlayStation 4 (Latest Model) 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs2.ebaystatic.com/d/l225/m/miVSA1xPO5fCNdYzEMc8rSQ.jpg
711158473,Sony PlayStation 4 - 500 GB Jet Black Console - WITH LAST OF US REMASTERED,,http://thumbs2.ebaystatic.com/d/l225/m/mLjnOxv2GWkrkCtgsDGhJ6A.jpg
711158474,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs3.ebaystatic.com/d/l225/m/mjMittBaXmm_n4AMpETBXhQ.jpg
711158475,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/m1n1qrJ7-VGbe7xQvGdeD6Q.jpg
711158476,"Sony PlayStation 4 - 500 GB Jet Black Console (3 controllers,3 games included)",,http://thumbs3.ebaystatic.com/d/l225/m/mIoGIj9FZG7HoEVkPlnyizA.jpg
711158477,Sony PlayStation 4 500GB Console with 2 Controllers,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs2.ebaystatic.com/d/l225/m/m4fuJ5Ibrj450-TZ83FAkIQ.jpg
711158478,Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black,,http://thumbs3.ebaystatic.com/d/l225/m/mzXSIw8Hlnff8IjXJQrXJSw.jpg
711158479,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/m-9S63CgFoUijY3ZTyNs3KA.jpg
711158480,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs1.ebaystatic.com/d/l225/m/mdF9Bisg9wXjv_R9Y_13MWw.jpg
711158481,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console*,,http://thumbs1.ebaystatic.com/d/l225/m/m4_OQHMmIOCa8uEkBepRR5A.jpg
711158482,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/mZ0nR8iz-QAfLssJZMp3L5Q.jpg
711158483,[Sony] Playstation 4 PS4 1105A Video Game Console 500GB White - Latest Model,,http://thumbs4.ebaystatic.com/d/l225/m/m8iTz5cLQLNjD9D3O2jT3IQ.jpg
711158484,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs2.ebaystatic.com/d/l225/m/mrraWCpvP5YKk5rYgotVDLg.jpg
711158485,Obagi Elastiderm Eye Treatment Cream 0.5 oz / 15g Authentic NiB Sealed [5],,http://thumbs1.ebaystatic.com/d/l225/m/mJ4ekz6_bDT5G7wYtjM-qRg.jpg
711158486,Lancome Renergie Eye Anti-Wrinkle & Firming Eye Cream 0.5oz New,,http://thumbs2.ebaystatic.com/d/l225/m/mxwwyDQraZ-TEtr_Y6qRi7Q.jpg
711158487,OZ Naturals - The BEST Eye Gel - Eye Cream For Dark Circles Puffiness and,,http://thumbs2.ebaystatic.com/d/l225/m/mk2Z-hX5sT4kUxfG6g_KFpg.jpg
711158488,Elastiderm Eye Cream (0.5oz/15g),,http://thumbs3.ebaystatic.com/d/l225/m/mHxb5WUc5MtGzCT2UXgY_hg.jpg
711158489,new CLINIQUE Repairwear Laser Focus Wrinkle Correcting Eye Cream 0.17 oz/ 5 ml,,http://thumbs1.ebaystatic.com/d/l225/m/mQSX2wfrSeGy3uA8Q4SbOKw.jpg
711158490,NIB Full Size Dermalogica Multivitamin Power Firm Eye Cream,,http://thumbs4.ebaystatic.com/d/l225/m/m2hxo12e5NjXgGiKIaCvTLA.jpg
711158491,24K Gold Collagen Anti-Dark Circles Anti-Aging Bio Essence Repairing Eye Cream,,http://thumbs4.ebaystatic.com/d/l225/m/mt96efUK5cPAe60B9aGmgMA.jpg
711158492,Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream Full Size .5oz 15mL,,http://thumbs3.ebaystatic.com/d/l225/m/mZyV3wKejCMx9RrnC8X-eMw.jpg
711158493,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs4.ebaystatic.com/d/l225/m/m9hX_z_DFnbNCTh0VFv3KcQ.jpg
711158494,3 Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17 oz/5 ml Each,,http://thumbs1.ebaystatic.com/d/l225/m/mYiHsrGffCg_qgkTbUWZU1A.jpg
711158495,Lancome High Resolution Eye Cream .95 Oz Refill-3X .25 Oz Plus .20 Oz Lot,,http://thumbs1.ebaystatic.com/d/l225/m/mFuQxKoEKQ6wtk2bGxfKwow.jpg
711158496,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs4.ebaystatic.com/d/l225/m/mLBRCDiELUnYos-vFmIcc7A.jpg
711158497,Neutrogena Rapid Wrinkle Repair Eye Cream -0.5 Oz. -New-,,http://thumbs4.ebaystatic.com/d/l225/m/mE1RWpCOxkCGuuiJBX6HiBQ.jpg
711158498,20g Snail Repair Eye Cream Natural Anti-Dark Circles Puffiness Aging Wrinkles,,http://thumbs4.ebaystatic.com/d/l225/m/mh4gBNzINDwds_r778sJRjg.jpg
711158499,Vichy-Neovadiol GF Eye & Lip Contour Cream 0.5 Fl. Oz,,http://thumbs4.ebaystatic.com/d/l225/m/m_6f0ofCm7PTzuithYuZx3w.jpg
711158500,Obagi Elastiderm Eye Cream 0.5 oz. New In Box. 100% Authentic! New Packaging!,,http://thumbs2.ebaystatic.com/d/l225/m/ma0PK-ASBXUiHERR19MyImA.jpg
711158501,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17oz / 5ml,,http://thumbs3.ebaystatic.com/d/l225/m/m72NaXYlcXcEeqQFKWvsdZA.jpg
711158502,Kiehl's CREAMY EYE TREATMENT cream with AVOCADO 0.5 oz FULL SIZE,,http://thumbs3.ebaystatic.com/d/l225/m/mOI407HnILb_tf-RgdvfYyA.jpg
711158503,Clinique repairwear laser focus wrinkle correcting eye cream .5 oz 15ml,,http://thumbs4.ebaystatic.com/d/l225/m/mQwNVst3bYG6QXouubmLaJg.jpg
711158504,Caudalie Premier Cru The Eye Cream La Creme New Anti Aging Eye Treatment,,http://thumbs1.ebaystatic.com/d/l225/m/mM4hPTAWXeOjovNk9s_Cqag.jpg
711158505,Jeunesse Instantly Ageless -- New Box Of 50 Sachets -- Eye - Face Wrinkle Cream,,http://thumbs2.ebaystatic.com/d/l225/m/m5EfWbi6ZYs4JpYcsl0Ubaw.jpg
711158506,VELOUR SKIN EYE CREAM .5 FL OZ 15ML NEW NIP ANTI-AGING WRINKLE CREAM,,http://thumbs1.ebaystatic.com/d/l225/m/m2uEf6q1yASH8FkWqYdOv1w.jpg
711158507,Shiseido White Lucent Anti-Dark Circles/Puffiness Eye Cream 15ml/.53oz Full Size,,http://thumbs1.ebaystatic.com/d/l225/m/m_CtzoqU2Vgv4GKx8ONS6qw.jpg
711158508,Murad Resurgence Renewing Eye Cream Anti-Aging .25 oz NEW Dark Circles Wrinkle,,http://thumbs1.ebaystatic.com/d/l225/m/mhWJC10iowgUDGm4KMQKNMg.jpg
711158509,D-Link DIR-615 300Mbps Wireless-N Router 4-Port w/Firewall,,http://thumbs3.ebaystatic.com/d/l225/m/mdSBH9ROXRn3TBb8OFDT6jA.jpg
711158510,Triton MOF001 2 1/4hp dual mode precision Router. New!! *3 day auction*,,http://thumbs1.ebaystatic.com/d/l225/m/mozWd2SBskbDBlWAKsMlVew.jpg
711158511,Porter-Cable 3-1/4 HP Five-Speed Router 7518 - Power Tools Routers,,http://thumbs2.ebaystatic.com/d/l225/m/mpZDTXpiyesDrZh_FLMyqXQ.jpg
711158512,Linksys EA6900 AC1900 Wi-Fi Wireless Router Dual Band with Gigabit &USB 3.0 Port,,http://thumbs4.ebaystatic.com/d/l225/m/m3OfBSnHBDhhs_Ve-DSBKQw.jpg
711158513,Linksys EA6500 1300 Mbps 4-Port Gigabit Wireless AC Router,,http://thumbs1.ebaystatic.com/d/l225/m/m7cfymJPc7CLADoTiEYFzwA.jpg
711158514,Makita RT0700CX3 1-1/4 Horsepower Compact Router Kit / Trimmer NEW,,http://thumbs2.ebaystatic.com/d/l225/m/mr-F3rCxDYsLcj8hnmaRN4A.jpg
711158515,NETGEAR R6250 AC1600 Smart WiFi Dual Band Gigabit Router 802.11ac 300 1300 Mbps,,http://thumbs4.ebaystatic.com/d/l225/m/mc8Ic8Cq2lPqPnjNGAQBBCQ.jpg
711158516,NETGEAR Nighthawk AC1900 Dual Band Wi-Fi Gigabit Router (R7000) BRAND NEW SEALED,,http://thumbs3.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
711158517,Netgear WNDR3400 N600 Wireless Dual Band Router (WNDR3400-100),,http://thumbs4.ebaystatic.com/d/l225/m/mKr4cNk6utJXSdVYXzwrScQ.jpg
711158518,Netgear N600 300 Mbps 4-Port 10/100 Wireless N Router (WNDR3400),,http://thumbs2.ebaystatic.com/d/l225/m/mUPdyhbW9pzEm1VbqX0YudA.jpg
711158519,NETGEAR N600 WNDR3400 Wireless Dual Band Router F/S,,http://thumbs1.ebaystatic.com/d/l225/m/my55jF5kHnG9ipzFycnjooA.jpg
711158520,Netgear NIGHTHAWK AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000),,http://thumbs3.ebaystatic.com/d/l225/m/mrPLRTnWx_JXLNIp5pCBnzQ.jpg
711158521,Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router (WNDR4500),,http://thumbs2.ebaystatic.com/d/l225/m/mXBL01faHlHm7Ukh188t3yQ.jpg
711158522,Netgear R6300V2 AC1750 1300 Mbps 4-Port Gigabit Wireless AC Router,,http://thumbs1.ebaystatic.com/d/l225/m/mTdnFB9Z71efYJ9I5-k186w.jpg
711158523,Makita RT0701C 1-1/4 HP Compact Router With FACTORY WARRANTY!!!,,http://thumbs2.ebaystatic.com/d/l225/m/m7AA4k3MzYFJcTlBrT3DwhA.jpg
711158524,"CISCO LINKSYS EA4500 DUAL-BAND N9000 WIRELESS ROUTER, 802.11N, UP TO 450 MBPs",,http://thumbs4.ebaystatic.com/d/l225/m/mwfVIXD3dZYt_qpHyprd7hg.jpg
711158525,Netgear N300 v.3 300 Mbps 5-Port 10/100 Wireless N Router (WNR2000),,http://thumbs4.ebaystatic.com/d/l225/m/mopRjvnZwbsVH9euqGov5kw.jpg
711158526,Netgear Nighthawk R7000 2330 Mbps 4-Port Gigabit Wireless N Router...,,http://thumbs4.ebaystatic.com/d/l225/m/mns82UY4FfqYXPgqrpJ9Bzw.jpg
711158527,Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router R4500 ~ FreE ShiPPinG ~,,http://thumbs1.ebaystatic.com/d/l225/m/m_o0mSRmySgJUuqHYDIQiuA.jpg
711158528,D-Link Wireless Router Model DIR-625,,http://thumbs2.ebaystatic.com/d/l225/m/mYPXwZMlDUjOQ3Sm3EtU37Q.jpg
711158529,D-Link DIR-657 300 Mbps 4-Port Gigabit Wireless N Router Hd Media Router 1000,"Stream multiple media content - videos, music and more to multiple devices all at the same time without lag or skipping. The HD Fuel technology in the DIR-657 lets you watch Netflix and Vudu , play your Wii or Xbox 360 online or make Skype calls all without worrying about the skipping or latency you might experience with standard routers. It does so by automatically giving extra bandwidth for video, gaming and VoIP calls using HD Fuel QoS technology. The D-Link HD Media Router 1000(DIR-657) also comes equipped with 4 Gigabit ports to provide speeds up to 10x faster than standard 10/100 ports. What s more, it uses 802.11n technology with multiple intelligent antennas to maximize the speed and range of your wireless signal to significantly outperform 802.11g devices.",http://thumbs1.ebaystatic.com/d/l225/m/m0xyPdWrdVKe7By4QFouVeA.jpg
711158530,D-Link DIR-860L AC1200 4-Port Cloud Router Gigabit Wireless 802.11 AC,,http://thumbs3.ebaystatic.com/d/l225/m/mk4KNj6oLm7863qCS-TqmbQ.jpg
711158531,D-Link DIR-862L Wireless AC1600 Dual Band Gigabit Router,,http://thumbs2.ebaystatic.com/d/l225/m/m6Arw8kaZ4EUbyKjHtJZLkA.jpg
711158532,LINKSYS AC1600 DUAL BAND SMART WI-FI ROUTER EA6400 BRAND NEW,,http://thumbs3.ebaystatic.com/d/l225/m/mdK7igTS7_TDD7ajfVqj-_w.jpg
711158533,Netgear AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000),,http://thumbs4.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
711158534,Panasonic ES-LA63 Cordless Rechargeable Men's Electric Shaver,,http://thumbs3.ebaystatic.com/d/l225/m/mzKKlCxbADObevcgoNjbXRg.jpg
711158535,Panasonic ARC 5 Best Mens Shaver,,http://thumbs4.ebaystatic.com/d/l225/m/mt34Y-u0okj-SqQm8Ng_rbQ.jpg
711158536,Panasonic Es8092 Wet Dry Electric Razor Shaver Cordless,,http://thumbs3.ebaystatic.com/d/l225/m/mlIxTz1LsVjXiZz2CzDquJw.jpg
711158537,Panasonic ARC4 ES-RF31-s Rechargeable Electric Shaver Wet/dry 4 Nanotech Blade,"Made for folks who need a great shave, the Panasonic electric shaver is convenient and consistent. Featuring an ergonomic design, this Panasonic ES-RF31-S is ideal for keeping a stubble-free face, so you can retain wonderfully smooth skin. With the precision blades included on the Panasonic electric shaver, you can get smooth shaves with every use. As this men's electric shaver features a gentle shaving mechanism, you can help avoid burning sensations on tender skin. Make sure you consistently get multiple perfect shaves without depleting the power with the exceptional shave time typical of this Panasonic ES-RF31-S.",http://thumbs1.ebaystatic.com/d/l225/m/mi4QM99Jq4oma5WLAL0K7Wg.jpg
711158538,"Panasonic ES3831K Single Blade Travel Shaver, Black New","Strong and trustworthy, the Panasonic electric shaver is built for folks who are worried about a wonderful shave every day. This Panasonic ES3833S is just right for taming your beard, with an easy-to-maneuver design, so you can retain wonderfully soft skin. Spend as much time as you need getting a complete shave by making use of the outstanding shave time typical of the Panasonic electric shaver. Moreover, this men's electric shaver includes precision foil blades, so you can get wonderful shaves over a prolonged period. With the gentle shaving mechanism on this Panasonic ES3833S, you can help avoid burning sensations on tender skin.",http://thumbs3.ebaystatic.com/d/l225/m/mfqMoj4xDlBFXp1ZznxCGbQ.jpg
711158539,Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades for Men,,http://thumbs1.ebaystatic.com/d/l225/m/myaZLqzt3I7O-3xXxsJ_4fQ.jpg
711158540,Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades,,http://thumbs1.ebaystatic.com/d/l225/m/mcrO4BkjBkM78XHm-aClRGg.jpg
711158543,Panasonic ES3831K Single Blade Wet & Dry Travel Shaver - New & Sealed,,http://thumbs4.ebaystatic.com/d/l225/m/mqWDU2mHsFWAuGosMIGcIMg.jpg
711158544,Panasonic ES8103S Arc 3 E W/O POUCH & MANUAL Men's Wet/Dry Rechargeable Shaver,,http://thumbs2.ebaystatic.com/d/l225/m/mZXgTj-fQfcgAlzOGQYkqFw.jpg
711158545,PANASONIC ES3831K Pro-Curve Battery Operated Travel Wet/Dry Shaver,,http://thumbs1.ebaystatic.com/d/l225/m/m8McQMCfgdp50trM_YJ88cw.jpg
711158546,PANASONIC ARC3 ES-LT33-S WET DRY WASHABLE RECHARGEABLE MEN'S ELECTRIC SHAVER NIB,,http://thumbs1.ebaystatic.com/d/l225/m/m9yUif5xyhGfh7Ag-_fcLdA.jpg
711158547,Panasonic ES-LV81-k Arc 5 Wet & Dry Rechargeable Men's Foil Shaver New,,http://thumbs1.ebaystatic.com/d/l225/m/mEfZHzDoKrH4DBfU8e_K93A.jpg
711158548,"NEW Panasonic ES-RF31-S 4 Blade Men's Electric Razor Wet/Dry, Factory Sealed",,http://thumbs2.ebaystatic.com/d/l225/m/mfhMhMoDkrGtqWW_IyqVGuQ.jpg
711158549,Panasonic ES8243A E Arc4 Men's Electric Shaver Wet/Dry,"eBay item number:181670746515
Seller assumes all responsibility for this listing.
Last updated on
&nbsp;Mar 23, 2015 08:55:50 PDT&nbsp;
View all revisions
<strong>Item specifics</strong>
<table>
<tr>
<th>Condition:</th>
<td><strong>Used</strong>
<strong>:</strong>
</td></tr></table>",http://thumbs4.ebaystatic.com/d/l225/m/mcxFUwt3FrGEEPzT7cfQn7w.jpg
711158550,Panasonic ES-3833 Wet/Dry Men Shaver Razor Battery Operate Compact Travel ES3833,,http://thumbs2.ebaystatic.com/d/l225/m/mAqa9pHisKsLSk5nqMg4JJQ.jpg
711158551,Panasonic Pro-Curve ES3831K Shaver - Dry/Wet Technology - Stainless Steel Foil,,http://thumbs3.ebaystatic.com/d/l225/m/mGqD8eGIwseT5nsM53W3uRQ.jpg
711158552,Panasonic Wet and Dry Shaver - ES-RW30s ES-RW30-S,"The Panasonic electric shaver is well-suited to shielding particularly sensitive skin and providing a smooth shave. It's both trustworthy and transportable. Because this Panasonic ES-RW30-S has a gentle shaving mechanism, you can avoid irritation and raw feeling skin in particularly tender areas. The Panasonic electric shaver is ideal for ridding yourself of stubble, with its special design, so you can sustain wonderfully supple skin. The exceptional shave time featured on this men's electric shaver helps you to make sure you consistently receive many complete shaves without depleting the power. Plus, this Panasonic ES-RW30-S features precision blades, so you can enjoy smooth shaves for months on end.",http://thumbs1.ebaystatic.com/d/l225/m/mvPElpjXmgo0NhP-P5F8LlQ.jpg
711158553,Panasonic ES-LF51-A Arc4 Electric Shaver Wet/Dry with Flexible Pivoting Head,,http://thumbs3.ebaystatic.com/d/l225/m/mC_zAQrMQKPLHdENU7N3UjQ.jpg
711158554,Panasonic ES8103S Arc3 Men's Electric Shaver Wet/Dry with Nanotech Blades,,http://thumbs3.ebaystatic.com/d/l225/m/moBByNwPn93-g-oBBceS2kw.jpg
711158555,panasonic ARC3 shaver es8103s,,http://thumbs1.ebaystatic.com/d/l225/m/mJlAp6t6OMIOaYgKnyelIMg.jpg
711158556,Panasonic ES-534 Men's Electric Shaver New ES534 Battery Operated Compact Travel,,http://thumbs3.ebaystatic.com/d/l225/m/mDr2kpZLVSdy1KTPVYK2YUg.jpg
711158557,Panasonic Portable Shaving Machine Cclippers Washable Single Blade Shaver+Brush,,http://thumbs3.ebaystatic.com/d/l225/m/mJdzJPoOALps0Lv4WtW2b0A.jpg
711158559,Baratza Solis Maestro Conical Burr Coffee Bean Grinder Works Great Nice Cond,,http://thumbs4.ebaystatic.com/d/l225/m/mdjbD7YFR6JRq-pkeajhK7w.jpg
711158560,Proctor Silex Fresh Grind Electric Coffee Bean Grinder White,,http://thumbs4.ebaystatic.com/d/l225/m/mtXoRn5Ytmqz0GLHYmBUxpA.jpg
711158561,Cuisinart 8-oz. Supreme Grind Automatic Burr Coffee Grinder,,http://thumbs4.ebaystatic.com/d/l225/m/my_9cXPvwwRVFqo6MXWfpag.jpg
1 id title description image
2 711158459 Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs2.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
3 711158460 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs3.ebaystatic.com/d/l225/m/mJNDmSyIS3vUasKIJEBy4Cw.jpg
4 711158461 Sony PlayStation 4 PS4 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs4.ebaystatic.com/d/l225/m/m10NZXArmiIkpkTDDkAUVvA.jpg
5 711158462 Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black http://thumbs2.ebaystatic.com/d/l225/m/mZZXTmAE8WZDH1l_E_PPAkg.jpg
6 711158463 Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs3.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
7 711158464 Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs4.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
8 711158465 BRAND NEW Sony PlayStation 4 BUNDLE 500gb http://thumbs4.ebaystatic.com/d/l225/m/m9TQTiWcWig7SeQh9algLZg.jpg
9 711158466 Sony PlayStation 4 500GB, Dualshock Wireless Control, HDMI Gaming Console Refurb The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs4.ebaystatic.com/d/l225/m/mTZYG5N6xWfBi4Ok03HmpMw.jpg
10 711158467 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console w/ 2 Controllers http://thumbs2.ebaystatic.com/d/l225/m/mX5Qphrygqeoi7tAH5eku2A.jpg
11 711158468 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console *NEW* The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs2.ebaystatic.com/d/l225/m/mGjN4IrJ0O8kKD_TYMWgGgQ.jpg
12 711158469 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console..wth Mortal Kombat X http://thumbs2.ebaystatic.com/d/l225/m/mrpqSNXwlnUVKnEscE4348w.jpg
13 711158470 Genuine SONY PS4 Playstation 4 500GB Gaming Console - Black http://thumbs4.ebaystatic.com/d/l225/m/myrPBFCpb4H5rHI8NyiS2zA.jpg
14 711158471 [Sony] Playstation 4 PS4 Video Game Console Black - Latest Model http://thumbs4.ebaystatic.com/d/l225/m/mce0c7mCuv3xpjllJXx093w.jpg
15 711158472 Sony PlayStation 4 (Latest Model) 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs2.ebaystatic.com/d/l225/m/miVSA1xPO5fCNdYzEMc8rSQ.jpg
16 711158473 Sony PlayStation 4 - 500 GB Jet Black Console - WITH LAST OF US REMASTERED http://thumbs2.ebaystatic.com/d/l225/m/mLjnOxv2GWkrkCtgsDGhJ6A.jpg
17 711158474 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs3.ebaystatic.com/d/l225/m/mjMittBaXmm_n4AMpETBXhQ.jpg
18 711158475 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs2.ebaystatic.com/d/l225/m/m1n1qrJ7-VGbe7xQvGdeD6Q.jpg
19 711158476 Sony PlayStation 4 - 500 GB Jet Black Console (3 controllers,3 games included) http://thumbs3.ebaystatic.com/d/l225/m/mIoGIj9FZG7HoEVkPlnyizA.jpg
20 711158477 Sony PlayStation 4 500GB Console with 2 Controllers The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs2.ebaystatic.com/d/l225/m/m4fuJ5Ibrj450-TZ83FAkIQ.jpg
21 711158478 Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black http://thumbs3.ebaystatic.com/d/l225/m/mzXSIw8Hlnff8IjXJQrXJSw.jpg
22 711158479 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs2.ebaystatic.com/d/l225/m/m-9S63CgFoUijY3ZTyNs3KA.jpg
23 711158480 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs1.ebaystatic.com/d/l225/m/mdF9Bisg9wXjv_R9Y_13MWw.jpg
24 711158481 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console* http://thumbs1.ebaystatic.com/d/l225/m/m4_OQHMmIOCa8uEkBepRR5A.jpg
25 711158482 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs2.ebaystatic.com/d/l225/m/mZ0nR8iz-QAfLssJZMp3L5Q.jpg
26 711158483 [Sony] Playstation 4 PS4 1105A Video Game Console 500GB White - Latest Model http://thumbs4.ebaystatic.com/d/l225/m/m8iTz5cLQLNjD9D3O2jT3IQ.jpg
27 711158484 NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml http://thumbs2.ebaystatic.com/d/l225/m/mrraWCpvP5YKk5rYgotVDLg.jpg
28 711158485 Obagi Elastiderm Eye Treatment Cream 0.5 oz / 15g Authentic NiB Sealed [5] http://thumbs1.ebaystatic.com/d/l225/m/mJ4ekz6_bDT5G7wYtjM-qRg.jpg
29 711158486 Lancome Renergie Eye Anti-Wrinkle & Firming Eye Cream 0.5oz New http://thumbs2.ebaystatic.com/d/l225/m/mxwwyDQraZ-TEtr_Y6qRi7Q.jpg
30 711158487 OZ Naturals - The BEST Eye Gel - Eye Cream For Dark Circles Puffiness and http://thumbs2.ebaystatic.com/d/l225/m/mk2Z-hX5sT4kUxfG6g_KFpg.jpg
31 711158488 Elastiderm Eye Cream (0.5oz/15g) http://thumbs3.ebaystatic.com/d/l225/m/mHxb5WUc5MtGzCT2UXgY_hg.jpg
32 711158489 new CLINIQUE Repairwear Laser Focus Wrinkle Correcting Eye Cream 0.17 oz/ 5 ml http://thumbs1.ebaystatic.com/d/l225/m/mQSX2wfrSeGy3uA8Q4SbOKw.jpg
33 711158490 NIB Full Size Dermalogica Multivitamin Power Firm Eye Cream http://thumbs4.ebaystatic.com/d/l225/m/m2hxo12e5NjXgGiKIaCvTLA.jpg
34 711158491 24K Gold Collagen Anti-Dark Circles Anti-Aging Bio Essence Repairing Eye Cream http://thumbs4.ebaystatic.com/d/l225/m/mt96efUK5cPAe60B9aGmgMA.jpg
35 711158492 Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream Full Size .5oz 15mL http://thumbs3.ebaystatic.com/d/l225/m/mZyV3wKejCMx9RrnC8X-eMw.jpg
36 711158493 NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml http://thumbs4.ebaystatic.com/d/l225/m/m9hX_z_DFnbNCTh0VFv3KcQ.jpg
37 711158494 3 Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17 oz/5 ml Each http://thumbs1.ebaystatic.com/d/l225/m/mYiHsrGffCg_qgkTbUWZU1A.jpg
38 711158495 Lancome High Resolution Eye Cream .95 Oz Refill-3X .25 Oz Plus .20 Oz Lot http://thumbs1.ebaystatic.com/d/l225/m/mFuQxKoEKQ6wtk2bGxfKwow.jpg
39 711158496 NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml http://thumbs4.ebaystatic.com/d/l225/m/mLBRCDiELUnYos-vFmIcc7A.jpg
40 711158497 Neutrogena Rapid Wrinkle Repair Eye Cream -0.5 Oz. -New- http://thumbs4.ebaystatic.com/d/l225/m/mE1RWpCOxkCGuuiJBX6HiBQ.jpg
41 711158498 20g Snail Repair Eye Cream Natural Anti-Dark Circles Puffiness Aging Wrinkles http://thumbs4.ebaystatic.com/d/l225/m/mh4gBNzINDwds_r778sJRjg.jpg
42 711158499 Vichy-Neovadiol GF Eye & Lip Contour Cream 0.5 Fl. Oz http://thumbs4.ebaystatic.com/d/l225/m/m_6f0ofCm7PTzuithYuZx3w.jpg
43 711158500 Obagi Elastiderm Eye Cream 0.5 oz. New In Box. 100% Authentic! New Packaging! http://thumbs2.ebaystatic.com/d/l225/m/ma0PK-ASBXUiHERR19MyImA.jpg
44 711158501 NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17oz / 5ml http://thumbs3.ebaystatic.com/d/l225/m/m72NaXYlcXcEeqQFKWvsdZA.jpg
45 711158502 Kiehl's CREAMY EYE TREATMENT cream with AVOCADO 0.5 oz FULL SIZE http://thumbs3.ebaystatic.com/d/l225/m/mOI407HnILb_tf-RgdvfYyA.jpg
46 711158503 Clinique repairwear laser focus wrinkle correcting eye cream .5 oz 15ml http://thumbs4.ebaystatic.com/d/l225/m/mQwNVst3bYG6QXouubmLaJg.jpg
47 711158504 Caudalie Premier Cru The Eye Cream La Creme New Anti Aging Eye Treatment http://thumbs1.ebaystatic.com/d/l225/m/mM4hPTAWXeOjovNk9s_Cqag.jpg
48 711158505 Jeunesse Instantly Ageless -- New Box Of 50 Sachets -- Eye - Face Wrinkle Cream http://thumbs2.ebaystatic.com/d/l225/m/m5EfWbi6ZYs4JpYcsl0Ubaw.jpg
49 711158506 VELOUR SKIN EYE CREAM .5 FL OZ 15ML NEW NIP ANTI-AGING WRINKLE CREAM http://thumbs1.ebaystatic.com/d/l225/m/m2uEf6q1yASH8FkWqYdOv1w.jpg
50 711158507 Shiseido White Lucent Anti-Dark Circles/Puffiness Eye Cream 15ml/.53oz Full Size http://thumbs1.ebaystatic.com/d/l225/m/m_CtzoqU2Vgv4GKx8ONS6qw.jpg
51 711158508 Murad Resurgence Renewing Eye Cream Anti-Aging .25 oz NEW Dark Circles Wrinkle http://thumbs1.ebaystatic.com/d/l225/m/mhWJC10iowgUDGm4KMQKNMg.jpg
52 711158509 D-Link DIR-615 300Mbps Wireless-N Router 4-Port w/Firewall http://thumbs3.ebaystatic.com/d/l225/m/mdSBH9ROXRn3TBb8OFDT6jA.jpg
53 711158510 Triton MOF001 2 1/4hp dual mode precision Router. New!! *3 day auction* http://thumbs1.ebaystatic.com/d/l225/m/mozWd2SBskbDBlWAKsMlVew.jpg
54 711158511 Porter-Cable 3-1/4 HP Five-Speed Router 7518 - Power Tools Routers http://thumbs2.ebaystatic.com/d/l225/m/mpZDTXpiyesDrZh_FLMyqXQ.jpg
55 711158512 Linksys EA6900 AC1900 Wi-Fi Wireless Router Dual Band with Gigabit &USB 3.0 Port http://thumbs4.ebaystatic.com/d/l225/m/m3OfBSnHBDhhs_Ve-DSBKQw.jpg
56 711158513 Linksys EA6500 1300 Mbps 4-Port Gigabit Wireless AC Router http://thumbs1.ebaystatic.com/d/l225/m/m7cfymJPc7CLADoTiEYFzwA.jpg
57 711158514 Makita RT0700CX3 1-1/4 Horsepower Compact Router Kit / Trimmer NEW http://thumbs2.ebaystatic.com/d/l225/m/mr-F3rCxDYsLcj8hnmaRN4A.jpg
58 711158515 NETGEAR R6250 AC1600 Smart WiFi Dual Band Gigabit Router 802.11ac 300 1300 Mbps http://thumbs4.ebaystatic.com/d/l225/m/mc8Ic8Cq2lPqPnjNGAQBBCQ.jpg
59 711158516 NETGEAR Nighthawk AC1900 Dual Band Wi-Fi Gigabit Router (R7000) BRAND NEW SEALED http://thumbs3.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
60 711158517 Netgear WNDR3400 N600 Wireless Dual Band Router (WNDR3400-100) http://thumbs4.ebaystatic.com/d/l225/m/mKr4cNk6utJXSdVYXzwrScQ.jpg
61 711158518 Netgear N600 300 Mbps 4-Port 10/100 Wireless N Router (WNDR3400) http://thumbs2.ebaystatic.com/d/l225/m/mUPdyhbW9pzEm1VbqX0YudA.jpg
62 711158519 NETGEAR N600 WNDR3400 Wireless Dual Band Router F/S http://thumbs1.ebaystatic.com/d/l225/m/my55jF5kHnG9ipzFycnjooA.jpg
63 711158520 Netgear NIGHTHAWK AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000) http://thumbs3.ebaystatic.com/d/l225/m/mrPLRTnWx_JXLNIp5pCBnzQ.jpg
64 711158521 Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router (WNDR4500) http://thumbs2.ebaystatic.com/d/l225/m/mXBL01faHlHm7Ukh188t3yQ.jpg
65 711158522 Netgear R6300V2 AC1750 1300 Mbps 4-Port Gigabit Wireless AC Router http://thumbs1.ebaystatic.com/d/l225/m/mTdnFB9Z71efYJ9I5-k186w.jpg
66 711158523 Makita RT0701C 1-1/4 HP Compact Router With FACTORY WARRANTY!!! http://thumbs2.ebaystatic.com/d/l225/m/m7AA4k3MzYFJcTlBrT3DwhA.jpg
67 711158524 CISCO LINKSYS EA4500 DUAL-BAND N9000 WIRELESS ROUTER, 802.11N, UP TO 450 MBPs http://thumbs4.ebaystatic.com/d/l225/m/mwfVIXD3dZYt_qpHyprd7hg.jpg
68 711158525 Netgear N300 v.3 300 Mbps 5-Port 10/100 Wireless N Router (WNR2000) http://thumbs4.ebaystatic.com/d/l225/m/mopRjvnZwbsVH9euqGov5kw.jpg
69 711158526 Netgear Nighthawk R7000 2330 Mbps 4-Port Gigabit Wireless N Router... http://thumbs4.ebaystatic.com/d/l225/m/mns82UY4FfqYXPgqrpJ9Bzw.jpg
70 711158527 Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router R4500 ~ FreE ShiPPinG ~ http://thumbs1.ebaystatic.com/d/l225/m/m_o0mSRmySgJUuqHYDIQiuA.jpg
71 711158528 D-Link Wireless Router Model DIR-625 http://thumbs2.ebaystatic.com/d/l225/m/mYPXwZMlDUjOQ3Sm3EtU37Q.jpg
72 711158529 D-Link DIR-657 300 Mbps 4-Port Gigabit Wireless N Router Hd Media Router 1000 Stream multiple media content - videos, music and more to multiple devices all at the same time without lag or skipping. The HD Fuel technology in the DIR-657 lets you watch Netflix and Vudu , play your Wii or Xbox 360 online or make Skype calls all without worrying about the skipping or latency you might experience with standard routers. It does so by automatically giving extra bandwidth for video, gaming and VoIP calls using HD Fuel QoS technology. The D-Link HD Media Router 1000(DIR-657) also comes equipped with 4 Gigabit ports to provide speeds up to 10x faster than standard 10/100 ports. What s more, it uses 802.11n technology with multiple intelligent antennas to maximize the speed and range of your wireless signal to significantly outperform 802.11g devices. http://thumbs1.ebaystatic.com/d/l225/m/m0xyPdWrdVKe7By4QFouVeA.jpg
73 711158530 D-Link DIR-860L AC1200 4-Port Cloud Router Gigabit Wireless 802.11 AC http://thumbs3.ebaystatic.com/d/l225/m/mk4KNj6oLm7863qCS-TqmbQ.jpg
74 711158531 D-Link DIR-862L Wireless AC1600 Dual Band Gigabit Router http://thumbs2.ebaystatic.com/d/l225/m/m6Arw8kaZ4EUbyKjHtJZLkA.jpg
75 711158532 LINKSYS AC1600 DUAL BAND SMART WI-FI ROUTER EA6400 BRAND NEW http://thumbs3.ebaystatic.com/d/l225/m/mdK7igTS7_TDD7ajfVqj-_w.jpg
76 711158533 Netgear AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000) http://thumbs4.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
77 711158534 Panasonic ES-LA63 Cordless Rechargeable Men's Electric Shaver http://thumbs3.ebaystatic.com/d/l225/m/mzKKlCxbADObevcgoNjbXRg.jpg
78 711158535 Panasonic ARC 5 Best Mens Shaver http://thumbs4.ebaystatic.com/d/l225/m/mt34Y-u0okj-SqQm8Ng_rbQ.jpg
79 711158536 Panasonic Es8092 Wet Dry Electric Razor Shaver Cordless http://thumbs3.ebaystatic.com/d/l225/m/mlIxTz1LsVjXiZz2CzDquJw.jpg
80 711158537 Panasonic ARC4 ES-RF31-s Rechargeable Electric Shaver Wet/dry 4 Nanotech Blade Made for folks who need a great shave, the Panasonic electric shaver is convenient and consistent. Featuring an ergonomic design, this Panasonic ES-RF31-S is ideal for keeping a stubble-free face, so you can retain wonderfully smooth skin. With the precision blades included on the Panasonic electric shaver, you can get smooth shaves with every use. As this men's electric shaver features a gentle shaving mechanism, you can help avoid burning sensations on tender skin. Make sure you consistently get multiple perfect shaves without depleting the power with the exceptional shave time typical of this Panasonic ES-RF31-S. http://thumbs1.ebaystatic.com/d/l225/m/mi4QM99Jq4oma5WLAL0K7Wg.jpg
81 711158538 Panasonic ES3831K Single Blade Travel Shaver, Black New Strong and trustworthy, the Panasonic electric shaver is built for folks who are worried about a wonderful shave every day. This Panasonic ES3833S is just right for taming your beard, with an easy-to-maneuver design, so you can retain wonderfully soft skin. Spend as much time as you need getting a complete shave by making use of the outstanding shave time typical of the Panasonic electric shaver. Moreover, this men's electric shaver includes precision foil blades, so you can get wonderful shaves over a prolonged period. With the gentle shaving mechanism on this Panasonic ES3833S, you can help avoid burning sensations on tender skin. http://thumbs3.ebaystatic.com/d/l225/m/mfqMoj4xDlBFXp1ZznxCGbQ.jpg
82 711158539 Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades for Men http://thumbs1.ebaystatic.com/d/l225/m/myaZLqzt3I7O-3xXxsJ_4fQ.jpg
83 711158540 Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades http://thumbs1.ebaystatic.com/d/l225/m/mcrO4BkjBkM78XHm-aClRGg.jpg
84 711158543 Panasonic ES3831K Single Blade Wet & Dry Travel Shaver - New & Sealed http://thumbs4.ebaystatic.com/d/l225/m/mqWDU2mHsFWAuGosMIGcIMg.jpg
85 711158544 Panasonic ES8103S Arc 3 E W/O POUCH & MANUAL Men's Wet/Dry Rechargeable Shaver http://thumbs2.ebaystatic.com/d/l225/m/mZXgTj-fQfcgAlzOGQYkqFw.jpg
86 711158545 PANASONIC ES3831K Pro-Curve Battery Operated Travel Wet/Dry Shaver http://thumbs1.ebaystatic.com/d/l225/m/m8McQMCfgdp50trM_YJ88cw.jpg
87 711158546 PANASONIC ARC3 ES-LT33-S WET DRY WASHABLE RECHARGEABLE MEN'S ELECTRIC SHAVER NIB http://thumbs1.ebaystatic.com/d/l225/m/m9yUif5xyhGfh7Ag-_fcLdA.jpg
88 711158547 Panasonic ES-LV81-k Arc 5 Wet & Dry Rechargeable Men's Foil Shaver New http://thumbs1.ebaystatic.com/d/l225/m/mEfZHzDoKrH4DBfU8e_K93A.jpg
89 711158548 NEW Panasonic ES-RF31-S 4 Blade Men's Electric Razor Wet/Dry, Factory Sealed http://thumbs2.ebaystatic.com/d/l225/m/mfhMhMoDkrGtqWW_IyqVGuQ.jpg
90 711158549 Panasonic ES8243A E Arc4 Men's Electric Shaver Wet/Dry eBay item number:181670746515 Seller assumes all responsibility for this listing. Last updated on &nbsp;Mar 23, 2015 08:55:50 PDT&nbsp; View all revisions <strong>Item specifics</strong> <table> <tr> <th>Condition:</th> <td><strong>Used</strong> <strong>:</strong> </td></tr></table> http://thumbs4.ebaystatic.com/d/l225/m/mcxFUwt3FrGEEPzT7cfQn7w.jpg
91 711158550 Panasonic ES-3833 Wet/Dry Men Shaver Razor Battery Operate Compact Travel ES3833 http://thumbs2.ebaystatic.com/d/l225/m/mAqa9pHisKsLSk5nqMg4JJQ.jpg
92 711158551 Panasonic Pro-Curve ES3831K Shaver - Dry/Wet Technology - Stainless Steel Foil http://thumbs3.ebaystatic.com/d/l225/m/mGqD8eGIwseT5nsM53W3uRQ.jpg
93 711158552 Panasonic Wet and Dry Shaver - ES-RW30s ES-RW30-S The Panasonic electric shaver is well-suited to shielding particularly sensitive skin and providing a smooth shave. It's both trustworthy and transportable. Because this Panasonic ES-RW30-S has a gentle shaving mechanism, you can avoid irritation and raw feeling skin in particularly tender areas. The Panasonic electric shaver is ideal for ridding yourself of stubble, with its special design, so you can sustain wonderfully supple skin. The exceptional shave time featured on this men's electric shaver helps you to make sure you consistently receive many complete shaves without depleting the power. Plus, this Panasonic ES-RW30-S features precision blades, so you can enjoy smooth shaves for months on end. http://thumbs1.ebaystatic.com/d/l225/m/mvPElpjXmgo0NhP-P5F8LlQ.jpg
94 711158553 Panasonic ES-LF51-A Arc4 Electric Shaver Wet/Dry with Flexible Pivoting Head http://thumbs3.ebaystatic.com/d/l225/m/mC_zAQrMQKPLHdENU7N3UjQ.jpg
95 711158554 Panasonic ES8103S Arc3 Men's Electric Shaver Wet/Dry with Nanotech Blades http://thumbs3.ebaystatic.com/d/l225/m/moBByNwPn93-g-oBBceS2kw.jpg
96 711158555 panasonic ARC3 shaver es8103s http://thumbs1.ebaystatic.com/d/l225/m/mJlAp6t6OMIOaYgKnyelIMg.jpg
97 711158556 Panasonic ES-534 Men's Electric Shaver New ES534 Battery Operated Compact Travel http://thumbs3.ebaystatic.com/d/l225/m/mDr2kpZLVSdy1KTPVYK2YUg.jpg
98 711158557 Panasonic Portable Shaving Machine Cclippers Washable Single Blade Shaver+Brush http://thumbs3.ebaystatic.com/d/l225/m/mJdzJPoOALps0Lv4WtW2b0A.jpg
99 711158559 Baratza Solis Maestro Conical Burr Coffee Bean Grinder Works Great Nice Cond http://thumbs4.ebaystatic.com/d/l225/m/mdjbD7YFR6JRq-pkeajhK7w.jpg
100 711158560 Proctor Silex Fresh Grind Electric Coffee Bean Grinder White http://thumbs4.ebaystatic.com/d/l225/m/mtXoRn5Ytmqz0GLHYmBUxpA.jpg
101 711158561 Cuisinart 8-oz. Supreme Grind Automatic Burr Coffee Grinder http://thumbs4.ebaystatic.com/d/l225/m/my_9cXPvwwRVFqo6MXWfpag.jpg

View File

@ -1,19 +0,0 @@
# This schema has been generated ...
# The order in which the attributes are declared is important,
# it specify the attribute xxx...
identifier = "id"
[attributes.id]
displayed = true
[attributes.title]
displayed = true
indexed = true
[attributes.description]
displayed = true
indexed = true
[attributes.image]
displayed = true

View File

@ -1 +0,0 @@
_datas in movies.csv are from https://www.themoviedb.org/_

View File

@ -1,21 +0,0 @@
# This schema has been generated ...
# The order in which the attributes are declared is important,
# it specify the attribute xxx...
identifier = "id"
[attributes.id]
displayed = true
[attributes.title]
displayed = true
indexed = true
[attributes.overview]
displayed = true
indexed = true
[attributes.release_date]
displayed = true
[attributes.poster]
displayed = true

View File

@ -1,34 +0,0 @@
[package]
name = "meilidb-core"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
byteorder = "1.3.1"
deunicode = "1.0.0"
hashbrown = "0.6.0"
lazy_static = "1.2.0"
log = "0.4.6"
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
rayon = "1.2.0"
sdset = "0.3.2"
serde = { version = "1.0.88", features = ["derive"] }
slice-group-by = "0.2.6"
zerocopy = "0.2.8"
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dependencies.levenshtein_automata]
git = "https://github.com/Kerollmops/levenshtein-automata.git"
branch = "arc-byte-slice"
features = ["fst_automaton"]
[dev-dependencies]
assert_matches = "1.3"
[features]
i128 = ["byteorder/i128"]
nightly = ["hashbrown/nightly", "slice-group-by/nightly"]

View File

@ -1,44 +0,0 @@
use lazy_static::lazy_static;
use levenshtein_automata::{
LevenshteinAutomatonBuilder as LevBuilder,
DFA,
};
lazy_static! {
static ref LEVDIST0: LevBuilder = LevBuilder::new(0, false);
static ref LEVDIST1: LevBuilder = LevBuilder::new(1, false);
static ref LEVDIST2: LevBuilder = LevBuilder::new(2, false);
}
#[derive(Copy, Clone)]
enum PrefixSetting {
Prefix,
NoPrefix,
}
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
use self::PrefixSetting::{Prefix, NoPrefix};
match query.len() {
0 ..= 4 => match setting {
Prefix => LEVDIST0.build_prefix_dfa(query),
NoPrefix => LEVDIST0.build_dfa(query),
},
5 ..= 8 => match setting {
Prefix => LEVDIST1.build_prefix_dfa(query),
NoPrefix => LEVDIST1.build_dfa(query),
},
_ => match setting {
Prefix => LEVDIST2.build_prefix_dfa(query),
NoPrefix => LEVDIST2.build_dfa(query),
},
}
}
pub fn build_prefix_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::Prefix)
}
pub fn build_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}

View File

@ -1,16 +0,0 @@
use std::cmp::Ordering;
use crate::criterion::Criterion;
use crate::RawDocument;
#[derive(Debug, Clone, Copy)]
pub struct DocumentId;
impl Criterion for DocumentId {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
lhs.id.cmp(&rhs.id)
}
fn name(&self) -> &'static str {
"DocumentId"
}
}

View File

@ -1,65 +0,0 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
let mut count = 0;
let mut index = 0;
for group in query_index.linear_group() {
let len = group.len();
count += is_exact[index..index + len].contains(&true) as usize;
index += len;
}
count
}
#[derive(Debug, Clone, Copy)]
pub struct Exact;
impl Criterion for Exact {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let is_exact = lhs.is_exact();
number_exact_matches(query_index, is_exact)
};
let rhs = {
let query_index = rhs.query_index();
let is_exact = rhs.is_exact();
number_exact_matches(query_index, is_exact)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"Exact"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: "Soulier bleu"
// doc1: "souliereres rouge"
#[test]
fn easy_case() {
let query_index0 = &[0];
let is_exact0 = &[true];
let query_index1 = &[0];
let is_exact1 = &[false];
let doc0 = number_exact_matches(query_index0, is_exact0);
let doc1 = number_exact_matches(query_index1, is_exact1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}

View File

@ -1,120 +0,0 @@
mod sum_of_typos;
mod number_of_words;
mod words_proximity;
mod sum_of_words_attribute;
mod sum_of_words_position;
mod exact;
mod document_id;
use std::cmp::Ordering;
use crate::RawDocument;
pub use self::{
sum_of_typos::SumOfTypos,
number_of_words::NumberOfWords,
words_proximity::WordsProximity,
sum_of_words_attribute::SumOfWordsAttribute,
sum_of_words_position::SumOfWordsPosition,
exact::Exact,
document_id::DocumentId,
};
pub trait Criterion: Send + Sync {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
fn name(&self) -> &'static str;
#[inline]
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
self.evaluate(lhs, rhs) == Ordering::Equal
}
}
impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
(**self).evaluate(lhs, rhs)
}
fn name(&self) -> &'static str {
(**self).name()
}
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
(**self).eq(lhs, rhs)
}
}
impl<T: Criterion + ?Sized> Criterion for Box<T> {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
(**self).evaluate(lhs, rhs)
}
fn name(&self) -> &'static str {
(**self).name()
}
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
(**self).eq(lhs, rhs)
}
}
#[derive(Default)]
pub struct CriteriaBuilder<'a> {
inner: Vec<Box<dyn Criterion + 'a>>
}
impl<'a> CriteriaBuilder<'a>
{
pub fn new() -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::new() }
}
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::with_capacity(capacity) }
}
pub fn reserve(&mut self, additional: usize) {
self.inner.reserve(additional)
}
pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
where C: Criterion,
{
self.push(criterion);
self
}
pub fn push<C: 'a>(&mut self, criterion: C)
where C: Criterion,
{
self.inner.push(Box::new(criterion));
}
pub fn build(self) -> Criteria<'a> {
Criteria { inner: self.inner }
}
}
pub struct Criteria<'a> {
inner: Vec<Box<dyn Criterion + 'a>>,
}
impl<'a> Default for Criteria<'a> {
fn default() -> Self {
CriteriaBuilder::with_capacity(7)
.add(SumOfTypos)
.add(NumberOfWords)
.add(WordsProximity)
.add(SumOfWordsAttribute)
.add(SumOfWordsPosition)
.add(Exact)
.add(DocumentId)
.build()
}
}
impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
fn as_ref(&self) -> &[Box<dyn Criterion + 'a>] {
&self.inner
}
}

View File

@ -1,31 +0,0 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn number_of_query_words(query_index: &[u32]) -> usize {
query_index.linear_group().count()
}
#[derive(Debug, Clone, Copy)]
pub struct NumberOfWords;
impl Criterion for NumberOfWords {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
number_of_query_words(query_index)
};
let rhs = {
let query_index = rhs.query_index();
number_of_query_words(query_index)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"NumberOfWords"
}
}

View File

@ -1,116 +0,0 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
// This function is a wrong logarithmic 10 function.
// It is safe to panic on input number higher than 3,
// the number of typos is never bigger than that.
#[inline]
fn custom_log10(n: u8) -> f32 {
match n {
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
_ => panic!("invalid number"),
}
}
#[inline]
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
let mut number_words: usize = 0;
let mut sum_typos = 0.0;
let mut index = 0;
for group in query_index.linear_group() {
sum_typos += custom_log10(distance[index]);
number_words += 1;
index += group.len();
}
(number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfTypos;
impl Criterion for SumOfTypos {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let distance = lhs.distance();
sum_matches_typos(query_index, distance)
};
let rhs = {
let query_index = rhs.query_index();
let distance = rhs.distance();
sum_matches_typos(query_index, distance)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"SumOfTypos"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "Geox CEO"
//
// doc0: "Geox SpA: CEO and Executive"
// doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
#[test]
fn one_typo_reference() {
let query_index0 = &[0, 1];
let distance0 = &[0, 0];
let query_index1 = &[0, 1];
let distance1 = &[1, 0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchette"
//
// doc0: "bouton manchette"
// doc1: "bouton"
#[test]
fn no_typo() {
let query_index0 = &[0, 1];
let distance0 = &[0, 0];
let query_index1 = &[0];
let distance1 = &[0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchztte"
//
// doc0: "bouton manchette"
// doc1: "bouton"
#[test]
fn one_typo() {
let query_index0 = &[0, 1];
let distance0 = &[0, 1];
let query_index1 = &[0];
let distance1 = &[0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}

View File

@ -1,64 +0,0 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
let mut sum_attributes = 0;
let mut index = 0;
for group in query_index.linear_group() {
sum_attributes += attribute[index] as usize;
index += group.len();
}
sum_attributes
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsAttribute;
impl Criterion for SumOfWordsAttribute {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let attribute = lhs.attribute();
sum_matches_attributes(query_index, attribute)
};
let rhs = {
let query_index = rhs.query_index();
let attribute = rhs.attribute();
sum_matches_attributes(query_index, attribute)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"SumOfWordsAttribute"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: { 0. "Soulier bleu", 1. "bla bla bla" }
// doc1: { 0. "Botte rouge", 1. "Soulier en cuir" }
#[test]
fn title_vs_description() {
let query_index0 = &[0];
let attribute0 = &[0];
let query_index1 = &[0];
let attribute1 = &[1];
let doc0 = sum_matches_attributes(query_index0, attribute0);
let doc1 = sum_matches_attributes(query_index1, attribute1);
assert_eq!(doc0.cmp(&doc1), Ordering::Less);
}
}

View File

@ -1,64 +0,0 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
let mut sum_word_index = 0;
let mut index = 0;
for group in query_index.linear_group() {
sum_word_index += word_index[index] as usize;
index += group.len();
}
sum_word_index
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsPosition;
impl Criterion for SumOfWordsPosition {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let word_index = lhs.word_index();
sum_matches_attribute_index(query_index, word_index)
};
let rhs = {
let query_index = rhs.query_index();
let word_index = rhs.word_index();
sum_matches_attribute_index(query_index, word_index)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"SumOfWordsPosition"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: "Soulier bleu"
// doc1: "Botte rouge et soulier noir"
#[test]
fn easy_case() {
let query_index0 = &[0];
let word_index0 = &[0];
let query_index1 = &[0];
let word_index1 = &[3];
let doc0 = sum_matches_attribute_index(query_index0, word_index0);
let doc1 = sum_matches_attribute_index(query_index1, word_index1);
assert_eq!(doc0.cmp(&doc1), Ordering::Less);
}
}

View File

@ -1,155 +0,0 @@
use std::cmp::{self, Ordering};
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
const MAX_DISTANCE: u16 = 8;
#[inline]
fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) {
(a.clone(), b.clone())
}
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)
} else {
cmp::min(lhs - rhs, MAX_DISTANCE) + 1
}
}
fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
if lattr != rattr { return MAX_DISTANCE }
index_proximity(lwi, rwi)
}
fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
let mut min_prox = u16::max_value();
for a in lattr.iter().zip(lwi) {
for b in rattr.iter().zip(rwi) {
let a = clone_tuple(a);
let b = clone_tuple(b);
min_prox = cmp::min(min_prox, attribute_proximity(a, b));
}
}
min_prox
}
fn matches_proximity(
query_index: &[u32],
distance: &[u8],
attribute: &[u16],
word_index: &[u16],
) -> u16
{
let mut query_index_groups = query_index.linear_group();
let mut proximity = 0;
let mut index = 0;
let get_attr_wi = |index: usize, group_len: usize| {
// retrieve the first distance group (with the lowest values)
let len = distance[index..index + group_len].linear_group().next().unwrap().len();
let rattr = &attribute[index..index + len];
let rwi = &word_index[index..index + len];
(rattr, rwi)
};
let mut last = query_index_groups.next().map(|group| {
let attr_wi = get_attr_wi(index, group.len());
index += group.len();
attr_wi
});
// iter by windows of size 2
while let (Some(lhs), Some(rhs)) = (last, query_index_groups.next()) {
let attr_wi = get_attr_wi(index, rhs.len());
proximity += min_proximity(lhs, attr_wi);
last = Some(attr_wi);
index += rhs.len();
}
proximity
}
#[derive(Debug, Clone, Copy)]
pub struct WordsProximity;
impl Criterion for WordsProximity {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let distance = lhs.distance();
let attribute = lhs.attribute();
let word_index = lhs.word_index();
matches_proximity(query_index, distance, attribute, word_index)
};
let rhs = {
let query_index = rhs.query_index();
let distance = rhs.distance();
let attribute = rhs.attribute();
let word_index = rhs.word_index();
matches_proximity(query_index, distance, attribute, word_index)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"WordsProximity"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn three_different_attributes() {
// "soup" "of the" "the day"
//
// { id: 0, attr: 0, attr_index: 0 }
// { id: 1, attr: 1, attr_index: 0 }
// { id: 2, attr: 1, attr_index: 1 }
// { id: 2, attr: 2, attr_index: 0 }
// { id: 3, attr: 3, attr_index: 1 }
let query_index = &[0, 1, 2, 2, 3];
let distance = &[0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 2, 3];
let word_index = &[0, 0, 1, 0, 1];
// soup -> of = 8
// + of -> the = 1
// + the -> day = 8 (not 1)
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 17);
}
#[test]
fn two_different_attributes() {
// "soup day" "soup of the day"
//
// { id: 0, attr: 0, attr_index: 0 }
// { id: 0, attr: 1, attr_index: 0 }
// { id: 1, attr: 1, attr_index: 1 }
// { id: 2, attr: 1, attr_index: 2 }
// { id: 3, attr: 0, attr_index: 1 }
// { id: 3, attr: 1, attr_index: 3 }
let query_index = &[0, 0, 1, 2, 3, 3];
let distance = &[0, 0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 1, 0, 1];
let word_index = &[0, 0, 1, 2, 1, 3];
// soup -> of = 1
// + of -> the = 1
// + the -> day = 1
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 3);
}
}

View File

@ -1,144 +0,0 @@
#![feature(checked_duration_since)]
#[cfg(test)]
#[macro_use] extern crate assert_matches;
mod automaton;
mod distinct_map;
mod query_builder;
mod query_enhancer;
mod raw_document;
mod reordered_attrs;
mod store;
pub mod criterion;
use serde::{Serialize, Deserialize};
use zerocopy::{AsBytes, FromBytes};
use self::raw_document::raw_documents_from;
pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str};
pub use self::raw_document::RawDocument;
pub use self::store::Store;
/// Represent an internally generated document unique identifier.
///
/// It is used to inform the database the document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentId(pub u64);
/// This structure represent the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocIndex {
/// The document identifier where the word was found.
pub document_id: DocumentId,
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
pub word_index: u16,
/// The position in bytes where the word was found
/// along with the length of it.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
pub char_length: u16,
}
/// This structure represent a matching word with informations
/// on the location of the word in the document.
///
/// The order of the field is important because it defines
/// the way these structures are ordered between themselves.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Highlight {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
/// The position in bytes where the word was found.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
/// The length in bytes of the found word.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_length: u16,
}
#[doc(hidden)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TmpMatch {
pub query_index: u32,
pub distance: u8,
pub attribute: u16,
pub word_index: u16,
pub is_exact: bool,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Document {
pub id: DocumentId,
pub highlights: Vec<Highlight>,
#[cfg(test)]
pub matches: Vec<TmpMatch>,
}
impl Document {
#[cfg(not(test))]
fn from_raw(raw: RawDocument) -> Document {
Document { id: raw.id, highlights: raw.highlights }
}
#[cfg(test)]
fn from_raw(raw: RawDocument) -> Document {
let len = raw.query_index().len();
let mut matches = Vec::with_capacity(len);
let query_index = raw.query_index();
let distance = raw.distance();
let attribute = raw.attribute();
let word_index = raw.word_index();
let is_exact = raw.is_exact();
for i in 0..len {
let match_ = TmpMatch {
query_index: query_index[i],
distance: distance[i],
attribute: attribute[i],
word_index: word_index[i],
is_exact: is_exact[i],
};
matches.push(match_);
}
Document { id: raw.id, matches, highlights: raw.highlights }
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::mem;
#[test]
fn docindex_mem_size() {
assert_eq!(mem::size_of::<DocIndex>(), 16);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,398 +0,0 @@
use std::ops::Range;
use std::cmp::Ordering::{Less, Greater, Equal};
/// Return `true` if the specified range can accept the given replacements words.
/// Returns `false` if the replacements words are already present in the original query
/// or if there is fewer replacement words than the range to replace.
//
//
// ## Ignored because already present in original
//
// new york city subway
// -------- ^^^^
// / \
// [new york city]
//
//
// ## Ignored because smaller than the original
//
// new york city subway
// -------------
// \ /
// [new york]
//
//
// ## Accepted because bigger than the original
//
// NYC subway
// ---
// / \
// / \
// / \
// / \
// / \
// [new york city]
//
fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
where S: AsRef<str>,
T: AsRef<str>,
{
if words.len() <= range.len() {
// there is fewer or equal replacement words
// than there is already in the replaced range
return false
}
// retrieve the part to rewrite but with the length
// of the replacement part
let original = query.iter().skip(range.start).take(words.len());
// check if the original query doesn't already contain
// the replacement words
!original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref))
}
type Origin = usize;
type RealLength = usize;
struct FakeIntervalTree {
intervals: Vec<(Range<usize>, (Origin, RealLength))>,
}
impl FakeIntervalTree {
fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end));
FakeIntervalTree { intervals }
}
fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> {
let element = self.intervals.binary_search_by(|(r, _)| {
if point >= r.start {
if point < r.end { Equal } else { Less }
} else { Greater }
});
let n = match element { Ok(n) => n, Err(n) => n };
match self.intervals.get(n) {
Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)),
_otherwise => None,
}
}
}
pub struct QueryEnhancerBuilder<'a, S> {
query: &'a [S],
origins: Vec<usize>,
real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>,
}
impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
// we initialize origins query indices based on their positions
let origins: Vec<_> = (0..query.len() + 1).collect();
let real_to_origin = origins.iter().map(|&o| (o..o+1, (o, 1))).collect();
QueryEnhancerBuilder { query, origins, real_to_origin }
}
/// Update the final real to origin query indices mapping.
///
/// `range` is the original words range that this `replacement` words replace
/// and `real` is the first real query index of these replacement words.
pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
where T: AsRef<str>,
{
// check if the range of original words
// can be rewritten with the replacement words
if rewrite_range_with(self.query, range.clone(), replacement) {
// this range can be replaced so we need to
// modify the origins accordingly
let offset = replacement.len() - range.len();
let previous_padding = self.origins[range.end - 1];
let current_offset = (self.origins[range.end] - 1) - previous_padding;
let diff = offset.saturating_sub(current_offset);
self.origins[range.end] += diff;
for r in &mut self.origins[range.end + 1..] {
*r += diff;
}
}
// we need to store the real number and origins relations
// this way it will be possible to know by how many
// we need to pad real query indices
let real_range = real..real + replacement.len().max(range.len());
let real_length = replacement.len();
self.real_to_origin.push((real_range, (range.start, real_length)));
}
pub fn build(self) -> QueryEnhancer {
QueryEnhancer {
origins: self.origins,
real_to_origin: FakeIntervalTree::new(self.real_to_origin),
}
}
}
pub struct QueryEnhancer {
origins: Vec<usize>,
real_to_origin: FakeIntervalTree,
}
impl QueryEnhancer {
/// Returns the query indices to use to replace this real query index.
pub fn replacement(&self, real: u32) -> Range<u32> {
let real = real as usize;
// query the fake interval tree with the real query index
let (range, (origin, real_length)) =
self.real_to_origin
.query(real)
.expect("real has never been declared");
// if `real` is the end bound of the range
if (range.start + real_length - 1) == real {
let mut count = range.len();
let mut new_origin = origin;
for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
let len = slice[1] - slice[0];
count = count.saturating_sub(len);
if count == 0 { new_origin = origin + i; break }
}
let n = real - range.start;
let start = self.origins[origin];
let end = self.origins[new_origin + 1];
let remaining = (end - start) - n;
Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
} else {
// just return the origin along with
// the real position of the word
let n = real as usize - range.start;
let origin = self.origins[origin];
Range { start: (origin + n) as u32, end: (origin + n + 1) as u32 }
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn original_unmodified() {
let query = ["new", "york", "city", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// new york = new york city
builder.declare(0..2, 4, &["new", "york", "city"]);
// ^ 4 5 6
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // new
assert_eq!(enhancer.replacement(1), 1..2); // york
assert_eq!(enhancer.replacement(2), 2..3); // city
assert_eq!(enhancer.replacement(3), 3..4); // subway
assert_eq!(enhancer.replacement(4), 0..1); // new
assert_eq!(enhancer.replacement(5), 1..2); // york
assert_eq!(enhancer.replacement(6), 2..3); // city
}
#[test]
fn simple_growing() {
let query = ["new", "york", "subway"];
// 0 1 2
let mut builder = QueryEnhancerBuilder::new(&query);
// new york = new york city
builder.declare(0..2, 3, &["new", "york", "city"]);
// ^ 3 4 5
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // new
assert_eq!(enhancer.replacement(1), 1..3); // york
assert_eq!(enhancer.replacement(2), 3..4); // subway
assert_eq!(enhancer.replacement(3), 0..1); // new
assert_eq!(enhancer.replacement(4), 1..2); // york
assert_eq!(enhancer.replacement(5), 2..3); // city
}
#[test]
fn same_place_growings() {
let query = ["NY", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NY = new york
builder.declare(0..1, 2, &["new", "york"]);
// ^ 2 3
// NY = new york city
builder.declare(0..1, 4, &["new", "york", "city"]);
// ^ 4 5 6
// NY = NYC
builder.declare(0..1, 7, &["NYC"]);
// ^ 7
// NY = new york city
builder.declare(0..1, 8, &["new", "york", "city"]);
// ^ 8 9 10
// subway = underground train
builder.declare(1..2, 11, &["underground", "train"]);
// ^ 11 12
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..3); // NY
assert_eq!(enhancer.replacement(1), 3..5); // subway
assert_eq!(enhancer.replacement(2), 0..1); // new
assert_eq!(enhancer.replacement(3), 1..3); // york
assert_eq!(enhancer.replacement(4), 0..1); // new
assert_eq!(enhancer.replacement(5), 1..2); // york
assert_eq!(enhancer.replacement(6), 2..3); // city
assert_eq!(enhancer.replacement(7), 0..3); // NYC
assert_eq!(enhancer.replacement(8), 0..1); // new
assert_eq!(enhancer.replacement(9), 1..2); // york
assert_eq!(enhancer.replacement(10), 2..3); // city
assert_eq!(enhancer.replacement(11), 3..4); // underground
assert_eq!(enhancer.replacement(12), 4..5); // train
}
#[test]
fn bigger_growing() {
let query = ["NYC", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(0..1, 2, &["new", "york", "city"]);
// ^ 2 3 4
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..3); // NYC
assert_eq!(enhancer.replacement(1), 3..4); // subway
assert_eq!(enhancer.replacement(2), 0..1); // new
assert_eq!(enhancer.replacement(3), 1..2); // york
assert_eq!(enhancer.replacement(4), 2..3); // city
}
#[test]
fn middle_query_growing() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..6); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
}
#[test]
fn end_query_growing() {
let query = ["NYC", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(1..2, 2, &["underground", "train"]);
// ^ 2 3
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // NYC
assert_eq!(enhancer.replacement(1), 1..3); // subway
assert_eq!(enhancer.replacement(2), 1..2); // underground
assert_eq!(enhancer.replacement(3), 2..3); // train
}
#[test]
fn multiple_growings() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
// subway = underground train
builder.declare(3..4, 7, &["underground", "train"]);
// ^ 7 8
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..7); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
assert_eq!(enhancer.replacement(7), 5..6); // underground
assert_eq!(enhancer.replacement(8), 6..7); // train
}
#[test]
fn multiple_probable_growings() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
// subway = underground train
builder.declare(3..4, 7, &["underground", "train"]);
// ^ 7 8
// great awesome = good
builder.declare(0..2, 9, &["good"]);
// ^ 9
// awesome NYC = NY
builder.declare(1..3, 10, &["NY"]);
// ^^ 10
// NYC subway = metro
builder.declare(2..4, 11, &["metro"]);
// ^^ 11
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..7); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
assert_eq!(enhancer.replacement(7), 5..6); // underground
assert_eq!(enhancer.replacement(8), 6..7); // train
assert_eq!(enhancer.replacement(9), 0..2); // good
assert_eq!(enhancer.replacement(10), 1..5); // NY
assert_eq!(enhancer.replacement(11), 2..5); // metro
}
}

View File

@ -1,141 +0,0 @@
use std::sync::Arc;
use std::fmt;
use sdset::SetBuf;
use slice_group_by::GroupBy;
use crate::{TmpMatch, DocumentId, Highlight};
#[derive(Clone)]
pub struct RawDocument {
pub id: DocumentId,
pub matches: SharedMatches,
pub highlights: Vec<Highlight>,
}
impl RawDocument {
fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
RawDocument { id, matches, highlights }
}
pub fn query_index(&self) -> &[u32] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
}
pub fn distance(&self) -> &[u8] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
}
pub fn attribute(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
}
pub fn word_index(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
}
pub fn is_exact(&self) -> &[bool] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
}
}
impl fmt::Debug for RawDocument {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("RawDocument {\r\n")?;
f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "query_index", self.query_index()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "distance", self.distance()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "attribute", self.attribute()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "word_index", self.word_index()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "is_exact", self.is_exact()))?;
f.write_str("}")?;
Ok(())
}
}
pub fn raw_documents_from(
matches: SetBuf<(DocumentId, TmpMatch)>,
highlights: SetBuf<(DocumentId, Highlight)>,
) -> Vec<RawDocument>
{
let mut docs_ranges: Vec<(_, Range, _)> = Vec::new();
let mut matches2 = Matches::with_capacity(matches.len());
let matches = matches.linear_group_by_key(|(id, _)| *id);
let highlights = highlights.linear_group_by_key(|(id, _)| *id);
for (mgroup, hgroup) in matches.zip(highlights) {
debug_assert_eq!(mgroup[0].0, hgroup[0].0);
let document_id = mgroup[0].0;
let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0);
let end = start + mgroup.len();
let highlights = hgroup.iter().map(|(_, h)| *h).collect();
docs_ranges.push((document_id, Range { start, end }, highlights));
matches2.extend_from_slice(mgroup);
}
let matches = Arc::new(matches2);
docs_ranges.into_iter().map(|(id, range, highlights)| {
let matches = SharedMatches { range, matches: matches.clone() };
RawDocument::new(id, matches, highlights)
}).collect()
}
#[derive(Debug, Copy, Clone)]
struct Range {
start: usize,
end: usize,
}
#[derive(Clone)]
pub struct SharedMatches {
range: Range,
matches: Arc<Matches>,
}
#[derive(Clone)]
struct Matches {
query_index: Vec<u32>,
distance: Vec<u8>,
attribute: Vec<u16>,
word_index: Vec<u16>,
is_exact: Vec<bool>,
}
impl Matches {
fn with_capacity(cap: usize) -> Matches {
Matches {
query_index: Vec::with_capacity(cap),
distance: Vec::with_capacity(cap),
attribute: Vec::with_capacity(cap),
word_index: Vec::with_capacity(cap),
is_exact: Vec::with_capacity(cap),
}
}
fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch)]) {
for (_, match_) in matches {
self.query_index.push(match_.query_index);
self.distance.push(match_.distance);
self.attribute.push(match_.attribute);
self.word_index.push(match_.word_index);
self.is_exact.push(match_.is_exact);
}
}
}

View File

@ -1,24 +0,0 @@
#[derive(Default, Clone)]
pub struct ReorderedAttrs {
count: usize,
reorders: Vec<Option<u16>>,
}
impl ReorderedAttrs {
pub fn new() -> ReorderedAttrs {
ReorderedAttrs { count: 0, reorders: Vec::new() }
}
pub fn insert_attribute(&mut self, attribute: u16) {
self.reorders.resize(attribute as usize + 1, None);
self.reorders[attribute as usize] = Some(self.count as u16);
self.count += 1;
}
pub fn get(&self, attribute: u16) -> Option<u16> {
match self.reorders.get(attribute as usize) {
Some(Some(attribute)) => Some(*attribute),
_ => None,
}
}
}

View File

@ -1,34 +0,0 @@
use std::error::Error;
use fst::Set;
use sdset::SetBuf;
use crate::DocIndex;
pub trait Store {
type Error: Error;
fn words(&self) -> Result<&Set, Self::Error>;
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;
fn synonyms(&self) -> Result<&Set, Self::Error>;
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>;
}
impl<T> Store for &'_ T where T: Store {
type Error = T::Error;
fn words(&self) -> Result<&Set, Self::Error> {
(*self).words()
}
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
(*self).word_indexes(word)
}
fn synonyms(&self) -> Result<&Set, Self::Error> {
(*self).synonyms()
}
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
(*self).alternatives_to(word)
}
}

View File

@ -1,41 +0,0 @@
[package]
name = "meilidb-data"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
arc-swap = "0.4.2"
bincode = "1.1.4"
crossbeam-channel = "0.3.9"
deunicode = "1.0.0"
hashbrown = { version = "0.6.0", features = ["serde"] }
log = "0.4.6"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] }
rocksdb = "0.12.3"
sdset = "0.3.2"
serde = { version = "1.0.99", features = ["derive"] }
serde_json = "1.0.40"
siphasher = "0.3.0"
zerocopy = "0.2.8"
[dependencies.rmp-serde]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
[dependencies.rmpv]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
features = ["with-serde"]
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dev-dependencies]
tempfile = "3.1.0"
maplit = "1.0.2"
big_s = "1.0.2"

View File

@ -1,126 +0,0 @@
use std::sync::Arc;
use crossbeam_channel::{unbounded, Sender, Receiver};
use rocksdb::{DBVector, IteratorMode, Direction};
use crate::RocksDbResult;
#[derive(Clone)]
pub struct CfTree {
index: Arc<CfTreeInner>,
sender: Option<Sender<()>>,
}
struct CfTreeInner {
db: Arc<rocksdb::DB>,
name: String,
}
impl CfTree {
pub fn create(db: Arc<rocksdb::DB>, name: String) -> RocksDbResult<CfTree> {
let mut options = rocksdb::Options::default();
options.create_missing_column_families(true); // this doesn't work
if db.cf_handle(&name).is_none() {
let _cf = db.create_cf(&name, &options)?;
}
let index = Arc::new(CfTreeInner { db, name });
Ok(CfTree { index, sender: None })
}
pub fn create_with_subcription(
db: Arc<rocksdb::DB>,
name: String,
) -> RocksDbResult<(CfTree, Receiver<()>)>
{
let mut options = rocksdb::Options::default();
options.create_missing_column_families(true); // this doesn't work
if db.cf_handle(&name).is_none() {
let _cf = db.create_cf(&name, &options)?;
}
let index = Arc::new(CfTreeInner { db, name });
let (sender, receiver) = unbounded();
Ok((CfTree { index, sender: Some(sender) }, receiver))
}
pub fn insert<K, V>(&self, key: K, value: V) -> RocksDbResult<()>
where K: AsRef<[u8]>,
V: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let result = self.index.db.put_cf(cf, key, value);
if let Some(sender) = &self.sender {
let _err = sender.send(());
}
result
}
pub fn get<K>(&self, key: K) -> RocksDbResult<Option<DBVector>>
where K: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.get_cf(cf, key)
}
pub fn remove<K>(&self, key: K) -> RocksDbResult<()>
where K: AsRef<[u8]>
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.delete_cf(cf, key)
}
/// Start and end key range is inclusive on both bounds.
pub fn range<KS, KE>(&self, start: KS, end: KE) -> RocksDbResult<CfIter>
where KS: AsRef<[u8]>,
KE: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let mut iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
iter.set_mode(IteratorMode::From(start.as_ref(), Direction::Forward));
let end_bound = Box::from(end.as_ref());
Ok(CfIter { iter, end_bound: Some(end_bound) })
}
pub fn iter(&self) -> RocksDbResult<CfIter> {
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
Ok(CfIter { iter, end_bound: None })
}
pub fn last_key(&self) -> RocksDbResult<Option<Box<[u8]>>> {
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let mut iter = self.index.db.iterator_cf(cf, IteratorMode::End)?;
Ok(iter.next().map(|(key, _)| key))
}
pub fn prefix_iterator<P>(&self, prefix: P) -> RocksDbResult<rocksdb::DBIterator>
where P: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.prefix_iterator_cf(cf, prefix)
}
}
pub struct CfIter<'a> {
iter: rocksdb::DBIterator<'a>,
end_bound: Option<Box<[u8]>>,
}
impl Iterator for CfIter<'_> {
type Item = (Box<[u8]>, Box<[u8]>);
fn next(&mut self) -> Option<Self::Item> {
match (self.iter.next(), &self.end_bound) {
(Some((ref key, _)), Some(end_bound)) if key > end_bound => None,
(Some(entry), _) => Some(entry),
(None, _) => None,
}
}
}

View File

@ -1,73 +0,0 @@
use std::{error, fmt};
use crate::serde::SerializerError;
#[derive(Debug)]
pub enum Error {
SchemaDiffer,
SchemaMissing,
WordIndexMissing,
MissingDocumentId,
RocksDbError(rocksdb::Error),
FstError(fst::Error),
RmpDecodeError(rmp_serde::decode::Error),
RmpEncodeError(rmp_serde::encode::Error),
BincodeError(bincode::Error),
SerializerError(SerializerError),
}
impl From<rocksdb::Error> for Error {
fn from(error: rocksdb::Error) -> Error {
Error::RocksDbError(error)
}
}
impl From<fst::Error> for Error {
fn from(error: fst::Error) -> Error {
Error::FstError(error)
}
}
impl From<rmp_serde::decode::Error> for Error {
fn from(error: rmp_serde::decode::Error) -> Error {
Error::RmpDecodeError(error)
}
}
impl From<rmp_serde::encode::Error> for Error {
fn from(error: rmp_serde::encode::Error) -> Error {
Error::RmpEncodeError(error)
}
}
impl From<bincode::Error> for Error {
fn from(error: bincode::Error) -> Error {
Error::BincodeError(error)
}
}
impl From<SerializerError> for Error {
fn from(error: SerializerError) -> Error {
Error::SerializerError(error)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
SchemaDiffer => write!(f, "schemas differ"),
SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"),
RocksDbError(e) => write!(f, "RocksDB error; {}", e),
FstError(e) => write!(f, "fst error; {}", e),
RmpDecodeError(e) => write!(f, "rmp decode error; {}", e),
RmpEncodeError(e) => write!(f, "rmp encode error; {}", e),
BincodeError(e) => write!(f, "bincode error; {}", e),
SerializerError(e) => write!(f, "serializer error; {}", e),
}
}
}
impl error::Error for Error { }

View File

@ -1,77 +0,0 @@
use std::ops::Deref;
use serde::de::DeserializeOwned;
use serde::Serialize;
use super::Error;
use std::marker::PhantomData;
#[derive(Clone)]
pub struct CommonIndex(pub crate::CfTree);
impl Deref for CommonIndex {
type Target = crate::CfTree;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl CommonIndex {
pub fn get<T, K>(&self, key: K) -> Result<Option<T>, Error>
where T: DeserializeOwned,
K: AsRef<[u8]>,
{
let raw = match self.0.get(key)? {
Some(raw) => raw,
None => return Ok(None),
};
let data = bincode::deserialize(&raw)?;
Ok(Some(data))
}
pub fn set<T, K>(&self, key: K, data: &T) -> Result<(), Error>
where T: Serialize,
K: AsRef<[u8]>,
{
let raw = bincode::serialize(data)?;
self.0.insert(key, &raw)?;
Ok(())
}
pub fn prefix_iterator<T, P>(&self, prefix: P) -> Result<SerializedIterator<T>, Error>
where T: DeserializeOwned,
P: AsRef<[u8]>,
{
let iter = self.0.prefix_iterator(prefix)?;
Ok(SerializedIterator { iter, _marker: PhantomData })
}
}
pub struct SerializedIterator<'a, T> {
iter: rocksdb::DBIterator<'a>,
_marker: PhantomData<T>,
}
impl<T> Iterator for SerializedIterator<'_, T>
where T: DeserializeOwned,
{
type Item = (String, T);
fn next(&mut self) -> Option<Self::Item> {
let (raw_key, raw_value) = match self.iter.next() {
Some((key, value)) => (key, value),
None => return None,
};
let value: T = match bincode::deserialize(&raw_value) {
Ok(data) => data,
Err(_) => return None,
};
let key = match std::str::from_utf8(&raw_key) {
Ok(key) => key.to_string(),
Err(_) => return None,
};
Some((key, value))
}
}

View File

@ -1,89 +0,0 @@
use serde::de::DeserializeOwned;
use serde::{Serialize, Deserialize};
use std::collections::{HashMap, HashSet};
use std::ops::Deref;
use super::Error;
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RankingOrdering {
Asc,
Dsc
}
pub type StopWords = HashSet<String>;
pub type RankingOrder = Vec<String>;
pub type DistinctField = String;
pub type RankingRules = HashMap<String, RankingOrdering>;
const STOP_WORDS_KEY: &str = "stop-words";
const RANKING_ORDER_KEY: &str = "ranking-order";
const DISTINCT_FIELD_KEY: &str = "distinct-field";
const RANKING_RULES_KEY: &str = "ranking-rules";
#[derive(Clone)]
pub struct CustomSettingsIndex(pub(crate) crate::CfTree);
impl Deref for CustomSettingsIndex {
type Target = crate::CfTree;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl CustomSettingsIndex {
fn get<K, T>(&self, key: K) -> Result<Option<T>, Error>
where K: AsRef<[u8]>,
T: DeserializeOwned,
{
let setting = self.0.get(key)?;
let raw = match setting {
Some(raw) => raw,
None => return Ok(None)
};
Ok(Some(bincode::deserialize(&raw)?))
}
fn set<K, T>(&self, key: K, data: &T) -> Result<(), Error>
where K: AsRef<[u8]>,
T: Serialize,
{
let raw = bincode::serialize(data)?;
self.0.insert(key, &raw)?;
Ok(())
}
pub fn get_stop_words(&self) -> Result<Option<StopWords>, Error> {
self.get(STOP_WORDS_KEY)
}
pub fn get_ranking_order(&self) -> Result<Option<RankingOrder>, Error> {
self.get(RANKING_ORDER_KEY)
}
pub fn get_distinct_field(&self) -> Result<Option<DistinctField>, Error> {
self.get(DISTINCT_FIELD_KEY)
}
pub fn get_ranking_rules(&self) -> Result<Option<RankingRules>, Error> {
self.get(RANKING_RULES_KEY)
}
pub fn set_stop_words(&self, value: &StopWords) -> Result<(), Error> {
self.set(STOP_WORDS_KEY, value)
}
pub fn set_ranking_order(&self, value: &RankingOrder) -> Result<(), Error> {
self.set(RANKING_ORDER_KEY, value)
}
pub fn set_distinct_field(&self, value: &DistinctField) -> Result<(), Error> {
self.set(DISTINCT_FIELD_KEY, value)
}
pub fn set_ranking_rules(&self, value: &RankingRules) -> Result<(), Error> {
self.set(RANKING_RULES_KEY, value)
}
}

View File

@ -1,33 +0,0 @@
use std::sync::Arc;
use meilidb_core::DocumentId;
use crate::database::Error;
#[derive(Clone)]
pub struct DocsWordsIndex(pub crate::CfTree);
impl DocsWordsIndex {
pub fn doc_words(&self, id: DocumentId) -> Result<Option<fst::Set>, Error> {
let key = id.0.to_be_bytes();
match self.0.get(key)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None)
}
}
pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> {
let key = id.0.to_be_bytes();
self.0.insert(key, words.as_fst().as_bytes())?;
Ok(())
}
pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> {
let key = id.0.to_be_bytes();
self.0.remove(key)?;
Ok(())
}
}

View File

@ -1,146 +0,0 @@
use std::convert::TryInto;
use std::collections::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::{Schema, SchemaAttr};
use rocksdb::DBVector;
use crate::document_attr_key::DocumentAttrKey;
use crate::RocksDbResult;
fn document_fields_range(id: DocumentId) -> ([u8; 10], [u8; 10]) {
let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();
(start, end)
}
#[derive(Clone)]
pub struct DocumentsIndex(pub(crate) crate::CfTree);
impl DocumentsIndex {
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<Option<DBVector>> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.get(key)
}
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> RocksDbResult<()> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.insert(key, value)?;
Ok(())
}
pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<()> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.remove(key)?;
Ok(())
}
pub fn del_all_document_fields(&self, id: DocumentId) -> RocksDbResult<usize> {
let (start, end) = document_fields_range(id);
let mut count = 0;
for (key, _) in self.0.range(start, end)? {
self.0.remove(key)?;
count += 1;
}
Ok(count)
}
pub fn document_fields(&self, id: DocumentId) -> RocksDbResult<DocumentFieldsIter> {
let (start, end) = document_fields_range(id);
let iter = self.0.range(start, end)?;
Ok(DocumentFieldsIter(iter))
}
pub fn documents_ids(&self) -> RocksDbResult<DocumentsIdsIter> {
let iter = DocumentsKeysIter(self.0.iter()?);
Ok(DocumentsIdsIter { inner: iter, last: None })
}
pub fn documents_fields_repartition(&self, schema: Schema) -> RocksDbResult<HashMap<String, u64>> {
let iter = self.0.iter()?;
let mut repartition_attributes_id = HashMap::new();
for key in DocumentsKeysIter(iter) {
let counter = repartition_attributes_id.entry(key.attribute).or_insert(0);
*counter += 1u64;
}
let mut repartition_with_attribute_name = HashMap::new();
for (key, val) in repartition_attributes_id {
repartition_with_attribute_name.insert(schema.attribute_name(key).to_owned(), val);
}
Ok(repartition_with_attribute_name)
}
pub fn len(&self) -> RocksDbResult<u64> {
let mut last_document_id = None;
let mut count = 0;
for (key, _) in self.0.iter()? {
let array = key.as_ref().try_into().unwrap();
let document_id = DocumentAttrKey::from_be_bytes(array).document_id;
if Some(document_id) != last_document_id {
last_document_id = Some(document_id);
count += 1;
}
}
Ok(count)
}
}
pub struct DocumentFieldsIter<'a>(crate::CfIter<'a>);
impl Iterator for DocumentFieldsIter<'_> {
type Item = (SchemaAttr, Box<[u8]>);
fn next(&mut self) -> Option<Self::Item> {
match self.0.next() {
Some((key, value)) => {
let array = key.as_ref().try_into().unwrap();
let key = DocumentAttrKey::from_be_bytes(array);
Some((key.attribute, value))
},
None => None,
}
}
}
pub struct DocumentsKeysIter<'a>(crate::CfIter<'a>);
impl Iterator for DocumentsKeysIter<'_> {
type Item = DocumentAttrKey;
fn next(&mut self) -> Option<Self::Item> {
match self.0.next() {
Some((key, _)) => {
let array = key.as_ref().try_into().unwrap();
let key = DocumentAttrKey::from_be_bytes(array);
Some(key)
},
None => None,
}
}
}
pub struct DocumentsIdsIter<'a> {
inner: DocumentsKeysIter<'a>,
last: Option<DocumentId>,
}
impl Iterator for DocumentsIdsIter<'_> {
type Item = DocumentId;
fn next(&mut self) -> Option<Self::Item> {
for DocumentAttrKey { document_id, .. } in &mut self.inner {
if self.last != Some(document_id) {
self.last = Some(document_id);
return Some(document_id)
}
}
None
}
}

View File

@ -1,101 +0,0 @@
use std::sync::Arc;
use std::convert::TryInto;
use meilidb_schema::Schema;
use crate::ranked_map::RankedMap;
use crate::database::Error;
const SCHEMA_KEY: &str = "schema";
const WORDS_KEY: &str = "words";
const SYNONYMS_KEY: &str = "synonyms";
const RANKED_MAP_KEY: &str = "ranked-map";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
#[derive(Clone)]
pub struct MainIndex(pub(crate) crate::CfTree);
impl MainIndex {
pub fn schema(&self) -> Result<Option<Schema>, Error> {
match self.0.get(SCHEMA_KEY)? {
Some(bytes) => {
let schema = bincode::deserialize_from(bytes.as_ref())?;
Ok(Some(schema))
},
None => Ok(None),
}
}
pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> {
let bytes = bincode::serialize(schema)?;
self.0.insert(SCHEMA_KEY, bytes)?;
Ok(())
}
pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
match self.0.get(WORDS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None),
}
}
pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
self.0.insert(WORDS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
}
pub fn synonyms_set(&self) -> Result<Option<fst::Set>, Error> {
match self.0.get(SYNONYMS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None),
}
}
pub fn set_synonyms_set(&self, value: &fst::Set) -> Result<(), Error> {
self.0.insert(SYNONYMS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
}
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
match self.0.get(RANKED_MAP_KEY)? {
Some(bytes) => {
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
Ok(Some(ranked_map))
},
None => Ok(None),
}
}
pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> {
let mut bytes = Vec::new();
value.write_to_bin(&mut bytes)?;
self.0.insert(RANKED_MAP_KEY, bytes)?;
Ok(())
}
pub fn number_of_documents(&self) -> Result<u64, Error> {
match self.0.get(NUMBER_OF_DOCUMENTS_KEY)? {
Some(bytes) => {
let array = (*bytes).try_into().unwrap();
Ok(u64::from_be_bytes(array))
},
None => Ok(0),
}
}
pub fn set_number_of_documents<F>(&self, f: F) -> Result<u64, Error>
where F: FnOnce(u64) -> u64,
{
let new = self.number_of_documents().map(f)?;
self.0.insert(NUMBER_OF_DOCUMENTS_KEY, new.to_be_bytes())?;
Ok(new)
}
}

View File

@ -1,507 +0,0 @@
use std::collections::{HashMap, HashSet, BTreeMap};
use std::convert::TryInto;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::thread;
use std::time::{Duration, Instant};
use arc_swap::{ArcSwap, ArcSwapOption, Guard};
use crossbeam_channel::Receiver;
use meilidb_core::criterion::Criteria;
use meilidb_core::{DocIndex, Store, DocumentId, QueryBuilder};
use meilidb_schema::Schema;
use sdset::SetBuf;
use serde::{de, Serialize, Deserialize};
use crate::CfTree;
use crate::ranked_map::RankedMap;
use crate::serde::{Deserializer, DeserializerError};
pub use self::custom_settings_index::{CustomSettingsIndex, RankingOrdering, StopWords, RankingOrder, DistinctField, RankingRules};
pub use self::common_index::CommonIndex;
pub use self::documents_index::DocumentsIdsIter;
use self::docs_words_index::DocsWordsIndex;
use self::documents_index::DocumentsIndex;
use self::main_index::MainIndex;
use self::synonyms_index::SynonymsIndex;
use self::words_index::WordsIndex;
use crate::RocksDbResult;
use crate::database::{
Error,
DocumentsAddition, DocumentsDeletion,
SynonymsAddition, SynonymsDeletion,
apply_documents_addition, apply_documents_deletion,
apply_synonyms_addition, apply_synonyms_deletion,
};
mod common_index;
mod custom_settings_index;
mod docs_words_index;
mod documents_index;
mod main_index;
mod synonyms_index;
mod words_index;
#[derive(Serialize, Deserialize)]
enum Update {
DocumentsAddition(Vec<rmpv::Value>),
DocumentsDeletion(Vec<DocumentId>),
SynonymsAddition(BTreeMap<String, Vec<String>>),
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}
#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateType {
DocumentsAddition { number: usize },
DocumentsDeletion { number: usize },
SynonymsAddition { number: usize },
SynonymsDeletion { number: usize },
}
#[derive(Clone, Serialize, Deserialize)]
pub struct DetailedDuration {
pub main: Duration,
}
#[derive(Clone, Serialize, Deserialize)]
pub struct UpdateResult {
pub update_id: u64,
pub update_type: UpdateType,
pub result: Result<(), String>,
pub detailed_duration: DetailedDuration,
}
#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateStatus {
Enqueued,
Processed(UpdateResult),
Unknown,
}
fn spawn_update_system(index: Index, subscription: Receiver<()>) -> thread::JoinHandle<()> {
thread::spawn(move || {
let mut subscription = subscription.into_iter();
loop {
while let Some((key, _)) = index.updates_index.iter().unwrap().next() {
let update_id = key.as_ref().try_into().map(u64::from_be_bytes).unwrap();
let updates = &index.updates_index;
let results = &index.updates_results_index;
let update = updates.get(&key).unwrap().unwrap();
let (update_type, result, duration) = match rmp_serde::from_read_ref(&update).unwrap() {
Update::DocumentsAddition(documents) => {
let update_type = UpdateType::DocumentsAddition { number: documents.len() };
let ranked_map = index.cache.load().ranked_map.clone();
let start = Instant::now();
let result = apply_documents_addition(&index, ranked_map, documents);
(update_type, result, start.elapsed())
},
Update::DocumentsDeletion(documents) => {
let update_type = UpdateType::DocumentsDeletion { number: documents.len() };
let ranked_map = index.cache.load().ranked_map.clone();
let start = Instant::now();
let result = apply_documents_deletion(&index, ranked_map, documents);
(update_type, result, start.elapsed())
},
Update::SynonymsAddition(synonyms) => {
let update_type = UpdateType::SynonymsAddition { number: synonyms.len() };
let start = Instant::now();
let result = apply_synonyms_addition(&index, synonyms);
(update_type, result, start.elapsed())
},
Update::SynonymsDeletion(synonyms) => {
let update_type = UpdateType::SynonymsDeletion { number: synonyms.len() };
let start = Instant::now();
let result = apply_synonyms_deletion(&index, synonyms);
(update_type, result, start.elapsed())
},
};
let detailed_duration = DetailedDuration { main: duration };
let status = UpdateResult {
update_id,
update_type,
result: result.map_err(|e| e.to_string()),
detailed_duration,
};
if let Some(callback) = &*index.update_callback.load() {
(callback)(status.clone());
}
let value = bincode::serialize(&status).unwrap();
results.insert(&key, value).unwrap();
updates.remove(&key).unwrap();
}
// this subscription is just used to block
// the loop until a new update is inserted
subscription.next();
}
})
}
fn last_update_id(
update_index: &crate::CfTree,
update_results_index: &crate::CfTree,
) -> RocksDbResult<u64>
{
let uikey = match update_index.last_key()? {
Some(key) => Some(key.as_ref().try_into().map(u64::from_be_bytes).unwrap()),
None => None,
};
let urikey = match update_results_index.last_key()? {
Some(key) => Some(key.as_ref().try_into().map(u64::from_be_bytes).unwrap()),
None => None,
};
Ok(uikey.max(urikey).unwrap_or(0))
}
#[derive(Clone)]
pub struct IndexStats {
pub number_of_words: usize,
pub number_of_documents: u64,
pub number_attrs_in_ranked_map: usize,
pub documents_fields_repartition: HashMap<String, u64>,
}
#[derive(Clone)]
pub struct Index {
pub(crate) cache: Arc<ArcSwap<Cache>>,
// TODO this will be a snapshot in the future
main_index: MainIndex,
synonyms_index: SynonymsIndex,
words_index: WordsIndex,
docs_words_index: DocsWordsIndex,
documents_index: DocumentsIndex,
custom_settings_index: CustomSettingsIndex,
// used by the update system
updates_id: Arc<AtomicU64>,
updates_index: crate::CfTree,
updates_results_index: crate::CfTree,
update_callback: Arc<ArcSwapOption<Box<dyn Fn(UpdateResult) + Send + Sync + 'static>>>,
}
pub(crate) struct Cache {
pub words: Arc<fst::Set>,
pub synonyms: Arc<fst::Set>,
pub schema: Schema,
pub ranked_map: RankedMap,
pub number_of_documents: u64,
}
impl Index {
pub fn new(db: Arc<rocksdb::DB>, name: &str) -> Result<Index, Error> {
Index::new_raw(db, name, None)
}
pub fn with_schema(db: Arc<rocksdb::DB>, name: &str, schema: Schema) -> Result<Index, Error> {
Index::new_raw(db, name, Some(schema))
}
fn new_raw(db: Arc<rocksdb::DB>, name: &str, schema: Option<Schema>) -> Result<Index, Error> {
let main_index = CfTree::create(db.clone(), name.to_string()).map(MainIndex)?;
let synonyms_index = CfTree::create(db.clone(), format!("{}-synonyms", name)).map(SynonymsIndex)?;
let words_index = CfTree::create(db.clone(), format!("{}-words", name)).map(WordsIndex)?;
let docs_words_index = CfTree::create(db.clone(), format!("{}-docs-words", name)).map(DocsWordsIndex)?;
let documents_index = CfTree::create(db.clone(), format!("{}-documents", name)).map(DocumentsIndex)?;
let custom_settings_index = CfTree::create(db.clone(), format!("{}-custom", name)).map(CustomSettingsIndex)?;
let (updates_index, subscription) = CfTree::create_with_subcription(db.clone(), format!("{}-updates", name))?;
let updates_results_index = CfTree::create(db.clone(), format!("{}-updates-results", name))?;
let words = match main_index.words_set()? {
Some(words) => Arc::new(words),
None => Arc::new(fst::Set::default()),
};
let synonyms = match main_index.synonyms_set()? {
Some(synonyms) => Arc::new(synonyms),
None => Arc::new(fst::Set::default()),
};
let schema = match (schema, main_index.schema()?) {
(Some(ref expected), Some(ref current)) if current != expected => {
return Err(Error::SchemaDiffer)
},
(Some(expected), Some(_)) => expected,
(Some(expected), None) => {
main_index.set_schema(&expected)?;
expected
},
(None, Some(current)) => current,
(None, None) => return Err(Error::SchemaMissing),
};
let ranked_map = match main_index.ranked_map()? {
Some(map) => map,
None => RankedMap::default(),
};
let number_of_documents = documents_index.len()?;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
let cache = Arc::new(ArcSwap::from_pointee(cache));
let last_update_id = last_update_id(&updates_index, &updates_results_index)?;
let updates_id = Arc::new(AtomicU64::new(last_update_id + 1));
let index = Index {
cache,
main_index,
synonyms_index,
words_index,
docs_words_index,
documents_index,
custom_settings_index,
updates_id,
updates_index,
updates_results_index,
update_callback: Arc::new(ArcSwapOption::empty()),
};
let _handle = spawn_update_system(index.clone(), subscription);
Ok(index)
}
pub fn set_update_callback<F>(&self, callback: F)
where F: Fn(UpdateResult) + Send + Sync + 'static
{
self.update_callback.store(Some(Arc::new(Box::new(callback))));
}
pub fn unset_update_callback(&self) {
self.update_callback.store(None);
}
pub fn stats(&self) -> RocksDbResult<IndexStats> {
let cache = self.cache.load();
let documents_fields_repartition = self.documents_index.documents_fields_repartition(cache.schema.clone())?;
Ok(IndexStats {
number_of_words: cache.words.len(),
number_of_documents: cache.number_of_documents,
number_attrs_in_ranked_map: cache.ranked_map.len(),
documents_fields_repartition,
})
}
pub fn query_builder(&self) -> QueryBuilder<RefIndex> {
let ref_index = self.as_ref();
QueryBuilder::new(ref_index)
}
pub fn query_builder_with_criteria<'c>(
&self,
criteria: Criteria<'c>,
) -> QueryBuilder<'c, RefIndex>
{
let ref_index = self.as_ref();
QueryBuilder::with_criteria(ref_index, criteria)
}
pub fn as_ref(&self) -> RefIndex {
RefIndex {
cache: self.cache.load(),
main_index: &self.main_index,
synonyms_index: &self.synonyms_index,
words_index: &self.words_index,
docs_words_index: &self.docs_words_index,
documents_index: &self.documents_index,
custom_settings_index: &self.custom_settings_index,
}
}
pub fn schema(&self) -> Schema {
self.cache.load().schema.clone()
}
pub fn ranked_map(&self) -> RankedMap {
self.cache.load().ranked_map.clone()
}
pub fn synonyms_index(&self) -> SynonymsIndex {
self.synonyms_index.clone()
}
pub fn synonyms_set(&self) -> Arc<fst::Set> {
self.cache.load().synonyms.clone()
}
pub fn custom_settings(&self) -> CustomSettingsIndex {
self.custom_settings_index.clone()
}
pub fn number_of_documents(&self) -> u64 {
self.cache.load().number_of_documents
}
pub fn documents_addition<D>(&self) -> DocumentsAddition<D> {
DocumentsAddition::new(self)
}
pub fn documents_deletion(&self) -> DocumentsDeletion {
DocumentsDeletion::new(self)
}
pub fn synonyms_addition(&self) -> SynonymsAddition {
SynonymsAddition::new(self)
}
pub fn synonyms_deletion(&self) -> SynonymsDeletion {
SynonymsDeletion::new(self)
}
pub fn update_status(
&self,
update_id: u64,
) -> Result<UpdateStatus, Error>
{
let update_id = update_id.to_be_bytes();
match self.updates_results_index.get(update_id)? {
Some(value) => {
let value = bincode::deserialize(&value)?;
Ok(UpdateStatus::Processed(value))
},
None => {
match self.updates_index.get(update_id)? {
Some(_) => Ok(UpdateStatus::Enqueued),
None => Ok(UpdateStatus::Unknown),
}
}
}
}
pub fn update_status_blocking(
&self,
update_id: u64,
) -> Result<UpdateResult, Error>
{
loop {
if let Some(value) = self.updates_results_index.get(&update_id.to_be_bytes())? {
let value = bincode::deserialize(&value)?;
return Ok(value)
}
std::thread::sleep(Duration::from_millis(300));
}
}
pub fn documents_ids(&self) -> Result<DocumentsIdsIter, Error> {
Ok(self.documents_index.documents_ids()?)
}
pub fn document<T>(
&self,
fields: Option<&HashSet<&str>>,
id: DocumentId,
) -> Result<Option<T>, DeserializerError>
where T: de::DeserializeOwned,
{
let schema = self.schema();
let fields = match fields {
Some(fields) => fields.into_iter().map(|name| schema.attribute(name)).collect(),
None => None,
};
let mut deserializer = Deserializer {
document_id: id,
index: &self,
fields: fields.as_ref(),
};
// TODO: currently we return an error if all document fields are missing,
// returning None would have been better
T::deserialize(&mut deserializer).map(Some)
}
}
impl Index {
pub(crate) fn push_documents_addition<D>(&self, addition: Vec<D>) -> Result<u64, Error>
where D: serde::Serialize
{
let mut values = Vec::with_capacity(addition.len());
for add in addition {
let vec = rmp_serde::to_vec_named(&add)?;
let add = rmp_serde::from_read(&vec[..])?;
values.push(add);
}
let addition = Update::DocumentsAddition(values);
let update = rmp_serde::to_vec_named(&addition)?;
self.raw_push_update(update)
}
pub(crate) fn push_documents_deletion(
&self,
deletion: Vec<DocumentId>,
) -> Result<u64, Error>
{
let deletion = Update::DocumentsDeletion(deletion);
let update = rmp_serde::to_vec_named(&deletion)?;
self.raw_push_update(update)
}
pub(crate) fn push_synonyms_addition(
&self,
addition: BTreeMap<String, Vec<String>>,
) -> Result<u64, Error>
{
let addition = Update::SynonymsAddition(addition);
let update = rmp_serde::to_vec_named(&addition)?;
self.raw_push_update(update)
}
pub(crate) fn push_synonyms_deletion(
&self,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> Result<u64, Error>
{
let deletion = Update::SynonymsDeletion(deletion);
let update = rmp_serde::to_vec_named(&deletion)?;
self.raw_push_update(update)
}
fn raw_push_update(&self, raw_update: Vec<u8>) -> Result<u64, Error> {
let update_id = self.updates_id.fetch_add(1, Ordering::SeqCst);
let update_id_array = update_id.to_be_bytes();
self.updates_index.insert(update_id_array, raw_update)?;
Ok(update_id)
}
}
pub struct RefIndex<'a> {
pub(crate) cache: Guard<'static, Arc<Cache>>,
pub main_index: &'a MainIndex,
pub synonyms_index: &'a SynonymsIndex,
pub words_index: &'a WordsIndex,
pub docs_words_index: &'a DocsWordsIndex,
pub documents_index: &'a DocumentsIndex,
pub custom_settings_index: &'a CustomSettingsIndex,
}
impl Store for RefIndex<'_> {
type Error = Error;
fn words(&self) -> Result<&fst::Set, Self::Error> {
Ok(&self.cache.words)
}
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
Ok(self.words_index.doc_indexes(word)?)
}
fn synonyms(&self) -> Result<&fst::Set, Self::Error> {
Ok(&self.cache.synonyms)
}
fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, Self::Error> {
Ok(self.synonyms_index.alternatives_to(word)?)
}
}

View File

@ -1,21 +0,0 @@
use crate::RocksDbResult;
#[derive(Clone)]
pub struct SynonymsIndex(pub(crate) crate::CfTree);
impl SynonymsIndex {
pub fn alternatives_to(&self, word: &[u8]) -> RocksDbResult<Option<fst::Set>> {
match self.0.get(word)? {
Some(vector) => Ok(Some(fst::Set::from_bytes(vector.to_vec()).unwrap())),
None => Ok(None),
}
}
pub fn set_alternatives_to(&self, word: &[u8], value: Vec<u8>) -> RocksDbResult<()> {
self.0.insert(word, value).map(drop)
}
pub fn del_alternatives_of(&self, word: &[u8]) -> RocksDbResult<()> {
self.0.remove(word).map(drop)
}
}

View File

@ -1,45 +0,0 @@
use meilidb_core::DocIndex;
use sdset::{Set, SetBuf};
use zerocopy::{LayoutVerified, AsBytes};
use crate::RocksDbResult;
#[derive(Clone)]
pub struct WordsIndex(pub(crate) crate::CfTree);
impl WordsIndex {
pub fn doc_indexes(&self, word: &[u8]) -> RocksDbResult<Option<SetBuf<DocIndex>>> {
// we must force an allocation to make the memory aligned
match self.0.get(word)? {
Some(bytes) => {
let vec = match LayoutVerified::new_slice(bytes.as_ref()) {
Some(layout) => layout.into_slice().to_vec(),
None => {
let len = bytes.as_ref().len();
let count = len / std::mem::size_of::<DocIndex>();
let mut buf: Vec<DocIndex> = Vec::with_capacity(count);
unsafe {
let src = bytes.as_ref().as_ptr();
let dst = buf.as_mut_ptr() as *mut u8;
std::ptr::copy_nonoverlapping(src, dst, len);
buf.set_len(count);
}
buf
}
};
let setbuf = SetBuf::new_unchecked(vec);
Ok(Some(setbuf))
},
None => Ok(None),
}
}
pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> RocksDbResult<()> {
self.0.insert(word, set.as_bytes()).map(drop)
}
pub fn del_doc_indexes(&self, word: &[u8]) -> RocksDbResult<()> {
self.0.remove(word).map(drop)
}
}

View File

@ -1,155 +0,0 @@
use std::collections::hash_map::Entry;
use std::collections::{HashSet, HashMap};
use std::path::Path;
use std::sync::Arc;
use std::sync::RwLock;
use meilidb_schema::Schema;
mod error;
mod index;
mod update;
use crate::CfTree;
pub use self::error::Error;
pub use self::index::{
Index, CustomSettingsIndex, CommonIndex, RankingOrdering,
StopWords, RankingOrder, DistinctField, RankingRules,
UpdateType, DetailedDuration, UpdateResult, UpdateStatus
};
pub use self::update::DocumentsAddition;
pub use self::update::DocumentsDeletion;
pub use self::update::SynonymsAddition;
pub use self::update::SynonymsDeletion;
use self::update::apply_documents_addition;
use self::update::apply_documents_deletion;
use self::update::apply_synonyms_addition;
use self::update::apply_synonyms_deletion;
const INDEXES_KEY: &str = "indexes";
const COMMON_KEY: &str = "common-index";
fn load_indexes(tree: &rocksdb::DB) -> Result<HashSet<String>, Error> {
match tree.get(INDEXES_KEY)? {
Some(bytes) => Ok(bincode::deserialize(&bytes)?),
None => Ok(HashSet::new())
}
}
pub struct Database {
cache: RwLock<HashMap<String, Index>>,
inner: Arc<rocksdb::DB>,
common: Arc<CommonIndex>,
}
impl Database {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
let cache = RwLock::new(HashMap::new());
let mut options = rocksdb::Options::default();
options.create_if_missing(true);
let cfs = rocksdb::DB::list_cf(&options, &path).unwrap_or_default();
let inner = Arc::new(rocksdb::DB::open_cf(&options, path, cfs)?);
let common_tree = CfTree::create(inner.clone(), COMMON_KEY.to_owned())?;
let common = Arc::new(CommonIndex(common_tree));
let indexes = load_indexes(&inner)?;
let database = Database { cache, inner, common };
for index in indexes {
database.open_index(&index)?;
}
Ok(database)
}
pub fn indexes(&self) -> Result<HashSet<String>, Error> {
load_indexes(&self.inner)
}
fn set_indexes(&self, value: &HashSet<String>) -> Result<(), Error> {
let bytes = bincode::serialize(value)?;
self.inner.put(INDEXES_KEY, bytes)?;
Ok(())
}
pub fn open_index(&self, name: &str) -> Result<Option<Index>, Error> {
{
let cache = self.cache.read().unwrap();
if let Some(index) = cache.get(name).cloned() {
return Ok(Some(index))
}
}
let mut cache = self.cache.write().unwrap();
let index = match cache.entry(name.to_string()) {
Entry::Occupied(occupied) => {
occupied.get().clone()
},
Entry::Vacant(vacant) => {
if !self.indexes()?.contains(name) {
return Ok(None)
}
let index = Index::new(self.inner.clone(), name)?;
vacant.insert(index).clone()
},
};
Ok(Some(index))
}
pub fn create_index(&self, name: &str, schema: Schema) -> Result<Index, Error> {
let mut cache = self.cache.write().unwrap();
let index = match cache.entry(name.to_string()) {
Entry::Occupied(occupied) => {
occupied.get().clone()
},
Entry::Vacant(vacant) => {
let index = Index::with_schema(self.inner.clone(), name, schema)?;
let mut indexes = self.indexes()?;
indexes.insert(name.to_string());
self.set_indexes(&indexes)?;
vacant.insert(index).clone()
},
};
Ok(index)
}
pub fn delete_index(&self, name: &str) -> Result<(), Error> {
let mut cache = self.cache.write().unwrap();
self.inner.drop_cf(name)?;
let _ = self.inner.drop_cf(&format!("{}-synonyms", name));
let _ = self.inner.drop_cf(&format!("{}-words", name));
let _ = self.inner.drop_cf(&format!("{}-docs-words", name));
let _ = self.inner.drop_cf(&format!("{}-documents", name));
let _ = self.inner.drop_cf(&format!("{}-custom", name));
let _ = self.inner.drop_cf(&format!("{}-updates", name));
let _ = self.inner.drop_cf(&format!("{}-updates-results", name));
cache.remove(name);
if let Ok(mut index_list) = self.indexes() {
index_list.remove(name);
let _ = self.set_indexes(&index_list);
}
Ok(())
}
pub fn common_index(&self) -> Arc<CommonIndex> {
self.common.clone()
}
pub fn checkpoint_to<P>(&self, path: P) -> Result<(), Error>
where P: AsRef<Path>,
{
let checkpoint = rocksdb::checkpoint::Checkpoint::new(&self.inner)?;
Ok(checkpoint.create_checkpoint(path)?)
}
}

View File

@ -1,139 +0,0 @@
use std::collections::HashSet;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use sdset::{SetOperation, duo::Union};
use serde::Serialize;
use crate::RankedMap;
use crate::database::{Error, Index, index::Cache, apply_documents_deletion};
use crate::indexer::Indexer;
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
pub struct DocumentsAddition<'a, D> {
index: &'a Index,
documents: Vec<D>,
}
impl<'a, D> DocumentsAddition<'a, D> {
pub fn new(index: &'a Index) -> DocumentsAddition<'a, D> {
DocumentsAddition { index, documents: Vec::new() }
}
pub fn update_document(&mut self, document: D) {
self.documents.push(document);
}
pub fn finalize(self) -> Result<u64, Error>
where D: serde::Serialize
{
self.index.push_documents_addition(self.documents)
}
}
pub fn apply_documents_addition(
index: &Index,
mut ranked_map: RankedMap,
addition: Vec<rmpv::Value>,
) -> Result<(), Error>
{
let mut document_ids = HashSet::new();
let mut document_store = RamDocumentStore::new();
let mut indexer = Indexer::new();
let schema = &index.schema();
let identifier = schema.identifier_name();
for document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
// 1. store the document id for future deletion
document_ids.insert(document_id);
// 2. index the document fields in ram stores
let serializer = Serializer {
schema,
document_store: &mut document_store,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
let ref_index = index.as_ref();
let docs_words = ref_index.docs_words_index;
let documents = ref_index.documents_index;
let main = ref_index.main_index;
let words = ref_index.words_index;
// 1. remove the previous documents match indexes
let documents_to_insert = document_ids.iter().cloned().collect();
apply_documents_deletion(index, ranked_map.clone(), documents_to_insert)?;
// 2. insert new document attributes in the database
for ((id, attr), value) in document_store.into_inner() {
documents.set_document_field(id, attr, value)?;
}
let indexed = indexer.build();
let mut delta_words_builder = SetBuilder::memory();
for (word, delta_set) in indexed.words_doc_indexes {
delta_words_builder.insert(&word).unwrap();
let set = match words.doc_indexes(&word)? {
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
None => delta_set,
};
words.set_doc_indexes(&word, &set)?;
}
for (id, words) in indexed.docs_words {
docs_words.set_doc_words(id, &words)?;
}
let delta_words = delta_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let words = match main.words_set()? {
Some(words) => {
let op = OpBuilder::new()
.add(words.stream())
.add(delta_words.stream())
.r#union();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => delta_words,
};
main.set_words_set(&words)?;
main.set_ranked_map(&ranked_map)?;
let inserted_documents_len = document_ids.len() as u64;
let number_of_documents = main.set_number_of_documents(|old| old + inserted_documents_len)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(words);
let synonyms = cache.synonyms.clone();
let schema = cache.schema.clone();
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}

View File

@ -1,150 +0,0 @@
use std::collections::{HashMap, HashSet, BTreeSet};
use std::sync::Arc;
use fst::{SetBuilder, Streamer};
use meilidb_core::DocumentId;
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};
use crate::RankedMap;
use crate::serde::extract_document_id;
use crate::database::{Index, Error, index::Cache};
pub struct DocumentsDeletion<'a> {
index: &'a Index,
documents: Vec<DocumentId>,
}
impl<'a> DocumentsDeletion<'a> {
pub fn new(index: &'a Index) -> DocumentsDeletion<'a> {
DocumentsDeletion { index, documents: Vec::new() }
}
pub fn delete_document_by_id(&mut self, document_id: DocumentId) {
self.documents.push(document_id);
}
pub fn delete_document<D>(&mut self, document: D) -> Result<(), Error>
where D: serde::Serialize,
{
let schema = self.index.schema();
let identifier = schema.identifier_name();
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
self.delete_document_by_id(document_id);
Ok(())
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_documents_deletion(self.documents)
}
}
impl Extend<DocumentId> for DocumentsDeletion<'_> {
fn extend<T: IntoIterator<Item=DocumentId>>(&mut self, iter: T) {
self.documents.extend(iter)
}
}
pub fn apply_documents_deletion(
index: &Index,
mut ranked_map: RankedMap,
deletion: Vec<DocumentId>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let schema = index.schema();
let docs_words = ref_index.docs_words_index;
let documents = ref_index.documents_index;
let main = ref_index.main_index;
let words = ref_index.words_index;
let idset = SetBuf::from_dirty(deletion);
// collect the ranked attributes according to the schema
let ranked_attrs: Vec<_> = schema.iter()
.filter_map(|(_, attr, prop)| {
if prop.is_ranked() { Some(attr) } else { None }
})
.collect();
let mut words_document_ids = HashMap::new();
for id in idset {
// remove all the ranked attributes from the ranked_map
for ranked_attr in &ranked_attrs {
ranked_map.remove(id, *ranked_attr);
}
if let Some(words) = docs_words.doc_words(id)? {
let mut stream = words.stream();
while let Some(word) = stream.next() {
let word = word.to_vec();
words_document_ids.entry(word).or_insert_with(Vec::new).push(id);
}
}
}
let mut deleted_documents = HashSet::new();
let mut removed_words = BTreeSet::new();
for (word, document_ids) in words_document_ids {
let document_ids = SetBuf::from_dirty(document_ids);
if let Some(doc_indexes) = words.doc_indexes(&word)? {
let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
let doc_indexes = op.into_set_buf();
if !doc_indexes.is_empty() {
words.set_doc_indexes(&word, &doc_indexes)?;
} else {
words.del_doc_indexes(&word)?;
removed_words.insert(word);
}
}
for id in document_ids {
if documents.del_all_document_fields(id)? != 0 {
deleted_documents.insert(id);
}
docs_words.del_doc_words(id)?;
}
}
let removed_words = fst::Set::from_iter(removed_words).unwrap();
let words = match main.words_set()? {
Some(words_set) => {
let op = fst::set::OpBuilder::new()
.add(words_set.stream())
.add(removed_words.stream())
.difference();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => fst::Set::default(),
};
main.set_words_set(&words)?;
main.set_ranked_map(&ranked_map)?;
let deleted_documents_len = deleted_documents.len() as u64;
let number_of_documents = main.set_number_of_documents(|old| old - deleted_documents_len)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(words);
let synonyms = cache.synonyms.clone();
let schema = cache.schema.clone();
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}

View File

@ -1,9 +0,0 @@
mod documents_addition;
mod documents_deletion;
mod synonyms_addition;
mod synonyms_deletion;
pub use self::documents_addition::{DocumentsAddition, apply_documents_addition};
pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion};
pub use self::synonyms_addition::{SynonymsAddition, apply_synonyms_addition};
pub use self::synonyms_deletion::{SynonymsDeletion, apply_synonyms_deletion};

View File

@ -1,94 +0,0 @@
use std::collections::BTreeMap;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use sdset::SetBuf;
use crate::database::{Error, Index,index::Cache};
pub struct SynonymsAddition<'a> {
index: &'a Index,
synonyms: BTreeMap<String, Vec<String>>,
}
impl<'a> SynonymsAddition<'a> {
pub fn new(index: &'a Index) -> SynonymsAddition<'a> {
SynonymsAddition { index, synonyms: BTreeMap::new() }
}
pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
where S: AsRef<str>,
T: AsRef<str>,
I: IntoIterator<Item=T>,
{
let synonym = normalize_str(synonym.as_ref());
let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_synonyms_addition(self.synonyms)
}
}
pub fn apply_synonyms_addition(
index: &Index,
addition: BTreeMap<String, Vec<String>>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let synonyms = ref_index.synonyms_index;
let main = ref_index.main_index;
let mut synonyms_builder = SetBuilder::memory();
for (synonym, alternatives) in addition {
synonyms_builder.insert(&synonym).unwrap();
let alternatives = {
let alternatives = SetBuf::from_dirty(alternatives);
let mut alternatives_builder = SetBuilder::memory();
alternatives_builder.extend_iter(alternatives).unwrap();
alternatives_builder.into_inner().unwrap()
};
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
}
let delta_synonyms = synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let synonyms = match main.synonyms_set()? {
Some(synonyms) => {
let op = OpBuilder::new()
.add(synonyms.stream())
.add(delta_synonyms.stream())
.r#union();
let mut synonyms_builder = SetBuilder::memory();
synonyms_builder.extend_stream(op).unwrap();
synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => delta_synonyms,
};
main.set_synonyms_set(&synonyms)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(main.words_set()?.unwrap_or_default());
let ranked_map = cache.ranked_map.clone();
let synonyms = Arc::new(synonyms);
let schema = cache.schema.clone();
let number_of_documents = cache.number_of_documents;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}

View File

@ -1,137 +0,0 @@
use std::collections::BTreeMap;
use std::iter::FromIterator;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use sdset::SetBuf;
use crate::database::{Error, Index, index::Cache};
pub struct SynonymsDeletion<'a> {
index: &'a Index,
synonyms: BTreeMap<String, Option<Vec<String>>>,
}
impl<'a> SynonymsDeletion<'a> {
pub fn new(index: &'a Index) -> SynonymsDeletion<'a> {
SynonymsDeletion { index, synonyms: BTreeMap::new() }
}
pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
let synonym = normalize_str(synonym.as_ref());
self.synonyms.insert(synonym, None);
}
pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
where S: AsRef<str>,
T: AsRef<str>,
I: Iterator<Item=T>,
{
let synonym = normalize_str(synonym.as_ref());
let value = self.synonyms.entry(synonym).or_insert(None);
let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
match value {
Some(v) => v.extend(alternatives),
None => *value = Some(Vec::from_iter(alternatives)),
}
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_synonyms_deletion(self.synonyms)
}
}
pub fn apply_synonyms_deletion(
index: &Index,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let synonyms = ref_index.synonyms_index;
let main = ref_index.main_index;
let mut delete_whole_synonym_builder = SetBuilder::memory();
for (synonym, alternatives) in deletion {
match alternatives {
Some(alternatives) => {
let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?;
let prev_alternatives = match prev_alternatives {
Some(alternatives) => alternatives,
None => continue,
};
let delta_alternatives = {
let alternatives = SetBuf::from_dirty(alternatives);
let mut builder = SetBuilder::memory();
builder.extend_iter(alternatives).unwrap();
builder.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
};
let op = OpBuilder::new()
.add(prev_alternatives.stream())
.add(delta_alternatives.stream())
.difference();
let (alternatives, empty_alternatives) = {
let mut builder = SetBuilder::memory();
let len = builder.get_ref().len();
builder.extend_stream(op).unwrap();
let is_empty = len == builder.get_ref().len();
let alternatives = builder.into_inner().unwrap();
(alternatives, is_empty)
};
if empty_alternatives {
delete_whole_synonym_builder.insert(synonym.as_bytes())?;
} else {
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
}
},
None => {
delete_whole_synonym_builder.insert(&synonym).unwrap();
synonyms.del_alternatives_of(synonym.as_bytes())?;
}
}
}
let delta_synonyms = delete_whole_synonym_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let synonyms = match main.synonyms_set()? {
Some(synonyms) => {
let op = OpBuilder::new()
.add(synonyms.stream())
.add(delta_synonyms.stream())
.difference();
let mut synonyms_builder = SetBuilder::memory();
synonyms_builder.extend_stream(op).unwrap();
synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => fst::Set::default(),
};
main.set_synonyms_set(&synonyms)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(main.words_set()?.unwrap_or_default());
let ranked_map = cache.ranked_map.clone();
let synonyms = Arc::new(synonyms);
let schema = cache.schema.clone();
let number_of_documents = cache.number_of_documents;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}

View File

@ -1,69 +0,0 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocumentAttrKey {
pub document_id: DocumentId,
pub attribute: SchemaAttr,
}
impl DocumentAttrKey {
pub fn new(document_id: DocumentId, attribute: SchemaAttr) -> DocumentAttrKey {
DocumentAttrKey { document_id, attribute }
}
pub fn to_be_bytes(self) -> [u8; 10] {
let mut output = [0u8; 10];
let document_id = self.document_id.0.to_be_bytes();
let attribute = self.attribute.0.to_be_bytes();
unsafe {
use std::{mem::size_of, ptr::copy_nonoverlapping};
let output = output.as_mut_ptr();
copy_nonoverlapping(document_id.as_ptr(), output, size_of::<u64>());
let output = output.add(size_of::<u64>());
copy_nonoverlapping(attribute.as_ptr(), output, size_of::<u16>());
}
output
}
pub fn from_be_bytes(bytes: [u8; 10]) -> DocumentAttrKey {
let document_id;
let attribute;
unsafe {
use std::ptr::read_unaligned;
let pointer = bytes.as_ptr() as *const _;
let document_id_bytes = read_unaligned(pointer);
document_id = u64::from_be_bytes(document_id_bytes);
let pointer = pointer.add(1) as *const _;
let attribute_bytes = read_unaligned(pointer);
attribute = u16::from_be_bytes(attribute_bytes);
}
DocumentAttrKey {
document_id: DocumentId(document_id),
attribute: SchemaAttr(attribute),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn to_from_be_bytes() {
let document_id = DocumentId(67578308);
let schema_attr = SchemaAttr(3456);
let x = DocumentAttrKey::new(document_id, schema_attr);
assert_eq!(x, DocumentAttrKey::from_be_bytes(x.to_be_bytes()));
}
}

View File

@ -1,208 +0,0 @@
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;
use deunicode::deunicode_with_tofu;
use meilidb_core::{DocumentId, DocIndex};
use meilidb_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
use sdset::SetBuf;
type Word = Vec<u8>; // TODO make it be a SmallVec
pub struct Indexer {
word_limit: usize, // the maximum number of indexed words
words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
docs_words: HashMap<DocumentId, Vec<Word>>,
}
pub struct Indexed {
pub words_doc_indexes: BTreeMap<Word, SetBuf<DocIndex>>,
pub docs_words: HashMap<DocumentId, fst::Set>,
}
impl Indexer {
pub fn new() -> Indexer {
Indexer::with_word_limit(1000)
}
pub fn with_word_limit(limit: usize) -> Indexer {
Indexer {
word_limit: limit,
words_doc_indexes: BTreeMap::new(),
docs_words: HashMap::new(),
}
}
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
let lowercase_text = text.to_lowercase();
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
// TODO compute the deunicoded version after the cjk check
let next = if !lowercase_text.contains(is_cjk) && lowercase_text != deunicoded {
Some(deunicoded)
} else {
None
};
let iter = Some(lowercase_text).into_iter().chain(next);
for text in iter {
for token in Tokenizer::new(&text) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
}
}
pub fn index_text_seq<'a, I, IT>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
where I: IntoIterator<Item=&'a str, IntoIter=IT>,
IT: Iterator<Item = &'a str> + Clone,
{
// TODO serialize this to one call to the SeqTokenizer loop
let lowercased: Vec<_> = iter.into_iter().map(str::to_lowercase).collect();
let iter = lowercased.iter().map(|t| t.as_str());
for token in SeqTokenizer::new(iter) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
let deunicoded: Vec<_> = lowercased.into_iter().map(|lowercase_text| {
if lowercase_text.contains(is_cjk) { return lowercase_text }
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
if lowercase_text != deunicoded { deunicoded } else { lowercase_text }
}).collect();
let iter = deunicoded.iter().map(|t| t.as_str());
for token in SeqTokenizer::new(iter) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
}
pub fn build(self) -> Indexed {
let words_doc_indexes = self.words_doc_indexes
.into_iter()
.map(|(word, indexes)| (word, SetBuf::from_dirty(indexes)))
.collect();
let docs_words = self.docs_words
.into_iter()
.map(|(id, mut words)| {
words.sort_unstable();
words.dedup();
(id, fst::Set::from_iter(words).unwrap())
})
.collect();
Indexed { words_doc_indexes, docs_words }
}
}
fn index_token(
token: Token,
id: DocumentId,
attr: SchemaAttr,
word_limit: usize,
words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
docs_words: &mut HashMap<DocumentId, Vec<Word>>,
) -> bool
{
if token.word_index >= word_limit { return false }
match token_to_docindex(id, attr, token) {
Some(docindex) => {
let word = Vec::from(token.word);
words_doc_indexes.entry(word.clone()).or_insert_with(Vec::new).push(docindex);
docs_words.entry(id).or_insert_with(Vec::new).push(word);
},
None => return false,
}
true
}
fn token_to_docindex(id: DocumentId, attr: SchemaAttr, token: Token) -> Option<DocIndex> {
let word_index = u16::try_from(token.word_index).ok()?;
let char_index = u16::try_from(token.char_index).ok()?;
let char_length = u16::try_from(token.word.chars().count()).ok()?;
let docindex = DocIndex {
document_id: id,
attribute: attr.0,
word_index,
char_index,
char_length,
};
Some(docindex)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn strange_apostrophe() {
let mut indexer = Indexer::new();
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text);
let Indexed { words_doc_indexes, .. } = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
// with the ugly apostrophe...
assert!(words_doc_indexes.get(&"léteindre".to_owned().into_bytes()).is_some());
}
#[test]
fn strange_apostrophe_in_sequence() {
let mut indexer = Indexer::new();
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = vec!["Zut, laspirateur, jai oublié de léteindre !"];
indexer.index_text_seq(docid, attr, text);
let Indexed { words_doc_indexes, .. } = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
// with the ugly apostrophe...
assert!(words_doc_indexes.get(&"léteindre".to_owned().into_bytes()).is_some());
}
}

View File

@ -1,20 +0,0 @@
mod cf_tree;
mod database;
mod document_attr_key;
mod indexer;
mod number;
mod ranked_map;
mod serde;
pub use self::cf_tree::{CfTree, CfIter};
pub use self::database::{
Database, Index, CustomSettingsIndex, RankingOrdering,
StopWords, RankingOrder, DistinctField, RankingRules,
UpdateType, DetailedDuration, UpdateResult, UpdateStatus,
Error,
};
pub use self::number::Number;
pub use self::ranked_map::RankedMap;
pub use self::serde::{compute_document_id, extract_document_id, value_to_string};
pub type RocksDbResult<T> = Result<T, rocksdb::Error>;

View File

@ -1,55 +0,0 @@
use std::num::{ParseIntError, ParseFloatError};
use std::str::FromStr;
use std::fmt;
use ordered_float::OrderedFloat;
use serde::{Serialize, Deserialize};
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Number {
Unsigned(u64),
Signed(i64),
Float(OrderedFloat<f64>),
}
impl FromStr for Number {
type Err = ParseNumberError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let uint_error = match u64::from_str(s) {
Ok(unsigned) => return Ok(Number::Unsigned(unsigned)),
Err(error) => error,
};
let int_error = match i64::from_str(s) {
Ok(signed) => return Ok(Number::Signed(signed)),
Err(error) => error,
};
let float_error = match f64::from_str(s) {
Ok(float) => return Ok(Number::Float(OrderedFloat(float))),
Err(error) => error,
};
Err(ParseNumberError { uint_error, int_error, float_error })
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseNumberError {
uint_error: ParseIntError,
int_error: ParseIntError,
float_error: ParseFloatError,
}
impl fmt::Display for ParseNumberError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.uint_error == self.int_error {
write!(f, "can not parse number: {}, {}", self.uint_error, self.float_error)
} else {
write!(f, "can not parse number: {}, {}, {}",
self.uint_error, self.int_error, self.float_error)
}
}
}

View File

@ -1,36 +0,0 @@
use std::io::{Read, Write};
use hashbrown::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use crate::Number;
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>);
impl RankedMap {
pub fn len(&self) -> usize {
self.0.len()
}
pub fn insert(&mut self, document: DocumentId, attribute: SchemaAttr, number: Number) {
self.0.insert((document, attribute), number);
}
pub fn remove(&mut self, document: DocumentId, attribute: SchemaAttr) {
self.0.remove(&(document, attribute));
}
pub fn get(&self, document: DocumentId, attribute: SchemaAttr) -> Option<Number> {
self.0.get(&(document, attribute)).cloned()
}
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<RankedMap> {
bincode::deserialize_from(reader).map(RankedMap)
}
pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
bincode::serialize_into(writer, &self.0)
}
}

View File

@ -1,132 +0,0 @@
use std::collections::HashSet;
use std::io::Cursor;
use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::decode::{Deserializer as RmpDeserializer, ReadReader};
use rmp_serde::decode::{Error as RmpError};
use serde::{de, forward_to_deserialize_any};
use crate::database::Index;
#[derive(Debug)]
pub enum DeserializerError {
RmpError(RmpError),
RocksDbError(rocksdb::Error),
Custom(String),
}
impl de::Error for DeserializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
DeserializerError::Custom(msg.to_string())
}
}
impl fmt::Display for DeserializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DeserializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
DeserializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
DeserializerError::Custom(s) => f.write_str(s),
}
}
}
impl Error for DeserializerError {}
impl From<RmpError> for DeserializerError {
fn from(error: RmpError) -> DeserializerError {
DeserializerError::RmpError(error)
}
}
impl From<rocksdb::Error> for DeserializerError {
fn from(error: rocksdb::Error) -> DeserializerError {
DeserializerError::RocksDbError(error)
}
}
pub struct Deserializer<'a> {
pub document_id: DocumentId,
pub index: &'a Index,
pub fields: Option<&'a HashSet<SchemaAttr>>,
}
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
{
type Error = DeserializerError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
self.deserialize_map(visitor)
}
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct struct enum identifier ignored_any
}
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
let schema = self.index.schema();
let documents = self.index.as_ref().documents_index;
let iter = documents
.document_fields(self.document_id)?
.filter_map(|(attr, value)| {
let is_displayed = schema.props(attr).is_displayed();
if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
let attribute_name = schema.attribute_name(attr);
Some((attribute_name, Value::new(value)))
} else {
None
}
});
let map_deserializer = de::value::MapDeserializer::new(iter);
let result = visitor.visit_map(map_deserializer).map_err(DeserializerError::from);
result
}
}
struct Value<A>(RmpDeserializer<ReadReader<Cursor<A>>>) where A: AsRef<[u8]>;
impl<A> Value<A> where A: AsRef<[u8]>
{
fn new(value: A) -> Value<A> {
Value(RmpDeserializer::new(Cursor::new(value)))
}
}
impl<'de, A> de::IntoDeserializer<'de, RmpError> for Value<A>
where A: AsRef<[u8]>,
{
type Deserializer = Self;
fn into_deserializer(self) -> Self::Deserializer {
self
}
}
impl<'de, 'a, A> de::Deserializer<'de> for Value<A>
where A: AsRef<[u8]>,
{
type Error = RmpError;
fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
self.0.deserialize_any(visitor)
}
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct map struct enum identifier ignored_any
}
}

View File

@ -1,15 +0,0 @@
use meilidb_data::{Database};
use meilidb_data::Index;
use meilidb_schema::{SchemaBuilder, DISPLAYED, INDEXED};
pub fn simple_index() -> Index {
let tmp_dir = tempfile::tempdir().unwrap();
let database = Database::open(&tmp_dir).unwrap();
let mut builder = SchemaBuilder::with_identifier("objectId");
builder.new_attribute("objectId", DISPLAYED | INDEXED);
builder.new_attribute("title", DISPLAYED | INDEXED);
let schema = builder.build();
database.create_index("hello", schema).unwrap()
}

View File

@ -1,43 +0,0 @@
#[macro_use] extern crate maplit;
mod common;
use big_s::S;
use meilidb_data::RankingOrdering;
#[test]
fn stop_words() {
let index = common::simple_index();
let stop_words = hashset!{ S("le"), S("la"), S("les"), };
index.custom_settings().set_stop_words(&stop_words).unwrap();
let ret_stop_words = index.custom_settings().get_stop_words().unwrap().unwrap();
assert_eq!(ret_stop_words, stop_words);
}
#[test]
fn ranking_order() {
let index = common::simple_index();
let ranking_order = vec![S("SumOfTypos"), S("NumberOfWords"), S("WordsProximity"), S("SumOfWordsAttribute"), S("SumOfWordsPosition"), S("Exact"), S("DocumentId")];
index.custom_settings().set_ranking_order(&ranking_order).unwrap();
let ret_ranking_orderer = index.custom_settings().get_ranking_order().unwrap().unwrap();
assert_eq!(ret_ranking_orderer, ranking_order);
}
#[test]
fn distinct_field() {
let index = common::simple_index();
let distinct_field = S("title");
index.custom_settings().set_distinct_field(&distinct_field).unwrap();
let ret_distinct_field = index.custom_settings().get_distinct_field().unwrap().unwrap();
assert_eq!(ret_distinct_field, distinct_field);
}
#[test]
fn ranking_rules() {
let index = common::simple_index();
let ranking_rules = hashmap!{ S("objectId") => RankingOrdering::Asc };
index.custom_settings().set_ranking_rules(&ranking_rules).unwrap();
let ret_ranking_rules = index.custom_settings().get_ranking_rules().unwrap().unwrap();
assert_eq!(ret_ranking_rules, ranking_rules);
}

View File

@ -1,67 +0,0 @@
#[macro_use] extern crate maplit;
mod common;
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;
use big_s::S;
use serde_json::json;
#[test]
fn database_stats() {
let index = common::simple_index();
let as_been_updated = Arc::new(AtomicBool::new(false));
let as_been_updated_clone = as_been_updated.clone();
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
let doc1 = json!({ "objectId": 123, "title": "hello" });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
let stats = index.stats().unwrap();
let repartition = hashmap!{
S("objectId") => 1u64,
S("title") => 1u64,
};
assert_eq!(stats.number_of_documents, 1);
assert_eq!(stats.documents_fields_repartition, repartition);
let doc2 = json!({ "objectId": 456, "title": "world" });
let mut addition = index.documents_addition();
addition.update_document(&doc2);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
let stats = index.stats().unwrap();
let repartition = hashmap!{
S("objectId") => 2u64,
S("title") => 2u64,
};
assert_eq!(stats.number_of_documents, 2);
assert_eq!(stats.documents_fields_repartition, repartition);
let doc3 = json!({ "objectId": 789 });
let mut addition = index.documents_addition();
addition.update_document(&doc3);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
let stats = index.stats().unwrap();
let repartition = hashmap!{
S("objectId") => 3u64,
S("title") => 2u64,
};
assert_eq!(stats.number_of_documents, 3);
assert_eq!(stats.documents_fields_repartition, repartition);
}

View File

@ -1,99 +0,0 @@
mod common;
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;
use serde_json::json;
#[test]
fn insert_delete_document() {
let index = common::simple_index();
let as_been_updated = Arc::new(AtomicBool::new(false));
let as_been_updated_clone = as_been_updated.clone();
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
let doc1 = json!({ "objectId": 123, "title": "hello" });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
let mut deletion = index.documents_deletion();
deletion.delete_document(&doc1).unwrap();
let update_id = deletion.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 0);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 0);
}
#[test]
fn replace_document() {
let index = common::simple_index();
let as_been_updated = Arc::new(AtomicBool::new(false));
let as_been_updated_clone = as_been_updated.clone();
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
let doc1 = json!({ "objectId": 123, "title": "hello" });
let doc2 = json!({ "objectId": 123, "title": "coucou" });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
let mut addition = index.documents_addition();
addition.update_document(&doc2);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 0);
let docs = index.query_builder().query("coucou", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc2));
}
#[test]
fn documents_ids() {
let index = common::simple_index();
let doc1 = json!({ "objectId": 123, "title": "hello" });
let doc2 = json!({ "objectId": 456, "title": "world" });
let doc3 = json!({ "objectId": 789 });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
addition.update_document(&doc2);
addition.update_document(&doc3);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(status.result.is_ok());
let documents_ids_count = index.documents_ids().unwrap().count();
assert_eq!(documents_ids_count, 3);
}

View File

@ -1,12 +0,0 @@
[package]
name = "meilidb-schema"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
bincode = "1.1.2"
indexmap = { version = "1.1.0", features = ["serde-1"] }
serde = { version = "1.0.91", features = ["derive"] }
serde_json = { version = "1.0.39", features = ["preserve_order"] }
toml = { version = "0.5.0", features = ["preserve_order"] }

View File

@ -1,285 +0,0 @@
use std::collections::{HashMap, BTreeMap};
use std::{fmt, u16};
use std::ops::BitOr;
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use indexmap::IndexMap;
pub const DISPLAYED: SchemaProps = SchemaProps { displayed: true, indexed: false, ranked: false };
pub const INDEXED: SchemaProps = SchemaProps { displayed: false, indexed: true, ranked: false };
pub const RANKED: SchemaProps = SchemaProps { displayed: false, indexed: false, ranked: true };
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaProps {
#[serde(default)]
pub displayed: bool,
#[serde(default)]
pub indexed: bool,
#[serde(default)]
pub ranked: bool,
}
impl SchemaProps {
pub fn is_displayed(self) -> bool {
self.displayed
}
pub fn is_indexed(self) -> bool {
self.indexed
}
pub fn is_ranked(self) -> bool {
self.ranked
}
}
impl BitOr for SchemaProps {
type Output = Self;
fn bitor(self, other: Self) -> Self::Output {
SchemaProps {
displayed: self.displayed | other.displayed,
indexed: self.indexed | other.indexed,
ranked: self.ranked | other.ranked,
}
}
}
#[derive(Serialize, Deserialize)]
pub struct SchemaBuilder {
identifier: String,
attributes: IndexMap<String, SchemaProps>,
}
impl SchemaBuilder {
pub fn with_identifier<S: Into<String>>(name: S) -> SchemaBuilder {
SchemaBuilder {
identifier: name.into(),
attributes: IndexMap::new(),
}
}
pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr {
let len = self.attributes.len();
if self.attributes.insert(name.into(), props).is_some() {
panic!("Field already inserted.")
}
SchemaAttr(len as u16)
}
pub fn build(self) -> Schema {
let mut attrs = HashMap::new();
let mut props = Vec::new();
for (i, (name, prop)) in self.attributes.into_iter().enumerate() {
attrs.insert(name.clone(), SchemaAttr(i as u16));
props.push((name, prop));
}
let identifier = self.identifier;
Schema { inner: Arc::new(InnerSchema { identifier, attrs, props }) }
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Schema {
inner: Arc<InnerSchema>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct InnerSchema {
identifier: String,
attrs: HashMap<String, SchemaAttr>,
props: Vec<(String, SchemaProps)>,
}
impl Schema {
fn to_builder(&self) -> SchemaBuilder {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
SchemaBuilder { identifier, attributes }
}
fn attributes_ordered(&self) -> IndexMap<String, SchemaProps> {
let mut ordered = BTreeMap::new();
for (name, attr) in &self.inner.attrs {
let (_, props) = self.inner.props[attr.0 as usize];
ordered.insert(attr.0, (name, props));
}
let mut attributes = IndexMap::with_capacity(ordered.len());
for (_, (name, props)) in ordered {
attributes.insert(name.clone(), props);
}
attributes
}
pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
let (_, props) = self.inner.props[attr.0 as usize];
props
}
pub fn identifier_name(&self) -> &str {
&self.inner.identifier
}
pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
self.inner.attrs.get(name.as_ref()).cloned()
}
pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
let (name, _) = &self.inner.props[attr.0 as usize];
name
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item=(&str, SchemaAttr, SchemaProps)> + 'a {
self.inner.props.iter()
.map(move |(name, prop)| {
let attr = self.inner.attrs.get(name).unwrap();
(name.as_str(), *attr, *prop)
})
}
}
impl Serialize for Schema {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: serde::ser::Serializer,
{
self.to_builder().serialize(serializer)
}
}
impl<'de> Deserialize<'de> for Schema {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: serde::de::Deserializer<'de>,
{
let builder = SchemaBuilder::deserialize(deserializer)?;
Ok(builder.build())
}
}
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct SchemaAttr(pub u16);
impl SchemaAttr {
pub const fn new(value: u16) -> SchemaAttr {
SchemaAttr(value)
}
pub const fn min() -> SchemaAttr {
SchemaAttr(u16::min_value())
}
pub const fn max() -> SchemaAttr {
SchemaAttr(u16::max_value())
}
pub fn next(self) -> Option<SchemaAttr> {
self.0.checked_add(1).map(SchemaAttr)
}
pub fn prev(self) -> Option<SchemaAttr> {
self.0.checked_sub(1).map(SchemaAttr)
}
}
impl fmt::Display for SchemaAttr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.0.fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
#[test]
fn serialize_deserialize() -> bincode::Result<()> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let mut buffer = Vec::new();
bincode::serialize_into(&mut buffer, &schema)?;
let schema2 = bincode::deserialize_from(buffer.as_slice())?;
assert_eq!(schema, schema2);
Ok(())
}
#[test]
fn serialize_deserialize_toml() -> Result<(), Box<dyn Error>> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let buffer = toml::to_vec(&schema)?;
let schema2 = toml::from_slice(buffer.as_slice())?;
assert_eq!(schema, schema2);
let data = r#"
identifier = "id"
[attributes."alpha"]
displayed = true
[attributes."beta"]
displayed = true
indexed = true
[attributes."gamma"]
indexed = true
"#;
let schema2 = toml::from_str(data)?;
assert_eq!(schema, schema2);
Ok(())
}
#[test]
fn serialize_deserialize_json() -> Result<(), Box<dyn Error>> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let buffer = serde_json::to_vec(&schema)?;
let schema2 = serde_json::from_slice(buffer.as_slice())?;
assert_eq!(schema, schema2);
let data = r#"
{
"identifier": "id",
"attributes": {
"alpha": {
"displayed": true
},
"beta": {
"displayed": true,
"indexed": true
},
"gamma": {
"indexed": true
}
}
}"#;
let schema2 = serde_json::from_str(data)?;
assert_eq!(schema, schema2);
Ok(())
}
}

View File

@ -1,8 +0,0 @@
[package]
name = "meilidb-tokenizer"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
slice-group-by = "0.2.4"

View File

@ -1,295 +0,0 @@
use std::iter::Peekable;
use slice_group_by::StrGroupBy;
use self::SeparatorCategory::*;
pub fn is_cjk(c: char) -> bool {
(c >= '\u{2e80}' && c <= '\u{2eff}') ||
(c >= '\u{2f00}' && c <= '\u{2fdf}') ||
(c >= '\u{3040}' && c <= '\u{309f}') ||
(c >= '\u{30a0}' && c <= '\u{30ff}') ||
(c >= '\u{3100}' && c <= '\u{312f}') ||
(c >= '\u{3200}' && c <= '\u{32ff}') ||
(c >= '\u{3400}' && c <= '\u{4dbf}') ||
(c >= '\u{4e00}' && c <= '\u{9fff}') ||
(c >= '\u{f900}' && c <= '\u{faff}')
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum SeparatorCategory {
Soft,
Hard,
}
impl SeparatorCategory {
fn merge(self, other: SeparatorCategory) -> SeparatorCategory {
if let (Soft, Soft) = (self, other) { Soft } else { Hard }
}
fn to_usize(self) -> usize {
match self {
Soft => 1,
Hard => 8,
}
}
}
fn is_separator(c: char) -> bool {
classify_separator(c).is_some()
}
fn classify_separator(c: char) -> Option<SeparatorCategory> {
match c {
' ' | '\'' | '"' => Some(Soft),
'.' | ';' | ',' | '!' | '?' | '-' | '(' | ')' => Some(Hard),
_ => None,
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum CharCategory {
Separator(SeparatorCategory),
Cjk,
Other,
}
fn classify_char(c: char) -> CharCategory {
if let Some(category) = classify_separator(c) {
CharCategory::Separator(category)
} else if is_cjk(c) {
CharCategory::Cjk
} else {
CharCategory::Other
}
}
fn is_str_word(s: &str) -> bool {
!s.chars().any(is_separator)
}
fn same_group_category(a: char, b: char) -> bool {
match (classify_char(a), classify_char(b)) {
(CharCategory::Cjk, _) | (_, CharCategory::Cjk) => false,
(CharCategory::Separator(_), CharCategory::Separator(_)) => true,
(a, b) => a == b,
}
}
// fold the number of chars along with the index position
fn chars_count_index((n, _): (usize, usize), (i, c): (usize, char)) -> (usize, usize) {
(n + 1, i + c.len_utf8())
}
pub fn split_query_string(query: &str) -> impl Iterator<Item=&str> {
Tokenizer::new(query).map(|t| t.word)
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Token<'a> {
pub word: &'a str,
pub word_index: usize,
pub char_index: usize,
}
pub struct Tokenizer<'a> {
inner: &'a str,
word_index: usize,
char_index: usize,
}
impl<'a> Tokenizer<'a> {
pub fn new(string: &str) -> Tokenizer {
// skip every separator and set `char_index`
// to the number of char trimmed
let (count, index) = string.char_indices()
.take_while(|(_, c)| is_separator(*c))
.fold((0, 0), chars_count_index);
Tokenizer {
inner: &string[index..],
word_index: 0,
char_index: count,
}
}
}
impl<'a> Iterator for Tokenizer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
let mut iter = self.inner.linear_group_by(same_group_category).peekable();
while let (Some(string), next_string) = (iter.next(), iter.peek()) {
let (count, index) = string.char_indices().fold((0, 0), chars_count_index);
if !is_str_word(string) {
self.word_index += string.chars()
.filter_map(classify_separator)
.fold(Soft, |a, x| a.merge(x))
.to_usize();
self.char_index += count;
self.inner = &self.inner[index..];
continue;
}
let token = Token {
word: string,
word_index: self.word_index,
char_index: self.char_index,
};
if next_string.filter(|s| is_str_word(s)).is_some() {
self.word_index += 1;
}
self.char_index += count;
self.inner = &self.inner[index..];
return Some(token);
}
self.inner = "";
None
}
}
pub struct SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
inner: I,
current: Option<Peekable<Tokenizer<'a>>>,
word_offset: usize,
char_offset: usize,
}
impl<'a, I> SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
pub fn new(mut iter: I) -> SeqTokenizer<'a, I> {
let current = iter.next().map(|s| Tokenizer::new(s).peekable());
SeqTokenizer {
inner: iter,
current: current,
word_offset: 0,
char_offset: 0,
}
}
}
impl<'a, I> Iterator for SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
match &mut self.current {
Some(current) => {
match current.next() {
Some(token) => {
// we must apply the word and char offsets
// to the token before returning it
let token = Token {
word: token.word,
word_index: token.word_index + self.word_offset,
char_index: token.char_index + self.char_offset,
};
// if this is the last iteration on this text
// we must save the offsets for next texts
if current.peek().is_none() {
let hard_space = SeparatorCategory::Hard.to_usize();
self.word_offset = token.word_index + hard_space;
self.char_offset = token.char_index + hard_space;
}
Some(token)
},
None => {
// no more words in this text we must
// start tokenizing the next text
self.current = self.inner.next().map(|s| Tokenizer::new(s).peekable());
self.next()
},
}
},
// no more texts available
None => None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn easy() {
let mut tokenizer = Tokenizer::new("salut");
assert_eq!(tokenizer.next(), Some(Token { word: "salut", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard() {
let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe (ouch)");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 }));
assert_eq!(tokenizer.next(), Some(Token { word: "ouch", word_index: 17, char_index: 18 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), Some(Token { word: "wtf", word_index: 16, char_index: 12 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 18 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 32, char_index: 24 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard_long_chars() {
let mut tokenizer = Tokenizer::new(" .? yo 😂. aïe");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😂", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 10 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? 😱 - lol . 😣 ,");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😱", word_index: 16, char_index: 12 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 16 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 22 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard_kanjis() {
let mut tokenizer = Tokenizer::new("\u{2ec4}lolilol\u{2ec7}");
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 1, char_index: 1 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 2, char_index: 8 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("\u{2ec4}\u{2ed3}\u{2ef2} lolilol - hello \u{2ec7}");
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ed3}", word_index: 1, char_index: 1 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ef2}", word_index: 2, char_index: 2 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 3, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "hello", word_index: 11, char_index: 14 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 12, char_index: 23 }));
assert_eq!(tokenizer.next(), None);
}
}

View File

@ -1,29 +0,0 @@
[package]
edition = "2018"
name = "meilidb"
version = "0.3.1"
authors = ["Kerollmops <renault.cle@gmail.com>"]
[dependencies]
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-data = { path = "../meilidb-data", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
[dev-dependencies]
csv = "1.0.7"
diskus = "0.5.0"
env_logger = "0.6.1"
indexmap = { version = "1.1.0", features = ["serde-1"] }
jemallocator = "0.3.2"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
quickcheck = "0.9.0"
rand = "0.7.2"
rand_xorshift = "0.2.0"
rustyline = { version = "5.0.0", default-features = false }
serde = { version = "1.0.91" , features = ["derive"] }
serde_json = "1.0.39"
structopt = "0.3.2"
sysinfo = "0.9.5"
tempfile = "3.0.7"
termcolor = "1.0.4"
toml = "0.5.3"

View File

@ -1,215 +0,0 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::{HashMap, HashSet};
use std::io::{self, BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::error::Error;
use std::fs::{self, File};
use diskus::Walk;
use sysinfo::{SystemExt, ProcessExt};
use serde::{Serialize, Deserialize};
use structopt::StructOpt;
use meilidb_data::Database;
use meilidb_schema::Schema;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
/// The csv file to index.
#[structopt(parse(from_os_str))]
pub csv_data_path: PathBuf,
/// The path to the schema.
#[structopt(long = "schema", parse(from_os_str))]
pub schema_path: PathBuf,
/// The file with the synonyms.
#[structopt(long = "synonyms", parse(from_os_str))]
pub synonyms: Option<PathBuf>,
/// The path to the list of stop words (one by line).
#[structopt(long = "stop-words", parse(from_os_str))]
pub stop_words: Option<PathBuf>,
#[structopt(long = "update-group-size")]
pub update_group_size: Option<usize>,
}
#[derive(Serialize, Deserialize)]
struct Document (
HashMap<String, String>
);
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Synonym {
OneWay(SynonymOneWay),
MultiWay { synonyms: Vec<String> },
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SynonymOneWay {
pub search_terms: String,
pub synonyms: Synonyms,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Synonyms {
Multiple(Vec<String>),
Single(String),
}
fn read_synomys(path: &Path) -> Result<Vec<Synonym>, Box<dyn Error>> {
let file = File::open(path)?;
let synonyms = serde_json::from_reader(file)?;
Ok(synonyms)
}
fn index(
schema: Schema,
database_path: &Path,
csv_data_path: &Path,
update_group_size: Option<usize>,
stop_words: &HashSet<String>,
synonyms: Vec<Synonym>,
) -> Result<Database, Box<dyn Error>>
{
let database = Database::open(database_path)?;
let mut wtr = csv::Writer::from_path("./stats.csv").unwrap();
wtr.write_record(&["NumberOfDocuments", "DiskUsed", "MemoryUsed"])?;
let mut system = sysinfo::System::new();
let index = database.create_index("test", schema.clone())?;
let mut synonyms_adder = index.synonyms_addition();
for synonym in synonyms {
match synonym {
Synonym::OneWay(SynonymOneWay { search_terms, synonyms }) => {
let alternatives = match synonyms {
Synonyms::Multiple(alternatives) => alternatives,
Synonyms::Single(alternative) => vec![alternative],
};
synonyms_adder.add_synonym(search_terms, alternatives);
},
Synonym::MultiWay { mut synonyms } => {
for _ in 0..synonyms.len() {
if let Some((synonym, alternatives)) = synonyms.split_first() {
synonyms_adder.add_synonym(synonym, alternatives);
}
synonyms.rotate_left(1);
}
},
}
}
synonyms_adder.finalize()?;
let mut rdr = csv::Reader::from_path(csv_data_path)?;
let mut raw_record = csv::StringRecord::new();
let headers = rdr.headers()?.clone();
let mut i = 0;
let mut end_of_file = false;
while !end_of_file {
let mut update = index.documents_addition();
loop {
end_of_file = !rdr.read_record(&mut raw_record)?;
if end_of_file { break }
let document: Document = match raw_record.deserialize(Some(&headers)) {
Ok(document) => document,
Err(e) => {
eprintln!("{:?}", e);
continue;
}
};
update.update_document(document);
print!("\rindexing document {}", i);
i += 1;
if let Some(group_size) = update_group_size {
if i % group_size == 0 { break }
}
}
println!();
println!("committing update...");
update.finalize()?;
// write stats
let directory_size = Walk::new(&[database_path.to_owned()], 4).run();
system.refresh_all();
let pid = sysinfo::get_current_pid()?;
let memory = system.get_process(pid).unwrap().memory(); // in kb
wtr.write_record(&[i.to_string(), directory_size.to_string(), memory.to_string()])?;
wtr.flush()?;
}
Ok(database)
}
fn retrieve_stop_words(path: &Path) -> io::Result<HashSet<String>> {
let f = File::open(path)?;
let reader = BufReader::new(f);
let mut words = HashSet::new();
for line in reader.lines() {
let line = line?;
let word = line.trim().to_string();
words.insert(word);
}
Ok(words)
}
fn main() -> Result<(), Box<dyn Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let schema = {
let string = fs::read_to_string(&opt.schema_path)?;
toml::from_str(&string)?
};
let stop_words = match opt.stop_words {
Some(ref path) => retrieve_stop_words(path)?,
None => HashSet::new(),
};
let synonyms = match opt.synonyms {
Some(ref path) => read_synomys(path)?,
None => Vec::new(),
};
let start = Instant::now();
let result = index(
schema,
&opt.database_path,
&opt.csv_data_path,
opt.update_group_size,
&stop_words,
synonyms,
);
if let Err(e) = result {
return Err(e.into())
}
println!("database created in {:.2?} at: {:?}", start.elapsed(), opt.database_path);
Ok(())
}

View File

@ -1,229 +0,0 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::btree_map::{BTreeMap, Entry};
use std::collections::HashSet;
use std::error::Error;
use std::io::{self, Write};
use std::iter::FromIterator;
use std::path::PathBuf;
use std::time::{Instant, Duration};
use indexmap::IndexMap;
use rustyline::{Editor, Config};
use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilidb_core::Highlight;
use meilidb_data::Database;
use meilidb_schema::SchemaAttr;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
#[structopt(long = "fetch-timeout-ms")]
pub fetch_timeout_ms: Option<u64>,
/// Fields that must be displayed.
pub displayed_fields: Vec<String>,
/// The number of returned results
#[structopt(short = "n", long = "number-results", default_value = "10")]
pub number_results: usize,
/// The number of characters before and after the first match
#[structopt(short = "C", long = "context", default_value = "35")]
pub char_context: usize,
}
type Document = IndexMap<String, String>;
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
let mut stdout = StandardStream::stdout(ColorChoice::Always);
let mut highlighted = false;
for range in ranges.windows(2) {
let [start, end] = match range { [start, end] => [*start, *end], _ => unreachable!() };
if highlighted {
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?;
}
write!(&mut stdout, "{}", &text[start..end])?;
stdout.reset()?;
highlighted = !highlighted;
}
Ok(())
}
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
let mut byte_index = 0;
let mut byte_length = 0;
for (n, (i, c)) in text.char_indices().enumerate() {
if n == index {
byte_index = i;
}
if n + 1 == index + length {
byte_length = i - byte_index + c.len_utf8();
break;
}
}
(byte_index, byte_length)
}
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
let mut byte_indexes = BTreeMap::new();
for highlight in highlights {
let char_index = highlight.char_index as usize;
let char_length = highlight.char_length as usize;
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
match byte_indexes.entry(byte_index) {
Entry::Vacant(entry) => { entry.insert(byte_length); },
Entry::Occupied(mut entry) => {
if *entry.get() < byte_length {
entry.insert(byte_length);
}
},
}
}
let mut title_areas = Vec::new();
title_areas.push(0);
for (byte_index, length) in byte_indexes {
title_areas.push(byte_index);
title_areas.push(byte_index + length);
}
title_areas.push(text.len());
title_areas.sort_unstable();
title_areas
}
/// note: matches must have been sorted by `char_index` and `char_length` before being passed.
///
/// ```no_run
/// matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
///
/// let matches = matches.matches.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
///
/// let (text, matches) = crop_text(&text, matches, 35);
/// ```
fn crop_text(
text: &str,
highlights: impl IntoIterator<Item=Highlight>,
context: usize,
) -> (String, Vec<Highlight>)
{
let mut highlights = highlights.into_iter().peekable();
let char_index = highlights.peek().map(|m| m.char_index as usize).unwrap_or(0);
let start = char_index.saturating_sub(context);
let text = text.chars().skip(start).take(context * 2).collect();
let highlights = highlights
.take_while(|m| {
(m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
})
.map(|highlight| {
Highlight { char_index: highlight.char_index - start as u16, ..highlight }
})
.collect();
(text, highlights)
}
fn main() -> Result<(), Box<dyn Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let start = Instant::now();
let database = Database::open(&opt.database_path)?;
let index = database.open_index("test")?.unwrap();
let schema = index.schema();
println!("database prepared for you in {:.2?}", start.elapsed());
let fields = opt.displayed_fields.iter().map(String::as_str);
let fields = HashSet::from_iter(fields);
let config = Config::builder().auto_add_history(true).build();
let mut readline = Editor::<()>::with_config(config);
let _ = readline.load_history("query-history.txt");
for result in readline.iter("Searching for: ") {
match result {
Ok(query) => {
let start_total = Instant::now();
let builder = match opt.fetch_timeout_ms {
Some(timeout_ms) => {
let timeout = Duration::from_millis(timeout_ms);
index.query_builder().with_fetch_timeout(timeout)
},
None => index.query_builder(),
};
let documents = builder.query(&query, 0..opt.number_results)?;
let mut retrieve_duration = Duration::default();
let number_of_documents = documents.len();
for mut doc in documents {
doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));
let start_retrieve = Instant::now();
let result = index.document::<Document>(Some(&fields), doc.id);
retrieve_duration += start_retrieve.elapsed();
match result {
Ok(Some(document)) => {
for (name, text) in document {
print!("{}: ", name);
let attr = schema.attribute(&name).unwrap();
let highlights = doc.highlights.iter()
.filter(|m| SchemaAttr::new(m.attribute) == attr)
.cloned();
let (text, highlights) = crop_text(&text, highlights, opt.char_context);
let areas = create_highlight_areas(&text, &highlights);
display_highlights(&text, &areas)?;
println!();
}
},
Ok(None) => eprintln!("missing document"),
Err(e) => eprintln!("{}", e),
}
let mut matching_attributes = HashSet::new();
for highlight in doc.highlights {
let attr = SchemaAttr::new(highlight.attribute);
let name = schema.attribute_name(attr);
matching_attributes.insert(name);
}
let matching_attributes = Vec::from_iter(matching_attributes);
println!("matching in: {:?}", matching_attributes);
println!();
}
eprintln!("document field retrieve took {:.2?}", retrieve_duration);
eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
},
Err(err) => {
println!("Error: {:?}", err);
break
}
}
}
readline.save_history("query-history.txt").unwrap();
Ok(())
}

View File

@ -1,3 +0,0 @@
mod sort_by_attr;
pub use self::sort_by_attr::SortByAttr;

View File

@ -0,0 +1,53 @@
[package]
name = "meilisearch-core"
version = "0.10.0"
license = "MIT"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
arc-swap = "0.4.5"
bincode = "1.2.1"
byteorder = "1.3.4"
chrono = { version = "0.4.11", features = ["serde"] }
compact_arena = "0.4.0"
crossbeam-channel = "0.4.2"
deunicode = "1.1.0"
env_logger = "0.7.1"
fst = { version = "0.3.5", default-features = false }
hashbrown = { version = "0.7.1", features = ["serde"] }
heed = "0.7.0"
indexmap = { version = "1.3.2", features = ["serde-1"] }
intervaltree = "0.2.5"
itertools = "0.9.0"
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
log = "0.4.8"
meilisearch-schema = { path = "../meilisearch-schema", version = "0.10.0" }
meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.10.0" }
meilisearch-types = { path = "../meilisearch-types", version = "0.10.0" }
once_cell = "1.3.1"
ordered-float = { version = "1.0.2", features = ["serde"] }
pest = { git = "https://github.com/MarinPostma/pest.git", tag = "meilisearch-patch1" }
pest_derive = "2.0"
regex = "1.3.6"
sdset = "0.4.0"
serde = { version = "1.0.105", features = ["derive"] }
serde_json = "1.0.50"
siphasher = "0.3.2"
slice-group-by = "0.2.6"
unicase = "2.6.0"
zerocopy = "0.3.0"
[dev-dependencies]
assert_matches = "1.3.0"
criterion = "0.3.1"
csv = "1.1.3"
jemallocator = "0.3.2"
rustyline = { version = "6.0.0", default-features = false }
structopt = "0.3.12"
tempfile = "3.1.0"
termcolor = "1.1.0"
[[bench]]
name = "search_benchmark"
harness = false

View File

@ -0,0 +1,104 @@
#[cfg(test)]
#[macro_use]
extern crate assert_matches;
use std::sync::mpsc;
use std::path::Path;
use std::fs::File;
use std::io::BufReader;
use std::iter;
use meilisearch_core::Database;
use meilisearch_core::{ProcessedUpdateResult, UpdateStatus};
use meilisearch_core::settings::{Settings, SettingsUpdate};
use meilisearch_schema::Schema;
use serde_json::Value;
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
fn prepare_database(path: &Path) -> Database {
let database = Database::open_or_create(path).unwrap();
let db = &database;
let (sender, receiver) = mpsc::sync_channel(100);
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
sender.send(update.update_id).unwrap()
};
let index = database.create_index("bench").unwrap();
database.set_update_callback(Box::new(update_fn));
let mut writer = db.main_write_txn().unwrap();
index.main.put_schema(&mut writer, &Schema::with_primary_key("id")).unwrap();
writer.commit().unwrap();
let settings_update: SettingsUpdate = {
let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../datasets/movies/settings.json");
let file = File::open(path).unwrap();
let reader = BufReader::new(file);
let settings: Settings = serde_json::from_reader(reader).unwrap();
settings.into_update().unwrap()
};
let mut update_writer = db.update_write_txn().unwrap();
let _update_id = index.settings_update(&mut update_writer, settings_update).unwrap();
update_writer.commit().unwrap();
let mut additions = index.documents_addition();
let json: Value = {
let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../datasets/movies/movies.json");
let movies_file = File::open(path).expect("find movies");
serde_json::from_reader(movies_file).unwrap()
};
let documents = json.as_array().unwrap();
for document in documents {
additions.update_document(document);
}
let mut update_writer = db.update_write_txn().unwrap();
let update_id = additions.finalize(&mut update_writer).unwrap();
update_writer.commit().unwrap();
// block until the transaction is processed
let _ = receiver.into_iter().find(|id| *id == update_id);
let update_reader = db.update_read_txn().unwrap();
let result = index.update_status(&update_reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
database
}
pub fn criterion_benchmark(c: &mut Criterion) {
let dir = tempfile::tempdir().unwrap();
let database = prepare_database(dir.path());
let reader = database.main_read_txn().unwrap();
let index = database.open_index("bench").unwrap();
let mut count = 0;
let query = "I love paris ";
let iter = iter::from_fn(|| {
count += 1;
query.get(0..count)
});
let mut group = c.benchmark_group("searching in movies (19654 docs)");
group.sample_size(10);
for query in iter {
let bench_name = BenchmarkId::from_parameter(format!("{:?}", query));
group.bench_with_input(bench_name, &query, |b, query| b.iter(|| {
let builder = index.query_builder();
builder.query(&reader, query, 0..20).unwrap();
}));
}
group.finish();
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@ -0,0 +1,473 @@
use std::collections::HashSet;
use std::collections::btree_map::{BTreeMap, Entry};
use std::error::Error;
use std::io::{Read, Write};
use std::iter::FromIterator;
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use std::{fs, io, sync::mpsc};
use rustyline::{Config, Editor};
use serde::{Deserialize, Serialize};
use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
use meilisearch_core::settings::Settings;
use meilisearch_schema::FieldId;
// #[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
#[derive(Debug, StructOpt)]
struct IndexCommand {
/// The destination where the database must be created.
#[structopt(parse(from_os_str))]
database_path: PathBuf,
#[structopt(long, default_value = "default")]
index_uid: String,
/// The csv file path to index, you can also use `-` to specify the standard input.
#[structopt(parse(from_os_str))]
csv_data_path: PathBuf,
/// The path to the settings.
#[structopt(long, parse(from_os_str))]
settings: PathBuf,
#[structopt(long)]
update_group_size: Option<usize>,
#[structopt(long, parse(from_os_str))]
compact_to_path: Option<PathBuf>,
}
#[derive(Debug, StructOpt)]
struct SearchCommand {
/// The path of the database to work with.
#[structopt(parse(from_os_str))]
database_path: PathBuf,
#[structopt(long, default_value = "default")]
index_uid: String,
/// Timeout after which the search will return results.
#[structopt(long)]
fetch_timeout_ms: Option<u64>,
/// The number of returned results
#[structopt(short, long, default_value = "10")]
number_results: usize,
/// The number of characters before and after the first match
#[structopt(short = "C", long, default_value = "35")]
char_context: usize,
/// A filter string that can be `!adult` or `adult` to
/// filter documents on this specfied field
#[structopt(short, long)]
filter: Option<String>,
/// Fields that must be displayed.
displayed_fields: Vec<String>,
}
#[derive(Debug, StructOpt)]
struct ShowUpdatesCommand {
/// The path of the database to work with.
#[structopt(parse(from_os_str))]
database_path: PathBuf,
#[structopt(long, default_value = "default")]
index_uid: String,
}
#[derive(Debug, StructOpt)]
enum Command {
Index(IndexCommand),
Search(SearchCommand),
ShowUpdates(ShowUpdatesCommand),
}
impl Command {
fn path(&self) -> &Path {
match self {
Command::Index(command) => &command.database_path,
Command::Search(command) => &command.database_path,
Command::ShowUpdates(command) => &command.database_path,
}
}
}
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
struct Document(indexmap::IndexMap<String, String>);
fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dyn Error>> {
let start = Instant::now();
let (sender, receiver) = mpsc::sync_channel(100);
let update_fn =
move |_name: &str, update: ProcessedUpdateResult| sender.send(update.update_id).unwrap();
let index = match database.open_index(&command.index_uid) {
Some(index) => index,
None => database.create_index(&command.index_uid).unwrap(),
};
database.set_update_callback(Box::new(update_fn));
let db = &database;
let settings = {
let string = fs::read_to_string(&command.settings)?;
let settings: Settings = serde_json::from_str(&string).unwrap();
settings.into_update().unwrap()
};
let mut update_writer = db.update_write_txn().unwrap();
index.settings_update(&mut update_writer, settings)?;
update_writer.commit().unwrap();
let mut rdr = if command.csv_data_path.as_os_str() == "-" {
csv::Reader::from_reader(Box::new(io::stdin()) as Box<dyn Read>)
} else {
let file = std::fs::File::open(command.csv_data_path)?;
csv::Reader::from_reader(Box::new(file) as Box<dyn Read>)
};
let mut raw_record = csv::StringRecord::new();
let headers = rdr.headers()?.clone();
let mut max_update_id = 0;
let mut i = 0;
let mut end_of_file = false;
while !end_of_file {
let mut additions = index.documents_addition();
loop {
end_of_file = !rdr.read_record(&mut raw_record)?;
if end_of_file {
break;
}
let document: Document = match raw_record.deserialize(Some(&headers)) {
Ok(document) => document,
Err(e) => {
eprintln!("{:?}", e);
continue;
}
};
additions.update_document(document);
print!("\rindexing document {}", i);
i += 1;
if let Some(group_size) = command.update_group_size {
if i % group_size == 0 {
break;
}
}
}
println!();
let mut update_writer = db.update_write_txn().unwrap();
println!("committing update...");
let update_id = additions.finalize(&mut update_writer)?;
update_writer.commit().unwrap();
max_update_id = max_update_id.max(update_id);
println!("committed update {}", update_id);
}
println!("Waiting for update {}", max_update_id);
for id in receiver {
if id == max_update_id {
break;
}
}
println!(
"database created in {:.2?} at: {:?}",
start.elapsed(),
command.database_path
);
if let Some(path) = command.compact_to_path {
fs::create_dir_all(&path)?;
let start = Instant::now();
let _file = database.copy_and_compact_to_path(path.join("data.mdb"))?;
println!(
"database compacted in {:.2?} at: {:?}",
start.elapsed(),
path
);
}
Ok(())
}
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
let mut stdout = StandardStream::stdout(ColorChoice::Always);
let mut highlighted = false;
for range in ranges.windows(2) {
let [start, end] = match range {
[start, end] => [*start, *end],
_ => unreachable!(),
};
if highlighted {
stdout.set_color(
ColorSpec::new()
.set_fg(Some(Color::Yellow))
.set_underline(true),
)?;
}
write!(&mut stdout, "{}", &text[start..end])?;
stdout.reset()?;
highlighted = !highlighted;
}
Ok(())
}
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
let mut byte_index = 0;
let mut byte_length = 0;
for (n, (i, c)) in text.char_indices().enumerate() {
if n == index {
byte_index = i;
}
if n + 1 == index + length {
byte_length = i - byte_index + c.len_utf8();
break;
}
}
(byte_index, byte_length)
}
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
let mut byte_indexes = BTreeMap::new();
for highlight in highlights {
let char_index = highlight.char_index as usize;
let char_length = highlight.char_length as usize;
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
match byte_indexes.entry(byte_index) {
Entry::Vacant(entry) => {
entry.insert(byte_length);
}
Entry::Occupied(mut entry) => {
if *entry.get() < byte_length {
entry.insert(byte_length);
}
}
}
}
let mut title_areas = Vec::new();
title_areas.push(0);
for (byte_index, length) in byte_indexes {
title_areas.push(byte_index);
title_areas.push(byte_index + length);
}
title_areas.push(text.len());
title_areas.sort_unstable();
title_areas
}
/// note: matches must have been sorted by `char_index` and `char_length` before being passed.
///
/// ```no_run
/// matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
///
/// let matches = matches.matches.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
///
/// let (text, matches) = crop_text(&text, matches, 35);
/// ```
fn crop_text(
text: &str,
highlights: impl IntoIterator<Item = Highlight>,
context: usize,
) -> (String, Vec<Highlight>) {
let mut highlights = highlights.into_iter().peekable();
let char_index = highlights
.peek()
.map(|m| m.char_index as usize)
.unwrap_or(0);
let start = char_index.saturating_sub(context);
let text = text.chars().skip(start).take(context * 2).collect();
let highlights = highlights
.take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2))
.map(|highlight| Highlight {
char_index: highlight.char_index - start as u16,
..highlight
})
.collect();
(text, highlights)
}
fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<dyn Error>> {
let db = &database;
let index = database
.open_index(&command.index_uid)
.expect("Could not find index");
let reader = db.main_read_txn().unwrap();
let schema = index.main.schema(&reader)?;
reader.abort();
let schema = schema.ok_or(meilisearch_core::Error::SchemaMissing)?;
let fields = command.displayed_fields.iter().map(String::as_str);
let fields = HashSet::from_iter(fields);
let config = Config::builder().auto_add_history(true).build();
let mut readline = Editor::<()>::with_config(config);
let _ = readline.load_history("query-history.txt");
for result in readline.iter("Searching for: ") {
match result {
Ok(query) => {
let start_total = Instant::now();
let reader = db.main_read_txn().unwrap();
let ref_index = &index;
let ref_reader = &reader;
let mut builder = index.query_builder();
if let Some(timeout) = command.fetch_timeout_ms {
builder.with_fetch_timeout(Duration::from_millis(timeout));
}
if let Some(ref filter) = command.filter {
let filter = filter.as_str();
let (positive, filter) = if filter.chars().next() == Some('!') {
(false, &filter[1..])
} else {
(true, filter)
};
let attr = schema
.id(filter)
.expect("Could not find filtered attribute");
builder.with_filter(move |document_id| {
let string: String = ref_index
.document_attribute(ref_reader, document_id, attr)
.unwrap()
.unwrap();
(string == "true") == positive
});
}
let (documents, _nb_hits) = builder.query(ref_reader, &query, 0..command.number_results)?;
let mut retrieve_duration = Duration::default();
let number_of_documents = documents.len();
for mut doc in documents {
doc.highlights
.sort_unstable_by_key(|m| (m.char_index, m.char_length));
let start_retrieve = Instant::now();
let result = index.document::<Document>(&reader, Some(&fields), doc.id);
retrieve_duration += start_retrieve.elapsed();
match result {
Ok(Some(document)) => {
println!("raw-id: {:?}", doc.id);
for (name, text) in document.0 {
print!("{}: ", name);
let attr = schema.id(&name).unwrap();
let highlights = doc
.highlights
.iter()
.filter(|m| FieldId::new(m.attribute) == attr)
.cloned();
let (text, highlights) =
crop_text(&text, highlights, command.char_context);
let areas = create_highlight_areas(&text, &highlights);
display_highlights(&text, &areas)?;
println!();
}
}
Ok(None) => eprintln!("missing document"),
Err(e) => eprintln!("{}", e),
}
let mut matching_attributes = HashSet::new();
for highlight in doc.highlights {
let attr = FieldId::new(highlight.attribute);
let name = schema.name(attr);
matching_attributes.insert(name);
}
let matching_attributes = Vec::from_iter(matching_attributes);
println!("matching in: {:?}", matching_attributes);
println!();
}
eprintln!(
"whole documents fields retrieve took {:.2?}",
retrieve_duration
);
eprintln!(
"===== Found {} results in {:.2?} =====",
number_of_documents,
start_total.elapsed()
);
}
Err(err) => {
println!("Error: {:?}", err);
break;
}
}
}
readline.save_history("query-history.txt").unwrap();
Ok(())
}
fn show_updates_command(
command: ShowUpdatesCommand,
database: Database,
) -> Result<(), Box<dyn Error>> {
let db = &database;
let index = database
.open_index(&command.index_uid)
.expect("Could not find index");
let reader = db.update_read_txn().unwrap();
let updates = index.all_updates_status(&reader)?;
println!("{:#?}", updates);
reader.abort();
Ok(())
}
fn main() -> Result<(), Box<dyn Error>> {
env_logger::init();
let opt = Command::from_args();
let database = Database::open_or_create(opt.path())?;
match opt {
Command::Index(command) => index_command(command, database),
Command::Search(command) => search_command(command, database),
Command::ShowUpdates(command) => show_updates_command(command, database),
}
}

View File

@ -0,0 +1,53 @@
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use once_cell::sync::OnceCell;
static LEVDIST0: OnceCell<LevBuilder> = OnceCell::new();
static LEVDIST1: OnceCell<LevBuilder> = OnceCell::new();
static LEVDIST2: OnceCell<LevBuilder> = OnceCell::new();
#[derive(Copy, Clone)]
enum PrefixSetting {
Prefix,
NoPrefix,
}
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
use PrefixSetting::{NoPrefix, Prefix};
match query.len() {
0..=4 => {
let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
match setting {
Prefix => builder.build_prefix_dfa(query),
NoPrefix => builder.build_dfa(query),
}
}
5..=8 => {
let builder = LEVDIST1.get_or_init(|| LevBuilder::new(1, true));
match setting {
Prefix => builder.build_prefix_dfa(query),
NoPrefix => builder.build_dfa(query),
}
}
_ => {
let builder = LEVDIST2.get_or_init(|| LevBuilder::new(2, true));
match setting {
Prefix => builder.build_prefix_dfa(query),
NoPrefix => builder.build_dfa(query),
}
}
}
}
pub fn build_prefix_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::Prefix)
}
pub fn build_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}
pub fn build_exact_dfa(query: &str) -> DFA {
let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
builder.build_dfa(query)
}

View File

@ -0,0 +1,15 @@
mod dfa;
use meilisearch_tokenizer::is_cjk;
pub use self::dfa::{build_dfa, build_prefix_dfa, build_exact_dfa};
pub fn normalize_str(string: &str) -> String {
let mut string = string.to_lowercase();
if !string.contains(is_cjk) {
string = deunicode::deunicode_with_tofu(&string, "");
}
string
}

View File

@ -0,0 +1,560 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::mem;
use std::ops::Deref;
use std::ops::Range;
use std::rc::Rc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Instant;
use std::fmt;
use compact_arena::{SmallArena, Idx32, mk_arena};
use log::debug;
use meilisearch_types::DocIndex;
use sdset::{Set, SetBuf, exponential_search};
use slice_group_by::{GroupBy, GroupByMut};
use crate::error::Error;
use crate::criterion::{Criteria, Context, ContextMut};
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
use crate::raw_document::RawDocument;
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
use crate::{store, Document, DocumentId, MResult};
use crate::query_tree::{create_query_tree, traverse_query_tree};
use crate::query_tree::{Operation, QueryResult, QueryKind, QueryId, PostingsKey};
use crate::query_tree::Context as QTContext;
pub fn bucket_sort<'c, FI>(
reader: &heed::RoTxn<MainT>,
query: &str,
range: Range<usize>,
filter: Option<FI>,
criteria: Criteria<'c>,
searchable_attrs: Option<ReorderedAttrs>,
main_store: store::Main,
postings_lists_store: store::PostingsLists,
documents_fields_counts_store: store::DocumentsFieldsCounts,
synonyms_store: store::Synonyms,
prefix_documents_cache_store: store::PrefixDocumentsCache,
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
) -> MResult<(Vec<Document>, usize)>
where
FI: Fn(DocumentId) -> bool,
{
// We delegate the filter work to the distinct query builder,
// specifying a distinct rule that has no effect.
if filter.is_some() {
let distinct = |_| None;
let distinct_size = 1;
return bucket_sort_with_distinct(
reader,
query,
range,
filter,
distinct,
distinct_size,
criteria,
searchable_attrs,
main_store,
postings_lists_store,
documents_fields_counts_store,
synonyms_store,
prefix_documents_cache_store,
prefix_postings_lists_cache_store,
);
}
let words_set = match unsafe { main_store.static_words_fst(reader)? } {
Some(words) => words,
None => return Ok((Vec::new(), 0)),
};
let stop_words = main_store.stop_words_fst(reader)?.unwrap_or_default();
let context = QTContext {
words_set,
stop_words,
synonyms: synonyms_store,
postings_lists: postings_lists_store,
prefix_postings_lists: prefix_postings_lists_cache_store,
};
let (operation, mapping) = create_query_tree(reader, &context, query)?;
debug!("operation:\n{:?}", operation);
debug!("mapping:\n{:?}", mapping);
fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) {
match operation {
Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
Operation::Query(query) => { map.insert(query.id, &query.kind); },
}
}
let mut queries_kinds = HashMap::new();
recurs_operation(&mut queries_kinds, &operation);
let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation)?;
debug!("found {} documents", docids.len());
debug!("number of postings {:?}", queries.len());
let before = Instant::now();
mk_arena!(arena);
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
debug!("matches cleaned in {:.02?}", before.elapsed());
let before_bucket_sort = Instant::now();
let before_raw_documents_building = Instant::now();
let mut raw_documents = Vec::new();
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
let raw_document = RawDocument::new(bare_matches, &mut arena, searchable_attrs.as_ref());
raw_documents.push(raw_document);
}
debug!("creating {} candidates documents took {:.02?}",
raw_documents.len(),
before_raw_documents_building.elapsed(),
);
let before_criterion_loop = Instant::now();
let proximity_count = AtomicUsize::new(0);
let mut groups = vec![raw_documents.as_mut_slice()];
'criteria: for criterion in criteria.as_ref() {
let tmp_groups = mem::replace(&mut groups, Vec::new());
let mut documents_seen = 0;
for mut group in tmp_groups {
let before_criterion_preparation = Instant::now();
let ctx = ContextMut {
reader,
postings_lists: &mut arena,
query_mapping: &mapping,
documents_fields_counts_store,
};
criterion.prepare(ctx, &mut group)?;
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
let ctx = Context {
postings_lists: &arena,
query_mapping: &mapping,
};
let before_criterion_sort = Instant::now();
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
debug!("{:?} produced a group of size {}", criterion.name(), group.len());
documents_seen += group.len();
groups.push(group);
// we have sort enough documents if the last document sorted is after
// the end of the requested range, we can continue to the next criterion
if documents_seen >= range.end {
continue 'criteria;
}
}
}
}
debug!("criterion loop took {:.02?}", before_criterion_loop.elapsed());
debug!("proximity evaluation called {} times", proximity_count.load(Ordering::Relaxed));
let schema = main_store.schema(reader)?.ok_or(Error::SchemaMissing)?;
let iter = raw_documents.into_iter().skip(range.start).take(range.len());
let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
let documents = iter.collect();
debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
Ok((documents, docids.len()))
}
pub fn bucket_sort_with_distinct<'c, FI, FD>(
reader: &heed::RoTxn<MainT>,
query: &str,
range: Range<usize>,
filter: Option<FI>,
distinct: FD,
distinct_size: usize,
criteria: Criteria<'c>,
searchable_attrs: Option<ReorderedAttrs>,
main_store: store::Main,
postings_lists_store: store::PostingsLists,
documents_fields_counts_store: store::DocumentsFieldsCounts,
synonyms_store: store::Synonyms,
_prefix_documents_cache_store: store::PrefixDocumentsCache,
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
) -> MResult<(Vec<Document>, usize)>
where
FI: Fn(DocumentId) -> bool,
FD: Fn(DocumentId) -> Option<u64>,
{
let words_set = match unsafe { main_store.static_words_fst(reader)? } {
Some(words) => words,
None => return Ok((Vec::new(), 0)),
};
let stop_words = main_store.stop_words_fst(reader)?.unwrap_or_default();
let context = QTContext {
words_set,
stop_words,
synonyms: synonyms_store,
postings_lists: postings_lists_store,
prefix_postings_lists: prefix_postings_lists_cache_store,
};
let (operation, mapping) = create_query_tree(reader, &context, query)?;
debug!("operation:\n{:?}", operation);
debug!("mapping:\n{:?}", mapping);
fn recurs_operation<'o>(map: &mut HashMap<QueryId, &'o QueryKind>, operation: &'o Operation) {
match operation {
Operation::And(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
Operation::Or(ops) => ops.iter().for_each(|op| recurs_operation(map, op)),
Operation::Query(query) => { map.insert(query.id, &query.kind); },
}
}
let mut queries_kinds = HashMap::new();
recurs_operation(&mut queries_kinds, &operation);
let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation)?;
debug!("found {} documents", docids.len());
debug!("number of postings {:?}", queries.len());
let before = Instant::now();
mk_arena!(arena);
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
debug!("matches cleaned in {:.02?}", before.elapsed());
let before_raw_documents_building = Instant::now();
let mut raw_documents = Vec::new();
for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
let raw_document = RawDocument::new(bare_matches, &mut arena, searchable_attrs.as_ref());
raw_documents.push(raw_document);
}
debug!("creating {} candidates documents took {:.02?}",
raw_documents.len(),
before_raw_documents_building.elapsed(),
);
let mut groups = vec![raw_documents.as_mut_slice()];
let mut key_cache = HashMap::new();
let mut filter_map = HashMap::new();
// these two variables informs on the current distinct map and
// on the raw offset of the start of the group where the
// range.start bound is located according to the distinct function
let mut distinct_map = DistinctMap::new(distinct_size);
let mut distinct_raw_offset = 0;
'criteria: for criterion in criteria.as_ref() {
let tmp_groups = mem::replace(&mut groups, Vec::new());
let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
let mut documents_seen = 0;
for mut group in tmp_groups {
// if this group does not overlap with the requested range,
// push it without sorting and splitting it
if documents_seen + group.len() < distinct_raw_offset {
documents_seen += group.len();
groups.push(group);
continue;
}
let ctx = ContextMut {
reader,
postings_lists: &mut arena,
query_mapping: &mapping,
documents_fields_counts_store,
};
let before_criterion_preparation = Instant::now();
criterion.prepare(ctx, &mut group)?;
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
let ctx = Context {
postings_lists: &arena,
query_mapping: &mapping,
};
let before_criterion_sort = Instant::now();
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
// we must compute the real distinguished len of this sub-group
for document in group.iter() {
let filter_accepted = match &filter {
Some(filter) => {
let entry = filter_map.entry(document.id);
*entry.or_insert_with(|| (filter)(document.id))
}
None => true,
};
if filter_accepted {
let entry = key_cache.entry(document.id);
let key = entry.or_insert_with(|| (distinct)(document.id).map(Rc::new));
match key.clone() {
Some(key) => buf_distinct.register(key),
None => buf_distinct.register_without_key(),
};
}
// the requested range end is reached: stop computing distinct
if buf_distinct.len() >= range.end {
break;
}
}
documents_seen += group.len();
groups.push(group);
// if this sub-group does not overlap with the requested range
// we must update the distinct map and its start index
if buf_distinct.len() < range.start {
buf_distinct.transfert_to_internal();
distinct_raw_offset = documents_seen;
}
// we have sort enough documents if the last document sorted is after
// the end of the requested range, we can continue to the next criterion
if buf_distinct.len() >= range.end {
continue 'criteria;
}
}
}
}
// once we classified the documents related to the current
// automatons we save that as the next valid result
let mut seen = BufferedDistinctMap::new(&mut distinct_map);
let schema = main_store.schema(reader)?.ok_or(Error::SchemaMissing)?;
let mut documents = Vec::with_capacity(range.len());
for raw_document in raw_documents.into_iter().skip(distinct_raw_offset) {
let filter_accepted = match &filter {
Some(_) => filter_map.remove(&raw_document.id).unwrap(),
None => true,
};
if filter_accepted {
let key = key_cache.remove(&raw_document.id).unwrap();
let distinct_accepted = match key {
Some(key) => seen.register(key),
None => seen.register_without_key(),
};
if distinct_accepted && seen.len() > range.start {
documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
if documents.len() == range.len() {
break;
}
}
}
}
Ok((documents, docids.len()))
}
fn cleanup_bare_matches<'tag, 'txn>(
arena: &mut SmallArena<'tag, PostingsListView<'txn>>,
docids: &Set<DocumentId>,
queries: HashMap<PostingsKey, Cow<'txn, Set<DocIndex>>>,
) -> Vec<BareMatch<'tag>>
{
let docidslen = docids.len() as f32;
let mut bare_matches = Vec::new();
for (PostingsKey { query, input, distance, is_exact }, matches) in queries {
let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches));
let pllen = postings_list_view.len() as f32;
if docidslen / pllen >= 0.8 {
let mut offset = 0;
for matches in postings_list_view.linear_group_by_key(|m| m.document_id) {
let document_id = matches[0].document_id;
if docids.contains(&document_id) {
let range = postings_list_view.range(offset, matches.len());
let posting_list_index = arena.add(range);
let bare_match = BareMatch {
document_id,
query_index: query.id,
distance,
is_exact,
postings_list: posting_list_index,
};
bare_matches.push(bare_match);
}
offset += matches.len();
}
} else {
let mut offset = 0;
for id in docids.as_slice() {
let di = DocIndex { document_id: *id, ..DocIndex::default() };
let pos = exponential_search(&postings_list_view[offset..], &di).unwrap_or_else(|x| x);
offset += pos;
let group = postings_list_view[offset..]
.linear_group_by_key(|m| m.document_id)
.next()
.filter(|matches| matches[0].document_id == *id);
if let Some(matches) = group {
let range = postings_list_view.range(offset, matches.len());
let posting_list_index = arena.add(range);
let bare_match = BareMatch {
document_id: *id,
query_index: query.id,
distance,
is_exact,
postings_list: posting_list_index,
};
bare_matches.push(bare_match);
}
}
}
}
let before_raw_documents_presort = Instant::now();
bare_matches.sort_unstable_by_key(|sm| sm.document_id);
debug!("sort by documents ids took {:.02?}", before_raw_documents_presort.elapsed());
bare_matches
}
pub struct BareMatch<'tag> {
pub document_id: DocumentId,
pub query_index: usize,
pub distance: u8,
pub is_exact: bool,
pub postings_list: Idx32<'tag>,
}
impl fmt::Debug for BareMatch<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("BareMatch")
.field("document_id", &self.document_id)
.field("query_index", &self.query_index)
.field("distance", &self.distance)
.field("is_exact", &self.is_exact)
.finish()
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct SimpleMatch {
pub query_index: usize,
pub distance: u8,
pub attribute: u16,
pub word_index: u16,
pub is_exact: bool,
}
#[derive(Clone)]
pub enum PostingsListView<'txn> {
Original {
input: Rc<[u8]>,
postings_list: Rc<Cow<'txn, Set<DocIndex>>>,
offset: usize,
len: usize,
},
Rewritten {
input: Rc<[u8]>,
postings_list: SetBuf<DocIndex>,
},
}
impl fmt::Debug for PostingsListView<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("PostingsListView")
.field("input", &std::str::from_utf8(&self.input()).unwrap())
.field("postings_list", &self.as_ref())
.finish()
}
}
impl<'txn> PostingsListView<'txn> {
pub fn original(input: Rc<[u8]>, postings_list: Rc<Cow<'txn, Set<DocIndex>>>) -> PostingsListView<'txn> {
let len = postings_list.len();
PostingsListView::Original { input, postings_list, offset: 0, len }
}
pub fn rewritten(input: Rc<[u8]>, postings_list: SetBuf<DocIndex>) -> PostingsListView<'txn> {
PostingsListView::Rewritten { input, postings_list }
}
pub fn rewrite_with(&mut self, postings_list: SetBuf<DocIndex>) {
let input = match self {
PostingsListView::Original { input, .. } => input.clone(),
PostingsListView::Rewritten { input, .. } => input.clone(),
};
*self = PostingsListView::rewritten(input, postings_list);
}
pub fn len(&self) -> usize {
match self {
PostingsListView::Original { len, .. } => *len,
PostingsListView::Rewritten { postings_list, .. } => postings_list.len(),
}
}
pub fn input(&self) -> &[u8] {
match self {
PostingsListView::Original { ref input, .. } => input,
PostingsListView::Rewritten { ref input, .. } => input,
}
}
pub fn range(&self, range_offset: usize, range_len: usize) -> PostingsListView<'txn> {
match self {
PostingsListView::Original { input, postings_list, offset, len } => {
assert!(range_offset + range_len <= *len);
PostingsListView::Original {
input: input.clone(),
postings_list: postings_list.clone(),
offset: offset + range_offset,
len: range_len,
}
},
PostingsListView::Rewritten { .. } => {
panic!("Cannot create a range on a rewritten postings list view");
}
}
}
}
impl AsRef<Set<DocIndex>> for PostingsListView<'_> {
fn as_ref(&self) -> &Set<DocIndex> {
self
}
}
impl Deref for PostingsListView<'_> {
type Target = Set<DocIndex>;
fn deref(&self) -> &Set<DocIndex> {
match *self {
PostingsListView::Original { ref postings_list, offset, len, .. } => {
Set::new_unchecked(&postings_list[offset..offset + len])
},
PostingsListView::Rewritten { ref postings_list, .. } => postings_list,
}
}
}

View File

@ -0,0 +1,37 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::{RawDocument, MResult};
use crate::bucket_sort::SimpleMatch;
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
pub struct Attribute;
impl Criterion for Attribute {
fn name(&self) -> &str { "attribute" }
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
&self,
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
Ok(())
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
#[inline]
fn sum_of_attribute(matches: &[SimpleMatch]) -> usize {
let mut sum_of_attribute = 0;
for group in matches.linear_group_by_key(|bm| bm.query_index) {
sum_of_attribute += group[0].attribute as usize;
}
sum_of_attribute
}
let lhs = sum_of_attribute(&lhs.processed_matches);
let rhs = sum_of_attribute(&rhs.processed_matches);
lhs.cmp(&rhs)
}
}

View File

@ -0,0 +1,16 @@
use std::cmp::Ordering;
use crate::RawDocument;
use super::{Criterion, Context};
pub struct DocumentId;
impl Criterion for DocumentId {
fn name(&self) -> &str { "stable document id" }
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = &lhs.id;
let rhs = &rhs.id;
lhs.cmp(rhs)
}
}

View File

@ -0,0 +1,78 @@
use std::cmp::{Ordering, Reverse};
use std::collections::hash_map::{HashMap, Entry};
use meilisearch_schema::IndexedPos;
use slice_group_by::GroupBy;
use crate::{RawDocument, MResult};
use crate::bucket_sort::BareMatch;
use super::{Criterion, Context, ContextMut};
pub struct Exactness;
impl Criterion for Exactness {
fn name(&self) -> &str { "exactness" }
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
&self,
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
let store = ctx.documents_fields_counts_store;
let reader = ctx.reader;
'documents: for doc in documents {
doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
// mark the document if we find a "one word field" that matches
let mut fields_counts = HashMap::new();
for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
for group in group.linear_group_by_key(|bm| bm.is_exact) {
if !group[0].is_exact { break }
for bm in group {
for di in ctx.postings_lists[bm.postings_list].as_ref() {
let attr = IndexedPos(di.attribute);
let count = match fields_counts.entry(attr) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let count = store.document_field_count(reader, doc.id, attr)?;
*entry.insert(count)
},
};
if count == Some(1) {
doc.contains_one_word_field = true;
continue 'documents
}
}
}
}
}
}
Ok(())
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
#[inline]
fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
let mut sum_exact_query_words = 0;
for group in matches.linear_group_by_key(|bm| bm.query_index) {
sum_exact_query_words += group[0].is_exact as usize;
}
sum_exact_query_words
}
// does it contains a "one word field"
lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
// if not, with document contains the more exact words
.then_with(|| {
let lhs = sum_exact_query_words(&lhs.bare_matches);
let rhs = sum_exact_query_words(&rhs.bare_matches);
lhs.cmp(&rhs).reverse()
})
}
}

View File

@ -0,0 +1,291 @@
use std::cmp::{self, Ordering};
use std::collections::HashMap;
use std::ops::Range;
use compact_arena::SmallArena;
use sdset::SetBuf;
use slice_group_by::GroupBy;
use crate::bucket_sort::{SimpleMatch, PostingsListView};
use crate::database::MainT;
use crate::query_tree::QueryId;
use crate::{store, RawDocument, MResult};
mod typo;
mod words;
mod proximity;
mod attribute;
mod words_position;
mod exactness;
mod document_id;
mod sort_by_attr;
pub use self::typo::Typo;
pub use self::words::Words;
pub use self::proximity::Proximity;
pub use self::attribute::Attribute;
pub use self::words_position::WordsPosition;
pub use self::exactness::Exactness;
pub use self::document_id::DocumentId;
pub use self::sort_by_attr::SortByAttr;
pub trait Criterion {
fn name(&self) -> &str;
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
&self,
_ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
_documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
Ok(())
}
fn evaluate<'p, 'tag, 'txn, 'q, 'r>(
&self,
ctx: &Context<'p, 'tag, 'txn, 'q>,
lhs: &RawDocument<'r, 'tag>,
rhs: &RawDocument<'r, 'tag>,
) -> Ordering;
#[inline]
fn eq<'p, 'tag, 'txn, 'q, 'r>(
&self,
ctx: &Context<'p, 'tag, 'txn, 'q>,
lhs: &RawDocument<'r, 'tag>,
rhs: &RawDocument<'r, 'tag>,
) -> bool
{
self.evaluate(ctx, lhs, rhs) == Ordering::Equal
}
}
pub struct ContextMut<'h, 'p, 'tag, 'txn, 'q> {
pub reader: &'h heed::RoTxn<MainT>,
pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>,
pub query_mapping: &'q HashMap<QueryId, Range<usize>>,
pub documents_fields_counts_store: store::DocumentsFieldsCounts,
}
pub struct Context<'p, 'tag, 'txn, 'q> {
pub postings_lists: &'p SmallArena<'tag, PostingsListView<'txn>>,
pub query_mapping: &'q HashMap<QueryId, Range<usize>>,
}
#[derive(Default)]
pub struct CriteriaBuilder<'a> {
inner: Vec<Box<dyn Criterion + 'a>>,
}
impl<'a> CriteriaBuilder<'a> {
pub fn new() -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::new() }
}
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
CriteriaBuilder {
inner: Vec::with_capacity(capacity),
}
}
pub fn reserve(&mut self, additional: usize) {
self.inner.reserve(additional)
}
pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
where
C: Criterion,
{
self.push(criterion);
self
}
pub fn push<C: 'a>(&mut self, criterion: C)
where
C: Criterion,
{
self.inner.push(Box::new(criterion));
}
pub fn build(self) -> Criteria<'a> {
Criteria { inner: self.inner }
}
}
pub struct Criteria<'a> {
inner: Vec<Box<dyn Criterion + 'a>>,
}
impl<'a> Default for Criteria<'a> {
fn default() -> Self {
CriteriaBuilder::with_capacity(7)
.add(Typo)
.add(Words)
.add(Proximity)
.add(Attribute)
.add(WordsPosition)
.add(Exactness)
.add(DocumentId)
.build()
}
}
impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
fn as_ref(&self) -> &[Box<dyn Criterion + 'a>] {
&self.inner
}
}
fn prepare_query_distances<'a, 'tag, 'txn>(
documents: &mut [RawDocument<'a, 'tag>],
query_mapping: &HashMap<QueryId, Range<usize>>,
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
) {
for document in documents {
if !document.processed_distances.is_empty() { continue }
let mut processed = Vec::new();
for m in document.bare_matches.iter() {
if postings_lists[m.postings_list].is_empty() { continue }
let range = query_mapping[&(m.query_index as usize)].clone();
let new_len = cmp::max(range.end as usize, processed.len());
processed.resize(new_len, None);
for index in range {
let index = index as usize;
processed[index] = match processed[index] {
Some(distance) if distance > m.distance => Some(m.distance),
Some(distance) => Some(distance),
None => Some(m.distance),
};
}
}
document.processed_distances = processed;
}
}
fn prepare_bare_matches<'a, 'tag, 'txn>(
documents: &mut [RawDocument<'a, 'tag>],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
query_mapping: &HashMap<QueryId, Range<usize>>,
) {
for document in documents {
if !document.processed_matches.is_empty() { continue }
let mut processed = Vec::new();
for m in document.bare_matches.iter() {
let postings_list = &postings_lists[m.postings_list];
processed.reserve(postings_list.len());
for di in postings_list.as_ref() {
let simple_match = SimpleMatch {
query_index: m.query_index,
distance: m.distance,
attribute: di.attribute,
word_index: di.word_index,
is_exact: m.is_exact,
};
processed.push(simple_match);
}
}
let processed = multiword_rewrite_matches(&mut processed, query_mapping);
document.processed_matches = processed.into_vec();
}
}
fn multiword_rewrite_matches(
matches: &mut [SimpleMatch],
query_mapping: &HashMap<QueryId, Range<usize>>,
) -> SetBuf<SimpleMatch>
{
matches.sort_unstable_by_key(|m| (m.attribute, m.word_index));
let mut padded_matches = Vec::with_capacity(matches.len());
// let before_padding = Instant::now();
// for each attribute of each document
for same_document_attribute in matches.linear_group_by_key(|m| m.attribute) {
// padding will only be applied
// to word indices in the same attribute
let mut padding = 0;
let mut iter = same_document_attribute.linear_group_by_key(|m| m.word_index);
// for each match at the same position
// in this document attribute
while let Some(same_word_index) = iter.next() {
// find the biggest padding
let mut biggest = 0;
for match_ in same_word_index {
let mut replacement = query_mapping[&(match_.query_index as usize)].clone();
let replacement_len = replacement.len();
let nexts = iter.remainder().linear_group_by_key(|m| m.word_index);
if let Some(query_index) = replacement.next() {
let word_index = match_.word_index + padding as u16;
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
padded_matches.push(match_);
}
let mut found = false;
// look ahead and if there already is a match
// corresponding to this padding word, abort the padding
'padding: for (x, next_group) in nexts.enumerate() {
for (i, query_index) in replacement.clone().enumerate().skip(x) {
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
let padmatch = SimpleMatch { query_index, word_index, ..*match_ };
for nmatch_ in next_group {
let mut rep = query_mapping[&(nmatch_.query_index as usize)].clone();
let query_index = rep.next().unwrap();
if query_index == padmatch.query_index {
if !found {
// if we find a corresponding padding for the
// first time we must push preceding paddings
for (i, query_index) in replacement.clone().enumerate().take(i) {
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
padded_matches.push(match_);
biggest = biggest.max(i + 1);
}
}
padded_matches.push(padmatch);
found = true;
continue 'padding;
}
}
}
// if we do not find a corresponding padding in the
// next groups so stop here and pad what was found
break;
}
if !found {
// if no padding was found in the following matches
// we must insert the entire padding
for (i, query_index) in replacement.enumerate() {
let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
let match_ = SimpleMatch { query_index, word_index, ..*match_ };
padded_matches.push(match_);
}
biggest = biggest.max(replacement_len - 1);
}
}
padding += biggest;
}
}
// debug!("padding matches took {:.02?}", before_padding.elapsed());
// With this check we can see that the loop above takes something
// like 43% of the search time even when no rewrite is needed.
// assert_eq!(before_matches, padded_matches);
SetBuf::from_dirty(padded_matches)
}

View File

@ -0,0 +1,68 @@
use std::cmp::{self, Ordering};
use slice_group_by::GroupBy;
use crate::bucket_sort::{SimpleMatch};
use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
const MAX_DISTANCE: u16 = 8;
pub struct Proximity;
impl Criterion for Proximity {
fn name(&self) -> &str { "proximity" }
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
&self,
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
Ok(())
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)
} else {
cmp::min(lhs - rhs, MAX_DISTANCE) + 1
}
}
fn attribute_proximity(lhs: SimpleMatch, rhs: SimpleMatch) -> u16 {
if lhs.attribute != rhs.attribute { MAX_DISTANCE }
else { index_proximity(lhs.word_index, rhs.word_index) }
}
fn min_proximity(lhs: &[SimpleMatch], rhs: &[SimpleMatch]) -> u16 {
let mut min_prox = u16::max_value();
for a in lhs {
for b in rhs {
let prox = attribute_proximity(*a, *b);
min_prox = cmp::min(min_prox, prox);
}
}
min_prox
}
fn matches_proximity(matches: &[SimpleMatch],) -> u16 {
let mut proximity = 0;
let mut iter = matches.linear_group_by_key(|m| m.query_index);
// iterate over groups by windows of size 2
let mut last = iter.next();
while let (Some(lhs), Some(rhs)) = (last, iter.next()) {
proximity += min_proximity(lhs, rhs);
last = Some(rhs);
}
proximity
}
let lhs = matches_proximity(&lhs.processed_matches);
let rhs = matches_proximity(&rhs.processed_matches);
lhs.cmp(&rhs)
}
}

View File

@ -1,10 +1,9 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use meilidb_core::{criterion::Criterion, RawDocument};
use meilidb_data::RankedMap;
use meilidb_schema::{Schema, SchemaAttr};
use meilisearch_schema::{Schema, FieldId};
use crate::{RankedMap, RawDocument};
use super::{Criterion, Context};
/// An helper struct that permit to sort documents by
/// some of their stored attributes.
@ -23,17 +22,17 @@ use meilidb_schema::{Schema, SchemaAttr};
///
/// ```ignore
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
/// use meilisearch::rank::criterion::*;
///
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
///
/// let builder = CriteriaBuilder::with_capacity(8)
/// .add(SumOfTypos)
/// .add(NumberOfWords)
/// .add(WordsProximity)
/// .add(SumOfWordsAttribute)
/// .add(SumOfWordsPosition)
/// .add(Exact)
/// .add(Typo)
/// .add(Words)
/// .add(Proximity)
/// .add(Attribute)
/// .add(WordsPosition)
/// .add(Exactness)
/// .add(custom_ranking)
/// .add(DocumentId);
///
@ -42,7 +41,7 @@ use meilidb_schema::{Schema, SchemaAttr};
/// ```
pub struct SortByAttr<'a> {
ranked_map: &'a RankedMap,
attr: SchemaAttr,
field_id: FieldId,
reversed: bool,
}
@ -51,8 +50,7 @@ impl<'a> SortByAttr<'a> {
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
) -> Result<SortByAttr<'a>, SortByAttrError> {
SortByAttr::new(ranked_map, schema, attr_name, false)
}
@ -60,8 +58,7 @@ impl<'a> SortByAttr<'a> {
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
) -> Result<SortByAttr<'a>, SortByAttrError> {
SortByAttr::new(ranked_map, schema, attr_name, true)
}
@ -70,40 +67,47 @@ impl<'a> SortByAttr<'a> {
schema: &Schema,
attr_name: &str,
reversed: bool,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
let attr = match schema.attribute(attr_name) {
Some(attr) => attr,
) -> Result<SortByAttr<'a>, SortByAttrError> {
let field_id = match schema.id(attr_name) {
Some(field_id) => field_id,
None => return Err(SortByAttrError::AttributeNotFound),
};
if !schema.props(attr).is_ranked() {
if !schema.is_ranked(field_id) {
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
}
Ok(SortByAttr { ranked_map, attr, reversed })
Ok(SortByAttr {
ranked_map,
field_id,
reversed,
})
}
}
impl<'a> Criterion for SortByAttr<'a> {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = self.ranked_map.get(lhs.id, self.attr);
let rhs = self.ranked_map.get(rhs.id, self.attr);
impl Criterion for SortByAttr<'_> {
fn name(&self) -> &str {
"sort by attribute"
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = self.ranked_map.get(lhs.id, self.field_id);
let rhs = self.ranked_map.get(rhs.id, self.field_id);
match (lhs, rhs) {
(Some(lhs), Some(rhs)) => {
let order = lhs.cmp(&rhs);
if self.reversed { order.reverse() } else { order }
},
(None, Some(_)) => Ordering::Greater,
(Some(_), None) => Ordering::Less,
(None, None) => Ordering::Equal,
if self.reversed {
order.reverse()
} else {
order
}
}
(None, Some(_)) => Ordering::Greater,
(Some(_), None) => Ordering::Less,
(None, None) => Ordering::Equal,
}
}
fn name(&self) -> &'static str {
"SortByAttr"
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@ -122,4 +126,4 @@ impl fmt::Display for SortByAttrError {
}
}
impl Error for SortByAttrError { }
impl Error for SortByAttrError {}

View File

@ -0,0 +1,55 @@
use std::cmp::Ordering;
use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_query_distances};
pub struct Typo;
impl Criterion for Typo {
fn name(&self) -> &str { "typo" }
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
&self,
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
prepare_query_distances(documents, ctx.query_mapping, ctx.postings_lists);
Ok(())
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
// This function is a wrong logarithmic 10 function.
// It is safe to panic on input number higher than 3,
// the number of typos is never bigger than that.
#[inline]
fn custom_log10(n: u8) -> f32 {
match n {
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
_ => panic!("invalid number"),
}
}
#[inline]
fn compute_typos(distances: &[Option<u8>]) -> usize {
let mut number_words: usize = 0;
let mut sum_typos = 0.0;
for distance in distances {
if let Some(distance) = distance {
sum_typos += custom_log10(*distance);
number_words += 1;
}
}
(number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
}
let lhs = compute_typos(&lhs.processed_distances);
let rhs = compute_typos(&rhs.processed_distances);
lhs.cmp(&rhs).reverse()
}
}

View File

@ -0,0 +1,31 @@
use std::cmp::Ordering;
use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_query_distances};
pub struct Words;
impl Criterion for Words {
fn name(&self) -> &str { "words" }
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
&self,
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
prepare_query_distances(documents, ctx.query_mapping, ctx.postings_lists);
Ok(())
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
#[inline]
fn number_of_query_words(distances: &[Option<u8>]) -> usize {
distances.iter().cloned().filter(Option::is_some).count()
}
let lhs = number_of_query_words(&lhs.processed_distances);
let rhs = number_of_query_words(&rhs.processed_distances);
lhs.cmp(&rhs).reverse()
}
}

View File

@ -0,0 +1,37 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::bucket_sort::SimpleMatch;
use crate::{RawDocument, MResult};
use super::{Criterion, Context, ContextMut, prepare_bare_matches};
pub struct WordsPosition;
impl Criterion for WordsPosition {
fn name(&self) -> &str { "words position" }
fn prepare<'h, 'p, 'tag, 'txn, 'q, 'r>(
&self,
ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q>,
documents: &mut [RawDocument<'r, 'tag>],
) -> MResult<()>
{
prepare_bare_matches(documents, ctx.postings_lists, ctx.query_mapping);
Ok(())
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
#[inline]
fn sum_words_position(matches: &[SimpleMatch]) -> usize {
let mut sum_words_position = 0;
for group in matches.linear_group_by_key(|bm| bm.query_index) {
sum_words_position += group[0].word_index as usize;
}
sum_words_position
}
let lhs = sum_words_position(&lhs.processed_matches);
let rhs = sum_words_position(&rhs.processed_matches);
lhs.cmp(&rhs)
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
use std::hash::Hash;
use hashbrown::HashMap;
use std::hash::Hash;
pub struct DistinctMap<K> {
inner: HashMap<K, usize>,

View File

@ -0,0 +1,158 @@
use crate::serde::{DeserializerError, SerializerError};
use serde_json::Error as SerdeJsonError;
use pest::error::Error as PestError;
use crate::filters::Rule;
use std::{error, fmt, io};
pub use bincode::Error as BincodeError;
pub use fst::Error as FstError;
pub use heed::Error as HeedError;
pub use pest::error as pest_error;
pub type MResult<T> = Result<T, Error>;
#[derive(Debug)]
pub enum Error {
Io(io::Error),
IndexAlreadyExists,
MissingPrimaryKey,
SchemaMissing,
WordIndexMissing,
MissingDocumentId,
MaxFieldsLimitExceeded,
Schema(meilisearch_schema::Error),
Zlmdb(heed::Error),
Fst(fst::Error),
SerdeJson(SerdeJsonError),
Bincode(bincode::Error),
Serializer(SerializerError),
Deserializer(DeserializerError),
UnsupportedOperation(UnsupportedOperation),
FilterParseError(PestError<Rule>)
}
impl From<io::Error> for Error {
fn from(error: io::Error) -> Error {
Error::Io(error)
}
}
impl From<PestError<Rule>> for Error {
fn from(error: PestError<Rule>) -> Error {
Error::FilterParseError(error.renamed_rules(|r| {
let s = match r {
Rule::or => "OR",
Rule::and => "AND",
Rule::not => "NOT",
Rule::string => "string",
Rule::word => "word",
Rule::greater => "field > value",
Rule::less => "field < value",
Rule::eq => "field = value",
Rule::leq => "field <= value",
Rule::geq => "field >= value",
Rule::key => "key",
_ => "other",
};
s.to_string()
}))
}
}
impl From<meilisearch_schema::Error> for Error {
fn from(error: meilisearch_schema::Error) -> Error {
Error::Schema(error)
}
}
impl From<HeedError> for Error {
fn from(error: HeedError) -> Error {
Error::Zlmdb(error)
}
}
impl From<FstError> for Error {
fn from(error: FstError) -> Error {
Error::Fst(error)
}
}
impl From<SerdeJsonError> for Error {
fn from(error: SerdeJsonError) -> Error {
Error::SerdeJson(error)
}
}
impl From<BincodeError> for Error {
fn from(error: BincodeError) -> Error {
Error::Bincode(error)
}
}
impl From<SerializerError> for Error {
fn from(error: SerializerError) -> Error {
Error::Serializer(error)
}
}
impl From<DeserializerError> for Error {
fn from(error: DeserializerError) -> Error {
Error::Deserializer(error)
}
}
impl From<UnsupportedOperation> for Error {
fn from(op: UnsupportedOperation) -> Error {
Error::UnsupportedOperation(op)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
Io(e) => write!(f, "{}", e),
IndexAlreadyExists => write!(f, "index already exists"),
MissingPrimaryKey => write!(f, "schema cannot be built without a primary key"),
SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"),
MaxFieldsLimitExceeded => write!(f, "maximum number of fields in a document exceeded"),
Schema(e) => write!(f, "schema error; {}", e),
Zlmdb(e) => write!(f, "heed error; {}", e),
Fst(e) => write!(f, "fst error; {}", e),
SerdeJson(e) => write!(f, "serde json error; {}", e),
Bincode(e) => write!(f, "bincode error; {}", e),
Serializer(e) => write!(f, "serializer error; {}", e),
Deserializer(e) => write!(f, "deserializer error; {}", e),
UnsupportedOperation(op) => write!(f, "unsupported operation; {}", op),
FilterParseError(e) => write!(f, "error parsing filter; {}", e),
}
}
}
impl error::Error for Error {}
#[derive(Debug)]
pub enum UnsupportedOperation {
SchemaAlreadyExists,
CannotUpdateSchemaPrimaryKey,
CannotReorderSchemaAttribute,
CanOnlyIntroduceNewSchemaAttributesAtEnd,
CannotRemoveSchemaAttribute,
}
impl fmt::Display for UnsupportedOperation {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::UnsupportedOperation::*;
match self {
SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"),
CannotUpdateSchemaPrimaryKey => write!(f, "Cannot update the primary key of a schema"),
CannotReorderSchemaAttribute => write!(f, "Cannot reorder the attributes of a schema"),
CanOnlyIntroduceNewSchemaAttributesAtEnd => {
write!(f, "Can only introduce new attributes at end of a schema")
}
CannotRemoveSchemaAttribute => write!(f, "Cannot remove attributes from a schema"),
}
}
}

View File

@ -0,0 +1,267 @@
use std::str::FromStr;
use std::cmp::Ordering;
use crate::error::Error;
use crate::{store::Index, DocumentId, MainT};
use heed::RoTxn;
use meilisearch_schema::{FieldId, Schema};
use pest::error::{Error as PestError, ErrorVariant};
use pest::iterators::Pair;
use serde_json::{Value, Number};
use super::parser::Rule;
#[derive(Debug)]
enum ConditionType {
Greater,
Less,
Equal,
LessEqual,
GreaterEqual,
NotEqual,
}
/// We need to infer type when the filter is constructed
/// and match every possible types it can be parsed into.
#[derive(Debug)]
struct ConditionValue<'a> {
string: &'a str,
boolean: Option<bool>,
number: Option<Number>
}
impl<'a> ConditionValue<'a> {
pub fn new(value: &Pair<'a, Rule>) -> Self {
let value = match value.as_rule() {
Rule::string | Rule::word => {
let string = value.as_str();
let boolean = match value.as_str() {
"true" => Some(true),
"false" => Some(false),
_ => None,
};
let number = Number::from_str(value.as_str()).ok();
ConditionValue { string, boolean, number }
},
_ => unreachable!(),
};
value
}
pub fn as_str(&self) -> &str {
self.string.as_ref()
}
pub fn as_number(&self) -> Option<&Number> {
self.number.as_ref()
}
pub fn as_bool(&self) -> Option<bool> {
self.boolean
}
}
#[derive(Debug)]
pub struct Condition<'a> {
field: FieldId,
condition: ConditionType,
value: ConditionValue<'a>
}
fn get_field_value<'a>(schema: &Schema, pair: Pair<'a, Rule>) -> Result<(FieldId, ConditionValue<'a>), Error> {
let mut items = pair.into_inner();
// lexing ensures that we at least have a key
let key = items.next().unwrap();
let field = schema
.id(key.as_str())
.ok_or::<PestError<Rule>>(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `{}` not found, available attributes are: {}",
key.as_str(),
schema.names().collect::<Vec<_>>().join(", ")
),
},
key.as_span()))?;
let value = ConditionValue::new(&items.next().unwrap());
Ok((field, value))
}
// undefined behavior with big numbers
fn compare_numbers(lhs: &Number, rhs: &Number) -> Option<Ordering> {
match (lhs.as_i64(), lhs.as_u64(), lhs.as_f64(),
rhs.as_i64(), rhs.as_u64(), rhs.as_f64()) {
// i64 u64 f64 i64 u64 f64
(Some(lhs), _, _, Some(rhs), _, _) => lhs.partial_cmp(&rhs),
(_, Some(lhs), _, _, Some(rhs), _) => lhs.partial_cmp(&rhs),
(_, _, Some(lhs), _, _, Some(rhs)) => lhs.partial_cmp(&rhs),
(_, _, _, _, _, _) => None,
}
}
impl<'a> Condition<'a> {
pub fn less(
item: Pair<'a, Rule>,
schema: &'a Schema,
) -> Result<Self, Error> {
let (field, value) = get_field_value(schema, item)?;
let condition = ConditionType::Less;
Ok(Self { field, condition, value })
}
pub fn greater(
item: Pair<'a, Rule>,
schema: &'a Schema,
) -> Result<Self, Error> {
let (field, value) = get_field_value(schema, item)?;
let condition = ConditionType::Greater;
Ok(Self { field, condition, value })
}
pub fn neq(
item: Pair<'a, Rule>,
schema: &'a Schema,
) -> Result<Self, Error> {
let (field, value) = get_field_value(schema, item)?;
let condition = ConditionType::NotEqual;
Ok(Self { field, condition, value })
}
pub fn geq(
item: Pair<'a, Rule>,
schema: &'a Schema,
) -> Result<Self, Error> {
let (field, value) = get_field_value(schema, item)?;
let condition = ConditionType::GreaterEqual;
Ok(Self { field, condition, value })
}
pub fn leq(
item: Pair<'a, Rule>,
schema: &'a Schema,
) -> Result<Self, Error> {
let (field, value) = get_field_value(schema, item)?;
let condition = ConditionType::LessEqual;
Ok(Self { field, condition, value })
}
pub fn eq(
item: Pair<'a, Rule>,
schema: &'a Schema,
) -> Result<Self, Error> {
let (field, value) = get_field_value(schema, item)?;
let condition = ConditionType::Equal;
Ok(Self { field, condition, value })
}
pub fn test(
&self,
reader: &RoTxn<MainT>,
index: &Index,
document_id: DocumentId,
) -> Result<bool, Error> {
match index.document_attribute::<Value>(reader, document_id, self.field)? {
Some(Value::String(s)) => {
let value = self.value.as_str();
match self.condition {
ConditionType::Equal => Ok(unicase::eq(value, &s)),
ConditionType::NotEqual => Ok(!unicase::eq(value, &s)),
_ => Ok(false)
}
},
Some(Value::Number(n)) => {
if let Some(value) = self.value.as_number() {
if let Some(ord) = compare_numbers(&n, value) {
let res = match self.condition {
ConditionType::Equal => ord == Ordering::Equal,
ConditionType::NotEqual => ord != Ordering::Equal,
ConditionType::GreaterEqual => ord != Ordering::Less,
ConditionType::LessEqual => ord != Ordering::Greater,
ConditionType::Greater => ord == Ordering::Greater,
ConditionType::Less => ord == Ordering::Less,
};
return Ok(res)
}
}
Ok(false)
},
Some(Value::Bool(b)) => {
if let Some(value) = self.value.as_bool() {
return match self.condition {
ConditionType::Equal => Ok(b == value),
ConditionType::NotEqual => Ok(b != value),
_ => Ok(false)
}
}
Ok(false)
},
_ => Ok(false),
}
}
}
#[cfg(test)]
mod test {
use super::*;
use serde_json::Number;
use std::cmp::Ordering;
#[test]
fn test_number_comp() {
// test both u64
let n1 = Number::from(1u64);
let n2 = Number::from(2u64);
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
let n1 = Number::from(1u64);
let n2 = Number::from(1u64);
assert_eq!(Some(Ordering::Equal), compare_numbers(&n1, &n2));
// test both i64
let n1 = Number::from(1i64);
let n2 = Number::from(2i64);
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
let n1 = Number::from(1i64);
let n2 = Number::from(1i64);
assert_eq!(Some(Ordering::Equal), compare_numbers(&n1, &n2));
// test both f64
let n1 = Number::from_f64(1f64).unwrap();
let n2 = Number::from_f64(2f64).unwrap();
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
let n1 = Number::from_f64(1f64).unwrap();
let n2 = Number::from_f64(1f64).unwrap();
assert_eq!(Some(Ordering::Equal), compare_numbers(&n1, &n2));
// test one u64 and one f64
let n1 = Number::from_f64(1f64).unwrap();
let n2 = Number::from(2u64);
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
// equality
let n1 = Number::from_f64(1f64).unwrap();
let n2 = Number::from(1u64);
assert_eq!(Some(Ordering::Equal), compare_numbers(&n1, &n2));
assert_eq!(Some(Ordering::Equal), compare_numbers(&n2, &n1));
// float is neg
let n1 = Number::from_f64(-1f64).unwrap();
let n2 = Number::from(1u64);
assert_eq!(Some(Ordering::Less), compare_numbers(&n1, &n2));
assert_eq!(Some(Ordering::Greater), compare_numbers(&n2, &n1));
// float is too big
let n1 = Number::from_f64(std::f64::MAX).unwrap();
let n2 = Number::from(1u64);
assert_eq!(Some(Ordering::Greater), compare_numbers(&n1, &n2));
assert_eq!(Some(Ordering::Less), compare_numbers(&n2, &n1));
// misc
let n1 = Number::from_f64(std::f64::MAX).unwrap();
let n2 = Number::from(std::u64::MAX);
assert_eq!(Some(Ordering::Greater), compare_numbers(&n1, &n2));
assert_eq!(Some( Ordering::Less ), compare_numbers(&n2, &n1));
}
}

Some files were not shown because too many files have changed in this diff Show More