Compare commits

...

292 Commits

Author SHA1 Message Date
685016bfec Bump meilidb-core to v0.7.0 and meilidb-http to v0.2.0 2019-11-18 15:49:23 +01:00
d30e5f6231 Merge pull request #299 from meilisearch/default-update-callbacks
Prefer using a global update callback common to all indexes
2019-11-18 15:05:21 +01:00
e854d67a55 Remove useless routes and checks 2019-11-18 14:41:49 +01:00
23a89732a5 Prefer using a global update callback common to all indexes 2019-11-18 14:41:49 +01:00
3a1f41ebdb Merge pull request #305 from meilisearch/fix-example
Make easier to interact with compacted databases
2019-11-17 20:31:06 +01:00
f873761a27 Make easier to interact with compacted databases 2019-11-17 20:01:02 +01:00
ebf620c7f9 Merge pull request #302 from meilisearch/fix-dataset-schema
Rename the movies dataset schema file
2019-11-17 17:17:33 +01:00
8b92bc3421 Rename the movies dataset schema file 2019-11-17 16:45:13 +01:00
70a5aa61e9 Merge pull request #301 from meilisearch/separate-types
Move the main types to a separate library
2019-11-17 12:45:25 +01:00
a76169042f Make the serde and zerocopy meilidb-types dependencies optional 2019-11-17 12:30:39 +01:00
c9c3cfcee9 Move the main types to a separate library 2019-11-17 12:19:36 +01:00
2e60ac5359 Merge pull request #300 from meilisearch/update-dependencies
Do not use a forked fst dependency
2019-11-17 12:19:08 +01:00
2dd7751e09 Disable the fst MemMap feature 2019-11-17 11:43:00 +01:00
26bdabcdec Do not use a forked fst dependency 2019-11-17 11:14:01 +01:00
fc8c7ed77e Merge pull request #297 from meilisearch/improve-highlights
Improve the highlight formatted outputs
2019-11-15 14:28:27 +01:00
521c96354f Improve the highlight formatted outputs 2019-11-15 14:16:21 +01:00
9788779894 Merge pull request #296 from meilisearch/update-readme
Update the README
2019-11-14 21:32:32 +01:00
9b965764ab Update the README 2019-11-14 19:09:04 +01:00
9a5a543311 Merge pull request #290 from curquiza/deploy-doc
Add information in documentation in Deploy Server part
2019-11-13 16:06:27 +01:00
b18fb868e8 Add information in documentation in Deploy Server part 2019-11-13 15:37:21 +01:00
c734af55c0 Merge pull request #289 from curquiza/status204-delete-index
Change the HTTP status code on index deletion
2019-11-13 15:33:27 +01:00
810b328ad2 Change the HTTP status code on index deletion 2019-11-13 15:14:23 +01:00
0a8039d8d8 Merge pull request #285 from bidoubiwa/remove_catching_same_index_creation
Change the error catching on the index creation route
2019-11-13 15:13:51 +01:00
e51704c09a Remove the error catching on the index creation route when the index already exist 2019-11-13 14:42:59 +01:00
623a9012d5 Merge pull request #279 from bidoubiwa/new_slogan_and_resume
Slogan and Resume proposition
2019-11-13 14:41:21 +01:00
b9a185634f Slogan and Resume proposition 2019-11-13 14:31:22 +01:00
b46889b5f0 Merge pull request #282 from meilisearch/fix-ci-artifacts
Add the meilidb-http binary to the artifacts
2019-11-13 11:39:00 +01:00
ef9a0c07db Add the meilidb-http binary to the artifacts 2019-11-13 11:15:39 +01:00
3a6f3947c9 Merge pull request #281 from meilisearch/fix-attributes-to-search-in
Take attributes to search in into account
2019-11-12 18:45:40 +01:00
5c5f41d755 Take attributes to search in into account 2019-11-12 18:35:58 +01:00
6803a8fad0 Merge pull request #280 from meilisearch/format-updates-json
Format updates json
2019-11-12 18:35:25 +01:00
8e4b362e4d Fixed the display of enqueued updates 2019-11-12 18:21:59 +01:00
acb5e624c6 Add enqueued and processed datetimes 2019-11-12 18:21:59 +01:00
a98949ff1d Improve updates JSON format 2019-11-12 16:57:22 +01:00
f355280250 Merge pull request #278 from meilisearch/mit-license
Change the license to an MIT one
2019-11-12 14:35:32 +01:00
cee8d6a8d9 Change the license to an MIT one 2019-11-12 14:24:28 +01:00
27326ea069 Merge pull request #277 from bidoubiwa/add_cmd_to_compile
Add cmd line to compile binary
2019-11-12 13:55:54 +01:00
7bbe5aca5b Add cmd line to compile binary 2019-11-12 10:57:03 +01:00
1c4afe6d0f Merge pull request #276 from meilisearch/support-slash-tokenizer
Add support for back/slashes
2019-11-11 21:46:14 +01:00
2d8f9a9849 Add support for back/slashes 2019-11-11 21:23:08 +01:00
3f41681b18 Merge pull request #274 from meilisearch/enable-env-logger
Add env logger to enable logging
2019-11-11 19:13:33 +01:00
64791815fa Add env logger to enable logging 2019-11-11 19:03:38 +01:00
8a36571a74 Merge pull request #272 from meilisearch/fix-long-words
Ignore words that are too long
2019-11-10 20:07:22 +01:00
d18e775bec Ignore words that are too long 2019-11-10 17:44:27 +01:00
78381f1818 Merge pull request #271 from meilisearch/update-dependencies
Update Dependencies
2019-11-10 11:17:09 +01:00
7f33a01ae1 Update dependencies 2019-11-10 11:04:56 +01:00
d07d14d33a Update crossbeam-channel to 0.4.0 2019-11-10 11:03:22 +01:00
540d7886ab Merge pull request #266 from meilisearch/update-readme
Update the readme and add a Quick Start section
2019-11-09 13:21:22 +01:00
5a5d10af52 Add an image description of the gif 2019-11-09 13:12:01 +01:00
f95d077ef8 Improve the README a little bit by adding a quick start section 2019-11-09 13:12:01 +01:00
05dd99936f Add a gif to show a demo using crates.io 2019-11-09 12:59:39 +01:00
c086625773 Merge pull request #269 from meilisearch/repo-became-binary
Make the repository be a binary and version the Cargo.lock
2019-11-09 12:58:52 +01:00
dc17bebf4a Make the repository be a binary and version the Cargo.lock 2019-11-09 12:13:28 +01:00
026464b2e4 Bump meilidb-core to v0.6.5 2019-11-06 11:52:34 +01:00
bd42158a70 Merge pull request #264 from meilisearch/index-soft-deletion
Index soft deletion
2019-11-06 11:51:50 +01:00
df066f4321 Introduce a new add or update documents PUT route 2019-11-06 11:42:41 +01:00
69832e8c70 Update the http index deletion route 2019-11-06 11:42:41 +01:00
95eb6ad09a Add a test to check index soft deletion works correctly 2019-11-06 11:02:30 +01:00
f3fc0bed45 Introduce index soft deletion 2019-11-06 11:02:30 +01:00
5dd6b697b9 Bump meilidb-core to v0.6.4 2019-11-05 18:46:16 +01:00
b7d170c7d1 Merge pull request #262 from meilisearch/fix-unidecoded-emojis
Fix an highlighting problem
2019-11-05 17:04:35 +01:00
7541172d12 Make the example show highlighted areas more explicitly 2019-11-05 16:40:48 +01:00
85bf5d113c Fix an highlighting problem when query was longer than original text 2019-11-05 16:40:34 +01:00
89fd397903 Bump meilidb-core to v0.6.3 2019-11-05 15:40:04 +01:00
d8392f2f18 Merge pull request #261 from meilisearch/partial-updates
Introduce the support of partial updates
2019-11-05 15:39:02 +01:00
36b74f0efe Introduce partial updates to the update system 2019-11-05 15:23:41 +01:00
68c0a36b00 Make the deserialization support correctly optional documents 2019-11-05 15:03:18 +01:00
a127b72a74 Merge pull request #259 from meilisearch/allow-add-schema-attributes-at-end
Allow to introduce attributes only at the end of a schema
2019-11-05 12:34:11 +01:00
5782fb9e52 Test the add of attributes only at the end of a schema 2019-11-05 12:09:52 +01:00
20319f7974 Allow to introduce attributes only at the end of a schema 2019-11-05 12:09:52 +01:00
c4087e2ec2 Merge pull request #258 from meilisearch/debug-schema
Implement a better debug for the schema
2019-11-05 11:35:02 +01:00
b1d1f2f627 Implement a better debug system for the schema 2019-11-05 11:21:07 +01:00
62fe6a8263 Merge pull request #257 from meilisearch/bump-version
Bump meilidb-core/tokenizer versions
2019-11-04 17:26:01 +01:00
d88c10f3b4 Bump meilidb-tokenizer to v0.6.1 2019-11-04 17:17:06 +01:00
00f49990c7 Bump meilidb-core to v0.6.2 2019-11-04 17:16:50 +01:00
89f30ad47e Merge pull request #256 from meilisearch/fix-tokenizer
Fix the tokenizer to make it work with unicode chars
2019-11-04 17:15:17 +01:00
3b1cbed238 Check that the unidecoded words are not empty 2019-11-04 17:03:11 +01:00
4571b80a49 Update the tests 2019-11-04 16:41:58 +01:00
de2b8672d4 Make the tokenizer understand strange whitespaces/quotes 2019-11-04 16:41:58 +01:00
ccded7b429 Improve the indexer to not not deunicode before indexing
Revert of #179
2019-11-04 16:41:58 +01:00
1d4e98410a Merge pull request #255 from meilisearch/bump-version
Bump meilidb-core to v0.6.1
2019-11-04 14:47:53 +01:00
e493b27ef1 Bump meilidb-core to v0.6.1 2019-11-04 14:22:08 +01:00
70589c136f Merge pull request #253 from meilisearch/fix-updates-system
Fix the updates system
2019-11-04 13:46:37 +01:00
1c3620a7d4 Add tests to the update system 2019-11-04 13:18:07 +01:00
c2cc0704d7 Clean up the update_awaiter function 2019-11-04 11:11:58 +01:00
2a50e08bb8 Moving to heed v0.5.0 2019-11-04 10:49:27 +01:00
6b326a45d7 Fix the update system to always consume updates even if failing 2019-10-31 17:44:13 +01:00
b73874bf24 Merge pull request #252 from meilisearch/examples-specify-index-name
Allow users to specify the index name to use with examples bins
2019-10-31 17:02:00 +01:00
95c8ad0f80 Allow users to specify the index name to use with examples bins 2019-10-31 16:20:31 +01:00
996763cc52 Merge pull request #251 from meilisearch/update-heed
Moving to heed 0.3.0
2019-10-31 16:20:07 +01:00
6a8171d335 Moving to heed 0.3.0 2019-10-31 16:11:02 +01:00
2f32586dab Merge pull request #250 from meilisearch/new-http-server
Introduce a brand new HTTP server
2019-10-31 16:07:52 +01:00
db898001eb Get rid of rust-crypto and uuid 2019-10-31 15:28:37 +01:00
c2a12b661a Make it a runnable server 2019-10-31 15:27:21 +01:00
f51c49db93 Introduce the HTTP tide based library 2019-10-31 15:02:34 +01:00
1be5b0f327 Bump the meili-core/schema/tokenizer crates to 0.6.0 2019-10-31 14:05:59 +01:00
a136c62208 Merge pull request #249 from meilisearch/display-all-updates
Display enqueued along with processed updates
2019-10-31 13:53:46 +01:00
cc461b1331 Display enqueued along with processed updates 2019-10-31 12:25:52 +01:00
dbe5363672 Merge pull request #248 from meilisearch/fix-highlight-too-long
Correctly highlight when query string is too long
2019-10-30 18:19:06 +01:00
45d4361e7d Correctly highlight when query string is longer 2019-10-30 17:49:50 +01:00
b28c44cc6b Merge pull request #247 from meilisearch/bump-meilidb
Bump the meili-core/schema/tokenizer crates to 0.5.11
2019-10-30 17:48:26 +01:00
b709a7a30a Bump the meili-core/schema/tokenizer crates to 0.5.11 2019-10-30 17:40:31 +01:00
64c25bdb40 Merge pull request #246 from meilisearch/better-highlighting-area
Make the highlight system much better
2019-10-30 17:39:12 +01:00
c230f244be Make the highlight system much better 2019-10-30 17:32:29 +01:00
02af4ff113 Merge pull request #245 from meilisearch/reindex-all-documents-reduce-memory-usage
Reduce the ram consumption when re-indexing all the documents
2019-10-29 17:54:47 +01:00
4dff8a215e Reduce the ram consumption when re-indexing all the documents 2019-10-29 17:46:23 +01:00
41065305aa Merge pull request #244 from meilisearch/reintroduce-stop-words
Reintroduce stop words
2019-10-29 16:35:03 +01:00
e9dce3ce81 Add a test to ensure that the indexer support stop words 2019-10-29 16:18:06 +01:00
ff7dde7522 Make the RawIndexer support stop words 2019-10-29 16:18:06 +01:00
a226fd23c3 Introduce the stop words deletion update type 2019-10-29 16:18:06 +01:00
776673ebae Introduce the stop words addition update type 2019-10-29 15:24:09 +01:00
32d2cc3aea Merge pull request #243 from meilisearch/all-updates-results
Introduce a function to get all updates results
2019-10-29 11:45:55 +01:00
8a17fcdda5 Introduce a function to get all updates results 2019-10-29 11:37:40 +01:00
9602d7a960 Merge pull request #242 from meilisearch/accept-dup-documents
Make documents additions accept only the last duplicate document
2019-10-28 20:52:40 +01:00
ac12a4b9c9 Make documents additions accept only the last duplicate document 2019-10-28 20:40:33 +01:00
af96050944 Merge pull request #241 from meilisearch/fix-dead-locks
Fix dead locks
2019-10-28 18:20:01 +01:00
a43b37dfc1 Send channel notification when clearing documents 2019-10-28 17:58:22 +01:00
c08dcac1d4 Abort the update transaction before calling the update callback 2019-10-28 17:55:43 +01:00
a17dccd84e Merge pull request #237 from meilisearch/fix-exactness-criterion
Fix the exactness criterion algorithm
2019-10-26 18:43:10 +02:00
9a57cab3ee Fix the exactness criterion algorithm 2019-10-26 18:34:40 +02:00
751b060320 Merge pull request #238 from meilisearch/improve-highlighting
Only highlight query words areas not the whole words
2019-10-26 18:23:19 +02:00
4111b99a6d Only highlight query words areas not the whole words 2019-10-26 15:56:34 +02:00
d6fb2b56d1 Merge pull request #236 from meilisearch/reorder-automatons
Make sure that automatons group with more automatons are better
2019-10-24 15:29:16 +02:00
cb5c77e536 Make sure that automatons group with more automatons are better 2019-10-24 15:18:53 +02:00
44c89b1ea2 Merge pull request #235 from meilisearch/readme-concat-split-query-words
Add information about search concat and split query words support
2019-10-23 18:20:59 +02:00
26a285053b Add information about search concat and split query words support 2019-10-23 18:19:15 +02:00
1446a6a2d2 Merge pull request #234 from meilisearch/clear-all-update-variant
Introduce a clear all documents update
2019-10-23 16:45:37 +02:00
047eba3ff3 Introduce a clear all documents update 2019-10-23 16:39:10 +02:00
8d9d183ce6 Merge pull request #233 from meilisearch/commit-when-update-ok
Commit an update only when it is Ok
2019-10-23 16:07:48 +02:00
eb67195840 Commit an update only when it is Ok 2019-10-23 15:52:40 +02:00
93306c2326 Merge pull request #232 from meilisearch/support-splitted-words
Support splitted words
2019-10-23 13:38:16 +02:00
7d9cf8d713 Clean up the fetch algorithm 2019-10-23 12:06:21 +02:00
03eb7898e7 Introduce a basic working version of phrase query for splitting words 2019-10-23 11:40:13 +02:00
0fbd4cd632 Merge pull request #231 from meilisearch/recursive-object-indexing
Make possible to convert recursive object into strings
2019-10-22 16:20:10 +02:00
858bf359b8 Make possible to convert recursive object into strings 2019-10-22 16:02:02 +02:00
5dc8465ebd Merge pull request #181 from meilisearch/diff-schema
Make possible to update an index schema
2019-10-22 14:23:43 +02:00
0f30a221fa Introduce the reindex_all_documents indexing function 2019-10-22 14:07:27 +02:00
e86a547e93 Introduce a basic schema diff function 2019-10-21 17:57:32 +02:00
32d8b4b83f Merge pull request #230 from meilisearch/moving-to-heed
Move to heed 0.1.0
2019-10-21 13:34:06 +02:00
78535b3e33 Move to heed 0.1.0 2019-10-21 12:05:53 +02:00
6c9a238973 Merge pull request #229 from meilisearch/cargo-fmt-clippy
Cargo pass of fmt and clippy
2019-10-18 13:50:30 +02:00
cf5e228288 Update the CI to check the fmt and clippy 2019-10-18 13:33:38 +02:00
9dce41ed6b Cargo clippy pass 2019-10-18 13:30:06 +02:00
ca26a0f2e4 Cargo fmt pass 2019-10-18 13:30:06 +02:00
47d777c8f7 Merge pull request #228 from meilisearch/copy-and-compact-db
Introduce a function to copy and compact a database env
2019-10-18 13:21:55 +02:00
2ef51f7df9 Introduce a function to copy and compact a database env 2019-10-18 12:56:56 +02:00
2d7db2a80f Merge pull request #227 from meilisearch/damerau-distance-cost-1
Make the levenshtein algorithm consider transpositions to cost 1
2019-10-18 10:46:42 +02:00
526202ec8b Make the levenshtein algorithm consider transpositions to cost 1 2019-10-17 18:07:15 +02:00
86ab729356 Merge pull request #226 from meilisearch/fix-rotxn-number-documents
Use a read-only transaction to retrieve the number of documents
2019-10-17 17:39:56 +02:00
dd74af4c70 Use an RoTxn to retrieve the number of documents 2019-10-17 17:30:54 +02:00
b79a8457f9 Merge pull request #225 from meilisearch/improve-query-builder-pattern
Rework the QueryBuilder to make it easier to construct and use
2019-10-17 15:59:38 +02:00
d941c512db Rework the QueryBuilder to make it easier to construct and use 2019-10-17 14:45:21 +02:00
0ff73039e5 Merge pull request #224 from meilisearch/improve-automaton-producer
Improve the automaton producer
2019-10-17 13:51:44 +02:00
2ea3e9b081 Improve the automaton producer quality by changing the production order 2019-10-17 13:19:08 +02:00
da71821204 Make the example take the fetch-timeout-ms argument into account 2019-10-17 13:19:08 +02:00
16f0914f09 Merge pull request #223 from meilisearch/fix-update-serialization
Fix updates serialization to use serde_json instead of bincode
2019-10-17 13:05:25 +02:00
1cf6afad9a Fix updates serialization to use serde_json instead of bincode 2019-10-17 12:31:46 +02:00
261c21b057 Merge pull request #222 from meilisearch/update-readme
Update the README
2019-10-16 18:22:09 +02:00
925a22b644 Update the README 2019-10-16 18:04:45 +02:00
dc5c42821e Merge pull request #221 from meilisearch/zerocopy-lmdb
Moving to zerocopy-lmdb
2019-10-16 17:27:21 +02:00
1667e1b32f Move to zerocopy-lmdb 2019-10-16 17:12:55 +02:00
c332c7bc70 Merge pull request #220 from meilisearch/all-documents-fields-iter
Introduce an Iterator to visit all documents attributes counts
2019-10-15 15:42:30 +02:00
5e8d432614 Introduce an Iterator to visit all documents attributes counts 2019-10-15 15:27:18 +02:00
f6282ca031 Merge pull request #219 from meilisearch/current-update-id
Introduce an Index mathod to retrieve the currently processed update
2019-10-15 15:26:22 +02:00
3278d22279 Introduce an Index mathod to retrieve the currently processed update 2019-10-15 14:54:52 +02:00
c9618793e3 Merge pull request #218 from meilisearch/update-readme
Change the README to refer to LMDB instead of RocksDB
2019-10-15 11:40:10 +02:00
1ef785a9ef Change the README to refer to LMDB instead of RocksDB 2019-10-15 11:39:49 +02:00
fdc98f9ef3 Merge pull request #217 from meilisearch/improve-exactness-criterion
Improve the exactness criterion
2019-10-15 11:37:33 +02:00
0de37819b4 Simplify the document fields counts deletion 2019-10-15 11:17:23 +02:00
9ff92c5d15 Update the exact criterion to use the documents fields counts 2019-10-14 18:48:54 +02:00
e629f51af4 Use the documents_fileds_count store in the QueryBuilder 2019-10-14 18:48:32 +02:00
b377003192 Compute and store the number of words in documents fields 2019-10-14 14:07:10 +02:00
a7e40a78c1 Introduce the DocumentsFieldsCounts store 2019-10-14 14:06:34 +02:00
9cdda8c46a Make the RawIndexer index_text method return the number of words 2019-10-14 13:56:52 +02:00
b7ea812dcc Merge pull request #216 from meilisearch/get-ride-of-messagepack
Get ride of rust messagepack (rmp)
2019-10-11 16:41:37 +02:00
710ab2386c Get ride of rust messagepack (rmp) 2019-10-11 16:17:37 +02:00
81bf6d583d Merge pull request #214 from meilisearch/add-customs-updates
Add customs updates
2019-10-11 15:42:08 +02:00
02575a2ef6 Introduce customs updates 2019-10-11 15:33:35 +02:00
da6ab2753e Rename Update/Type SchemaUpdate into Schema 2019-10-11 13:49:17 +02:00
97de72de83 Merge pull request #213 from meilisearch/do-not-commit-ourselves
Do not commit updates, let the user do
2019-10-11 11:51:51 +02:00
12b80e08be Do not commit updates, let the user do 2019-10-11 11:29:47 +02:00
4b130fa2e5 Merge pull request #212 from meilisearch/fix-documents-ids-iter
Fix the DocumentsIdsIter and do not iter on an Option
2019-10-10 18:43:01 +02:00
9dca18f966 Fix the DocumentsIdsIter and do not iter on an Option 2019-10-10 18:32:22 +02:00
543b65b09b Merge pull request #211 from meilisearch/fix-documents-deletion-generic-param
Reemove the useless generic documents_deletion parameter
2019-10-10 17:09:49 +02:00
9eb27811b1 Remove the useless generic documents_deletion parameter 2019-10-10 16:16:53 +02:00
7c3d93e5da Merge pull request #210 from meilisearch/query-builder-with-criteria
Rename main_store into common_store
2019-10-10 15:40:56 +02:00
485480560a Add method to create a query builder along with criterion 2019-10-10 15:32:08 +02:00
0ac927794a Merge pull request #209 from meilisearch/rename-main-to-common-index
Rename main_store into common_store
2019-10-10 15:31:25 +02:00
e09d3b654d Rename main_store into common_store 2019-10-10 15:22:23 +02:00
c5af5de4f0 Merge pull request #208 from meilisearch/improve-open-or-create-index
Create two open and create index functions
2019-10-10 13:59:08 +02:00
19c22a8c5e Create two open and create index functions 2019-10-10 13:48:30 +02:00
0103c7bfd9 Merge pull request #207 from meilisearch/improve-documents-ids-iter
Improve the DocumentsIdsIter internal
2019-10-10 13:48:13 +02:00
7b26bd88c0 Improve the DocumentsIdsIter internal 2019-10-10 13:40:18 +02:00
da0168bd82 Merge pull request #206 from meilisearch/get-documents-ids
Introduce the DocumentsIds iterator
2019-10-10 10:54:21 +02:00
d1e59be46b Introduce the DocumentsIds iterator 2019-10-10 10:35:57 +02:00
9774db6011 Merge pull request #205 from meilisearch/expose-types
Expose the UpdateType
2019-10-10 10:35:42 +02:00
46c19dfc5a Expose the UpdateType 2019-10-10 10:24:41 +02:00
9ed6752573 Merge pull request #204 from meilisearch/optional-query-builder-timeout
Make the timeout QueryBuilder setting optional to and pass the tests
2019-10-09 18:17:52 +02:00
d8fdad1455 Make the timeout QueryBuilder setting optional to and pass the tests 2019-10-09 17:59:31 +02:00
f56636e1e9 Merge branch 'moving-to-lmdb' 2019-10-09 17:23:48 +02:00
03599f1fc9 Reintroduce the deep-dive and typos-ranking-rules explanations documents 2019-10-09 16:57:27 +02:00
be78ecbf9a Update the README to recall about LMDB 2019-10-09 16:55:07 +02:00
ba2b04ca89 Update ci with rust nightly only 2019-10-09 16:47:25 +02:00
121399f336 Add a movies example dataset to the repository 2019-10-09 16:46:11 +02:00
3fded51534 Update the README file to reflect the current repository 2019-10-09 16:46:11 +02:00
8f63ec39da Unrestrict static lifetime of Criterion names 2019-10-09 16:15:31 +02:00
5a1c1aeb02 Reintroduce the sort-by-attr criterion 2019-10-09 16:08:30 +02:00
6ec575f8de Use a buffered sync channel to avoid blocking the update system 2019-10-09 15:49:35 +02:00
683b6afbfb Introduce a way to filter documents with a basic syntax 2019-10-09 14:20:37 +02:00
663714bb6d Make the example return documents field in a consistent order 2019-10-09 13:48:33 +02:00
bb35ca0d40 Reintroduce the distinct and filtering of documents 2019-10-09 13:44:18 +02:00
5f3072e67e Support a basic update callback system 2019-10-09 11:45:19 +02:00
2a4707d51e Expose a function to be able to now the status of an update 2019-10-08 17:35:47 +02:00
6534a9ec1d Clean up many warning messages 2019-10-08 17:31:07 +02:00
0a5ad4db06 Move the push update functions to their related modules 2019-10-08 17:24:11 +02:00
6ee0d72c7b Expose the synonyms operation updates on the Index 2019-10-08 17:18:22 +02:00
ba32ce21d0 Introduce synonyms deletions updates 2019-10-08 17:16:48 +02:00
0e224efa46 Introduce synonyms additions updates 2019-10-08 17:06:56 +02:00
175461c13a Port all tests to the TempDatabase struct 2019-10-08 16:16:30 +02:00
c514692233 Introduce the TempDatabase in the QueryBuilder tests 2019-10-08 15:22:36 +02:00
d8d0442d63 Fix many indexing and searching related bugs 2019-10-08 14:56:14 +02:00
2236ebbd42 Introduce an example file to test indexing and searching csv documents 2019-10-08 14:48:48 +02:00
0bfba3e4ba Introduce a query_builder method on Index 2019-10-07 17:55:46 +02:00
a57a64823e Make possible to create an index and add a schema later on 2019-10-07 17:48:26 +02:00
aa05459e4f Introduce a background thread that manage updates to do 2019-10-07 16:16:04 +02:00
0615c5c52d Consume updates in the order of insertion 2019-10-07 15:00:28 +02:00
487411340a Prefix all the store names to avoid colliding with main stores 2019-10-07 10:56:55 +02:00
5139dc7f3e Let the caller commit/abort the operation 2019-10-07 10:52:45 +02:00
88d0d3931c Store the schema in the main index 2019-10-04 17:49:13 +02:00
df2ef8d2e1 Introduce update_task, popping an update and pushing the result of it 2019-10-04 17:49:13 +02:00
29229b2137 Remove the update from the database when popped out 2019-10-04 17:16:34 +02:00
851cc38216 Introduce the Database struct to manage indexes 2019-10-04 16:49:17 +02:00
effbbc7370 Load the indexes at startup 2019-10-04 13:26:33 +02:00
08e3f23408 Add the meilidb-schema/tokenizer projects 2019-10-04 10:29:44 +02:00
62a0aefe44 Make the project be a workspace 2019-10-04 10:26:32 +02:00
3476939b7e Prefer using the impl syntax 2019-10-04 10:21:09 +02:00
38e474deaf Introduce the MResult type 2019-10-03 17:33:15 +02:00
00c70d3cb5 Make the UpdatesResults store work 2019-10-03 16:54:37 +02:00
af9fd9f552 Make the Updates store work 2019-10-03 16:39:30 +02:00
0a731973b9 Made many stores do their jobs 2019-10-03 16:13:14 +02:00
c4bd13bcdf Introduce many SingleStore wrappers 2019-10-03 15:04:11 +02:00
a5bfbf244c Introduce the documents Deserializer 2019-10-03 11:49:13 +02:00
39e0d9fc4a Introduce a basically working rkv based MeiliDB 2019-10-02 17:35:18 +02:00
905bc5c1a6 Initial commit 2019-10-02 17:35:05 +02:00
0f395d43a0 Merge pull request #201 from meilisearch/updates-ids-api
Add more methods for updates process
2019-09-26 16:08:22 +02:00
0b5b7b0bf1 feat: add a method to get the current processed update id & next updates in queue 2019-09-26 15:50:16 +02:00
57dd679026 Merge pull request #199 from meilisearch/fix-soft-hard-separator
Do not consider underscores and middle dash hard separators
2019-09-24 23:09:38 +02:00
cdd69290c3 test: Make the tests work with new separator limits 2019-09-24 20:49:42 +02:00
175b3dcb75 fix: Do not consider underscores and middle dash hard 2019-09-24 20:14:20 +02:00
ca818e12a9 Merge pull request #198 from meilisearch/split-by-underscore
Support underscores and colon as split characters
2019-09-24 14:16:02 +02:00
6b9426a051 feat: Support underscore as a split character 2019-09-24 13:56:32 +02:00
cee5e50857 Merge pull request #197 from meilisearch/log-info-to-trace
Change logs in query_builder from info! to trace!
2019-09-24 13:48:46 +02:00
3fe346101b chore: change logs in query_builder from info! to trace! 2019-09-24 13:35:46 +02:00
87e5998489 Merge pull request #194 from meilisearch/set-code-public
Set code public
2019-09-19 18:25:13 +02:00
d7d1b6ff02 chore: reformat tests 2019-09-19 18:08:25 +02:00
7073b42afa feat: get update status Enqueued / Processed / Unknown 2019-09-19 18:08:14 +02:00
120d209e66 chore: set public SchemaProps values 2019-09-19 12:43:36 +02:00
62e981c6b8 chore: set public the main duration on update status 2019-09-19 12:43:36 +02:00
941302a4be chore: export ranked map 2019-09-19 12:43:36 +02:00
20f423268e chore: re-export database::Error type 2019-09-19 12:43:36 +02:00
522013425b chore: export a getter for synonyms 2019-09-19 12:43:35 +02:00
e3c413759f chore: implement deref on CommonIndex 2019-09-19 12:43:35 +02:00
6ed97d1c19 chore: re-export UpdateType/DetailedDuration/UpdateStatus 2019-09-19 12:43:35 +02:00
53ad1fc068 chore: split tests into multiples files 2019-09-19 12:43:35 +02:00
1e2ef06c5c Merge pull request #196 from meilisearch/fix-cf-handle-creation
Create the Column Family only when it doesn't already exist
2019-09-19 12:29:50 +02:00
9db86f13f3 fix: Only create the Column Family when it doesn't already exist 2019-09-19 12:02:34 +02:00
369461e635 Merge pull request #195 from meilisearch/update-readme
Update the README
2019-09-19 12:01:09 +02:00
d2d22ac76d doc: Update the README and refer to examples instead of the main binary 2019-09-19 12:00:34 +02:00
a5a19fc9dd Merge pull request #193 from meilisearch/get-documents-id
Add a method to get an iterator over all documents ids
2019-09-18 16:09:30 +02:00
a36c991897 feat: add a method to get an iterator over all documents ids 2019-09-18 15:41:06 +02:00
4f71219e17 Merge pull request #192 from meilisearch/bump-dependencies
Bump dependencies
2019-09-18 15:10:15 +02:00
69e0bae75e chore: Bump dependencies 2019-09-18 14:42:23 +02:00
1b18679950 Merge pull request #191 from meilisearch/typed-settings
Typed settings
2019-09-18 14:04:07 +02:00
e1c119b5a8 chore: add test for custom settings 2019-09-18 12:22:26 +02:00
03709910fd feat: add typed index custom settings for common uses 2019-09-18 12:22:21 +02:00
8fdb330195 Merge pull request #190 from meilisearch/bump-dependencies-versions
Bump dependency
2019-09-18 10:29:22 +02:00
59ae6458dc chore: bump dependencies 2019-09-17 18:50:44 +02:00
c10b701b9a Merge pull request #189 from meilisearch/documents-fields-repartition
Add the documents fields repartition into stats
2019-09-17 16:23:49 +02:00
80caa8b60d feat: add the documents fields repartition into stats 2019-09-17 15:56:13 +02:00
97cf5cca2a Merge pull request #188 from meilisearch/delete-index
Delete an index
2019-09-17 14:25:38 +02:00
3e76dc718b feat: delete an index and all it's associated data 2019-09-17 13:29:56 +02:00
5a17b5a63b Merge pull request #187 from meilisearch/export-snapshots
Re-export rocksdb snapshot function
2019-09-17 12:54:14 +02:00
5bc5185ac5 feat: re-export rocksdb snapshot function 2019-09-17 11:37:17 +02:00
3712fa7c24 Merge pull request #186 from meilisearch/common-db-tree
feat: expose a common DB tree for the database
2019-09-16 19:08:52 +02:00
918cc235a4 feat: expose a common DB tree for the database 2019-09-16 16:05:05 +02:00
8d24e54fa1 Merge pull request #185 from meilisearch/serde-schema
Implement De/Serialize on schema
2019-09-16 15:18:02 +02:00
35b7b58ff7 feat: Remove the Schema to/from_toml/json/bin methods 2019-09-16 14:50:38 +02:00
ffc29a319f feat: Implement De/Serialize on schema 2019-09-16 14:50:37 +02:00
ba3ac5ea7b chore: Create an internal Schema::to_builder method 2019-09-16 14:50:37 +02:00
ee6a54fe4c feat: Replace the linked-hash-map dependency by indexmap 2019-09-16 14:50:37 +02:00
f6ff79085e Merge pull request #184 from meilisearch/unify-update-types
Unify the Update and UpdateOwned types
2019-09-16 14:00:12 +02:00
bcd38c7d5a feat: Unify the Update and UpdateOwned types 2019-09-16 12:33:08 +02:00
121 changed files with 33123 additions and 4165 deletions

8
.gitignore vendored
View File

@ -1,8 +1,6 @@
/target
/Cargo.lock
meilidb/Cargo.lock
meilidb-core/Cargo.lock
**/*.rs.bk
**/*.csv
**/*.json_lines
**/*.rdb
**/*.rs.bk
/*.mdb
/query-history.txt

2519
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
[workspace]
members = [
"meilidb",
"meilidb-core",
"meilidb-data",
"meilidb-http",
"meilidb-schema",
"meilidb-tokenizer",
"meilidb-types",
]
[profile.release]

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2018 Clément Renault
Copyright (c) [year] [fullname]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

159
README.md
View File

@ -1,43 +1,119 @@
# MeiliDB
[![Build Status](https://dev.azure.com/thomas0884/thomas/_apis/build/status/meilisearch.MeiliDB?branchName=master)](https://dev.azure.com/thomas0884/thomas/_build/latest?definitionId=1&branchName=master)
[![dependency status](https://deps.rs/repo/github/Kerollmops/MeiliDB/status.svg)](https://deps.rs/repo/github/Kerollmops/MeiliDB)
[![License](https://img.shields.io/github/license/Kerollmops/MeiliDB.svg)](https://github.com/Kerollmops/MeiliDB)
[![Rust 1.31+](https://img.shields.io/badge/rust-1.31+-lightgray.svg)](
https://www.rust-lang.org)
[![dependency status](https://deps.rs/repo/github/meilisearch/MeiliDB/status.svg)](https://deps.rs/repo/github/meilisearch/MeiliDB)
[![License](https://img.shields.io/badge/license-commons%20clause-lightgrey)](https://commonsclause.com/)
A _full-text search database_ using a key-value store internally.
Ultra relevant and instant full-text search API.
MeiliSearch is a powerful, fast, open-source, easy to use and deploy search engine. The search and indexing are fully customizable and handle features like typo-tolerance, filters, and ranking.
## Features
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L95-L101) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L22-L29) and can apply them in any custom order
- Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L146), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L68) and [filter](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L57) returned documents based on context defined rules
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/examples/movies/schema-movies.toml)
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-tokenizer/src/lib.rs#L99) can index latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/lib.rs#L117-L120), useful to highlight matched words in results
- Accepts query time search config like the [searchable fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L79)
- Supports run time indexing (incremental indexing)
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L107-L113) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
- Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L283), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L265-L270) and [filter](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L246-L259) returned documents based on context defined rules
- Searches for [concatenated](https://github.com/meilisearch/MeiliDB/pull/164) and [split query words](https://github.com/meilisearch/MeiliDB/pull/232) to improve the search quality.
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-schema/src/lib.rs#L265-L279)
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-tokenizer/src/lib.rs) can index latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/lib.rs#L66-L88), useful to highlight matched words in results
- Accepts query time search config like the [searchable attributes](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L272-L275)
- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/store/mod.rs#L143-L173)
It uses [RocksDB](https://github.com/facebook/rocksdb) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performances.
It uses [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performances.
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries or you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
We will be proud if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
The project is only a library yet. It means that there is no binary provided yet. To get started, you can check the examples which are made to work with the data located in the `misc/` folder.
[![crates.io demo gif](misc/crates-io-demo.gif)](https://crates.meilisearch.com)
MeiliDB will be a binary in a near future so you will be able to use it as a database out-of-the-box. We should be able to query it using a [to-be-defined](https://github.com/meilisearch/MeiliDB/issues/38) protocol. This is our current goal, [see the milestones](https://github.com/meilisearch/MeiliDB/milestones). In the end, the binary will be a bunch of network protocols and wrappers around the library - which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
> Meili helps the Rust community find crates on [crates.meilisearch.com](https://crates.meilisearch.com)
## Quick Start
You can deploy your own instant, relevant and typo-tolerant MeiliDB search engine by yourself too.
Something similar to the demo above can be achieved by following these three little steps first.
You will need to create your own web front display to make it pretty though.
### Deploy the Server
If you have not installed Rust and its package manager `cargo` yet, go to [the installation page](https://www.rust-lang.org/tools/install).<br/>
You can deploy the server on your own machine, it will listen to HTTP requests on the 8080 port by default.
```bash
rustup override set nightly
cargo run --release
```
For more logs during the execution, run:
```bash
RUST_LOG=info cargo run --release
```
### Create an Index and Upload Some Documents
MeiliDB can serve multiple indexes, with different kinds of documents,
therefore, it is required to create the index before sending documents to it.
```bash
curl -i -X POST 'http://127.0.0.1:8080/indexes/movies'
```
Now that the server knows about our brand new index, we can send it data.
We provided you a little dataset, it is available in the `datasets/` directory.
```bash
curl -i -X POST 'http://127.0.0.1:8080/indexes/movies/documents' \
--header 'content-type: application/json' \
--data @datasets/movies/movies.json
```
### Search for Documents
The search engine is now aware of our documents and can serve those via our HTTP server again.
The [`jq` command line tool](https://stedolan.github.io/jq/) can greatly help you read the server responses.
```bash
curl 'http://127.0.0.1:8080/indexes/movies/search?q=botman'
```
```json
{
"hits": [
{
"id": "29751",
"title": "Batman Unmasked: The Psychology of the Dark Knight",
"poster": "https://image.tmdb.org/t/p/w1280/jjHu128XLARc2k4cJrblAvZe0HE.jpg",
"overview": "Delve into the world of Batman and the vigilante justice tha",
"release_date": "2008-07-15"
},
{
"id": "471474",
"title": "Batman: Gotham by Gaslight",
"poster": "https://image.tmdb.org/t/p/w1280/7souLi5zqQCnpZVghaXv0Wowi0y.jpg",
"overview": "ve Victorian Age Gotham City, Batman begins his war on crime",
"release_date": "2018-01-12"
}
],
"offset": 0,
"limit": 2,
"processingTimeMs": 1,
"query": "botman"
}
```
## Performances
With a database composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
So more than _300 000_ fields indexed for _35 million_ stored we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
With a dataset composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
So more than _300 000_ fields indexed for _35 million_ stored we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real users queries.
@ -52,42 +128,33 @@ Requests/sec: 2806.46
Transfer/sec: 759.17KB
```
We also indexed a dataset containing something like _12 million_ city names in _24 minutes_ on a machine with _8 cores_, _64 GB of RAM_ and a _300 GB NVMe_ SSD.<br/>
The resulting database was _16 GB_ and search results were between _30 ms_ and _4 seconds_ for short prefix queries.
### Notes
The default Rust allocator has recently been [changed to use the system allocator](https://github.com/rust-lang/rust/pull/51241/).
With Rust 1.32 the allocator has been [changed to use the system allocator](https://blog.rust-lang.org/2019/01/17/Rust-1.32.0.html#jemalloc-is-removed-by-default).
We have seen much better performances when [using jemalloc as the global allocator](https://github.com/alexcrichton/jemallocator#documentation).
## Usage and examples
## Usage and Examples
You can try a little part of MeiliDB with the following commands.
It creates an index named _movies_ and inserts two great Tarantino movies into it.
MeiliDB also provides an example binary that is mostly used for features testing.
Notice that the example binary is faster at indexing data, as it reads CSV files directly rather than JSON HTTP payloads.
The _index_ subcommand has been made to create an index and inject documents into it. Using the command line below, the index will be named _movies_ and the _19 700_ movies of the `datasets/` will be injected in MeiliDB.
```bash
cargo run --release
curl -XPOST 'http://127.0.0.1:8000/movies' \
-d '
identifier = "id"
[attributes.id]
stored = true
[attributes.title]
stored = true
indexed = true
'
curl -H 'Content-Type: application/json' \
-XPUT 'http://127.0.0.1:8000/movies' \
-d '{ "id": 123, "title": "Inglorious Bastards" }'
curl -H 'Content-Type: application/json' \
-XPUT 'http://127.0.0.1:8000/movies' \
-d '{ "id": 456, "title": "Django Unchained" }'
cargo run --release --example from_file -- \
index example.mdb datasets/movies/movies.csv \
--schema datasets/movies/schema.toml
```
Once the database is initialized you can query it by using the following command:
Once the first command is done, you can query the freshly created _movies_ index using the _search_ subcommand. In this example we filtered the dataset to only show _non-adult_ movies using the non-definitive `!adult` syntax filter.
```bash
curl -XGET 'http://127.0.0.1:8000/movies/search?q=inglo'
cargo run --release --example from_file -- \
search example.mdb \
--number-results 4 \
--filter '!adult' \
id popularity adult original_title
```

View File

@ -13,13 +13,17 @@ jobs:
steps:
- script: |
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
displayName: 'Install rustc'
$HOME/.cargo/bin/rustup component add rustfmt
displayName: 'Install rustc and components'
- script: |
$HOME/.cargo/bin/cargo check
displayName: 'Check MeiliDB'
- script: |
$HOME/.cargo/bin/cargo test
displayName: 'Test MeiliDB'
- script: |
$HOME/.cargo/bin/cargo fmt --all -- --check
displayName: 'Fmt MeiliDB'
- job: build
dependsOn:
@ -31,17 +35,18 @@ jobs:
steps:
- script: |
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
displayName: 'Install rustc'
$HOME/.cargo/bin/rustup component add rustfmt
displayName: 'Install rustc and components'
- script: |
$HOME/.cargo/bin/cargo build --release
displayName: 'Build MeiliDB'
- task: CopyFiles@2
inputs:
contents: '$(System.DefaultWorkingDirectory)/target/release/libmeilidb.rlib'
contents: '$(System.DefaultWorkingDirectory)/target/release/meilidb-http'
targetFolder: $(Build.ArtifactStagingDirectory)
displayName: 'Copy build'
- task: PublishBuildArtifacts@1
inputs:
artifactName: libmeilidb.rlib
artifactName: meilidb
displayName: 'Upload artifacts'

View File

@ -1,15 +0,0 @@
#!/bin/bash
cd "$(dirname "$0")"/..
set -ex
export RUSTFLAGS="-D warnings"
cargo check --no-default-features
cargo check --bins --examples --tests
cargo test
if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then
cargo check --no-default-features --features nightly
cargo test --features nightly
fi

View File

Can't render this file because it is too large.

19654
datasets/movies/movies.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,122 +0,0 @@
id,title,description,image
711158459,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs2.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
711158460,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs3.ebaystatic.com/d/l225/m/mJNDmSyIS3vUasKIJEBy4Cw.jpg
711158461,Sony PlayStation 4 PS4 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs4.ebaystatic.com/d/l225/m/m10NZXArmiIkpkTDDkAUVvA.jpg
711158462,Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black,,http://thumbs2.ebaystatic.com/d/l225/m/mZZXTmAE8WZDH1l_E_PPAkg.jpg
711158463,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs3.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
711158464,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs4.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
711158465,BRAND NEW Sony PlayStation 4 BUNDLE 500gb,,http://thumbs4.ebaystatic.com/d/l225/m/m9TQTiWcWig7SeQh9algLZg.jpg
711158466,"Sony PlayStation 4 500GB, Dualshock Wireless Control, HDMI Gaming Console Refurb","The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs4.ebaystatic.com/d/l225/m/mTZYG5N6xWfBi4Ok03HmpMw.jpg
711158467,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console w/ 2 Controllers,,http://thumbs2.ebaystatic.com/d/l225/m/mX5Qphrygqeoi7tAH5eku2A.jpg
711158468,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console *NEW*,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs2.ebaystatic.com/d/l225/m/mGjN4IrJ0O8kKD_TYMWgGgQ.jpg
711158469,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console..wth Mortal Kombat X,,http://thumbs2.ebaystatic.com/d/l225/m/mrpqSNXwlnUVKnEscE4348w.jpg
711158470,Genuine SONY PS4 Playstation 4 500GB Gaming Console - Black,,http://thumbs4.ebaystatic.com/d/l225/m/myrPBFCpb4H5rHI8NyiS2zA.jpg
711158471,[Sony] Playstation 4 PS4 Video Game Console Black - Latest Model,,http://thumbs4.ebaystatic.com/d/l225/m/mce0c7mCuv3xpjllJXx093w.jpg
711158472,Sony PlayStation 4 (Latest Model) 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs2.ebaystatic.com/d/l225/m/miVSA1xPO5fCNdYzEMc8rSQ.jpg
711158473,Sony PlayStation 4 - 500 GB Jet Black Console - WITH LAST OF US REMASTERED,,http://thumbs2.ebaystatic.com/d/l225/m/mLjnOxv2GWkrkCtgsDGhJ6A.jpg
711158474,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs3.ebaystatic.com/d/l225/m/mjMittBaXmm_n4AMpETBXhQ.jpg
711158475,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/m1n1qrJ7-VGbe7xQvGdeD6Q.jpg
711158476,"Sony PlayStation 4 - 500 GB Jet Black Console (3 controllers,3 games included)",,http://thumbs3.ebaystatic.com/d/l225/m/mIoGIj9FZG7HoEVkPlnyizA.jpg
711158477,Sony PlayStation 4 500GB Console with 2 Controllers,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br>",http://thumbs2.ebaystatic.com/d/l225/m/m4fuJ5Ibrj450-TZ83FAkIQ.jpg
711158478,Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black,,http://thumbs3.ebaystatic.com/d/l225/m/mzXSIw8Hlnff8IjXJQrXJSw.jpg
711158479,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/m-9S63CgFoUijY3ZTyNs3KA.jpg
711158480,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs1.ebaystatic.com/d/l225/m/mdF9Bisg9wXjv_R9Y_13MWw.jpg
711158481,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console*,,http://thumbs1.ebaystatic.com/d/l225/m/m4_OQHMmIOCa8uEkBepRR5A.jpg
711158482,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/mZ0nR8iz-QAfLssJZMp3L5Q.jpg
711158483,[Sony] Playstation 4 PS4 1105A Video Game Console 500GB White - Latest Model,,http://thumbs4.ebaystatic.com/d/l225/m/m8iTz5cLQLNjD9D3O2jT3IQ.jpg
711158484,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs2.ebaystatic.com/d/l225/m/mrraWCpvP5YKk5rYgotVDLg.jpg
711158485,Obagi Elastiderm Eye Treatment Cream 0.5 oz / 15g Authentic NiB Sealed [5],,http://thumbs1.ebaystatic.com/d/l225/m/mJ4ekz6_bDT5G7wYtjM-qRg.jpg
711158486,Lancome Renergie Eye Anti-Wrinkle & Firming Eye Cream 0.5oz New,,http://thumbs2.ebaystatic.com/d/l225/m/mxwwyDQraZ-TEtr_Y6qRi7Q.jpg
711158487,OZ Naturals - The BEST Eye Gel - Eye Cream For Dark Circles Puffiness and,,http://thumbs2.ebaystatic.com/d/l225/m/mk2Z-hX5sT4kUxfG6g_KFpg.jpg
711158488,Elastiderm Eye Cream (0.5oz/15g),,http://thumbs3.ebaystatic.com/d/l225/m/mHxb5WUc5MtGzCT2UXgY_hg.jpg
711158489,new CLINIQUE Repairwear Laser Focus Wrinkle Correcting Eye Cream 0.17 oz/ 5 ml,,http://thumbs1.ebaystatic.com/d/l225/m/mQSX2wfrSeGy3uA8Q4SbOKw.jpg
711158490,NIB Full Size Dermalogica Multivitamin Power Firm Eye Cream,,http://thumbs4.ebaystatic.com/d/l225/m/m2hxo12e5NjXgGiKIaCvTLA.jpg
711158491,24K Gold Collagen Anti-Dark Circles Anti-Aging Bio Essence Repairing Eye Cream,,http://thumbs4.ebaystatic.com/d/l225/m/mt96efUK5cPAe60B9aGmgMA.jpg
711158492,Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream Full Size .5oz 15mL,,http://thumbs3.ebaystatic.com/d/l225/m/mZyV3wKejCMx9RrnC8X-eMw.jpg
711158493,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs4.ebaystatic.com/d/l225/m/m9hX_z_DFnbNCTh0VFv3KcQ.jpg
711158494,3 Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17 oz/5 ml Each,,http://thumbs1.ebaystatic.com/d/l225/m/mYiHsrGffCg_qgkTbUWZU1A.jpg
711158495,Lancome High Resolution Eye Cream .95 Oz Refill-3X .25 Oz Plus .20 Oz Lot,,http://thumbs1.ebaystatic.com/d/l225/m/mFuQxKoEKQ6wtk2bGxfKwow.jpg
711158496,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs4.ebaystatic.com/d/l225/m/mLBRCDiELUnYos-vFmIcc7A.jpg
711158497,Neutrogena Rapid Wrinkle Repair Eye Cream -0.5 Oz. -New-,,http://thumbs4.ebaystatic.com/d/l225/m/mE1RWpCOxkCGuuiJBX6HiBQ.jpg
711158498,20g Snail Repair Eye Cream Natural Anti-Dark Circles Puffiness Aging Wrinkles,,http://thumbs4.ebaystatic.com/d/l225/m/mh4gBNzINDwds_r778sJRjg.jpg
711158499,Vichy-Neovadiol GF Eye & Lip Contour Cream 0.5 Fl. Oz,,http://thumbs4.ebaystatic.com/d/l225/m/m_6f0ofCm7PTzuithYuZx3w.jpg
711158500,Obagi Elastiderm Eye Cream 0.5 oz. New In Box. 100% Authentic! New Packaging!,,http://thumbs2.ebaystatic.com/d/l225/m/ma0PK-ASBXUiHERR19MyImA.jpg
711158501,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17oz / 5ml,,http://thumbs3.ebaystatic.com/d/l225/m/m72NaXYlcXcEeqQFKWvsdZA.jpg
711158502,Kiehl's CREAMY EYE TREATMENT cream with AVOCADO 0.5 oz FULL SIZE,,http://thumbs3.ebaystatic.com/d/l225/m/mOI407HnILb_tf-RgdvfYyA.jpg
711158503,Clinique repairwear laser focus wrinkle correcting eye cream .5 oz 15ml,,http://thumbs4.ebaystatic.com/d/l225/m/mQwNVst3bYG6QXouubmLaJg.jpg
711158504,Caudalie Premier Cru The Eye Cream La Creme New Anti Aging Eye Treatment,,http://thumbs1.ebaystatic.com/d/l225/m/mM4hPTAWXeOjovNk9s_Cqag.jpg
711158505,Jeunesse Instantly Ageless -- New Box Of 50 Sachets -- Eye - Face Wrinkle Cream,,http://thumbs2.ebaystatic.com/d/l225/m/m5EfWbi6ZYs4JpYcsl0Ubaw.jpg
711158506,VELOUR SKIN EYE CREAM .5 FL OZ 15ML NEW NIP ANTI-AGING WRINKLE CREAM,,http://thumbs1.ebaystatic.com/d/l225/m/m2uEf6q1yASH8FkWqYdOv1w.jpg
711158507,Shiseido White Lucent Anti-Dark Circles/Puffiness Eye Cream 15ml/.53oz Full Size,,http://thumbs1.ebaystatic.com/d/l225/m/m_CtzoqU2Vgv4GKx8ONS6qw.jpg
711158508,Murad Resurgence Renewing Eye Cream Anti-Aging .25 oz NEW Dark Circles Wrinkle,,http://thumbs1.ebaystatic.com/d/l225/m/mhWJC10iowgUDGm4KMQKNMg.jpg
711158509,D-Link DIR-615 300Mbps Wireless-N Router 4-Port w/Firewall,,http://thumbs3.ebaystatic.com/d/l225/m/mdSBH9ROXRn3TBb8OFDT6jA.jpg
711158510,Triton MOF001 2 1/4hp dual mode precision Router. New!! *3 day auction*,,http://thumbs1.ebaystatic.com/d/l225/m/mozWd2SBskbDBlWAKsMlVew.jpg
711158511,Porter-Cable 3-1/4 HP Five-Speed Router 7518 - Power Tools Routers,,http://thumbs2.ebaystatic.com/d/l225/m/mpZDTXpiyesDrZh_FLMyqXQ.jpg
711158512,Linksys EA6900 AC1900 Wi-Fi Wireless Router Dual Band with Gigabit &USB 3.0 Port,,http://thumbs4.ebaystatic.com/d/l225/m/m3OfBSnHBDhhs_Ve-DSBKQw.jpg
711158513,Linksys EA6500 1300 Mbps 4-Port Gigabit Wireless AC Router,,http://thumbs1.ebaystatic.com/d/l225/m/m7cfymJPc7CLADoTiEYFzwA.jpg
711158514,Makita RT0700CX3 1-1/4 Horsepower Compact Router Kit / Trimmer NEW,,http://thumbs2.ebaystatic.com/d/l225/m/mr-F3rCxDYsLcj8hnmaRN4A.jpg
711158515,NETGEAR R6250 AC1600 Smart WiFi Dual Band Gigabit Router 802.11ac 300 1300 Mbps,,http://thumbs4.ebaystatic.com/d/l225/m/mc8Ic8Cq2lPqPnjNGAQBBCQ.jpg
711158516,NETGEAR Nighthawk AC1900 Dual Band Wi-Fi Gigabit Router (R7000) BRAND NEW SEALED,,http://thumbs3.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
711158517,Netgear WNDR3400 N600 Wireless Dual Band Router (WNDR3400-100),,http://thumbs4.ebaystatic.com/d/l225/m/mKr4cNk6utJXSdVYXzwrScQ.jpg
711158518,Netgear N600 300 Mbps 4-Port 10/100 Wireless N Router (WNDR3400),,http://thumbs2.ebaystatic.com/d/l225/m/mUPdyhbW9pzEm1VbqX0YudA.jpg
711158519,NETGEAR N600 WNDR3400 Wireless Dual Band Router F/S,,http://thumbs1.ebaystatic.com/d/l225/m/my55jF5kHnG9ipzFycnjooA.jpg
711158520,Netgear NIGHTHAWK AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000),,http://thumbs3.ebaystatic.com/d/l225/m/mrPLRTnWx_JXLNIp5pCBnzQ.jpg
711158521,Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router (WNDR4500),,http://thumbs2.ebaystatic.com/d/l225/m/mXBL01faHlHm7Ukh188t3yQ.jpg
711158522,Netgear R6300V2 AC1750 1300 Mbps 4-Port Gigabit Wireless AC Router,,http://thumbs1.ebaystatic.com/d/l225/m/mTdnFB9Z71efYJ9I5-k186w.jpg
711158523,Makita RT0701C 1-1/4 HP Compact Router With FACTORY WARRANTY!!!,,http://thumbs2.ebaystatic.com/d/l225/m/m7AA4k3MzYFJcTlBrT3DwhA.jpg
711158524,"CISCO LINKSYS EA4500 DUAL-BAND N9000 WIRELESS ROUTER, 802.11N, UP TO 450 MBPs",,http://thumbs4.ebaystatic.com/d/l225/m/mwfVIXD3dZYt_qpHyprd7hg.jpg
711158525,Netgear N300 v.3 300 Mbps 5-Port 10/100 Wireless N Router (WNR2000),,http://thumbs4.ebaystatic.com/d/l225/m/mopRjvnZwbsVH9euqGov5kw.jpg
711158526,Netgear Nighthawk R7000 2330 Mbps 4-Port Gigabit Wireless N Router...,,http://thumbs4.ebaystatic.com/d/l225/m/mns82UY4FfqYXPgqrpJ9Bzw.jpg
711158527,Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router R4500 ~ FreE ShiPPinG ~,,http://thumbs1.ebaystatic.com/d/l225/m/m_o0mSRmySgJUuqHYDIQiuA.jpg
711158528,D-Link Wireless Router Model DIR-625,,http://thumbs2.ebaystatic.com/d/l225/m/mYPXwZMlDUjOQ3Sm3EtU37Q.jpg
711158529,D-Link DIR-657 300 Mbps 4-Port Gigabit Wireless N Router Hd Media Router 1000,"Stream multiple media content - videos, music and more to multiple devices all at the same time without lag or skipping. The HD Fuel technology in the DIR-657 lets you watch Netflix and Vudu , play your Wii or Xbox 360 online or make Skype calls all without worrying about the skipping or latency you might experience with standard routers. It does so by automatically giving extra bandwidth for video, gaming and VoIP calls using HD Fuel QoS technology. The D-Link HD Media Router 1000(DIR-657) also comes equipped with 4 Gigabit ports to provide speeds up to 10x faster than standard 10/100 ports. What s more, it uses 802.11n technology with multiple intelligent antennas to maximize the speed and range of your wireless signal to significantly outperform 802.11g devices.",http://thumbs1.ebaystatic.com/d/l225/m/m0xyPdWrdVKe7By4QFouVeA.jpg
711158530,D-Link DIR-860L AC1200 4-Port Cloud Router Gigabit Wireless 802.11 AC,,http://thumbs3.ebaystatic.com/d/l225/m/mk4KNj6oLm7863qCS-TqmbQ.jpg
711158531,D-Link DIR-862L Wireless AC1600 Dual Band Gigabit Router,,http://thumbs2.ebaystatic.com/d/l225/m/m6Arw8kaZ4EUbyKjHtJZLkA.jpg
711158532,LINKSYS AC1600 DUAL BAND SMART WI-FI ROUTER EA6400 BRAND NEW,,http://thumbs3.ebaystatic.com/d/l225/m/mdK7igTS7_TDD7ajfVqj-_w.jpg
711158533,Netgear AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000),,http://thumbs4.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
711158534,Panasonic ES-LA63 Cordless Rechargeable Men's Electric Shaver,,http://thumbs3.ebaystatic.com/d/l225/m/mzKKlCxbADObevcgoNjbXRg.jpg
711158535,Panasonic ARC 5 Best Mens Shaver,,http://thumbs4.ebaystatic.com/d/l225/m/mt34Y-u0okj-SqQm8Ng_rbQ.jpg
711158536,Panasonic Es8092 Wet Dry Electric Razor Shaver Cordless,,http://thumbs3.ebaystatic.com/d/l225/m/mlIxTz1LsVjXiZz2CzDquJw.jpg
711158537,Panasonic ARC4 ES-RF31-s Rechargeable Electric Shaver Wet/dry 4 Nanotech Blade,"Made for folks who need a great shave, the Panasonic electric shaver is convenient and consistent. Featuring an ergonomic design, this Panasonic ES-RF31-S is ideal for keeping a stubble-free face, so you can retain wonderfully smooth skin. With the precision blades included on the Panasonic electric shaver, you can get smooth shaves with every use. As this men's electric shaver features a gentle shaving mechanism, you can help avoid burning sensations on tender skin. Make sure you consistently get multiple perfect shaves without depleting the power with the exceptional shave time typical of this Panasonic ES-RF31-S.",http://thumbs1.ebaystatic.com/d/l225/m/mi4QM99Jq4oma5WLAL0K7Wg.jpg
711158538,"Panasonic ES3831K Single Blade Travel Shaver, Black New","Strong and trustworthy, the Panasonic electric shaver is built for folks who are worried about a wonderful shave every day. This Panasonic ES3833S is just right for taming your beard, with an easy-to-maneuver design, so you can retain wonderfully soft skin. Spend as much time as you need getting a complete shave by making use of the outstanding shave time typical of the Panasonic electric shaver. Moreover, this men's electric shaver includes precision foil blades, so you can get wonderful shaves over a prolonged period. With the gentle shaving mechanism on this Panasonic ES3833S, you can help avoid burning sensations on tender skin.",http://thumbs3.ebaystatic.com/d/l225/m/mfqMoj4xDlBFXp1ZznxCGbQ.jpg
711158539,Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades for Men,,http://thumbs1.ebaystatic.com/d/l225/m/myaZLqzt3I7O-3xXxsJ_4fQ.jpg
711158540,Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades,,http://thumbs1.ebaystatic.com/d/l225/m/mcrO4BkjBkM78XHm-aClRGg.jpg
711158543,Panasonic ES3831K Single Blade Wet & Dry Travel Shaver - New & Sealed,,http://thumbs4.ebaystatic.com/d/l225/m/mqWDU2mHsFWAuGosMIGcIMg.jpg
711158544,Panasonic ES8103S Arc 3 E W/O POUCH & MANUAL Men's Wet/Dry Rechargeable Shaver,,http://thumbs2.ebaystatic.com/d/l225/m/mZXgTj-fQfcgAlzOGQYkqFw.jpg
711158545,PANASONIC ES3831K Pro-Curve Battery Operated Travel Wet/Dry Shaver,,http://thumbs1.ebaystatic.com/d/l225/m/m8McQMCfgdp50trM_YJ88cw.jpg
711158546,PANASONIC ARC3 ES-LT33-S WET DRY WASHABLE RECHARGEABLE MEN'S ELECTRIC SHAVER NIB,,http://thumbs1.ebaystatic.com/d/l225/m/m9yUif5xyhGfh7Ag-_fcLdA.jpg
711158547,Panasonic ES-LV81-k Arc 5 Wet & Dry Rechargeable Men's Foil Shaver New,,http://thumbs1.ebaystatic.com/d/l225/m/mEfZHzDoKrH4DBfU8e_K93A.jpg
711158548,"NEW Panasonic ES-RF31-S 4 Blade Men's Electric Razor Wet/Dry, Factory Sealed",,http://thumbs2.ebaystatic.com/d/l225/m/mfhMhMoDkrGtqWW_IyqVGuQ.jpg
711158549,Panasonic ES8243A E Arc4 Men's Electric Shaver Wet/Dry,"eBay item number:181670746515
Seller assumes all responsibility for this listing.
Last updated on
&nbsp;Mar 23, 2015 08:55:50 PDT&nbsp;
View all revisions
<strong>Item specifics</strong>
<table>
<tr>
<th>Condition:</th>
<td><strong>Used</strong>
<strong>:</strong>
</td></tr></table>",http://thumbs4.ebaystatic.com/d/l225/m/mcxFUwt3FrGEEPzT7cfQn7w.jpg
711158550,Panasonic ES-3833 Wet/Dry Men Shaver Razor Battery Operate Compact Travel ES3833,,http://thumbs2.ebaystatic.com/d/l225/m/mAqa9pHisKsLSk5nqMg4JJQ.jpg
711158551,Panasonic Pro-Curve ES3831K Shaver - Dry/Wet Technology - Stainless Steel Foil,,http://thumbs3.ebaystatic.com/d/l225/m/mGqD8eGIwseT5nsM53W3uRQ.jpg
711158552,Panasonic Wet and Dry Shaver - ES-RW30s ES-RW30-S,"The Panasonic electric shaver is well-suited to shielding particularly sensitive skin and providing a smooth shave. It's both trustworthy and transportable. Because this Panasonic ES-RW30-S has a gentle shaving mechanism, you can avoid irritation and raw feeling skin in particularly tender areas. The Panasonic electric shaver is ideal for ridding yourself of stubble, with its special design, so you can sustain wonderfully supple skin. The exceptional shave time featured on this men's electric shaver helps you to make sure you consistently receive many complete shaves without depleting the power. Plus, this Panasonic ES-RW30-S features precision blades, so you can enjoy smooth shaves for months on end.",http://thumbs1.ebaystatic.com/d/l225/m/mvPElpjXmgo0NhP-P5F8LlQ.jpg
711158553,Panasonic ES-LF51-A Arc4 Electric Shaver Wet/Dry with Flexible Pivoting Head,,http://thumbs3.ebaystatic.com/d/l225/m/mC_zAQrMQKPLHdENU7N3UjQ.jpg
711158554,Panasonic ES8103S Arc3 Men's Electric Shaver Wet/Dry with Nanotech Blades,,http://thumbs3.ebaystatic.com/d/l225/m/moBByNwPn93-g-oBBceS2kw.jpg
711158555,panasonic ARC3 shaver es8103s,,http://thumbs1.ebaystatic.com/d/l225/m/mJlAp6t6OMIOaYgKnyelIMg.jpg
711158556,Panasonic ES-534 Men's Electric Shaver New ES534 Battery Operated Compact Travel,,http://thumbs3.ebaystatic.com/d/l225/m/mDr2kpZLVSdy1KTPVYK2YUg.jpg
711158557,Panasonic Portable Shaving Machine Cclippers Washable Single Blade Shaver+Brush,,http://thumbs3.ebaystatic.com/d/l225/m/mJdzJPoOALps0Lv4WtW2b0A.jpg
711158559,Baratza Solis Maestro Conical Burr Coffee Bean Grinder Works Great Nice Cond,,http://thumbs4.ebaystatic.com/d/l225/m/mdjbD7YFR6JRq-pkeajhK7w.jpg
711158560,Proctor Silex Fresh Grind Electric Coffee Bean Grinder White,,http://thumbs4.ebaystatic.com/d/l225/m/mtXoRn5Ytmqz0GLHYmBUxpA.jpg
711158561,Cuisinart 8-oz. Supreme Grind Automatic Burr Coffee Grinder,,http://thumbs4.ebaystatic.com/d/l225/m/my_9cXPvwwRVFqo6MXWfpag.jpg
1 id title description image
2 711158459 Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs2.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
3 711158460 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs3.ebaystatic.com/d/l225/m/mJNDmSyIS3vUasKIJEBy4Cw.jpg
4 711158461 Sony PlayStation 4 PS4 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs4.ebaystatic.com/d/l225/m/m10NZXArmiIkpkTDDkAUVvA.jpg
5 711158462 Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black http://thumbs2.ebaystatic.com/d/l225/m/mZZXTmAE8WZDH1l_E_PPAkg.jpg
6 711158463 Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs3.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
7 711158464 Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs4.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
8 711158465 BRAND NEW Sony PlayStation 4 BUNDLE 500gb http://thumbs4.ebaystatic.com/d/l225/m/m9TQTiWcWig7SeQh9algLZg.jpg
9 711158466 Sony PlayStation 4 500GB, Dualshock Wireless Control, HDMI Gaming Console Refurb The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs4.ebaystatic.com/d/l225/m/mTZYG5N6xWfBi4Ok03HmpMw.jpg
10 711158467 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console w/ 2 Controllers http://thumbs2.ebaystatic.com/d/l225/m/mX5Qphrygqeoi7tAH5eku2A.jpg
11 711158468 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console *NEW* The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs2.ebaystatic.com/d/l225/m/mGjN4IrJ0O8kKD_TYMWgGgQ.jpg
12 711158469 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console..wth Mortal Kombat X http://thumbs2.ebaystatic.com/d/l225/m/mrpqSNXwlnUVKnEscE4348w.jpg
13 711158470 Genuine SONY PS4 Playstation 4 500GB Gaming Console - Black http://thumbs4.ebaystatic.com/d/l225/m/myrPBFCpb4H5rHI8NyiS2zA.jpg
14 711158471 [Sony] Playstation 4 PS4 Video Game Console Black - Latest Model http://thumbs4.ebaystatic.com/d/l225/m/mce0c7mCuv3xpjllJXx093w.jpg
15 711158472 Sony PlayStation 4 (Latest Model) 500 GB Jet Black Console The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs2.ebaystatic.com/d/l225/m/miVSA1xPO5fCNdYzEMc8rSQ.jpg
16 711158473 Sony PlayStation 4 - 500 GB Jet Black Console - WITH LAST OF US REMASTERED http://thumbs2.ebaystatic.com/d/l225/m/mLjnOxv2GWkrkCtgsDGhJ6A.jpg
17 711158474 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs3.ebaystatic.com/d/l225/m/mjMittBaXmm_n4AMpETBXhQ.jpg
18 711158475 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs2.ebaystatic.com/d/l225/m/m1n1qrJ7-VGbe7xQvGdeD6Q.jpg
19 711158476 Sony PlayStation 4 - 500 GB Jet Black Console (3 controllers,3 games included) http://thumbs3.ebaystatic.com/d/l225/m/mIoGIj9FZG7HoEVkPlnyizA.jpg
20 711158477 Sony PlayStation 4 500GB Console with 2 Controllers The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. 
Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform.<br>Read more about the PS4 on ebay guides.</br> http://thumbs2.ebaystatic.com/d/l225/m/m4fuJ5Ibrj450-TZ83FAkIQ.jpg
21 711158478 Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black http://thumbs3.ebaystatic.com/d/l225/m/mzXSIw8Hlnff8IjXJQrXJSw.jpg
22 711158479 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs2.ebaystatic.com/d/l225/m/m-9S63CgFoUijY3ZTyNs3KA.jpg
23 711158480 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs1.ebaystatic.com/d/l225/m/mdF9Bisg9wXjv_R9Y_13MWw.jpg
24 711158481 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console* http://thumbs1.ebaystatic.com/d/l225/m/m4_OQHMmIOCa8uEkBepRR5A.jpg
25 711158482 Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console http://thumbs2.ebaystatic.com/d/l225/m/mZ0nR8iz-QAfLssJZMp3L5Q.jpg
26 711158483 [Sony] Playstation 4 PS4 1105A Video Game Console 500GB White - Latest Model http://thumbs4.ebaystatic.com/d/l225/m/m8iTz5cLQLNjD9D3O2jT3IQ.jpg
27 711158484 NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml http://thumbs2.ebaystatic.com/d/l225/m/mrraWCpvP5YKk5rYgotVDLg.jpg
28 711158485 Obagi Elastiderm Eye Treatment Cream 0.5 oz / 15g Authentic NiB Sealed [5] http://thumbs1.ebaystatic.com/d/l225/m/mJ4ekz6_bDT5G7wYtjM-qRg.jpg
29 711158486 Lancome Renergie Eye Anti-Wrinkle & Firming Eye Cream 0.5oz New http://thumbs2.ebaystatic.com/d/l225/m/mxwwyDQraZ-TEtr_Y6qRi7Q.jpg
30 711158487 OZ Naturals - The BEST Eye Gel - Eye Cream For Dark Circles Puffiness and http://thumbs2.ebaystatic.com/d/l225/m/mk2Z-hX5sT4kUxfG6g_KFpg.jpg
31 711158488 Elastiderm Eye Cream (0.5oz/15g) http://thumbs3.ebaystatic.com/d/l225/m/mHxb5WUc5MtGzCT2UXgY_hg.jpg
32 711158489 new CLINIQUE Repairwear Laser Focus Wrinkle Correcting Eye Cream 0.17 oz/ 5 ml http://thumbs1.ebaystatic.com/d/l225/m/mQSX2wfrSeGy3uA8Q4SbOKw.jpg
33 711158490 NIB Full Size Dermalogica Multivitamin Power Firm Eye Cream http://thumbs4.ebaystatic.com/d/l225/m/m2hxo12e5NjXgGiKIaCvTLA.jpg
34 711158491 24K Gold Collagen Anti-Dark Circles Anti-Aging Bio Essence Repairing Eye Cream http://thumbs4.ebaystatic.com/d/l225/m/mt96efUK5cPAe60B9aGmgMA.jpg
35 711158492 Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream Full Size .5oz 15mL http://thumbs3.ebaystatic.com/d/l225/m/mZyV3wKejCMx9RrnC8X-eMw.jpg
36 711158493 NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml http://thumbs4.ebaystatic.com/d/l225/m/m9hX_z_DFnbNCTh0VFv3KcQ.jpg
37 711158494 3 Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17 oz/5 ml Each http://thumbs1.ebaystatic.com/d/l225/m/mYiHsrGffCg_qgkTbUWZU1A.jpg
38 711158495 Lancome High Resolution Eye Cream .95 Oz Refill-3X .25 Oz Plus .20 Oz Lot http://thumbs1.ebaystatic.com/d/l225/m/mFuQxKoEKQ6wtk2bGxfKwow.jpg
39 711158496 NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml http://thumbs4.ebaystatic.com/d/l225/m/mLBRCDiELUnYos-vFmIcc7A.jpg
40 711158497 Neutrogena Rapid Wrinkle Repair Eye Cream -0.5 Oz. -New- http://thumbs4.ebaystatic.com/d/l225/m/mE1RWpCOxkCGuuiJBX6HiBQ.jpg
41 711158498 20g Snail Repair Eye Cream Natural Anti-Dark Circles Puffiness Aging Wrinkles http://thumbs4.ebaystatic.com/d/l225/m/mh4gBNzINDwds_r778sJRjg.jpg
42 711158499 Vichy-Neovadiol GF Eye & Lip Contour Cream 0.5 Fl. Oz http://thumbs4.ebaystatic.com/d/l225/m/m_6f0ofCm7PTzuithYuZx3w.jpg
43 711158500 Obagi Elastiderm Eye Cream 0.5 oz. New In Box. 100% Authentic! New Packaging! http://thumbs2.ebaystatic.com/d/l225/m/ma0PK-ASBXUiHERR19MyImA.jpg
44 711158501 NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17oz / 5ml http://thumbs3.ebaystatic.com/d/l225/m/m72NaXYlcXcEeqQFKWvsdZA.jpg
45 711158502 Kiehl's CREAMY EYE TREATMENT cream with AVOCADO 0.5 oz FULL SIZE http://thumbs3.ebaystatic.com/d/l225/m/mOI407HnILb_tf-RgdvfYyA.jpg
46 711158503 Clinique repairwear laser focus wrinkle correcting eye cream .5 oz 15ml http://thumbs4.ebaystatic.com/d/l225/m/mQwNVst3bYG6QXouubmLaJg.jpg
47 711158504 Caudalie Premier Cru The Eye Cream La Creme New Anti Aging Eye Treatment http://thumbs1.ebaystatic.com/d/l225/m/mM4hPTAWXeOjovNk9s_Cqag.jpg
48 711158505 Jeunesse Instantly Ageless -- New Box Of 50 Sachets -- Eye - Face Wrinkle Cream http://thumbs2.ebaystatic.com/d/l225/m/m5EfWbi6ZYs4JpYcsl0Ubaw.jpg
49 711158506 VELOUR SKIN EYE CREAM .5 FL OZ 15ML NEW NIP ANTI-AGING WRINKLE CREAM http://thumbs1.ebaystatic.com/d/l225/m/m2uEf6q1yASH8FkWqYdOv1w.jpg
50 711158507 Shiseido White Lucent Anti-Dark Circles/Puffiness Eye Cream 15ml/.53oz Full Size http://thumbs1.ebaystatic.com/d/l225/m/m_CtzoqU2Vgv4GKx8ONS6qw.jpg
51 711158508 Murad Resurgence Renewing Eye Cream Anti-Aging .25 oz NEW Dark Circles Wrinkle http://thumbs1.ebaystatic.com/d/l225/m/mhWJC10iowgUDGm4KMQKNMg.jpg
52 711158509 D-Link DIR-615 300Mbps Wireless-N Router 4-Port w/Firewall http://thumbs3.ebaystatic.com/d/l225/m/mdSBH9ROXRn3TBb8OFDT6jA.jpg
53 711158510 Triton MOF001 2 1/4hp dual mode precision Router. New!! *3 day auction* http://thumbs1.ebaystatic.com/d/l225/m/mozWd2SBskbDBlWAKsMlVew.jpg
54 711158511 Porter-Cable 3-1/4 HP Five-Speed Router 7518 - Power Tools Routers http://thumbs2.ebaystatic.com/d/l225/m/mpZDTXpiyesDrZh_FLMyqXQ.jpg
55 711158512 Linksys EA6900 AC1900 Wi-Fi Wireless Router Dual Band with Gigabit &USB 3.0 Port http://thumbs4.ebaystatic.com/d/l225/m/m3OfBSnHBDhhs_Ve-DSBKQw.jpg
56 711158513 Linksys EA6500 1300 Mbps 4-Port Gigabit Wireless AC Router http://thumbs1.ebaystatic.com/d/l225/m/m7cfymJPc7CLADoTiEYFzwA.jpg
57 711158514 Makita RT0700CX3 1-1/4 Horsepower Compact Router Kit / Trimmer NEW http://thumbs2.ebaystatic.com/d/l225/m/mr-F3rCxDYsLcj8hnmaRN4A.jpg
58 711158515 NETGEAR R6250 AC1600 Smart WiFi Dual Band Gigabit Router 802.11ac 300 1300 Mbps http://thumbs4.ebaystatic.com/d/l225/m/mc8Ic8Cq2lPqPnjNGAQBBCQ.jpg
59 711158516 NETGEAR Nighthawk AC1900 Dual Band Wi-Fi Gigabit Router (R7000) BRAND NEW SEALED http://thumbs3.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
60 711158517 Netgear WNDR3400 N600 Wireless Dual Band Router (WNDR3400-100) http://thumbs4.ebaystatic.com/d/l225/m/mKr4cNk6utJXSdVYXzwrScQ.jpg
61 711158518 Netgear N600 300 Mbps 4-Port 10/100 Wireless N Router (WNDR3400) http://thumbs2.ebaystatic.com/d/l225/m/mUPdyhbW9pzEm1VbqX0YudA.jpg
62 711158519 NETGEAR N600 WNDR3400 Wireless Dual Band Router F/S http://thumbs1.ebaystatic.com/d/l225/m/my55jF5kHnG9ipzFycnjooA.jpg
63 711158520 Netgear NIGHTHAWK AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000) http://thumbs3.ebaystatic.com/d/l225/m/mrPLRTnWx_JXLNIp5pCBnzQ.jpg
64 711158521 Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router (WNDR4500) http://thumbs2.ebaystatic.com/d/l225/m/mXBL01faHlHm7Ukh188t3yQ.jpg
65 711158522 Netgear R6300V2 AC1750 1300 Mbps 4-Port Gigabit Wireless AC Router http://thumbs1.ebaystatic.com/d/l225/m/mTdnFB9Z71efYJ9I5-k186w.jpg
66 711158523 Makita RT0701C 1-1/4 HP Compact Router With FACTORY WARRANTY!!! http://thumbs2.ebaystatic.com/d/l225/m/m7AA4k3MzYFJcTlBrT3DwhA.jpg
67 711158524 CISCO LINKSYS EA4500 DUAL-BAND N9000 WIRELESS ROUTER, 802.11N, UP TO 450 MBPs http://thumbs4.ebaystatic.com/d/l225/m/mwfVIXD3dZYt_qpHyprd7hg.jpg
68 711158525 Netgear N300 v.3 300 Mbps 5-Port 10/100 Wireless N Router (WNR2000) http://thumbs4.ebaystatic.com/d/l225/m/mopRjvnZwbsVH9euqGov5kw.jpg
69 711158526 Netgear Nighthawk R7000 2330 Mbps 4-Port Gigabit Wireless N Router... http://thumbs4.ebaystatic.com/d/l225/m/mns82UY4FfqYXPgqrpJ9Bzw.jpg
70 711158527 Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router R4500 ~ FreE ShiPPinG ~ http://thumbs1.ebaystatic.com/d/l225/m/m_o0mSRmySgJUuqHYDIQiuA.jpg
71 711158528 D-Link Wireless Router Model DIR-625 http://thumbs2.ebaystatic.com/d/l225/m/mYPXwZMlDUjOQ3Sm3EtU37Q.jpg
72 711158529 D-Link DIR-657 300 Mbps 4-Port Gigabit Wireless N Router Hd Media Router 1000 Stream multiple media content - videos, music and more to multiple devices all at the same time without lag or skipping. The HD Fuel technology in the DIR-657 lets you watch Netflix and Vudu , play your Wii or Xbox 360 online or make Skype calls all without worrying about the skipping or latency you might experience with standard routers. It does so by automatically giving extra bandwidth for video, gaming and VoIP calls using HD Fuel QoS technology. The D-Link HD Media Router 1000(DIR-657) also comes equipped with 4 Gigabit ports to provide speeds up to 10x faster than standard 10/100 ports. What s more, it uses 802.11n technology with multiple intelligent antennas to maximize the speed and range of your wireless signal to significantly outperform 802.11g devices. http://thumbs1.ebaystatic.com/d/l225/m/m0xyPdWrdVKe7By4QFouVeA.jpg
73 711158530 D-Link DIR-860L AC1200 4-Port Cloud Router Gigabit Wireless 802.11 AC http://thumbs3.ebaystatic.com/d/l225/m/mk4KNj6oLm7863qCS-TqmbQ.jpg
74 711158531 D-Link DIR-862L Wireless AC1600 Dual Band Gigabit Router http://thumbs2.ebaystatic.com/d/l225/m/m6Arw8kaZ4EUbyKjHtJZLkA.jpg
75 711158532 LINKSYS AC1600 DUAL BAND SMART WI-FI ROUTER EA6400 BRAND NEW http://thumbs3.ebaystatic.com/d/l225/m/mdK7igTS7_TDD7ajfVqj-_w.jpg
76 711158533 Netgear AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000) http://thumbs4.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
77 711158534 Panasonic ES-LA63 Cordless Rechargeable Men's Electric Shaver http://thumbs3.ebaystatic.com/d/l225/m/mzKKlCxbADObevcgoNjbXRg.jpg
78 711158535 Panasonic ARC 5 Best Mens Shaver http://thumbs4.ebaystatic.com/d/l225/m/mt34Y-u0okj-SqQm8Ng_rbQ.jpg
79 711158536 Panasonic Es8092 Wet Dry Electric Razor Shaver Cordless http://thumbs3.ebaystatic.com/d/l225/m/mlIxTz1LsVjXiZz2CzDquJw.jpg
80 711158537 Panasonic ARC4 ES-RF31-s Rechargeable Electric Shaver Wet/dry 4 Nanotech Blade Made for folks who need a great shave, the Panasonic electric shaver is convenient and consistent. Featuring an ergonomic design, this Panasonic ES-RF31-S is ideal for keeping a stubble-free face, so you can retain wonderfully smooth skin. With the precision blades included on the Panasonic electric shaver, you can get smooth shaves with every use. As this men's electric shaver features a gentle shaving mechanism, you can help avoid burning sensations on tender skin. Make sure you consistently get multiple perfect shaves without depleting the power with the exceptional shave time typical of this Panasonic ES-RF31-S. http://thumbs1.ebaystatic.com/d/l225/m/mi4QM99Jq4oma5WLAL0K7Wg.jpg
81 711158538 Panasonic ES3831K Single Blade Travel Shaver, Black New Strong and trustworthy, the Panasonic electric shaver is built for folks who are worried about a wonderful shave every day. This Panasonic ES3833S is just right for taming your beard, with an easy-to-maneuver design, so you can retain wonderfully soft skin. Spend as much time as you need getting a complete shave by making use of the outstanding shave time typical of the Panasonic electric shaver. Moreover, this men's electric shaver includes precision foil blades, so you can get wonderful shaves over a prolonged period. With the gentle shaving mechanism on this Panasonic ES3833S, you can help avoid burning sensations on tender skin. http://thumbs3.ebaystatic.com/d/l225/m/mfqMoj4xDlBFXp1ZznxCGbQ.jpg
82 711158539 Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades for Men http://thumbs1.ebaystatic.com/d/l225/m/myaZLqzt3I7O-3xXxsJ_4fQ.jpg
83 711158540 Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades http://thumbs1.ebaystatic.com/d/l225/m/mcrO4BkjBkM78XHm-aClRGg.jpg
84 711158543 Panasonic ES3831K Single Blade Wet & Dry Travel Shaver - New & Sealed http://thumbs4.ebaystatic.com/d/l225/m/mqWDU2mHsFWAuGosMIGcIMg.jpg
85 711158544 Panasonic ES8103S Arc 3 E W/O POUCH & MANUAL Men's Wet/Dry Rechargeable Shaver http://thumbs2.ebaystatic.com/d/l225/m/mZXgTj-fQfcgAlzOGQYkqFw.jpg
86 711158545 PANASONIC ES3831K Pro-Curve Battery Operated Travel Wet/Dry Shaver http://thumbs1.ebaystatic.com/d/l225/m/m8McQMCfgdp50trM_YJ88cw.jpg
87 711158546 PANASONIC ARC3 ES-LT33-S WET DRY WASHABLE RECHARGEABLE MEN'S ELECTRIC SHAVER NIB http://thumbs1.ebaystatic.com/d/l225/m/m9yUif5xyhGfh7Ag-_fcLdA.jpg
88 711158547 Panasonic ES-LV81-k Arc 5 Wet & Dry Rechargeable Men's Foil Shaver New http://thumbs1.ebaystatic.com/d/l225/m/mEfZHzDoKrH4DBfU8e_K93A.jpg
89 711158548 NEW Panasonic ES-RF31-S 4 Blade Men's Electric Razor Wet/Dry, Factory Sealed http://thumbs2.ebaystatic.com/d/l225/m/mfhMhMoDkrGtqWW_IyqVGuQ.jpg
90 711158549 Panasonic ES8243A E Arc4 Men's Electric Shaver Wet/Dry eBay item number:181670746515 Seller assumes all responsibility for this listing. Last updated on &nbsp;Mar 23, 2015 08:55:50 PDT&nbsp; View all revisions <strong>Item specifics</strong> <table> <tr> <th>Condition:</th> <td><strong>Used</strong> <strong>:</strong> </td></tr></table> http://thumbs4.ebaystatic.com/d/l225/m/mcxFUwt3FrGEEPzT7cfQn7w.jpg
91 711158550 Panasonic ES-3833 Wet/Dry Men Shaver Razor Battery Operate Compact Travel ES3833 http://thumbs2.ebaystatic.com/d/l225/m/mAqa9pHisKsLSk5nqMg4JJQ.jpg
92 711158551 Panasonic Pro-Curve ES3831K Shaver - Dry/Wet Technology - Stainless Steel Foil http://thumbs3.ebaystatic.com/d/l225/m/mGqD8eGIwseT5nsM53W3uRQ.jpg
93 711158552 Panasonic Wet and Dry Shaver - ES-RW30s ES-RW30-S The Panasonic electric shaver is well-suited to shielding particularly sensitive skin and providing a smooth shave. It's both trustworthy and transportable. Because this Panasonic ES-RW30-S has a gentle shaving mechanism, you can avoid irritation and raw feeling skin in particularly tender areas. The Panasonic electric shaver is ideal for ridding yourself of stubble, with its special design, so you can sustain wonderfully supple skin. The exceptional shave time featured on this men's electric shaver helps you to make sure you consistently receive many complete shaves without depleting the power. Plus, this Panasonic ES-RW30-S features precision blades, so you can enjoy smooth shaves for months on end. http://thumbs1.ebaystatic.com/d/l225/m/mvPElpjXmgo0NhP-P5F8LlQ.jpg
94 711158553 Panasonic ES-LF51-A Arc4 Electric Shaver Wet/Dry with Flexible Pivoting Head http://thumbs3.ebaystatic.com/d/l225/m/mC_zAQrMQKPLHdENU7N3UjQ.jpg
95 711158554 Panasonic ES8103S Arc3 Men's Electric Shaver Wet/Dry with Nanotech Blades http://thumbs3.ebaystatic.com/d/l225/m/moBByNwPn93-g-oBBceS2kw.jpg
96 711158555 panasonic ARC3 shaver es8103s http://thumbs1.ebaystatic.com/d/l225/m/mJlAp6t6OMIOaYgKnyelIMg.jpg
97 711158556 Panasonic ES-534 Men's Electric Shaver New ES534 Battery Operated Compact Travel http://thumbs3.ebaystatic.com/d/l225/m/mDr2kpZLVSdy1KTPVYK2YUg.jpg
98 711158557 Panasonic Portable Shaving Machine Cclippers Washable Single Blade Shaver+Brush http://thumbs3.ebaystatic.com/d/l225/m/mJdzJPoOALps0Lv4WtW2b0A.jpg
99 711158559 Baratza Solis Maestro Conical Burr Coffee Bean Grinder Works Great Nice Cond http://thumbs4.ebaystatic.com/d/l225/m/mdjbD7YFR6JRq-pkeajhK7w.jpg
100 711158560 Proctor Silex Fresh Grind Electric Coffee Bean Grinder White http://thumbs4.ebaystatic.com/d/l225/m/mtXoRn5Ytmqz0GLHYmBUxpA.jpg
101 711158561 Cuisinart 8-oz. Supreme Grind Automatic Burr Coffee Grinder http://thumbs4.ebaystatic.com/d/l225/m/my_9cXPvwwRVFqo6MXWfpag.jpg

View File

@ -1,19 +0,0 @@
# This schema has been generated ...
# The order in which the attributes are declared is important,
# it specifies the attribute xxx...
identifier = "id"
[attributes.id]
displayed = true
[attributes.title]
displayed = true
indexed = true
[attributes.description]
displayed = true
indexed = true
[attributes.image]
displayed = true

View File

@ -1,34 +1,40 @@
[package]
name = "meilidb-core"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
version = "0.7.0"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
byteorder = "1.3.1"
arc-swap = "0.4.3"
bincode = "1.1.4"
byteorder = "1.3.2"
chrono = { version = "0.4.9", features = ["serde"] }
crossbeam-channel = "0.4.0"
deunicode = "1.0.0"
hashbrown = "0.2.2"
lazy_static = "1.2.0"
log = "0.4.6"
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
rayon = "1.2.0"
sdset = "0.3.2"
serde = { version = "1.0.88", features = ["derive"] }
env_logger = "0.7.0"
fst = { version = "0.3.5", default-features = false }
hashbrown = { version = "0.6.0", features = ["serde"] }
heed = "0.5.0"
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
log = "0.4.8"
meilidb-schema = { path = "../meilidb-schema", version = "0.6.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.6.0" }
meilidb-types = { path = "../meilidb-types", version = "0.1.0" }
once_cell = "1.2.0"
ordered-float = { version = "1.0.2", features = ["serde"] }
sdset = "0.3.3"
serde = { version = "1.0.101", features = ["derive"] }
serde_json = "1.0.41"
siphasher = "0.3.0"
slice-group-by = "0.2.6"
zerocopy = "0.2.2"
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dependencies.levenshtein_automata]
git = "https://github.com/Kerollmops/levenshtein-automata.git"
branch = "arc-byte-slice"
features = ["fst_automaton"]
zerocopy = "0.2.8"
[dev-dependencies]
assert_matches = "1.3"
[features]
i128 = ["byteorder/i128"]
nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
csv = "1.0.7"
indexmap = { version = "1.2.0", features = ["serde-1"] }
rustyline = { version = "5.0.0", default-features = false }
structopt = "0.3.2"
tempfile = "3.1.0"
termcolor = "1.0.4"
toml = "0.5.3"

View File

@ -0,0 +1,471 @@
use std::collections::btree_map::{BTreeMap, Entry};
use std::collections::HashSet;
use std::error::Error;
use std::io::Write;
use std::iter::FromIterator;
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use std::{fs, io, sync::mpsc};
use rustyline::{Config, Editor};
use serde::{Deserialize, Serialize};
use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilidb_core::{Database, Highlight, ProcessedUpdateResult};
use meilidb_schema::SchemaAttr;
// Arguments of the `index` subcommand; consumed by `index_command`.
//
// The notes added on fields below are plain `//` comments on purpose: structopt
// turns `///` doc comments into help messages, so they are kept untouched.
#[derive(Debug, StructOpt)]
struct IndexCommand {
/// The destination where the database must be created.
#[structopt(parse(from_os_str))]
database_path: PathBuf,
// Name of the index that `index_command` opens, or creates when it does not exist.
#[structopt(long, default_value = "default")]
index_name: String,
/// The csv file to index.
#[structopt(parse(from_os_str))]
csv_data_path: PathBuf,
/// The path to the schema.
#[structopt(long, parse(from_os_str))]
schema: PathBuf,
// When set, `index_command` finalizes one update every `update_group_size` documents.
#[structopt(long)]
update_group_size: Option<usize>,
// When set, the database is copied and compacted to `data.mdb` inside this directory
// once the indexing is done.
#[structopt(long, parse(from_os_str))]
compact_to_path: Option<PathBuf>,
}
// Arguments of the `search` subcommand; consumed by `search_command`.
//
// The notes added on fields below are plain `//` comments on purpose: structopt
// turns `///` doc comments into help messages, so only the typo of the `filter`
// help text is changed.
#[derive(Debug, StructOpt)]
struct SearchCommand {
    /// The path of the database to work with.
    #[structopt(parse(from_os_str))]
    database_path: PathBuf,

    // Name of the index the queries are run against.
    #[structopt(long, default_value = "default")]
    index_name: String,

    /// Timeout after which the search will return results.
    #[structopt(long)]
    fetch_timeout_ms: Option<u64>,

    /// The number of returned results
    #[structopt(short, long, default_value = "10")]
    number_results: usize,

    /// The number of characters before and after the first match
    #[structopt(short = "C", long, default_value = "35")]
    char_context: usize,

    /// A filter string that can be `!adult` or `adult` to
    /// filter documents on this specified field
    #[structopt(short, long)]
    filter: Option<String>,

    /// Fields that must be displayed.
    displayed_fields: Vec<String>,
}
// Arguments of the `show-updates` subcommand; consumed by `show_updates_command`.
#[derive(Debug, StructOpt)]
struct ShowUpdatesCommand {
/// The path of the database to work with.
#[structopt(parse(from_os_str))]
database_path: PathBuf,
// Name of the index whose update statuses are printed.
#[structopt(long, default_value = "default")]
index_name: String,
}
// The subcommands understood by this binary; `main` dispatches on the variant.
#[derive(Debug, StructOpt)]
enum Command {
// Handled by `index_command`.
Index(IndexCommand),
// Handled by `search_command`.
Search(SearchCommand),
// Handled by `show_updates_command`.
ShowUpdates(ShowUpdatesCommand),
}
impl Command {
fn path(&self) -> &Path {
match self {
Command::Index(command) => &command.database_path,
Command::Search(command) => &command.database_path,
Command::ShowUpdates(command) => &command.database_path,
}
}
}
/// A document seen as a map from field name to string value.
///
/// `#[serde(transparent)]` makes it (de)serialize exactly like the inner map.
/// It is used both to deserialize CSV records in `index_command` and to
/// retrieve stored documents in `search_command`.
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
struct Document(indexmap::IndexMap<String, String>);
/// Indexes the CSV file designated by `command` into the requested index of `database`.
///
/// The steps are, in order:
/// 1. open the index (creating it when missing) and register an update callback
///    that forwards the id of every processed update through a channel;
/// 2. read the TOML schema: it is stored when the index has none, and a
///    `SchemaDiffer` error is returned when the stored one is different;
/// 3. read the CSV records and finalize one documents addition every
///    `update_group_size` documents (a single addition when the option is
///    absent or zero);
/// 4. wait for the callback to report the highest committed update id;
/// 5. when `compact_to_path` is set, copy and compact the database there.
///
/// Records that cannot be deserialized are reported on stderr and skipped.
fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dyn Error>> {
    let start = Instant::now();

    let (sender, receiver) = mpsc::sync_channel(100);
    let update_fn =
        move |_name: &str, update: ProcessedUpdateResult| sender.send(update.update_id).unwrap();
    let index = match database.open_index(&command.index_name) {
        Some(index) => index,
        None => database.create_index(&command.index_name).unwrap(),
    };

    database.set_update_callback(Box::new(update_fn));

    let env = &database.env;

    let schema = {
        let string = fs::read_to_string(&command.schema)?;
        toml::from_str(&string).unwrap()
    };

    let mut writer = env.write_txn().unwrap();
    match index.main.schema(&writer)? {
        Some(current_schema) => {
            if current_schema != schema {
                return Err(meilidb_core::Error::SchemaDiffer.into());
            }
            writer.abort();
        }
        None => {
            index.schema_update(&mut writer, schema)?;
            writer.commit().unwrap();
        }
    }

    let mut rdr = csv::Reader::from_path(command.csv_data_path)?;
    let mut raw_record = csv::StringRecord::new();
    let headers = rdr.headers()?.clone();

    // a group size of zero cannot split anything (and `i % 0` would panic),
    // it is therefore handled like an absent option: everything in one update
    let group_size = command.update_group_size.filter(|&size| size != 0);

    let mut max_update_id = 0;
    let mut i = 0;
    let mut end_of_file = false;

    while !end_of_file {
        let mut additions = index.documents_addition();

        loop {
            end_of_file = !rdr.read_record(&mut raw_record)?;
            if end_of_file {
                break;
            }

            let document: Document = match raw_record.deserialize(Some(&headers)) {
                Ok(document) => document,
                Err(e) => {
                    eprintln!("{:?}", e);
                    continue;
                }
            };

            additions.update_document(document);

            print!("\rindexing document {}", i);
            i += 1;

            // close the current group once it is full
            if let Some(group_size) = group_size {
                if i % group_size == 0 {
                    break;
                }
            }
        }

        println!();

        let mut writer = env.write_txn().unwrap();
        println!("committing update...");
        let update_id = additions.finalize(&mut writer)?;
        writer.commit().unwrap();
        max_update_id = max_update_id.max(update_id);
        println!("committed update {}", update_id);
    }

    // block until the callback reports the last update we committed
    println!("Waiting for update {}", max_update_id);
    for id in receiver {
        if id == max_update_id {
            break;
        }
    }

    println!(
        "database created in {:.2?} at: {:?}",
        start.elapsed(),
        command.database_path
    );

    if let Some(path) = command.compact_to_path {
        fs::create_dir_all(&path)?;
        let start = Instant::now();
        let _file = database.copy_and_compact_to_path(path.join("data.mdb"))?;
        println!(
            "database compacted in {:.2?} at: {:?}",
            start.elapsed(),
            path
        );
    }

    Ok(())
}
/// Writes `text` to stdout as a succession of plain and highlighted segments.
///
/// `ranges` is a list of byte offsets into `text`: every pair of consecutive
/// offsets delimits one segment. Segments alternate, starting with a plain
/// one; highlighted segments are printed in underlined yellow.
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
    let mut stdout = StandardStream::stdout(ColorChoice::Always);

    for (position, bounds) in ranges.windows(2).enumerate() {
        let (start, end) = (bounds[0], bounds[1]);

        // segments at odd positions are the highlighted ones
        if position % 2 == 1 {
            let mut spec = ColorSpec::new();
            spec.set_fg(Some(Color::Yellow)).set_underline(true);
            stdout.set_color(&spec)?;
        }

        write!(&mut stdout, "{}", &text[start..end])?;
        stdout.reset()?;
    }

    Ok(())
}
/// Converts a range expressed in chars into a `(byte_index, byte_length)` range of `text`.
///
/// `index` is the position of the first char and `length` the number of chars.
/// Both ends are clamped to the end of `text`, therefore:
/// - an `index` past the last char gives `(text.len(), 0)`;
/// - a range running past the last char stops at `text.len()`;
/// - a `length` of zero gives an empty range located at `index`.
///
/// `byte_index + byte_length` is always a valid char boundary of `text`.
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
    // byte offset of the `index`-th char, or the end of the text when out of bounds
    let byte_index = text
        .char_indices()
        .nth(index)
        .map_or(text.len(), |(offset, _)| offset);

    // walk `length` chars further to find where the range stops
    let byte_end = text[byte_index..]
        .char_indices()
        .nth(length)
        .map_or(text.len(), |(offset, _)| byte_index + offset);

    (byte_index, byte_end - byte_index)
}
/// Computes the byte offsets that split `text` into plain and highlighted areas.
///
/// The returned offsets are sorted, start with `0` and end with `text.len()`.
/// Every pair of consecutive offsets delimits one area, areas alternate and the
/// first one is plain, which is the layout `display_highlights` expects.
///
/// Highlights starting on the same byte keep the longest length, and highlights
/// that overlap a previous one are merged into it: emitting their bounds
/// separately would break the plain/highlighted alternation.
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
    // longest byte length for each starting byte, ordered by starting byte
    let mut byte_indexes = BTreeMap::new();

    for highlight in highlights {
        let char_index = highlight.char_index as usize;
        let char_length = highlight.char_length as usize;
        let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);

        match byte_indexes.entry(byte_index) {
            Entry::Vacant(entry) => {
                entry.insert(byte_length);
            }
            Entry::Occupied(mut entry) => {
                if *entry.get() < byte_length {
                    entry.insert(byte_length);
                }
            }
        }
    }

    let mut title_areas = Vec::with_capacity(2 * byte_indexes.len() + 2);
    title_areas.push(0);

    // the highlighted area being built, as `(start, end)` byte offsets
    let mut current: Option<(usize, usize)> = None;
    for (byte_index, length) in byte_indexes {
        let end = byte_index + length;
        current = match current {
            // starts inside the current area: grow it instead of opening a new one
            Some((start, current_end)) if byte_index < current_end => {
                Some((start, current_end.max(end)))
            }
            // disjoint from the current area: flush it and open a new one
            Some((start, current_end)) => {
                title_areas.push(start);
                title_areas.push(current_end);
                Some((byte_index, end))
            }
            None => Some((byte_index, end)),
        };
    }

    if let Some((start, end)) = current {
        title_areas.push(start);
        title_areas.push(end);
    }

    title_areas.push(text.len());
    // the offsets are already ordered, sorting is only a cheap safety net
    title_areas.sort_unstable();
    title_areas
}
/// Crops `text` around its first highlight and re-bases the highlights on the cropped text.
///
/// The cropped text starts `context` chars before the first highlight (or at
/// the beginning of the text) and is at most `context * 2` chars long.
/// Highlights are kept up to the first one that ends after the cropped window.
///
/// note: highlights must have been sorted by `char_index` and `char_length`
/// before being passed.
///
/// ```no_run
/// matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
///
/// let matches = matches.matches.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
///
/// let (text, matches) = crop_text(&text, matches, 35);
/// ```
fn crop_text(
    text: &str,
    highlights: impl IntoIterator<Item = Highlight>,
    context: usize,
) -> (String, Vec<Highlight>) {
    let mut remaining = highlights.into_iter().peekable();

    // the window is anchored on the first highlight, when there is one
    let first_char_index = remaining.peek().map_or(0, |m| m.char_index as usize);
    let start = first_char_index.saturating_sub(context);

    let cropped: String = text.chars().skip(start).take(context * 2).collect();

    let mut kept = Vec::new();
    for highlight in remaining {
        let highlight_end = (highlight.char_index as usize) + (highlight.char_length as usize);
        if highlight_end > start + (context * 2) {
            // this highlight and all the following ones are dropped
            break;
        }
        kept.push(Highlight {
            char_index: highlight.char_index - start as u16,
            ..highlight
        });
    }

    (cropped, kept)
}
/// Runs an interactive search prompt against the requested index of `database`.
///
/// Every line read from the prompt is used as a query. For each returned
/// document the displayed fields are printed, cropped around the first
/// highlight and with the matches highlighted, followed by the list of the
/// attributes in which matches were found. Timings are reported on stderr.
///
/// The prompt history is loaded from and saved to `query-history.txt`.
/// The loop stops on the first readline error.
fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<dyn Error>> {
let env = &database.env;
let index = database
.open_index(&command.index_name)
.expect("Could not find index");
// the schema is read once, the transaction is released right after
let reader = env.read_txn().unwrap();
let schema = index.main.schema(&reader)?;
reader.abort();
let schema = schema.ok_or(meilidb_core::Error::SchemaMissing)?;
let fields = command.displayed_fields.iter().map(String::as_str);
let fields = HashSet::from_iter(fields);
let config = Config::builder().auto_add_history(true).build();
let mut readline = Editor::<()>::with_config(config);
// a failure to load the history is deliberately ignored
let _ = readline.load_history("query-history.txt");
for result in readline.iter("Searching for: ") {
match result {
Ok(query) => {
let start_total = Instant::now();
// a new read transaction is opened for every query
let reader = env.read_txn().unwrap();
let ref_index = &index;
let ref_reader = &reader;
let mut builder = index.query_builder();
if let Some(timeout) = command.fetch_timeout_ms {
builder.with_fetch_timeout(Duration::from_millis(timeout));
}
if let Some(ref filter) = command.filter {
let filter = filter.as_str();
// a leading `!` negates the filter, the rest is the attribute name
let (positive, filter) = if filter.chars().next() == Some('!') {
(false, &filter[1..])
} else {
(true, filter)
};
let attr = schema
.attribute(&filter)
.expect("Could not find filtered attribute");
builder.with_filter(move |document_id| {
// NOTE(review): both unwraps panic when the attribute cannot be
// read or is absent from the document
let string: String = ref_index
.document_attribute(ref_reader, document_id, attr)
.unwrap()
.unwrap();
// the document passes when its attribute equals "true" for a
// positive filter, and when it does not for a negated one
(string == "true") == positive
});
}
let documents = builder.query(ref_reader, &query, 0..command.number_results)?;
// accumulated time spent retrieving the documents fields
let mut retrieve_duration = Duration::default();
let number_of_documents = documents.len();
for mut doc in documents {
// `crop_text` asks for highlights sorted this way
doc.highlights
.sort_unstable_by_key(|m| (m.char_index, m.char_length));
let start_retrieve = Instant::now();
let result = index.document::<Document>(&reader, Some(&fields), doc.id);
retrieve_duration += start_retrieve.elapsed();
match result {
Ok(Some(document)) => {
println!("raw-id: {:?}", doc.id);
for (name, text) in document.0 {
print!("{}: ", name);
let attr = schema.attribute(&name).unwrap();
// only the highlights of the field being printed are kept
let highlights = doc
.highlights
.iter()
.filter(|m| SchemaAttr::new(m.attribute) == attr)
.cloned();
let (text, highlights) =
crop_text(&text, highlights, command.char_context);
let areas = create_highlight_areas(&text, &highlights);
display_highlights(&text, &areas)?;
println!();
}
}
Ok(None) => eprintln!("missing document"),
Err(e) => eprintln!("{}", e),
}
// names of the attributes in which this document matched, deduplicated
let mut matching_attributes = HashSet::new();
for highlight in doc.highlights {
let attr = SchemaAttr::new(highlight.attribute);
let name = schema.attribute_name(attr);
matching_attributes.insert(name);
}
let matching_attributes = Vec::from_iter(matching_attributes);
println!("matching in: {:?}", matching_attributes);
println!();
}
eprintln!(
"whole documents fields retrieve took {:.2?}",
retrieve_duration
);
eprintln!(
"===== Found {} results in {:.2?} =====",
number_of_documents,
start_total.elapsed()
);
}
Err(err) => {
println!("Error: {:?}", err);
break;
}
}
}
// NOTE(review): unlike the loading, a failure to save the history panics
readline.save_history("query-history.txt").unwrap();
Ok(())
}
/// Pretty-prints the status of all the updates of the requested index.
///
/// Panics when the index does not exist or when the read transaction cannot
/// be opened.
fn show_updates_command(
    command: ShowUpdatesCommand,
    database: Database,
) -> Result<(), Box<dyn Error>> {
    let index = database
        .open_index(&command.index_name)
        .expect("Could not find index");

    let reader = database.env.read_txn().unwrap();
    let statuses = index.all_updates_status(&reader)?;
    println!("{:#?}", statuses);
    reader.abort();

    Ok(())
}
/// Parses the command line, opens (or creates) the database and runs the
/// selected subcommand.
fn main() -> Result<(), Box<dyn Error>> {
    env_logger::init();

    let command = Command::from_args();
    let database = Database::open_or_create(command.path())?;

    match command {
        Command::Index(args) => index_command(args, database),
        Command::Search(args) => search_command(args, database),
        Command::ShowUpdates(args) => show_updates_command(args, database),
    }
}

View File

@ -1,44 +0,0 @@
use lazy_static::lazy_static;
use levenshtein_automata::{
LevenshteinAutomatonBuilder as LevBuilder,
DFA,
};
// Lazily initialized Levenshtein automaton builders, one for each maximum
// edit distance (0, 1 and 2) given as first argument.
// NOTE(review): the second argument is `false` for all of them; it looks like
// the `transposition_cost_one` flag of levenshtein_automata — confirm.
lazy_static! {
static ref LEVDIST0: LevBuilder = LevBuilder::new(0, false);
static ref LEVDIST1: LevBuilder = LevBuilder::new(1, false);
static ref LEVDIST2: LevBuilder = LevBuilder::new(2, false);
}
/// Selects which kind of DFA `build_dfa_with_setting` must build.
#[derive(Copy, Clone)]
enum PrefixSetting {
// build the DFA with `build_prefix_dfa`
Prefix,
// build the DFA with `build_dfa`
NoPrefix,
}
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
use self::PrefixSetting::{Prefix, NoPrefix};
match query.len() {
0 ..= 4 => match setting {
Prefix => LEVDIST0.build_prefix_dfa(query),
NoPrefix => LEVDIST0.build_dfa(query),
},
5 ..= 8 => match setting {
Prefix => LEVDIST1.build_prefix_dfa(query),
NoPrefix => LEVDIST1.build_dfa(query),
},
_ => match setting {
Prefix => LEVDIST2.build_prefix_dfa(query),
NoPrefix => LEVDIST2.build_dfa(query),
},
}
}
/// Builds the prefix DFA of `query`; see `build_dfa_with_setting` for the
/// way the builder is selected.
pub fn build_prefix_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::Prefix)
}
/// Builds the non-prefix DFA of `query`; see `build_dfa_with_setting` for
/// the way the builder is selected.
pub fn build_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}

View File

@ -0,0 +1,48 @@
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use once_cell::sync::OnceCell;
// Cells holding the Levenshtein automaton builders; each one is filled by
// `build_dfa_with_setting` the first time it is needed.
static LEVDIST0: OnceCell<LevBuilder> = OnceCell::new();
static LEVDIST1: OnceCell<LevBuilder> = OnceCell::new();
static LEVDIST2: OnceCell<LevBuilder> = OnceCell::new();
/// Selects which kind of DFA `build_dfa_with_setting` must build.
#[derive(Copy, Clone)]
enum PrefixSetting {
// build the DFA with `build_prefix_dfa`
Prefix,
// build the DFA with `build_dfa`
NoPrefix,
}
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
use PrefixSetting::{NoPrefix, Prefix};
match query.len() {
0..=4 => {
let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
match setting {
Prefix => builder.build_prefix_dfa(query),
NoPrefix => builder.build_dfa(query),
}
}
5..=8 => {
let builder = LEVDIST1.get_or_init(|| LevBuilder::new(1, true));
match setting {
Prefix => builder.build_prefix_dfa(query),
NoPrefix => builder.build_dfa(query),
}
}
_ => {
let builder = LEVDIST2.get_or_init(|| LevBuilder::new(2, true));
match setting {
Prefix => builder.build_prefix_dfa(query),
NoPrefix => builder.build_dfa(query),
}
}
}
}
/// Builds the prefix DFA of `query`; see `build_dfa_with_setting` for the
/// way the builder is selected.
pub fn build_prefix_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::Prefix)
}
/// Builds the non-prefix DFA of `query`; see `build_dfa_with_setting` for
/// the way the builder is selected.
pub fn build_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}

View File

@ -0,0 +1,295 @@
mod dfa;
mod query_enhancer;
use std::cmp::Reverse;
use std::{cmp, vec};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::DFA;
use meilidb_tokenizer::{is_cjk, split_query_string};
use crate::error::MResult;
use crate::store;
use self::dfa::{build_dfa, build_prefix_dfa};
pub use self::query_enhancer::QueryEnhancer;
use self::query_enhancer::QueryEnhancerBuilder;
const NGRAMS: usize = 3;
/// Owns the automaton groups generated for a query.
pub struct AutomatonProducer {
    automatons: Vec<AutomatonGroup>,
}

impl AutomatonProducer {
    /// Generates the automatons of `query` and returns them along with the
    /// `QueryEnhancer` produced by the same generation.
    pub fn new(
        reader: &heed::RoTxn,
        query: &str,
        main_store: store::Main,
        postings_list_store: store::PostingsLists,
        synonyms_store: store::Synonyms,
    ) -> MResult<(AutomatonProducer, QueryEnhancer)> {
        generate_automatons(
            reader,
            query,
            main_store,
            postings_list_store,
            synonyms_store,
        )
        .map(|(automatons, query_enhancer)| (AutomatonProducer { automatons }, query_enhancer))
    }

    /// Consumes the producer and iterates over its automaton groups.
    pub fn into_iter(self) -> vec::IntoIter<AutomatonGroup> {
        let AutomatonProducer { automatons } = self;
        automatons.into_iter()
    }
}
/// A list of automatons, flagged as being a phrase query or not.
#[derive(Debug)]
pub struct AutomatonGroup {
    pub is_phrase_query: bool,
    pub automatons: Vec<Automaton>,
}

impl AutomatonGroup {
    /// Creates a group that is not a phrase query.
    fn normal(automatons: Vec<Automaton>) -> AutomatonGroup {
        let is_phrase_query = false;
        AutomatonGroup {
            is_phrase_query,
            automatons,
        }
    }

    /// Creates a group that is a phrase query.
    fn phrase_query(automatons: Vec<Automaton>) -> AutomatonGroup {
        let is_phrase_query = true;
        AutomatonGroup {
            is_phrase_query,
            automatons,
        }
    }
}
#[derive(Debug)]
pub struct Automaton {
pub index: usize,
pub ngram: usize,
pub query_len: usize,
pub is_exact: bool,
pub is_prefix: bool,
pub query: String,
}
impl Automaton {
pub fn dfa(&self) -> DFA {
if self.is_prefix {
build_prefix_dfa(&self.query)
} else {
build_dfa(&self.query)
}
}
fn exact(index: usize, ngram: usize, query: &str) -> Automaton {
Automaton {
index,
ngram,
query_len: query.len(),
is_exact: true,
is_prefix: false,
query: query.to_string(),
}
}
fn prefix_exact(index: usize, ngram: usize, query: &str) -> Automaton {
Automaton {
index,
ngram,
query_len: query.len(),
is_exact: true,
is_prefix: true,
query: query.to_string(),
}
}
fn non_exact(index: usize, ngram: usize, query: &str) -> Automaton {
Automaton {
index,
ngram,
query_len: query.len(),
is_exact: false,
is_prefix: false,
query: query.to_string(),
}
}
}
/// Lowercases `string` and, unless the lowercased string contains a CJK char,
/// passes it through `deunicode_with_tofu` with an empty replacement string.
pub fn normalize_str(string: &str) -> String {
    let lowercased = string.to_lowercase();

    if lowercased.contains(is_cjk) {
        // strings containing CJK chars are only lowercased
        lowercased
    } else {
        deunicode::deunicode_with_tofu(&lowercased, "")
    }
}
/// Finds the best way to cut `word` in two parts that both have a postings list.
///
/// Every char boundary inside `word` is tried. A cut is scored with the
/// smallest of the postings list lengths of its two parts (a part without
/// postings list counts for zero); cuts scoring zero are ignored and the first
/// cut with the highest score wins. Returns `None` when no cut qualifies.
fn split_best_frequency<'a>(
    reader: &heed::RoTxn,
    word: &'a str,
    postings_lists_store: store::PostingsLists,
) -> MResult<Option<(&'a str, &'a str)>> {
    let mut best = None;

    // skipping the first boundary avoids an empty left part
    for (boundary, _) in word.char_indices().skip(1) {
        let (left, right) = word.split_at(boundary);

        let left_freq = match postings_lists_store.postings_list(reader, left.as_ref())? {
            Some(postings) => postings.len(),
            None => 0,
        };
        let right_freq = match postings_lists_store.postings_list(reader, right.as_ref())? {
            Some(postings) => postings.len(),
            None => 0,
        };

        let min_freq = cmp::min(left_freq, right_freq);
        if min_freq == 0 {
            continue;
        }

        // only a strictly better score replaces the current best cut
        let improves = match best {
            Some((best_freq, _, _)) => min_freq > best_freq,
            None => true,
        };
        if improves {
            best = Some((min_freq, left, right));
        }
    }

    Ok(best.map(|(_, left, right)| (left, right)))
}
/// Generates the automaton groups of `query` along with the `QueryEnhancer`
/// that records which original words the additional automatons stand for.
///
/// The first group always contains one automaton for each original query word.
/// The following groups come from, for every ngram of 1 to `NGRAMS` words:
/// - the synonyms of the ngram found in the synonyms fst,
/// - for single words, the best split of the word in two words
///   (see `split_best_frequency`), emitted as a phrase query group,
/// - for ngrams of several words, the concatenation of these words.
///
/// The groups following the first one are finally sorted by importance.
fn generate_automatons(
reader: &heed::RoTxn,
query: &str,
main_store: store::Main,
postings_lists_store: store::PostingsLists,
synonym_store: store::Synonyms,
) -> MResult<(Vec<AutomatonGroup>, QueryEnhancer)> {
let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
// an empty set is used when no synonyms fst is stored
let synonyms = match main_store.synonyms_fst(reader)? {
Some(synonym) => synonym,
None => fst::Set::default(),
};
// index given to the next automaton, incremented for every automaton created
let mut automaton_index = 0;
let mut automatons = Vec::new();
let mut enhancer_builder = QueryEnhancerBuilder::new(&query_words);
// We must not declare the original words to the query enhancer
// *but* we need to push them in the automatons list first
let mut original_automatons = Vec::new();
let mut original_words = query_words.iter().peekable();
while let Some(word) = original_words.next() {
let has_following_word = original_words.peek().is_some();
// only the last word can be a prefix, and only when the query does not
// end with a whitespace and the word is not made of CJK chars only
let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);
let automaton = if not_prefix_dfa {
Automaton::exact(automaton_index, 1, word)
} else {
Automaton::prefix_exact(automaton_index, 1, word)
};
automaton_index += 1;
original_automatons.push(automaton);
}
automatons.push(AutomatonGroup::normal(original_automatons));
for n in 1..=NGRAMS {
let mut ngrams = query_words.windows(n).enumerate().peekable();
while let Some((query_index, ngram_slice)) = ngrams.next() {
// range of the original query words covered by this ngram
let query_range = query_index..query_index + n;
let ngram_nb_words = ngram_slice.len();
let ngram = ngram_slice.join(" ");
let has_following_word = ngrams.peek().is_some();
let not_prefix_dfa =
has_following_word || has_end_whitespace || ngram.chars().all(is_cjk);
// automaton of synonyms of the ngrams
let normalized = normalize_str(&ngram);
let lev = if not_prefix_dfa {
build_dfa(&normalized)
} else {
build_prefix_dfa(&normalized)
};
let mut stream = synonyms.search(&lev).into_stream();
while let Some(base) = stream.next() {
// only trigger alternatives when the last word has been typed,
// i.e. "new " does not but "new yo" triggers the alternatives to "new york"
let base = std::str::from_utf8(base).unwrap();
let base_nb_words = split_query_string(base).count();
if ngram_nb_words != base_nb_words {
continue;
}
if let Some(synonyms) = synonym_store.synonyms(reader, base.as_bytes())? {
let mut stream = synonyms.into_stream();
while let Some(synonyms) = stream.next() {
let synonyms = std::str::from_utf8(synonyms).unwrap();
let synonyms_words: Vec<_> = split_query_string(synonyms).collect();
let nb_synonym_words = synonyms_words.len();
// the whole synonym is declared at the index of its first word
let real_query_index = automaton_index;
enhancer_builder.declare(
query_range.clone(),
real_query_index,
&synonyms_words,
);
// one group per synonym word, exact only for single word synonyms
for synonym in synonyms_words {
let automaton = if nb_synonym_words == 1 {
Automaton::exact(automaton_index, n, synonym)
} else {
Automaton::non_exact(automaton_index, n, synonym)
};
automaton_index += 1;
automatons.push(AutomatonGroup::normal(vec![automaton]));
}
}
}
}
if n == 1 {
// automatons of the best split of the word, grouped as a phrase query
if let Some((left, right)) =
split_best_frequency(reader, &normalized, postings_lists_store)?
{
let a = Automaton::exact(automaton_index, 1, left);
enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
automaton_index += 1;
let b = Automaton::exact(automaton_index, 1, right);
// NOTE(review): `&[left]` is declared for the right part too, `&[right]`
// looks intended; `declare` seems to only use the length of a one word
// replacement of a one word range, so this is presumably harmless — confirm
enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
automaton_index += 1;
automatons.push(AutomatonGroup::phrase_query(vec![a, b]));
}
} else {
// automaton of concatenation of query words
let concat = ngram_slice.concat();
let normalized = normalize_str(&concat);
let real_query_index = automaton_index;
enhancer_builder.declare(query_range.clone(), real_query_index, &[&normalized]);
let automaton = Automaton::exact(automaton_index, n, &normalized);
automaton_index += 1;
automatons.push(AutomatonGroup::normal(vec![automaton]));
}
}
}
// order automatons, the most important first,
// we keep the original automatons at the front.
// Exact groups come first, then the smallest ngrams, then the biggest groups;
// the sort is stable so equal groups keep their creation order.
automatons[1..].sort_by_key(|group| {
let a = group.automatons.first().unwrap();
(
Reverse(a.is_exact),
a.ngram,
Reverse(group.automatons.len()),
)
});
Ok((automatons, enhancer_builder.build()))
}

View File

@ -0,0 +1,423 @@
use std::cmp::Ordering::{Equal, Greater, Less};
use std::ops::Range;
/// Returns `true` if the specified range can accept the given replacement words.
/// Returns `false` if the replacement words are already present in the original
/// query or if there are not more replacement words than words in the range.
//
//
// ## Ignored because already present in original
//
//     new york city subway
//     -------- ^^^^
//    /        \
//   [new york city]
//
//
// ## Ignored because smaller than the original
//
//   new york city subway
//   -------------
//   \          /
//    [new york]
//
//
// ## Accepted because bigger than the original
//
//        NYC subway
//        ---
//       /   \
//      /     \
//     /       \
//    /         \
//   /           \
//  [new york city]
//
fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
where
    S: AsRef<str>,
    T: AsRef<str>,
{
    // a replacement that is not strictly longer than
    // the replaced range never rewrites it
    let is_longer = words.len() > range.len();

    // the part of the query starting where the range starts,
    // limited to the length of the replacement part
    let original = query
        .iter()
        .skip(range.start)
        .take(words.len())
        .map(AsRef::as_ref);
    let replacement = words.iter().map(AsRef::as_ref);

    // the range is rewritten only if the original query
    // doesn't already spell the replacement words
    is_longer && original.ne(replacement)
}
type Origin = usize;
type RealLength = usize;

/// A sorted list of ranges, each associated with an `(Origin, RealLength)`
/// value, that can be queried by point with a binary search.
struct FakeIntervalTree {
    intervals: Vec<(Range<usize>, (Origin, RealLength))>,
}

impl FakeIntervalTree {
    /// Creates the tree from `intervals`, sorting them by start then end.
    fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
        intervals.sort_unstable_by_key(|(interval, _)| (interval.start, interval.end));
        FakeIntervalTree { intervals }
    }

    /// Returns the interval found by the binary search, along with its value,
    /// provided that it contains `point`.
    fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> {
        let position = self
            .intervals
            .binary_search_by(|(interval, _)| {
                if point < interval.start {
                    Greater
                } else if point >= interval.end {
                    Less
                } else {
                    Equal
                }
            })
            // a miss gives the insertion position, which is checked below as well
            .unwrap_or_else(|insertion| insertion);

        self.intervals
            .get(position)
            .filter(|(interval, _)| interval.contains(&point))
            .map(|(interval, value)| (interval.clone(), *value))
    }
}
/// Collects the replacements declared for ranges of the original query words
/// and builds the corresponding `QueryEnhancer`.
pub struct QueryEnhancerBuilder<'a, S> {
// the original query words
query: &'a [S],
// one entry per original word plus a trailing one, see `new`
origins: Vec<usize>,
// real query indices ranges along with their origin and real length
real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>,
}
impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
/// Creates a builder in which every original word is at its own position:
/// `origins` is `0..=query.len()` and each of these positions `o` is
/// registered as the real range `o..o + 1` of origin `o` and length 1.
pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
// we initialize origins query indices based on their positions
let origins: Vec<_> = (0..=query.len()).collect();
let real_to_origin = origins.iter().map(|&o| (o..o + 1, (o, 1))).collect();
QueryEnhancerBuilder {
query,
origins,
real_to_origin,
}
}
/// Update the final real to origin query indices mapping.
///
/// `range` is the original words range that this `replacement` words replace
/// and `real` is the first real query index of these replacement words.
///
/// NOTE(review): `range.end - 1` and `self.origins[range.end]` presumably rely
/// on `range` being a non-empty range of the original query — confirm.
pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
where
T: AsRef<str>,
{
// check if the range of original words
// can be rewritten with the replacement words
if rewrite_range_with(self.query, range.clone(), replacement) {
// this range can be replaced so we need to
// modify the origins accordingly
// number of words the replacement has in excess of the range,
// never negative here as the replacement is strictly longer
let offset = replacement.len() - range.len();
let previous_padding = self.origins[range.end - 1];
let current_offset = (self.origins[range.end] - 1) - previous_padding;
// only what is not already covered by the current offset is added
let diff = offset.saturating_sub(current_offset);
self.origins[range.end] += diff;
// every origin after the range end is shifted by the same amount
for r in &mut self.origins[range.end + 1..] {
*r += diff;
}
}
// we need to store the real number and origins relations
// this way it will be possible to know by how many
// we need to pad real query indices
let real_range = real..real + replacement.len().max(range.len());
let real_length = replacement.len();
self.real_to_origin
.push((real_range, (range.start, real_length)));
}
/// Builds the `QueryEnhancer`, the declared real ranges being moved
/// into a `FakeIntervalTree`.
pub fn build(self) -> QueryEnhancer {
QueryEnhancer {
origins: self.origins,
real_to_origin: FakeIntervalTree::new(self.real_to_origin),
}
}
}
/// Translates real query indices into ranges of query positions,
/// see `replacement`.
pub struct QueryEnhancer {
// position associated with each original query index,
// read by `replacement` to compute the returned bounds
origins: Vec<usize>,
// lookup structure giving, for a real query index, the declared
// range it belongs to along with its `(Origin, RealLength)`
real_to_origin: FakeIntervalTree,
}
impl QueryEnhancer {
    /// Returns the query indices to use to replace this real query index.
    ///
    /// # Panics
    ///
    /// Panics when no declared range contains `real`.
    pub fn replacement(&self, real: u32) -> Range<u32> {
        let real = real as usize;

        // ask the fake interval tree which declaration this real index belongs to
        let (range, (origin, real_length)) = self
            .real_to_origin
            .query(real)
            .expect("real has never been declared");

        // any word but the last one of the replacement simply maps
        // to the origin shifted by its position in the replacement
        if (range.start + real_length - 1) != real {
            let n = real - range.start;
            let first = self.origins[origin] + n;
            return Range {
                start: first as u32,
                end: (first + 1) as u32,
            };
        }

        // `real` is the last word of the replacement: walk the origins
        // until the whole declared range has been consumed
        let mut count = range.len();
        let new_origin = self.origins[origin..]
            .windows(2)
            .position(|pair| {
                count = count.saturating_sub(pair[1] - pair[0]);
                count == 0
            })
            .map_or(origin, |i| origin + i);

        let n = real - range.start;
        let start = self.origins[origin];
        let end = self.origins[new_origin + 1];
        let remaining = (end - start) - n;
        let first = start + n;

        Range {
            start: first as u32,
            end: (first + remaining) as u32,
        }
    }
}
// Each test declares replacements on a `QueryEnhancerBuilder` and checks the
// range returned by `QueryEnhancer::replacement` for every real query index.
// The digits in the comments are the query indices of the words above them.
#[cfg(test)]
mod tests {
use super::*;
// a replacement that runs into the word that already follows it
// leaves every original position untouched
#[test]
fn original_unmodified() {
let query = ["new", "york", "city", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// new york = new york city
builder.declare(0..2, 4, &["new", "york", "city"]);
// ^ 4 5 6
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // new
assert_eq!(enhancer.replacement(1), 1..2); // york
assert_eq!(enhancer.replacement(2), 2..3); // city
assert_eq!(enhancer.replacement(3), 3..4); // subway
assert_eq!(enhancer.replacement(4), 0..1); // new
assert_eq!(enhancer.replacement(5), 1..2); // york
assert_eq!(enhancer.replacement(6), 2..3); // city
}
// two words replaced by three: the last replaced word spans two positions
#[test]
fn simple_growing() {
let query = ["new", "york", "subway"];
// 0 1 2
let mut builder = QueryEnhancerBuilder::new(&query);
// new york = new york city
builder.declare(0..2, 3, &["new", "york", "city"]);
// ^ 3 4 5
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // new
assert_eq!(enhancer.replacement(1), 1..3); // york
assert_eq!(enhancer.replacement(2), 3..4); // subway
assert_eq!(enhancer.replacement(3), 0..1); // new
assert_eq!(enhancer.replacement(4), 1..2); // york
assert_eq!(enhancer.replacement(5), 2..3); // city
}
// several replacements of different lengths declared on the same word
#[test]
fn same_place_growings() {
let query = ["NY", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NY = new york
builder.declare(0..1, 2, &["new", "york"]);
// ^ 2 3
// NY = new york city
builder.declare(0..1, 4, &["new", "york", "city"]);
// ^ 4 5 6
// NY = NYC
builder.declare(0..1, 7, &["NYC"]);
// ^ 7
// NY = new york city
builder.declare(0..1, 8, &["new", "york", "city"]);
// ^ 8 9 10
// subway = underground train
builder.declare(1..2, 11, &["underground", "train"]);
// ^ 11 12
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..3); // NY
assert_eq!(enhancer.replacement(1), 3..5); // subway
assert_eq!(enhancer.replacement(2), 0..1); // new
assert_eq!(enhancer.replacement(3), 1..3); // york
assert_eq!(enhancer.replacement(4), 0..1); // new
assert_eq!(enhancer.replacement(5), 1..2); // york
assert_eq!(enhancer.replacement(6), 2..3); // city
assert_eq!(enhancer.replacement(7), 0..3); // NYC
assert_eq!(enhancer.replacement(8), 0..1); // new
assert_eq!(enhancer.replacement(9), 1..2); // york
assert_eq!(enhancer.replacement(10), 2..3); // city
assert_eq!(enhancer.replacement(11), 3..4); // underground
assert_eq!(enhancer.replacement(12), 4..5); // train
}
// one word replaced by three at the start of the query
#[test]
fn bigger_growing() {
let query = ["NYC", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(0..1, 2, &["new", "york", "city"]);
// ^ 2 3 4
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..3); // NYC
assert_eq!(enhancer.replacement(1), 3..4); // subway
assert_eq!(enhancer.replacement(2), 0..1); // new
assert_eq!(enhancer.replacement(3), 1..2); // york
assert_eq!(enhancer.replacement(4), 2..3); // city
}
// one word replaced by three in the middle of the query
#[test]
fn middle_query_growing() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..6); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
}
// the last word of the query replaced by two words
#[test]
fn end_query_growing() {
let query = ["NYC", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// subway = underground train
builder.declare(1..2, 2, &["underground", "train"]);
// ^ 2 3
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // NYC
assert_eq!(enhancer.replacement(1), 1..3); // subway
assert_eq!(enhancer.replacement(2), 1..2); // underground
assert_eq!(enhancer.replacement(3), 2..3); // train
}
// two consecutive words each replaced by a longer replacement
#[test]
fn multiple_growings() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
// subway = underground train
builder.declare(3..4, 7, &["underground", "train"]);
// ^ 7 8
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..7); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
assert_eq!(enhancer.replacement(7), 5..6); // underground
assert_eq!(enhancer.replacement(8), 6..7); // train
}
// growing replacements mixed with replacements shorter than the range they replace
#[test]
fn multiple_probable_growings() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
// subway = underground train
builder.declare(3..4, 7, &["underground", "train"]);
// ^ 7 8
// great awesome = good
builder.declare(0..2, 9, &["good"]);
// ^ 9
// awesome NYC = NY
builder.declare(1..3, 10, &["NY"]);
// ^^ 10
// NYC subway = metro
builder.declare(2..4, 11, &["metro"]);
// ^^ 11
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..7); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
assert_eq!(enhancer.replacement(7), 5..6); // underground
assert_eq!(enhancer.replacement(8), 6..7); // train
assert_eq!(enhancer.replacement(9), 0..2); // good
assert_eq!(enhancer.replacement(10), 1..5); // NY
assert_eq!(enhancer.replacement(11), 2..5); // metro
}
}

View File

@ -1,6 +1,6 @@
use std::cmp::Ordering;
use crate::criterion::Criterion;
use crate::RawDocument;
use std::cmp::Ordering;
#[derive(Debug, Clone, Copy)]
pub struct DocumentId;
@ -10,7 +10,7 @@ impl Criterion for DocumentId {
lhs.id.cmp(&rhs.id)
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
"DocumentId"
}
}

View File

@ -1,16 +1,40 @@
use std::cmp::Ordering;
use meilidb_schema::SchemaAttr;
use sdset::Set;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
fn number_exact_matches(
query_index: &[u32],
attribute: &[u16],
is_exact: &[bool],
fields_counts: &Set<(SchemaAttr, u64)>,
) -> usize {
let mut count = 0;
let mut index = 0;
for group in query_index.linear_group() {
let len = group.len();
count += is_exact[index..index + len].contains(&true) as usize;
let mut found_exact = false;
for (pos, is_exact) in is_exact[index..index + len].iter().enumerate() {
if *is_exact {
found_exact = true;
let attr = &attribute[index + pos];
if let Ok(pos) = fields_counts.binary_search_by_key(attr, |(a, _)| a.0) {
let (_, count) = fields_counts[pos];
if count == 1 {
return usize::max_value();
}
}
}
}
count += found_exact as usize;
index += len;
}
@ -25,19 +49,25 @@ impl Criterion for Exact {
let lhs = {
let query_index = lhs.query_index();
let is_exact = lhs.is_exact();
number_exact_matches(query_index, is_exact)
let attribute = lhs.attribute();
let fields_counts = &lhs.fields_counts;
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
let rhs = {
let query_index = rhs.query_index();
let is_exact = rhs.is_exact();
number_exact_matches(query_index, is_exact)
let attribute = rhs.attribute();
let fields_counts = &rhs.fields_counts;
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
"Exact"
}
}
@ -52,14 +82,51 @@ mod tests {
// doc1: "souliereres rouge"
#[test]
fn easy_case() {
let query_index0 = &[0];
let is_exact0 = &[true];
let doc0 = {
let query_index = &[0];
let attribute = &[0];
let is_exact = &[true];
let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap();
let query_index1 = &[0];
let is_exact1 = &[false];
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
let doc1 = {
let query_index = &[0];
let attribute = &[0];
let is_exact = &[false];
let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap();
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "soulier"
//
// doc0: { 0. "soulier" }
// doc1: { 0. "soulier bleu et blanc" }
#[test]
fn basic() {
let doc0 = {
let query_index = &[0];
let attribute = &[0];
let is_exact = &[true];
let fields_counts = Set::new(&[(SchemaAttr(0), 1)]).unwrap();
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
let doc1 = {
let query_index = &[0];
let attribute = &[0];
let is_exact = &[true];
let fields_counts = Set::new(&[(SchemaAttr(0), 4)]).unwrap();
number_exact_matches(query_index, attribute, is_exact, fields_counts)
};
let doc0 = number_exact_matches(query_index0, is_exact0);
let doc1 = number_exact_matches(query_index1, is_exact1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}

View File

@ -1,28 +1,26 @@
mod sum_of_typos;
mod document_id;
mod exact;
mod number_of_words;
mod words_proximity;
mod sort_by_attr;
mod sum_of_typos;
mod sum_of_words_attribute;
mod sum_of_words_position;
mod exact;
mod document_id;
mod words_proximity;
use std::cmp::Ordering;
use crate::RawDocument;
use std::cmp::Ordering;
pub use self::{
sum_of_typos::SumOfTypos,
number_of_words::NumberOfWords,
document_id::DocumentId, exact::Exact, number_of_words::NumberOfWords,
sort_by_attr::SortByAttr, sum_of_typos::SumOfTypos,
sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_position::SumOfWordsPosition,
words_proximity::WordsProximity,
sum_of_words_attribute::SumOfWordsAttribute,
sum_of_words_position::SumOfWordsPosition,
exact::Exact,
document_id::DocumentId,
};
pub trait Criterion: Send + Sync {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
fn name(&self) -> &'static str;
fn name(&self) -> &str;
#[inline]
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
@ -35,7 +33,7 @@ impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
(**self).evaluate(lhs, rhs)
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
(**self).name()
}
@ -49,7 +47,7 @@ impl<T: Criterion + ?Sized> Criterion for Box<T> {
(**self).evaluate(lhs, rhs)
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
(**self).name()
}
@ -60,17 +58,18 @@ impl<T: Criterion + ?Sized> Criterion for Box<T> {
#[derive(Default)]
pub struct CriteriaBuilder<'a> {
inner: Vec<Box<dyn Criterion + 'a>>
inner: Vec<Box<dyn Criterion + 'a>>,
}
impl<'a> CriteriaBuilder<'a>
{
impl<'a> CriteriaBuilder<'a> {
pub fn new() -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::new() }
}
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::with_capacity(capacity) }
CriteriaBuilder {
inner: Vec::with_capacity(capacity),
}
}
pub fn reserve(&mut self, additional: usize) {
@ -78,14 +77,16 @@ impl<'a> CriteriaBuilder<'a>
}
pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
where C: Criterion,
where
C: Criterion,
{
self.push(criterion);
self
}
pub fn push<C: 'a>(&mut self, criterion: C)
where C: Criterion,
where
C: Criterion,
{
self.inner.push(Box::new(criterion));
}

View File

@ -1,7 +1,7 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
use slice_group_by::GroupBy;
use std::cmp::Ordering;
#[inline]
fn number_of_query_words(query_index: &[u32]) -> usize {
@ -25,7 +25,7 @@ impl Criterion for NumberOfWords {
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
"NumberOfWords"
}
}

View File

@ -2,8 +2,8 @@ use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use meilidb_core::{criterion::Criterion, RawDocument};
use meilidb_data::RankedMap;
use crate::criterion::Criterion;
use crate::{RankedMap, RawDocument};
use meilidb_schema::{Schema, SchemaAttr};
/// A helper struct that permits sorting documents by
@ -51,8 +51,7 @@ impl<'a> SortByAttr<'a> {
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
) -> Result<SortByAttr<'a>, SortByAttrError> {
SortByAttr::new(ranked_map, schema, attr_name, false)
}
@ -60,8 +59,7 @@ impl<'a> SortByAttr<'a> {
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
) -> Result<SortByAttr<'a>, SortByAttrError> {
SortByAttr::new(ranked_map, schema, attr_name, true)
}
@ -70,8 +68,7 @@ impl<'a> SortByAttr<'a> {
schema: &Schema,
attr_name: &str,
reversed: bool,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
) -> Result<SortByAttr<'a>, SortByAttrError> {
let attr = match schema.attribute(attr_name) {
Some(attr) => attr,
None => return Err(SortByAttrError::AttributeNotFound),
@ -81,7 +78,11 @@ impl<'a> SortByAttr<'a> {
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
}
Ok(SortByAttr { ranked_map, attr, reversed })
Ok(SortByAttr {
ranked_map,
attr,
reversed,
})
}
}
@ -93,15 +94,19 @@ impl<'a> Criterion for SortByAttr<'a> {
match (lhs, rhs) {
(Some(lhs), Some(rhs)) => {
let order = lhs.cmp(&rhs);
if self.reversed { order.reverse() } else { order }
},
(None, Some(_)) => Ordering::Greater,
(Some(_), None) => Ordering::Less,
(None, None) => Ordering::Equal,
if self.reversed {
order.reverse()
} else {
order
}
}
(None, Some(_)) => Ordering::Greater,
(Some(_), None) => Ordering::Less,
(None, None) => Ordering::Equal,
}
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
"SortByAttr"
}
}
@ -122,4 +127,4 @@ impl fmt::Display for SortByAttrError {
}
}
impl Error for SortByAttrError { }
impl Error for SortByAttrError {}

View File

@ -11,10 +11,10 @@ use crate::RawDocument;
#[inline]
fn custom_log10(n: u8) -> f32 {
match n {
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
_ => panic!("invalid number"),
}
}
@ -54,7 +54,7 @@ impl Criterion for SumOfTypos {
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
"SumOfTypos"
}
}

View File

@ -1,7 +1,7 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
use slice_group_by::GroupBy;
use std::cmp::Ordering;
#[inline]
fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
@ -36,7 +36,7 @@ impl Criterion for SumOfWordsAttribute {
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
"SumOfWordsAttribute"
}
}

View File

@ -1,7 +1,7 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
use slice_group_by::GroupBy;
use std::cmp::Ordering;
#[inline]
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
@ -36,7 +36,7 @@ impl Criterion for SumOfWordsPosition {
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
"SumOfWordsPosition"
}
}

View File

@ -1,7 +1,7 @@
use std::cmp::{self, Ordering};
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
use slice_group_by::GroupBy;
use std::cmp::{self, Ordering};
const MAX_DISTANCE: u16 = 8;
@ -19,7 +19,9 @@ fn index_proximity(lhs: u16, rhs: u16) -> u16 {
}
fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
if lattr != rattr { return MAX_DISTANCE }
if lattr != rattr {
return MAX_DISTANCE;
}
index_proximity(lwi, rwi)
}
@ -42,15 +44,18 @@ fn matches_proximity(
distance: &[u8],
attribute: &[u16],
word_index: &[u16],
) -> u16
{
) -> u16 {
let mut query_index_groups = query_index.linear_group();
let mut proximity = 0;
let mut index = 0;
let get_attr_wi = |index: usize, group_len: usize| {
// retrieve the first distance group (with the lowest values)
let len = distance[index..index + group_len].linear_group().next().unwrap().len();
let len = distance[index..index + group_len]
.linear_group()
.next()
.unwrap()
.len();
let rattr = &attribute[index..index + len];
let rwi = &word_index[index..index + len];
@ -99,7 +104,7 @@ impl Criterion for WordsProximity {
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
fn name(&self) -> &str {
"WordsProximity"
}
}
@ -110,7 +115,6 @@ mod tests {
#[test]
fn three_different_attributes() {
// "soup" "of the" "the day"
//
// { id: 0, attr: 0, attr_index: 0 }
@ -120,19 +124,21 @@ mod tests {
// { id: 3, attr: 3, attr_index: 1 }
let query_index = &[0, 1, 2, 2, 3];
let distance = &[0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 2, 3];
let word_index = &[0, 0, 1, 0, 1];
let distance = &[0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 2, 3];
let word_index = &[0, 0, 1, 0, 1];
// soup -> of = 8
// + of -> the = 1
// + the -> day = 8 (not 1)
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 17);
assert_eq!(
matches_proximity(query_index, distance, attribute, word_index),
17
);
}
#[test]
fn two_different_attributes() {
// "soup day" "soup of the day"
//
// { id: 0, attr: 0, attr_index: 0 }
@ -143,13 +149,16 @@ mod tests {
// { id: 3, attr: 1, attr_index: 3 }
let query_index = &[0, 0, 1, 2, 3, 3];
let distance = &[0, 0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 1, 0, 1];
let word_index = &[0, 0, 1, 2, 1, 3];
let distance = &[0, 0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 1, 0, 1];
let word_index = &[0, 0, 1, 2, 1, 3];
// soup -> of = 1
// + of -> the = 1
// + the -> day = 1
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 3);
assert_eq!(
matches_proximity(query_index, distance, attribute, word_index),
3
);
}
}

View File

@ -0,0 +1,844 @@
use std::collections::hash_map::{Entry, HashMap};
use std::fs::File;
use std::path::Path;
use std::sync::{Arc, RwLock};
use std::{fs, thread};
use crossbeam_channel::{Receiver, Sender};
use heed::types::{Str, Unit};
use heed::{CompactionOption, Result as ZResult};
use log::debug;
use crate::{store, update, Index, MResult};
/// Boxed user callback invoked with an index name and the result of a processed update.
pub type BoxUpdateFn = Box<dyn Fn(&str, update::ProcessedUpdateResult) + Send + Sync + 'static>;
// optional callback slot that can be swapped while the update threads are running
type ArcSwapFn = arc_swap::ArcSwapOption<BoxUpdateFn>;
/// A set of named indexes stored in a single LMDB environment,
/// each opened index being paired with its own update thread.
pub struct Database {
// the LMDB environment shared by all the indexes
pub env: heed::Env,
// the database named "common", exposed through `common_store`
common_store: heed::PolyDatabase,
// the database named "indexes", whose keys are the names of the existing indexes
indexes_store: heed::Database<Str, Unit>,
// the opened indexes by name, along with the handle of their update thread
indexes: RwLock<HashMap<String, (Index, thread::JoinHandle<()>)>>,
// the update callback common to all indexes, shared with every update thread
update_fn: Arc<ArcSwapFn>,
}
// Unwraps the `Ok` value of `$expr`; on `Err` the error is logged with
// `log::error!`, prefixed by `$msg`, and the enclosing loop is exited with `break`.
// It must therefore only be used inside a loop.
macro_rules! r#break_try {
($expr:expr, $msg:tt) => {
match $expr {
core::result::Result::Ok(val) => val,
core::result::Result::Err(err) => {
log::error!(concat!($msg, ": {}"), err);
break;
}
}
};
}
/// Events received by the update loop of an index (see `update_awaiter`).
pub enum UpdateEvent {
/// Makes the update loop process the pending updates of its index.
NewUpdate,
/// Any event other than `NewUpdate` ends the update loop.
MustStop,
}
/// Receiving half of the channel carrying `UpdateEvent`s to an update loop.
pub type UpdateEvents = Receiver<UpdateEvent>;
/// Sending half of the channel carrying `UpdateEvent`s to an update loop.
pub type UpdateEventsEmitter = Sender<UpdateEvent>;
// Update loop of one index, meant to run on its own thread.
//
// It waits on `receiver` and, for every `UpdateEvent::NewUpdate`, processes the
// updates queued in `index.updates` one by one until none is left. Each update is
// applied in a nested transaction that is committed only when the update reports
// no error, while its result is always written and committed through the parent
// transaction. The callback currently stored in `update_fn`, if any, is then
// called with `index_name` and the result.
//
// The function returns when the channel is closed or when an event other than
// `NewUpdate` is received. A failing step is logged by `break_try!` and stops the
// processing of the current batch; the loop then waits for the next event.
fn update_awaiter(
receiver: UpdateEvents,
env: heed::Env,
index_name: &str,
update_fn: Arc<ArcSwapFn>,
index: Index,
) {
let mut receiver = receiver.into_iter();
while let Some(UpdateEvent::NewUpdate) = receiver.next() {
loop {
// instantiate a main/parent transaction
let mut writer = break_try!(env.write_txn(), "LMDB write transaction begin failed");
// retrieve the update that needs to be processed
let result = index.updates.pop_front(&mut writer);
let (update_id, update) = match break_try!(result, "pop front update failed") {
Some(value) => value,
None => {
// the queue is empty: nothing was written, drop the transaction
debug!("no more updates");
writer.abort();
break;
}
};
// instantiate a nested transaction
let result = env.nested_write_txn(&mut writer);
let mut nested_writer = break_try!(result, "LMDB nested write transaction failed");
// try to apply the update to the database using the nested transaction
let result = update::update_task(&mut nested_writer, index.clone(), update_id, update);
let status = break_try!(result, "update task failed");
// commit the nested transaction if the update was successful, abort it otherwise
if status.error.is_none() {
break_try!(nested_writer.commit(), "commit nested transaction failed");
} else {
nested_writer.abort()
}
// write the result of the update in the updates-results store
let updates_results = index.updates_results;
let result = updates_results.put_update_result(&mut writer, update_id, &status);
// always commit the main/parent transaction, even if the update was unsuccessful
break_try!(result, "update result store commit failed");
break_try!(writer.commit(), "update parent transaction failed");
// call the user callback when the update and the result are written consistently
if let Some(ref callback) = *update_fn.load() {
(callback)(index_name, status);
}
}
}
debug!("update loop system stopped");
}
impl Database {
pub fn open_or_create(path: impl AsRef<Path>) -> MResult<Database> {
fs::create_dir_all(path.as_ref())?;
let env = heed::EnvOpenOptions::new()
.map_size(10 * 1024 * 1024 * 1024) // 10GB
.max_dbs(3000)
.open(path)?;
let common_store = env.create_poly_database(Some("common"))?;
let indexes_store = env.create_database::<Str, Unit>(Some("indexes"))?;
let update_fn = Arc::new(ArcSwapFn::empty());
// list all indexes that needs to be opened
let mut must_open = Vec::new();
let reader = env.read_txn()?;
for result in indexes_store.iter(&reader)? {
let (index_name, _) = result?;
must_open.push(index_name.to_owned());
}
reader.abort();
// open the previously aggregated indexes
let mut indexes = HashMap::new();
for index_name in must_open {
let (sender, receiver) = crossbeam_channel::bounded(100);
let index = match store::open(&env, &index_name, sender.clone())? {
Some(index) => index,
None => {
log::warn!(
"the index {} doesn't exist or has not all the databases",
index_name
);
continue;
}
};
let env_clone = env.clone();
let index_clone = index.clone();
let name_clone = index_name.clone();
let update_fn_clone = update_fn.clone();
let handle = thread::spawn(move || {
update_awaiter(
receiver,
env_clone,
&name_clone,
update_fn_clone,
index_clone,
)
});
// send an update notification to make sure that
// possible pre-boot updates are consumed
sender.send(UpdateEvent::NewUpdate).unwrap();
let result = indexes.insert(index_name, (index, handle));
assert!(
result.is_none(),
"The index should not have been already open"
);
}
Ok(Database {
env,
common_store,
indexes_store,
indexes: RwLock::new(indexes),
update_fn,
})
}
pub fn open_index(&self, name: impl AsRef<str>) -> Option<Index> {
let indexes_lock = self.indexes.read().unwrap();
match indexes_lock.get(name.as_ref()) {
Some((index, ..)) => Some(index.clone()),
None => None,
}
}
pub fn create_index(&self, name: impl AsRef<str>) -> MResult<Index> {
let name = name.as_ref();
let mut indexes_lock = self.indexes.write().unwrap();
match indexes_lock.entry(name.to_owned()) {
Entry::Occupied(_) => Err(crate::Error::IndexAlreadyExists),
Entry::Vacant(entry) => {
let (sender, receiver) = crossbeam_channel::bounded(100);
let index = store::create(&self.env, name, sender)?;
let mut writer = self.env.write_txn()?;
self.indexes_store.put(&mut writer, name, &())?;
let env_clone = self.env.clone();
let index_clone = index.clone();
let name_clone = name.to_owned();
let update_fn_clone = self.update_fn.clone();
let handle = thread::spawn(move || {
update_awaiter(
receiver,
env_clone,
&name_clone,
update_fn_clone,
index_clone,
)
});
writer.commit()?;
entry.insert((index.clone(), handle));
Ok(index)
}
}
}
pub fn delete_index(&self, name: impl AsRef<str>) -> MResult<bool> {
let name = name.as_ref();
let mut indexes_lock = self.indexes.write().unwrap();
match indexes_lock.remove_entry(name) {
Some((name, (index, handle))) => {
// remove the index name from the list of indexes
// and clear all the LMDB dbi
let mut writer = self.env.write_txn()?;
self.indexes_store.delete(&mut writer, &name)?;
store::clear(&mut writer, &index)?;
writer.commit()?;
// join the update loop thread to ensure it is stopped
handle.join().unwrap();
Ok(true)
}
None => Ok(false),
}
}
pub fn set_update_callback(&self, update_fn: BoxUpdateFn) {
let update_fn = Some(Arc::new(update_fn));
self.update_fn.swap(update_fn);
}
pub fn unset_update_callback(&self) {
self.update_fn.swap(None);
}
pub fn copy_and_compact_to_path<P: AsRef<Path>>(&self, path: P) -> ZResult<File> {
self.env.copy_to_path(path, CompactionOption::Enabled)
}
pub fn indexes_names(&self) -> MResult<Vec<String>> {
let indexes = self.indexes.read().unwrap();
Ok(indexes.keys().cloned().collect())
}
pub fn common_store(&self) -> heed::PolyDatabase {
self.common_store
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::update::{ProcessedUpdateResult, UpdateStatus};
use crate::DocumentId;
use serde::de::IgnoredAny;
use std::sync::mpsc;
// A documents addition in which every document carries the schema
// identifier must be reported as processed without error.
#[test]
fn valid_updates() {
let dir = tempfile::tempdir().unwrap();
let database = Database::open_or_create(dir.path()).unwrap();
let env = &database.env;
// the update callback forwards the id of each processed update to the test
let (sender, receiver) = mpsc::sync_channel(100);
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
sender.send(update.update_id).unwrap()
};
let index = database.create_index("test").unwrap();
database.set_update_callback(Box::new(update_fn));
let schema = {
let data = r#"
identifier = "id"
[attributes."name"]
displayed = true
indexed = true
[attributes."description"]
displayed = true
indexed = true
"#;
toml::from_str(data).unwrap()
};
let mut writer = env.write_txn().unwrap();
let _update_id = index.schema_update(&mut writer, schema).unwrap();
writer.commit().unwrap();
let mut additions = index.documents_addition();
let doc1 = serde_json::json!({
"id": 123,
"name": "Marvin",
"description": "My name is Marvin",
});
let doc2 = serde_json::json!({
"id": 234,
"name": "Kevin",
"description": "My name is Kevin",
});
additions.update_document(doc1);
additions.update_document(doc2);
let mut writer = env.write_txn().unwrap();
let update_id = additions.finalize(&mut writer).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
let _ = receiver.into_iter().find(|id| *id == update_id);
// the stored status must be `Processed` and carry no error
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
}
// A documents addition containing a document without the schema
// identifier must be reported as processed with an error.
#[test]
fn invalid_updates() {
let dir = tempfile::tempdir().unwrap();
let database = Database::open_or_create(dir.path()).unwrap();
let env = &database.env;
// the update callback forwards the id of each processed update to the test
let (sender, receiver) = mpsc::sync_channel(100);
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
sender.send(update.update_id).unwrap()
};
let index = database.create_index("test").unwrap();
database.set_update_callback(Box::new(update_fn));
let schema = {
let data = r#"
identifier = "id"
[attributes."name"]
displayed = true
indexed = true
[attributes."description"]
displayed = true
indexed = true
"#;
toml::from_str(data).unwrap()
};
let mut writer = env.write_txn().unwrap();
let _update_id = index.schema_update(&mut writer, schema).unwrap();
writer.commit().unwrap();
let mut additions = index.documents_addition();
let doc1 = serde_json::json!({
"id": 123,
"name": "Marvin",
"description": "My name is Marvin",
});
// this document has no "id" field, presumably the reason the update must fail
let doc2 = serde_json::json!({
"name": "Kevin",
"description": "My name is Kevin",
});
additions.update_document(doc1);
additions.update_document(doc2);
let mut writer = env.write_txn().unwrap();
let update_id = additions.finalize(&mut writer).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
let _ = receiver.into_iter().find(|id| *id == update_id);
// the stored status must be `Processed` and carry an error
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_some());
}
#[test]
fn ignored_words_too_long() {
let dir = tempfile::tempdir().unwrap();
let database = Database::open_or_create(dir.path()).unwrap();
let env = &database.env;
let (sender, receiver) = mpsc::sync_channel(100);
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
sender.send(update.update_id).unwrap()
};
let index = database.create_index("test").unwrap();
database.set_update_callback(Box::new(update_fn));
let schema = {
let data = r#"
identifier = "id"
[attributes."name"]
displayed = true
indexed = true
"#;
toml::from_str(data).unwrap()
};
let mut writer = env.write_txn().unwrap();
let _update_id = index.schema_update(&mut writer, schema).unwrap();
writer.commit().unwrap();
let mut additions = index.documents_addition();
let doc1 = serde_json::json!({
"id": 123,
"name": "s̷̡̢̡̧̺̜̞͕͉͉͕̜͔̟̼̥̝͍̟̖͔͔̪͉̲̹̝̣̖͎̞̤̥͓͎̭̩͕̙̩̿̀̋̅̈́̌́̏̍̄̽͂̆̾̀̿̕̚̚͜͠͠ͅͅļ̵̨̨̨̰̦̻̳̖̳͚̬̫͚̦͖͈̲̫̣̩̥̻̙̦̱̼̠̖̻̼̘̖͉̪̜̠̙͖̙̩͔̖̯̩̲̿̽͋̔̿̍̓͂̍̿͊͆̃͗̔̎͐͌̾̆͗́̆̒̔̾̅̚̚͜͜ͅͅī̵̛̦̅̔̓͂͌̾́͂͛̎̋͐͆̽̂̋̋́̾̀̉̓̏̽́̑̀͒̇͋͛̈́̃̉̏͊̌̄̽̿̏̇͘̕̚̕p̶̧̛̛̖̯̗͕̝̗̭̱͙̖̗̟̟̐͆̊̂͐̋̓̂̈́̓͊̆͌̾̾͐͋͗͌̆̿̅͆̈́̈́̉͋̍͊͗̌̓̅̈̎̇̃̎̈́̉̐̋͑̃͘̕͘d̴̢̨̛͕̘̯͖̭̮̝̝̐̊̈̅̐̀͒̀́̈́̀͌̽͛͆͑̀̽̿͛̃̋̇̎̀́̂́͘͠͝ǫ̵̨̛̮̩̘͚̬̯̖̱͍̼͑͑̓̐́̑̿̈́̔͌̂̄͐͝ģ̶̧̜͇̣̭̺̪̺̖̻͖̮̭̣̙̻͒͊͗̓̓͒̀̀ͅ",
});
additions.update_document(doc1);
let mut writer = env.write_txn().unwrap();
let update_id = additions.finalize(&mut writer).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
let _ = receiver.into_iter().find(|id| *id == update_id);
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
}
/// Checks which schema updates are accepted once an index already has a schema:
/// appending new attributes at the end must be processed without error, while
/// introducing an attribute in the middle must be processed with an error.
#[test]
fn add_schema_attributes_at_end() {
let dir = tempfile::tempdir().unwrap();
let database = Database::open_or_create(dir.path()).unwrap();
let env = &database.env;
// the update callback forwards the id of every update it receives to this channel
let (sender, receiver) = mpsc::sync_channel(100);
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
sender.send(update.update_id).unwrap()
};
let index = database.create_index("test").unwrap();
database.set_update_callback(Box::new(update_fn));
// initial schema: only `name` and `description`
let schema = {
let data = r#"
identifier = "id"
[attributes."name"]
displayed = true
indexed = true
[attributes."description"]
displayed = true
indexed = true
"#;
toml::from_str(data).unwrap()
};
let mut writer = env.write_txn().unwrap();
let _update_id = index.schema_update(&mut writer, schema).unwrap();
writer.commit().unwrap();
// add two documents that only use the initial attributes
let mut additions = index.documents_addition();
let doc1 = serde_json::json!({
"id": 123,
"name": "Marvin",
"description": "My name is Marvin",
});
let doc2 = serde_json::json!({
"id": 234,
"name": "Kevin",
"description": "My name is Kevin",
});
additions.update_document(doc1);
additions.update_document(doc2);
let mut writer = env.write_txn().unwrap();
let _update_id = additions.finalize(&mut writer).unwrap();
writer.commit().unwrap();
// same schema with `age` and `sex` appended after the existing attributes
let schema = {
let data = r#"
identifier = "id"
[attributes."name"]
displayed = true
indexed = true
[attributes."description"]
displayed = true
indexed = true
[attributes."age"]
displayed = true
indexed = true
[attributes."sex"]
displayed = true
indexed = true
"#;
toml::from_str(data).unwrap()
};
let mut writer = env.write_txn().unwrap();
let update_id = index.schema_update(&mut writer, schema).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
// (`find` consumes and discards the ids of the earlier updates received before it)
let _ = receiver.iter().find(|id| *id == update_id);
// check if it has been accepted
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
reader.abort();
// send the same two documents again, this time with the new attributes filled in
let mut additions = index.documents_addition();
let doc1 = serde_json::json!({
"id": 123,
"name": "Marvin",
"description": "My name is Marvin",
"age": 21,
"sex": "Male",
});
let doc2 = serde_json::json!({
"id": 234,
"name": "Kevin",
"description": "My name is Kevin",
"age": 23,
"sex": "Male",
});
additions.update_document(doc1);
additions.update_document(doc2);
let mut writer = env.write_txn().unwrap();
let update_id = additions.finalize(&mut writer).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
let _ = receiver.iter().find(|id| *id == update_id);
// check if it has been accepted
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
// even try to search for a document
// (only the first document carries the value 21, in its new `age` attribute)
let results = index.query_builder().query(&reader, "21 ", 0..20).unwrap();
assert_matches!(results.len(), 1);
reader.abort();
// try to introduce attributes in the middle of the schema
// (`city` is placed between `description` and `age`)
let schema = {
let data = r#"
identifier = "id"
[attributes."name"]
displayed = true
indexed = true
[attributes."description"]
displayed = true
indexed = true
[attributes."city"]
displayed = true
indexed = true
[attributes."age"]
displayed = true
indexed = true
[attributes."sex"]
displayed = true
indexed = true
"#;
toml::from_str(data).unwrap()
};
let mut writer = env.write_txn().unwrap();
let update_id = index.schema_update(&mut writer, schema).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
let _ = receiver.iter().find(|id| *id == update_id);
// check if it has been accepted
// (here the update is expected to be processed but to report an error)
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_some());
}
/// Checks that documents added to an index can be fetched back by their
/// internal `DocumentId`, and that an unknown id yields `None`.
#[test]
fn deserialize_documents() {
let dir = tempfile::tempdir().unwrap();
let database = Database::open_or_create(dir.path()).unwrap();
let env = &database.env;
// the update callback forwards the id of every update it receives to this channel
let (sender, receiver) = mpsc::sync_channel(100);
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
sender.send(update.update_id).unwrap()
};
let index = database.create_index("test").unwrap();
database.set_update_callback(Box::new(update_fn));
let schema = {
let data = r#"
identifier = "id"
[attributes."name"]
displayed = true
indexed = true
[attributes."description"]
displayed = true
indexed = true
"#;
toml::from_str(data).unwrap()
};
let mut writer = env.write_txn().unwrap();
let _update_id = index.schema_update(&mut writer, schema).unwrap();
writer.commit().unwrap();
let mut additions = index.documents_addition();
// DocumentId(7900334843754999545)
let doc1 = serde_json::json!({
"id": 123,
"name": "Marvin",
"description": "My name is Marvin",
});
// DocumentId(8367468610878465872)
let doc2 = serde_json::json!({
"id": 234,
"name": "Kevin",
"description": "My name is Kevin",
});
additions.update_document(doc1);
additions.update_document(doc2);
let mut writer = env.write_txn().unwrap();
let update_id = additions.finalize(&mut writer).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
// (`find` consumes and discards the ids of the earlier updates received before it)
let _ = receiver.into_iter().find(|id| *id == update_id);
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
// an id that matches none of the two documents must not resolve to anything
let document: Option<IgnoredAny> = index.document(&reader, None, DocumentId(25)).unwrap();
assert!(document.is_none());
// both documents must be retrievable; `IgnoredAny` only checks that something deserializes
let document: Option<IgnoredAny> = index
.document(&reader, None, DocumentId(7900334843754999545))
.unwrap();
assert!(document.is_some());
let document: Option<IgnoredAny> = index
.document(&reader, None, DocumentId(8367468610878465872))
.unwrap();
assert!(document.is_some());
}
/// Checks that a partial addition only replaces the attributes it provides:
/// after sending a new `description` for each document, the stored documents
/// must still carry their original `id` and `name`.
#[test]
fn partial_document_update() {
let dir = tempfile::tempdir().unwrap();
let database = Database::open_or_create(dir.path()).unwrap();
let env = &database.env;
// the update callback forwards the id of every update it receives to this channel
let (sender, receiver) = mpsc::sync_channel(100);
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
sender.send(update.update_id).unwrap()
};
let index = database.create_index("test").unwrap();
database.set_update_callback(Box::new(update_fn));
// `id` is declared as a displayed attribute so that it is part of the documents read back below
let schema = {
let data = r#"
identifier = "id"
[attributes."id"]
displayed = true
[attributes."name"]
displayed = true
indexed = true
[attributes."description"]
displayed = true
indexed = true
"#;
toml::from_str(data).unwrap()
};
let mut writer = env.write_txn().unwrap();
let _update_id = index.schema_update(&mut writer, schema).unwrap();
writer.commit().unwrap();
let mut additions = index.documents_addition();
// DocumentId(7900334843754999545)
let doc1 = serde_json::json!({
"id": 123,
"name": "Marvin",
"description": "My name is Marvin",
});
// DocumentId(8367468610878465872)
let doc2 = serde_json::json!({
"id": 234,
"name": "Kevin",
"description": "My name is Kevin",
});
additions.update_document(doc1);
additions.update_document(doc2);
let mut writer = env.write_txn().unwrap();
let update_id = additions.finalize(&mut writer).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
// (`find` consumes and discards the ids of the earlier updates received before it)
let _ = receiver.iter().find(|id| *id == update_id);
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
// sanity check: the full documents are stored and an unknown id resolves to nothing
let document: Option<IgnoredAny> = index.document(&reader, None, DocumentId(25)).unwrap();
assert!(document.is_none());
let document: Option<IgnoredAny> = index
.document(&reader, None, DocumentId(7900334843754999545))
.unwrap();
assert!(document.is_some());
let document: Option<IgnoredAny> = index
.document(&reader, None, DocumentId(8367468610878465872))
.unwrap();
assert!(document.is_some());
reader.abort();
// the partial documents only provide the identifier and a new `description`
let mut partial_additions = index.documents_partial_addition();
// DocumentId(7900334843754999545)
let partial_doc1 = serde_json::json!({
"id": 123,
"description": "I am the new Marvin",
});
// DocumentId(8367468610878465872)
let partial_doc2 = serde_json::json!({
"id": 234,
"description": "I am the new Kevin",
});
partial_additions.update_document(partial_doc1);
partial_additions.update_document(partial_doc2);
let mut writer = env.write_txn().unwrap();
let update_id = partial_additions.finalize(&mut writer).unwrap();
writer.commit().unwrap();
// block until the transaction is processed
let _ = receiver.iter().find(|id| *id == update_id);
let reader = env.read_txn().unwrap();
let result = index.update_status(&reader, update_id).unwrap();
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
// the stored documents must combine the untouched `name` with the new `description`
let document: Option<serde_json::Value> = index
.document(&reader, None, DocumentId(7900334843754999545))
.unwrap();
let new_doc1 = serde_json::json!({
"id": 123,
"name": "Marvin",
"description": "I am the new Marvin",
});
assert_eq!(document, Some(new_doc1));
let document: Option<serde_json::Value> = index
.document(&reader, None, DocumentId(8367468610878465872))
.unwrap();
let new_doc2 = serde_json::json!({
"id": 234,
"name": "Kevin",
"description": "I am the new Kevin",
});
assert_eq!(document, Some(new_doc2));
}
/// An index that has been deleted can no longer be opened.
#[test]
fn delete_index() {
    let tmp = tempfile::tempdir().unwrap();
    let database = Database::open_or_create(tmp.path()).unwrap();

    // the handle of the freshly created index stays alive while the index is deleted
    let _index = database.create_index("test").unwrap();

    // the deletion must report that something was actually removed
    assert!(database.delete_index("test").unwrap());

    // and the name must not resolve to an index anymore
    assert!(database.open_index("test").is_none());
}
}

View File

@ -1,5 +1,5 @@
use std::hash::Hash;
use hashbrown::HashMap;
use std::hash::Hash;
pub struct DistinctMap<K> {
inner: HashMap<K, usize>,

117
meilidb-core/src/error.rs Normal file
View File

@ -0,0 +1,117 @@
use crate::serde::{DeserializerError, SerializerError};
use serde_json::Error as SerdeJsonError;
use std::{error, fmt, io};
pub type MResult<T> = Result<T, Error>;
/// The error type of this crate, used as the error side of `MResult`.
///
/// The first variants describe conditions detected by the crate itself,
/// the wrapping variants carry an error coming from a dependency and
/// `UnsupportedOperation` carries the reason why an operation was refused.
#[derive(Debug)]
pub enum Error {
/// An I/O error; displayed as the underlying `io::Error` without any prefix.
Io(io::Error),
/// Displayed as "index already exists".
IndexAlreadyExists,
/// Displayed as "schemas differ".
SchemaDiffer,
/// The index does not have a schema.
SchemaMissing,
/// The index does not have a word index.
WordIndexMissing,
/// The document id is missing.
MissingDocumentId,
/// An error coming from `heed` (displayed with the "heed error" prefix).
Zlmdb(heed::Error),
/// An error coming from `fst`.
Fst(fst::Error),
/// An error coming from `serde_json`.
SerdeJson(SerdeJsonError),
/// An error coming from `bincode`.
Bincode(bincode::Error),
/// An error coming from this crate's serializer.
Serializer(SerializerError),
/// An error coming from this crate's deserializer.
Deserializer(DeserializerError),
/// The requested operation is not supported, see `UnsupportedOperation`.
UnsupportedOperation(UnsupportedOperation),
}
impl From<io::Error> for Error {
fn from(error: io::Error) -> Error {
Error::Io(error)
}
}
impl From<heed::Error> for Error {
fn from(error: heed::Error) -> Error {
Error::Zlmdb(error)
}
}
impl From<fst::Error> for Error {
fn from(error: fst::Error) -> Error {
Error::Fst(error)
}
}
impl From<SerdeJsonError> for Error {
fn from(error: SerdeJsonError) -> Error {
Error::SerdeJson(error)
}
}
impl From<bincode::Error> for Error {
fn from(error: bincode::Error) -> Error {
Error::Bincode(error)
}
}
impl From<SerializerError> for Error {
fn from(error: SerializerError) -> Error {
Error::Serializer(error)
}
}
impl From<DeserializerError> for Error {
fn from(error: DeserializerError) -> Error {
Error::Deserializer(error)
}
}
impl From<UnsupportedOperation> for Error {
fn from(op: UnsupportedOperation) -> Error {
Error::UnsupportedOperation(op)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
Io(e) => write!(f, "{}", e),
IndexAlreadyExists => write!(f, "index already exists"),
SchemaDiffer => write!(f, "schemas differ"),
SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"),
Zlmdb(e) => write!(f, "heed error; {}", e),
Fst(e) => write!(f, "fst error; {}", e),
SerdeJson(e) => write!(f, "serde json error; {}", e),
Bincode(e) => write!(f, "bincode error; {}", e),
Serializer(e) => write!(f, "serializer error; {}", e),
Deserializer(e) => write!(f, "deserializer error; {}", e),
UnsupportedOperation(op) => write!(f, "unsupported operation; {}", op),
}
}
}
// Marks `Error` as a standard error type; only the default methods of the trait are used.
impl error::Error for Error {}
/// The reasons why an operation on a schema can be refused.
#[derive(Debug)]
pub enum UnsupportedOperation {
    /// The index already has a schema.
    SchemaAlreadyExists,
    /// The identifier of a schema cannot be updated.
    CannotUpdateSchemaIdentifier,
    /// The attributes of a schema cannot be reordered.
    CannotReorderSchemaAttribute,
    /// New attributes can only be introduced at the end of a schema.
    CanOnlyIntroduceNewSchemaAttributesAtEnd,
    /// Attributes cannot be removed from a schema.
    CannotRemoveSchemaAttribute,
}

impl fmt::Display for UnsupportedOperation {
    /// Writes the fixed message associated with the refused operation.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            UnsupportedOperation::SchemaAlreadyExists => {
                "Cannot update index which already have a schema"
            }
            UnsupportedOperation::CannotUpdateSchemaIdentifier => {
                "Cannot update the identifier of a schema"
            }
            UnsupportedOperation::CannotReorderSchemaAttribute => {
                "Cannot reorder the attributes of a schema"
            }
            UnsupportedOperation::CanOnlyIntroduceNewSchemaAttributesAtEnd => {
                "Can only introduce new attributes at end of a schema"
            }
            UnsupportedOperation::CannotRemoveSchemaAttribute => {
                "Cannot remove attributes from a schema"
            }
        };

        f.write_str(message)
    }
}

View File

@ -0,0 +1,134 @@
use std::cmp::min;
use std::collections::BTreeMap;
use std::ops::{Index, IndexMut};
/// A matrix stored in one contiguous `Vec` and addressed with `(x, y)` pairs.
///
/// The cell `(x, y)` lives at the offset `x * y_size + y` of the buffer.
struct N2Array<T> {
    y_size: usize,
    buf: Vec<T>,
}

impl<T: Clone> N2Array<T> {
    /// Creates a matrix of `x` by `y` cells, all of them set to `value`.
    fn new(x: usize, y: usize, value: T) -> N2Array<T> {
        N2Array {
            y_size: y,
            buf: vec![value; x * y],
        }
    }
}

impl<T> Index<(usize, usize)> for N2Array<T> {
    type Output = T;

    #[inline]
    fn index(&self, (x, y): (usize, usize)) -> &T {
        &self.buf[(x * self.y_size) + y]
    }
}

impl<T> IndexMut<(usize, usize)> for N2Array<T> {
    #[inline]
    fn index_mut(&mut self, (x, y): (usize, usize)) -> &mut T {
        &mut self.buf[(x * self.y_size) + y]
    }
}

/// Computes the Damerau-Levenshtein distance between `source` and the
/// prefix of `target` that is the closest to it.
///
/// Only the prefixes of `target` that are at least as long as `source` are
/// considered. The returned pair is `(distance, length)` where `length` is
/// the number of bytes of the selected prefix; when several prefixes reach
/// the same distance the shortest one is kept.
///
/// Two inputs are answered without computing anything:
/// - an empty `source` returns `(target.len(), 0)`,
/// - equal inputs return `(0, target.len())`.
///
/// # Panics
///
/// Panics if `source` is longer than `target`.
pub fn prefix_damerau_levenshtein(source: &[u8], target: &[u8]) -> (u32, usize) {
    let (n, m) = (source.len(), target.len());

    assert!(
        n <= m,
        "the source string must be shorter than the target one"
    );

    // `n <= m` holds from here on: once the empty source is handled the
    // target cannot be empty either, so no separate check is needed for it.
    if n == 0 {
        return (m as u32, 0);
    }

    // slices of different lengths never compare equal
    if source == target {
        return (0, m);
    }

    // larger than any distance that can be reached between the two inputs
    let inf = n + m;
    let mut matrix = N2Array::new(n + 2, m + 2, 0);

    // the first row and column hold the sentinel value, the second
    // ones the cost of building a prefix out of nothing
    matrix[(0, 0)] = inf;
    for i in 0..n + 1 {
        matrix[(i + 1, 0)] = inf;
        matrix[(i + 1, 1)] = i;
    }
    for j in 0..m + 1 {
        matrix[(0, j + 1)] = inf;
        matrix[(1, j + 1)] = j;
    }

    // for each byte, the last row of the source where it has been seen
    let mut last_row = BTreeMap::new();

    for (row, char_s) in source.iter().enumerate() {
        // the last column of this row where both bytes were equal
        let mut last_match_col = 0;
        let row = row + 1;

        for (col, char_t) in target.iter().enumerate() {
            let col = col + 1;
            let last_match_row = *last_row.get(&char_t).unwrap_or(&0);

            let cost = if char_s == char_t { 0 } else { 1 };

            let dist_add = matrix[(row, col + 1)] + 1;
            let dist_del = matrix[(row + 1, col)] + 1;
            let dist_sub = matrix[(row, col)] + cost;
            // swap the two bytes found at the last matches, paying
            // for everything that stands between them
            let dist_trans = matrix[(last_match_row, last_match_col)]
                + (row - last_match_row - 1)
                + 1
                + (col - last_match_col - 1);

            let dist = min(min(dist_add, dist_del), min(dist_sub, dist_trans));
            matrix[(row + 1, col + 1)] = dist;

            if cost == 0 {
                last_match_col = col;
            }
        }

        last_row.insert(char_s, row);
    }

    // the last row gives the distance between the whole source and every
    // prefix of the target, keep the first of the smallest ones
    let mut minimum = (u32::max_value(), 0);
    for x in n..=m {
        let dist = matrix[(n + 1, x + 1)] as u32;
        if dist < minimum.0 {
            minimum = (dist, x)
        }
    }

    minimum
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The query is one edit away from the nine first bytes of the text.
    #[test]
    fn matched_length() {
        let (query, text) = ("Levenste", "Levenshtein");

        let (dist, length) = prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());

        assert_eq!(dist, 1);
        assert_eq!(&text[..length], "Levenshte");
    }

    /// Swapping the arguments makes the source longer than the target.
    #[test]
    #[should_panic]
    fn matched_length_panic() {
        let source = "Levenshtein".as_bytes();
        let target = "Levenste".as_bytes();

        // this function will panic if the source is longer than the target
        prefix_damerau_levenshtein(source, target);
    }
}

View File

@ -1,85 +1,31 @@
#![feature(checked_duration_since)]
#[cfg(test)]
#[macro_use] extern crate assert_matches;
#[macro_use]
extern crate assert_matches;
mod automaton;
mod distinct_map;
mod query_builder;
mod query_enhancer;
mod raw_document;
mod reordered_attrs;
mod store;
pub mod criterion;
mod database;
mod distinct_map;
mod error;
mod levenshtein;
mod number;
mod query_builder;
mod ranked_map;
mod raw_document;
pub mod raw_indexer;
mod reordered_attrs;
pub mod serde;
pub mod store;
mod update;
use serde::{Serialize, Deserialize};
use zerocopy::{AsBytes, FromBytes};
use self::raw_document::raw_documents_from;
pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str};
pub use self::database::{BoxUpdateFn, Database};
pub use self::error::{Error, MResult};
pub use self::number::{Number, ParseNumberError};
pub use self::ranked_map::RankedMap;
pub use self::raw_document::RawDocument;
pub use self::store::Store;
/// Represent an internally generated document unique identifier.
///
/// It is used to inform the database the document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentId(pub u64);
/// This structure represent the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocIndex {
/// The document identifier where the word was found.
pub document_id: DocumentId,
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
pub word_index: u16,
/// The position in bytes where the word was found
/// along with the length of it.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
pub char_length: u16,
}
/// This structure represent a matching word with informations
/// on the location of the word in the document.
///
/// The order of the field is important because it defines
/// the way these structures are ordered between themselves.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Highlight {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
/// The position in bytes where the word was found.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
/// The length in bytes of the found word.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_length: u16,
}
pub use self::store::Index;
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
pub use meilidb_types::{DocIndex, DocumentId, Highlight};
#[doc(hidden)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -103,7 +49,10 @@ pub struct Document {
impl Document {
#[cfg(not(test))]
fn from_raw(raw: RawDocument) -> Document {
Document { id: raw.id, highlights: raw.highlights }
Document {
id: raw.id,
highlights: raw.highlights,
}
}
#[cfg(test)]
@ -128,7 +77,11 @@ impl Document {
matches.push(match_);
}
Document { id: raw.id, matches, highlights: raw.highlights }
Document {
id: raw.id,
matches,
highlights: raw.highlights,
}
}
}

View File

@ -1,12 +1,11 @@
use std::num::{ParseIntError, ParseFloatError};
use std::str::FromStr;
use std::fmt;
use std::num::{ParseFloatError, ParseIntError};
use std::str::FromStr;
use ordered_float::OrderedFloat;
use serde::{Serialize, Deserialize};
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Number {
Unsigned(u64),
Signed(i64),
@ -32,7 +31,11 @@ impl FromStr for Number {
Err(error) => error,
};
Err(ParseNumberError { uint_error, int_error, float_error })
Err(ParseNumberError {
uint_error,
int_error,
float_error,
})
}
}
@ -46,10 +49,17 @@ pub struct ParseNumberError {
impl fmt::Display for ParseNumberError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.uint_error == self.int_error {
write!(f, "can not parse number: {}, {}", self.uint_error, self.float_error)
write!(
f,
"can not parse number: {}, {}",
self.uint_error, self.float_error
)
} else {
write!(f, "can not parse number: {}, {}, {}",
self.uint_error, self.int_error, self.float_error)
write!(
f,
"can not parse number: {}, {}, {}",
self.uint_error, self.int_error, self.float_error
)
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,13 @@
use std::io::{Read, Write};
use hashbrown::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use serde::{Deserialize, Serialize};
use crate::Number;
use crate::{DocumentId, Number};
#[derive(Debug, Default, Clone, PartialEq, Eq)]
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(transparent)]
pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>);
impl RankedMap {
@ -14,6 +15,10 @@ impl RankedMap {
self.0.len()
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn insert(&mut self, document: DocumentId, attribute: SchemaAttr, number: Number) {
self.0.insert((document, attribute), number);
}

View File

@ -1,26 +1,32 @@
use std::sync::Arc;
use std::fmt;
use std::sync::Arc;
use meilidb_schema::SchemaAttr;
use sdset::SetBuf;
use slice_group_by::GroupBy;
use crate::{TmpMatch, DocumentId, Highlight};
use crate::{DocumentId, Highlight, TmpMatch};
#[derive(Clone)]
pub struct RawDocument {
pub id: DocumentId,
pub matches: SharedMatches,
pub highlights: Vec<Highlight>,
pub fields_counts: SetBuf<(SchemaAttr, u64)>,
}
impl RawDocument {
fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
RawDocument { id, matches, highlights }
}
pub fn query_index(&self) -> &[u32] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
unsafe {
&self
.matches
.matches
.query_index
.get_unchecked(r.start..r.end)
}
}
pub fn distance(&self) -> &[u8] {
@ -41,7 +47,13 @@ impl RawDocument {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
unsafe {
&self
.matches
.matches
.word_index
.get_unchecked(r.start..r.end)
}
}
pub fn is_exact(&self) -> &[bool] {
@ -55,12 +67,32 @@ impl RawDocument {
impl fmt::Debug for RawDocument {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("RawDocument {\r\n")?;
f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "query_index", self.query_index()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "distance", self.distance()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "attribute", self.attribute()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "word_index", self.word_index()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "is_exact", self.is_exact()))?;
f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
f.write_fmt(format_args!(
"{:>15}: {:^5?},\r\n",
"query_index",
self.query_index()
))?;
f.write_fmt(format_args!(
"{:>15}: {:^5?},\r\n",
"distance",
self.distance()
))?;
f.write_fmt(format_args!(
"{:>15}: {:^5?},\r\n",
"attribute",
self.attribute()
))?;
f.write_fmt(format_args!(
"{:>15}: {:^5?},\r\n",
"word_index",
self.word_index()
))?;
f.write_fmt(format_args!(
"{:>15}: {:^5?},\r\n",
"is_exact",
self.is_exact()
))?;
f.write_str("}")?;
Ok(())
}
@ -69,32 +101,45 @@ impl fmt::Debug for RawDocument {
pub fn raw_documents_from(
matches: SetBuf<(DocumentId, TmpMatch)>,
highlights: SetBuf<(DocumentId, Highlight)>,
) -> Vec<RawDocument>
{
let mut docs_ranges: Vec<(_, Range, _)> = Vec::new();
fields_counts: SetBuf<(DocumentId, SchemaAttr, u64)>,
) -> Vec<RawDocument> {
let mut docs_ranges: Vec<(_, Range, _, _)> = Vec::new();
let mut matches2 = Matches::with_capacity(matches.len());
let matches = matches.linear_group_by_key(|(id, _)| *id);
let highlights = highlights.linear_group_by_key(|(id, _)| *id);
let fields_counts = fields_counts.linear_group_by_key(|(id, _, _)| *id);
for (mgroup, hgroup) in matches.zip(highlights) {
for ((mgroup, hgroup), fgroup) in matches.zip(highlights).zip(fields_counts) {
debug_assert_eq!(mgroup[0].0, hgroup[0].0);
debug_assert_eq!(mgroup[0].0, fgroup[0].0);
let document_id = mgroup[0].0;
let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0);
let start = docs_ranges.last().map(|(_, r, _, _)| r.end).unwrap_or(0);
let end = start + mgroup.len();
let highlights = hgroup.iter().map(|(_, h)| *h).collect();
docs_ranges.push((document_id, Range { start, end }, highlights));
let fields_counts = SetBuf::new(fgroup.iter().map(|(_, a, c)| (*a, *c)).collect()).unwrap();
docs_ranges.push((document_id, Range { start, end }, highlights, fields_counts));
matches2.extend_from_slice(mgroup);
}
let matches = Arc::new(matches2);
docs_ranges.into_iter().map(|(id, range, highlights)| {
let matches = SharedMatches { range, matches: matches.clone() };
RawDocument::new(id, matches, highlights)
}).collect()
docs_ranges
.into_iter()
.map(|(id, range, highlights, fields_counts)| {
let matches = SharedMatches {
range,
matches: matches.clone(),
};
RawDocument {
id,
matches,
highlights,
fields_counts,
}
})
.collect()
}
#[derive(Debug, Copy, Clone)]

View File

@ -0,0 +1,271 @@
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;
use crate::{DocIndex, DocumentId};
use deunicode::deunicode_with_tofu;
use meilidb_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
use sdset::SetBuf;
const WORD_LENGTH_LIMIT: usize = 80;
type Word = Vec<u8>; // TODO make it be a SmallVec
/// Accumulates the words found in the texts of documents, until `build`
/// turns them into an `Indexed`.
pub struct RawIndexer {
word_limit: usize, // the maximum number of indexed words
// the words of this set are skipped when indexing
stop_words: fst::Set,
// for every indexed word, the places where it has been found
words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
// for every document, the words indexed for it (duplicates are removed by `build`)
docs_words: HashMap<DocumentId, Vec<Word>>,
}
/// The output of `RawIndexer::build`.
pub struct Indexed {
/// For every indexed word, the set of places where it has been found.
pub words_doc_indexes: BTreeMap<Word, SetBuf<DocIndex>>,
/// For every document, the set of words that have been indexed for it.
pub docs_words: HashMap<DocumentId, fst::Set>,
}
impl RawIndexer {
    /// Creates an indexer that uses a word limit of 1000.
    pub fn new(stop_words: fst::Set) -> RawIndexer {
        Self::with_word_limit(stop_words, 1000)
    }

    /// Creates an indexer that stops indexing a text once a token reaches
    /// the word index `limit`.
    pub fn with_word_limit(stop_words: fst::Set, limit: usize) -> RawIndexer {
        let words_doc_indexes = BTreeMap::new();
        let docs_words = HashMap::new();

        RawIndexer {
            word_limit: limit,
            stop_words,
            words_doc_indexes,
            docs_words,
        }
    }

    /// Indexes the words of `text` for the given document and attribute.
    ///
    /// Returns the number of tokens that have been handed to the indexer,
    /// the one that stopped the indexing included.
    pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) -> usize {
        let mut seen_tokens = 0;

        for token in Tokenizer::new(text) {
            // every token pulled out of the tokenizer is counted,
            // whether it ends up being indexed or not
            seen_tokens += 1;

            let keep_going = index_token(
                token,
                id,
                attr,
                self.word_limit,
                &self.stop_words,
                &mut self.words_doc_indexes,
                &mut self.docs_words,
            );

            if !keep_going {
                break;
            }
        }

        seen_tokens
    }

    /// Indexes the words of a sequence of texts for the given document and attribute.
    pub fn index_text_seq<'a, I>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
    where
        I: IntoIterator<Item = &'a str>,
    {
        let texts = iter.into_iter();

        for token in SeqTokenizer::new(texts) {
            let keep_going = index_token(
                token,
                id,
                attr,
                self.word_limit,
                &self.stop_words,
                &mut self.words_doc_indexes,
                &mut self.docs_words,
            );

            if !keep_going {
                break;
            }
        }
    }

    /// Consumes the indexer and returns everything that has been indexed,
    /// with the accumulated lists turned into sets.
    pub fn build(self) -> Indexed {
        let mut words_doc_indexes = BTreeMap::new();
        for (word, indexes) in self.words_doc_indexes {
            words_doc_indexes.insert(word, SetBuf::from_dirty(indexes));
        }

        let mut docs_words = HashMap::with_capacity(self.docs_words.len());
        for (id, mut words) in self.docs_words {
            // the words are sorted and deduplicated before building the set
            words.sort_unstable();
            words.dedup();
            docs_words.insert(id, fst::Set::from_iter(words).unwrap());
        }

        Indexed {
            words_doc_indexes,
            docs_words,
        }
    }
}
/// Records one token in the two maps of the indexer.
///
/// The word is lowercased first. When it is not a stop word and is not longer
/// than `WORD_LENGTH_LIMIT` bytes it is recorded, followed by its deunicoded
/// form when the word contains no CJK character and the deunicoded form is
/// neither empty nor identical to the word.
///
/// Returns `false` when the indexing of the current text must stop: the token
/// reached `word_limit` or it cannot be represented as a `DocIndex`.
fn index_token(
    token: Token,
    id: DocumentId,
    attr: SchemaAttr,
    word_limit: usize,
    stop_words: &fst::Set,
    words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
    docs_words: &mut HashMap<DocumentId, Vec<Word>>,
) -> bool {
    if token.word_index >= word_limit {
        return false;
    }

    let lower = token.word.to_lowercase();
    let token = Token {
        word: &lower,
        ..token
    };

    // stop words are skipped but do not stop the indexing
    if stop_words.contains(&token.word) {
        return true;
    }

    let docindex = match token_to_docindex(id, attr, token) {
        Some(docindex) => docindex,
        None => return false,
    };

    // both forms of the word are recorded at the same position
    let mut record = |word: Word| {
        words_doc_indexes
            .entry(word.clone())
            .or_insert_with(Vec::new)
            .push(docindex);
        docs_words.entry(id).or_insert_with(Vec::new).push(word);
    };

    let word = Vec::from(token.word);
    if word.len() > WORD_LENGTH_LIMIT {
        // too long: neither the word nor its deunicoded form is recorded
        return true;
    }
    record(word);

    if lower.contains(is_cjk) {
        return true;
    }

    let unidecoded = deunicode_with_tofu(&lower, "");
    if unidecoded != lower && !unidecoded.is_empty() {
        let word = Vec::from(unidecoded);
        if word.len() <= WORD_LENGTH_LIMIT {
            record(word);
        }
    }

    true
}
/// Builds the `DocIndex` that describes where `token` has been found.
///
/// Returns `None` when the word index, the char index or the number of
/// chars of the word does not fit in a `u16`.
fn token_to_docindex(id: DocumentId, attr: SchemaAttr, token: Token) -> Option<DocIndex> {
    Some(DocIndex {
        document_id: id,
        attribute: attr.0,
        word_index: u16::try_from(token.word_index).ok()?,
        char_index: u16::try_from(token.char_index).ok()?,
        // the length is counted in chars, not in bytes
        char_length: u16::try_from(token.word.chars().count()).ok()?,
    })
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn strange_apostrophe() {
let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text);
let Indexed {
words_doc_indexes, ..
} = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
assert!(words_doc_indexes
.get(&"éteindre".to_owned().into_bytes())
.is_some());
}
#[test]
fn strange_apostrophe_in_sequence() {
let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = vec!["Zut, laspirateur, jai oublié de léteindre !"];
indexer.index_text_seq(docid, attr, text);
let Indexed {
words_doc_indexes, ..
} = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
assert!(words_doc_indexes
.get(&"éteindre".to_owned().into_bytes())
.is_some());
}
#[test]
fn basic_stop_words() {
let stop_words = sdset::SetBuf::from_dirty(vec!["l", "j", "ai", "de"]);
let stop_words = fst::Set::from_iter(stop_words).unwrap();
let mut indexer = RawIndexer::new(stop_words);
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text);
let Indexed {
words_doc_indexes, ..
} = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_none());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"j"[..]).is_none());
assert!(words_doc_indexes.get(&b"ai"[..]).is_none());
assert!(words_doc_indexes.get(&b"de"[..]).is_none());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
assert!(words_doc_indexes
.get(&"éteindre".to_owned().into_bytes())
.is_some());
}
#[test]
fn no_empty_unidecode() {
let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = "🇯🇵";
indexer.index_text(docid, attr, text);
let Indexed {
words_doc_indexes, ..
} = indexer.build();
assert!(words_doc_indexes
.get(&"🇯🇵".to_owned().into_bytes())
.is_some());
}
}

View File

@ -6,7 +6,10 @@ pub struct ReorderedAttrs {
impl ReorderedAttrs {
pub fn new() -> ReorderedAttrs {
ReorderedAttrs { count: 0, reorders: Vec::new() }
ReorderedAttrs {
count: 0,
reorders: Vec::new(),
}
}
pub fn insert_attribute(&mut self, attribute: u16) {

View File

@ -77,13 +77,18 @@ impl ser::Serializer for ConvertToNumber {
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "Option" })
Err(SerializerError::UnrankableType {
type_name: "Option",
})
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
Err(SerializerError::UnrankableType { type_name: "Option" })
Err(SerializerError::UnrankableType {
type_name: "Option",
})
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@ -91,25 +96,29 @@ impl ser::Serializer for ConvertToNumber {
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "unit struct" })
Err(SerializerError::UnrankableType {
type_name: "unit struct",
})
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "unit variant" })
_variant: &'static str,
) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType {
type_name: "unit variant",
})
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
value: &T,
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
value.serialize(self)
}
@ -119,15 +128,20 @@ impl ser::Serializer for ConvertToNumber {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
_value: &T,
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
Err(SerializerError::UnrankableType { type_name: "newtype variant" })
Err(SerializerError::UnrankableType {
type_name: "newtype variant",
})
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "sequence" })
Err(SerializerError::UnrankableType {
type_name: "sequence",
})
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
@ -137,10 +151,11 @@ impl ser::Serializer for ConvertToNumber {
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "tuple struct" })
_len: usize,
) -> Result<Self::SerializeTupleStruct, Self::Error> {
Err(SerializerError::UnrankableType {
type_name: "tuple struct",
})
}
fn serialize_tuple_variant(
@ -148,10 +163,11 @@ impl ser::Serializer for ConvertToNumber {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "tuple variant" })
_len: usize,
) -> Result<Self::SerializeTupleVariant, Self::Error> {
Err(SerializerError::UnrankableType {
type_name: "tuple variant",
})
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
@ -161,10 +177,11 @@ impl ser::Serializer for ConvertToNumber {
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "struct" })
_len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
Err(SerializerError::UnrankableType {
type_name: "struct",
})
}
fn serialize_struct_variant(
@ -172,9 +189,10 @@ impl ser::Serializer for ConvertToNumber {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "struct variant" })
_len: usize,
) -> Result<Self::SerializeStructVariant, Self::Error> {
Err(SerializerError::UnrankableType {
type_name: "struct variant",
})
}
}

View File

@ -1,5 +1,5 @@
use serde::Serialize;
use serde::ser;
use serde::Serialize;
use super::SerializerError;
@ -12,12 +12,14 @@ impl ser::Serializer for ConvertToString {
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapConvertToString;
type SerializeStruct = StructConvertToString;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "boolean" })
Err(SerializerError::UnserializableType {
type_name: "boolean",
})
}
fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
@ -73,13 +75,18 @@ impl ser::Serializer for ConvertToString {
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "Option" })
Err(SerializerError::UnserializableType {
type_name: "Option",
})
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
Err(SerializerError::UnserializableType { type_name: "Option" })
Err(SerializerError::UnserializableType {
type_name: "Option",
})
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@ -87,25 +94,29 @@ impl ser::Serializer for ConvertToString {
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "unit struct" })
Err(SerializerError::UnserializableType {
type_name: "unit struct",
})
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "unit variant" })
_variant: &'static str,
) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "unit variant",
})
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
value: &T,
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
value.serialize(self)
}
@ -115,15 +126,20 @@ impl ser::Serializer for ConvertToString {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
_value: &T,
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
Err(SerializerError::UnserializableType {
type_name: "newtype variant",
})
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "sequence" })
Err(SerializerError::UnserializableType {
type_name: "sequence",
})
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
@ -133,10 +149,11 @@ impl ser::Serializer for ConvertToString {
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
_len: usize,
) -> Result<Self::SerializeTupleStruct, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "tuple struct",
})
}
fn serialize_tuple_variant(
@ -144,23 +161,27 @@ impl ser::Serializer for ConvertToString {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
_len: usize,
) -> Result<Self::SerializeTupleVariant, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "tuple variant",
})
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "map" })
Ok(MapConvertToString {
text: String::new(),
})
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct" })
_len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
Ok(StructConvertToString {
text: String::new(),
})
}
fn serialize_struct_variant(
@ -168,9 +189,70 @@ impl ser::Serializer for ConvertToString {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct variant" })
_len: usize,
) -> Result<Self::SerializeStructVariant, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "struct variant",
})
}
}
/// Flattens a serialized map into one space-separated string.
///
/// Each key and each value is rendered through `ConvertToString` and appended
/// to `text`, producing `"key1 value1 key2 value2"`.
pub struct MapConvertToString {
    /// Text accumulated so far; returned as-is by `end`.
    text: String,
}

impl ser::SerializeMap for MapConvertToString {
    type Ok = String;
    type Error = SerializerError;

    /// Appends the textual form of `key`, followed by a space that separates
    /// it from the value that comes next.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `ConvertToString` when the key cannot be
    /// converted; `text` is left untouched in that case.
    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
    where
        T: ser::Serialize,
    {
        let key_text = key.serialize(ConvertToString)?;
        // Without this separator the previous entry's value and this key
        // would be glued together into a single token ("v1k2").
        if !self.text.is_empty() {
            self.text.push(' ');
        }
        self.text.push_str(&key_text);
        self.text.push(' ');
        Ok(())
    }

    /// Appends the textual form of `value` right after its key.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `ConvertToString` when the value cannot
    /// be converted; `text` is left untouched in that case.
    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where
        T: ser::Serialize,
    {
        let value_text = value.serialize(ConvertToString)?;
        self.text.push_str(&value_text);
        Ok(())
    }

    /// Hands back the accumulated text.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        Ok(self.text)
    }
}
/// Flattens a serialized struct into one space-separated string.
///
/// Each field contributes its name and the `ConvertToString` rendering of its
/// value, producing `"field1 value1 field2 value2"`.
pub struct StructConvertToString {
    /// Text accumulated so far; returned as-is by `end`.
    text: String,
}

impl ser::SerializeStruct for StructConvertToString {
    type Ok = String;
    type Error = SerializerError;

    /// Appends `key`, a space, and the textual form of `value`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `ConvertToString` when the value cannot
    /// be converted; `text` is left untouched in that case.
    fn serialize_field<T: ?Sized>(
        &mut self,
        key: &'static str,
        value: &T,
    ) -> Result<(), Self::Error>
    where
        T: ser::Serialize,
    {
        let value_text = value.serialize(ConvertToString)?;
        // Without this separator the previous field's value and this field's
        // name would be glued together into a single token ("1b").
        if !self.text.is_empty() {
            self.text.push(' ');
        }
        self.text.push_str(key);
        self.text.push(' ');
        self.text.push_str(&value_text);
        Ok(())
    }

    /// Hands back the accumulated text.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        Ok(self.text)
    }
}

View File

@ -0,0 +1,158 @@
use std::collections::HashSet;
use std::io::Cursor;
use std::{error::Error, fmt};
use meilidb_schema::{Schema, SchemaAttr};
use serde::{de, forward_to_deserialize_any};
use serde_json::de::IoRead as SerdeJsonIoRead;
use serde_json::Deserializer as SerdeJsonDeserializer;
use serde_json::Error as SerdeJsonError;
use crate::store::DocumentsFields;
use crate::DocumentId;
/// Errors that can be reported while deserializing a stored document.
#[derive(Debug)]
pub enum DeserializerError {
/// A `serde_json` error.
SerdeJson(SerdeJsonError),
/// A `heed` error.
Zlmdb(heed::Error),
/// A free-form message.
Custom(String),
}
impl de::Error for DeserializerError {
    /// Wraps any displayable message into the `Custom` variant.
    fn custom<T>(msg: T) -> Self
    where
        T: fmt::Display,
    {
        let message = msg.to_string();
        DeserializerError::Custom(message)
    }
}
impl fmt::Display for DeserializerError {
    /// Writes a human-readable description of the error.
    ///
    /// Wrapped errors are prefixed with the name of the library they come
    /// from; custom messages are written verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            DeserializerError::Custom(message) => f.write_str(message),
            DeserializerError::SerdeJson(err) => {
                write!(f, "serde json related error: {}", err)
            }
            DeserializerError::Zlmdb(err) => {
                write!(f, "heed related error: {}", err)
            }
        }
    }
}
// Empty impl: every `Error` method keeps its default implementation.
impl Error for DeserializerError {}
/// Lets `?` turn a `serde_json` error into a `DeserializerError`.
impl From<SerdeJsonError> for DeserializerError {
    fn from(json_error: SerdeJsonError) -> Self {
        DeserializerError::SerdeJson(json_error)
    }
}
impl From<heed::Error> for DeserializerError {
fn from(error: heed::Error) -> DeserializerError {
DeserializerError::Zlmdb(error)
}
}
/// Deserializer that rebuilds one document from its stored fields.
pub struct Deserializer<'a> {
// Identifier of the document whose fields are read.
pub document_id: DocumentId,
// Read transaction handed to the `documents_fields` store.
pub reader: &'a heed::RoTxn,
// Store the document's fields are fetched from.
pub documents_fields: DocumentsFields,
// Schema used to test whether an attribute is displayed and to resolve its name.
pub schema: &'a Schema,
// Optional allow-list: when `Some`, only the attributes it contains are emitted.
pub attributes: Option<&'a HashSet<SchemaAttr>>,
}
// Exposes a stored document as an optional map of attribute name to JSON
// value: `visit_none` when no field is emitted, `visit_some` otherwise.
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
type Error = DeserializerError;
// Delegates to `deserialize_option`.
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: de::Visitor<'de>,
{
self.deserialize_option(visitor)
}
// Delegates to `deserialize_map`, which performs the none/some decision.
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: de::Visitor<'de>,
{
self.deserialize_map(visitor)
}
// Reads the document's fields from the store, keeps the ones that are
// displayed (and allowed by `attributes`, when set) and feeds them to the
// visitor as a map.
//
// Returns an error if the store cannot be opened for this document, if
// iterating the fields failed, or if the visitor itself fails.
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: de::Visitor<'de>,
{
// Holds the most recent error hit while iterating; it is checked once the
// visitor has run, because the closure below cannot return it directly.
let mut error = None;
let iter = self
.documents_fields
.document_fields(self.reader, self.document_id)?
.filter_map(|result| {
let (attr, value) = match result {
Ok(value) => value,
Err(e) => {
// Remember the failure and drop this entry from the map.
error = Some(e);
return None;
}
};
let is_displayed = self.schema.props(attr).is_displayed();
// `attributes == None` means "no restriction".
if is_displayed && self.attributes.map_or(true, |f| f.contains(&attr)) {
let attribute_name = self.schema.attribute_name(attr);
// Copy the raw bytes so the JSON deserializer owns its input.
let cursor = Cursor::new(value.to_owned());
let ioread = SerdeJsonIoRead::new(cursor);
let value = Value(SerdeJsonDeserializer::new(ioread));
Some((attribute_name, value))
} else {
None
}
});
// Peek to learn whether at least one field survives the filter.
let mut iter = iter.peekable();
let result = match iter.peek() {
Some(_) => {
let map_deserializer = de::value::MapDeserializer::new(iter);
visitor
.visit_some(map_deserializer)
.map_err(DeserializerError::from)
}
None => visitor.visit_none(),
};
// An iteration error takes precedence over whatever the visitor returned.
match error.take() {
Some(error) => Err(error.into()),
None => result,
}
}
// Every other type hint is routed to `deserialize_any`.
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf unit unit_struct newtype_struct seq tuple
tuple_struct struct enum identifier ignored_any
}
}
// A single field value: a `serde_json` deserializer reading from an owned byte buffer.
struct Value(SerdeJsonDeserializer<SerdeJsonIoRead<Cursor<Vec<u8>>>>);
/// `Value` is already a deserializer, so the conversion is the identity.
impl<'de> de::IntoDeserializer<'de, SerdeJsonError> for Value {
    type Deserializer = Value;

    fn into_deserializer(self) -> Value {
        self
    }
}
impl<'de> de::Deserializer<'de> for Value {
type Error = SerdeJsonError;
fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
where
V: de::Visitor<'de>,
{
self.0.deserialize_any(visitor)
}
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct map struct enum identifier ignored_any
}
}

View File

@ -1,17 +1,18 @@
use std::hash::{Hash, Hasher};
use meilidb_core::DocumentId;
use crate::DocumentId;
use serde::{ser, Serialize};
use serde_json::Value;
use siphasher::sip::SipHasher;
use super::{SerializerError, ConvertToString};
use super::{ConvertToString, SerializerError};
pub fn extract_document_id<D>(
identifier: &str,
document: &D,
) -> Result<Option<DocumentId>, SerializerError>
where D: serde::Serialize,
where
D: serde::Serialize,
{
let serializer = ExtractDocumentId { identifier };
document.serialize(serializer)
@ -77,13 +78,18 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "Option" })
Err(SerializerError::UnserializableType {
type_name: "Option",
})
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
Err(SerializerError::UnserializableType { type_name: "Option" })
Err(SerializerError::UnserializableType {
type_name: "Option",
})
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@ -91,25 +97,29 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "unit struct" })
Err(SerializerError::UnserializableType {
type_name: "unit struct",
})
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "unit variant" })
_variant: &'static str,
) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "unit variant",
})
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
value: &T,
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
value.serialize(self)
}
@ -119,15 +129,20 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
_value: &T,
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
where
T: Serialize,
{
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
Err(SerializerError::UnserializableType {
type_name: "newtype variant",
})
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "sequence" })
Err(SerializerError::UnserializableType {
type_name: "sequence",
})
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
@ -137,10 +152,11 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
_len: usize,
) -> Result<Self::SerializeTupleStruct, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "tuple struct",
})
}
fn serialize_tuple_variant(
@ -148,10 +164,11 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
_len: usize,
) -> Result<Self::SerializeTupleVariant, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "tuple variant",
})
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
@ -167,9 +184,8 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
_len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
let serializer = ExtractDocumentIdStructSerializer {
identifier: self.identifier,
document_id: None,
@ -183,10 +199,11 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct variant" })
_len: usize,
) -> Result<Self::SerializeStructVariant, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "struct variant",
})
}
}
@ -201,7 +218,8 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: Serialize,
where
T: Serialize,
{
let key = key.serialize(ConvertToString)?;
self.current_key_name = Some(key);
@ -209,7 +227,8 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: Serialize,
where
T: Serialize,
{
let key = self.current_key_name.take().unwrap();
self.serialize_entry(&key, value)
@ -218,9 +237,11 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V
value: &V,
) -> Result<(), Self::Error>
where K: Serialize, V: Serialize,
where
K: Serialize,
V: Serialize,
{
let key = key.serialize(ConvertToString)?;
@ -252,9 +273,10 @@ impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T
value: &T,
) -> Result<(), Self::Error>
where T: Serialize,
where
T: Serialize,
{
if self.identifier == key {
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;

View File

@ -1,10 +1,10 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use serde::ser;
use serde::Serialize;
use crate::indexer::Indexer as RawIndexer;
use super::{SerializerError, ConvertToString};
use super::{ConvertToString, SerializerError};
use crate::raw_indexer::RawIndexer;
use crate::DocumentId;
pub struct Indexer<'a> {
pub attribute: SchemaAttr,
@ -13,18 +13,20 @@ pub struct Indexer<'a> {
}
impl<'a> ser::Serializer for Indexer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
type SerializeSeq = SeqIndexer<'a>;
type SerializeTuple = TupleIndexer<'a>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapIndexer<'a>;
type SerializeStruct = StructSerializer<'a>;
type SerializeStruct = StructIndexer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "boolean" })
Err(SerializerError::UnindexableType {
type_name: "boolean",
})
}
fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
@ -83,8 +85,10 @@ impl<'a> ser::Serializer for Indexer<'a> {
}
fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
self.indexer.index_text(self.document_id, self.attribute, text);
Ok(())
let number_of_words = self
.indexer
.index_text(self.document_id, self.attribute, text);
Ok(Some(number_of_words))
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
@ -92,15 +96,20 @@ impl<'a> ser::Serializer for Indexer<'a> {
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "Option" })
Err(SerializerError::UnindexableType {
type_name: "Option",
})
}
fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.indexer.index_text(self.document_id, self.attribute, &text);
Ok(())
let number_of_words = self
.indexer
.index_text(self.document_id, self.attribute, &text);
Ok(Some(number_of_words))
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@ -108,25 +117,29 @@ impl<'a> ser::Serializer for Indexer<'a> {
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "unit struct" })
Err(SerializerError::UnindexableType {
type_name: "unit struct",
})
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "unit variant" })
_variant: &'static str,
) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType {
type_name: "unit variant",
})
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
value: &T,
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
value.serialize(self)
}
@ -136,11 +149,14 @@ impl<'a> ser::Serializer for Indexer<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
_value: &T,
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
Err(SerializerError::UnindexableType { type_name: "newtype variant" })
Err(SerializerError::UnindexableType {
type_name: "newtype variant",
})
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
@ -168,10 +184,11 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "tuple struct" })
_len: usize,
) -> Result<Self::SerializeTupleStruct, Self::Error> {
Err(SerializerError::UnindexableType {
type_name: "tuple struct",
})
}
fn serialize_tuple_variant(
@ -179,10 +196,11 @@ impl<'a> ser::Serializer for Indexer<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "tuple variant" })
_len: usize,
) -> Result<Self::SerializeTupleVariant, Self::Error> {
Err(SerializerError::UnindexableType {
type_name: "tuple variant",
})
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
@ -199,10 +217,11 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "struct" })
_len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
Err(SerializerError::UnindexableType {
type_name: "struct",
})
}
fn serialize_struct_variant(
@ -210,10 +229,11 @@ impl<'a> ser::Serializer for Indexer<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "struct variant" })
_len: usize,
) -> Result<Self::SerializeStructVariant, Self::Error> {
Err(SerializerError::UnindexableType {
type_name: "struct variant",
})
}
}
@ -225,11 +245,12 @@ pub struct SeqIndexer<'a> {
}
impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize
where
T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
@ -238,8 +259,9 @@ impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
Ok(None)
}
}
@ -251,11 +273,12 @@ pub struct MapIndexer<'a> {
}
impl<'a> ser::SerializeMap for MapIndexer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
let text = key.serialize(ConvertToString)?;
self.texts.push(text);
@ -263,7 +286,8 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
@ -272,20 +296,21 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
Ok(None)
}
}
pub struct StructSerializer<'a> {
pub struct StructIndexer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
type Ok = ();
impl<'a> ser::SerializeStruct for StructIndexer<'a> {
type Ok = Option<usize>;
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
@ -293,7 +318,8 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
let key_text = key.to_owned();
let value_text = value.serialize(ConvertToString)?;
@ -304,8 +330,9 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
Ok(None)
}
}
@ -317,11 +344,12 @@ pub struct TupleIndexer<'a> {
}
impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
type Ok = ();
type Ok = Option<usize>;
type Error = SerializerError;
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: Serialize
where
T: Serialize,
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
@ -330,7 +358,8 @@ impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
Ok(None)
}
}

View File

@ -15,32 +15,27 @@ mod extract_document_id;
mod indexer;
mod serializer;
pub use self::deserializer::{Deserializer, DeserializerError};
pub use self::extract_document_id::{extract_document_id, compute_document_id, value_to_string};
pub use self::convert_to_string::ConvertToString;
pub use self::convert_to_number::ConvertToNumber;
pub use self::convert_to_string::ConvertToString;
pub use self::deserializer::{Deserializer, DeserializerError};
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
pub use self::indexer::Indexer;
pub use self::serializer::Serializer;
pub use self::serializer::{serialize_value, Serializer};
use std::collections::BTreeMap;
use std::{fmt, error::Error};
use std::{error::Error, fmt};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::encode::Error as RmpError;
use serde_json::Error as SerdeJsonError;
use serde::ser;
use serde_json::Error as SerdeJsonError;
use crate::number::ParseNumberError;
use crate::ParseNumberError;
#[derive(Debug)]
pub enum SerializerError {
DocumentIdNotFound,
InvalidDocumentIdType,
RmpError(RmpError),
RocksDbError(rocksdb::Error),
SerdeJsonError(SerdeJsonError),
ParseNumberError(ParseNumberError),
Zlmdb(heed::Error),
SerdeJson(SerdeJsonError),
ParseNumber(ParseNumberError),
UnserializableType { type_name: &'static str },
UnindexableType { type_name: &'static str },
UnrankableType { type_name: &'static str },
@ -57,26 +52,25 @@ impl fmt::Display for SerializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
SerializerError::DocumentIdNotFound => {
write!(f, "serialized document does not have an id according to the schema")
},
f.write_str("serialized document does not have an id according to the schema")
}
SerializerError::InvalidDocumentIdType => {
write!(f, "document identifier can only be of type string or number")
},
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
SerializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
SerializerError::ParseNumberError(e) => {
f.write_str("document identifier can only be of type string or number")
}
SerializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),
SerializerError::SerdeJson(e) => write!(f, "serde json error: {}", e),
SerializerError::ParseNumber(e) => {
write!(f, "error while trying to parse a number: {}", e)
},
}
SerializerError::UnserializableType { type_name } => {
write!(f, "{} are not a serializable type", type_name)
},
write!(f, "{} is not a serializable type", type_name)
}
SerializerError::UnindexableType { type_name } => {
write!(f, "{} are not an indexable type", type_name)
},
write!(f, "{} is not an indexable type", type_name)
}
SerializerError::UnrankableType { type_name } => {
write!(f, "{} types can not be used for ranking", type_name)
},
}
SerializerError::Custom(s) => f.write_str(s),
}
}
@ -90,42 +84,20 @@ impl From<String> for SerializerError {
}
}
impl From<RmpError> for SerializerError {
fn from(error: RmpError) -> SerializerError {
SerializerError::RmpError(error)
}
}
impl From<SerdeJsonError> for SerializerError {
fn from(error: SerdeJsonError) -> SerializerError {
SerializerError::SerdeJsonError(error)
SerializerError::SerdeJson(error)
}
}
impl From<rocksdb::Error> for SerializerError {
fn from(error: rocksdb::Error) -> SerializerError {
SerializerError::RocksDbError(error)
impl From<heed::Error> for SerializerError {
fn from(error: heed::Error) -> SerializerError {
SerializerError::Zlmdb(error)
}
}
impl From<ParseNumberError> for SerializerError {
fn from(error: ParseNumberError) -> SerializerError {
SerializerError::ParseNumberError(error)
}
}
pub struct RamDocumentStore(BTreeMap<(DocumentId, SchemaAttr), Vec<u8>>);
impl RamDocumentStore {
pub fn new() -> RamDocumentStore {
RamDocumentStore(BTreeMap::new())
}
pub fn set_document_field(&mut self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) {
self.0.insert((id, attr), value);
}
pub fn into_inner(self) -> BTreeMap<(DocumentId, SchemaAttr), Vec<u8>> {
self.0
SerializerError::ParseNumber(error)
}
}

View File

@ -1,28 +1,31 @@
use meilidb_core::DocumentId;
use meilidb_schema::Schema;
use meilidb_schema::{Schema, SchemaAttr, SchemaProps};
use serde::ser;
use crate::indexer::Indexer as RawIndexer;
use crate::ranked_map::RankedMap;
use super::{RamDocumentStore, SerializerError, ConvertToString, ConvertToNumber, Indexer};
use crate::raw_indexer::RawIndexer;
use crate::store::{DocumentsFields, DocumentsFieldsCounts};
use crate::{DocumentId, RankedMap};
pub struct Serializer<'a> {
use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};
pub struct Serializer<'a, 'b> {
pub txn: &'a mut heed::RwTxn<'b>,
pub schema: &'a Schema,
pub document_store: &'a mut RamDocumentStore,
pub document_store: DocumentsFields,
pub document_fields_counts: DocumentsFieldsCounts,
pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap,
pub document_id: DocumentId,
}
impl<'a> ser::Serializer for Serializer<'a> {
impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
type Ok = ();
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapSerializer<'a>;
type SerializeStruct = StructSerializer<'a>;
type SerializeMap = MapSerializer<'a, 'b>;
type SerializeStruct = StructSerializer<'a, 'b>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
@ -52,13 +55,18 @@ impl<'a> ser::Serializer for Serializer<'a> {
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "Option" })
Err(SerializerError::UnserializableType {
type_name: "Option",
})
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
Err(SerializerError::UnserializableType { type_name: "Option" })
Err(SerializerError::UnserializableType {
type_name: "Option",
})
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@ -66,25 +74,29 @@ impl<'a> ser::Serializer for Serializer<'a> {
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "unit struct" })
Err(SerializerError::UnserializableType {
type_name: "unit struct",
})
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "unit variant" })
_variant: &'static str,
) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "unit variant",
})
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
value: &T,
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
value.serialize(self)
}
@ -94,15 +106,20 @@ impl<'a> ser::Serializer for Serializer<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
_value: &T,
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
Err(SerializerError::UnserializableType {
type_name: "newtype variant",
})
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "sequence" })
Err(SerializerError::UnserializableType {
type_name: "sequence",
})
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
@ -112,10 +129,11 @@ impl<'a> ser::Serializer for Serializer<'a> {
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
_len: usize,
) -> Result<Self::SerializeTupleStruct, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "tuple struct",
})
}
fn serialize_tuple_variant(
@ -123,17 +141,20 @@ impl<'a> ser::Serializer for Serializer<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
_len: usize,
) -> Result<Self::SerializeTupleVariant, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "tuple variant",
})
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Ok(MapSerializer {
txn: self.txn,
schema: self.schema,
document_id: self.document_id,
document_store: self.document_store,
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
current_key_name: None,
@ -143,13 +164,14 @@ impl<'a> ser::Serializer for Serializer<'a> {
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
_len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
Ok(StructSerializer {
txn: self.txn,
schema: self.schema,
document_id: self.document_id,
document_store: self.document_store,
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
})
@ -160,28 +182,32 @@ impl<'a> ser::Serializer for Serializer<'a> {
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct variant" })
_len: usize,
) -> Result<Self::SerializeStructVariant, Self::Error> {
Err(SerializerError::UnserializableType {
type_name: "struct variant",
})
}
}
pub struct MapSerializer<'a> {
pub struct MapSerializer<'a, 'b> {
txn: &'a mut heed::RwTxn<'b>,
schema: &'a Schema,
document_id: DocumentId,
document_store: &'a mut RamDocumentStore,
document_store: DocumentsFields,
document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
current_key_name: Option<String>,
}
impl<'a> ser::SerializeMap for MapSerializer<'a> {
impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
type Ok = ();
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
let key = key.serialize(ConvertToString)?;
self.current_key_name = Some(key);
@ -189,7 +215,8 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
let key = self.current_key_name.take().unwrap();
self.serialize_entry(&key, value)
@ -200,19 +227,25 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
key: &K,
value: &V,
) -> Result<(), Self::Error>
where K: ser::Serialize, V: ser::Serialize,
where
K: ser::Serialize,
V: ser::Serialize,
{
let key = key.serialize(ConvertToString)?;
serialize_value(
self.schema,
self.document_id,
self.document_store,
self.indexer,
self.ranked_map,
&key,
value,
)
match self.schema.attribute(&key) {
Some(attribute) => serialize_value(
self.txn,
attribute,
self.schema.props(attribute),
self.document_id,
self.document_store,
self.document_fields_counts,
self.indexer,
self.ranked_map,
value,
),
None => Ok(()),
}
}
fn end(self) -> Result<Self::Ok, Self::Error> {
@ -220,15 +253,17 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
}
}
pub struct StructSerializer<'a> {
pub struct StructSerializer<'a, 'b> {
txn: &'a mut heed::RwTxn<'b>,
schema: &'a Schema,
document_id: DocumentId,
document_store: &'a mut RamDocumentStore,
document_store: DocumentsFields,
document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
type Ok = ();
type Error = SerializerError;
@ -237,17 +272,23 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where T: ser::Serialize,
where
T: ser::Serialize,
{
serialize_value(
self.schema,
self.document_id,
self.document_store,
self.indexer,
self.ranked_map,
key,
value,
)
match self.schema.attribute(key) {
Some(attribute) => serialize_value(
self.txn,
attribute,
self.schema.props(attribute),
self.document_id,
self.document_store,
self.document_fields_counts,
self.indexer,
self.ranked_map,
value,
),
None => Ok(()),
}
}
fn end(self) -> Result<Self::Ok, Self::Error> {
@ -255,32 +296,42 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
}
}
fn serialize_value<T: ?Sized>(
schema: &Schema,
pub fn serialize_value<T: ?Sized>(
txn: &mut heed::RwTxn,
attribute: SchemaAttr,
props: SchemaProps,
document_id: DocumentId,
document_store: &mut RamDocumentStore,
document_store: DocumentsFields,
documents_fields_counts: DocumentsFieldsCounts,
indexer: &mut RawIndexer,
ranked_map: &mut RankedMap,
key: &str,
value: &T,
) -> Result<(), SerializerError>
where T: ser::Serialize,
where
T: ser::Serialize,
{
if let Some(attribute) = schema.attribute(key) {
let props = schema.props(attribute);
let serialized = serde_json::to_vec(value)?;
document_store.put_document_field(txn, document_id, attribute, &serialized)?;
let serialized = rmp_serde::to_vec_named(value)?;
document_store.set_document_field(document_id, attribute, serialized);
if props.is_indexed() {
let indexer = Indexer { attribute, indexer, document_id };
value.serialize(indexer)?;
if props.is_indexed() {
let indexer = Indexer {
attribute,
indexer,
document_id,
};
if let Some(number_of_words) = value.serialize(indexer)? {
documents_fields_counts.put_document_field_count(
txn,
document_id,
attribute,
number_of_words as u64,
)?;
}
}
if props.is_ranked() {
let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, attribute, number);
}
if props.is_ranked() {
let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, attribute, number);
}
Ok(())

View File

@ -1,34 +0,0 @@
use std::error::Error;
use fst::Set;
use sdset::SetBuf;
use crate::DocIndex;
/// Abstraction over a store exposing a word set, per-word index entries,
/// a synonym set and per-word alternatives.
///
/// Every accessor is fallible and reports failures through the associated
/// `Error` type chosen by the implementor.
pub trait Store {
/// Error type returned by every accessor of this store.
type Error: Error;
/// Returns a reference to the store's word set.
fn words(&self) -> Result<&Set, Self::Error>;
/// Returns the `DocIndex` entries associated with `word`, or `None` when
/// the store holds nothing for it.
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;
/// Returns a reference to the store's synonym set.
// NOTE(review): presumably the set of words that have alternatives — confirm against implementors.
fn synonyms(&self) -> Result<&Set, Self::Error>;
/// Returns the set of alternatives registered for `word`, or `None` when
/// there is none.
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>;
}
/// Lets a shared reference to a store be used wherever a `Store` is expected
/// by forwarding every method to the referenced value.
impl<T> Store for &'_ T
where
    T: Store,
{
    type Error = T::Error;

    fn words(&self) -> Result<&Set, Self::Error> {
        T::words(*self)
    }

    fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
        T::word_indexes(*self, word)
    }

    fn synonyms(&self) -> Result<&Set, Self::Error> {
        T::synonyms(*self)
    }

    fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
        T::alternatives_to(*self, word)
    }
}

View File

@ -0,0 +1,49 @@
use super::BEU64;
use crate::DocumentId;
use heed::types::{ByteSlice, OwnedType};
use heed::Result as ZResult;
use std::sync::Arc;
/// Handle on the database keyed by a big-endian document id and holding raw
/// bytes; the methods of this type write and read the bytes of an `fst::Set`
/// through it.
#[derive(Copy, Clone)]
pub struct DocsWords {
pub(crate) docs_words: heed::Database<OwnedType<BEU64>, ByteSlice>,
}
impl DocsWords {
    /// Stores the raw FST bytes of `words` under the key of `document_id`.
    pub fn put_doc_words(
        self,
        writer: &mut heed::RwTxn,
        document_id: DocumentId,
        words: &fst::Set,
    ) -> ZResult<()> {
        let key = BEU64::new(document_id.0);
        let raw = words.as_fst().as_bytes();
        self.docs_words.put(writer, &key, raw)
    }

    /// Deletes the entry of `document_id` and returns the boolean reported by
    /// the underlying database delete.
    pub fn del_doc_words(self, writer: &mut heed::RwTxn, document_id: DocumentId) -> ZResult<bool> {
        let key = BEU64::new(document_id.0);
        self.docs_words.delete(writer, &key)
    }

    /// Removes every entry of this database.
    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
        self.docs_words.clear(writer)
    }

    /// Reads back the word set stored for `document_id`, if any.
    ///
    /// The stored bytes are copied, so the returned set does not borrow from
    /// the transaction.
    ///
    /// # Panics
    ///
    /// Panics if the stored bytes cannot be decoded as an FST.
    pub fn doc_words(
        self,
        reader: &heed::RoTxn,
        document_id: DocumentId,
    ) -> ZResult<Option<fst::Set>> {
        let key = BEU64::new(document_id.0);
        let stored = self.docs_words.get(reader, &key)?;
        Ok(stored.map(|raw| {
            let len = raw.len();
            let shared = Arc::new(raw.to_owned());
            let fst = fst::raw::Fst::from_shared_bytes(shared, 0, len).unwrap();
            fst::Set::from(fst)
        }))
    }
}

View File

@ -0,0 +1,78 @@
use heed::types::{ByteSlice, OwnedType};
use heed::Result as ZResult;
use meilidb_schema::SchemaAttr;
use super::DocumentAttrKey;
use crate::DocumentId;
/// Handle on the database that maps a `DocumentAttrKey` (document id plus
/// schema attribute) to the raw bytes of that document field.
#[derive(Copy, Clone)]
pub struct DocumentsFields {
pub(crate) documents_fields: heed::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
}
impl DocumentsFields {
    /// Writes `value` as the content of the `attribute` field of `document_id`.
    pub fn put_document_field(
        self,
        writer: &mut heed::RwTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
        value: &[u8],
    ) -> ZResult<()> {
        let field_key = DocumentAttrKey::new(document_id, attribute);
        self.documents_fields.put(writer, &field_key, value)
    }

    /// Deletes every field stored for `document_id` and returns the count
    /// reported by the underlying range deletion.
    pub fn del_all_document_fields(
        self,
        writer: &mut heed::RwTxn,
        document_id: DocumentId,
    ) -> ZResult<usize> {
        // The range spans every possible attribute of this single document.
        let first = DocumentAttrKey::new(document_id, SchemaAttr::min());
        let last = DocumentAttrKey::new(document_id, SchemaAttr::max());
        let whole_document = first..=last;
        self.documents_fields.delete_range(writer, &whole_document)
    }

    /// Removes every entry of this database.
    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
        self.documents_fields.clear(writer)
    }

    /// Returns the raw bytes stored for the `attribute` field of
    /// `document_id`, borrowed from the transaction, or `None` when absent.
    pub fn document_attribute<'txn>(
        self,
        reader: &'txn heed::RoTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
    ) -> ZResult<Option<&'txn [u8]>> {
        let field_key = DocumentAttrKey::new(document_id, attribute);
        self.documents_fields.get(reader, &field_key)
    }

    /// Returns an iterator over the `(attribute, bytes)` pairs stored for
    /// `document_id`.
    pub fn document_fields<'txn>(
        self,
        reader: &'txn heed::RoTxn,
        document_id: DocumentId,
    ) -> ZResult<DocumentFieldsIter<'txn>> {
        let first = DocumentAttrKey::new(document_id, SchemaAttr::min());
        let last = DocumentAttrKey::new(document_id, SchemaAttr::max());
        let whole_document = first..=last;
        self.documents_fields
            .range(reader, &whole_document)
            .map(|iter| DocumentFieldsIter { iter })
    }
}
/// Iterator over the fields of one document; wraps a database range and
/// yields `(SchemaAttr, bytes)` pairs borrowed from the transaction.
pub struct DocumentFieldsIter<'txn> {
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
}
impl<'txn> Iterator for DocumentFieldsIter<'txn> {
    type Item = ZResult<(SchemaAttr, &'txn [u8])>;

    /// Yields the next field, keeping only the attribute part of the key;
    /// errors from the underlying range are passed through unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        self.iter
            .next()
            .map(|entry| entry.map(|(key, bytes)| (SchemaAttr(key.attr.get()), bytes)))
    }
}

View File

@ -0,0 +1,142 @@
use super::DocumentAttrKey;
use crate::DocumentId;
use heed::types::OwnedType;
use heed::Result as ZResult;
use meilidb_schema::SchemaAttr;
/// Handle on the database that maps a `DocumentAttrKey` (document id plus
/// schema attribute) to a `u64` count for that field.
#[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts {
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}
impl DocumentsFieldsCounts {
    /// Stores `value` as the count of the `attribute` field of `document_id`.
    pub fn put_document_field_count(
        self,
        writer: &mut heed::RwTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
        value: u64,
    ) -> ZResult<()> {
        let field_key = DocumentAttrKey::new(document_id, attribute);
        self.documents_fields_counts.put(writer, &field_key, &value)
    }

    /// Deletes every count stored for `document_id` and returns the count
    /// reported by the underlying range deletion.
    pub fn del_all_document_fields_counts(
        self,
        writer: &mut heed::RwTxn,
        document_id: DocumentId,
    ) -> ZResult<usize> {
        // The range spans every possible attribute of this single document.
        let first = DocumentAttrKey::new(document_id, SchemaAttr::min());
        let last = DocumentAttrKey::new(document_id, SchemaAttr::max());
        let whole_document = first..=last;
        self.documents_fields_counts
            .delete_range(writer, &whole_document)
    }

    /// Removes every entry of this database.
    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
        self.documents_fields_counts.clear(writer)
    }

    /// Returns the count stored for the `attribute` field of `document_id`,
    /// or `None` when there is no entry.
    pub fn document_field_count(
        self,
        reader: &heed::RoTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
    ) -> ZResult<Option<u64>> {
        let field_key = DocumentAttrKey::new(document_id, attribute);
        self.documents_fields_counts.get(reader, &field_key)
    }

    /// Returns an iterator over the `(attribute, count)` pairs stored for
    /// `document_id`.
    pub fn document_fields_counts<'txn>(
        self,
        reader: &'txn heed::RoTxn,
        document_id: DocumentId,
    ) -> ZResult<DocumentFieldsCountsIter<'txn>> {
        let first = DocumentAttrKey::new(document_id, SchemaAttr::min());
        let last = DocumentAttrKey::new(document_id, SchemaAttr::max());
        let whole_document = first..=last;
        self.documents_fields_counts
            .range(reader, &whole_document)
            .map(|iter| DocumentFieldsCountsIter { iter })
    }

    /// Returns an iterator over the document ids found in this database,
    /// skipping an id equal to the one yielded just before it.
    pub fn documents_ids<'txn>(self, reader: &'txn heed::RoTxn) -> ZResult<DocumentsIdsIter<'txn>> {
        self.documents_fields_counts
            .iter(reader)
            .map(|iter| DocumentsIdsIter {
                last_seen_id: None,
                iter,
            })
    }

    /// Returns an iterator over every `(document id, attribute, count)`
    /// triple of this database.
    pub fn all_documents_fields_counts<'txn>(
        self,
        reader: &'txn heed::RoTxn,
    ) -> ZResult<AllDocumentsFieldsCountsIter<'txn>> {
        self.documents_fields_counts
            .iter(reader)
            .map(|iter| AllDocumentsFieldsCountsIter { iter })
    }
}
/// Iterator over the field counts of one document; wraps a database range
/// and yields `(SchemaAttr, u64)` pairs.
pub struct DocumentFieldsCountsIter<'txn> {
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}
impl Iterator for DocumentFieldsCountsIter<'_> {
    type Item = ZResult<(SchemaAttr, u64)>;

    /// Yields the next count, keeping only the attribute part of the key;
    /// errors from the underlying range are passed through unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        self.iter
            .next()
            .map(|entry| entry.map(|(key, count)| (SchemaAttr(key.attr.get()), count)))
    }
}
/// Iterator over the document ids present in the field-counts database.
///
/// `last_seen_id` remembers the id yielded last so that consecutive entries
/// carrying the same document id are reported only once.
pub struct DocumentsIdsIter<'txn> {
last_seen_id: Option<DocumentId>,
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}
impl Iterator for DocumentsIdsIter<'_> {
    type Item = ZResult<DocumentId>;

    /// Advances the underlying iterator until an entry whose document id
    /// differs from the last yielded one is found, then yields that id.
    /// Errors are yielded as soon as they are met.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // `?` ends the iteration once the underlying iterator is exhausted.
            let (key, _) = match self.iter.next()? {
                Ok(entry) => entry,
                Err(e) => return Some(Err(e)),
            };

            let document_id = DocumentId(key.docid.get());
            if self.last_seen_id != Some(document_id) {
                self.last_seen_id = Some(document_id);
                return Some(Ok(document_id));
            }
        }
    }
}
/// Iterator over every entry of the field-counts database, yielding
/// `(DocumentId, SchemaAttr, u64)` triples.
pub struct AllDocumentsFieldsCountsIter<'txn> {
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
    type Item = ZResult<(DocumentId, SchemaAttr, u64)>;

    /// Yields the next entry with its key split into document id and
    /// attribute; errors from the underlying iterator are passed through.
    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|entry| {
            entry.map(|(key, count)| {
                let docid = DocumentId(key.docid.get());
                let attr = SchemaAttr(key.attr.get());
                (docid, attr, count)
            })
        })
    }
}

View File

@ -0,0 +1,125 @@
use crate::RankedMap;
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
use heed::Result as ZResult;
use meilidb_schema::Schema;
use std::sync::Arc;
// Keys under which the `Main` store keeps each of its values.
// Changing any of these strings changes where the value is looked up.
const CUSTOMS_KEY: &str = "customs-key";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const SCHEMA_KEY: &str = "schema";
const SYNONYMS_KEY: &str = "synonyms";
const STOP_WORDS_KEY: &str = "stop-words";
const WORDS_KEY: &str = "words";
/// Handle on the polymorphic database holding the index-wide values (words,
/// synonyms and stop-words FSTs, schema, ranked map, number of documents and
/// customs bytes), each stored under its own string key.
#[derive(Copy, Clone)]
pub struct Main {
pub(crate) main: heed::PolyDatabase,
}
impl Main {
    /// Rebuilds an owned `fst::Set` from raw FST bytes read from the database.
    ///
    /// The bytes are copied, so the returned set does not borrow from the
    /// transaction they were read from.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` cannot be decoded as an FST.
    fn decode_fst_set(bytes: &[u8]) -> fst::Set {
        let len = bytes.len();
        let bytes = Arc::new(bytes.to_owned());
        let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
        fst::Set::from(fst)
    }

    /// Removes every value of this database.
    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
        self.main.clear(writer)
    }

    /// Stores the raw bytes of the words FST.
    pub fn put_words_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> {
        let bytes = fst.as_fst().as_bytes();
        self.main.put::<Str, ByteSlice>(writer, WORDS_KEY, bytes)
    }

    /// Returns the words FST, or `None` when it was never stored.
    ///
    /// # Panics
    ///
    /// Panics if the stored bytes cannot be decoded as an FST.
    pub fn words_fst(self, reader: &heed::RoTxn) -> ZResult<Option<fst::Set>> {
        let bytes = self.main.get::<Str, ByteSlice>(reader, WORDS_KEY)?;
        Ok(bytes.map(Self::decode_fst_set))
    }

    /// Stores the bincode-serialized `schema`.
    pub fn put_schema(self, writer: &mut heed::RwTxn, schema: &Schema) -> ZResult<()> {
        self.main
            .put::<Str, SerdeBincode<Schema>>(writer, SCHEMA_KEY, schema)
    }

    /// Returns the stored schema, or `None` when it was never stored.
    pub fn schema(self, reader: &heed::RoTxn) -> ZResult<Option<Schema>> {
        self.main
            .get::<Str, SerdeBincode<Schema>>(reader, SCHEMA_KEY)
    }

    /// Stores the bincode-serialized `ranked_map`.
    pub fn put_ranked_map(self, writer: &mut heed::RwTxn, ranked_map: &RankedMap) -> ZResult<()> {
        // `ranked_map` is already a reference: pass it as is instead of `&&RankedMap`.
        self.main
            .put::<Str, SerdeBincode<RankedMap>>(writer, RANKED_MAP_KEY, ranked_map)
    }

    /// Returns the stored ranked map, or `None` when it was never stored.
    pub fn ranked_map(self, reader: &heed::RoTxn) -> ZResult<Option<RankedMap>> {
        self.main
            .get::<Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
    }

    /// Stores the raw bytes of the synonyms FST.
    pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> {
        let bytes = fst.as_fst().as_bytes();
        self.main.put::<Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)
    }

    /// Returns the synonyms FST, or `None` when it was never stored.
    ///
    /// # Panics
    ///
    /// Panics if the stored bytes cannot be decoded as an FST.
    pub fn synonyms_fst(self, reader: &heed::RoTxn) -> ZResult<Option<fst::Set>> {
        let bytes = self.main.get::<Str, ByteSlice>(reader, SYNONYMS_KEY)?;
        Ok(bytes.map(Self::decode_fst_set))
    }

    /// Stores the raw bytes of the stop-words FST.
    pub fn put_stop_words_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> {
        let bytes = fst.as_fst().as_bytes();
        self.main
            .put::<Str, ByteSlice>(writer, STOP_WORDS_KEY, bytes)
    }

    /// Returns the stop-words FST, or `None` when it was never stored.
    ///
    /// # Panics
    ///
    /// Panics if the stored bytes cannot be decoded as an FST.
    pub fn stop_words_fst(self, reader: &heed::RoTxn) -> ZResult<Option<fst::Set>> {
        let bytes = self.main.get::<Str, ByteSlice>(reader, STOP_WORDS_KEY)?;
        Ok(bytes.map(Self::decode_fst_set))
    }

    /// Replaces the stored number of documents by `f(current)` and returns
    /// the new value; a missing value is read as `0`.
    pub fn put_number_of_documents<F>(self, writer: &mut heed::RwTxn, f: F) -> ZResult<u64>
    where
        F: Fn(u64) -> u64,
    {
        let new = self.number_of_documents(writer).map(f)?;
        self.main
            .put::<Str, OwnedType<u64>>(writer, NUMBER_OF_DOCUMENTS_KEY, &new)?;
        Ok(new)
    }

    /// Returns the stored number of documents, or `0` when none is stored.
    pub fn number_of_documents(self, reader: &heed::RoTxn) -> ZResult<u64> {
        let stored = self
            .main
            .get::<Str, OwnedType<u64>>(reader, NUMBER_OF_DOCUMENTS_KEY)?;
        Ok(stored.unwrap_or(0))
    }

    /// Stores the opaque `customs` bytes.
    pub fn put_customs(self, writer: &mut heed::RwTxn, customs: &[u8]) -> ZResult<()> {
        self.main
            .put::<Str, ByteSlice>(writer, CUSTOMS_KEY, customs)
    }

    /// Returns the stored customs bytes, borrowed from the transaction, or
    /// `None` when they were never stored.
    pub fn customs<'txn>(self, reader: &'txn heed::RoTxn) -> ZResult<Option<&'txn [u8]>> {
        self.main.get::<Str, ByteSlice>(reader, CUSTOMS_KEY)
    }
}

View File

@ -0,0 +1,397 @@
mod docs_words;
mod documents_fields;
mod documents_fields_counts;
mod main;
mod postings_lists;
mod synonyms;
mod updates;
mod updates_results;
pub use self::docs_words::DocsWords;
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
pub use self::documents_fields_counts::{
DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter,
};
pub use self::main::Main;
pub use self::postings_lists::PostingsLists;
pub use self::synonyms::Synonyms;
pub use self::updates::Updates;
pub use self::updates_results::UpdatesResults;
use std::collections::HashSet;
use heed::Result as ZResult;
use meilidb_schema::{Schema, SchemaAttr};
use serde::de::{self, Deserialize};
use zerocopy::{AsBytes, FromBytes};
use crate::criterion::Criteria;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::serde::Deserializer;
use crate::{query_builder::QueryBuilder, update, DocumentId, Error, MResult};
// Big-endian integer types used to build the database keys of this module.
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
/// Database key identifying one field of one document: the document id
/// followed by the schema attribute, both encoded big-endian and laid out in
/// declaration order (`repr(C)`).
// NOTE(review): the big-endian layout presumably keeps all the keys of a document contiguous
// and ordered by attribute in the database — confirm against the key comparison in use.
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentAttrKey {
docid: BEU64,
attr: BEU16,
}
impl DocumentAttrKey {
    /// Builds the key of the `attr` field of document `docid`, converting
    /// both parts to their big-endian representation.
    fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey {
        let docid = BEU64::new(docid.0);
        let attr = BEU16::new(attr.0);
        DocumentAttrKey { docid, attr }
    }
}
/// Returns the database name of the main store of the index called `name`.
fn main_name(name: &str) -> String {
    ["store-", name].concat()
}
/// Returns the database name of the postings-lists store of the index called `name`.
fn postings_lists_name(name: &str) -> String {
    ["store-", name, "-postings-lists"].concat()
}
/// Returns the database name of the documents-fields store of the index called `name`.
fn documents_fields_name(name: &str) -> String {
    ["store-", name, "-documents-fields"].concat()
}
/// Returns the database name of the documents-fields-counts store of the index called `name`.
fn documents_fields_counts_name(name: &str) -> String {
    ["store-", name, "-documents-fields-counts"].concat()
}
/// Returns the database name of the synonyms store of the index called `name`.
fn synonyms_name(name: &str) -> String {
    ["store-", name, "-synonyms"].concat()
}
/// Returns the database name of the docs-words store of the index called `name`.
fn docs_words_name(name: &str) -> String {
    ["store-", name, "-docs-words"].concat()
}
/// Returns the database name of the updates store of the index called `name`.
fn updates_name(name: &str) -> String {
    ["store-", name, "-updates"].concat()
}
/// Returns the database name of the updates-results store of the index called `name`.
fn updates_results_name(name: &str) -> String {
    ["store-", name, "-updates-results"].concat()
}
/// One index: a bundle of handles on the stores that make it up, plus the
/// emitter used to notify the update loop that something was enqueued.
#[derive(Clone)]
pub struct Index {
/// Index-wide values (schema, words FST, ranked map, ...).
pub main: Main,
/// Per-word postings lists.
pub postings_lists: PostingsLists,
/// Raw bytes of every document field.
pub documents_fields: DocumentsFields,
/// Per-document, per-attribute counts.
pub documents_fields_counts: DocumentsFieldsCounts,
/// Per-word synonym sets.
pub synonyms: Synonyms,
/// Per-document word sets.
pub docs_words: DocsWords,
/// Stored updates, keyed by update id.
pub updates: Updates,
/// Stored results of updates.
pub updates_results: UpdatesResults,
// Emitter on which `UpdateEvent`s are sent by the methods of this type.
updates_notifier: UpdateEventsEmitter,
}
impl Index {
    /// Deserializes the document `document_id` into a `T`.
    ///
    /// When `attributes` is given, the names are resolved through the schema
    /// and handed to the deserializer as the set of attributes to consider.
    ///
    /// # Errors
    ///
    /// Returns `Error::SchemaMissing` when no schema is stored, or any error
    /// raised while reading or deserializing the document.
    pub fn document<T: de::DeserializeOwned>(
        &self,
        reader: &heed::RoTxn,
        attributes: Option<&HashSet<&str>>,
        document_id: DocumentId,
    ) -> MResult<Option<T>> {
        let schema = self.main.schema(reader)?;
        let schema = schema.ok_or(Error::SchemaMissing)?;

        let attributes = match attributes {
            Some(attributes) => attributes
                .iter()
                .map(|name| schema.attribute(name))
                .collect(),
            None => None,
        };

        let mut deserializer = Deserializer {
            document_id,
            reader,
            documents_fields: self.documents_fields,
            schema: &schema,
            attributes: attributes.as_ref(),
        };

        Ok(Option::<T>::deserialize(&mut deserializer)?)
    }

    /// Reads the `attribute` field of `document_id` and decodes it from JSON
    /// into a `T`; returns `None` when the field is not stored.
    pub fn document_attribute<T: de::DeserializeOwned>(
        &self,
        reader: &heed::RoTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
    ) -> MResult<Option<T>> {
        let bytes = self
            .documents_fields
            .document_attribute(reader, document_id, attribute)?;
        match bytes {
            Some(bytes) => Ok(Some(serde_json::from_slice(bytes)?)),
            None => Ok(None),
        }
    }

    /// Enqueues a schema update and returns its update id.
    pub fn schema_update(&self, writer: &mut heed::RwTxn, schema: Schema) -> MResult<u64> {
        // Notification is best-effort: a failed send is deliberately ignored.
        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
        update::push_schema_update(writer, self.updates, self.updates_results, schema)
    }

    /// Enqueues a customs update and returns its update id.
    pub fn customs_update(&self, writer: &mut heed::RwTxn, customs: Vec<u8>) -> ZResult<u64> {
        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
        update::push_customs_update(writer, self.updates, self.updates_results, customs)
    }

    /// Returns a builder for a documents addition.
    pub fn documents_addition<D>(&self) -> update::DocumentsAddition<D> {
        update::DocumentsAddition::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    /// Returns a builder for a partial documents addition.
    pub fn documents_partial_addition<D>(&self) -> update::DocumentsAddition<D> {
        update::DocumentsAddition::new_partial(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    /// Returns a builder for a documents deletion.
    pub fn documents_deletion(&self) -> update::DocumentsDeletion {
        update::DocumentsDeletion::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    /// Enqueues a clear-all update and returns its update id.
    pub fn clear_all(&self, writer: &mut heed::RwTxn) -> MResult<u64> {
        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
        update::push_clear_all(writer, self.updates, self.updates_results)
    }

    /// Returns a builder for a synonyms addition.
    pub fn synonyms_addition(&self) -> update::SynonymsAddition {
        update::SynonymsAddition::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    /// Returns a builder for a synonyms deletion.
    pub fn synonyms_deletion(&self) -> update::SynonymsDeletion {
        update::SynonymsDeletion::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    /// Returns a builder for a stop-words addition.
    pub fn stop_words_addition(&self) -> update::StopWordsAddition {
        update::StopWordsAddition::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    /// Returns a builder for a stop-words deletion.
    pub fn stop_words_deletion(&self) -> update::StopWordsDeletion {
        update::StopWordsDeletion::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    /// Returns the id of the last entry of the updates store, if any.
    pub fn current_update_id(&self, reader: &heed::RoTxn) -> MResult<Option<u64>> {
        let last = self.updates.last_update_id(reader)?;
        Ok(last.map(|(id, _)| id))
    }

    /// Returns the status of the update `update_id`, if it is known.
    pub fn update_status(
        &self,
        reader: &heed::RoTxn,
        update_id: u64,
    ) -> MResult<Option<update::UpdateStatus>> {
        update::update_status(reader, self.updates, self.updates_results, update_id)
    }

    /// Returns the status of every known update, in increasing id order:
    /// first those up to the last stored result, then the remaining ones up
    /// to the last stored update.
    pub fn all_updates_status(&self, reader: &heed::RoTxn) -> MResult<Vec<update::UpdateStatus>> {
        let mut updates = Vec::new();
        // First id the enqueued scan still has to look at. It starts at 0 so
        // that update 0 is reported even when no result has been stored yet.
        let mut next_enqueued_id = 0;

        // retrieve all updates results
        if let Some((last_id, _)) = self.updates_results.last_update_id(reader)? {
            updates.reserve(last_id as usize);

            for id in 0..=last_id {
                if let Some(update) = self.update_status(reader, id)? {
                    updates.push(update);
                    next_enqueued_id = id + 1;
                }
            }
        }

        // retrieve all enqueued updates
        if let Some((last_id, _)) = self.updates.last_update_id(reader)? {
            for id in next_enqueued_id..=last_id {
                if let Some(update) = self.update_status(reader, id)? {
                    updates.push(update);
                }
            }
        }

        Ok(updates)
    }

    /// Returns a query builder over the stores of this index.
    pub fn query_builder(&self) -> QueryBuilder {
        QueryBuilder::new(
            self.main,
            self.postings_lists,
            self.documents_fields_counts,
            self.synonyms,
        )
    }

    /// Returns a query builder over the stores of this index that uses the
    /// given `criteria`.
    pub fn query_builder_with_criteria<'c, 'f, 'd>(
        &self,
        criteria: Criteria<'c>,
    ) -> QueryBuilder<'c, 'f, 'd> {
        QueryBuilder::with_criteria(
            self.main,
            self.postings_lists,
            self.documents_fields_counts,
            self.synonyms,
            criteria,
        )
    }
}
/// Creates every store of the index called `name` in `env` and returns the
/// `Index` bundling them together with `updates_notifier`.
///
/// The stores are created in the same order as the fields of `Index`; the
/// first failure is returned as is.
pub fn create(
    env: &heed::Env,
    name: &str,
    updates_notifier: UpdateEventsEmitter,
) -> MResult<Index> {
    let main = Main {
        main: env.create_poly_database(Some(main_name(name).as_str()))?,
    };
    let postings_lists = PostingsLists {
        postings_lists: env.create_database(Some(postings_lists_name(name).as_str()))?,
    };
    let documents_fields = DocumentsFields {
        documents_fields: env.create_database(Some(documents_fields_name(name).as_str()))?,
    };
    let documents_fields_counts = DocumentsFieldsCounts {
        documents_fields_counts: env
            .create_database(Some(documents_fields_counts_name(name).as_str()))?,
    };
    let synonyms = Synonyms {
        synonyms: env.create_database(Some(synonyms_name(name).as_str()))?,
    };
    let docs_words = DocsWords {
        docs_words: env.create_database(Some(docs_words_name(name).as_str()))?,
    };
    let updates = Updates {
        updates: env.create_database(Some(updates_name(name).as_str()))?,
    };
    let updates_results = UpdatesResults {
        updates_results: env.create_database(Some(updates_results_name(name).as_str()))?,
    };

    Ok(Index {
        main,
        postings_lists,
        documents_fields,
        documents_fields_counts,
        synonyms,
        docs_words,
        updates,
        updates_results,
        updates_notifier,
    })
}
/// Opens every store of the index called `name` in `env`.
///
/// Returns `Ok(None)` as soon as one of the stores does not exist, and the
/// `Index` bundling them with `updates_notifier` when all of them do.
pub fn open(
    env: &heed::Env,
    name: &str,
    updates_notifier: UpdateEventsEmitter,
) -> MResult<Option<Index>> {
    // Unwraps an opened store, or leaves `open` with `Ok(None)` when it is missing.
    macro_rules! store_or_return {
        ($opened:expr) => {
            match $opened {
                Some(store) => store,
                None => return Ok(None),
            }
        };
    }

    let main = store_or_return!(env.open_poly_database(Some(main_name(name).as_str()))?);
    let postings_lists =
        store_or_return!(env.open_database(Some(postings_lists_name(name).as_str()))?);
    let documents_fields =
        store_or_return!(env.open_database(Some(documents_fields_name(name).as_str()))?);
    let documents_fields_counts =
        store_or_return!(env.open_database(Some(documents_fields_counts_name(name).as_str()))?);
    let synonyms = store_or_return!(env.open_database(Some(synonyms_name(name).as_str()))?);
    let docs_words = store_or_return!(env.open_database(Some(docs_words_name(name).as_str()))?);
    let updates = store_or_return!(env.open_database(Some(updates_name(name).as_str()))?);
    let updates_results =
        store_or_return!(env.open_database(Some(updates_results_name(name).as_str()))?);

    let index = Index {
        main: Main { main },
        postings_lists: PostingsLists { postings_lists },
        documents_fields: DocumentsFields { documents_fields },
        documents_fields_counts: DocumentsFieldsCounts {
            documents_fields_counts,
        },
        synonyms: Synonyms { synonyms },
        docs_words: DocsWords { docs_words },
        updates: Updates { updates },
        updates_results: UpdatesResults { updates_results },
        updates_notifier,
    };

    Ok(Some(index))
}
/// Clears every store of `index` inside the `writer` transaction.
///
/// A `MustStop` event is first sent to the update loop of the index. The
/// send is best-effort, like every other notification sent by `Index`: a
/// failure no longer panics in the middle of the write transaction.
///
/// # Errors
///
/// Returns the first error raised while clearing one of the stores.
pub fn clear(writer: &mut heed::RwTxn, index: &Index) -> MResult<()> {
    // send a stop event to the update loop of the index
    // NOTE(review): a send error presumably means the receiving side is already gone,
    // in which case there is nothing left to stop — confirm against the update loop.
    let _ = index.updates_notifier.send(UpdateEvent::MustStop);

    // clear all the stores
    index.main.clear(writer)?;
    index.postings_lists.clear(writer)?;
    index.documents_fields.clear(writer)?;
    index.documents_fields_counts.clear(writer)?;
    index.synonyms.clear(writer)?;
    index.docs_words.clear(writer)?;
    index.updates.clear(writer)?;
    index.updates_results.clear(writer)?;

    Ok(())
}

View File

@ -0,0 +1,41 @@
use crate::DocIndex;
use heed::types::{ByteSlice, CowSlice};
use heed::Result as ZResult;
use sdset::{Set, SetBuf};
use std::borrow::Cow;
/// Handle on the database that maps a word (raw bytes) to its slice of
/// `DocIndex` entries.
#[derive(Copy, Clone)]
pub struct PostingsLists {
pub(crate) postings_lists: heed::Database<ByteSlice, CowSlice<DocIndex>>,
}
impl PostingsLists {
    /// Stores `words_indexes` as the postings list of `word`.
    pub fn put_postings_list(
        self,
        writer: &mut heed::RwTxn,
        word: &[u8],
        words_indexes: &Set<DocIndex>,
    ) -> ZResult<()> {
        self.postings_lists.put(writer, word, words_indexes)
    }

    /// Deletes the postings list of `word` and returns the boolean reported
    /// by the underlying database delete.
    pub fn del_postings_list(self, writer: &mut heed::RwTxn, word: &[u8]) -> ZResult<bool> {
        self.postings_lists.delete(writer, word)
    }

    /// Removes every entry of this database.
    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
        self.postings_lists.clear(writer)
    }

    /// Returns the postings list of `word`, or `None` when there is none.
    ///
    /// The stored slice is wrapped as a set without being re-validated, and
    /// stays borrowed or owned exactly as the database returned it.
    pub fn postings_list<'txn>(
        self,
        reader: &'txn heed::RoTxn,
        word: &[u8],
    ) -> ZResult<Option<Cow<'txn, Set<DocIndex>>>> {
        let stored = self.postings_lists.get(reader, word)?;
        Ok(stored.map(|list| match list {
            Cow::Borrowed(slice) => Cow::Borrowed(Set::new_unchecked(slice)),
            Cow::Owned(vec) => Cow::Owned(SetBuf::new_unchecked(vec)),
        }))
    }
}

View File

@ -0,0 +1,40 @@
use heed::types::ByteSlice;
use heed::Result as ZResult;
use std::sync::Arc;
/// Handle on the database that maps a word (raw bytes) to raw bytes; the
/// methods of this type write and read the bytes of an `fst::Set` through it.
#[derive(Copy, Clone)]
pub struct Synonyms {
pub(crate) synonyms: heed::Database<ByteSlice, ByteSlice>,
}
impl Synonyms {
    /// Stores the raw FST bytes of `synonyms` under `word`.
    pub fn put_synonyms(
        self,
        writer: &mut heed::RwTxn,
        word: &[u8],
        synonyms: &fst::Set,
    ) -> ZResult<()> {
        let raw = synonyms.as_fst().as_bytes();
        self.synonyms.put(writer, word, raw)
    }

    /// Deletes the entry of `word` and returns the boolean reported by the
    /// underlying database delete.
    pub fn del_synonyms(self, writer: &mut heed::RwTxn, word: &[u8]) -> ZResult<bool> {
        self.synonyms.delete(writer, word)
    }

    /// Removes every entry of this database.
    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
        self.synonyms.clear(writer)
    }

    /// Reads back the synonym set stored for `word`, if any.
    ///
    /// The stored bytes are copied, so the returned set does not borrow from
    /// the transaction.
    ///
    /// # Panics
    ///
    /// Panics if the stored bytes cannot be decoded as an FST.
    pub fn synonyms(self, reader: &heed::RoTxn, word: &[u8]) -> ZResult<Option<fst::Set>> {
        let stored = self.synonyms.get(reader, word)?;
        Ok(stored.map(|raw| {
            let len = raw.len();
            let shared = Arc::new(raw.to_owned());
            let fst = fst::raw::Fst::from_shared_bytes(shared, 0, len).unwrap();
            fst::Set::from(fst)
        }))
    }
}

View File

@ -0,0 +1,59 @@
use super::BEU64;
use crate::update::Update;
use heed::types::{OwnedType, SerdeJson};
use heed::Result as ZResult;
/// Handle on the database of pending updates, keyed by big-endian update id.
#[derive(Copy, Clone)]
pub struct Updates {
    pub(crate) updates: heed::Database<OwnedType<BEU64>, SerdeJson<Update>>,
}

impl Updates {
    /// Returns the last entry of the database as `(update_id, update)`.
    // TODO do not trigger deserialize if possible
    pub fn last_update_id(self, reader: &heed::RoTxn) -> ZResult<Option<(u64, Update)>> {
        let entry = self.updates.last(reader)?;
        Ok(entry.map(|(key, update)| (key.get(), update)))
    }

    /// Returns the first entry of the database as `(update_id, update)`.
    // TODO do not trigger deserialize if possible
    fn first_update_id(self, reader: &heed::RoTxn) -> ZResult<Option<(u64, Update)>> {
        let entry = self.updates.first(reader)?;
        Ok(entry.map(|(key, update)| (key.get(), update)))
    }

    /// Returns the update stored under `update_id`, if any.
    // TODO do not trigger deserialize if possible
    pub fn get(self, reader: &heed::RoTxn, update_id: u64) -> ZResult<Option<Update>> {
        self.updates.get(reader, &BEU64::new(update_id))
    }

    /// Stores `update` under `update_id`.
    pub fn put_update(
        self,
        writer: &mut heed::RwTxn,
        update_id: u64,
        update: &Update,
    ) -> ZResult<()> {
        // TODO prefer using serde_json?
        let key = BEU64::new(update_id);
        self.updates.put(writer, &key, update)
    }

    /// Removes the first entry of the database and returns it, or `None` when
    /// the database holds no update.
    pub fn pop_front(self, writer: &mut heed::RwTxn) -> ZResult<Option<(u64, Update)>> {
        let (update_id, update) = match self.first_update_id(writer)? {
            Some(front) => front,
            None => return Ok(None),
        };
        let key = BEU64::new(update_id);
        self.updates.delete(writer, &key)?;
        Ok(Some((update_id, update)))
    }

    /// Removes every entry of the database.
    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
        self.updates.clear(writer)
    }
}

View File

@ -0,0 +1,44 @@
use super::BEU64;
use crate::update::ProcessedUpdateResult;
use heed::types::{OwnedType, SerdeJson};
use heed::Result as ZResult;
/// Handle on the database of processed update results, keyed by big-endian update id.
#[derive(Copy, Clone)]
pub struct UpdatesResults {
    pub(crate) updates_results: heed::Database<OwnedType<BEU64>, SerdeJson<ProcessedUpdateResult>>,
}

impl UpdatesResults {
    /// Returns the last entry of the database as `(update_id, result)`.
    pub fn last_update_id(
        self,
        reader: &heed::RoTxn,
    ) -> ZResult<Option<(u64, ProcessedUpdateResult)>> {
        let entry = self.updates_results.last(reader)?;
        Ok(entry.map(|(key, result)| (key.get(), result)))
    }

    /// Stores `update_result` under `update_id`.
    pub fn put_update_result(
        self,
        writer: &mut heed::RwTxn,
        update_id: u64,
        update_result: &ProcessedUpdateResult,
    ) -> ZResult<()> {
        let key = BEU64::new(update_id);
        self.updates_results.put(writer, &key, update_result)
    }

    /// Returns the result stored under `update_id`, if any.
    pub fn update_result(
        self,
        reader: &heed::RoTxn,
        update_id: u64,
    ) -> ZResult<Option<ProcessedUpdateResult>> {
        let key = BEU64::new(update_id);
        self.updates_results.get(reader, &key)
    }

    /// Removes every entry of the database.
    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
        self.updates_results.clear(writer)
    }
}

View File

@ -0,0 +1,33 @@
use crate::update::{next_update_id, Update};
use crate::{store, MResult, RankedMap};
/// Applies a "clear all" update inside the `writer` transaction.
///
/// The main store gets an empty words fst, a default ranked map and a document
/// count of zero; the documents fields, documents fields counts, postings lists
/// and docs-words stores are then cleared. The first failing operation aborts
/// the sequence and its error is returned.
pub fn apply_clear_all(
writer: &mut heed::RwTxn,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
) -> MResult<()> {
// reset the index metadata kept in the main store
main_store.put_words_fst(writer, &fst::Set::default())?;
main_store.put_ranked_map(writer, &RankedMap::default())?;
main_store.put_number_of_documents(writer, |_| 0)?;
// drop the documents and their index entries
documents_fields_store.clear(writer)?;
documents_fields_counts_store.clear(writer)?;
postings_lists_store.clear(writer)?;
docs_words_store.clear(writer)?;
Ok(())
}
/// Enqueues a "clear all" update and returns the id it was stored under.
///
/// The id comes from `next_update_id`, computed over both the pending updates
/// and the updates results stores.
pub fn push_clear_all(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
) -> MResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let clear_all = Update::clear_all();
    updates_store.put_update(writer, update_id, &clear_all)?;

    Ok(update_id)
}

View File

@ -0,0 +1,25 @@
use crate::store;
use crate::update::{next_update_id, Update};
use heed::Result as ZResult;
/// Applies a customs update by storing the `customs` bytes in the main store.
///
/// Returns whatever `put_customs` returns.
pub fn apply_customs_update(
writer: &mut heed::RwTxn,
main_store: store::Main,
customs: &[u8],
) -> ZResult<()> {
main_store.put_customs(writer, customs)
}
/// Enqueues a customs update carrying `customs` and returns the id it was stored under.
///
/// The id comes from `next_update_id`, computed over both the pending updates
/// and the updates results stores.
pub fn push_customs_update(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    customs: Vec<u8>,
) -> ZResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let customs_update = Update::customs(customs);
    updates_store.put_update(writer, update_id, &customs_update)?;

    Ok(update_id)
}

View File

@ -0,0 +1,410 @@
use std::collections::HashMap;
use fst::{set::OpBuilder, SetBuilder};
use sdset::{duo::Union, SetOperation};
use serde::{Deserialize, Serialize};
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::raw_indexer::RawIndexer;
use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer};
use crate::store;
use crate::update::{apply_documents_deletion, next_update_id, Update};
use crate::{Error, MResult, RankedMap};
/// Accumulates documents and enqueues them as one addition update.
///
/// `is_partial` selects between a full documents addition and a partial one
/// when the update is finalized.
pub struct DocumentsAddition<D> {
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: UpdateEventsEmitter,
    documents: Vec<D>,
    is_partial: bool,
}

impl<D> DocumentsAddition<D> {
    /// Creates an empty, non-partial documents addition.
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: UpdateEventsEmitter,
    ) -> DocumentsAddition<D> {
        Self {
            updates_store,
            updates_results_store,
            updates_notifier,
            documents: Vec::new(),
            is_partial: false,
        }
    }

    /// Creates an empty, partial documents addition.
    pub fn new_partial(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: UpdateEventsEmitter,
    ) -> DocumentsAddition<D> {
        Self {
            updates_store,
            updates_results_store,
            updates_notifier,
            documents: Vec::new(),
            is_partial: true,
        }
    }

    /// Queues `document` for this addition.
    pub fn update_document(&mut self, document: D) {
        self.documents.push(document);
    }

    /// Emits a `NewUpdate` event, enqueues the accumulated documents and
    /// returns the id of the enqueued update.
    ///
    /// A failure to deliver the event is ignored.
    pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64>
    where
        D: serde::Serialize,
    {
        let Self {
            updates_store,
            updates_results_store,
            updates_notifier,
            documents,
            is_partial,
        } = self;

        let _ = updates_notifier.send(UpdateEvent::NewUpdate);

        push_documents_addition(
            writer,
            updates_store,
            updates_results_store,
            documents,
            is_partial,
        )
    }
}
/// Lets several documents be queued at once from any iterable of `D`.
impl<D> Extend<D> for DocumentsAddition<D> {
// appends every yielded item to the pending documents
fn extend<T: IntoIterator<Item = D>>(&mut self, iter: T) {
self.documents.extend(iter)
}
}
/// Enqueues a documents addition (partial when `is_partial` is set) and
/// returns the id it was stored under.
///
/// Each document is serialized to JSON bytes and parsed back, which turns the
/// caller's `D` into the representation carried by the update. Any
/// serialization or parsing error is returned before an update id is reserved.
pub fn push_documents_addition<D: serde::Serialize>(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    addition: Vec<D>,
    is_partial: bool,
) -> MResult<u64> {
    let mut documents = Vec::with_capacity(addition.len());
    for document in addition {
        let bytes = serde_json::to_vec(&document)?;
        let parsed = serde_json::from_slice(&bytes)?;
        documents.push(parsed);
    }

    let update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = if is_partial {
        Update::documents_partial(documents)
    } else {
        Update::documents_addition(documents)
    };
    updates_store.put_update(writer, update_id, &update)?;

    Ok(update_id)
}
pub fn apply_documents_addition<'a, 'b>(
writer: &'a mut heed::RwTxn<'b>,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
addition: Vec<HashMap<String, serde_json::Value>>,
) -> MResult<()> {
let mut documents_additions = HashMap::new();
let schema = match main_store.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
let identifier = schema.identifier_name();
// 1. store documents ids for future deletion
for document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
documents_additions.insert(document_id, document);
}
// 2. remove the documents posting lists
let number_of_inserted_documents = documents_additions.len();
let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
apply_documents_deletion(
writer,
main_store,
documents_fields_store,
documents_fields_counts_store,
postings_lists_store,
docs_words_store,
documents_ids,
)?;
let mut ranked_map = match main_store.ranked_map(writer)? {
Some(ranked_map) => ranked_map,
None => RankedMap::default(),
};
let stop_words = match main_store.stop_words_fst(writer)? {
Some(stop_words) => stop_words,
None => fst::Set::default(),
};
// 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words);
for (document_id, document) in documents_additions {
let serializer = Serializer {
txn: writer,
schema: &schema,
document_store: documents_fields_store,
document_fields_counts: documents_fields_counts_store,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
write_documents_addition_index(
writer,
main_store,
postings_lists_store,
docs_words_store,
&ranked_map,
number_of_inserted_documents,
indexer,
)
}
pub fn apply_documents_partial_addition<'a, 'b>(
writer: &'a mut heed::RwTxn<'b>,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
addition: Vec<HashMap<String, serde_json::Value>>,
) -> MResult<()> {
let mut documents_additions = HashMap::new();
let schema = match main_store.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
let identifier = schema.identifier_name();
// 1. store documents ids for future deletion
for mut document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
let mut deserializer = Deserializer {
document_id,
reader: writer,
documents_fields: documents_fields_store,
schema: &schema,
attributes: None,
};
// retrieve the old document and
// update the new one with missing keys found in the old one
let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?;
if let Some(old_document) = result {
for (key, value) in old_document {
document.entry(key).or_insert(value);
}
}
documents_additions.insert(document_id, document);
}
// 2. remove the documents posting lists
let number_of_inserted_documents = documents_additions.len();
let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
apply_documents_deletion(
writer,
main_store,
documents_fields_store,
documents_fields_counts_store,
postings_lists_store,
docs_words_store,
documents_ids,
)?;
let mut ranked_map = match main_store.ranked_map(writer)? {
Some(ranked_map) => ranked_map,
None => RankedMap::default(),
};
let stop_words = match main_store.stop_words_fst(writer)? {
Some(stop_words) => stop_words,
None => fst::Set::default(),
};
// 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words);
for (document_id, document) in documents_additions {
let serializer = Serializer {
txn: writer,
schema: &schema,
document_store: documents_fields_store,
document_fields_counts: documents_fields_counts_store,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
write_documents_addition_index(
writer,
main_store,
postings_lists_store,
docs_words_store,
&ranked_map,
number_of_inserted_documents,
indexer,
)
}
/// Rebuilds the search index from the documents already present in the stores.
///
/// The document ids are read from the documents fields counts store. The words
/// fst, ranked map, documents count, postings lists and docs-words stores are
/// reset, then the stored fields of the documents are re-serialized in chunks
/// of 100 ids, the index being written to the stores after each chunk.
///
/// Returns `Error::SchemaMissing` when the main store holds no schema and
/// forwards any store or (de)serialization error.
pub fn reindex_all_documents(
writer: &mut heed::RwTxn,
main_store: store::Main,
documents_fields_store: store::DocumentsFields,
documents_fields_counts_store: store::DocumentsFieldsCounts,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
) -> MResult<()> {
let schema = match main_store.schema(writer)? {
Some(schema) => schema,
None => return Err(Error::SchemaMissing),
};
// the ranked map is rebuilt from scratch and shared by all the chunks
let mut ranked_map = RankedMap::default();
// 1. retrieve all documents ids
let mut documents_ids_to_reindex = Vec::new();
for result in documents_fields_counts_store.documents_ids(writer)? {
let document_id = result?;
documents_ids_to_reindex.push(document_id);
}
// 2. remove the documents posting lists
main_store.put_words_fst(writer, &fst::Set::default())?;
main_store.put_ranked_map(writer, &ranked_map)?;
main_store.put_number_of_documents(writer, |_| 0)?;
postings_lists_store.clear(writer)?;
docs_words_store.clear(writer)?;
// 3. re-index chunks of documents (otherwise we make the borrow checker unhappy)
for documents_ids in documents_ids_to_reindex.chunks(100) {
// the indexer takes ownership of the stop words, so they are fetched again for each chunk
let stop_words = match main_store.stop_words_fst(writer)? {
Some(stop_words) => stop_words,
None => fst::Set::default(),
};
let number_of_inserted_documents = documents_ids.len();
let mut indexer = RawIndexer::new(stop_words);
let mut ram_store = HashMap::new();
for document_id in documents_ids {
// copy the stored fields of the document into memory first...
for result in documents_fields_store.document_fields(writer, *document_id)? {
let (attr, bytes) = result?;
let value: serde_json::Value = serde_json::from_slice(bytes)?;
ram_store.insert((document_id, attr), value);
}
// ...then serialize them back through `writer`, emptying the map for the next document
for ((docid, attr), value) in ram_store.drain() {
serialize_value(
writer,
attr,
schema.props(attr),
*docid,
documents_fields_store,
documents_fields_counts_store,
&mut indexer,
&mut ranked_map,
&value,
)?;
}
}
// 4. write the new index in the main store
write_documents_addition_index(
writer,
main_store,
postings_lists_store,
docs_words_store,
&ranked_map,
number_of_inserted_documents,
indexer,
)?;
}
Ok(())
}
/// Merges the content produced by `indexer` into the stores.
///
/// For every indexed word the new postings are unioned with the postings list
/// already stored (if any) and written back; the words of each document are
/// stored in the docs-words store; the words fst of the main store is replaced
/// by its union with the newly indexed words; finally `ranked_map` is stored
/// and the documents count is increased by `number_of_inserted_documents`.
///
/// Store errors are returned; failures while building the in-memory fst sets
/// are unwrapped and therefore panic.
pub fn write_documents_addition_index(
writer: &mut heed::RwTxn,
main_store: store::Main,
postings_lists_store: store::PostingsLists,
docs_words_store: store::DocsWords,
ranked_map: &RankedMap,
number_of_inserted_documents: usize,
indexer: RawIndexer,
) -> MResult<()> {
let indexed = indexer.build();
// collects the words seen in this batch, to be merged into the global words fst
let mut delta_words_builder = SetBuilder::memory();
for (word, delta_set) in indexed.words_doc_indexes {
delta_words_builder.insert(&word).unwrap();
// merge with the postings list already stored for this word, if there is one
let set = match postings_lists_store.postings_list(writer, &word)? {
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
None => delta_set,
};
postings_lists_store.put_postings_list(writer, &word, &set)?;
}
for (id, words) in indexed.docs_words {
docs_words_store.put_doc_words(writer, id, &words)?;
}
let delta_words = delta_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
// the stored words fst becomes the union of the old words and the new ones
let words = match main_store.words_fst(writer)? {
Some(words) => {
let op = OpBuilder::new()
.add(words.stream())
.add(delta_words.stream())
.r#union();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
}
None => delta_words,
};
main_store.put_words_fst(writer, &words)?;
main_store.put_ranked_map(writer, ranked_map)?;
main_store.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;
Ok(())
}

View File

@ -0,0 +1,192 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use fst::{SetBuilder, Streamer};
use meilidb_schema::Schema;
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::serde::extract_document_id;
use crate::store;
use crate::update::{next_update_id, Update};
use crate::{DocumentId, Error, MResult, RankedMap};
pub struct DocumentsDeletion {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
documents: Vec<DocumentId>,
}
impl DocumentsDeletion {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
) -> DocumentsDeletion {
DocumentsDeletion {
updates_store,
updates_results_store,
updates_notifier,
documents: Vec::new(),
}
}
pub fn delete_document_by_id(&mut self, document_id: DocumentId) {
self.documents.push(document_id);
}
pub fn delete_document<D>(&mut self, schema: &Schema, document: D) -> MResult<()>
where
D: serde::Serialize,
{
let identifier = schema.identifier_name();
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
self.delete_document_by_id(document_id);
Ok(())
}
pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_documents_deletion(
writer,
self.updates_store,
self.updates_results_store,
self.documents,
)?;
Ok(update_id)
}
}
/// Lets several document ids be queued at once from any iterable of `DocumentId`.
impl Extend<DocumentId> for DocumentsDeletion {
// appends every yielded id to the pending deletions
fn extend<T: IntoIterator<Item = DocumentId>>(&mut self, iter: T) {
self.documents.extend(iter)
}
}
/// Enqueues the deletion of the `deletion` ids and returns the id the update
/// was stored under.
///
/// The id comes from `next_update_id`, computed over both the pending updates
/// and the updates results stores.
pub fn push_documents_deletion(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    deletion: Vec<DocumentId>,
) -> MResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let deletion_update = Update::documents_deletion(deletion);
    updates_store.put_update(writer, update_id, &deletion_update)?;

    Ok(update_id)
}
/// Applies a documents deletion inside the `writer` transaction.
///
/// For every requested id (deduplicated first) this:
/// - removes the id from the ranked map for each ranked attribute of the schema,
/// - deletes its stored fields and fields counts,
/// - removes it from the postings list of every word recorded for it in the
///   docs-words store, deleting postings lists that become empty.
///
/// Words whose postings list was deleted are removed from the words fst, the
/// docs-words entries of the deleted documents are dropped, and the documents
/// count is decreased by the number of documents that had stored fields.
///
/// The stored fields are deleted for every requested id, including documents
/// that have no entry in the docs-words store (for example documents without
/// any indexed word); such documents are therefore removed and counted too.
/// The count update saturates at zero instead of underflowing.
///
/// # Errors
///
/// Returns `Error::SchemaMissing` when the main store holds no schema and
/// forwards any error of the underlying stores.
pub fn apply_documents_deletion(
    writer: &mut heed::RwTxn,
    main_store: store::Main,
    documents_fields_store: store::DocumentsFields,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
    deletion: Vec<DocumentId>,
) -> MResult<()> {
    let idset = SetBuf::from_dirty(deletion);

    let schema = match main_store.schema(writer)? {
        Some(schema) => schema,
        None => return Err(Error::SchemaMissing),
    };

    let mut ranked_map = match main_store.ranked_map(writer)? {
        Some(ranked_map) => ranked_map,
        None => RankedMap::default(),
    };

    // collect the ranked attributes according to the schema
    let ranked_attrs: Vec<_> = schema
        .iter()
        .filter_map(
            |(_, attr, prop)| {
                if prop.is_ranked() {
                    Some(attr)
                } else {
                    None
                }
            },
        )
        .collect();

    let mut words_document_ids = HashMap::new();
    let mut deleted_documents = HashSet::new();
    for id in idset {
        // remove all the ranked attributes from the ranked_map
        for ranked_attr in &ranked_attrs {
            ranked_map.remove(id, *ranked_attr);
        }

        // delete the stored fields once per document, whether or not it has
        // indexed words; only documents that really had fields are counted
        documents_fields_counts_store.del_all_document_fields_counts(writer, id)?;
        if documents_fields_store.del_all_document_fields(writer, id)? != 0 {
            deleted_documents.insert(id);
        }

        // group the document ids by word to update each postings list once
        if let Some(words) = docs_words_store.doc_words(writer, id)? {
            let mut stream = words.stream();
            while let Some(word) = stream.next() {
                let word = word.to_vec();
                words_document_ids
                    .entry(word)
                    .or_insert_with(Vec::new)
                    .push(id);
            }
        }
    }

    let mut removed_words = BTreeSet::new();
    for (word, document_ids) in words_document_ids {
        let document_ids = SetBuf::from_dirty(document_ids);

        if let Some(doc_indexes) = postings_lists_store.postings_list(writer, &word)? {
            let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
            let doc_indexes = op.into_set_buf();

            if !doc_indexes.is_empty() {
                postings_lists_store.put_postings_list(writer, &word, &doc_indexes)?;
            } else {
                postings_lists_store.del_postings_list(writer, &word)?;
                removed_words.insert(word);
            }
        }
    }

    let deleted_documents_len = deleted_documents.len() as u64;
    for id in deleted_documents {
        docs_words_store.del_doc_words(writer, id)?;
    }

    // the removed words come out of a BTreeSet, hence already sorted for the fst
    let removed_words = fst::Set::from_iter(removed_words).unwrap();
    let words = match main_store.words_fst(writer)? {
        Some(words_set) => {
            let op = fst::set::OpBuilder::new()
                .add(words_set.stream())
                .add(removed_words.stream())
                .difference();

            let mut words_builder = SetBuilder::memory();
            words_builder.extend_stream(op).unwrap();
            words_builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        }
        None => fst::Set::default(),
    };

    main_store.put_words_fst(writer, &words)?;
    main_store.put_ranked_map(writer, &ranked_map)?;
    // never let the stored count wrap around if it is out of sync with the stores
    main_store.put_number_of_documents(writer, |old| old.saturating_sub(deleted_documents_len))?;

    Ok(())
}

View File

@ -0,0 +1,420 @@
mod clear_all;
mod customs_update;
mod documents_addition;
mod documents_deletion;
mod schema_update;
mod stop_words_addition;
mod stop_words_deletion;
mod synonyms_addition;
mod synonyms_deletion;
pub use self::clear_all::{apply_clear_all, push_clear_all};
pub use self::customs_update::{apply_customs_update, push_customs_update};
pub use self::documents_addition::{
apply_documents_addition, apply_documents_partial_addition, DocumentsAddition,
};
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
pub use self::schema_update::{apply_schema_update, push_schema_update};
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
use std::cmp;
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::time::Instant;
use chrono::{DateTime, Utc};
use heed::Result as ZResult;
use log::debug;
use serde::{Deserialize, Serialize};
use crate::{store, DocumentId, MResult};
use meilidb_schema::Schema;
/// An update waiting in the queue: its payload and the time it was enqueued.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Update {
    data: UpdateData,
    enqueued_at: DateTime<Utc>,
}

impl Update {
    /// Wraps `data` into an update stamped with the current UTC time.
    fn enqueued_now(data: UpdateData) -> Update {
        Update {
            data,
            enqueued_at: Utc::now(),
        }
    }

    fn clear_all() -> Update {
        Update::enqueued_now(UpdateData::ClearAll)
    }

    fn schema(data: Schema) -> Update {
        Update::enqueued_now(UpdateData::Schema(data))
    }

    fn customs(data: Vec<u8>) -> Update {
        Update::enqueued_now(UpdateData::Customs(data))
    }

    fn documents_addition(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
        Update::enqueued_now(UpdateData::DocumentsAddition(data))
    }

    fn documents_partial(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
        Update::enqueued_now(UpdateData::DocumentsPartial(data))
    }

    fn documents_deletion(data: Vec<DocumentId>) -> Update {
        Update::enqueued_now(UpdateData::DocumentsDeletion(data))
    }

    fn synonyms_addition(data: BTreeMap<String, Vec<String>>) -> Update {
        Update::enqueued_now(UpdateData::SynonymsAddition(data))
    }

    fn synonyms_deletion(data: BTreeMap<String, Option<Vec<String>>>) -> Update {
        Update::enqueued_now(UpdateData::SynonymsDeletion(data))
    }

    fn stop_words_addition(data: BTreeSet<String>) -> Update {
        Update::enqueued_now(UpdateData::StopWordsAddition(data))
    }

    fn stop_words_deletion(data: BTreeSet<String>) -> Update {
        Update::enqueued_now(UpdateData::StopWordsDeletion(data))
    }
}
/// Payload of an update: one variant per kind of update, carrying its input data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateData {
ClearAll,
Schema(Schema),
Customs(Vec<u8>),
DocumentsAddition(Vec<HashMap<String, serde_json::Value>>),
DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
DocumentsDeletion(Vec<DocumentId>),
SynonymsAddition(BTreeMap<String, Vec<String>>),
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
StopWordsAddition(BTreeSet<String>),
StopWordsDeletion(BTreeSet<String>),
}
impl UpdateData {
/// Returns the `UpdateType` matching this payload.
///
/// For the variants that carry a collection, `number` is the length of that
/// collection.
pub fn update_type(&self) -> UpdateType {
match self {
UpdateData::ClearAll => UpdateType::ClearAll,
UpdateData::Schema(_) => UpdateType::Schema,
UpdateData::Customs(_) => UpdateType::Customs,
UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition {
number: addition.len(),
},
UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial {
number: addition.len(),
},
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
number: deletion.len(),
},
UpdateData::SynonymsAddition(addition) => UpdateType::SynonymsAddition {
number: addition.len(),
},
UpdateData::SynonymsDeletion(deletion) => UpdateType::SynonymsDeletion {
number: deletion.len(),
},
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
number: addition.len(),
},
UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion {
number: deletion.len(),
},
}
}
}
/// Kind of an update without its payload; collection-based kinds keep only the
/// number of items they carried.
///
/// The variant name is (de)serialized in a `name` field (serde `tag`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "name")]
pub enum UpdateType {
ClearAll,
Schema,
Customs,
DocumentsAddition { number: usize },
DocumentsPartial { number: usize },
DocumentsDeletion { number: usize },
SynonymsAddition { number: usize },
SynonymsDeletion { number: usize },
StopWordsAddition { number: usize },
StopWordsDeletion { number: usize },
}
/// Outcome of an update that has been processed.
///
/// `update_type` is (de)serialized under the name `type`, and `error` is left
/// out of the serialized form when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessedUpdateResult {
pub update_id: u64,
#[serde(rename = "type")]
pub update_type: UpdateType,
// textual form of the error when the update failed
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
pub duration: f64, // in seconds
pub enqueued_at: DateTime<Utc>,
pub processed_at: DateTime<Utc>,
}
/// Description of an update that is still waiting in the queue.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnqueuedUpdateResult {
pub update_id: u64,
pub update_type: UpdateType,
pub enqueued_at: DateTime<Utc>,
}
/// Status of an update: still enqueued or already processed.
///
/// The variant name is (de)serialized in camelCase in a `status` field, and the
/// fields of `content` are flattened next to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatus {
Enqueued {
#[serde(flatten)]
content: EnqueuedUpdateResult,
},
Processed {
#[serde(flatten)]
content: ProcessedUpdateResult,
},
}
/// Looks up the status of the update identified by `update_id`.
///
/// The updates results store is consulted first: a stored result means the
/// update is `Processed`. Otherwise the pending updates store is consulted and
/// a hit means the update is `Enqueued`. `None` is returned when neither store
/// knows the id.
pub fn update_status(
    reader: &heed::RoTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    update_id: u64,
) -> MResult<Option<UpdateStatus>> {
    if let Some(result) = updates_results_store.update_result(reader, update_id)? {
        return Ok(Some(UpdateStatus::Processed { content: result }));
    }

    let pending = updates_store.get(reader, update_id)?;
    let status = pending.map(|update| UpdateStatus::Enqueued {
        content: EnqueuedUpdateResult {
            update_id,
            update_type: update.data.update_type(),
            enqueued_at: update.enqueued_at,
        },
    });

    Ok(status)
}
/// Computes the id to give to the next enqueued update.
///
/// This is one more than the greatest id found as last entry of either the
/// pending updates store or the updates results store, or `0` when both
/// stores are empty.
pub fn next_update_id(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
) -> ZResult<u64> {
    let last_enqueued = updates_store.last_update_id(writer)?.map(|(id, _)| id);
    let last_processed = updates_results_store
        .last_update_id(writer)?
        .map(|(id, _)| id);

    // `None` orders below every `Some`, so an empty store never wins the comparison
    let next_id = match cmp::max(last_enqueued, last_processed) {
        Some(latest) => latest + 1,
        None => 0,
    };

    Ok(next_id)
}
/// Processes one update against `index` inside the `writer` transaction and
/// reports how it went.
///
/// The payload is dispatched to the matching `apply_*` function. A failure of
/// that function does not make this function fail: its message is recorded in
/// the `error` field of the returned `ProcessedUpdateResult`, together with the
/// update type, the time spent, the enqueue time and the processing time.
pub fn update_task<'a, 'b>(
    writer: &'a mut heed::RwTxn<'b>,
    index: store::Index,
    update_id: u64,
    update: Update,
) -> MResult<ProcessedUpdateResult> {
    debug!("Processing update number {}", update_id);

    let Update { enqueued_at, data } = update;

    // a single timer covers whichever arm is taken
    let start = Instant::now();
    let (update_type, result) = match data {
        UpdateData::ClearAll => {
            let outcome = apply_clear_all(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
            );
            (UpdateType::ClearAll, outcome)
        }
        UpdateData::Schema(schema) => {
            let outcome = apply_schema_update(
                writer,
                &schema,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
            );
            (UpdateType::Schema, outcome)
        }
        UpdateData::Customs(customs) => {
            let outcome = apply_customs_update(writer, index.main, &customs).map_err(Into::into);
            (UpdateType::Customs, outcome)
        }
        UpdateData::DocumentsAddition(documents) => {
            let kind = UpdateType::DocumentsAddition {
                number: documents.len(),
            };
            let outcome = apply_documents_addition(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                documents,
            );
            (kind, outcome)
        }
        UpdateData::DocumentsPartial(documents) => {
            let kind = UpdateType::DocumentsPartial {
                number: documents.len(),
            };
            let outcome = apply_documents_partial_addition(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                documents,
            );
            (kind, outcome)
        }
        UpdateData::DocumentsDeletion(documents) => {
            let kind = UpdateType::DocumentsDeletion {
                number: documents.len(),
            };
            let outcome = apply_documents_deletion(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                documents,
            );
            (kind, outcome)
        }
        UpdateData::SynonymsAddition(synonyms) => {
            let kind = UpdateType::SynonymsAddition {
                number: synonyms.len(),
            };
            let outcome = apply_synonyms_addition(writer, index.main, index.synonyms, synonyms);
            (kind, outcome)
        }
        UpdateData::SynonymsDeletion(synonyms) => {
            let kind = UpdateType::SynonymsDeletion {
                number: synonyms.len(),
            };
            let outcome = apply_synonyms_deletion(writer, index.main, index.synonyms, synonyms);
            (kind, outcome)
        }
        UpdateData::StopWordsAddition(stop_words) => {
            let kind = UpdateType::StopWordsAddition {
                number: stop_words.len(),
            };
            let outcome =
                apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
            (kind, outcome)
        }
        UpdateData::StopWordsDeletion(stop_words) => {
            let kind = UpdateType::StopWordsDeletion {
                number: stop_words.len(),
            };
            let outcome = apply_stop_words_deletion(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                stop_words,
            );
            (kind, outcome)
        }
    };
    let duration = start.elapsed();

    debug!(
        "Processed update number {} {:?} {:?}",
        update_id, update_type, result
    );

    Ok(ProcessedUpdateResult {
        update_id,
        update_type,
        error: result.err().map(|e| e.to_string()),
        duration: duration.as_secs_f64(),
        enqueued_at,
        processed_at: Utc::now(),
    })
}

View File

@ -0,0 +1,75 @@
use meilidb_schema::{Diff, Schema};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{error::UnsupportedOperation, store, MResult};
/// Applies a schema update inside the `writer` transaction.
///
/// When a schema is already stored, the differences with `new_schema` are
/// checked first: changing the identifier, moving an attribute, removing an
/// attribute, or introducing a new attribute anywhere but at the end are
/// rejected with the matching `UnsupportedOperation` error. A change of the
/// `indexed` or `ranked` property of an attribute is accepted and triggers a
/// full reindexing of the documents once the new schema is stored.
pub fn apply_schema_update(
    writer: &mut heed::RwTxn,
    new_schema: &Schema,
    main_store: store::Main,
    documents_fields_store: store::DocumentsFields,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
) -> MResult<()> {
    use UnsupportedOperation::{
        CanOnlyIntroduceNewSchemaAttributesAtEnd, CannotRemoveSchemaAttribute,
        CannotReorderSchemaAttribute, CannotUpdateSchemaIdentifier,
    };

    let mut need_full_reindexing = false;

    if let Some(old_schema) = main_store.schema(writer)? {
        for diff in meilidb_schema::diff(&old_schema, new_schema) {
            match diff {
                Diff::IdentChange { .. } => return Err(CannotUpdateSchemaIdentifier.into()),
                Diff::AttrMove { .. } => return Err(CannotReorderSchemaAttribute.into()),
                Diff::RemovedAttr { .. } => return Err(CannotRemoveSchemaAttribute.into()),
                Diff::NewAttr { pos, .. } => {
                    // new attribute not at the end of the schema
                    if pos < old_schema.number_of_attributes() {
                        return Err(CanOnlyIntroduceNewSchemaAttributesAtEnd.into());
                    }
                }
                Diff::AttrPropsChange { old, new, .. } => {
                    if new.indexed != old.indexed || new.ranked != old.ranked {
                        need_full_reindexing = true;
                    }
                }
            }
        }
    }

    main_store.put_schema(writer, new_schema)?;

    if !need_full_reindexing {
        return Ok(());
    }

    reindex_all_documents(
        writer,
        main_store,
        documents_fields_store,
        documents_fields_counts_store,
        postings_lists_store,
        docs_words_store,
    )
}
/// Enqueues a schema update carrying `schema` and returns the id it was stored under.
///
/// The id comes from `next_update_id`, computed over both the pending updates
/// and the updates results stores.
pub fn push_schema_update(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    schema: Schema,
) -> MResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let schema_update = Update::schema(schema);
    updates_store.put_update(writer, update_id, &schema_update)?;

    Ok(update_id)
}

View File

@ -0,0 +1,117 @@
use std::collections::BTreeSet;
use fst::{set::OpBuilder, SetBuilder};
use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
/// Builder that collects stop words and registers them as one pending update.
pub struct StopWordsAddition {
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: UpdateEventsEmitter,
    stop_words: BTreeSet<String>,
}

impl StopWordsAddition {
    /// Creates a builder with an empty set of stop words.
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: UpdateEventsEmitter,
    ) -> Self {
        Self {
            stop_words: BTreeSet::new(),
            updates_store,
            updates_results_store,
            updates_notifier,
        }
    }

    /// Adds `stop_word` to the set after passing it through `normalize_str`.
    pub fn add_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
        self.stop_words.insert(normalize_str(stop_word.as_ref()));
    }

    /// Emits `UpdateEvent::NewUpdate`, then stores the collected stop words as
    /// an update and returns the id it was stored under.
    pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
        // the result of the notification is discarded
        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);

        push_stop_words_addition(
            writer,
            self.updates_store,
            self.updates_results_store,
            self.stop_words,
        )
    }
}
/// Registers a stop words addition in the updates store.
///
/// An id is obtained from `next_update_id`, the words are wrapped with
/// `Update::stop_words_addition` and stored under that id. The id is returned.
pub fn push_stop_words_addition(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    addition: BTreeSet<String>,
) -> MResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;
    let pending = Update::stop_words_addition(addition);
    updates_store.put_update(writer, update_id, &pending)?;

    Ok(update_id)
}
/// Applies a stop words addition to the stores.
///
/// For every added word its postings list is deleted. The added words are then
/// removed from the words fst (when one is stored) and merged into the stop
/// words fst, both of which are written back to the main store.
///
/// # Panics
///
/// Panics if building any of the in-memory fst sets fails.
pub fn apply_stop_words_addition(
    writer: &mut heed::RwTxn,
    main_store: store::Main,
    postings_lists_store: store::PostingsLists,
    addition: BTreeSet<String>,
) -> MResult<()> {
    // fst holding only the words of this addition
    let mut delta_builder = SetBuilder::memory();
    for word in addition {
        delta_builder.insert(&word).unwrap();
        // every posting list associated to a new stop word is removed
        postings_lists_store.del_postings_list(writer, word.as_bytes())?;
    }
    let delta_bytes = delta_builder.into_inner().unwrap();
    let delta_stop_words = fst::Set::from_bytes(delta_bytes).unwrap();

    // the new stop words must also disappear from the main words fst
    if let Some(current_words) = main_store.words_fst(writer)? {
        let remaining = OpBuilder::new()
            .add(&current_words)
            .add(&delta_stop_words)
            .difference();

        let mut words_builder = SetBuilder::memory();
        words_builder.extend_stream(remaining).unwrap();
        let words_bytes = words_builder.into_inner().unwrap();
        let pruned_words = fst::Set::from_bytes(words_bytes).unwrap();

        main_store.put_words_fst(writer, &pruned_words)?;
    }

    // finally the new stop words are added to the stop words of the main store
    let current_stop_words = main_store.stop_words_fst(writer)?.unwrap_or_default();
    let merged = OpBuilder::new()
        .add(&current_stop_words)
        .add(&delta_stop_words)
        .r#union();

    let mut merged_builder = SetBuilder::memory();
    merged_builder.extend_stream(merged).unwrap();
    let merged_bytes = merged_builder.into_inner().unwrap();
    let merged_stop_words = fst::Set::from_bytes(merged_bytes).unwrap();

    main_store.put_stop_words_fst(writer, &merged_stop_words)?;

    Ok(())
}

View File

@ -0,0 +1,113 @@
use std::collections::BTreeSet;
use fst::{set::OpBuilder, SetBuilder};
use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
/// Builder that collects stop words to delete and registers them as one
/// pending update.
pub struct StopWordsDeletion {
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: UpdateEventsEmitter,
    stop_words: BTreeSet<String>,
}

impl StopWordsDeletion {
    /// Creates a builder with an empty set of stop words.
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: UpdateEventsEmitter,
    ) -> Self {
        Self {
            stop_words: BTreeSet::new(),
            updates_store,
            updates_results_store,
            updates_notifier,
        }
    }

    /// Marks `stop_word` for deletion after passing it through `normalize_str`.
    pub fn delete_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
        self.stop_words.insert(normalize_str(stop_word.as_ref()));
    }

    /// Emits `UpdateEvent::NewUpdate`, then stores the collected stop words as
    /// a deletion update and returns the id it was stored under.
    pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
        // the result of the notification is discarded
        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);

        push_stop_words_deletion(
            writer,
            self.updates_store,
            self.updates_results_store,
            self.stop_words,
        )
    }
}
/// Registers a stop words deletion in the updates store.
///
/// An id is obtained from `next_update_id`, the words are wrapped with
/// `Update::stop_words_deletion` and stored under that id. The id is returned.
pub fn push_stop_words_deletion(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    deletion: BTreeSet<String>,
) -> MResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;
    let pending = Update::stop_words_deletion(deletion);
    updates_store.put_update(writer, update_id, &pending)?;

    Ok(update_id)
}
/// Applies a stop words deletion to the stores.
///
/// The deleted words are subtracted from the stored stop words fst (an empty
/// set is used when none is stored), the result is written back, and then all
/// documents are reindexed with `reindex_all_documents`.
///
/// # Panics
///
/// Panics if building any of the in-memory fst sets fails.
pub fn apply_stop_words_deletion(
    writer: &mut heed::RwTxn,
    main_store: store::Main,
    documents_fields_store: store::DocumentsFields,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
    deletion: BTreeSet<String>,
) -> MResult<()> {
    // fst holding only the words of this deletion
    let mut delta_builder = SetBuilder::memory();
    for word in deletion {
        delta_builder.insert(&word).unwrap();
    }
    let delta_bytes = delta_builder.into_inner().unwrap();
    let delta_stop_words = fst::Set::from_bytes(delta_bytes).unwrap();

    // remove these words from the stop words kept in the main store
    let current_stop_words = main_store.stop_words_fst(writer)?.unwrap_or_default();
    let remaining = OpBuilder::new()
        .add(&current_stop_words)
        .add(&delta_stop_words)
        .difference();

    let mut remaining_builder = SetBuilder::memory();
    remaining_builder.extend_stream(remaining).unwrap();
    let remaining_bytes = remaining_builder.into_inner().unwrap();
    let remaining_stop_words = fst::Set::from_bytes(remaining_bytes).unwrap();

    main_store.put_stop_words_fst(writer, &remaining_stop_words)?;

    // the stop words are set up, so everything is reindexed
    reindex_all_documents(
        writer,
        main_store,
        documents_fields_store,
        documents_fields_counts_store,
        postings_lists_store,
        docs_words_store,
    )?;

    Ok(())
}

View File

@ -0,0 +1,119 @@
use std::collections::BTreeMap;
use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf;
use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
/// Builder that collects synonyms with their alternatives and registers them
/// as one pending update.
pub struct SynonymsAddition {
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: UpdateEventsEmitter,
    synonyms: BTreeMap<String, Vec<String>>,
}

impl SynonymsAddition {
    /// Creates a builder with an empty synonyms map.
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: UpdateEventsEmitter,
    ) -> Self {
        Self {
            synonyms: BTreeMap::new(),
            updates_store,
            updates_results_store,
            updates_notifier,
        }
    }

    /// Appends `alternatives` to the entry of `synonym`.
    ///
    /// The synonym is passed through `normalize_str`; each alternative is
    /// lowercased. Calling this several times for the same synonym accumulates
    /// the alternatives.
    pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
    where
        S: AsRef<str>,
        T: AsRef<str>,
        I: IntoIterator<Item = T>,
    {
        let key = normalize_str(synonym.as_ref());
        let lowercased = alternatives
            .into_iter()
            .map(|alternative| alternative.as_ref().to_lowercase());

        let entry = self.synonyms.entry(key).or_default();
        entry.extend(lowercased);
    }

    /// Emits `UpdateEvent::NewUpdate`, then stores the collected synonyms as
    /// an update and returns the id it was stored under.
    pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
        // the result of the notification is discarded
        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);

        push_synonyms_addition(
            writer,
            self.updates_store,
            self.updates_results_store,
            self.synonyms,
        )
    }
}
/// Registers a synonyms addition in the updates store.
///
/// An id is obtained from `next_update_id`, the map is wrapped with
/// `Update::synonyms_addition` and stored under that id. The id is returned.
pub fn push_synonyms_addition(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    addition: BTreeMap<String, Vec<String>>,
) -> MResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;
    let pending = Update::synonyms_addition(addition);
    updates_store.put_update(writer, update_id, &pending)?;

    Ok(update_id)
}
/// Applies a synonyms addition to the stores.
///
/// For every `(word, alternatives)` pair, the alternatives are turned into an
/// fst set (via `SetBuf::from_dirty`) and stored under `word` in the synonyms
/// store. The added words are then merged into the synonyms fst of the main
/// store; when none is stored yet, the added words become that fst.
///
/// # Panics
///
/// Panics if building any of the in-memory fst sets fails.
pub fn apply_synonyms_addition(
    writer: &mut heed::RwTxn,
    main_store: store::Main,
    synonyms_store: store::Synonyms,
    addition: BTreeMap<String, Vec<String>>,
) -> MResult<()> {
    // fst holding only the synonyms touched by this addition
    let mut delta_builder = SetBuilder::memory();

    for (word, alternatives) in addition {
        delta_builder.insert(&word).unwrap();

        let alternatives_set = SetBuf::from_dirty(alternatives);
        let mut alternatives_builder = SetBuilder::memory();
        alternatives_builder.extend_iter(alternatives_set).unwrap();
        let alternatives_bytes = alternatives_builder.into_inner().unwrap();
        let alternatives_fst = fst::Set::from_bytes(alternatives_bytes).unwrap();

        synonyms_store.put_synonyms(writer, word.as_bytes(), &alternatives_fst)?;
    }

    let delta_synonyms = delta_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    let merged_synonyms = if let Some(current) = main_store.synonyms_fst(writer)? {
        let merged = OpBuilder::new()
            .add(current.stream())
            .add(delta_synonyms.stream())
            .r#union();

        let mut merged_builder = SetBuilder::memory();
        merged_builder.extend_stream(merged).unwrap();
        let merged_bytes = merged_builder.into_inner().unwrap();
        fst::Set::from_bytes(merged_bytes).unwrap()
    } else {
        delta_synonyms
    };

    main_store.put_synonyms_fst(writer, &merged_synonyms)?;

    Ok(())
}

View File

@ -1,21 +1,33 @@
use std::collections::BTreeMap;
use std::iter::FromIterator;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf;
use crate::database::{Error, Index, index::Cache};
use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
pub struct SynonymsDeletion<'a> {
index: &'a Index,
pub struct SynonymsDeletion {
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
synonyms: BTreeMap<String, Option<Vec<String>>>,
}
impl<'a> SynonymsDeletion<'a> {
pub fn new(index: &'a Index) -> SynonymsDeletion<'a> {
SynonymsDeletion { index, synonyms: BTreeMap::new() }
impl SynonymsDeletion {
pub fn new(
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
updates_notifier: UpdateEventsEmitter,
) -> SynonymsDeletion {
SynonymsDeletion {
updates_store,
updates_results_store,
updates_notifier,
synonyms: BTreeMap::new(),
}
}
pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
@ -24,9 +36,10 @@ impl<'a> SynonymsDeletion<'a> {
}
pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
where S: AsRef<str>,
T: AsRef<str>,
I: Iterator<Item=T>,
where
S: AsRef<str>,
T: AsRef<str>,
I: Iterator<Item = T>,
{
let synonym = normalize_str(synonym.as_ref());
let value = self.synonyms.entry(synonym).or_insert(None);
@ -37,26 +50,44 @@ impl<'a> SynonymsDeletion<'a> {
}
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_synonyms_deletion(self.synonyms)
pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_synonyms_deletion(
writer,
self.updates_store,
self.updates_results_store,
self.synonyms,
)?;
Ok(update_id)
}
}
pub fn apply_synonyms_deletion(
index: &Index,
pub fn push_synonyms_deletion(
writer: &mut heed::RwTxn,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let synonyms = ref_index.synonyms_index;
let main = ref_index.main_index;
) -> MResult<u64> {
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
let update = Update::synonyms_deletion(deletion);
updates_store.put_update(writer, last_update_id, &update)?;
Ok(last_update_id)
}
pub fn apply_synonyms_deletion(
writer: &mut heed::RwTxn,
main_store: store::Main,
synonyms_store: store::Synonyms,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> MResult<()> {
let mut delete_whole_synonym_builder = SetBuilder::memory();
for (synonym, alternatives) in deletion {
match alternatives {
Some(alternatives) => {
let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?;
let prev_alternatives = synonyms_store.synonyms(writer, synonym.as_bytes())?;
let prev_alternatives = match prev_alternatives {
Some(alternatives) => alternatives,
None => continue,
@ -66,9 +97,7 @@ pub fn apply_synonyms_deletion(
let alternatives = SetBuf::from_dirty(alternatives);
let mut builder = SetBuilder::memory();
builder.extend_iter(alternatives).unwrap();
builder.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
builder.into_inner().and_then(fst::Set::from_bytes).unwrap()
};
let op = OpBuilder::new()
@ -81,19 +110,21 @@ pub fn apply_synonyms_deletion(
let len = builder.get_ref().len();
builder.extend_stream(op).unwrap();
let is_empty = len == builder.get_ref().len();
let alternatives = builder.into_inner().unwrap();
let bytes = builder.into_inner().unwrap();
let alternatives = fst::Set::from_bytes(bytes).unwrap();
(alternatives, is_empty)
};
if empty_alternatives {
delete_whole_synonym_builder.insert(synonym.as_bytes())?;
} else {
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
synonyms_store.put_synonyms(writer, synonym.as_bytes(), &alternatives)?;
}
},
}
None => {
delete_whole_synonym_builder.insert(&synonym).unwrap();
synonyms.del_alternatives_of(synonym.as_bytes())?;
synonyms_store.del_synonyms(writer, synonym.as_bytes())?;
}
}
}
@ -103,7 +134,7 @@ pub fn apply_synonyms_deletion(
.and_then(fst::Set::from_bytes)
.unwrap();
let synonyms = match main.synonyms_set()? {
let synonyms = match main_store.synonyms_fst(writer)? {
Some(synonyms) => {
let op = OpBuilder::new()
.add(synonyms.stream())
@ -116,22 +147,11 @@ pub fn apply_synonyms_deletion(
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
}
None => fst::Set::default(),
};
main.set_synonyms_set(&synonyms)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(main.words_set()?.unwrap_or_default());
let ranked_map = cache.ranked_map.clone();
let synonyms = Arc::new(synonyms);
let schema = cache.schema.clone();
let number_of_documents = cache.number_of_documents;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
main_store.put_synonyms_fst(writer, &synonyms)?;
Ok(())
}

View File

@ -1,39 +0,0 @@
[package]
name = "meilidb-data"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
arc-swap = "0.4.2"
bincode = "1.1.4"
crossbeam-channel = "0.3.9"
deunicode = "1.0.0"
hashbrown = { version = "0.6.0", features = ["serde"] }
log = "0.4.6"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] }
rocksdb = "0.12.3"
sdset = "0.3.2"
serde = { version = "1.0.99", features = ["derive"] }
serde_json = "1.0.40"
siphasher = "0.3.0"
zerocopy = "0.2.8"
[dependencies.rmp-serde]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
[dependencies.rmpv]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
features = ["with-serde"]
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dev-dependencies]
tempfile = "3.1.0"

View File

@ -1,113 +0,0 @@
use std::sync::Arc;
use crossbeam_channel::{unbounded, Sender, Receiver};
use rocksdb::{DBVector, IteratorMode, Direction};
use crate::RocksDbResult;
/// Handle on one named RocksDB column family.
///
/// Cloning shares the same underlying `CfTreeInner` through an `Arc`.
#[derive(Clone)]
pub struct CfTree {
// shared database handle and column family name
index: Arc<CfTreeInner>,
// when present, a `()` message is sent on this channel after each `insert`
sender: Option<Sender<()>>,
}
/// Database handle together with the name of the column family it targets.
struct CfTreeInner {
// shared RocksDB database
db: Arc<rocksdb::DB>,
// column family name, looked up with `cf_handle` on every operation
name: String,
}
impl CfTree {
pub fn create(db: Arc<rocksdb::DB>, name: String) -> RocksDbResult<CfTree> {
let mut options = rocksdb::Options::default();
options.create_missing_column_families(true);
let _cf = db.create_cf(&name, &options)?;
let index = Arc::new(CfTreeInner { db, name });
Ok(CfTree { index, sender: None })
}
pub fn create_with_subcription(
db: Arc<rocksdb::DB>,
name: String,
) -> RocksDbResult<(CfTree, Receiver<()>)>
{
let mut options = rocksdb::Options::default();
options.create_missing_column_families(true);
let _cf = db.create_cf(&name, &options)?;
let index = Arc::new(CfTreeInner { db, name });
let (sender, receiver) = unbounded();
Ok((CfTree { index, sender: Some(sender) }, receiver))
}
pub fn insert<K, V>(&self, key: K, value: V) -> RocksDbResult<()>
where K: AsRef<[u8]>,
V: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let result = self.index.db.put_cf(cf, key, value);
if let Some(sender) = &self.sender {
let _err = sender.send(());
}
result
}
pub fn get<K>(&self, key: K) -> RocksDbResult<Option<DBVector>>
where K: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.get_cf(cf, key)
}
pub fn remove<K>(&self, key: K) -> RocksDbResult<()>
where K: AsRef<[u8]>
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.delete_cf(cf, key)
}
/// Start and end key range is inclusive on both bounds.
pub fn range<KS, KE>(&self, start: KS, end: KE) -> RocksDbResult<CfIter>
where KS: AsRef<[u8]>,
KE: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let mut iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
iter.set_mode(IteratorMode::From(start.as_ref(), Direction::Forward));
let end_bound = Box::from(end.as_ref());
Ok(CfIter { iter, end_bound: Some(end_bound) })
}
pub fn iter(&self) -> RocksDbResult<CfIter> {
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
Ok(CfIter { iter, end_bound: None })
}
pub fn last_key(&self) -> RocksDbResult<Option<Box<[u8]>>> {
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let mut iter = self.index.db.iterator_cf(cf, IteratorMode::End)?;
Ok(iter.next().map(|(key, _)| key))
}
}
/// Iterator over the entries of a column family, optionally stopping after an
/// inclusive end key.
pub struct CfIter<'a> {
// underlying RocksDB iterator
iter: rocksdb::DBIterator<'a>,
// when set, iteration yields `None` for any key greater than this bound
end_bound: Option<Box<[u8]>>,
}
impl Iterator for CfIter<'_> {
    type Item = (Box<[u8]>, Box<[u8]>);

    /// Yields the next entry, or `None` when the underlying iterator is
    /// exhausted or the entry's key is greater than the end bound.
    fn next(&mut self) -> Option<Self::Item> {
        let entry = self.iter.next()?;

        if let Some(end_bound) = &self.end_bound {
            if entry.0 > *end_bound {
                return None;
            }
        }

        Some(entry)
    }
}

View File

@ -1,73 +0,0 @@
use std::{error, fmt};
use crate::serde::SerializerError;
/// Errors reported by the database layer.
#[derive(Debug)]
pub enum Error {
/// Displayed as "schemas differ".
SchemaDiffer,
/// Displayed as "this index does not have a schema".
SchemaMissing,
/// Displayed as "this index does not have a word index".
WordIndexMissing,
/// Displayed as "document id is missing".
MissingDocumentId,
/// Wraps an error coming from RocksDB.
RocksDbError(rocksdb::Error),
/// Wraps an error coming from the fst library.
FstError(fst::Error),
/// Wraps a MessagePack decoding error.
RmpDecodeError(rmp_serde::decode::Error),
/// Wraps a MessagePack encoding error.
RmpEncodeError(rmp_serde::encode::Error),
/// Wraps a bincode error.
BincodeError(bincode::Error),
/// Wraps an error coming from the crate's document serializer.
SerializerError(SerializerError),
}
impl From<rocksdb::Error> for Error {
fn from(error: rocksdb::Error) -> Error {
Error::RocksDbError(error)
}
}
impl From<fst::Error> for Error {
fn from(error: fst::Error) -> Error {
Error::FstError(error)
}
}
impl From<rmp_serde::decode::Error> for Error {
fn from(error: rmp_serde::decode::Error) -> Error {
Error::RmpDecodeError(error)
}
}
impl From<rmp_serde::encode::Error> for Error {
fn from(error: rmp_serde::encode::Error) -> Error {
Error::RmpEncodeError(error)
}
}
impl From<bincode::Error> for Error {
fn from(error: bincode::Error) -> Error {
Error::BincodeError(error)
}
}
impl From<SerializerError> for Error {
fn from(error: SerializerError) -> Error {
Error::SerializerError(error)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
SchemaDiffer => write!(f, "schemas differ"),
SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"),
RocksDbError(e) => write!(f, "RocksDB error; {}", e),
FstError(e) => write!(f, "fst error; {}", e),
RmpDecodeError(e) => write!(f, "rmp decode error; {}", e),
RmpEncodeError(e) => write!(f, "rmp encode error; {}", e),
BincodeError(e) => write!(f, "bincode error; {}", e),
SerializerError(e) => write!(f, "serializer error; {}", e),
}
}
}
// Marker implementation: only the default trait methods are provided, so the
// wrapped errors are not exposed through `source()`.
impl error::Error for Error { }

View File

@ -1,12 +0,0 @@
use std::ops::Deref;
/// Newtype over the `CfTree` that holds custom settings; it dereferences to
/// the wrapped tree so the tree's methods can be called on it directly.
#[derive(Clone)]
pub struct CustomSettingsIndex(pub(crate) crate::CfTree);

impl Deref for CustomSettingsIndex {
    type Target = crate::CfTree;

    fn deref(&self) -> &Self::Target {
        let CustomSettingsIndex(tree) = self;
        tree
    }
}

View File

@ -1,33 +0,0 @@
use std::sync::Arc;
use meilidb_core::DocumentId;
use crate::database::Error;
#[derive(Clone)]
pub struct DocsWordsIndex(pub crate::CfTree);
impl DocsWordsIndex {
pub fn doc_words(&self, id: DocumentId) -> Result<Option<fst::Set>, Error> {
let key = id.0.to_be_bytes();
match self.0.get(key)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None)
}
}
pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> {
let key = id.0.to_be_bytes();
self.0.insert(key, words.as_fst().as_bytes())?;
Ok(())
}
pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> {
let key = id.0.to_be_bytes();
self.0.remove(key)?;
Ok(())
}
}

View File

@ -1,90 +0,0 @@
use std::convert::TryInto;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rocksdb::DBVector;
use crate::document_attr_key::DocumentAttrKey;
use crate::RocksDbResult;
/// Returns the first and last possible keys of document `id`: the keys built
/// with `SchemaAttr::min()` and `SchemaAttr::max()`.
fn document_fields_range(id: DocumentId) -> ([u8; 10], [u8; 10]) {
    let lowest = DocumentAttrKey::new(id, SchemaAttr::min());
    let highest = DocumentAttrKey::new(id, SchemaAttr::max());

    (lowest.to_be_bytes(), highest.to_be_bytes())
}
#[derive(Clone)]
pub struct DocumentsIndex(pub(crate) crate::CfTree);
impl DocumentsIndex {
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<Option<DBVector>> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.get(key)
}
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> RocksDbResult<()> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.insert(key, value)?;
Ok(())
}
pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<()> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.remove(key)?;
Ok(())
}
pub fn del_all_document_fields(&self, id: DocumentId) -> RocksDbResult<usize> {
let (start, end) = document_fields_range(id);
let mut count = 0;
for (key, _) in self.0.range(start, end)? {
self.0.remove(key)?;
count += 1;
}
Ok(count)
}
pub fn document_fields(&self, id: DocumentId) -> RocksDbResult<DocumentFieldsIter> {
let (start, end) = document_fields_range(id);
let iter = self.0.range(start, end)?;
Ok(DocumentFieldsIter(iter))
}
pub fn len(&self) -> RocksDbResult<u64> {
let mut last_document_id = None;
let mut count = 0;
for (key, _) in self.0.iter()? {
let array = key.as_ref().try_into().unwrap();
let document_id = DocumentAttrKey::from_be_bytes(array).document_id;
if Some(document_id) != last_document_id {
last_document_id = Some(document_id);
count += 1;
}
}
Ok(count)
}
}
/// Iterator over the fields of one document, yielding the schema attribute
/// decoded from each key together with the raw stored value.
pub struct DocumentFieldsIter<'a>(crate::CfIter<'a>);

impl Iterator for DocumentFieldsIter<'_> {
    type Item = (SchemaAttr, Box<[u8]>);

    /// Yields the next `(attribute, value)` pair.
    ///
    /// # Panics
    ///
    /// Panics if a key cannot be converted to the array expected by
    /// `DocumentAttrKey::from_be_bytes`.
    fn next(&mut self) -> Option<Self::Item> {
        // `Option::map` replaces the former `Some(..) => Some(..), None => None` match
        self.0.next().map(|(key, value)| {
            let array = key.as_ref().try_into().unwrap();
            let key = DocumentAttrKey::from_be_bytes(array);
            (key.attribute, value)
        })
    }
}

View File

@ -1,102 +0,0 @@
use std::sync::Arc;
use std::convert::TryInto;
use meilidb_schema::Schema;
use crate::ranked_map::RankedMap;
use crate::database::Error;
// Keys under which `MainIndex` stores its entries in the main column family.
// Binary-serialized schema.
const SCHEMA_KEY: &str = "schema";
// Raw bytes of the words fst.
const WORDS_KEY: &str = "words";
// Raw bytes of the synonyms fst.
const SYNONYMS_KEY: &str = "synonyms";
// Binary-serialized ranked map.
const RANKED_MAP_KEY: &str = "ranked-map";
// Number of documents as a big-endian `u64`.
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
#[derive(Clone)]
pub struct MainIndex(pub(crate) crate::CfTree);
impl MainIndex {
pub fn schema(&self) -> Result<Option<Schema>, Error> {
match self.0.get(SCHEMA_KEY)? {
Some(bytes) => {
let schema = Schema::read_from_bin(bytes.as_ref())?;
Ok(Some(schema))
},
None => Ok(None),
}
}
pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> {
let mut bytes = Vec::new();
schema.write_to_bin(&mut bytes)?;
self.0.insert(SCHEMA_KEY, bytes)?;
Ok(())
}
pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
match self.0.get(WORDS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None),
}
}
pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
self.0.insert(WORDS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
}
pub fn synonyms_set(&self) -> Result<Option<fst::Set>, Error> {
match self.0.get(SYNONYMS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None),
}
}
pub fn set_synonyms_set(&self, value: &fst::Set) -> Result<(), Error> {
self.0.insert(SYNONYMS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
}
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
match self.0.get(RANKED_MAP_KEY)? {
Some(bytes) => {
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
Ok(Some(ranked_map))
},
None => Ok(None),
}
}
pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> {
let mut bytes = Vec::new();
value.write_to_bin(&mut bytes)?;
self.0.insert(RANKED_MAP_KEY, bytes)?;
Ok(())
}
pub fn number_of_documents(&self) -> Result<u64, Error> {
match self.0.get(NUMBER_OF_DOCUMENTS_KEY)? {
Some(bytes) => {
let array = (*bytes).try_into().unwrap();
Ok(u64::from_be_bytes(array))
},
None => Ok(0),
}
}
pub fn set_number_of_documents<F>(&self, f: F) -> Result<u64, Error>
where F: FnOnce(u64) -> u64,
{
let new = self.number_of_documents().map(f)?;
self.0.insert(NUMBER_OF_DOCUMENTS_KEY, new.to_be_bytes())?;
Ok(new)
}
}

View File

@ -1,487 +0,0 @@
use std::collections::{HashSet, BTreeMap};
use std::convert::TryInto;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::thread;
use std::time::{Duration, Instant};
use arc_swap::{ArcSwap, ArcSwapOption, Guard};
use crossbeam_channel::Receiver;
use meilidb_core::criterion::Criteria;
use meilidb_core::{DocIndex, Store, DocumentId, QueryBuilder};
use meilidb_schema::Schema;
use sdset::SetBuf;
use serde::{de, Serialize, Deserialize};
use crate::CfTree;
use crate::ranked_map::RankedMap;
use crate::serde::{Deserializer, DeserializerError};
pub use self::custom_settings_index::CustomSettingsIndex;
use self::docs_words_index::DocsWordsIndex;
use self::documents_index::DocumentsIndex;
use self::main_index::MainIndex;
use self::synonyms_index::SynonymsIndex;
use self::words_index::WordsIndex;
use crate::RocksDbResult;
use crate::database::{
Error,
DocumentsAddition, DocumentsDeletion,
SynonymsAddition, SynonymsDeletion,
apply_documents_addition, apply_documents_deletion,
apply_synonyms_addition, apply_synonyms_deletion,
};
mod custom_settings_index;
mod docs_words_index;
mod documents_index;
mod main_index;
mod synonyms_index;
mod words_index;
/// Owned form of a stored update, as deserialized from the updates index by
/// the update thread.
#[derive(Deserialize)]
enum UpdateOwned {
/// Documents to add.
DocumentsAddition(Vec<rmpv::Value>),
/// Ids of the documents to delete.
DocumentsDeletion(Vec<DocumentId>),
/// Synonyms to add, each with its alternatives.
SynonymsAddition(BTreeMap<String, Vec<String>>),
/// Synonyms to delete, each with an optional list of alternatives.
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}
/// Serializable form of an update; it declares the same variants and payload
/// types as `UpdateOwned`.
#[derive(Serialize)]
enum Update {
/// Documents to add.
DocumentsAddition(Vec<rmpv::Value>),
/// Ids of the documents to delete.
DocumentsDeletion(Vec<DocumentId>),
/// Synonyms to add, each with its alternatives.
SynonymsAddition(BTreeMap<String, Vec<String>>),
/// Synonyms to delete, each with an optional list of alternatives.
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}
/// Kind of a processed update; `number` is the length of the update's payload
/// (number of documents or of synonym entries).
#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateType {
DocumentsAddition { number: usize },
DocumentsDeletion { number: usize },
SynonymsAddition { number: usize },
SynonymsDeletion { number: usize },
}
/// Timings recorded while processing an update.
#[derive(Clone, Serialize, Deserialize)]
pub struct DetailedDuration {
// time spent in the `apply_*` call of the update
main: Duration,
}
/// Outcome of a processed update, handed to the update callback and stored in
/// the updates results index.
#[derive(Clone, Serialize, Deserialize)]
pub struct UpdateStatus {
/// Id of the update, decoded from its key in the updates index.
pub update_id: u64,
/// Kind of the update and size of its payload.
pub update_type: UpdateType,
/// `Ok(())` on success, otherwise the error rendered as a string.
pub result: Result<(), String>,
/// Time spent applying the update.
pub detailed_duration: DetailedDuration,
}
/// Spawns the thread that processes the pending updates of `index`.
///
/// The thread repeatedly takes the first entry of the updates index, applies
/// it, calls the update callback (if one is set) with the resulting
/// `UpdateStatus`, stores that status in the updates results index and removes
/// the update. Once no update is left it blocks on `subscription` until a new
/// message arrives.
///
/// Store accesses and (de)serialization are unwrapped, so a failure there
/// panics the thread; an error returned by an `apply_*` function is instead
/// recorded in the status as a string.
fn spawn_update_system(index: Index, subscription: Receiver<()>) -> thread::JoinHandle<()> {
thread::spawn(move || {
let mut subscription = subscription.into_iter();
loop {
// a fresh iterator is created on every pass, so each pass sees the
// current first entry of the updates index
while let Some((key, _)) = index.updates_index.iter().unwrap().next() {
// keys are the big-endian bytes of the update id
let update_id = key.as_ref().try_into().map(u64::from_be_bytes).unwrap();
let updates = &index.updates_index;
let results = &index.updates_results_index;
let update = updates.get(&key).unwrap().unwrap();
// apply the update and measure how long the apply call takes
let (update_type, result, duration) = match rmp_serde::from_read_ref(&update).unwrap() {
UpdateOwned::DocumentsAddition(documents) => {
let update_type = UpdateType::DocumentsAddition { number: documents.len() };
let ranked_map = index.cache.load().ranked_map.clone();
let start = Instant::now();
let result = apply_documents_addition(&index, ranked_map, documents);
(update_type, result, start.elapsed())
},
UpdateOwned::DocumentsDeletion(documents) => {
let update_type = UpdateType::DocumentsDeletion { number: documents.len() };
let ranked_map = index.cache.load().ranked_map.clone();
let start = Instant::now();
let result = apply_documents_deletion(&index, ranked_map, documents);
(update_type, result, start.elapsed())
},
UpdateOwned::SynonymsAddition(synonyms) => {
let update_type = UpdateType::SynonymsAddition { number: synonyms.len() };
let start = Instant::now();
let result = apply_synonyms_addition(&index, synonyms);
(update_type, result, start.elapsed())
},
UpdateOwned::SynonymsDeletion(synonyms) => {
let update_type = UpdateType::SynonymsDeletion { number: synonyms.len() };
let start = Instant::now();
let result = apply_synonyms_deletion(&index, synonyms);
(update_type, result, start.elapsed())
},
};
let detailed_duration = DetailedDuration { main: duration };
let status = UpdateStatus {
update_id,
update_type,
result: result.map_err(|e| e.to_string()),
detailed_duration,
};
// the callback runs before the status is persisted
if let Some(callback) = &*index.update_callback.load() {
(callback)(status.clone());
}
// the result is stored first, then the processed update is removed
let value = bincode::serialize(&status).unwrap();
results.insert(&key, value).unwrap();
updates.remove(&key).unwrap();
}
// this subscription is just used to block
// the loop until a new update is inserted
subscription.next();
}
})
}
/// Returns the greatest update id found as last key of either the updates
/// index or the updates results index, or `0` when both are empty.
///
/// # Panics
///
/// Panics if a last key is not exactly eight bytes long.
fn last_update_id(
    update_index: &crate::CfTree,
    update_results_index: &crate::CfTree,
) -> RocksDbResult<u64> {
    /// Decodes a key holding the big-endian bytes of an update id.
    fn decode_id(key: Box<[u8]>) -> u64 {
        key.as_ref().try_into().map(u64::from_be_bytes).unwrap()
    }

    // `Option::map` replaces the former `Some(..) => Some(..), None => None` matches
    let uikey = update_index.last_key()?.map(decode_id);
    let urikey = update_results_index.last_key()?.map(decode_id);

    // `None` orders before any `Some`, so `max` keeps an id if either side has one
    Ok(uikey.max(urikey).unwrap_or(0))
}
/// Plain-data summary of an index.
// NOTE(review): the code filling these fields is outside this view; the field
// meanings are presumably what their names say — confirm against the producer.
#[derive(Copy, Clone)]
pub struct IndexStats {
pub number_of_words: usize,
pub number_of_documents: u64,
pub number_attrs_in_ranked_map: usize,
}
/// Handle on one index: its column families, its cached view and the state
/// used by the update thread. Cloning shares the `Arc`-wrapped state.
#[derive(Clone)]
pub struct Index {
// swappable snapshot of the words, synonyms, schema, ranked map and
// number of documents
pub(crate) cache: Arc<ArcSwap<Cache>>,
// TODO this will be a snapshot in the future
main_index: MainIndex,
synonyms_index: SynonymsIndex,
words_index: WordsIndex,
docs_words_index: DocsWordsIndex,
documents_index: DocumentsIndex,
custom_settings_index: CustomSettingsIndex,
// used by the update system
// shared update id counter
updates_id: Arc<AtomicU64>,
// pending updates, keyed by update id
updates_index: crate::CfTree,
// statuses of processed updates, keyed by update id
updates_results_index: crate::CfTree,
// optional callback invoked with the status of each processed update
update_callback: Arc<ArcSwapOption<Box<dyn Fn(UpdateStatus) + Send + Sync + 'static>>>,
}
/// In-memory view of an index, held behind the `ArcSwap` of `Index::cache`.
pub(crate) struct Cache {
/// Words fst set.
pub words: Arc<fst::Set>,
/// Synonyms fst set.
pub synonyms: Arc<fst::Set>,
/// Schema of the index.
pub schema: Schema,
/// Ranked map of the index.
pub ranked_map: RankedMap,
/// Number of documents in the index.
pub number_of_documents: u64,
}
impl Index {
    /// Opens the index `name`, relying on the schema already stored for it.
    ///
    /// # Errors
    ///
    /// Returns `Error::SchemaMissing` when no schema is stored for this index.
    pub fn new(db: Arc<rocksdb::DB>, name: &str) -> Result<Index, Error> {
        Index::new_raw(db, name, None)
    }

    /// Opens the index `name`, storing `schema` when the index has none yet.
    ///
    /// # Errors
    ///
    /// Returns `Error::SchemaDiffer` when a different schema is already stored.
    pub fn with_schema(db: Arc<rocksdb::DB>, name: &str, schema: Schema) -> Result<Index, Error> {
        Index::new_raw(db, name, Some(schema))
    }

    /// Creates every tree of the index, loads the in-memory cache and
    /// spawns the update system for it.
    fn new_raw(db: Arc<rocksdb::DB>, name: &str, schema: Option<Schema>) -> Result<Index, Error> {
        let main_index = CfTree::create(db.clone(), name.to_string()).map(MainIndex)?;
        let synonyms_index = CfTree::create(db.clone(), format!("{}-synonyms", name)).map(SynonymsIndex)?;
        let words_index = CfTree::create(db.clone(), format!("{}-words", name)).map(WordsIndex)?;
        let docs_words_index = CfTree::create(db.clone(), format!("{}-docs-words", name)).map(DocsWordsIndex)?;
        let documents_index = CfTree::create(db.clone(), format!("{}-documents", name)).map(DocumentsIndex)?;
        let custom_settings_index = CfTree::create(db.clone(), format!("{}-custom", name)).map(CustomSettingsIndex)?;

        let (updates_index, subscription) = CfTree::create_with_subcription(db.clone(), format!("{}-updates", name))?;
        let updates_results_index = CfTree::create(db.clone(), format!("{}-updates-results", name))?;

        // missing sets are replaced by empty ones
        let words = main_index
            .words_set()?
            .map(Arc::new)
            .unwrap_or_else(|| Arc::new(fst::Set::default()));

        let synonyms = main_index
            .synonyms_set()?
            .map(Arc::new)
            .unwrap_or_else(|| Arc::new(fst::Set::default()));

        let schema = match (schema, main_index.schema()?) {
            (Some(ref expected), Some(ref current)) if current != expected => {
                return Err(Error::SchemaDiffer)
            },
            (Some(expected), Some(_)) => expected,
            (Some(expected), None) => {
                // first time a schema is given for this index: persist it
                main_index.set_schema(&expected)?;
                expected
            },
            (None, Some(current)) => current,
            (None, None) => return Err(Error::SchemaMissing),
        };

        let ranked_map = main_index.ranked_map()?.unwrap_or_else(RankedMap::default);
        let number_of_documents = documents_index.len()?;

        let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
        let cache = Arc::new(ArcSwap::from_pointee(cache));

        // the next update id follows the greatest id already present in the trees
        let last_update_id = last_update_id(&updates_index, &updates_results_index)?;
        let updates_id = Arc::new(AtomicU64::new(last_update_id + 1));

        let index = Index {
            cache,
            main_index,
            synonyms_index,
            words_index,
            docs_words_index,
            documents_index,
            custom_settings_index,
            updates_id,
            updates_index,
            updates_results_index,
            update_callback: Arc::new(ArcSwapOption::empty()),
        };

        let _handle = spawn_update_system(index.clone(), subscription);

        Ok(index)
    }

    /// Installs `callback` as the update callback, replacing any previous one.
    pub fn set_update_callback<F>(&self, callback: F)
    where F: Fn(UpdateStatus) + Send + Sync + 'static
    {
        self.update_callback.store(Some(Arc::new(Box::new(callback))));
    }

    /// Removes the update callback, if any.
    pub fn unset_update_callback(&self) {
        self.update_callback.store(None);
    }

    /// Returns counters computed from the currently loaded cache.
    pub fn stats(&self) -> RocksDbResult<IndexStats> {
        let cache = self.cache.load();
        Ok(IndexStats {
            number_of_words: cache.words.len(),
            number_of_documents: cache.number_of_documents,
            number_attrs_in_ranked_map: cache.ranked_map.len(),
        })
    }

    /// Creates a query builder working on a `RefIndex` view of this index.
    pub fn query_builder(&self) -> QueryBuilder<RefIndex> {
        let ref_index = self.as_ref();
        QueryBuilder::new(ref_index)
    }

    /// Same as `query_builder` but with the given ranking `criteria`.
    pub fn query_builder_with_criteria<'c>(
        &self,
        criteria: Criteria<'c>,
    ) -> QueryBuilder<'c, RefIndex>
    {
        let ref_index = self.as_ref();
        QueryBuilder::with_criteria(ref_index, criteria)
    }

    /// Builds a view borrowing every store of this index together with
    /// the cache loaded at the time of the call.
    pub fn as_ref(&self) -> RefIndex {
        RefIndex {
            cache: self.cache.load(),
            main_index: &self.main_index,
            synonyms_index: &self.synonyms_index,
            words_index: &self.words_index,
            docs_words_index: &self.docs_words_index,
            documents_index: &self.documents_index,
            custom_settings_index: &self.custom_settings_index,
        }
    }

    /// Returns a copy of the cached schema.
    pub fn schema(&self) -> Schema {
        self.cache.load().schema.clone()
    }

    /// Returns a handle on the custom settings store.
    pub fn custom_settings(&self) -> CustomSettingsIndex {
        self.custom_settings_index.clone()
    }

    /// Returns the number of documents recorded in the cache.
    pub fn number_of_documents(&self) -> u64 {
        self.cache.load().number_of_documents
    }

    /// Starts a documents addition update.
    pub fn documents_addition<D>(&self) -> DocumentsAddition<D> {
        DocumentsAddition::new(self)
    }

    /// Starts a documents deletion update.
    pub fn documents_deletion(&self) -> DocumentsDeletion {
        DocumentsDeletion::new(self)
    }

    /// Starts a synonyms addition update.
    pub fn synonyms_addition(&self) -> SynonymsAddition {
        SynonymsAddition::new(self)
    }

    /// Starts a synonyms deletion update.
    pub fn synonyms_deletion(&self) -> SynonymsDeletion {
        SynonymsDeletion::new(self)
    }

    /// Returns the status stored for `update_id`, or `None` when no
    /// result has been stored for it yet.
    pub fn update_status(
        &self,
        update_id: u64,
    ) -> Result<Option<UpdateStatus>, Error>
    {
        let update_id = update_id.to_be_bytes();
        match self.updates_results_index.get(update_id)? {
            Some(value) => {
                let value = bincode::deserialize(&value)?;
                Ok(Some(value))
            },
            None => Ok(None),
        }
    }

    /// Blocks the calling thread until a status is stored for `update_id`,
    /// polling the results tree every 300 milliseconds.
    pub fn update_status_blocking(
        &self,
        update_id: u64,
    ) -> Result<UpdateStatus, Error>
    {
        loop {
            // a single lookup both detects and decodes the result, so there
            // is no second read that would need to be unwrapped
            if let Some(status) = self.update_status(update_id)? {
                return Ok(status)
            }

            std::thread::sleep(Duration::from_millis(300));
        }
    }

    /// Deserializes the document `id` into a `T`, optionally restricted
    /// to the attributes named in `fields`.
    pub fn document<T>(
        &self,
        fields: Option<&HashSet<&str>>,
        id: DocumentId,
    ) -> Result<Option<T>, DeserializerError>
    where T: de::DeserializeOwned,
    {
        let schema = self.schema();

        // NOTE(review): the names are collected into an `Option`, so one name
        // unknown to the schema turns the whole filter into `None` and no
        // restriction is applied - confirm this is intended
        let fields = match fields {
            Some(fields) => fields.into_iter().map(|name| schema.attribute(name)).collect(),
            None => None,
        };

        let mut deserializer = Deserializer {
            document_id: id,
            index: self,
            fields: fields.as_ref(),
        };

        // TODO: currently we return an error if all document fields are missing,
        //       returning None would have been better
        T::deserialize(&mut deserializer).map(Some)
    }
}
impl Index {
    /// Queues a documents addition and returns the id assigned to the update.
    ///
    /// Every document goes through a MessagePack round trip so the queued
    /// update no longer depends on the caller's type `D`.
    pub(crate) fn push_documents_addition<D>(&self, addition: Vec<D>) -> Result<u64, Error>
    where D: serde::Serialize
    {
        let mut documents = Vec::with_capacity(addition.len());
        for document in addition {
            let encoded = rmp_serde::to_vec_named(&document)?;
            let decoded = rmp_serde::from_read(&encoded[..])?;
            documents.push(decoded);
        }

        let update = Update::DocumentsAddition(documents);
        let raw_update = rmp_serde::to_vec_named(&update)?;
        self.raw_push_update(raw_update)
    }

    /// Queues the deletion of the given documents and returns the update id.
    pub(crate) fn push_documents_deletion(
        &self,
        deletion: Vec<DocumentId>,
    ) -> Result<u64, Error>
    {
        let update = Update::DocumentsDeletion(deletion);
        let raw_update = rmp_serde::to_vec_named(&update)?;
        self.raw_push_update(raw_update)
    }

    /// Queues a synonyms addition and returns the update id.
    pub(crate) fn push_synonyms_addition(
        &self,
        addition: BTreeMap<String, Vec<String>>,
    ) -> Result<u64, Error>
    {
        let update = Update::SynonymsAddition(addition);
        let raw_update = rmp_serde::to_vec_named(&update)?;
        self.raw_push_update(raw_update)
    }

    /// Queues a synonyms deletion and returns the update id.
    pub(crate) fn push_synonyms_deletion(
        &self,
        deletion: BTreeMap<String, Option<Vec<String>>>,
    ) -> Result<u64, Error>
    {
        let update = Update::SynonymsDeletion(deletion);
        let raw_update = rmp_serde::to_vec_named(&update)?;
        self.raw_push_update(raw_update)
    }

    /// Reserves the next update id and stores the serialized update under
    /// the big-endian bytes of that id.
    fn raw_push_update(&self, raw_update: Vec<u8>) -> Result<u64, Error> {
        let update_id = self.updates_id.fetch_add(1, Ordering::SeqCst);
        let key = update_id.to_be_bytes();
        self.updates_index.insert(key, raw_update)?;
        Ok(update_id)
    }
}
/// Borrowed view of an `Index`, built by `Index::as_ref`: references to
/// every store plus the cache that was loaded when the view was created.
pub struct RefIndex<'a> {
// cache loaded at creation time, it is not refreshed afterwards
pub(crate) cache: Guard<'static, Arc<Cache>>,
pub main_index: &'a MainIndex,
pub synonyms_index: &'a SynonymsIndex,
pub words_index: &'a WordsIndex,
pub docs_words_index: &'a DocsWordsIndex,
pub documents_index: &'a DocumentsIndex,
pub custom_settings_index: &'a CustomSettingsIndex,
}
/// `Store` implementation: the word and synonym sets come from the cached
/// view, the per-word data is read from the underlying stores.
impl Store for RefIndex<'_> {
    type Error = Error;

    /// Returns the cached set of indexed words.
    fn words(&self) -> Result<&fst::Set, Self::Error> {
        let cached = &self.cache.words;
        Ok(cached)
    }

    /// Reads the document indexes stored for `word`.
    fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
        self.words_index.doc_indexes(word).map_err(Into::into)
    }

    /// Returns the cached set of words that have synonyms.
    fn synonyms(&self) -> Result<&fst::Set, Self::Error> {
        let cached = &self.cache.synonyms;
        Ok(cached)
    }

    /// Reads the alternatives stored for `word`.
    fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, Self::Error> {
        self.synonyms_index.alternatives_to(word).map_err(Into::into)
    }
}

View File

@ -1,21 +0,0 @@
use crate::RocksDbResult;
/// Store mapping a word to the set of its alternatives.
#[derive(Clone)]
pub struct SynonymsIndex(pub(crate) crate::CfTree);

impl SynonymsIndex {
    /// Returns the alternatives stored for `word`, if any.
    ///
    /// # Panics
    ///
    /// Panics when the stored bytes are not a valid `fst::Set`.
    pub fn alternatives_to(&self, word: &[u8]) -> RocksDbResult<Option<fst::Set>> {
        let stored = self.0.get(word)?;
        Ok(stored.map(|bytes| fst::Set::from_bytes(bytes.to_vec()).unwrap()))
    }

    /// Stores `value` as the alternatives of `word`.
    pub fn set_alternatives_to(&self, word: &[u8], value: Vec<u8>) -> RocksDbResult<()> {
        self.0.insert(word, value)?;
        Ok(())
    }

    /// Removes the alternatives of `word`.
    pub fn del_alternatives_of(&self, word: &[u8]) -> RocksDbResult<()> {
        self.0.remove(word)?;
        Ok(())
    }
}

View File

@ -1,45 +0,0 @@
use meilidb_core::DocIndex;
use sdset::{Set, SetBuf};
use zerocopy::{LayoutVerified, AsBytes};
use crate::RocksDbResult;
/// Store mapping a word to the set of `DocIndex` where it appears.
#[derive(Clone)]
pub struct WordsIndex(pub(crate) crate::CfTree);

impl WordsIndex {
    /// Returns the document indexes stored for `word`, if any.
    ///
    /// The stored bytes are always copied into a freshly allocated vector.
    /// When the bytes cannot be viewed directly as a `[DocIndex]`, only the
    /// complete `DocIndex` entries are copied; trailing bytes that do not
    /// form a whole entry are ignored.
    pub fn doc_indexes(&self, word: &[u8]) -> RocksDbResult<Option<SetBuf<DocIndex>>> {
        // we must force an allocation to make the memory aligned
        match self.0.get(word)? {
            Some(bytes) => {
                let vec = match LayoutVerified::new_slice(bytes.as_ref()) {
                    Some(layout) => layout.into_slice().to_vec(),
                    None => {
                        let entry_size = std::mem::size_of::<DocIndex>();
                        let count = bytes.as_ref().len() / entry_size;
                        // number of bytes covered by whole entries only; copying
                        // the full input length could write past the allocation
                        // when the length is not a multiple of the entry size
                        let byte_len = count * entry_size;

                        let mut buf: Vec<DocIndex> = Vec::with_capacity(count);

                        // SAFETY: `buf` owns room for `count` entries, that is at
                        // least `byte_len` bytes, and the source slice is at least
                        // `byte_len` bytes long. The two regions cannot overlap
                        // because `buf` was just allocated. The first `count`
                        // entries are fully written before `set_len`.
                        // NOTE(review): this relies on every bit pattern being a
                        // valid `DocIndex`, presumably guaranteed by the bound the
                        // `LayoutVerified` branch above already needs - confirm.
                        unsafe {
                            let src = bytes.as_ref().as_ptr();
                            let dst = buf.as_mut_ptr() as *mut u8;
                            std::ptr::copy_nonoverlapping(src, dst, byte_len);
                            buf.set_len(count);
                        }

                        buf
                    }
                };

                let setbuf = SetBuf::new_unchecked(vec);

                Ok(Some(setbuf))
            },
            None => Ok(None),
        }
    }

    /// Stores `set` as the document indexes of `word`.
    pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> RocksDbResult<()> {
        self.0.insert(word, set.as_bytes()).map(drop)
    }

    /// Removes the document indexes of `word`.
    pub fn del_doc_indexes(&self, word: &[u8]) -> RocksDbResult<()> {
        self.0.remove(word).map(drop)
    }
}

View File

@ -1,115 +0,0 @@
use std::collections::hash_map::Entry;
use std::collections::{HashSet, HashMap};
use std::path::Path;
use std::sync::Arc;
use std::sync::RwLock;
use meilidb_schema::Schema;
mod error;
mod index;
mod update;
pub use self::error::Error;
pub use self::index::{Index, CustomSettingsIndex};
pub use self::update::DocumentsAddition;
pub use self::update::DocumentsDeletion;
pub use self::update::SynonymsAddition;
pub use self::update::SynonymsDeletion;
use self::update::apply_documents_addition;
use self::update::apply_documents_deletion;
use self::update::apply_synonyms_addition;
use self::update::apply_synonyms_deletion;
// key under which the set of index names is stored in the database
const INDEXES_KEY: &str = "indexes";
/// Reads the set of index names stored under `INDEXES_KEY`.
///
/// An absent key yields an empty set.
fn load_indexes(tree: &rocksdb::DB) -> Result<HashSet<String>, Error> {
    let names = match tree.get(INDEXES_KEY)? {
        None => HashSet::new(),
        Some(bytes) => bincode::deserialize(&bytes)?,
    };

    Ok(names)
}
/// A RocksDB database holding several named indexes.
pub struct Database {
// indexes already opened, by name
cache: RwLock<HashMap<String, Index>>,
// the underlying RocksDB handle shared with every index
inner: Arc<rocksdb::DB>,
}
impl Database {
    /// Opens the database at `path`, creating it when missing, then opens
    /// every index whose name is registered in it.
    ///
    /// When the column families cannot be listed, the database is opened
    /// with an empty list.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
        let mut options = rocksdb::Options::default();
        options.create_if_missing(true);

        let column_families = rocksdb::DB::list_cf(&options, &path).unwrap_or_default();
        let inner = rocksdb::DB::open_cf(&options, path, column_families).map(Arc::new)?;

        let registered = load_indexes(&inner)?;
        let database = Database { cache: RwLock::new(HashMap::new()), inner };

        for name in registered {
            database.open_index(&name)?;
        }

        Ok(database)
    }

    /// Returns the names of the indexes registered in the database.
    pub fn indexes(&self) -> Result<HashSet<String>, Error> {
        load_indexes(&self.inner)
    }

    /// Persists `value` as the set of registered index names.
    fn set_indexes(&self, value: &HashSet<String>) -> Result<(), Error> {
        let encoded = bincode::serialize(value)?;
        self.inner.put(INDEXES_KEY, encoded)?;
        Ok(())
    }

    /// Returns the index `name`, opening it on first access.
    ///
    /// Returns `Ok(None)` when no index is registered under that name.
    pub fn open_index(&self, name: &str) -> Result<Option<Index>, Error> {
        // fast path: the read lock is released at the end of this statement
        let cached = self.cache.read().unwrap().get(name).cloned();
        if let Some(index) = cached {
            return Ok(Some(index))
        }

        let mut opened = self.cache.write().unwrap();
        let index = match opened.entry(name.to_string()) {
            // another thread opened it between the two locks
            Entry::Occupied(slot) => slot.get().clone(),
            Entry::Vacant(slot) => {
                if !self.indexes()?.contains(name) {
                    return Ok(None)
                }

                let index = Index::new(self.inner.clone(), name)?;
                slot.insert(index).clone()
            },
        };

        Ok(Some(index))
    }

    /// Returns the index `name`, creating and registering it with `schema`
    /// when it is not opened yet.
    ///
    /// NOTE(review): when the index is already in the cache it is returned
    /// as is and `schema` is not compared with the existing one.
    pub fn create_index(&self, name: &str, schema: Schema) -> Result<Index, Error> {
        let mut opened = self.cache.write().unwrap();

        let index = match opened.entry(name.to_string()) {
            Entry::Occupied(slot) => slot.get().clone(),
            Entry::Vacant(slot) => {
                let index = Index::with_schema(self.inner.clone(), name, schema)?;

                // register the name so the index is reopened by `open`
                let mut names = self.indexes()?;
                names.insert(name.to_string());
                self.set_indexes(&names)?;

                slot.insert(index).clone()
            },
        };

        Ok(index)
    }
}

View File

@ -1,139 +0,0 @@
use std::collections::HashSet;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use sdset::{SetOperation, duo::Union};
use serde::Serialize;
use crate::RankedMap;
use crate::database::{Error, Index, index::Cache, apply_documents_deletion};
use crate::indexer::Indexer;
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
/// Builder collecting documents before queuing them as a single update.
pub struct DocumentsAddition<'a, D> {
    index: &'a Index,
    documents: Vec<D>,
}

impl<'a, D> DocumentsAddition<'a, D> {
    /// Creates an empty addition targeting `index`.
    pub fn new(index: &'a Index) -> DocumentsAddition<'a, D> {
        let documents = Vec::new();
        DocumentsAddition { index, documents }
    }

    /// Adds `document` to the pending addition.
    pub fn update_document(&mut self, document: D) {
        self.documents.push(document);
    }

    /// Queues the collected documents and returns the update id.
    pub fn finalize(self) -> Result<u64, Error>
    where D: serde::Serialize
    {
        let DocumentsAddition { index, documents } = self;
        index.push_documents_addition(documents)
    }
}
pub fn apply_documents_addition(
index: &Index,
mut ranked_map: RankedMap,
addition: Vec<rmpv::Value>,
) -> Result<(), Error>
{
let mut document_ids = HashSet::new();
let mut document_store = RamDocumentStore::new();
let mut indexer = Indexer::new();
let schema = &index.schema();
let identifier = schema.identifier_name();
for document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
// 1. store the document id for future deletion
document_ids.insert(document_id);
// 2. index the document fields in ram stores
let serializer = Serializer {
schema,
document_store: &mut document_store,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
let ref_index = index.as_ref();
let docs_words = ref_index.docs_words_index;
let documents = ref_index.documents_index;
let main = ref_index.main_index;
let words = ref_index.words_index;
// 1. remove the previous documents match indexes
let documents_to_insert = document_ids.iter().cloned().collect();
apply_documents_deletion(index, ranked_map.clone(), documents_to_insert)?;
// 2. insert new document attributes in the database
for ((id, attr), value) in document_store.into_inner() {
documents.set_document_field(id, attr, value)?;
}
let indexed = indexer.build();
let mut delta_words_builder = SetBuilder::memory();
for (word, delta_set) in indexed.words_doc_indexes {
delta_words_builder.insert(&word).unwrap();
let set = match words.doc_indexes(&word)? {
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
None => delta_set,
};
words.set_doc_indexes(&word, &set)?;
}
for (id, words) in indexed.docs_words {
docs_words.set_doc_words(id, &words)?;
}
let delta_words = delta_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let words = match main.words_set()? {
Some(words) => {
let op = OpBuilder::new()
.add(words.stream())
.add(delta_words.stream())
.r#union();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => delta_words,
};
main.set_words_set(&words)?;
main.set_ranked_map(&ranked_map)?;
let inserted_documents_len = document_ids.len() as u64;
let number_of_documents = main.set_number_of_documents(|old| old + inserted_documents_len)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(words);
let synonyms = cache.synonyms.clone();
let schema = cache.schema.clone();
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}

View File

@ -1,150 +0,0 @@
use std::collections::{HashMap, HashSet, BTreeSet};
use std::sync::Arc;
use fst::{SetBuilder, Streamer};
use meilidb_core::DocumentId;
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};
use crate::RankedMap;
use crate::serde::extract_document_id;
use crate::database::{Index, Error, index::Cache};
pub struct DocumentsDeletion<'a> {
index: &'a Index,
documents: Vec<DocumentId>,
}
impl<'a> DocumentsDeletion<'a> {
pub fn new(index: &'a Index) -> DocumentsDeletion<'a> {
DocumentsDeletion { index, documents: Vec::new() }
}
pub fn delete_document_by_id(&mut self, document_id: DocumentId) {
self.documents.push(document_id);
}
pub fn delete_document<D>(&mut self, document: D) -> Result<(), Error>
where D: serde::Serialize,
{
let schema = self.index.schema();
let identifier = schema.identifier_name();
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
self.delete_document_by_id(document_id);
Ok(())
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_documents_deletion(self.documents)
}
}
impl Extend<DocumentId> for DocumentsDeletion<'_> {
fn extend<T: IntoIterator<Item=DocumentId>>(&mut self, iter: T) {
self.documents.extend(iter)
}
}
/// Removes the documents of `deletion` from `index`.
///
/// For every requested id this removes its ranked attributes from
/// `ranked_map`, its entries in the words doc indexes, its stored fields
/// and its docs-words entry. The words set, ranked map and document
/// counter are then persisted and the index cache is replaced.
///
/// The stored fields and docs-words entry are deleted once per requested
/// id, whether or not the document has any indexed word. Doing it from
/// the per-word loop would skip documents that have no docs-words entry.
pub fn apply_documents_deletion(
    index: &Index,
    mut ranked_map: RankedMap,
    deletion: Vec<DocumentId>,
) -> Result<(), Error>
{
    let ref_index = index.as_ref();
    let schema = index.schema();
    let docs_words = ref_index.docs_words_index;
    let documents = ref_index.documents_index;
    let main = ref_index.main_index;
    let words = ref_index.words_index;

    // sorted and deduplicated ids
    let idset = SetBuf::from_dirty(deletion);

    // collect the ranked attributes according to the schema
    let ranked_attrs: Vec<_> = schema.iter()
        .filter_map(|(_, attr, prop)| {
            if prop.is_ranked() { Some(attr) } else { None }
        })
        .collect();

    let mut words_document_ids = HashMap::new();
    let mut deleted_documents = HashSet::new();
    for id in idset {
        // remove all the ranked attributes from the ranked_map
        for ranked_attr in &ranked_attrs {
            ranked_map.remove(id, *ranked_attr);
        }

        // remember which words reference this document
        if let Some(words) = docs_words.doc_words(id)? {
            let mut stream = words.stream();
            while let Some(word) = stream.next() {
                let word = word.to_vec();
                words_document_ids.entry(word).or_insert_with(Vec::new).push(id);
            }
        }

        // the words of the document have been collected above, its fields
        // and docs-words entry can now be dropped; only documents that
        // really had stored fields count as deleted
        if documents.del_all_document_fields(id)? != 0 {
            deleted_documents.insert(id);
        }
        docs_words.del_doc_words(id)?;
    }

    let mut removed_words = BTreeSet::new();
    for (word, document_ids) in words_document_ids {
        let document_ids = SetBuf::from_dirty(document_ids);

        if let Some(doc_indexes) = words.doc_indexes(&word)? {
            let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
            let doc_indexes = op.into_set_buf();

            if !doc_indexes.is_empty() {
                words.set_doc_indexes(&word, &doc_indexes)?;
            } else {
                // no document contains this word anymore
                words.del_doc_indexes(&word)?;
                removed_words.insert(word);
            }
        }
    }

    let removed_words = fst::Set::from_iter(removed_words).unwrap();
    let words = match main.words_set()? {
        Some(words_set) => {
            let op = fst::set::OpBuilder::new()
                .add(words_set.stream())
                .add(removed_words.stream())
                .difference();

            let mut words_builder = SetBuilder::memory();
            words_builder.extend_stream(op).unwrap();
            words_builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        },
        None => fst::Set::default(),
    };

    main.set_words_set(&words)?;
    main.set_ranked_map(&ranked_map)?;

    let deleted_documents_len = deleted_documents.len() as u64;
    let number_of_documents = main.set_number_of_documents(|old| old - deleted_documents_len)?;

    // update the "consistent" view of the Index
    let cache = ref_index.cache;
    let words = Arc::new(words);
    let synonyms = cache.synonyms.clone();
    let schema = cache.schema.clone();

    let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
    index.cache.store(Arc::new(cache));

    Ok(())
}

View File

@ -1,9 +0,0 @@
mod documents_addition;
mod documents_deletion;
mod synonyms_addition;
mod synonyms_deletion;
pub use self::documents_addition::{DocumentsAddition, apply_documents_addition};
pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion};
pub use self::synonyms_addition::{SynonymsAddition, apply_synonyms_addition};
pub use self::synonyms_deletion::{SynonymsDeletion, apply_synonyms_deletion};

View File

@ -1,94 +0,0 @@
use std::collections::BTreeMap;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use sdset::SetBuf;
use crate::database::{Error, Index,index::Cache};
/// Builder collecting synonyms before queuing them as a single update.
pub struct SynonymsAddition<'a> {
    index: &'a Index,
    synonyms: BTreeMap<String, Vec<String>>,
}

impl<'a> SynonymsAddition<'a> {
    /// Creates an empty addition targeting `index`.
    pub fn new(index: &'a Index) -> SynonymsAddition<'a> {
        let synonyms = BTreeMap::new();
        SynonymsAddition { index, synonyms }
    }

    /// Registers `alternatives` for `synonym`.
    ///
    /// The synonym is normalized with `normalize_str`, the alternatives are
    /// lowercased, and alternatives given in several calls accumulate.
    pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
    where S: AsRef<str>,
          T: AsRef<str>,
          I: IntoIterator<Item=T>,
    {
        let key = normalize_str(synonym.as_ref());
        let known = self.synonyms.entry(key).or_insert_with(Vec::new);
        known.extend(alternatives.into_iter().map(|alt| alt.as_ref().to_lowercase()));
    }

    /// Queues the collected synonyms and returns the update id.
    pub fn finalize(self) -> Result<u64, Error> {
        let SynonymsAddition { index, synonyms } = self;
        index.push_synonyms_addition(synonyms)
    }
}
/// Stores the synonyms of `addition` into `index`.
///
/// The alternatives of every synonym are written as an fst set, the
/// synonyms set is extended with the new synonyms, and the index cache is
/// replaced with a view containing the new set.
pub fn apply_synonyms_addition(
    index: &Index,
    addition: BTreeMap<String, Vec<String>>,
) -> Result<(), Error>
{
    let ref_index = index.as_ref();
    let synonyms_store = ref_index.synonyms_index;
    let main_store = ref_index.main_index;

    let mut added_builder = SetBuilder::memory();
    for (synonym, alternatives) in addition {
        added_builder.insert(&synonym).unwrap();

        // the fst builder needs its keys sorted and deduplicated
        let sorted = SetBuf::from_dirty(alternatives);
        let mut alternatives_builder = SetBuilder::memory();
        alternatives_builder.extend_iter(sorted).unwrap();
        let encoded = alternatives_builder.into_inner().unwrap();

        synonyms_store.set_alternatives_to(synonym.as_bytes(), encoded)?;
    }

    let delta_synonyms = added_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    // union of the stored synonyms set and the newly added synonyms
    let synonyms = match main_store.synonyms_set()? {
        Some(current) => {
            let union = OpBuilder::new()
                .add(current.stream())
                .add(delta_synonyms.stream())
                .r#union();

            let mut builder = SetBuilder::memory();
            builder.extend_stream(union).unwrap();
            builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        },
        None => delta_synonyms,
    };

    main_store.set_synonyms_set(&synonyms)?;

    // update the "consistent" view of the Index
    let previous = ref_index.cache;
    let cache = Cache {
        words: Arc::new(main_store.words_set()?.unwrap_or_default()),
        synonyms: Arc::new(synonyms),
        schema: previous.schema.clone(),
        ranked_map: previous.ranked_map.clone(),
        number_of_documents: previous.number_of_documents,
    };
    index.cache.store(Arc::new(cache));

    Ok(())
}

View File

@ -1,69 +0,0 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
/// Key identifying one attribute of one document.
///
/// Its byte form is 10 bytes long: the document id as 8 big-endian bytes
/// followed by the attribute as 2 big-endian bytes.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocumentAttrKey {
    pub document_id: DocumentId,
    pub attribute: SchemaAttr,
}

impl DocumentAttrKey {
    /// Creates the key of `attribute` in the document `document_id`.
    pub fn new(document_id: DocumentId, attribute: SchemaAttr) -> DocumentAttrKey {
        DocumentAttrKey { document_id, attribute }
    }

    /// Encodes the key: document id first, then attribute, both big-endian.
    pub fn to_be_bytes(self) -> [u8; 10] {
        let mut output = [0u8; 10];

        // plain slice copies, the lengths are fixed so they cannot panic
        output[..8].copy_from_slice(&self.document_id.0.to_be_bytes());
        output[8..].copy_from_slice(&self.attribute.0.to_be_bytes());

        output
    }

    /// Decodes a key produced by `to_be_bytes`.
    pub fn from_be_bytes(bytes: [u8; 10]) -> DocumentAttrKey {
        let mut document_id = [0u8; 8];
        document_id.copy_from_slice(&bytes[..8]);

        let mut attribute = [0u8; 2];
        attribute.copy_from_slice(&bytes[8..]);

        DocumentAttrKey {
            document_id: DocumentId(u64::from_be_bytes(document_id)),
            attribute: SchemaAttr(u16::from_be_bytes(attribute)),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding a key then decoding it must give the same key back.
    #[test]
    fn to_from_be_bytes() {
        let original = DocumentAttrKey::new(DocumentId(67578308), SchemaAttr(3456));
        let decoded = DocumentAttrKey::from_be_bytes(original.to_be_bytes());

        assert_eq!(original, decoded);
    }
}

View File

@ -1,208 +0,0 @@
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;
use deunicode::deunicode_with_tofu;
use meilidb_core::{DocumentId, DocIndex};
use meilidb_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
use sdset::SetBuf;
// a word is stored as its raw bytes
type Word = Vec<u8>; // TODO make it be a SmallVec
/// Accumulates, in memory, the words found in the indexed texts until
/// `build` turns them into an `Indexed`.
pub struct Indexer {
word_limit: usize, // the maximum number of indexed words
// for every word, the positions where it was found
words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
// for every document, the words it contains (with duplicates until `build`)
docs_words: HashMap<DocumentId, Vec<Word>>,
}
/// Result of `Indexer::build`.
pub struct Indexed {
/// For every word, the set of positions where it was found.
pub words_doc_indexes: BTreeMap<Word, SetBuf<DocIndex>>,
/// For every document, the set of words it contains.
pub docs_words: HashMap<DocumentId, fst::Set>,
}
impl Indexer {
    /// Creates an indexer with a word limit of 1000.
    pub fn new() -> Indexer {
        Indexer::with_word_limit(1000)
    }

    /// Creates an indexer that stops indexing a text once a token reaches
    /// the word index `limit`.
    pub fn with_word_limit(limit: usize) -> Indexer {
        Indexer {
            word_limit: limit,
            words_doc_indexes: BTreeMap::new(),
            docs_words: HashMap::new(),
        }
    }

    /// Indexes `text` as the attribute `attr` of the document `id`.
    ///
    /// The lowercased text is always indexed. Its deunicoded form is
    /// indexed too when the text contains no CJK character and the
    /// deunicoded form differs from it.
    pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
        let lowered = text.to_lowercase();
        let ascii = deunicode_with_tofu(&lowered, "");

        // TODO compute the deunicoded version after the cjk check
        let ascii_variant = if lowered.contains(is_cjk) || lowered == ascii {
            None
        } else {
            Some(ascii)
        };

        for variant in std::iter::once(lowered).chain(ascii_variant) {
            for token in Tokenizer::new(&variant) {
                let keep_going = index_token(
                    token,
                    id,
                    attr,
                    self.word_limit,
                    &mut self.words_doc_indexes,
                    &mut self.docs_words,
                );

                if !keep_going { break }
            }
        }
    }

    /// Indexes a sequence of texts as the attribute `attr` of the
    /// document `id`.
    ///
    /// The sequence is tokenized twice: once lowercased, once with every
    /// text that contains no CJK character replaced by its deunicoded form.
    pub fn index_text_seq<'a, I, IT>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
    where I: IntoIterator<Item=&'a str, IntoIter=IT>,
          IT: Iterator<Item = &'a str> + Clone,
    {
        // TODO serialize this to one call to the SeqTokenizer loop

        let lowercased: Vec<_> = iter.into_iter().map(str::to_lowercase).collect();
        for token in SeqTokenizer::new(lowercased.iter().map(String::as_str)) {
            let keep_going = index_token(
                token,
                id,
                attr,
                self.word_limit,
                &mut self.words_doc_indexes,
                &mut self.docs_words,
            );

            if !keep_going { break }
        }

        // a text equal to its deunicoded form gives the same string either way
        let deunicoded: Vec<_> = lowercased
            .into_iter()
            .map(|text| {
                if text.contains(is_cjk) {
                    text
                } else {
                    deunicode_with_tofu(&text, "")
                }
            })
            .collect();

        for token in SeqTokenizer::new(deunicoded.iter().map(String::as_str)) {
            let keep_going = index_token(
                token,
                id,
                attr,
                self.word_limit,
                &mut self.words_doc_indexes,
                &mut self.docs_words,
            );

            if !keep_going { break }
        }
    }

    /// Consumes the indexer and returns sorted, deduplicated sets.
    pub fn build(self) -> Indexed {
        let Indexer { words_doc_indexes, docs_words, .. } = self;

        let words_doc_indexes = words_doc_indexes
            .into_iter()
            .map(|(word, positions)| (word, SetBuf::from_dirty(positions)))
            .collect();

        let docs_words = docs_words
            .into_iter()
            .map(|(id, mut doc_words)| {
                // an fst set must be built from sorted, unique keys
                doc_words.sort_unstable();
                doc_words.dedup();
                (id, fst::Set::from_iter(doc_words).unwrap())
            })
            .collect();

        Indexed { words_doc_indexes, docs_words }
    }
}
/// Records `token` in both maps and tells the caller whether it should
/// keep feeding tokens.
///
/// Returns `false`, without recording anything, when the token reached
/// `word_limit` or when it cannot be converted into a `DocIndex`.
fn index_token(
    token: Token,
    id: DocumentId,
    attr: SchemaAttr,
    word_limit: usize,
    words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
    docs_words: &mut HashMap<DocumentId, Vec<Word>>,
) -> bool
{
    if token.word_index >= word_limit {
        return false
    }

    let docindex = match token_to_docindex(id, attr, token) {
        Some(docindex) => docindex,
        None => return false,
    };

    let word = Vec::from(token.word);
    words_doc_indexes.entry(word.clone()).or_insert_with(Vec::new).push(docindex);
    docs_words.entry(id).or_insert_with(Vec::new).push(word);

    true
}
/// Builds the `DocIndex` of `token` in the attribute `attr` of the
/// document `id`.
///
/// Returns `None` when the word index, the char index or the number of
/// chars of the word does not fit in a `u16`.
fn token_to_docindex(id: DocumentId, attr: SchemaAttr, token: Token) -> Option<DocIndex> {
    Some(DocIndex {
        document_id: id,
        attribute: attr.0,
        word_index: u16::try_from(token.word_index).ok()?,
        char_index: u16::try_from(token.char_index).ok()?,
        char_length: u16::try_from(token.word.chars().count()).ok()?,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    // The texts below use the typographic apostrophe (U+2019). The
    // assertions expect "l", "aspirateur" and "ai" to be separate words,
    // and the last one expects the word to be kept whole, apostrophe
    // included.

    #[test]
    fn strange_apostrophe() {
        let mut indexer = Indexer::new();

        let docid = DocumentId(0);
        let attr = SchemaAttr(0);
        let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
        indexer.index_text(docid, attr, text);

        let Indexed { words_doc_indexes, .. } = indexer.build();

        assert!(words_doc_indexes.get(&b"l"[..]).is_some());
        assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
        assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
        assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());

        // with the ugly apostrophe...
        assert!(words_doc_indexes.get(&"l’éteindre".to_owned().into_bytes()).is_some());
    }

    #[test]
    fn strange_apostrophe_in_sequence() {
        let mut indexer = Indexer::new();

        let docid = DocumentId(0);
        let attr = SchemaAttr(0);
        let text = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"];
        indexer.index_text_seq(docid, attr, text);

        let Indexed { words_doc_indexes, .. } = indexer.build();

        assert!(words_doc_indexes.get(&b"l"[..]).is_some());
        assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
        assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
        assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());

        // with the ugly apostrophe...
        assert!(words_doc_indexes.get(&"l’éteindre".to_owned().into_bytes()).is_some());
    }
}

View File

@ -1,15 +0,0 @@
mod cf_tree;
mod database;
mod document_attr_key;
mod indexer;
mod number;
mod ranked_map;
mod serde;
pub use self::cf_tree::{CfTree, CfIter};
pub use self::database::{Database, Index, CustomSettingsIndex};
pub use self::number::Number;
pub use self::ranked_map::RankedMap;
pub use self::serde::{compute_document_id, extract_document_id, value_to_string};
/// Result type of the operations that can only fail with a RocksDB error.
pub type RocksDbResult<T> = Result<T, rocksdb::Error>;

View File

@ -1,132 +0,0 @@
use std::collections::HashSet;
use std::io::Cursor;
use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::decode::{Deserializer as RmpDeserializer, ReadReader};
use rmp_serde::decode::{Error as RmpError};
use serde::{de, forward_to_deserialize_any};
use crate::database::Index;
#[derive(Debug)]
pub enum DeserializerError {
RmpError(RmpError),
RocksDbError(rocksdb::Error),
Custom(String),
}
impl de::Error for DeserializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
DeserializerError::Custom(msg.to_string())
}
}
impl fmt::Display for DeserializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DeserializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
DeserializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
DeserializerError::Custom(s) => f.write_str(s),
}
}
}
impl Error for DeserializerError {}
impl From<RmpError> for DeserializerError {
fn from(error: RmpError) -> DeserializerError {
DeserializerError::RmpError(error)
}
}
impl From<rocksdb::Error> for DeserializerError {
fn from(error: rocksdb::Error) -> DeserializerError {
DeserializerError::RocksDbError(error)
}
}
/// serde deserializer exposing one stored document as a map from
/// attribute name to value.
pub struct Deserializer<'a> {
/// Id of the document to read.
pub document_id: DocumentId,
/// Index the document is read from.
pub index: &'a Index,
/// Attributes to keep; `None` keeps every displayed attribute.
pub fields: Option<&'a HashSet<SchemaAttr>>,
}
/// A document is always presented to the visitor as a map, whatever the
/// type requested by the caller.
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
{
    type Error = DeserializerError;

    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where V: de::Visitor<'de>
    {
        self.deserialize_map(visitor)
    }

    forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
        tuple_struct struct enum identifier ignored_any
    }

    /// Feeds the visitor with the stored fields of the document, keeping
    /// only the displayed attributes that pass the optional `fields` filter.
    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where V: de::Visitor<'de>
    {
        let schema = self.index.schema();
        let documents = self.index.as_ref().documents_index;

        let entries = documents
            .document_fields(self.document_id)?
            .filter_map(|(attr, value)| {
                if !schema.props(attr).is_displayed() {
                    return None
                }

                if let Some(wanted) = self.fields {
                    if !wanted.contains(&attr) {
                        return None
                    }
                }

                let attribute_name = schema.attribute_name(attr);
                Some((attribute_name, Value::new(value)))
            });

        let map_deserializer = de::value::MapDeserializer::new(entries);
        visitor.visit_map(map_deserializer).map_err(DeserializerError::from)
    }
}
/// MessagePack-encoded field value wrapped so it can be handed to serde
/// as a deserializer.
struct Value<A>(RmpDeserializer<ReadReader<Cursor<A>>>) where A: AsRef<[u8]>;

impl<A> Value<A> where A: AsRef<[u8]>
{
    /// Wraps the raw bytes of a field into a MessagePack deserializer.
    fn new(value: A) -> Value<A> {
        let reader = Cursor::new(value);
        Value(RmpDeserializer::new(reader))
    }
}

impl<'de, A> de::IntoDeserializer<'de, RmpError> for Value<A>
where A: AsRef<[u8]>,
{
    type Deserializer = Self;

    fn into_deserializer(self) -> Self::Deserializer {
        self
    }
}

/// Every request is forwarded to the wrapped MessagePack deserializer.
impl<'de, A> de::Deserializer<'de> for Value<A>
where A: AsRef<[u8]>,
{
    type Error = RmpError;

    fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
    where V: de::Visitor<'de>
    {
        let inner = &mut self.0;
        inner.deserialize_any(visitor)
    }

    forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
        tuple_struct map struct enum identifier ignored_any
    }
}

View File

@ -1,96 +0,0 @@
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;
use serde_json::json;
use meilidb_data::Database;
use meilidb_schema::{Schema, SchemaBuilder, DISPLAYED, INDEXED};
/// Builds the schema shared by the tests: `objectId` is the identifier, and both
/// `objectId` and `title` are declared as displayed and indexed.
fn simple_schema() -> Schema {
let mut builder = SchemaBuilder::with_identifier("objectId");
builder.new_attribute("objectId", DISPLAYED | INDEXED);
builder.new_attribute("title", DISPLAYED | INDEXED);
builder.build()
}
#[test]
fn insert_delete_document() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let database = Database::open(&tmp_dir).unwrap();

    // Flag raised by the update callback and reset after each check.
    let update_seen = Arc::new(AtomicBool::new(false));

    let index = database.create_index("hello", simple_schema()).unwrap();

    let callback_flag = update_seen.clone();
    index.set_update_callback(move |_| callback_flag.store(true, Relaxed));

    let document = json!({ "objectId": 123, "title": "hello" });

    // Step 1: add the document and wait for the update to be processed.
    let mut additions = index.documents_addition();
    additions.update_document(&document);
    let addition_id = additions.finalize().unwrap();
    let addition_status = index.update_status_blocking(addition_id).unwrap();
    assert!(update_seen.swap(false, Relaxed));
    assert!(addition_status.result.is_ok());
    assert_eq!(index.number_of_documents(), 1);

    let found = index.query_builder().query("hello", 0..10).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(index.document(None, found[0].id).unwrap().as_ref(), Some(&document));

    // Step 2: delete the same document and check that it is gone.
    let mut deletions = index.documents_deletion();
    deletions.delete_document(&document).unwrap();
    let deletion_id = deletions.finalize().unwrap();
    let deletion_status = index.update_status_blocking(deletion_id).unwrap();
    assert!(update_seen.swap(false, Relaxed));
    assert!(deletion_status.result.is_ok());
    assert_eq!(index.number_of_documents(), 0);

    let found = index.query_builder().query("hello", 0..10).unwrap();
    assert_eq!(found.len(), 0);
}
#[test]
fn replace_document() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let database = Database::open(&tmp_dir).unwrap();

    // Flag raised by the update callback and reset after each check.
    let update_seen = Arc::new(AtomicBool::new(false));

    let index = database.create_index("hello", simple_schema()).unwrap();

    let callback_flag = update_seen.clone();
    index.set_update_callback(move |_| callback_flag.store(true, Relaxed));

    // Both documents share the same identifier, so the second replaces the first.
    let original = json!({ "objectId": 123, "title": "hello" });
    let replacement = json!({ "objectId": 123, "title": "coucou" });

    // Step 1: insert the original document.
    let mut first_addition = index.documents_addition();
    first_addition.update_document(&original);
    let first_id = first_addition.finalize().unwrap();
    let first_status = index.update_status_blocking(first_id).unwrap();
    assert!(update_seen.swap(false, Relaxed));
    assert!(first_status.result.is_ok());
    assert_eq!(index.number_of_documents(), 1);

    let found = index.query_builder().query("hello", 0..10).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(index.document(None, found[0].id).unwrap().as_ref(), Some(&original));

    // Step 2: insert the replacement under the same identifier.
    let mut second_addition = index.documents_addition();
    second_addition.update_document(&replacement);
    let second_id = second_addition.finalize().unwrap();
    let second_status = index.update_status_blocking(second_id).unwrap();
    assert!(update_seen.swap(false, Relaxed));
    assert!(second_status.result.is_ok());
    assert_eq!(index.number_of_documents(), 1);

    // The old title no longer matches; the new one does.
    let found = index.query_builder().query("hello", 0..10).unwrap();
    assert_eq!(found.len(), 0);

    let found = index.query_builder().query("coucou", 0..10).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(index.document(None, found[0].id).unwrap().as_ref(), Some(&replacement));
}

56
meilidb-http/Cargo.toml Normal file
View File

@ -0,0 +1,56 @@
[package]
name = "meilidb-http"
version = "0.2.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
edition = "2018"
[dependencies]
bincode = "1.2.0"
chrono = { version = "0.4.9", features = ["serde"] }
crossbeam-channel = "0.4.0"
env_logger = "0.7.1"
envconfig = "0.5.1"
envconfig_derive = "0.5.1"
heed = "0.5.0"
http = "0.1.19"
indexmap = { version = "1.3.0", features = ["serde-1"] }
jemallocator = "0.3.2"
log = "0.4.8"
main_error = "0.1.0"
meilidb-core = { path = "../meilidb-core", version = "0.7.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.6.0" }
pretty-bytes = "0.2.2"
rand = "0.7.2"
rayon = "1.2.0"
serde = { version = "1.0.101", features = ["derive"] }
serde_json = { version = "1.0.41", features = ["preserve_order"] }
structopt = "0.3.3"
sysinfo = "0.9.5"
walkdir = "2.2.9"
[dependencies.async-compression]
default-features = false
features = ["stream", "gzip", "zlib", "brotli", "zstd"]
version = "=0.1.0-alpha.7"
[dependencies.tide]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"
[dependencies.tide-log]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"
[dependencies.tide-slog]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"
[dependencies.tide-compression]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"
[build-dependencies]
vergen = "3.0.4"

10
meilidb-http/build.rs Normal file
View File

@ -0,0 +1,10 @@
use vergen::{generate_cargo_keys, ConstantsFlags};
/// Build script entry point: emits the `cargo:` keys produced by `vergen`.
fn main() {
    // Setup the flags, toggling off the 'SEMVER_FROM_CARGO_PKG' flag
    let mut flags = ConstantsFlags::all();
    flags.toggle(ConstantsFlags::SEMVER_FROM_CARGO_PKG);

    // Generate the 'cargo:' key output.
    // The configured `flags` must be passed here: handing over a fresh
    // `ConstantsFlags::all()` would silently discard the toggle above.
    generate_cargo_keys(flags).expect("Unable to generate the cargo keys!");
}

153
meilidb-http/src/data.rs Normal file
View File

@ -0,0 +1,153 @@
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use heed::types::{SerdeBincode, Str};
use log::*;
use meilidb_core::{Database, MResult};
use sysinfo::Pid;
use crate::option::Opt;
use crate::routes::index::index_update_callback;
pub type FreqsMap = HashMap<String, usize>;
type SerdeFreqsMap = SerdeBincode<FreqsMap>;
type SerdeDatetime = SerdeBincode<DateTime<Utc>>;
/// Cheaply clonable handle over the shared `DataInner` state.
#[derive(Clone)]
pub struct Data {
// Shared state; cloning `Data` only clones this `Arc`.
inner: Arc<DataInner>,
}
/// Gives direct access to the fields and methods of `DataInner` through a `Data` value.
impl Deref for Data {
type Target = DataInner;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
/// State wrapped by `Data`.
#[derive(Clone)]
pub struct DataInner {
/// Shared handle to the opened database.
pub db: Arc<Database>,
/// Path of the database, copied from `Opt::database_path` by `Data::new`.
pub db_path: String,
/// Optional admin token, copied from `Opt::admin_token` by `Data::new`.
pub admin_token: Option<String>,
/// Process id returned by `sysinfo::get_current_pid()` when the state is built.
pub server_pid: Pid,
}
impl DataInner {
pub fn is_indexing(&self, reader: &heed::RoTxn, index: &str) -> MResult<Option<bool>> {
match self.db.open_index(&index) {
Some(index) => index.current_update_id(&reader).map(|u| Some(u.is_some())),
None => Ok(None),
}
}
pub fn last_update(
&self,
reader: &heed::RoTxn,
index_name: &str,
) -> MResult<Option<DateTime<Utc>>> {
let key = format!("last-update-{}", index_name);
match self
.db
.common_store()
.get::<Str, SerdeDatetime>(&reader, &key)?
{
Some(datetime) => Ok(Some(datetime)),
None => Ok(None),
}
}
pub fn set_last_update(&self, writer: &mut heed::RwTxn, index_name: &str) -> MResult<()> {
let key = format!("last-update-{}", index_name);
self.db
.common_store()
.put::<Str, SerdeDatetime>(writer, &key, &Utc::now())
.map_err(Into::into)
}
pub fn fields_frequency(
&self,
reader: &heed::RoTxn,
index_name: &str,
) -> MResult<Option<FreqsMap>> {
let key = format!("fields-frequency-{}", index_name);
match self
.db
.common_store()
.get::<Str, SerdeFreqsMap>(&reader, &key)?
{
Some(freqs) => Ok(Some(freqs)),
None => Ok(None),
}
}
pub fn compute_stats(&self, writer: &mut heed::RwTxn, index_name: &str) -> MResult<()> {
let index = match self.db.open_index(&index_name) {
Some(index) => index,
None => {
error!("Impossible to retrieve index {}", index_name);
return Ok(());
}
};
let schema = match index.main.schema(&writer)? {
Some(schema) => schema,
None => return Ok(()),
};
let all_documents_fields = index
.documents_fields_counts
.all_documents_fields_counts(&writer)?;
// count fields frequencies
let mut fields_frequency = HashMap::<_, usize>::new();
for result in all_documents_fields {
let (_, attr, _) = result?;
*fields_frequency.entry(attr).or_default() += 1;
}
// convert attributes to their names
let frequency: HashMap<_, _> = fields_frequency
.into_iter()
.map(|(a, c)| (schema.attribute_name(a).to_owned(), c))
.collect();
let key = format!("fields-frequency-{}", index_name);
self.db
.common_store()
.put::<Str, SerdeFreqsMap>(writer, &key, &frequency)?;
Ok(())
}
}
impl Data {
    /// Opens (or creates) the database at `opt.database_path`, builds the
    /// shared state and registers the update callback on the database.
    ///
    /// Panics when the current pid cannot be read or the database cannot be
    /// opened, since both results are unwrapped.
    pub fn new(opt: Opt) -> Data {
        let db_path = opt.database_path.clone();
        let admin_token = opt.admin_token.clone();
        let server_pid = sysinfo::get_current_pid().unwrap();

        let database = Database::open_or_create(opt.database_path.clone()).unwrap();
        let db = Arc::new(database);

        let data = Data {
            inner: Arc::new(DataInner {
                db: db.clone(),
                db_path,
                admin_token,
                server_pid,
            }),
        };

        // The callback owns its own clone of the state.
        let callback_context = data.clone();
        db.set_update_callback(Box::new(move |index_name, status| {
            index_update_callback(&index_name, &callback_context, status);
        }));

        data
    }
}

126
meilidb-http/src/error.rs Normal file
View File

@ -0,0 +1,126 @@
use std::fmt::Display;
use http::status::StatusCode;
use log::{error, warn};
use serde::{Deserialize, Serialize};
use tide::response::IntoResponse;
use tide::Response;
pub type SResult<T> = Result<T, ResponseError>;
/// Errors that can be turned into an HTTP response (see the `IntoResponse` impl).
pub enum ResponseError {
/// Logged, then answered with a generic message and status 500.
Internal(String),
/// Answered with the given message and status 400.
BadRequest(String),
/// Answered with status 403.
InvalidToken(String),
/// Answered with the given message and status 404.
NotFound(String),
/// Carries the index name; answered with status 404.
IndexNotFound(String),
/// Carries the document id; answered with status 404.
DocumentNotFound(String),
/// Carries the header name; answered with status 401.
MissingHeader(String),
/// Carries the parameter name and the error text; answered with status 400.
BadParameter(String, String),
/// Answered with status 400.
OpenIndex(String),
/// Answered with status 400.
CreateIndex(String),
/// Answered with status 503.
Maintenance,
}
// Convenience constructors: each one stringifies its `Display` argument(s)
// and wraps them in the variant of the same name.
impl ResponseError {
/// Builds `ResponseError::Internal` from any displayable message.
pub fn internal(message: impl Display) -> ResponseError {
ResponseError::Internal(message.to_string())
}
/// Builds `ResponseError::BadRequest` from any displayable message.
pub fn bad_request(message: impl Display) -> ResponseError {
ResponseError::BadRequest(message.to_string())
}
/// Builds `ResponseError::InvalidToken` from any displayable message.
pub fn invalid_token(message: impl Display) -> ResponseError {
ResponseError::InvalidToken(message.to_string())
}
/// Builds `ResponseError::NotFound` from any displayable message.
pub fn not_found(message: impl Display) -> ResponseError {
ResponseError::NotFound(message.to_string())
}
/// Builds `ResponseError::IndexNotFound`; the argument is rendered as the index name.
pub fn index_not_found(message: impl Display) -> ResponseError {
ResponseError::IndexNotFound(message.to_string())
}
/// Builds `ResponseError::DocumentNotFound`; the argument is rendered as the document id.
pub fn document_not_found(message: impl Display) -> ResponseError {
ResponseError::DocumentNotFound(message.to_string())
}
/// Builds `ResponseError::MissingHeader`; the argument is rendered as the header name.
pub fn missing_header(message: impl Display) -> ResponseError {
ResponseError::MissingHeader(message.to_string())
}
/// Builds `ResponseError::BadParameter` from a parameter name and an error message.
pub fn bad_parameter(name: impl Display, message: impl Display) -> ResponseError {
ResponseError::BadParameter(name.to_string(), message.to_string())
}
/// Builds `ResponseError::OpenIndex` from any displayable message.
pub fn open_index(message: impl Display) -> ResponseError {
ResponseError::OpenIndex(message.to_string())
}
/// Builds `ResponseError::CreateIndex` from any displayable message.
pub fn create_index(message: impl Display) -> ResponseError {
ResponseError::CreateIndex(message.to_string())
}
}
impl IntoResponse for ResponseError {
fn into_response(self) -> Response {
match self {
ResponseError::Internal(err) => {
error!("internal server error: {}", err);
error(
String::from("Internal server error"),
StatusCode::INTERNAL_SERVER_ERROR,
)
}
ResponseError::BadRequest(err) => {
warn!("bad request: {}", err);
error(err, StatusCode::BAD_REQUEST)
}
ResponseError::InvalidToken(err) => {
error(format!("Invalid Token: {}", err), StatusCode::FORBIDDEN)
}
ResponseError::NotFound(err) => error(err, StatusCode::NOT_FOUND),
ResponseError::IndexNotFound(index) => {
error(format!("Index {} not found", index), StatusCode::NOT_FOUND)
}
ResponseError::DocumentNotFound(id) => error(
format!("Document with id {} not found", id),
StatusCode::NOT_FOUND,
),
ResponseError::MissingHeader(header) => error(
format!("Header {} is missing", header),
StatusCode::UNAUTHORIZED,
),
ResponseError::BadParameter(param, e) => error(
format!("Url parameter {} error: {}", param, e),
StatusCode::BAD_REQUEST,
),
ResponseError::CreateIndex(err) => error(
format!("Impossible to create index; {}", err),
StatusCode::BAD_REQUEST,
),
ResponseError::OpenIndex(err) => error(
format!("Impossible to open index; {}", err),
StatusCode::BAD_REQUEST,
),
ResponseError::Maintenance => error(
String::from("Server is in maintenance, please try again later"),
StatusCode::SERVICE_UNAVAILABLE,
),
}
}
}
/// JSON body sent with every error response: `{ "message": ... }`.
#[derive(Serialize, Deserialize)]
struct ErrorMessage {
message: String,
}
/// Builds a JSON response carrying `message` with the given HTTP `status`.
fn error(message: String, status: StatusCode) -> Response {
    let body = ErrorMessage { message };
    let json_response = tide::response::json(body);
    json_response.with_status(status).into_response()
}

View File

@ -0,0 +1,570 @@
use crate::routes::setting::{RankingOrdering, SettingBody};
use indexmap::IndexMap;
use log::*;
use meilidb_core::criterion::*;
use meilidb_core::Highlight;
use meilidb_core::{Index, RankedMap};
use meilidb_schema::{Schema, SchemaAttr};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::convert::From;
use std::error;
use std::fmt;
use std::time::{Duration, Instant};
/// Errors raised while running a search and formatting its results.
#[derive(Debug)]
pub enum Error {
/// The query itself failed; carries the underlying error text.
SearchDocuments(String),
/// A matching document could not be read; carries its id and the error text.
RetrieveDocument(u64, String),
/// A matching document id has no stored document.
DocumentNotFound(u64),
/// A field requested for cropping is not a string; carries the field name.
CropFieldWrongType(String),
/// Carries the name of a field that is missing from the document.
AttributeNotFoundOnDocument(String),
/// Carries the name of a field that is missing from the schema.
AttributeNotFoundOnSchema(String),
/// A filter was given without a value to compare against.
MissingFilterValue,
/// A filter names an attribute that the schema does not know.
UnknownFilteredAttribute,
/// Any other failure; carries the error text.
Internal(String),
}
// Empty impl: the search `Error` uses the default methods of `std::error::Error`.
impl error::Error for Error {}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use Error::*;
match self {
SearchDocuments(err) => write!(f, "impossible to search documents; {}", err),
RetrieveDocument(id, err) => write!(
f,
"impossible to retrieve the document with id: {}; {}",
id, err
),
DocumentNotFound(id) => write!(f, "document {} not found", id),
CropFieldWrongType(field) => {
write!(f, "the field {} cannot be cropped it's not a string", field)
}
AttributeNotFoundOnDocument(field) => {
write!(f, "field {} is not found on document", field)
}
AttributeNotFoundOnSchema(field) => write!(f, "field {} is not found on schema", field),
MissingFilterValue => f.write_str("a filter doesn't have a value to compare it with"),
UnknownFilteredAttribute => {
f.write_str("a filter is specifying an unknown schema attribute")
}
Internal(err) => write!(f, "internal error; {}", err),
}
}
}
/// Any `meilidb_core::Error` is reported as `Error::Internal` carrying its text.
impl From<meilidb_core::Error> for Error {
fn from(error: meilidb_core::Error) -> Self {
Error::Internal(error.to_string())
}
}
/// Extension trait giving access to a `SearchBuilder` for a query string.
pub trait IndexSearchExt {
/// Starts building a search for `query`.
fn new_search(&self, query: String) -> SearchBuilder;
}
impl IndexSearchExt for Index {
/// Creates a builder with the defaults below: offset 0, limit 20, a 30 ms
/// timeout, no attribute restriction, no filter and matches disabled.
fn new_search(&self, query: String) -> SearchBuilder {
SearchBuilder {
index: self,
query,
offset: 0,
limit: 20,
attributes_to_crop: None,
attributes_to_retrieve: None,
attributes_to_search_in: None,
attributes_to_highlight: None,
filters: None,
timeout: Duration::from_millis(30),
matches: false,
}
}
}
/// Accumulates the parameters of a search before running it with `search`.
pub struct SearchBuilder<'a> {
// Index the search runs against.
index: &'a Index,
// Raw query string.
query: String,
// Start of the requested result range.
offset: usize,
// Length of the requested result range.
limit: usize,
// Field name -> crop length handed to `crop_document`.
attributes_to_crop: Option<HashMap<String, usize>>,
// When set, restricts which fields of each document are read back.
attributes_to_retrieve: Option<HashSet<String>>,
// When set, each named attribute known to the schema is registered as searchable.
attributes_to_search_in: Option<HashSet<String>>,
// When set, fields to wrap matches of in `<em>` tags.
attributes_to_highlight: Option<HashSet<String>>,
// Optional `attribute:value` filter expression.
filters: Option<String>,
// Fetch timeout handed to the query builder.
timeout: Duration,
// When true, match positions are returned instead of highlighted text.
matches: bool,
}
impl<'a> SearchBuilder<'a> {
pub fn offset(&mut self, value: usize) -> &SearchBuilder {
self.offset = value;
self
}
pub fn limit(&mut self, value: usize) -> &SearchBuilder {
self.limit = value;
self
}
pub fn attributes_to_crop(&mut self, value: HashMap<String, usize>) -> &SearchBuilder {
self.attributes_to_crop = Some(value);
self
}
pub fn attributes_to_retrieve(&mut self, value: HashSet<String>) -> &SearchBuilder {
self.attributes_to_retrieve = Some(value);
self
}
pub fn add_retrievable_field(&mut self, value: String) -> &SearchBuilder {
let attributes_to_retrieve = self.attributes_to_retrieve.get_or_insert(HashSet::new());
attributes_to_retrieve.insert(value);
self
}
pub fn attributes_to_search_in(&mut self, value: HashSet<String>) -> &SearchBuilder {
self.attributes_to_search_in = Some(value);
self
}
pub fn add_attribute_to_search_in(&mut self, value: String) -> &SearchBuilder {
let attributes_to_search_in = self.attributes_to_search_in.get_or_insert(HashSet::new());
attributes_to_search_in.insert(value);
self
}
pub fn attributes_to_highlight(&mut self, value: HashSet<String>) -> &SearchBuilder {
self.attributes_to_highlight = Some(value);
self
}
pub fn filters(&mut self, value: String) -> &SearchBuilder {
self.filters = Some(value);
self
}
pub fn timeout(&mut self, value: Duration) -> &SearchBuilder {
self.timeout = value;
self
}
pub fn get_matches(&mut self) -> &SearchBuilder {
self.matches = true;
self
}
pub fn search(&self, reader: &heed::RoTxn) -> Result<SearchResult, Error> {
let schema = self.index.main.schema(reader);
let schema = schema.map_err(|e| Error::Internal(e.to_string()))?;
let schema = match schema {
Some(schema) => schema,
None => return Err(Error::Internal(String::from("missing schema"))),
};
let ranked_map = self.index.main.ranked_map(reader);
let ranked_map = ranked_map.map_err(|e| Error::Internal(e.to_string()))?;
let ranked_map = ranked_map.unwrap_or_default();
let start = Instant::now();
// Change criteria
let mut query_builder = match self.get_criteria(reader, &ranked_map, &schema)? {
Some(criteria) => self.index.query_builder_with_criteria(criteria),
None => self.index.query_builder(),
};
// Filter searchable fields
if let Some(fields) = &self.attributes_to_search_in {
for attribute in fields.iter().filter_map(|f| schema.attribute(f)) {
query_builder.add_searchable_attribute(attribute.0);
}
}
if let Some(filters) = &self.filters {
let mut split = filters.split(':');
match (split.next(), split.next()) {
(Some(_), None) | (Some(_), Some("")) => return Err(Error::MissingFilterValue),
(Some(attr), Some(value)) => {
let ref_reader = reader;
let ref_index = &self.index;
let value = value.trim().to_lowercase();
let attr = match schema.attribute(attr) {
Some(attr) => attr,
None => return Err(Error::UnknownFilteredAttribute),
};
query_builder.with_filter(move |id| {
let attr = attr;
let index = ref_index;
let reader = ref_reader;
match index.document_attribute::<Value>(reader, id, attr) {
Ok(Some(Value::String(s))) => s.to_lowercase() == value,
Ok(Some(Value::Bool(b))) => {
(value == "true" && b) || (value == "false" && !b)
}
Ok(Some(Value::Array(a))) => {
a.into_iter().any(|s| s.as_str() == Some(&value))
}
_ => false,
}
});
}
(_, _) => (),
}
}
query_builder.with_fetch_timeout(self.timeout);
let docs =
query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));
let mut hits = Vec::with_capacity(self.limit);
for doc in docs.map_err(|e| Error::SearchDocuments(e.to_string()))? {
// retrieve the content of document in kv store
let mut fields: Option<HashSet<&str>> = None;
if let Some(attributes_to_retrieve) = &self.attributes_to_retrieve {
let mut set = HashSet::new();
for field in attributes_to_retrieve {
set.insert(field.as_str());
}
fields = Some(set);
}
let document: IndexMap<String, Value> = self
.index
.document(reader, fields.as_ref(), doc.id)
.map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))?
.ok_or(Error::DocumentNotFound(doc.id.0))?;
let mut formatted = document.clone();
let mut matches = doc.highlights.clone();
// Crops fields if needed
if let Some(fields) = &self.attributes_to_crop {
crop_document(&mut formatted, &mut matches, &schema, fields);
}
// Transform to readable matches
let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema);
if !self.matches {
if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight);
}
}
let matches_info = if self.matches { Some(matches) } else { None };
let hit = SearchHit {
document,
formatted,
matches_info,
};
hits.push(hit);
}
let time_ms = start.elapsed().as_millis() as usize;
let results = SearchResult {
hits,
offset: self.offset,
limit: self.limit,
processing_time_ms: time_ms,
query: self.query.to_string(),
};
Ok(results)
}
pub fn get_criteria(
&self,
reader: &heed::RoTxn,
ranked_map: &'a RankedMap,
schema: &Schema,
) -> Result<Option<Criteria<'a>>, Error> {
let current_settings = match self.index.main.customs(reader).unwrap() {
Some(bytes) => bincode::deserialize(bytes).unwrap(),
None => SettingBody::default(),
};
let ranking_rules = &current_settings.ranking_rules;
let ranking_order = &current_settings.ranking_order;
if let Some(ranking_rules) = ranking_rules {
let mut builder = CriteriaBuilder::with_capacity(7 + ranking_rules.len());
if let Some(ranking_rules_order) = ranking_order {
for rule in ranking_rules_order {
match rule.as_str() {
"_sum_of_typos" => builder.push(SumOfTypos),
"_number_of_words" => builder.push(NumberOfWords),
"_word_proximity" => builder.push(WordsProximity),
"_sum_of_words_attribute" => builder.push(SumOfWordsAttribute),
"_sum_of_words_position" => builder.push(SumOfWordsPosition),
"_exact" => builder.push(Exact),
_ => {
let order = match ranking_rules.get(rule.as_str()) {
Some(o) => o,
None => continue,
};
let custom_ranking = match order {
RankingOrdering::Asc => {
SortByAttr::lower_is_better(&ranked_map, &schema, &rule)
.unwrap()
}
RankingOrdering::Dsc => {
SortByAttr::higher_is_better(&ranked_map, &schema, &rule)
.unwrap()
}
};
builder.push(custom_ranking);
}
}
}
builder.push(DocumentId);
return Ok(Some(builder.build()));
} else {
builder.push(SumOfTypos);
builder.push(NumberOfWords);
builder.push(WordsProximity);
builder.push(SumOfWordsAttribute);
builder.push(SumOfWordsPosition);
builder.push(Exact);
for (rule, order) in ranking_rules.iter() {
let custom_ranking = match order {
RankingOrdering::Asc => {
SortByAttr::lower_is_better(&ranked_map, &schema, &rule).unwrap()
}
RankingOrdering::Dsc => {
SortByAttr::higher_is_better(&ranked_map, &schema, &rule).unwrap()
}
};
builder.push(custom_ranking);
}
builder.push(DocumentId);
return Ok(Some(builder.build()));
}
}
Ok(None)
}
}
/// Position of one match inside a field, expressed with a start offset and a length.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct MatchPosition {
/// Offset at which the match begins.
pub start: usize,
/// Length of the match.
pub length: usize,
}
impl Ord for MatchPosition {
    /// Orders positions by `start`, breaking ties with `length`.
    fn cmp(&self, other: &Self) -> Ordering {
        self.start
            .cmp(&other.start)
            .then_with(|| self.length.cmp(&other.length))
    }
}
/// Map keyed by attribute name holding JSON values.
pub type HighlightInfos = HashMap<String, Value>;
/// Map keyed by attribute name holding the match positions found in that attribute.
pub type MatchesInfos = HashMap<String, Vec<MatchPosition>>;
// pub type RankingInfos = HashMap<String, u64>;
/// One search result as serialized to the client.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchHit {
/// Document fields, flattened into the top level of the serialized hit.
#[serde(flatten)]
pub document: IndexMap<String, Value>,
/// Cropped / highlighted copy of the fields, serialized as `_formatted` unless empty.
#[serde(rename = "_formatted", skip_serializing_if = "IndexMap::is_empty")]
pub formatted: IndexMap<String, Value>,
/// Match positions per attribute, serialized as `_matchesInfo` only when present.
#[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")]
pub matches_info: Option<MatchesInfos>,
}
/// Full answer to a search request; field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
/// Hits in the order returned by the query.
pub hits: Vec<SearchHit>,
/// Offset the search was run with.
pub offset: usize,
/// Limit the search was run with.
pub limit: usize,
/// Elapsed time measured by `search`, in milliseconds.
pub processing_time_ms: usize,
/// Query string the search was run with.
pub query: String,
// pub parsed_query: String,
// pub params: Option<String>,
}
/// Crops `text` to a window of `context * 2` characters starting `context`
/// characters before the first match (or at 0 when there is no match).
///
/// Returns the cropped text together with the leading matches that end inside
/// the window, their `char_index` shifted to be relative to the cropped text.
/// Matches are consumed in order and the scan stops at the first one that
/// ends past the window.
fn crop_text(
    text: &str,
    matches: impl IntoIterator<Item = Highlight>,
    context: usize,
) -> (String, Vec<Highlight>) {
    let mut matches = matches.into_iter().peekable();

    let first_index = match matches.peek() {
        Some(first) => first.char_index as usize,
        None => 0,
    };
    let start = first_index.saturating_sub(context);
    let window = context * 2;

    let cropped: String = text.chars().skip(start).take(window).collect();

    let mut kept = Vec::new();
    for highlight in matches {
        let highlight_end = (highlight.char_index as usize) + (highlight.char_length as usize);
        if highlight_end > start + window {
            break;
        }
        kept.push(Highlight {
            char_index: highlight.char_index - start as u16,
            ..highlight
        });
    }

    (cropped, kept)
}
/// Crops, in place, every string field of `document` listed in `fields`.
///
/// For each cropped field, the matches of that attribute are removed from
/// `matches` and replaced by the ones returned by `crop_text`. Fields unknown
/// to the schema, missing from the document or not holding a string are left
/// untouched.
fn crop_document(
    document: &mut IndexMap<String, Value>,
    matches: &mut Vec<Highlight>,
    schema: &Schema,
    fields: &HashMap<String, usize>,
) {
    matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));

    for (field, length) in fields {
        let attribute = match schema.attribute(field) {
            Some(attribute) => attribute,
            None => continue,
        };

        if let Some(Value::String(ref mut original_text)) = document.get_mut(field) {
            let field_matches = matches
                .iter()
                .filter(|m| SchemaAttr::new(m.attribute) == attribute)
                .cloned();

            let (cropped_text, cropped_matches) =
                crop_text(original_text, field_matches, *length);

            *original_text = cropped_text;

            // Swap the matches of this attribute for their cropped counterparts.
            matches.retain(|m| SchemaAttr::new(m.attribute) != attribute);
            matches.extend_from_slice(&cropped_matches);
        }
    }
}
/// Groups `matches` by attribute name into sorted, deduplicated positions.
///
/// When `attributes_to_retrieve` is `Some`, matches belonging to an attribute
/// outside that set are dropped.
fn calculate_matches(
    matches: Vec<Highlight>,
    attributes_to_retrieve: Option<HashSet<String>>,
    schema: &Schema,
) -> MatchesInfos {
    let mut matches_result: HashMap<String, Vec<MatchPosition>> = HashMap::new();

    for m in matches.iter() {
        let attribute = schema.attribute_name(SchemaAttr::new(m.attribute));

        // Borrow the set instead of cloning it for every match.
        if let Some(attributes_to_retrieve) = &attributes_to_retrieve {
            if !attributes_to_retrieve.contains(attribute) {
                continue;
            }
        }

        matches_result
            .entry(attribute.to_string())
            .or_insert_with(Vec::new)
            .push(MatchPosition {
                start: m.char_index as usize,
                length: m.char_length as usize,
            });
    }

    for positions in matches_result.values_mut() {
        positions.sort_unstable();
        positions.dedup();
    }

    matches_result
}
fn calculate_highlights(
document: &IndexMap<String, Value>,
matches: &MatchesInfos,
attributes_to_highlight: &HashSet<String>,
) -> IndexMap<String, Value> {
let mut highlight_result = IndexMap::new();
for (attribute, matches) in matches.iter() {
if attributes_to_highlight.contains(attribute) {
if let Some(Value::String(value)) = document.get(attribute) {
let value: Vec<_> = value.chars().collect();
let mut highlighted_value = String::new();
let mut index = 0;
for m in matches {
if m.start >= index {
let before = value.get(index..m.start);
let highlighted = value.get(m.start..(m.start + m.length));
if let (Some(before), Some(highlighted)) = (before, highlighted) {
highlighted_value.extend(before);
highlighted_value.push_str("<em>");
highlighted_value.extend(highlighted);
highlighted_value.push_str("</em>");
index = m.start + m.length;
} else {
error!("value: {:?}; index: {:?}, match: {:?}", value, index, m);
}
}
}
highlighted_value.extend(value[index..].iter());
highlight_result.insert(attribute.to_string(), Value::String(highlighted_value));
};
}
}
highlight_result
}
#[cfg(test)]
mod tests {
use super::*;
// Checks that `calculate_highlights` wraps the first word of the title and the
// `Fondation` occurrence near the end of the description in `<em>` tags.
#[test]
fn calculate_highlights() {
let data = r#"{
"title": "Fondation (Isaac ASIMOV)",
"description": "En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la Fondation."
}"#;
let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
let mut attributes_to_highlight = HashSet::new();
attributes_to_highlight.insert("title".to_string());
attributes_to_highlight.insert("description".to_string());
// One match per attribute, expressed as (start, length).
let mut matches = HashMap::new();
let mut m = Vec::new();
m.push(MatchPosition {
start: 0,
length: 9,
});
matches.insert("title".to_string(), m);
let mut m = Vec::new();
m.push(MatchPosition {
start: 510,
length: 9,
});
matches.insert("description".to_string(), m);
// `super::` is required because this test shares its name with the function under test.
let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);
let mut result_expected = IndexMap::new();
result_expected.insert(
"title".to_string(),
Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()),
);
result_expected.insert("description".to_string(), Value::String("En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la <em>Fondation</em>.".to_string()));
assert_eq!(result, result_expected);
}
}

View File

@ -0,0 +1,2 @@
pub mod meilidb;
pub mod tide;

View File

@ -0,0 +1,118 @@
use crate::error::{ResponseError, SResult};
use crate::models::token::*;
use crate::Data;
use chrono::Utc;
use heed::types::{SerdeBincode, Str};
use meilidb_core::Index;
use serde_json::Value;
use tide::Context;
/// Helpers added on top of the request context.
pub trait ContextExt {
/// Checks that the caller is allowed to perform an action requiring `acl`.
fn is_allowed(&self, acl: ACL) -> SResult<()>;
/// Returns the value of the request header `name` as a `String`.
fn header(&self, name: &str) -> Result<String, ResponseError>;
/// Returns the URL parameter `name` as a `String`.
fn url_param(&self, name: &str) -> Result<String, ResponseError>;
/// Opens the index named by the `index` URL parameter.
fn index(&self) -> Result<Index, ResponseError>;
/// Returns the `identifier` URL parameter rendered as a string.
fn identifier(&self) -> Result<String, ResponseError>;
}
impl ContextExt for Context<Data> {
    /// Checks the `X-Meili-API-Key` header against the admin token, then
    /// against the stored token configuration.
    ///
    /// Access is granted when no admin token is configured, when the key is
    /// the admin token, or when the stored token is neither revoked nor
    /// expired and carries `ACL::All` or the requested `acl`.
    ///
    /// # Errors
    ///
    /// - `MissingHeader` when the API key header is absent or unreadable;
    /// - `NotFound` when no token is stored for the key;
    /// - `InvalidToken` when the token is revoked, expired or lacks the ACL;
    /// - `Internal` when the store cannot be read.
    fn is_allowed(&self, acl: ACL) -> SResult<()> {
        let admin_token = match &self.state().admin_token {
            Some(admin_token) => admin_token,
            None => return Ok(()),
        };

        let user_api_key = self.header("X-Meili-API-Key")?;
        if user_api_key == *admin_token {
            return Ok(());
        }

        // The per-index restriction below is currently never exercised
        // because the index is not extracted from the request.
        let request_index: Option<String> = None; //self.param::<String>("index").ok();

        let db = &self.state().db;
        let env = &db.env;
        let reader = env.read_txn().map_err(ResponseError::internal)?;

        let token_key = format!("{}{}", TOKEN_PREFIX_KEY, user_api_key);

        let token_config = db
            .common_store()
            .get::<Str, SerdeBincode<Token>>(&reader, &token_key)
            .map_err(ResponseError::internal)?
            .ok_or_else(|| ResponseError::not_found(format!("token key: {}", token_key)))?;

        if token_config.revoked {
            return Err(ResponseError::invalid_token("token revoked"));
        }

        if let Some(index) = request_index {
            if !token_config
                .indexes
                .iter()
                .any(|r| match_wildcard(&r, &index))
            {
                return Err(ResponseError::invalid_token(
                    "token is not allowed to access to this index",
                ));
            }
        }

        if token_config.expires_at < Utc::now() {
            return Err(ResponseError::invalid_token("token expired"));
        }

        if token_config.acl.contains(&ACL::All) {
            return Ok(());
        }

        if !token_config.acl.contains(&acl) {
            return Err(ResponseError::invalid_token("token do not have this ACL"));
        }

        Ok(())
    }

    /// Returns the value of the header `name`.
    ///
    /// # Errors
    ///
    /// Returns `MissingHeader(name)` when the header is absent or when its
    /// value cannot be converted to a string.
    fn header(&self, name: &str) -> Result<String, ResponseError> {
        let header = self
            .headers()
            .get(name)
            .ok_or_else(|| ResponseError::missing_header(name))?
            .to_str()
            // Report the header that was actually requested, not a fixed name.
            .map_err(|_| ResponseError::missing_header(name))?
            .to_string();
        Ok(header)
    }

    /// Returns the URL parameter `name`, or `BadParameter` when it cannot be read.
    fn url_param(&self, name: &str) -> Result<String, ResponseError> {
        let param = self
            .param::<String>(name)
            .map_err(|e| ResponseError::bad_parameter(name, e))?;
        Ok(param)
    }

    /// Opens the index named by the `index` URL parameter, or returns
    /// `IndexNotFound` when the database has no such index.
    fn index(&self) -> Result<Index, ResponseError> {
        let index_name = self.url_param("index")?;
        let index = self
            .state()
            .db
            .open_index(&index_name)
            .ok_or_else(|| ResponseError::index_not_found(&index_name))?;
        Ok(index)
    }

    /// Returns the `identifier` URL parameter converted to a string, or
    /// `BadParameter` when it is unreadable or has no string representation.
    fn identifier(&self) -> Result<String, ResponseError> {
        let name = self
            .param::<Value>("identifier")
            .as_ref()
            .map(meilidb_core::serde::value_to_string)
            .map_err(|e| ResponseError::bad_parameter("identifier", e))?
            .ok_or_else(|| ResponseError::bad_parameter("identifier", "missing parameter"))?;

        Ok(name)
    }
}

11
meilidb-http/src/lib.rs Normal file
View File

@ -0,0 +1,11 @@
#[macro_use]
extern crate envconfig_derive;
pub mod data;
pub mod error;
pub mod helpers;
pub mod models;
pub mod option;
pub mod routes;
pub use self::data::Data;

44
meilidb-http/src/main.rs Normal file
View File

@ -0,0 +1,44 @@
use http::header::HeaderValue;
use log::info;
use main_error::MainError;
use tide::middleware::{CorsMiddleware, CorsOrigin};
use tide_log::RequestLogger;
use meilidb_http::data::Data;
use meilidb_http::option::Opt;
use meilidb_http::routes;
use meilidb_http::routes::index::index_update_callback;
#[cfg(not(target_os = "macos"))]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
/// Server entry point: initializes logging, builds the shared state, installs
/// the middlewares and routes, then serves on `opt.http_addr`.
pub fn main() -> Result<(), MainError> {
env_logger::init();
let opt = Opt::new();
let data = Data::new(opt.clone());
// NOTE(review): `Data::new` already registers a callback forwarding to
// `index_update_callback` on the same database; this second registration
// looks redundant — confirm before removing it.
let data_cloned = data.clone();
data.db.set_update_callback(Box::new(move |name, status| {
index_update_callback(name, &data_cloned, status);
}));
let mut app = tide::App::with_state(data);
// CORS: any origin, restricted to the GET, POST and OPTIONS methods.
app.middleware(
CorsMiddleware::new()
.allow_origin(CorsOrigin::from("*"))
.allow_methods(HeaderValue::from_static("GET, POST, OPTIONS")),
);
app.middleware(RequestLogger::new());
app.middleware(tide_compression::Compression::new());
app.middleware(tide_compression::Decompression::new());
routes::load_routes(&mut app);
info!("Server HTTP enabled");
app.run(opt.http_addr)?;
Ok(())
}

View File

@ -0,0 +1,3 @@
pub mod schema;
pub mod token;
pub mod update_operation;

View File

@ -0,0 +1,118 @@
use std::collections::HashSet;
use indexmap::IndexMap;
use meilidb_schema::{Schema, SchemaBuilder, SchemaProps};
use serde::{Deserialize, Serialize};
/// Properties a field can carry in a [`SchemaBody`].
///
/// Because of `rename_all = "camelCase"`, the variants (de)serialize as
/// `"identifier"`, `"indexed"`, `"displayed"` and `"ranked"`.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum FieldProperties {
/// The field name is used as the schema identifier.
Identifier,
/// Corresponds to `SchemaProps::indexed`.
Indexed,
/// Corresponds to `SchemaProps::displayed`.
Displayed,
/// Corresponds to `SchemaProps::ranked`.
Ranked,
}
/// Serializable description of a schema: a map from each field name to the
/// set of [`FieldProperties`] attached to it.
///
/// It converts from and into a `meilidb_schema::Schema`.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct SchemaBody(IndexMap<String, HashSet<FieldProperties>>);
impl From<Schema> for SchemaBody {
fn from(value: Schema) -> SchemaBody {
let mut map = IndexMap::new();
for (name, _attr, props) in value.iter() {
let old_properties = map.entry(name.to_owned()).or_insert(HashSet::new());
if props.is_indexed() {
old_properties.insert(FieldProperties::Indexed);
}
if props.is_displayed() {
old_properties.insert(FieldProperties::Displayed);
}
if props.is_ranked() {
old_properties.insert(FieldProperties::Ranked);
}
}
let old_properties = map
.entry(value.identifier_name().to_string())
.or_insert(HashSet::new());
old_properties.insert(FieldProperties::Identifier);
old_properties.insert(FieldProperties::Displayed);
SchemaBody(map)
}
}
impl Into<Schema> for SchemaBody {
    /// Converts this body into a `Schema` through a `SchemaBuilder`.
    ///
    /// The identifier is the name of the field flagged `Identifier`; when
    /// several fields carry the flag the last one in map order wins, and when
    /// none does `"documentId"` is used. Every field becomes an attribute
    /// whose `SchemaProps` mirror its `Indexed`, `Displayed` and `Ranked`
    /// flags.
    fn into(self) -> Schema {
        let mut identifier = String::from("documentId");

        // The identifier is only known once every field has been visited, so
        // attributes are collected first and handed to the builder afterwards.
        let mut attributes = Vec::new();
        for (field, properties) in self.0 {
            if properties.contains(&FieldProperties::Identifier) {
                identifier = field.clone();
            }
            let props = SchemaProps {
                indexed: properties.contains(&FieldProperties::Indexed),
                displayed: properties.contains(&FieldProperties::Displayed),
                ranked: properties.contains(&FieldProperties::Ranked),
            };
            attributes.push((field, props));
        }

        let mut builder = SchemaBuilder::with_identifier(identifier);
        for (field, props) in attributes {
            builder.new_attribute(field, props);
        }
        builder.build()
    }
}
// Unit tests for the `SchemaBody` <-> `Schema` conversion.
#[cfg(test)]
mod tests {
use super::*;
// A `SchemaBody` parsed from JSON must convert into the same `Schema` as the
// one built by a `SchemaBuilder` parsed from the equivalent builder JSON.
#[test]
fn test_schema_body_conversion() {
let schema_body = r#"
{
"id": ["identifier", "indexed", "displayed"],
"title": ["indexed", "displayed"],
"date": ["displayed"]
}
"#;
let schema_builder = r#"
{
"identifier": "id",
"attributes": {
"id": {
"indexed": true,
"displayed": true
},
"title": {
"indexed": true,
"displayed": true
},
"date": {
"displayed": true
}
}
}
"#;
let schema_body: SchemaBody = serde_json::from_str(schema_body).unwrap();
let schema_builder: SchemaBuilder = serde_json::from_str(schema_builder).unwrap();
let schema_from_body: Schema = schema_body.into();
let schema_from_builder: Schema = schema_builder.build();
assert_eq!(schema_from_body, schema_from_builder);
}
}

View File

@ -0,0 +1,72 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
/// The `"_token_"` prefix string.
///
/// NOTE(review): presumably prepended to the keys under which tokens are
/// stored; confirm against the callers.
pub const TOKEN_PREFIX_KEY: &str = "_token_";
/// Permissions that can be listed in a [`Token`].
///
/// Variants (de)serialize in camelCase (for example `"indexesRead"`), except
/// [`ACL::All`] which is spelled `"*"`.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum ACL {
IndexesRead,
IndexesWrite,
DocumentsRead,
DocumentsWrite,
SettingsRead,
SettingsWrite,
Admin,
/// Serialized as `"*"`.
///
/// NOTE(review): presumably a wildcard that grants every permission;
/// confirm against the token check.
#[serde(rename = "*")]
All,
}
/// A plain `String` alias used for the entries of [`Token::indexes`].
///
/// NOTE(review): presumably a pattern in the format accepted by
/// `match_wildcard`; confirm against the callers.
pub type Wildcard = String;
/// An API token record.
///
/// Field names (de)serialize in camelCase (for example `createdAt`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Token {
/// The token key.
pub key: String,
/// Free-form description of the token.
pub description: String,
/// Permissions listed for this token.
pub acl: Vec<ACL>,
/// Index name patterns attached to this token.
pub indexes: Vec<Wildcard>,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
/// Last update timestamp (UTC).
pub updated_at: DateTime<Utc>,
/// Expiration timestamp (UTC).
pub expires_at: DateTime<Utc>,
/// Whether the token has been revoked.
pub revoked: bool,
}
/// Splits a wildcard pattern into its leading-`*` flag, its literal part and
/// its trailing-`*` flag.
///
/// At most one `*` is removed from each end. A lone `"*"` counts as both a
/// leading and a trailing wildcard and leaves an empty literal.
fn cleanup_wildcard(input: &str) -> (bool, &str, bool) {
    let first = input.starts_with('*');
    let last = input.ends_with('*');

    // `*` is one byte long, so the flags double as byte offsets.
    let start = usize::from(first);
    // Never let the end fall before the start (the `"*"` case).
    let end = input.len().saturating_sub(usize::from(last)).max(start);

    (first, &input[start..end], last)
}

/// Tells whether `input` matches `pattern`.
///
/// Only a `*` at the very start and/or very end of `pattern` acts as a
/// wildcard:
///
/// - no wildcard: `input` must equal the pattern,
/// - leading only: `input` must end with the literal part,
/// - trailing only: `input` must start with the literal part,
/// - both: `input` must contain the literal part.
pub fn match_wildcard(pattern: &str, input: &str) -> bool {
    let (leading, literal, trailing) = cleanup_wildcard(pattern);

    if leading && trailing {
        input.contains(literal)
    } else if leading {
        input.ends_with(literal)
    } else if trailing {
        input.starts_with(literal)
    } else {
        input == literal
    }
}
// Unit tests for `match_wildcard`.
#[cfg(test)]
mod tests {
use super::*;
// Covers lone and doubled wildcards, leading/trailing/both wildcards, exact
// matches and a multi-byte literal.
#[test]
fn test_match_wildcard() {
assert!(match_wildcard("*", "qqq"));
assert!(match_wildcard("*", ""));
assert!(match_wildcard("*ab", "qqqab"));
assert!(match_wildcard("*ab*", "qqqabqq"));
assert!(match_wildcard("ab*", "abqqq"));
assert!(match_wildcard("**", "ab"));
assert!(match_wildcard("ab", "ab"));
assert!(match_wildcard("ab*", "ab"));
assert!(match_wildcard("*ab", "ab"));
assert!(match_wildcard("*ab*", "ab"));
assert!(match_wildcard("*😆*", "ab😆dsa"));
}
}

View File

@ -0,0 +1,33 @@
use std::fmt;
/// The kinds of update operation known to the HTTP layer.
///
/// The `Display` implementation prints the variant name.
// NOTE(review): `dead_code` is presumably allowed because not every variant
// is constructed yet; confirm and drop the attribute once they all are.
#[allow(dead_code)]
#[derive(Debug)]
pub enum UpdateOperation {
    ClearAllDocuments,
    DocumentsAddition,
    DocumentsDeletion,
    SynonymsAddition,
    SynonymsDeletion,
    StopWordsAddition,
    StopWordsDeletion,
    Schema,
    Config,
}

impl fmt::Display for UpdateOperation {
    /// Writes the variant name, e.g. `DocumentsAddition`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            UpdateOperation::ClearAllDocuments => "ClearAllDocuments",
            UpdateOperation::DocumentsAddition => "DocumentsAddition",
            UpdateOperation::DocumentsDeletion => "DocumentsDeletion",
            UpdateOperation::SynonymsAddition => "SynonymsAddition",
            // Previously misspelled "SynonymsDelettion".
            UpdateOperation::SynonymsDeletion => "SynonymsDeletion",
            UpdateOperation::StopWordsAddition => "StopWordsAddition",
            UpdateOperation::StopWordsDeletion => "StopWordsDeletion",
            UpdateOperation::Schema => "Schema",
            UpdateOperation::Config => "Config",
        };
        f.write_str(name)
    }
}

View File

@ -0,0 +1,56 @@
use envconfig::Envconfig;
use structopt::StructOpt;
// Raw, all-optional settings, readable both from the command line (via the
// `StructOpt` derive, as `--long` flags) and from environment variables (via
// the `Envconfig` derive).
//
// Plain `//` comments are used for the additions here because structopt
// turns `///` doc comments into command-line help text.
#[derive(Debug, Clone, StructOpt, Envconfig)]
struct Vars {
/// The destination where the database must be created.
#[structopt(long)]
#[envconfig(from = "MEILI_DATABASE_PATH")]
pub database_path: Option<String>,
/// The addr on which the http server will listen.
#[structopt(long)]
#[envconfig(from = "MEILI_HTTP_ADDR")]
pub http_addr: Option<String>,
// Optional admin token, read from `--admin-token` or `MEILI_ADMIN_TOKEN`.
#[structopt(long)]
#[envconfig(from = "MEILI_ADMIN_TOKEN")]
pub admin_token: Option<String>,
}
/// Resolved server options.
#[derive(Clone, Debug)]
pub struct Opt {
    /// Path of the database.
    pub database_path: String,
    /// Address given to the HTTP server.
    pub http_addr: String,
    /// Optional admin token.
    pub admin_token: Option<String>,
}

impl Default for Opt {
    /// Defaults: database at `/tmp/meilidb`, server address `127.0.0.1:8080`,
    /// no admin token.
    fn default() -> Self {
        Self {
            database_path: "/tmp/meilidb".to_owned(),
            http_addr: "127.0.0.1:8080".to_owned(),
            admin_token: None,
        }
    }
}
impl Opt {
    /// Resolves the options from the environment, the command line and the
    /// defaults.
    ///
    /// For each setting the environment value is used when present, then the
    /// command-line value, then the one from `Opt::default()`.
    ///
    /// # Panics
    ///
    /// Panics when `Vars::init()` returns an error.
    pub fn new() -> Self {
        let fallback = Self::default();
        let cli = Vars::from_args();
        let env = Vars::init().unwrap();

        let database_path = env
            .database_path
            .or(cli.database_path)
            .unwrap_or(fallback.database_path);
        let http_addr = env
            .http_addr
            .or(cli.http_addr)
            .unwrap_or(fallback.http_addr);
        let admin_token = env
            .admin_token
            .or(cli.admin_token)
            .or(fallback.admin_token);

        Self {
            database_path,
            http_addr,
            admin_token,
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More