Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-07-18 04:11:07 +00:00
Compare commits
152 Commits
SHA1
6c9a238973
cf5e228288
9dce41ed6b
ca26a0f2e4
47d777c8f7
2ef51f7df9
2d7db2a80f
526202ec8b
86ab729356
dd74af4c70
b79a8457f9
d941c512db
0ff73039e5
2ea3e9b081
da71821204
16f0914f09
1cf6afad9a
261c21b057
925a22b644
dc5c42821e
1667e1b32f
c332c7bc70
5e8d432614
f6282ca031
3278d22279
c9618793e3
1ef785a9ef
fdc98f9ef3
0de37819b4
9ff92c5d15
e629f51af4
b377003192
a7e40a78c1
9cdda8c46a
b7ea812dcc
710ab2386c
81bf6d583d
02575a2ef6
da6ab2753e
97de72de83
12b80e08be
4b130fa2e5
9dca18f966
543b65b09b
9eb27811b1
7c3d93e5da
485480560a
0ac927794a
e09d3b654d
c5af5de4f0
19c22a8c5e
0103c7bfd9
7b26bd88c0
da0168bd82
d1e59be46b
9774db6011
46c19dfc5a
9ed6752573
d8fdad1455
f56636e1e9
03599f1fc9
be78ecbf9a
ba2b04ca89
121399f336
3fded51534
8f63ec39da
5a1c1aeb02
6ec575f8de
683b6afbfb
663714bb6d
bb35ca0d40
5f3072e67e
2a4707d51e
6534a9ec1d
0a5ad4db06
6ee0d72c7b
ba32ce21d0
0e224efa46
175461c13a
c514692233
d8d0442d63
2236ebbd42
0bfba3e4ba
a57a64823e
aa05459e4f
0615c5c52d
487411340a
5139dc7f3e
88d0d3931c
df2ef8d2e1
29229b2137
851cc38216
effbbc7370
08e3f23408
62a0aefe44
3476939b7e
38e474deaf
00c70d3cb5
af9fd9f552
0a731973b9
c4bd13bcdf
a5bfbf244c
39e0d9fc4a
905bc5c1a6
0f395d43a0
0b5b7b0bf1
57dd679026
cdd69290c3
175b3dcb75
ca818e12a9
6b9426a051
cee5e50857
3fe346101b
87e5998489
d7d1b6ff02
7073b42afa
120d209e66
62e981c6b8
941302a4be
20f423268e
522013425b
e3c413759f
6ed97d1c19
53ad1fc068
1e2ef06c5c
9db86f13f3
369461e635
d2d22ac76d
a5a19fc9dd
a36c991897
4f71219e17
69e0bae75e
1b18679950
e1c119b5a8
03709910fd
8fdb330195
59ae6458dc
c10b701b9a
80caa8b60d
97cf5cca2a
3e76dc718b
5a17b5a63b
5bc5185ac5
3712fa7c24
918cc235a4
8d24e54fa1
35b7b58ff7
ffc29a319f
ba3ac5ea7b
ee6a54fe4c
f6ff79085e
bcd38c7d5a
.gitignore (vendored, 9 changed lines)
@@ -1,8 +1,7 @@
/target
/Cargo.lock
meilidb/Cargo.lock
meilidb-core/Cargo.lock
**/*.rs.bk
Cargo.lock
**/*.csv
**/*.json_lines
**/*.rdb
**/*.rs.bk
/*.mdb
/query-history.txt
Cargo.toml
@@ -1,8 +1,6 @@
[workspace]
members = [
    "meilidb",
    "meilidb-core",
    "meilidb-data",
    "meilidb-schema",
    "meilidb-tokenizer",
]
LICENSE (26 changed lines)
@@ -1,21 +1,13 @@
MIT License
“Commons Clause” License Condition v1.0

Copyright (c) 2018 Clément Renault
The Software is provided to you by the Licensor under the License, as defined below, subject to the following condition.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Without limiting other conditions in the License, the grant of rights under the License will not include, and the License does not grant to you, the right to Sell the Software.

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
For purposes of the foregoing, “Sell” means practicing any or all of the rights granted to you under the License to provide to third parties, for a fee or other consideration (including without limitation fees for hosting or consulting/support services related to the Software), a product or service whose value derives, entirely or substantially, from the functionality of the Software. Any license notice or attribution required by the License must also include this Commons Clause License Condition notice.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Software: MeiliDB

License: MIT

Licensor: MEILI SAS
README.md (71 changed lines)
@@ -1,36 +1,34 @@
# MeiliDB

[](https://dev.azure.com/thomas0884/thomas/_build/latest?definitionId=1&branchName=master)
[](https://deps.rs/repo/github/Kerollmops/MeiliDB)
[](https://github.com/Kerollmops/MeiliDB)
[](https://www.rust-lang.org)
[](https://deps.rs/repo/github/meilisearch/MeiliDB)
[](https://commonsclause.com/)

A _full-text search database_ using a key-value store internally.
A _full-text search database_ based on the fast [LMDB key-value store](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database).

## Features

- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L95-L101) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L22-L29) and can apply them in any custom order
- Supports [ranged queries](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L146), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L68) and [filter](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L57) returned documents based on context-defined rules
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/examples/movies/schema-movies.toml)
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-tokenizer/src/lib.rs#L99) can index Latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/lib.rs#L117-L120), useful to highlight matched words in results
- Accepts query-time search config like the [searchable fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L79)
- Supports run-time indexing (incremental indexing)
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L107-L113) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
- Supports [ranged queries](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L283), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L265-L270) and [filter](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L246-L259) returned documents based on context-defined rules
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-schema/src/lib.rs#L265-L279)
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-tokenizer/src/lib.rs) can index Latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/lib.rs#L66-L88), useful to highlight matched words in results
- Accepts query-time search config like the [searchable attributes](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L272-L275)
- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/store/mod.rs#L143-L173)

It uses [RocksDB](https://github.com/facebook/rocksdb) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performance.
It uses [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performance.

You can [read the deep dive](deep-dive.md) if you want more information on the engine; it describes the whole process of generating updates and handling queries. You can also take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.

We will be proud if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!

The project is only a library for now, meaning no binary is provided yet. To get started, you can check the examples, which are made to work with the data located in the `misc/` folder.
The project is only a library for now, meaning no binary is provided yet. To get started, you can check the examples, which are made to work with the data located in the `datasets/` folder.

MeiliDB will become a binary in the near future, so you will be able to use it as a database out-of-the-box. We should be able to query it using a [to-be-defined](https://github.com/meilisearch/MeiliDB/issues/38) protocol. This is our current goal, [see the milestones](https://github.com/meilisearch/MeiliDB/milestones). In the end, the binary will be a set of network protocols and wrappers around the library, which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
MeiliDB will become a binary in the near future, so you will be able to use it as a database out-of-the-box. We should be able to query it using HTTP. This is our current goal, [see the milestones](https://github.com/meilisearch/MeiliDB/milestones). In the end, the binary will be a set of network protocols and wrappers around the library, which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
@@ -54,40 +52,27 @@ Transfer/sec: 759.17KB

### Notes

The default Rust allocator has recently been [changed to use the system allocator](https://github.com/rust-lang/rust/pull/51241/).
With Rust 1.32 the allocator was [changed to use the system allocator](https://blog.rust-lang.org/2019/01/17/Rust-1.32.0.html#jemalloc-is-removed-by-default).
We have seen much better performance when [using jemalloc as the global allocator](https://github.com/alexcrichton/jemallocator#documentation).
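For reference, opting back into jemalloc is a one-liner with the [jemallocator](https://github.com/alexcrichton/jemallocator) crate. A minimal sketch, assuming `jemallocator` has been added to `Cargo.toml` (this snippet is illustrative, not part of MeiliDB):

```rust
// Illustrative only: route all heap allocations through jemalloc.
// Assumes the `jemallocator` crate is declared as a dependency.
use jemallocator::Jemalloc;

#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;

fn main() {
    // Every allocation below now goes through jemalloc.
    let words: Vec<String> = vec!["meili".into(), "db".into()];
    println!("{}", words.join(""));
}
```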
## Usage and examples

You can try a little part of MeiliDB with the following commands.
It creates an index named _movies_ and inserts two great Tarantino movies in it.
Currently MeiliDB does not provide an HTTP server, but you can run the example binary.

The _index_ subcommand has been made to create an index and inject documents into it. Using the command line below, the index will be named _movies_ and the _19 700_ movies of the `datasets/` folder will be injected into MeiliDB.

```bash
cargo run --release

curl -XPOST 'http://127.0.0.1:8000/movies' \
    -d '
identifier = "id"

[attributes.id]
stored = true

[attributes.title]
stored = true
indexed = true
'

curl -H 'Content-Type: application/json' \
    -XPUT 'http://127.0.0.1:8000/movies' \
    -d '{ "id": 123, "title": "Inglorious Bastards" }'

curl -H 'Content-Type: application/json' \
    -XPUT 'http://127.0.0.1:8000/movies' \
    -d '{ "id": 456, "title": "Django Unchained" }'
cargo run --release --example from_file -- \
    index example.mdb datasets/movies/data.csv \
    --schema datasets/movies/schema.toml
```

Once the database is initialized you can query it by using the following command:
Once the first command is done, you can query the freshly created _movies_ index using the _search_ subcommand. In this example we filtered the dataset to only show _non-adult_ movies using the non-definitive `!adult` filter syntax.

```bash
curl -XGET 'http://127.0.0.1:8000/movies/search?q=inglo'
cargo run --release --example from_file -- \
    search example.mdb \
    --number 4 \
    --filter '!adult' \
    id popularity adult original_title
```
azure-pipelines.yml
@@ -13,13 +13,17 @@ jobs:
  steps:
  - script: |
      curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
    displayName: 'Install rustc'
      $HOME/.cargo/bin/rustup component add rustfmt
    displayName: 'Install rustc and components'
  - script: |
      $HOME/.cargo/bin/cargo check
    displayName: 'Check MeiliDB'
  - script: |
      $HOME/.cargo/bin/cargo test
    displayName: 'Test MeiliDB'
  - script: |
      $HOME/.cargo/bin/cargo fmt --all -- --check
    displayName: 'Fmt MeiliDB'

- job: build
  dependsOn:
@@ -31,7 +35,8 @@ jobs:
  steps:
  - script: |
      curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
    displayName: 'Install rustc'
      $HOME/.cargo/bin/rustup component add rustfmt
    displayName: 'Install rustc and components'
  - script: |
      $HOME/.cargo/bin/cargo build --release
    displayName: 'Build MeiliDB'
@@ -1,15 +0,0 @@
#!/bin/bash

cd "$(dirname "$0")"/..
set -ex

export RUSTFLAGS="-D warnings"

cargo check --no-default-features
cargo check --bins --examples --tests
cargo test

if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then
    cargo check --no-default-features --features nightly
    cargo test --features nightly
fi
Can't render this file because it is too large.
@@ -1,122 +0,0 @@
id,title,description,image
[122 deleted rows of an e-commerce sample dataset: ids 711158459 through 711158561 covering PlayStation 4 consoles, eye creams, wireless routers, electric shavers, and coffee grinders, each row carrying a title, an optional long marketing description, and an ebaystatic.com thumbnail URL]
@@ -1,19 +0,0 @@
# This schema has been generated ...
# The order in which the attributes are declared is important,
# it specifies the attribute xxx...

identifier = "id"

[attributes.id]
displayed = true

[attributes.title]
displayed = true
indexed = true

[attributes.description]
displayed = true
indexed = true

[attributes.image]
displayed = true
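For context, here is a minimal sketch of how such a schema file is consumed, mirroring the `from_file.rs` example later in this diff. The concrete `meilidb_schema::Schema` type name is an assumption; the example itself relies on type inference when it calls `toml::from_str` and hands the result to `index.schema_update`:

```rust
use std::error::Error;
use std::fs;

// Illustrative sketch: read the schema TOML and deserialize it via serde,
// exactly as from_file.rs does before calling `index.schema_update`.
// `meilidb_schema::Schema` is an assumed type name.
fn load_schema(path: &str) -> Result<meilidb_schema::Schema, Box<dyn Error>> {
    let string = fs::read_to_string(path)?;
    let schema = toml::from_str(&string)?;
    Ok(schema)
}
```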
meilidb-core/Cargo.toml
@@ -1,34 +1,49 @@
[package]
name = "meilidb-core"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"

[dependencies]
byteorder = "1.3.1"
arc-swap = "0.4.3"
bincode = "1.1.4"
byteorder = "1.3.2"
crossbeam-channel = "0.3.9"
deunicode = "1.0.0"
hashbrown = "0.2.2"
lazy_static = "1.2.0"
log = "0.4.6"
env_logger = "0.7.0"
hashbrown = { version = "0.6.0", features = ["serde"] }
log = "0.4.8"
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
rayon = "1.2.0"
sdset = "0.3.2"
serde = { version = "1.0.88", features = ["derive"] }
once_cell = "1.2.0"
ordered-float = { version = "1.0.2", features = ["serde"] }
sdset = "0.3.3"
serde = { version = "1.0.101", features = ["derive"] }
serde_json = "1.0.41"
siphasher = "0.3.0"
slice-group-by = "0.2.6"
zerocopy = "0.2.2"
zerocopy = "0.2.8"

[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dependencies.zlmdb]
package = "zerocopy-lmdb"
git = "https://github.com/Kerollmops/zerocopy-lmdb.git"
branch = "master"

[dependencies.levenshtein_automata]
git = "https://github.com/Kerollmops/levenshtein-automata.git"
branch = "arc-byte-slice"
features = ["fst_automaton"]

[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"

[dev-dependencies]
assert_matches = "1.3"

[features]
i128 = ["byteorder/i128"]
nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
csv = "1.0.7"
indexmap = { version = "1.2.0", features = ["serde-1"] }
rustyline = { version = "5.0.0", default-features = false }
structopt = "0.3.2"
tempfile = "3.1.0"
termcolor = "1.0.4"
toml = "0.5.3"
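One detail worth noting in this manifest: the `[dependencies.zlmdb]` table uses Cargo's `package` key to rename a dependency, so the crate published as `zerocopy-lmdb` is referenced as `zlmdb` in code. A hedged sketch of what that means at the use-site (no items from the crate are shown, since its API is not part of this diff):

```rust
// Because Cargo.toml declares `[dependencies.zlmdb]` with
// `package = "zerocopy-lmdb"`, the crate is available under the
// alias `zlmdb`; the published name never appears in Rust source.
use zlmdb; // resolves to the zerocopy-lmdb crate

fn main() {}
```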
meilidb-core/examples/from_file.rs (new file, 431 lines)
@ -0,0 +1,431 @@
|
||||
use std::collections::btree_map::{BTreeMap, Entry};
use std::collections::HashSet;
use std::error::Error;
use std::io::Write;
use std::iter::FromIterator;
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use std::{fs, io, sync::mpsc};

use rustyline::{Config, Editor};
use serde::{Deserialize, Serialize};
use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};

use meilidb_core::{Database, Highlight, UpdateResult};
use meilidb_schema::SchemaAttr;

const INDEX_NAME: &str = "default";

#[derive(Debug, StructOpt)]
struct IndexCommand {
    /// The destination where the database must be created.
    #[structopt(parse(from_os_str))]
    database_path: PathBuf,

    /// The csv file to index.
    #[structopt(parse(from_os_str))]
    csv_data_path: PathBuf,

    /// The path to the schema.
    #[structopt(long, parse(from_os_str))]
    schema: PathBuf,

    #[structopt(long)]
    update_group_size: Option<usize>,

    #[structopt(long, parse(from_os_str))]
    compact_to_path: Option<PathBuf>,
}

#[derive(Debug, StructOpt)]
struct SearchCommand {
    /// The destination where the database must be created.
    #[structopt(parse(from_os_str))]
    database_path: PathBuf,

    /// Timeout after which the search will return results.
    #[structopt(long)]
    fetch_timeout_ms: Option<u64>,

    /// The number of returned results
    #[structopt(short, long, default_value = "10")]
    number_results: usize,

    /// The number of characters before and after the first match
    #[structopt(short = "C", long, default_value = "35")]
    char_context: usize,

    /// A filter string that can be `!adult` or `adult` to
    /// filter documents on this specified field
    #[structopt(short, long)]
    filter: Option<String>,

    /// Fields that must be displayed.
    displayed_fields: Vec<String>,
}

#[derive(Debug, StructOpt)]
enum Command {
    Index(IndexCommand),
    Search(SearchCommand),
}

impl Command {
    fn path(&self) -> &Path {
        match self {
            Command::Index(command) => &command.database_path,
            Command::Search(command) => &command.database_path,
        }
    }
}

#[derive(Serialize, Deserialize)]
#[serde(transparent)]
struct Document(indexmap::IndexMap<String, String>);

fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dyn Error>> {
    let start = Instant::now();

    let (sender, receiver) = mpsc::sync_channel(100);
    let update_fn = move |update: UpdateResult| sender.send(update.update_id).unwrap();
    let index = match database.open_index(INDEX_NAME) {
        Some(index) => index,
        None => database.create_index(INDEX_NAME).unwrap(),
    };

    let done = database.set_update_callback(INDEX_NAME, Box::new(update_fn));
    assert!(done, "could not set the index update function");

    let env = &database.env;

    let schema = {
        let string = fs::read_to_string(&command.schema)?;
        toml::from_str(&string).unwrap()
    };

    let mut writer = env.write_txn().unwrap();
    match index.main.schema(&writer)? {
        Some(current_schema) => {
            if current_schema != schema {
                return Err(meilidb_core::Error::SchemaDiffer.into());
            }
            writer.abort();
        }
        None => {
            index.schema_update(&mut writer, schema)?;
            writer.commit().unwrap();
        }
    }

    let mut rdr = csv::Reader::from_path(command.csv_data_path)?;
    let mut raw_record = csv::StringRecord::new();
    let headers = rdr.headers()?.clone();

    let mut max_update_id = 0;
    let mut i = 0;
    let mut end_of_file = false;

    while !end_of_file {
        let mut additions = index.documents_addition();

        loop {
            end_of_file = !rdr.read_record(&mut raw_record)?;
            if end_of_file {
                break;
            }

            let document: Document = match raw_record.deserialize(Some(&headers)) {
                Ok(document) => document,
                Err(e) => {
                    eprintln!("{:?}", e);
                    continue;
                }
            };

            additions.update_document(document);

            print!("\rindexing document {}", i);
            i += 1;

            if let Some(group_size) = command.update_group_size {
                if i % group_size == 0 {
                    break;
                }
            }
        }

        println!();

        let mut writer = env.write_txn().unwrap();
        println!("committing update...");
        let update_id = additions.finalize(&mut writer)?;
        writer.commit().unwrap();
        max_update_id = max_update_id.max(update_id);
        println!("committed update {}", update_id);
    }

    println!("Waiting for update {}", max_update_id);
    for id in receiver {
        if id == max_update_id {
            break;
        }
    }

    println!(
        "database created in {:.2?} at: {:?}",
        start.elapsed(),
        command.database_path
    );

    if let Some(path) = command.compact_to_path {
        let start = Instant::now();
        let _file = database.copy_and_compact_to_path(&path)?;
        println!(
            "database compacted in {:.2?} at: {:?}",
            start.elapsed(),
            path
        );
    }

    Ok(())
}

fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
    let mut stdout = StandardStream::stdout(ColorChoice::Always);
    let mut highlighted = false;

    for range in ranges.windows(2) {
        let [start, end] = match range {
            [start, end] => [*start, *end],
            _ => unreachable!(),
        };
        if highlighted {
            stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?;
        }
        write!(&mut stdout, "{}", &text[start..end])?;
        stdout.reset()?;
        highlighted = !highlighted;
    }

    Ok(())
}

fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
    let mut byte_index = 0;
    let mut byte_length = 0;

    for (n, (i, c)) in text.char_indices().enumerate() {
        if n == index {
            byte_index = i;
        }

        if n + 1 == index + length {
            byte_length = i - byte_index + c.len_utf8();
            break;
        }
    }

    (byte_index, byte_length)
}
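
// A quick usage sketch (strings are illustrative): char_to_byte_range maps a
// character-based (index, length) pair onto UTF-8 byte offsets, which is what
// makes the slicing in display_highlights safe on non-ASCII text.
#[test]
fn char_to_byte_range_non_ascii() {
    // "héllo" is 5 chars but 6 bytes: 'é' takes 2 bytes in UTF-8
    let (byte_index, byte_length) = char_to_byte_range(1, 4, "héllo");
    assert_eq!((byte_index, byte_length), (1, 5));
    assert_eq!(&"héllo"[byte_index..byte_index + byte_length], "éllo");
}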

fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
    let mut byte_indexes = BTreeMap::new();

    for highlight in highlights {
        let char_index = highlight.char_index as usize;
        let char_length = highlight.char_length as usize;
        let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);

        match byte_indexes.entry(byte_index) {
            Entry::Vacant(entry) => {
                entry.insert(byte_length);
            }
            Entry::Occupied(mut entry) => {
                if *entry.get() < byte_length {
                    entry.insert(byte_length);
                }
            }
        }
    }

    let mut title_areas = Vec::new();
    title_areas.push(0);
    for (byte_index, length) in byte_indexes {
        title_areas.push(byte_index);
        title_areas.push(byte_index + length);
    }
    title_areas.push(text.len());
    title_areas.sort_unstable();
    title_areas
}
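
// A small sketch of the area boundaries produced above, assuming Highlight is
// the { attribute, char_index, char_length } struct used throughout this file:
// boundaries alternate between non-highlighted and highlighted spans, which is
// exactly what display_highlights consumes with windows(2).
#[test]
fn highlight_areas_boundaries() {
    let highlight = Highlight { attribute: 0, char_index: 0, char_length: 4 };
    let areas = create_highlight_areas("soup of the day", &[highlight]);
    // [0, 0] is empty, [0, 4] is the highlighted "soup", [4, 15] is the rest
    assert_eq!(areas, vec![0, 0, 4, 15]);
}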

/// note: matches must have been sorted by `char_index` and `char_length` before being passed.
///
/// ```no_run
/// matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
///
/// let matches = matches.matches.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
///
/// let (text, matches) = crop_text(&text, matches, 35);
/// ```
fn crop_text(
    text: &str,
    highlights: impl IntoIterator<Item = Highlight>,
    context: usize,
) -> (String, Vec<Highlight>) {
    let mut highlights = highlights.into_iter().peekable();

    let char_index = highlights
        .peek()
        .map(|m| m.char_index as usize)
        .unwrap_or(0);
    let start = char_index.saturating_sub(context);
    let text = text.chars().skip(start).take(context * 2).collect();

    let highlights = highlights
        .take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2))
        .map(|highlight| Highlight {
            char_index: highlight.char_index - start as u16,
            ..highlight
        })
        .collect();

    (text, highlights)
}

fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<dyn Error>> {
    let env = &database.env;
    let index = database
        .open_index(INDEX_NAME)
        .expect("Could not find index");

    let reader = env.read_txn().unwrap();
    let schema = index.main.schema(&reader)?;
    reader.abort();
    let schema = schema.ok_or(meilidb_core::Error::SchemaMissing)?;

    let fields = command.displayed_fields.iter().map(String::as_str);
    let fields = HashSet::from_iter(fields);

    let config = Config::builder().auto_add_history(true).build();
    let mut readline = Editor::<()>::with_config(config);
    let _ = readline.load_history("query-history.txt");

    for result in readline.iter("Searching for: ") {
        match result {
            Ok(query) => {
                let start_total = Instant::now();

                let reader = env.read_txn().unwrap();
                let ref_index = &index;
                let ref_reader = &reader;

                let mut builder = index.query_builder();
                if let Some(timeout) = command.fetch_timeout_ms {
                    builder.with_fetch_timeout(Duration::from_millis(timeout));
                }

                if let Some(ref filter) = command.filter {
                    let filter = filter.as_str();
                    let (positive, filter) = if filter.chars().next() == Some('!') {
                        (false, &filter[1..])
                    } else {
                        (true, filter)
                    };

                    let attr = schema
                        .attribute(&filter)
                        .expect("Could not find filtered attribute");

                    builder.with_filter(move |document_id| {
                        let string: String = ref_index
                            .document_attribute(ref_reader, document_id, attr)
                            .unwrap()
                            .unwrap();
                        (string == "true") == positive
                    });
                }

                let documents = builder.query(ref_reader, &query, 0..command.number_results)?;

                let mut retrieve_duration = Duration::default();

                let number_of_documents = documents.len();
                for mut doc in documents {
                    doc.highlights
                        .sort_unstable_by_key(|m| (m.char_index, m.char_length));

                    let start_retrieve = Instant::now();
                    let result = index.document::<Document>(&reader, Some(&fields), doc.id);
                    retrieve_duration += start_retrieve.elapsed();

                    match result {
                        Ok(Some(document)) => {
                            println!("raw-id: {:?}", doc.id);
                            for (name, text) in document.0 {
                                print!("{}: ", name);

                                let attr = schema.attribute(&name).unwrap();
                                let highlights = doc
                                    .highlights
                                    .iter()
                                    .filter(|m| SchemaAttr::new(m.attribute) == attr)
                                    .cloned();
                                let (text, highlights) =
                                    crop_text(&text, highlights, command.char_context);
                                let areas = create_highlight_areas(&text, &highlights);
                                display_highlights(&text, &areas)?;
                                println!();
                            }
                        }
                        Ok(None) => eprintln!("missing document"),
                        Err(e) => eprintln!("{}", e),
                    }

                    let mut matching_attributes = HashSet::new();
                    for highlight in doc.highlights {
                        let attr = SchemaAttr::new(highlight.attribute);
                        let name = schema.attribute_name(attr);
                        matching_attributes.insert(name);
                    }

                    let matching_attributes = Vec::from_iter(matching_attributes);
                    println!("matching in: {:?}", matching_attributes);

                    println!();
                }

                eprintln!(
                    "whole documents fields retrieve took {:.2?}",
                    retrieve_duration
                );
                eprintln!(
                    "===== Found {} results in {:.2?} =====",
                    number_of_documents,
                    start_total.elapsed()
                );
            }
            Err(err) => {
                println!("Error: {:?}", err);
                break;
            }
        }
    }

    readline.save_history("query-history.txt").unwrap();

    Ok(())
}

fn main() -> Result<(), Box<dyn Error>> {
    env_logger::init();

    let opt = Command::from_args();
    let database = Database::open_or_create(opt.path())?;

    match opt {
        Command::Index(command) => index_command(command, database),
        Command::Search(command) => search_command(command, database),
    }
}
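
// A possible invocation sketch for this example binary; the subcommand names
// follow structopt's default lowercasing of the Command variants, and the
// file names below are purely illustrative:
//
//   cargo run --example from_file -- index test.mdb movies.csv --schema schema.toml
//   cargo run --example from_file -- search test.mdb --number-results 5 title overview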
meilidb-core/src/automaton.rs (deleted)
@@ -1,44 +0,0 @@
use lazy_static::lazy_static;
use levenshtein_automata::{
    LevenshteinAutomatonBuilder as LevBuilder,
    DFA,
};

lazy_static! {
    static ref LEVDIST0: LevBuilder = LevBuilder::new(0, false);
    static ref LEVDIST1: LevBuilder = LevBuilder::new(1, false);
    static ref LEVDIST2: LevBuilder = LevBuilder::new(2, false);
}

#[derive(Copy, Clone)]
enum PrefixSetting {
    Prefix,
    NoPrefix,
}

fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
    use self::PrefixSetting::{Prefix, NoPrefix};

    match query.len() {
        0 ..= 4 => match setting {
            Prefix => LEVDIST0.build_prefix_dfa(query),
            NoPrefix => LEVDIST0.build_dfa(query),
        },
        5 ..= 8 => match setting {
            Prefix => LEVDIST1.build_prefix_dfa(query),
            NoPrefix => LEVDIST1.build_dfa(query),
        },
        _ => match setting {
            Prefix => LEVDIST2.build_prefix_dfa(query),
            NoPrefix => LEVDIST2.build_dfa(query),
        },
    }
}

pub fn build_prefix_dfa(query: &str) -> DFA {
    build_dfa_with_setting(query, PrefixSetting::Prefix)
}

pub fn build_dfa(query: &str) -> DFA {
    build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}
meilidb-core/src/automaton/dfa.rs (new file, 48 lines)
@@ -0,0 +1,48 @@
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use once_cell::sync::OnceCell;

static LEVDIST0: OnceCell<LevBuilder> = OnceCell::new();
static LEVDIST1: OnceCell<LevBuilder> = OnceCell::new();
static LEVDIST2: OnceCell<LevBuilder> = OnceCell::new();

#[derive(Copy, Clone)]
enum PrefixSetting {
    Prefix,
    NoPrefix,
}

fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
    use PrefixSetting::{NoPrefix, Prefix};

    match query.len() {
        0..=4 => {
            let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
            match setting {
                Prefix => builder.build_prefix_dfa(query),
                NoPrefix => builder.build_dfa(query),
            }
        }
        5..=8 => {
            let builder = LEVDIST1.get_or_init(|| LevBuilder::new(1, true));
            match setting {
                Prefix => builder.build_prefix_dfa(query),
                NoPrefix => builder.build_dfa(query),
            }
        }
        _ => {
            let builder = LEVDIST2.get_or_init(|| LevBuilder::new(2, true));
            match setting {
                Prefix => builder.build_prefix_dfa(query),
                NoPrefix => builder.build_dfa(query),
            }
        }
    }
}

pub fn build_prefix_dfa(query: &str) -> DFA {
    build_dfa_with_setting(query, PrefixSetting::Prefix)
}

pub fn build_dfa(query: &str) -> DFA {
    build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}
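
// A sketch of the typo-tolerance tiers that the match above encodes: short
// queries tolerate no typo, medium ones a single typo, longer ones two
// (and the `true` now passed to LevBuilder::new counts a transposition as
// a single edit, where the removed lazy_static version passed `false`).
fn max_typos(query_len: usize) -> u8 {
    match query_len {
        0..=4 => 0,
        5..=8 => 1,
        _ => 2,
    }
}

#[test]
fn typo_tiers() {
    assert_eq!(max_typos("soup".len()), 0);
    assert_eq!(max_typos("subway".len()), 1);
    assert_eq!(max_typos("underground".len()), 2);
}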
meilidb-core/src/automaton/mod.rs (new file, 220 lines)
@@ -0,0 +1,220 @@
mod dfa;
mod query_enhancer;

use std::cmp::Reverse;
use std::vec;

use fst::{IntoStreamer, Streamer};
use levenshtein_automata::DFA;
use meilidb_tokenizer::{is_cjk, split_query_string};

use crate::error::MResult;
use crate::store;

use self::dfa::{build_dfa, build_prefix_dfa};
pub use self::query_enhancer::QueryEnhancer;
use self::query_enhancer::QueryEnhancerBuilder;

const NGRAMS: usize = 3;

pub struct AutomatonProducer {
    automatons: Vec<Vec<Automaton>>,
}

impl AutomatonProducer {
    pub fn new(
        reader: &zlmdb::RoTxn,
        query: &str,
        main_store: store::Main,
        synonyms_store: store::Synonyms,
    ) -> MResult<(AutomatonProducer, QueryEnhancer)> {
        let (automatons, query_enhancer) =
            generate_automatons(reader, query, main_store, synonyms_store)?;

        Ok((AutomatonProducer { automatons }, query_enhancer))
    }

    pub fn into_iter(self) -> vec::IntoIter<Vec<Automaton>> {
        self.automatons.into_iter()
    }
}

#[derive(Debug)]
pub struct Automaton {
    pub index: usize,
    pub ngram: usize,
    pub query_len: usize,
    pub is_exact: bool,
    pub is_prefix: bool,
    pub query: String,
}

impl Automaton {
    pub fn dfa(&self) -> DFA {
        if self.is_prefix {
            build_prefix_dfa(&self.query)
        } else {
            build_dfa(&self.query)
        }
    }

    fn exact(index: usize, ngram: usize, query: &str) -> Automaton {
        Automaton {
            index,
            ngram,
            query_len: query.len(),
            is_exact: true,
            is_prefix: false,
            query: query.to_string(),
        }
    }

    fn prefix_exact(index: usize, ngram: usize, query: &str) -> Automaton {
        Automaton {
            index,
            ngram,
            query_len: query.len(),
            is_exact: true,
            is_prefix: true,
            query: query.to_string(),
        }
    }

    fn non_exact(index: usize, ngram: usize, query: &str) -> Automaton {
        Automaton {
            index,
            ngram,
            query_len: query.len(),
            is_exact: false,
            is_prefix: false,
            query: query.to_string(),
        }
    }
}

pub fn normalize_str(string: &str) -> String {
    let mut string = string.to_lowercase();

    if !string.contains(is_cjk) {
        string = deunicode::deunicode_with_tofu(&string, "");
    }

    string
}
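
// A usage sketch for normalize_str, assuming deunicode's usual
// transliteration (accents are stripped, unknown glyphs map to the ""
// tofu string given above), while CJK text is left untouched:
#[test]
fn normalize_str_examples() {
    assert_eq!(normalize_str("Café"), "cafe");
    assert_eq!(normalize_str("東京"), "東京"); // contains CJK characters
}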

fn generate_automatons(
    reader: &zlmdb::RoTxn,
    query: &str,
    main_store: store::Main,
    synonym_store: store::Synonyms,
) -> MResult<(Vec<Vec<Automaton>>, QueryEnhancer)> {
    let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
    let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
    let synonyms = match main_store.synonyms_fst(reader)? {
        Some(synonym) => synonym,
        None => fst::Set::default(),
    };

    let mut automaton_index = 0;
    let mut automatons = Vec::new();
    let mut enhancer_builder = QueryEnhancerBuilder::new(&query_words);

    // We must not declare the original words to the query enhancer
    // *but* we need to push them in the automatons list first
    let mut original_automatons = Vec::new();
    let mut original_words = query_words.iter().peekable();
    while let Some(word) = original_words.next() {
        let has_following_word = original_words.peek().is_some();
        let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);

        let automaton = if not_prefix_dfa {
            Automaton::exact(automaton_index, 1, word)
        } else {
            Automaton::prefix_exact(automaton_index, 1, word)
        };
        automaton_index += 1;
        original_automatons.push(automaton);
    }

    automatons.push(original_automatons);

    for n in 1..=NGRAMS {
        let mut ngrams = query_words.windows(n).enumerate().peekable();
        while let Some((query_index, ngram_slice)) = ngrams.next() {
            let query_range = query_index..query_index + n;
            let ngram_nb_words = ngram_slice.len();
            let ngram = ngram_slice.join(" ");

            let has_following_word = ngrams.peek().is_some();
            let not_prefix_dfa =
                has_following_word || has_end_whitespace || ngram.chars().all(is_cjk);

            // automaton of synonyms of the ngrams
            let normalized = normalize_str(&ngram);
            let lev = if not_prefix_dfa {
                build_dfa(&normalized)
            } else {
                build_prefix_dfa(&normalized)
            };

            let mut stream = synonyms.search(&lev).into_stream();
            while let Some(base) = stream.next() {
                // only trigger alternatives when the last word has been typed
                // i.e. "new " does not but "new yo" does trigger alternatives to "new york"
                let base = std::str::from_utf8(base).unwrap();
                let base_nb_words = split_query_string(base).count();
                if ngram_nb_words != base_nb_words {
                    continue;
                }

                if let Some(synonyms) = synonym_store.synonyms(reader, base.as_bytes())? {
                    let mut stream = synonyms.into_stream();
                    while let Some(synonyms) = stream.next() {
                        let synonyms = std::str::from_utf8(synonyms).unwrap();
                        let synonyms_words: Vec<_> = split_query_string(synonyms).collect();
                        let nb_synonym_words = synonyms_words.len();

                        let real_query_index = automaton_index;
                        enhancer_builder.declare(
                            query_range.clone(),
                            real_query_index,
                            &synonyms_words,
                        );

                        for synonym in synonyms_words {
                            let automaton = if nb_synonym_words == 1 {
                                Automaton::exact(automaton_index, n, synonym)
                            } else {
                                Automaton::non_exact(automaton_index, n, synonym)
                            };
                            automaton_index += 1;
                            automatons.push(vec![automaton]);
                        }
                    }
                }
            }

            if n != 1 {
                // automaton of concatenation of query words
                let concat = ngram_slice.concat();
                let normalized = normalize_str(&concat);

                let real_query_index = automaton_index;
                enhancer_builder.declare(query_range.clone(), real_query_index, &[&normalized]);

                let automaton = Automaton::exact(automaton_index, n, &normalized);
                automaton_index += 1;
                automatons.push(vec![automaton]);
            }
        }
    }

    // order automatons, the most important first,
    // we keep the original automatons at the front.
    automatons[1..].sort_by_key(|a| {
        let a = a.first().unwrap();
        (Reverse(a.is_exact), a.ngram)
    });

    Ok((automatons, enhancer_builder.build()))
}
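
// A sketch of the ngram enumeration that drives the loop above: with
// NGRAMS = 3, windows(n) yields every run of n consecutive query words,
// and concat() builds the "concatenation" variant used when n != 1.
#[test]
fn ngram_windows_sketch() {
    let words = ["new", "york", "subway"];
    let bigrams: Vec<String> = words.windows(2).map(|w| w.join(" ")).collect();
    assert_eq!(bigrams, vec!["new york", "york subway"]);
    let concats: Vec<String> = words.windows(2).map(|w| w.concat()).collect();
    assert_eq!(concats, vec!["newyork", "yorksubway"]);
}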
meilidb-core/src/automaton/query_enhancer.rs (new file, 423 lines)
@@ -0,0 +1,423 @@
use std::cmp::Ordering::{Equal, Greater, Less};
use std::ops::Range;

/// Returns `true` if the specified range can accept the given replacement words.
/// Returns `false` if the replacement words are already present in the original query
/// or if there are fewer replacement words than the range to replace.
//
//
// ## Ignored because already present in original
//
//     new york city subway
//     --------      ^^^^
//    /          \
//   [new york city]
//
//
// ## Ignored because smaller than the original
//
//     new york city subway
//     -------------
//       \          /
//        [new york]
//
//
// ## Accepted because bigger than the original
//
//        NYC subway
//        ---
//       /    \
//      /      \
//     /        \
//    /          \
//   /            \
//  [new york city]
//
fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
where
    S: AsRef<str>,
    T: AsRef<str>,
{
    if words.len() <= range.len() {
        // there are fewer or equal replacement words
        // than there are already in the replaced range
        return false;
    }

    // retrieve the part to rewrite but with the length
    // of the replacement part
    let original = query.iter().skip(range.start).take(words.len());

    // check if the original query doesn't already contain
    // the replacement words
    !original
        .map(AsRef::as_ref)
        .eq(words.iter().map(AsRef::as_ref))
}
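
// The three documented cases above, written out as a sketch:
#[test]
fn rewrite_range_with_cases() {
    // ignored because already present in the original
    let query = ["new", "york", "city", "subway"];
    assert!(!rewrite_range_with(&query, 0..2, &["new", "york", "city"]));

    // ignored because smaller than the original
    assert!(!rewrite_range_with(&query, 0..3, &["new", "york"]));

    // accepted because bigger than the original
    let query = ["NYC", "subway"];
    assert!(rewrite_range_with(&query, 0..1, &["new", "york", "city"]));
}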

type Origin = usize;
type RealLength = usize;

struct FakeIntervalTree {
    intervals: Vec<(Range<usize>, (Origin, RealLength))>,
}

impl FakeIntervalTree {
    fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
        intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end));
        FakeIntervalTree { intervals }
    }

    fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> {
        let element = self.intervals.binary_search_by(|(r, _)| {
            if point >= r.start {
                if point < r.end {
                    Equal
                } else {
                    Less
                }
            } else {
                Greater
            }
        });

        let n = match element {
            Ok(n) => n,
            Err(n) => n,
        };

        match self.intervals.get(n) {
            Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)),
            _otherwise => None,
        }
    }
}
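
// A point-query sketch: the "fake" tree is just a sorted Vec probed with a
// single binary search, which is enough here because lookups only ever ask
// for the interval containing one point.
#[test]
fn fake_interval_tree_query() {
    let tree = FakeIntervalTree::new(vec![(0..2, (0, 2)), (2..5, (1, 3))]);
    assert_eq!(tree.query(1), Some((0..2, (0, 2))));
    assert_eq!(tree.query(4), Some((2..5, (1, 3))));
    assert_eq!(tree.query(5), None);
}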

pub struct QueryEnhancerBuilder<'a, S> {
    query: &'a [S],
    origins: Vec<usize>,
    real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>,
}

impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
    pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
        // we initialize origins query indices based on their positions
        let origins: Vec<_> = (0..=query.len()).collect();
        let real_to_origin = origins.iter().map(|&o| (o..o + 1, (o, 1))).collect();

        QueryEnhancerBuilder {
            query,
            origins,
            real_to_origin,
        }
    }

    /// Update the final real to origin query indices mapping.
    ///
    /// `range` is the original words range that these `replacement` words replace
    /// and `real` is the first real query index of these replacement words.
    pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
    where
        T: AsRef<str>,
    {
        // check if the range of original words
        // can be rewritten with the replacement words
        if rewrite_range_with(self.query, range.clone(), replacement) {
            // this range can be replaced so we need to
            // modify the origins accordingly
            let offset = replacement.len() - range.len();

            let previous_padding = self.origins[range.end - 1];
            let current_offset = (self.origins[range.end] - 1) - previous_padding;
            let diff = offset.saturating_sub(current_offset);
            self.origins[range.end] += diff;

            for r in &mut self.origins[range.end + 1..] {
                *r += diff;
            }
        }

        // we need to store the real number and origins relations
        // this way it will be possible to know by how many
        // we need to pad real query indices
        let real_range = real..real + replacement.len().max(range.len());
        let real_length = replacement.len();
        self.real_to_origin
            .push((real_range, (range.start, real_length)));
    }

    pub fn build(self) -> QueryEnhancer {
        QueryEnhancer {
            origins: self.origins,
            real_to_origin: FakeIntervalTree::new(self.real_to_origin),
        }
    }
}

pub struct QueryEnhancer {
    origins: Vec<usize>,
    real_to_origin: FakeIntervalTree,
}

impl QueryEnhancer {
    /// Returns the query indices to use to replace this real query index.
    pub fn replacement(&self, real: u32) -> Range<u32> {
        let real = real as usize;

        // query the fake interval tree with the real query index
        let (range, (origin, real_length)) = self
            .real_to_origin
            .query(real)
            .expect("real has never been declared");

        // if `real` is the end bound of the range
        if (range.start + real_length - 1) == real {
            let mut count = range.len();
            let mut new_origin = origin;
            for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
                let len = slice[1] - slice[0];
                count = count.saturating_sub(len);
                if count == 0 {
                    new_origin = origin + i;
                    break;
                }
            }

            let n = real - range.start;
            let start = self.origins[origin];
            let end = self.origins[new_origin + 1];
            let remaining = (end - start) - n;

            Range {
                start: (start + n) as u32,
                end: (start + n + remaining) as u32,
            }
        } else {
            // just return the origin along with
            // the real position of the word
            let n = real as usize - range.start;
            let origin = self.origins[origin];

            Range {
                start: (origin + n) as u32,
                end: (origin + n + 1) as u32,
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn original_unmodified() {
        let query = ["new", "york", "city", "subway"];
        //             0       1       2       3
        let mut builder = QueryEnhancerBuilder::new(&query);

        // new york = new york city
        builder.declare(0..2, 4, &["new", "york", "city"]);
        //                    ^      4       5       6

        let enhancer = builder.build();

        assert_eq!(enhancer.replacement(0), 0..1); // new
        assert_eq!(enhancer.replacement(1), 1..2); // york
        assert_eq!(enhancer.replacement(2), 2..3); // city
        assert_eq!(enhancer.replacement(3), 3..4); // subway
        assert_eq!(enhancer.replacement(4), 0..1); // new
        assert_eq!(enhancer.replacement(5), 1..2); // york
        assert_eq!(enhancer.replacement(6), 2..3); // city
    }

    #[test]
    fn simple_growing() {
        let query = ["new", "york", "subway"];
        //             0       1       2
        let mut builder = QueryEnhancerBuilder::new(&query);

        // new york = new york city
        builder.declare(0..2, 3, &["new", "york", "city"]);
        //                    ^      3       4       5

        let enhancer = builder.build();

        assert_eq!(enhancer.replacement(0), 0..1); // new
        assert_eq!(enhancer.replacement(1), 1..3); // york
        assert_eq!(enhancer.replacement(2), 3..4); // subway
        assert_eq!(enhancer.replacement(3), 0..1); // new
        assert_eq!(enhancer.replacement(4), 1..2); // york
        assert_eq!(enhancer.replacement(5), 2..3); // city
    }

    #[test]
    fn same_place_growings() {
        let query = ["NY", "subway"];
        //             0       1
        let mut builder = QueryEnhancerBuilder::new(&query);

        // NY = new york
        builder.declare(0..1, 2, &["new", "york"]);
        //                    ^      2       3

        // NY = new york city
        builder.declare(0..1, 4, &["new", "york", "city"]);
        //                    ^      4       5       6

        // NY = NYC
        builder.declare(0..1, 7, &["NYC"]);
        //                    ^      7

        // NY = new york city
        builder.declare(0..1, 8, &["new", "york", "city"]);
        //                    ^      8       9       10

        // subway = underground train
        builder.declare(1..2, 11, &["underground", "train"]);
        //                    ^          11           12

        let enhancer = builder.build();

        assert_eq!(enhancer.replacement(0), 0..3); // NY
        assert_eq!(enhancer.replacement(1), 3..5); // subway
        assert_eq!(enhancer.replacement(2), 0..1); // new
        assert_eq!(enhancer.replacement(3), 1..3); // york
        assert_eq!(enhancer.replacement(4), 0..1); // new
        assert_eq!(enhancer.replacement(5), 1..2); // york
        assert_eq!(enhancer.replacement(6), 2..3); // city
        assert_eq!(enhancer.replacement(7), 0..3); // NYC
        assert_eq!(enhancer.replacement(8), 0..1); // new
        assert_eq!(enhancer.replacement(9), 1..2); // york
        assert_eq!(enhancer.replacement(10), 2..3); // city
        assert_eq!(enhancer.replacement(11), 3..4); // underground
        assert_eq!(enhancer.replacement(12), 4..5); // train
    }

    #[test]
    fn bigger_growing() {
        let query = ["NYC", "subway"];
        //             0        1
        let mut builder = QueryEnhancerBuilder::new(&query);

        // NYC = new york city
        builder.declare(0..1, 2, &["new", "york", "city"]);
        //                    ^      2       3       4

        let enhancer = builder.build();

        assert_eq!(enhancer.replacement(0), 0..3); // NYC
        assert_eq!(enhancer.replacement(1), 3..4); // subway
        assert_eq!(enhancer.replacement(2), 0..1); // new
        assert_eq!(enhancer.replacement(3), 1..2); // york
        assert_eq!(enhancer.replacement(4), 2..3); // city
    }

    #[test]
    fn middle_query_growing() {
        let query = ["great", "awesome", "NYC", "subway"];
        //              0         1        2       3
        let mut builder = QueryEnhancerBuilder::new(&query);

        // NYC = new york city
        builder.declare(2..3, 4, &["new", "york", "city"]);
        //                    ^      4       5       6

        let enhancer = builder.build();

        assert_eq!(enhancer.replacement(0), 0..1); // great
        assert_eq!(enhancer.replacement(1), 1..2); // awesome
        assert_eq!(enhancer.replacement(2), 2..5); // NYC
        assert_eq!(enhancer.replacement(3), 5..6); // subway
        assert_eq!(enhancer.replacement(4), 2..3); // new
        assert_eq!(enhancer.replacement(5), 3..4); // york
        assert_eq!(enhancer.replacement(6), 4..5); // city
    }

    #[test]
    fn end_query_growing() {
        let query = ["NYC", "subway"];
        //             0        1
        let mut builder = QueryEnhancerBuilder::new(&query);

        // subway = underground train
        builder.declare(1..2, 2, &["underground", "train"]);
        //                    ^          2           3

        let enhancer = builder.build();

        assert_eq!(enhancer.replacement(0), 0..1); // NYC
        assert_eq!(enhancer.replacement(1), 1..3); // subway
        assert_eq!(enhancer.replacement(2), 1..2); // underground
        assert_eq!(enhancer.replacement(3), 2..3); // train
    }

    #[test]
    fn multiple_growings() {
        let query = ["great", "awesome", "NYC", "subway"];
        //              0         1        2       3
        let mut builder = QueryEnhancerBuilder::new(&query);

        // NYC = new york city
        builder.declare(2..3, 4, &["new", "york", "city"]);
        //                    ^      4       5       6

        // subway = underground train
        builder.declare(3..4, 7, &["underground", "train"]);
        //                    ^          7           8

        let enhancer = builder.build();

        assert_eq!(enhancer.replacement(0), 0..1); // great
        assert_eq!(enhancer.replacement(1), 1..2); // awesome
        assert_eq!(enhancer.replacement(2), 2..5); // NYC
        assert_eq!(enhancer.replacement(3), 5..7); // subway
        assert_eq!(enhancer.replacement(4), 2..3); // new
        assert_eq!(enhancer.replacement(5), 3..4); // york
        assert_eq!(enhancer.replacement(6), 4..5); // city
        assert_eq!(enhancer.replacement(7), 5..6); // underground
        assert_eq!(enhancer.replacement(8), 6..7); // train
    }

    #[test]
    fn multiple_probable_growings() {
        let query = ["great", "awesome", "NYC", "subway"];
        //              0         1        2       3
        let mut builder = QueryEnhancerBuilder::new(&query);

        // NYC = new york city
        builder.declare(2..3, 4, &["new", "york", "city"]);
        //                    ^      4       5       6

        // subway = underground train
        builder.declare(3..4, 7, &["underground", "train"]);
        //                    ^          7           8

        // great awesome = good
        builder.declare(0..2, 9, &["good"]);
        //                    ^     9

        // awesome NYC = NY
        builder.declare(1..3, 10, &["NY"]);
        //                    ^^    10

        // NYC subway = metro
        builder.declare(2..4, 11, &["metro"]);
        //                    ^^    11

        let enhancer = builder.build();

        assert_eq!(enhancer.replacement(0), 0..1); // great
        assert_eq!(enhancer.replacement(1), 1..2); // awesome
        assert_eq!(enhancer.replacement(2), 2..5); // NYC
        assert_eq!(enhancer.replacement(3), 5..7); // subway
        assert_eq!(enhancer.replacement(4), 2..3); // new
        assert_eq!(enhancer.replacement(5), 3..4); // york
        assert_eq!(enhancer.replacement(6), 4..5); // city
        assert_eq!(enhancer.replacement(7), 5..6); // underground
        assert_eq!(enhancer.replacement(8), 6..7); // train
        assert_eq!(enhancer.replacement(9), 0..2); // good
        assert_eq!(enhancer.replacement(10), 1..5); // NY
        assert_eq!(enhancer.replacement(11), 2..5); // metro
    }
}
meilidb-core/src/criterion/document_id.rs
@@ -1,6 +1,6 @@
use std::cmp::Ordering;
use crate::criterion::Criterion;
use crate::RawDocument;
use std::cmp::Ordering;

#[derive(Debug, Clone, Copy)]
pub struct DocumentId;
@@ -10,7 +10,7 @@ impl Criterion for DocumentId {
        lhs.id.cmp(&rhs.id)
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        "DocumentId"
    }
}
meilidb-core/src/criterion/exact.rs
@@ -1,16 +1,41 @@
use std::cmp::Ordering;

use meilidb_schema::SchemaAttr;
use sdset::Set;
use slice_group_by::GroupBy;

use crate::criterion::Criterion;
use crate::RawDocument;

#[inline]
fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
fn number_exact_matches(
    query_index: &[u32],
    attribute: &[u16],
    is_exact: &[bool],
    fields_counts: &Set<(SchemaAttr, u64)>,
) -> usize {
    let mut count = 0;
    let mut index = 0;

    for group in query_index.linear_group() {
        let len = group.len();
        count += is_exact[index..index + len].contains(&true) as usize;

        let mut found_exact = false;
        for (pos, _) in is_exact[index..index + len]
            .iter()
            .filter(|x| **x)
            .enumerate()
        {
            found_exact = true;
            if let Ok(pos) = fields_counts.binary_search_by_key(&attribute[pos], |(a, _)| a.0) {
                let (_, count) = fields_counts[pos];
                if count == 1 {
                    return usize::max_value();
                }
            }
        }

        count += found_exact as usize;
        index += len;
    }

@@ -25,19 +50,25 @@ impl Criterion for Exact {
        let lhs = {
            let query_index = lhs.query_index();
            let is_exact = lhs.is_exact();
            number_exact_matches(query_index, is_exact)
            let attribute = lhs.attribute();
            let fields_counts = &lhs.fields_counts;

            number_exact_matches(query_index, attribute, is_exact, fields_counts)
        };

        let rhs = {
            let query_index = rhs.query_index();
            let is_exact = rhs.is_exact();
            number_exact_matches(query_index, is_exact)
            let attribute = rhs.attribute();
            let fields_counts = &rhs.fields_counts;

            number_exact_matches(query_index, attribute, is_exact, fields_counts)
        };

        lhs.cmp(&rhs).reverse()
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        "Exact"
    }
}
@@ -52,14 +83,51 @@ mod tests {
    // doc1: "souliereres rouge"
    #[test]
    fn easy_case() {
        let query_index0 = &[0];
        let is_exact0 = &[true];
        let doc0 = {
            let query_index = &[0];
            let attribute = &[0];
            let is_exact = &[true];
            let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap();

        let query_index1 = &[0];
        let is_exact1 = &[false];
            number_exact_matches(query_index, attribute, is_exact, fields_counts)
        };

        let doc1 = {
            let query_index = &[0];
            let attribute = &[0];
            let is_exact = &[false];
            let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap();

            number_exact_matches(query_index, attribute, is_exact, fields_counts)
        };

        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
    }

    // typing: "soulier"
    //
    // doc0: { 0. "soulier" }
    // doc1: { 0. "soulier bleu et blanc" }
    #[test]
    fn basic() {
        let doc0 = {
            let query_index = &[0];
            let attribute = &[0];
            let is_exact = &[true];
            let fields_counts = Set::new(&[(SchemaAttr(0), 1)]).unwrap();

            number_exact_matches(query_index, attribute, is_exact, fields_counts)
        };

        let doc1 = {
            let query_index = &[0];
            let attribute = &[0];
            let is_exact = &[true];
            let fields_counts = Set::new(&[(SchemaAttr(0), 4)]).unwrap();

            number_exact_matches(query_index, attribute, is_exact, fields_counts)
        };

        let doc0 = number_exact_matches(query_index0, is_exact0);
        let doc1 = number_exact_matches(query_index1, is_exact1);
        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
    }
}
meilidb-core/src/criterion/mod.rs
@@ -1,28 +1,26 @@
mod sum_of_typos;
mod document_id;
mod exact;
mod number_of_words;
mod words_proximity;
mod sort_by_attr;
mod sum_of_typos;
mod sum_of_words_attribute;
mod sum_of_words_position;
mod exact;
mod document_id;
mod words_proximity;

use std::cmp::Ordering;
use crate::RawDocument;
use std::cmp::Ordering;

pub use self::{
    sum_of_typos::SumOfTypos,
    number_of_words::NumberOfWords,
    document_id::DocumentId, exact::Exact, number_of_words::NumberOfWords,
    sort_by_attr::SortByAttr, sum_of_typos::SumOfTypos,
    sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_position::SumOfWordsPosition,
    words_proximity::WordsProximity,
    sum_of_words_attribute::SumOfWordsAttribute,
    sum_of_words_position::SumOfWordsPosition,
    exact::Exact,
    document_id::DocumentId,
};

pub trait Criterion: Send + Sync {
    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;

    fn name(&self) -> &'static str;
    fn name(&self) -> &str;

    #[inline]
    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
@@ -35,7 +33,7 @@ impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
        (**self).evaluate(lhs, rhs)
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        (**self).name()
    }

@@ -49,7 +47,7 @@ impl<T: Criterion + ?Sized> Criterion for Box<T> {
        (**self).evaluate(lhs, rhs)
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        (**self).name()
    }

@@ -60,17 +58,18 @@ impl<T: Criterion + ?Sized> Criterion for Box<T> {

#[derive(Default)]
pub struct CriteriaBuilder<'a> {
    inner: Vec<Box<dyn Criterion + 'a>>
    inner: Vec<Box<dyn Criterion + 'a>>,
}

impl<'a> CriteriaBuilder<'a>
{
impl<'a> CriteriaBuilder<'a> {
    pub fn new() -> CriteriaBuilder<'a> {
        CriteriaBuilder { inner: Vec::new() }
    }

    pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
        CriteriaBuilder { inner: Vec::with_capacity(capacity) }
        CriteriaBuilder {
            inner: Vec::with_capacity(capacity),
        }
    }

    pub fn reserve(&mut self, additional: usize) {
@@ -78,14 +77,16 @@ impl<'a> CriteriaBuilder<'a>
    }

    pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
    where C: Criterion,
    where
        C: Criterion,
    {
        self.push(criterion);
        self
    }

    pub fn push<C: 'a>(&mut self, criterion: C)
    where C: Criterion,
    where
        C: Criterion,
    {
        self.inner.push(Box::new(criterion));
    }
meilidb-core/src/criterion/number_of_words.rs
@@ -1,7 +1,7 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
use slice_group_by::GroupBy;
use std::cmp::Ordering;

#[inline]
fn number_of_query_words(query_index: &[u32]) -> usize {
@@ -25,7 +25,7 @@ impl Criterion for NumberOfWords {
        lhs.cmp(&rhs).reverse()
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        "NumberOfWords"
    }
}
meilidb-core/src/criterion/sort_by_attr.rs
@@ -2,8 +2,8 @@ use std::cmp::Ordering;
use std::error::Error;
use std::fmt;

use meilidb_core::{criterion::Criterion, RawDocument};
use meilidb_data::RankedMap;
use crate::criterion::Criterion;
use crate::{RankedMap, RawDocument};
use meilidb_schema::{Schema, SchemaAttr};

/// A helper struct that permits sorting documents by
@@ -51,8 +51,7 @@ impl<'a> SortByAttr<'a> {
        ranked_map: &'a RankedMap,
        schema: &Schema,
        attr_name: &str,
    ) -> Result<SortByAttr<'a>, SortByAttrError>
    {
    ) -> Result<SortByAttr<'a>, SortByAttrError> {
        SortByAttr::new(ranked_map, schema, attr_name, false)
    }

@@ -60,8 +59,7 @@ impl<'a> SortByAttr<'a> {
        ranked_map: &'a RankedMap,
        schema: &Schema,
        attr_name: &str,
    ) -> Result<SortByAttr<'a>, SortByAttrError>
    {
    ) -> Result<SortByAttr<'a>, SortByAttrError> {
        SortByAttr::new(ranked_map, schema, attr_name, true)
    }

@@ -70,8 +68,7 @@ impl<'a> SortByAttr<'a> {
        schema: &Schema,
        attr_name: &str,
        reversed: bool,
    ) -> Result<SortByAttr<'a>, SortByAttrError>
    {
    ) -> Result<SortByAttr<'a>, SortByAttrError> {
        let attr = match schema.attribute(attr_name) {
            Some(attr) => attr,
            None => return Err(SortByAttrError::AttributeNotFound),
@@ -81,7 +78,11 @@ impl<'a> SortByAttr<'a> {
            return Err(SortByAttrError::AttributeNotRegisteredForRanking);
        }

        Ok(SortByAttr { ranked_map, attr, reversed })
        Ok(SortByAttr {
            ranked_map,
            attr,
            reversed,
        })
    }
}

@@ -93,15 +94,19 @@ impl<'a> Criterion for SortByAttr<'a> {
        match (lhs, rhs) {
            (Some(lhs), Some(rhs)) => {
                let order = lhs.cmp(&rhs);
                if self.reversed { order.reverse() } else { order }
            },
            (None, Some(_)) => Ordering::Greater,
            (Some(_), None) => Ordering::Less,
            (None, None) => Ordering::Equal,
                if self.reversed {
                    order.reverse()
                } else {
                    order
                }
            }
            (None, Some(_)) => Ordering::Greater,
            (Some(_), None) => Ordering::Less,
            (None, None) => Ordering::Equal,
        }
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        "SortByAttr"
    }
}
@@ -122,4 +127,4 @@ impl fmt::Display for SortByAttrError {
    }
}

impl Error for SortByAttrError { }
impl Error for SortByAttrError {}
meilidb-core/src/criterion/sum_of_typos.rs
@@ -11,10 +11,10 @@ use crate::RawDocument;
#[inline]
fn custom_log10(n: u8) -> f32 {
    match n {
        0 => 0.0, // log(1)
        1 => 0.30102, // log(2)
        2 => 0.47712, // log(3)
        3 => 0.60205, // log(4)
        0 => 0.0,     // log(1)
        1 => 0.30102, // log(2)
        2 => 0.47712, // log(3)
        3 => 0.60205, // log(4)
        _ => panic!("invalid number"),
    }
}
@@ -54,7 +54,7 @@ impl Criterion for SumOfTypos {
        lhs.cmp(&rhs).reverse()
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        "SumOfTypos"
    }
}
meilidb-core/src/criterion/sum_of_words_attribute.rs
@@ -1,7 +1,7 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
use slice_group_by::GroupBy;
use std::cmp::Ordering;

#[inline]
fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
@@ -36,7 +36,7 @@ impl Criterion for SumOfWordsAttribute {
        lhs.cmp(&rhs)
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        "SumOfWordsAttribute"
    }
}
meilidb-core/src/criterion/sum_of_words_position.rs
@@ -1,7 +1,7 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
use slice_group_by::GroupBy;
use std::cmp::Ordering;

#[inline]
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
@@ -36,7 +36,7 @@ impl Criterion for SumOfWordsPosition {
        lhs.cmp(&rhs)
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        "SumOfWordsPosition"
    }
}
meilidb-core/src/criterion/words_proximity.rs
@@ -1,7 +1,7 @@
use std::cmp::{self, Ordering};
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
use slice_group_by::GroupBy;
use std::cmp::{self, Ordering};

const MAX_DISTANCE: u16 = 8;

@@ -19,7 +19,9 @@ fn index_proximity(lhs: u16, rhs: u16) -> u16 {
}

fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
    if lattr != rattr { return MAX_DISTANCE }
    if lattr != rattr {
        return MAX_DISTANCE;
    }
    index_proximity(lwi, rwi)
}

@@ -42,15 +44,18 @@ fn matches_proximity(
    distance: &[u8],
    attribute: &[u16],
    word_index: &[u16],
) -> u16
{
) -> u16 {
    let mut query_index_groups = query_index.linear_group();
    let mut proximity = 0;
    let mut index = 0;

    let get_attr_wi = |index: usize, group_len: usize| {
        // retrieve the first distance group (with the lowest values)
        let len = distance[index..index + group_len].linear_group().next().unwrap().len();
        let len = distance[index..index + group_len]
            .linear_group()
            .next()
            .unwrap()
            .len();

        let rattr = &attribute[index..index + len];
        let rwi = &word_index[index..index + len];
@@ -99,7 +104,7 @@ impl Criterion for WordsProximity {
        lhs.cmp(&rhs)
    }

    fn name(&self) -> &'static str {
    fn name(&self) -> &str {
        "WordsProximity"
    }
}
@@ -110,7 +115,6 @@ mod tests {

    #[test]
    fn three_different_attributes() {

        // "soup" "of the" "the day"
        //
        // { id: 0, attr: 0, attr_index: 0 }
@@ -120,19 +124,21 @@ mod tests {
        // { id: 3, attr: 3, attr_index: 1 }

        let query_index = &[0, 1, 2, 2, 3];
        let distance = &[0, 0, 0, 0, 0];
        let attribute = &[0, 1, 1, 2, 3];
        let word_index = &[0, 0, 1, 0, 1];
        let distance = &[0, 0, 0, 0, 0];
        let attribute = &[0, 1, 1, 2, 3];
        let word_index = &[0, 0, 1, 0, 1];

        // soup -> of = 8
        // + of -> the = 1
        // + the -> day = 8 (not 1)
        assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 17);
        assert_eq!(
            matches_proximity(query_index, distance, attribute, word_index),
            17
        );
    }

    #[test]
    fn two_different_attributes() {

        // "soup day" "soup of the day"
        //
        // { id: 0, attr: 0, attr_index: 0 }
@@ -143,13 +149,16 @@ mod tests {
        // { id: 3, attr: 1, attr_index: 3 }

        let query_index = &[0, 0, 1, 2, 3, 3];
        let distance = &[0, 0, 0, 0, 0, 0];
        let attribute = &[0, 1, 1, 1, 0, 1];
        let word_index = &[0, 0, 1, 2, 1, 3];
        let distance = &[0, 0, 0, 0, 0, 0];
        let attribute = &[0, 1, 1, 1, 0, 1];
        let word_index = &[0, 0, 1, 2, 1, 3];

        // soup -> of = 1
        // + of -> the = 1
        // + the -> day = 1
        assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 3);
        assert_eq!(
            matches_proximity(query_index, distance, attribute, word_index),
            3
        );
    }
}
meilidb-core/src/database.rs (new file, 205 lines)
@@ -0,0 +1,205 @@
use std::collections::hash_map::{Entry, HashMap};
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::{fs, thread};
|
||||
|
||||
use crossbeam_channel::Receiver;
|
||||
use log::{debug, error};
|
||||
use zlmdb::types::{Str, Unit};
|
||||
use zlmdb::{CompactionOption, Result as ZResult};
|
||||
|
||||
use crate::{store, update, Index, MResult};
|
||||
|
||||
pub type BoxUpdateFn = Box<dyn Fn(update::UpdateResult) + Send + Sync + 'static>;
|
||||
type ArcSwapFn = arc_swap::ArcSwapOption<BoxUpdateFn>;
|
||||
|
||||
pub struct Database {
|
||||
pub env: zlmdb::Env,
|
||||
common_store: zlmdb::DynDatabase,
|
||||
indexes_store: zlmdb::Database<Str, Unit>,
|
||||
indexes: RwLock<HashMap<String, (Index, Arc<ArcSwapFn>, thread::JoinHandle<()>)>>,
|
||||
}
|
||||
|
||||
fn update_awaiter(
|
||||
receiver: Receiver<()>,
|
||||
env: zlmdb::Env,
|
||||
update_fn: Arc<ArcSwapFn>,
|
||||
index: Index,
|
||||
) {
|
||||
for () in receiver {
|
||||
// consume all updates in order (oldest first)
|
||||
loop {
|
||||
let mut writer = match env.write_txn() {
|
||||
Ok(writer) => writer,
|
||||
Err(e) => {
|
||||
error!("LMDB writer transaction begin failed: {}", e);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
match update::update_task(&mut writer, index.clone()) {
|
||||
Ok(Some(status)) => {
|
||||
if let Err(e) = writer.commit() {
|
||||
error!("update transaction failed: {}", e)
|
||||
}
|
||||
|
||||
if let Some(ref callback) = *update_fn.load() {
|
||||
(callback)(status);
|
||||
}
|
||||
}
|
||||
// no more updates to handle for now
|
||||
Ok(None) => {
|
||||
debug!("no more updates");
|
||||
writer.abort();
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("update task failed: {}", e);
|
||||
writer.abort()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Database {
|
||||
pub fn open_or_create(path: impl AsRef<Path>) -> MResult<Database> {
|
||||
fs::create_dir_all(path.as_ref())?;
|
||||
|
||||
        let env = zlmdb::EnvOpenOptions::new()
            .map_size(10 * 1024 * 1024 * 1024) // 10GB
            .max_dbs(3000)
            .open(path)?;

        let common_store = env.create_dyn_database(Some("common"))?;
        let indexes_store = env.create_database::<Str, Unit>(Some("indexes"))?;

        // list all indexes that needs to be opened
        let mut must_open = Vec::new();
        let reader = env.read_txn()?;
        for result in indexes_store.iter(&reader)? {
            let (index_name, _) = result?;
            must_open.push(index_name.to_owned());
        }

        reader.abort();

        // open the previously aggregated indexes
        let mut indexes = HashMap::new();
        for index_name in must_open {
            let (sender, receiver) = crossbeam_channel::bounded(100);
            let index = match store::open(&env, &index_name, sender.clone())? {
                Some(index) => index,
                None => {
                    log::warn!(
                        "the index {} doesn't exist or has not all the databases",
                        index_name
                    );
                    continue;
                }
            };
            let update_fn = Arc::new(ArcSwapFn::empty());

            let env_clone = env.clone();
            let index_clone = index.clone();
            let update_fn_clone = update_fn.clone();

            let handle = thread::spawn(move || {
                update_awaiter(receiver, env_clone, update_fn_clone, index_clone)
            });

            // send an update notification to make sure that
            // possible pre-boot updates are consumed
            sender.send(()).unwrap();

            let result = indexes.insert(index_name, (index, update_fn, handle));
            assert!(
                result.is_none(),
                "The index should not have been already open"
            );
        }

        Ok(Database {
            env,
            common_store,
            indexes_store,
            indexes: RwLock::new(indexes),
        })
    }

    pub fn open_index(&self, name: impl AsRef<str>) -> Option<Index> {
        let indexes_lock = self.indexes.read().unwrap();
        match indexes_lock.get(name.as_ref()) {
            Some((index, ..)) => Some(index.clone()),
            None => None,
        }
    }

    pub fn create_index(&self, name: impl AsRef<str>) -> MResult<Index> {
        let name = name.as_ref();
        let mut indexes_lock = self.indexes.write().unwrap();

        match indexes_lock.entry(name.to_owned()) {
            Entry::Occupied(_) => Err(crate::Error::IndexAlreadyExists),
            Entry::Vacant(entry) => {
                let (sender, receiver) = crossbeam_channel::bounded(100);
                let index = store::create(&self.env, name, sender)?;

                let mut writer = self.env.write_txn()?;
                self.indexes_store.put(&mut writer, name, &())?;

                let env_clone = self.env.clone();
                let index_clone = index.clone();

                let no_update_fn = Arc::new(ArcSwapFn::empty());
                let no_update_fn_clone = no_update_fn.clone();

                let handle = thread::spawn(move || {
                    update_awaiter(receiver, env_clone, no_update_fn_clone, index_clone)
                });

                writer.commit()?;
                entry.insert((index.clone(), no_update_fn, handle));

                Ok(index)
            }
        }
    }

    pub fn set_update_callback(&self, name: impl AsRef<str>, update_fn: BoxUpdateFn) -> bool {
        let indexes_lock = self.indexes.read().unwrap();
        match indexes_lock.get(name.as_ref()) {
            Some((_, current_update_fn, _)) => {
                let update_fn = Some(Arc::new(update_fn));
                current_update_fn.swap(update_fn);
                true
            }
            None => false,
        }
    }

    pub fn unset_update_callback(&self, name: impl AsRef<str>) -> bool {
        let indexes_lock = self.indexes.read().unwrap();
        match indexes_lock.get(name.as_ref()) {
            Some((_, current_update_fn, _)) => {
                current_update_fn.swap(None);
                true
            }
            None => false,
        }
    }

    pub fn copy_and_compact_to_path<P: AsRef<Path>>(&self, path: P) -> ZResult<File> {
        self.env.copy_to_path(path, CompactionOption::Enabled)
    }

    pub fn indexes_names(&self) -> MResult<Vec<String>> {
        let indexes = self.indexes.read().unwrap();
        Ok(indexes.keys().cloned().collect())
    }

    pub fn common_store(&self) -> zlmdb::DynDatabase {
        self.common_store
    }
}
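A minimal usage sketch of the `Database` API above; the `open_or_create` constructor name and the exact callback signature are assumptions inferred from context, while `create_index`, `open_index` and `set_update_callback` appear verbatim in the diff:

fn database_example() -> MResult<()> {
    let database = Database::open_or_create("test.mdb")?; // assumed constructor

    // a second call with the same name fails with Error::IndexAlreadyExists
    let index = database.create_index("movies")?;

    // run by the update_awaiter thread after each applied update;
    // the closure argument type is an assumption
    database.set_update_callback("movies", Box::new(|update| {
        println!("update applied: {:?}", update);
    }));

    assert!(database.open_index("movies").is_some());
    drop(index);
    Ok(())
}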
@@ -1,5 +1,5 @@
use std::hash::Hash;
use hashbrown::HashMap;
use std::hash::Hash;

pub struct DistinctMap<K> {
    inner: HashMap<K, usize>,
107
meilidb-core/src/error.rs
Normal file
@@ -0,0 +1,107 @@
use crate::serde::{DeserializerError, SerializerError};
use serde_json::Error as SerdeJsonError;
use std::{error, fmt, io};

pub type MResult<T> = Result<T, Error>;

#[derive(Debug)]
pub enum Error {
    Io(io::Error),
    IndexAlreadyExists,
    SchemaDiffer,
    SchemaMissing,
    WordIndexMissing,
    MissingDocumentId,
    Zlmdb(zlmdb::Error),
    Fst(fst::Error),
    SerdeJson(SerdeJsonError),
    Bincode(bincode::Error),
    Serializer(SerializerError),
    Deserializer(DeserializerError),
    UnsupportedOperation(UnsupportedOperation),
}

impl From<io::Error> for Error {
    fn from(error: io::Error) -> Error {
        Error::Io(error)
    }
}

impl From<zlmdb::Error> for Error {
    fn from(error: zlmdb::Error) -> Error {
        Error::Zlmdb(error)
    }
}

impl From<fst::Error> for Error {
    fn from(error: fst::Error) -> Error {
        Error::Fst(error)
    }
}

impl From<SerdeJsonError> for Error {
    fn from(error: SerdeJsonError) -> Error {
        Error::SerdeJson(error)
    }
}

impl From<bincode::Error> for Error {
    fn from(error: bincode::Error) -> Error {
        Error::Bincode(error)
    }
}

impl From<SerializerError> for Error {
    fn from(error: SerializerError) -> Error {
        Error::Serializer(error)
    }
}

impl From<DeserializerError> for Error {
    fn from(error: DeserializerError) -> Error {
        Error::Deserializer(error)
    }
}

impl From<UnsupportedOperation> for Error {
    fn from(op: UnsupportedOperation) -> Error {
        Error::UnsupportedOperation(op)
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;
        match self {
            Io(e) => write!(f, "{}", e),
            IndexAlreadyExists => write!(f, "index already exists"),
            SchemaDiffer => write!(f, "schemas differ"),
            SchemaMissing => write!(f, "this index does not have a schema"),
            WordIndexMissing => write!(f, "this index does not have a word index"),
            MissingDocumentId => write!(f, "document id is missing"),
            Zlmdb(e) => write!(f, "zlmdb error; {}", e),
            Fst(e) => write!(f, "fst error; {}", e),
            SerdeJson(e) => write!(f, "serde json error; {}", e),
            Bincode(e) => write!(f, "bincode error; {}", e),
            Serializer(e) => write!(f, "serializer error; {}", e),
            Deserializer(e) => write!(f, "deserializer error; {}", e),
            UnsupportedOperation(op) => write!(f, "unsupported operation; {}", op),
        }
    }
}

impl error::Error for Error {}

#[derive(Debug)]
pub enum UnsupportedOperation {
    SchemaAlreadyExists,
}

impl fmt::Display for UnsupportedOperation {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::UnsupportedOperation::*;
        match self {
            SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"),
        }
    }
}
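The `From` impls above are what let `?` funnel every underlying failure into a single `MResult`. A minimal illustration (the helper function itself is hypothetical):

use meilidb_core::MResult;

// fst::Error converts into Error::Fst automatically through the impl above
fn open_words_set(bytes: Vec<u8>) -> MResult<fst::Set> {
    let set = fst::Set::from_bytes(bytes)?;
    Ok(set)
}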
@@ -1,33 +1,51 @@
#![feature(checked_duration_since)]

#[cfg(test)]
#[macro_use] extern crate assert_matches;
#[macro_use]
extern crate assert_matches;

mod automaton;
mod distinct_map;
mod query_builder;
mod query_enhancer;
mod raw_document;
mod reordered_attrs;
mod store;
pub mod criterion;
mod database;
mod distinct_map;
mod error;
mod number;
mod query_builder;
mod ranked_map;
mod raw_document;
pub mod raw_indexer;
mod reordered_attrs;
pub mod serde;
pub mod store;
mod update;

use serde::{Serialize, Deserialize};
use zerocopy::{AsBytes, FromBytes};

use self::raw_document::raw_documents_from;

pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str};
pub use self::database::{BoxUpdateFn, Database};
pub use self::error::{Error, MResult};
pub use self::number::{Number, ParseNumberError};
pub use self::ranked_map::RankedMap;
pub use self::raw_document::RawDocument;
pub use self::store::Store;
pub use self::store::Index;
pub use self::update::{UpdateResult, UpdateStatus, UpdateType};

use ::serde::{Deserialize, Serialize};
use zerocopy::{AsBytes, FromBytes};

/// Represent an internally generated document unique identifier.
///
/// It is used to inform the database the document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize)]
#[derive(AsBytes, FromBytes)]
#[derive(
    Debug,
    Copy,
    Clone,
    Eq,
    PartialEq,
    PartialOrd,
    Ord,
    Hash,
    Serialize,
    Deserialize,
    AsBytes,
    FromBytes,
)]
#[repr(C)]
pub struct DocumentId(pub u64);

@@ -36,8 +54,7 @@ pub struct DocumentId(pub u64);
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(AsBytes, FromBytes)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocIndex {
    /// The document identifier where the word was found.
@@ -103,7 +120,10 @@ pub struct Document {
impl Document {
    #[cfg(not(test))]
    fn from_raw(raw: RawDocument) -> Document {
        Document { id: raw.id, highlights: raw.highlights }
        Document {
            id: raw.id,
            highlights: raw.highlights,
        }
    }

    #[cfg(test)]
@@ -128,7 +148,11 @@ impl Document {
            matches.push(match_);
        }

        Document { id: raw.id, matches, highlights: raw.highlights }
        Document {
            id: raw.id,
            matches,
            highlights: raw.highlights,
        }
    }
}

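The merged derive list above is also why `DocumentId` is `#[repr(C)]`: with zerocopy's `AsBytes`/`FromBytes` the identifier can be written to and read from LMDB as its raw bytes, with no serialization step. A minimal illustration (not part of the commit):

use zerocopy::AsBytes;

let id = DocumentId(42);
let bytes: &[u8] = id.as_bytes(); // the raw 8 bytes of the inner u64
assert_eq!(bytes.len(), std::mem::size_of::<u64>());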
@@ -1,12 +1,11 @@
use std::num::{ParseIntError, ParseFloatError};
use std::str::FromStr;
use std::fmt;
use std::num::{ParseFloatError, ParseIntError};
use std::str::FromStr;

use ordered_float::OrderedFloat;
use serde::{Serialize, Deserialize};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Number {
    Unsigned(u64),
    Signed(i64),
@@ -32,7 +31,11 @@ impl FromStr for Number {
            Err(error) => error,
        };

        Err(ParseNumberError { uint_error, int_error, float_error })
        Err(ParseNumberError {
            uint_error,
            int_error,
            float_error,
        })
    }
}

@@ -46,10 +49,17 @@ pub struct ParseNumberError {
impl fmt::Display for ParseNumberError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.uint_error == self.int_error {
            write!(f, "can not parse number: {}, {}", self.uint_error, self.float_error)
            write!(
                f,
                "can not parse number: {}, {}",
                self.uint_error, self.float_error
            )
        } else {
            write!(f, "can not parse number: {}, {}, {}",
                self.uint_error, self.int_error, self.float_error)
            write!(
                f,
                "can not parse number: {}, {}, {}",
                self.uint_error, self.int_error, self.float_error
            )
        }
    }
}
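Because `Number` implements `FromStr`, callers reach this code through `str::parse`; all three sub-parsers must fail before the reformatted `ParseNumberError` above is built. A small sketch (the variant comments are inferred from the field order above):

let unsigned: Number = "42".parse().unwrap();  // presumably Number::Unsigned(42)
let signed: Number = "-42".parse().unwrap();   // presumably Number::Signed(-42)
let error = "not a number".parse::<Number>().unwrap_err();
println!("{}", error); // "can not parse number: ..."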
File diff suppressed because it is too large
@@ -1,12 +1,13 @@
use std::io::{Read, Write};

use hashbrown::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use serde::{Deserialize, Serialize};

use crate::Number;
use crate::{DocumentId, Number};

#[derive(Debug, Default, Clone, PartialEq, Eq)]
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(transparent)]
pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>);

impl RankedMap {
@@ -14,6 +15,10 @@ impl RankedMap {
        self.0.len()
    }

    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    pub fn insert(&mut self, document: DocumentId, attribute: SchemaAttr, number: Number) {
        self.0.insert((document, attribute), number);
    }
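Now that `Serialize`/`Deserialize` are derived with `#[serde(transparent)]`, a `RankedMap` serializes as the plain map it wraps. A minimal sketch of the API shown above:

let mut ranked_map = RankedMap::default();
ranked_map.insert(DocumentId(0), SchemaAttr(1), Number::Unsigned(7));
assert_eq!(ranked_map.len(), 1);
assert!(!ranked_map.is_empty());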
@@ -1,26 +1,32 @@
use std::sync::Arc;
use std::fmt;
use std::sync::Arc;

use meilidb_schema::SchemaAttr;
use sdset::SetBuf;
use slice_group_by::GroupBy;
use crate::{TmpMatch, DocumentId, Highlight};

use crate::{DocumentId, Highlight, TmpMatch};

#[derive(Clone)]
pub struct RawDocument {
    pub id: DocumentId,
    pub matches: SharedMatches,
    pub highlights: Vec<Highlight>,
    pub fields_counts: SetBuf<(SchemaAttr, u64)>,
}

impl RawDocument {
    fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
        RawDocument { id, matches, highlights }
    }

    pub fn query_index(&self) -> &[u32] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
        unsafe {
            &self
                .matches
                .matches
                .query_index
                .get_unchecked(r.start..r.end)
        }
    }

    pub fn distance(&self) -> &[u8] {
@@ -41,7 +47,13 @@ impl RawDocument {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
        unsafe {
            &self
                .matches
                .matches
                .word_index
                .get_unchecked(r.start..r.end)
        }
    }

    pub fn is_exact(&self) -> &[bool] {
@@ -55,12 +67,32 @@ impl RawDocument {
impl fmt::Debug for RawDocument {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("RawDocument {\r\n")?;
        f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "query_index", self.query_index()))?;
        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "distance", self.distance()))?;
        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "attribute", self.attribute()))?;
        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "word_index", self.word_index()))?;
        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "is_exact", self.is_exact()))?;
        f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
        f.write_fmt(format_args!(
            "{:>15}: {:^5?},\r\n",
            "query_index",
            self.query_index()
        ))?;
        f.write_fmt(format_args!(
            "{:>15}: {:^5?},\r\n",
            "distance",
            self.distance()
        ))?;
        f.write_fmt(format_args!(
            "{:>15}: {:^5?},\r\n",
            "attribute",
            self.attribute()
        ))?;
        f.write_fmt(format_args!(
            "{:>15}: {:^5?},\r\n",
            "word_index",
            self.word_index()
        ))?;
        f.write_fmt(format_args!(
            "{:>15}: {:^5?},\r\n",
            "is_exact",
            self.is_exact()
        ))?;
        f.write_str("}")?;
        Ok(())
    }
@@ -69,32 +101,45 @@ impl fmt::Debug for RawDocument {
pub fn raw_documents_from(
    matches: SetBuf<(DocumentId, TmpMatch)>,
    highlights: SetBuf<(DocumentId, Highlight)>,
) -> Vec<RawDocument>
{
    let mut docs_ranges: Vec<(_, Range, _)> = Vec::new();
    fields_counts: SetBuf<(DocumentId, SchemaAttr, u64)>,
) -> Vec<RawDocument> {
    let mut docs_ranges: Vec<(_, Range, _, _)> = Vec::new();
    let mut matches2 = Matches::with_capacity(matches.len());

    let matches = matches.linear_group_by_key(|(id, _)| *id);
    let highlights = highlights.linear_group_by_key(|(id, _)| *id);
    let fields_counts = fields_counts.linear_group_by_key(|(id, _, _)| *id);

    for (mgroup, hgroup) in matches.zip(highlights) {
    for ((mgroup, hgroup), fgroup) in matches.zip(highlights).zip(fields_counts) {
        debug_assert_eq!(mgroup[0].0, hgroup[0].0);
        debug_assert_eq!(mgroup[0].0, fgroup[0].0);

        let document_id = mgroup[0].0;
        let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0);
        let start = docs_ranges.last().map(|(_, r, _, _)| r.end).unwrap_or(0);
        let end = start + mgroup.len();

        let highlights = hgroup.iter().map(|(_, h)| *h).collect();
        docs_ranges.push((document_id, Range { start, end }, highlights));
        let fields_counts = SetBuf::new(fgroup.iter().map(|(_, a, c)| (*a, *c)).collect()).unwrap();

        docs_ranges.push((document_id, Range { start, end }, highlights, fields_counts));
        matches2.extend_from_slice(mgroup);
    }

    let matches = Arc::new(matches2);
    docs_ranges.into_iter().map(|(id, range, highlights)| {
        let matches = SharedMatches { range, matches: matches.clone() };
        RawDocument::new(id, matches, highlights)
    }).collect()
    docs_ranges
        .into_iter()
        .map(|(id, range, highlights, fields_counts)| {
            let matches = SharedMatches {
                range,
                matches: matches.clone(),
            };
            RawDocument {
                id,
                matches,
                highlights,
                fields_counts,
            }
        })
        .collect()
}

#[derive(Debug, Copy, Clone)]
@@ -1,15 +1,15 @@
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;

use crate::{DocIndex, DocumentId};
use deunicode::deunicode_with_tofu;
use meilidb_core::{DocumentId, DocIndex};
use meilidb_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
use meilidb_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
use sdset::SetBuf;

type Word = Vec<u8>; // TODO make it be a SmallVec

pub struct Indexer {
pub struct RawIndexer {
    word_limit: usize, // the maximum number of indexed words
    words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
    docs_words: HashMap<DocumentId, Vec<Word>>,
@@ -20,20 +20,21 @@ pub struct Indexed {
    pub docs_words: HashMap<DocumentId, fst::Set>,
}

impl Indexer {
    pub fn new() -> Indexer {
        Indexer::with_word_limit(1000)
impl RawIndexer {
    pub fn new() -> RawIndexer {
        RawIndexer::with_word_limit(1000)
    }

    pub fn with_word_limit(limit: usize) -> Indexer {
        Indexer {
    pub fn with_word_limit(limit: usize) -> RawIndexer {
        RawIndexer {
            word_limit: limit,
            words_doc_indexes: BTreeMap::new(),
            docs_words: HashMap::new(),
        }
    }

    pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
    pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) -> usize {
        let mut number_of_words = 0;
        let lowercase_text = text.to_lowercase();
        let deunicoded = deunicode_with_tofu(&lowercase_text, "");

@@ -46,6 +47,9 @@ impl Indexer {
        let iter = Some(lowercase_text).into_iter().chain(next);

        for text in iter {
            // we must not count 2 times the same words
            number_of_words = 0;

            for token in Tokenizer::new(&text) {
                let must_continue = index_token(
                    token,
@@ -56,14 +60,21 @@ impl Indexer {
                    &mut self.docs_words,
                );

                if !must_continue { break }
                if !must_continue {
                    break;
                }

                number_of_words += 1;
            }
        }

        number_of_words
    }

    pub fn index_text_seq<'a, I, IT>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
    where I: IntoIterator<Item=&'a str, IntoIter=IT>,
          IT: Iterator<Item = &'a str> + Clone,
    where
        I: IntoIterator<Item = &'a str, IntoIter = IT>,
        IT: Iterator<Item = &'a str> + Clone,
    {
        // TODO serialize this to one call to the SeqTokenizer loop

@@ -80,14 +91,25 @@ impl Indexer {
                &mut self.docs_words,
            );

            if !must_continue { break }
            if !must_continue {
                break;
            }
        }

        let deunicoded: Vec<_> = lowercased.into_iter().map(|lowercase_text| {
            if lowercase_text.contains(is_cjk) { return lowercase_text }
            let deunicoded = deunicode_with_tofu(&lowercase_text, "");
            if lowercase_text != deunicoded { deunicoded } else { lowercase_text }
        }).collect();
        let deunicoded: Vec<_> = lowercased
            .into_iter()
            .map(|lowercase_text| {
                if lowercase_text.contains(is_cjk) {
                    return lowercase_text;
                }
                let deunicoded = deunicode_with_tofu(&lowercase_text, "");
                if lowercase_text != deunicoded {
                    deunicoded
                } else {
                    lowercase_text
                }
            })
            .collect();
        let iter = deunicoded.iter().map(|t| t.as_str());

        for token in SeqTokenizer::new(iter) {
@@ -100,17 +122,21 @@ impl Indexer {
                &mut self.docs_words,
            );

            if !must_continue { break }
            if !must_continue {
                break;
            }
        }
    }

    pub fn build(self) -> Indexed {
        let words_doc_indexes = self.words_doc_indexes
        let words_doc_indexes = self
            .words_doc_indexes
            .into_iter()
            .map(|(word, indexes)| (word, SetBuf::from_dirty(indexes)))
            .collect();

        let docs_words = self.docs_words
        let docs_words = self
            .docs_words
            .into_iter()
            .map(|(id, mut words)| {
                words.sort_unstable();
@@ -119,7 +145,16 @@ impl Indexer {
            })
            .collect();

        Indexed { words_doc_indexes, docs_words }
        Indexed {
            words_doc_indexes,
            docs_words,
        }
    }
}

impl Default for RawIndexer {
    fn default() -> Self {
        Self::new()
    }
}

@@ -130,16 +165,20 @@ fn index_token(
    word_limit: usize,
    words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
    docs_words: &mut HashMap<DocumentId, Vec<Word>>,
) -> bool
{
    if token.word_index >= word_limit { return false }
) -> bool {
    if token.word_index >= word_limit {
        return false;
    }

    match token_to_docindex(id, attr, token) {
        Some(docindex) => {
            let word = Vec::from(token.word);
            words_doc_indexes.entry(word.clone()).or_insert_with(Vec::new).push(docindex);
            words_doc_indexes
                .entry(word.clone())
                .or_insert_with(Vec::new)
                .push(docindex);
            docs_words.entry(id).or_insert_with(Vec::new).push(word);
        },
        }
        None => return false,
    }

@@ -168,14 +207,16 @@ mod tests {

    #[test]
    fn strange_apostrophe() {
        let mut indexer = Indexer::new();
        let mut indexer = RawIndexer::new();

        let docid = DocumentId(0);
        let attr = SchemaAttr(0);
        let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
        indexer.index_text(docid, attr, text);

        let Indexed { words_doc_indexes, .. } = indexer.build();
        let Indexed {
            words_doc_indexes, ..
        } = indexer.build();

        assert!(words_doc_indexes.get(&b"l"[..]).is_some());
        assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
@@ -183,19 +224,23 @@ mod tests {
        assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());

        // with the ugly apostrophe...
        assert!(words_doc_indexes.get(&"l’éteindre".to_owned().into_bytes()).is_some());
        assert!(words_doc_indexes
            .get(&"l’éteindre".to_owned().into_bytes())
            .is_some());
    }

    #[test]
    fn strange_apostrophe_in_sequence() {
        let mut indexer = Indexer::new();
        let mut indexer = RawIndexer::new();

        let docid = DocumentId(0);
        let attr = SchemaAttr(0);
        let text = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"];
        indexer.index_text_seq(docid, attr, text);

        let Indexed { words_doc_indexes, .. } = indexer.build();
        let Indexed {
            words_doc_indexes, ..
        } = indexer.build();

        assert!(words_doc_indexes.get(&b"l"[..]).is_some());
        assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
@@ -203,6 +248,8 @@ mod tests {
        assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());

        // with the ugly apostrophe...
        assert!(words_doc_indexes.get(&"l’éteindre".to_owned().into_bytes()).is_some());
        assert!(words_doc_indexes
            .get(&"l’éteindre".to_owned().into_bytes())
            .is_some());
    }
}
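Condensing the tests above into the expected call sequence; note that `index_text` now returns the number of words it indexed, which the serde indexer further below relies on:

let mut indexer = RawIndexer::new(); // defaults to a 1000 word limit

let number_of_words = indexer.index_text(
    DocumentId(0),
    SchemaAttr(0),
    "Zut, l’aspirateur, j’ai oublié de l’éteindre !",
);
assert!(number_of_words > 0);

let Indexed { words_doc_indexes, docs_words } = indexer.build();
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert_eq!(docs_words.len(), 1);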
@@ -6,7 +6,10 @@ pub struct ReorderedAttrs {

impl ReorderedAttrs {
    pub fn new() -> ReorderedAttrs {
        ReorderedAttrs { count: 0, reorders: Vec::new() }
        ReorderedAttrs {
            count: 0,
            reorders: Vec::new(),
        }
    }

    pub fn insert_attribute(&mut self, attribute: u16) {
@@ -77,13 +77,18 @@ impl ser::Serializer for ConvertToNumber {
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnrankableType { type_name: "Option" })
        Err(SerializerError::UnrankableType {
            type_name: "Option",
        })
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        Err(SerializerError::UnrankableType { type_name: "Option" })
        Err(SerializerError::UnrankableType {
            type_name: "Option",
        })
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@@ -91,25 +96,29 @@ impl ser::Serializer for ConvertToNumber {
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnrankableType { type_name: "unit struct" })
        Err(SerializerError::UnrankableType {
            type_name: "unit struct",
        })
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
        Err(SerializerError::UnrankableType { type_name: "unit variant" })
        _variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnrankableType {
            type_name: "unit variant",
        })
    }

    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        value.serialize(self)
    }
@@ -119,15 +128,20 @@ impl ser::Serializer for ConvertToNumber {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        Err(SerializerError::UnrankableType { type_name: "newtype variant" })
        Err(SerializerError::UnrankableType {
            type_name: "newtype variant",
        })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        Err(SerializerError::UnrankableType { type_name: "sequence" })
        Err(SerializerError::UnrankableType {
            type_name: "sequence",
        })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
@@ -137,10 +151,11 @@ impl ser::Serializer for ConvertToNumber {
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
        Err(SerializerError::UnrankableType { type_name: "tuple struct" })
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        Err(SerializerError::UnrankableType {
            type_name: "tuple struct",
        })
    }

    fn serialize_tuple_variant(
@@ -148,10 +163,11 @@ impl ser::Serializer for ConvertToNumber {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
        Err(SerializerError::UnrankableType { type_name: "tuple variant" })
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        Err(SerializerError::UnrankableType {
            type_name: "tuple variant",
        })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
@@ -161,10 +177,11 @@ impl ser::Serializer for ConvertToNumber {
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
        Err(SerializerError::UnrankableType { type_name: "struct" })
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        Err(SerializerError::UnrankableType {
            type_name: "struct",
        })
    }

    fn serialize_struct_variant(
@@ -172,9 +189,10 @@ impl ser::Serializer for ConvertToNumber {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
        Err(SerializerError::UnrankableType { type_name: "struct variant" })
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        Err(SerializerError::UnrankableType {
            type_name: "struct variant",
        })
    }
}
@@ -1,5 +1,5 @@
use serde::Serialize;
use serde::ser;
use serde::Serialize;

use super::SerializerError;

@@ -17,7 +17,9 @@ impl ser::Serializer for ConvertToString {
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

    fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "boolean" })
        Err(SerializerError::UnserializableType {
            type_name: "boolean",
        })
    }

    fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
@@ -73,13 +75,18 @@ impl ser::Serializer for ConvertToString {
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "Option" })
        Err(SerializerError::UnserializableType {
            type_name: "Option",
        })
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        Err(SerializerError::UnserializableType { type_name: "Option" })
        Err(SerializerError::UnserializableType {
            type_name: "Option",
        })
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@@ -87,25 +94,29 @@ impl ser::Serializer for ConvertToString {
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "unit struct" })
        Err(SerializerError::UnserializableType {
            type_name: "unit struct",
        })
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "unit variant" })
        _variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "unit variant",
        })
    }

    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        value.serialize(self)
    }
@@ -115,15 +126,20 @@ impl ser::Serializer for ConvertToString {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        Err(SerializerError::UnserializableType { type_name: "newtype variant" })
        Err(SerializerError::UnserializableType {
            type_name: "newtype variant",
        })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "sequence" })
        Err(SerializerError::UnserializableType {
            type_name: "sequence",
        })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
@@ -133,10 +149,11 @@ impl ser::Serializer for ConvertToString {
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "tuple struct" })
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "tuple struct",
        })
    }

    fn serialize_tuple_variant(
@@ -144,10 +161,11 @@ impl ser::Serializer for ConvertToString {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "tuple variant" })
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "tuple variant",
        })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
@@ -157,10 +175,11 @@ impl ser::Serializer for ConvertToString {
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "struct" })
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "struct",
        })
    }

    fn serialize_struct_variant(
@@ -168,9 +187,10 @@ impl ser::Serializer for ConvertToString {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "struct variant" })
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "struct variant",
        })
    }
}
144
meilidb-core/src/serde/deserializer.rs
Normal file
@@ -0,0 +1,144 @@
use std::collections::HashSet;
use std::io::Cursor;
use std::{error::Error, fmt};

use meilidb_schema::{Schema, SchemaAttr};
use serde::{de, forward_to_deserialize_any};
use serde_json::de::IoRead as SerdeJsonIoRead;
use serde_json::Deserializer as SerdeJsonDeserializer;
use serde_json::Error as SerdeJsonError;

use crate::store::DocumentsFields;
use crate::DocumentId;

#[derive(Debug)]
pub enum DeserializerError {
    SerdeJson(SerdeJsonError),
    Zlmdb(zlmdb::Error),
    Custom(String),
}

impl de::Error for DeserializerError {
    fn custom<T: fmt::Display>(msg: T) -> Self {
        DeserializerError::Custom(msg.to_string())
    }
}

impl fmt::Display for DeserializerError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            DeserializerError::SerdeJson(e) => write!(f, "serde json related error: {}", e),
            DeserializerError::Zlmdb(e) => write!(f, "zlmdb related error: {}", e),
            DeserializerError::Custom(s) => f.write_str(s),
        }
    }
}

impl Error for DeserializerError {}

impl From<SerdeJsonError> for DeserializerError {
    fn from(error: SerdeJsonError) -> DeserializerError {
        DeserializerError::SerdeJson(error)
    }
}

impl From<zlmdb::Error> for DeserializerError {
    fn from(error: zlmdb::Error) -> DeserializerError {
        DeserializerError::Zlmdb(error)
    }
}

pub struct Deserializer<'a> {
    pub document_id: DocumentId,
    pub reader: &'a zlmdb::RoTxn,
    pub documents_fields: DocumentsFields,
    pub schema: &'a Schema,
    pub attributes: Option<&'a HashSet<SchemaAttr>>,
}

impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
    type Error = DeserializerError;

    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: de::Visitor<'de>,
    {
        self.deserialize_map(visitor)
    }

    forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
        tuple_struct struct enum identifier ignored_any
    }

    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: de::Visitor<'de>,
    {
        let mut error = None;

        let iter = self
            .documents_fields
            .document_fields(self.reader, self.document_id)?
            .filter_map(|result| {
                let (attr, value) = match result {
                    Ok(value) => value,
                    Err(e) => {
                        error = Some(e);
                        return None;
                    }
                };

                let is_displayed = self.schema.props(attr).is_displayed();
                if is_displayed && self.attributes.map_or(true, |f| f.contains(&attr)) {
                    let attribute_name = self.schema.attribute_name(attr);

                    let cursor = Cursor::new(value.to_owned());
                    let ioread = SerdeJsonIoRead::new(cursor);
                    let value = Value(SerdeJsonDeserializer::new(ioread));

                    Some((attribute_name, value))
                } else {
                    None
                }
            });

        let map_deserializer = de::value::MapDeserializer::new(iter);
        let result = visitor
            .visit_map(map_deserializer)
            .map_err(DeserializerError::from);

        match error.take() {
            Some(error) => Err(error.into()),
            None => result,
        }
    }
}

struct Value(SerdeJsonDeserializer<SerdeJsonIoRead<Cursor<Vec<u8>>>>);

impl<'de> de::IntoDeserializer<'de, SerdeJsonError> for Value {
    type Deserializer = Self;

    fn into_deserializer(self) -> Self::Deserializer {
        self
    }
}

impl<'de> de::Deserializer<'de> for Value {
    type Error = SerdeJsonError;

    fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: de::Visitor<'de>,
    {
        self.0.deserialize_any(visitor)
    }

    forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
        tuple_struct map struct enum identifier ignored_any
    }
}
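Every field of `Deserializer` is public, so rebuilding a document is a matter of filling the struct and handing it to any `serde::Deserialize` type. A sketch; where the `reader`, `documents_fields` and `schema` values come from is assumed, while the field names are exactly those defined above:

let mut deserializer = Deserializer {
    document_id: DocumentId(0),
    reader: &reader,  // a zlmdb::RoTxn, assumed to come from the surrounding index
    documents_fields, // the DocumentsFields store of the index
    schema: &schema,
    attributes: None, // None means: every displayed attribute
};
let document: serde_json::Value = serde::Deserialize::deserialize(&mut deserializer)?;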
@@ -1,17 +1,18 @@
use std::hash::{Hash, Hasher};

use meilidb_core::DocumentId;
use crate::DocumentId;
use serde::{ser, Serialize};
use serde_json::Value;
use siphasher::sip::SipHasher;

use super::{SerializerError, ConvertToString};
use super::{ConvertToString, SerializerError};

pub fn extract_document_id<D>(
    identifier: &str,
    document: &D,
) -> Result<Option<DocumentId>, SerializerError>
where D: serde::Serialize,
where
    D: serde::Serialize,
{
    let serializer = ExtractDocumentId { identifier };
    document.serialize(serializer)
@@ -77,13 +78,18 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "Option" })
        Err(SerializerError::UnserializableType {
            type_name: "Option",
        })
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        Err(SerializerError::UnserializableType { type_name: "Option" })
        Err(SerializerError::UnserializableType {
            type_name: "Option",
        })
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@@ -91,25 +97,29 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "unit struct" })
        Err(SerializerError::UnserializableType {
            type_name: "unit struct",
        })
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "unit variant" })
        _variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "unit variant",
        })
    }

    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        value.serialize(self)
    }
@@ -119,15 +129,20 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        Err(SerializerError::UnserializableType { type_name: "newtype variant" })
        Err(SerializerError::UnserializableType {
            type_name: "newtype variant",
        })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "sequence" })
        Err(SerializerError::UnserializableType {
            type_name: "sequence",
        })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
@@ -137,10 +152,11 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "tuple struct" })
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "tuple struct",
        })
    }

    fn serialize_tuple_variant(
@@ -148,10 +164,11 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "tuple variant" })
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "tuple variant",
        })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
@@ -167,9 +184,8 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        let serializer = ExtractDocumentIdStructSerializer {
            identifier: self.identifier,
            document_id: None,
@@ -183,10 +199,11 @@ impl<'a> ser::Serializer for ExtractDocumentId<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "struct variant" })
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "struct variant",
        })
    }
}

@@ -201,7 +218,8 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
    type Error = SerializerError;

    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        let key = key.serialize(ConvertToString)?;
        self.current_key_name = Some(key);
@@ -209,7 +227,8 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
    }

    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        let key = self.current_key_name.take().unwrap();
        self.serialize_entry(&key, value)
@@ -218,9 +237,11 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
    fn serialize_entry<K: ?Sized, V: ?Sized>(
        &mut self,
        key: &K,
        value: &V
        value: &V,
    ) -> Result<(), Self::Error>
    where K: Serialize, V: Serialize,
    where
        K: Serialize,
        V: Serialize,
    {
        let key = key.serialize(ConvertToString)?;

@@ -252,9 +273,10 @@ impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
    fn serialize_field<T: ?Sized>(
        &mut self,
        key: &'static str,
        value: &T
        value: &T,
    ) -> Result<(), Self::Error>
    where T: Serialize,
    where
        T: Serialize,
    {
        if self.identifier == key {
            let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
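`extract_document_id` drives the serializer above over any `Serialize` document and returns the value found under the identifier key. A sketch with a hypothetical document type:

#[derive(serde::Serialize)]
struct Movie {
    id: String,
    title: String,
}

let movie = Movie { id: "m-17".into(), title: "Alien".into() };

// Ok(Some(DocumentId)) when the "id" field exists and is a string or number;
// Ok(None) is presumably returned when the field is absent.
let document_id = extract_document_id("id", &movie)?;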
@@ -1,10 +1,10 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use serde::ser;
use serde::Serialize;

use crate::indexer::Indexer as RawIndexer;
use super::{SerializerError, ConvertToString};
use super::{ConvertToString, SerializerError};
use crate::raw_indexer::RawIndexer;
use crate::DocumentId;

pub struct Indexer<'a> {
    pub attribute: SchemaAttr,
@@ -13,7 +13,7 @@ pub struct Indexer<'a> {
}

impl<'a> ser::Serializer for Indexer<'a> {
    type Ok = ();
    type Ok = Option<usize>;
    type Error = SerializerError;
    type SerializeSeq = SeqIndexer<'a>;
    type SerializeTuple = TupleIndexer<'a>;
@@ -24,7 +24,9 @@ impl<'a> ser::Serializer for Indexer<'a> {
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

    fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnindexableType { type_name: "boolean" })
        Err(SerializerError::UnindexableType {
            type_name: "boolean",
        })
    }

    fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
@@ -83,8 +85,10 @@ impl<'a> ser::Serializer for Indexer<'a> {
    }

    fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
        self.indexer.index_text(self.document_id, self.attribute, text);
        Ok(())
        let number_of_words = self
            .indexer
            .index_text(self.document_id, self.attribute, text);
        Ok(Some(number_of_words))
    }

    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
@@ -92,15 +96,20 @@ impl<'a> ser::Serializer for Indexer<'a> {
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnindexableType { type_name: "Option" })
        Err(SerializerError::UnindexableType {
            type_name: "Option",
        })
    }

    fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        let text = value.serialize(ConvertToString)?;
        self.indexer.index_text(self.document_id, self.attribute, &text);
        Ok(())
        let number_of_words = self
            .indexer
            .index_text(self.document_id, self.attribute, &text);
        Ok(Some(number_of_words))
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@@ -108,25 +117,29 @@ impl<'a> ser::Serializer for Indexer<'a> {
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnindexableType { type_name: "unit struct" })
        Err(SerializerError::UnindexableType {
            type_name: "unit struct",
        })
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
        Err(SerializerError::UnindexableType { type_name: "unit variant" })
        _variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnindexableType {
            type_name: "unit variant",
        })
    }

    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        value.serialize(self)
    }
@@ -136,11 +149,14 @@ impl<'a> ser::Serializer for Indexer<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        Err(SerializerError::UnindexableType { type_name: "newtype variant" })
        Err(SerializerError::UnindexableType {
            type_name: "newtype variant",
        })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
@@ -168,10 +184,11 @@ impl<'a> ser::Serializer for Indexer<'a> {
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
        Err(SerializerError::UnindexableType { type_name: "tuple struct" })
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        Err(SerializerError::UnindexableType {
            type_name: "tuple struct",
        })
    }

    fn serialize_tuple_variant(
@@ -179,10 +196,11 @@ impl<'a> ser::Serializer for Indexer<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
        Err(SerializerError::UnindexableType { type_name: "tuple variant" })
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        Err(SerializerError::UnindexableType {
            type_name: "tuple variant",
        })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
@@ -199,10 +217,11 @@ impl<'a> ser::Serializer for Indexer<'a> {
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
        Err(SerializerError::UnindexableType { type_name: "struct" })
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        Err(SerializerError::UnindexableType {
            type_name: "struct",
        })
    }

    fn serialize_struct_variant(
@@ -210,10 +229,11 @@ impl<'a> ser::Serializer for Indexer<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
        Err(SerializerError::UnindexableType { type_name: "struct variant" })
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        Err(SerializerError::UnindexableType {
            type_name: "struct variant",
        })
    }
}

@@ -225,11 +245,12 @@ pub struct SeqIndexer<'a> {
}

impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
    type Ok = ();
    type Ok = Option<usize>;
    type Error = SerializerError;

    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where T: ser::Serialize
    where
        T: ser::Serialize,
    {
        let text = value.serialize(ConvertToString)?;
        self.texts.push(text);
@@ -238,8 +259,9 @@ impl<'a> ser::SerializeSeq for SeqIndexer<'a> {

    fn end(self) -> Result<Self::Ok, Self::Error> {
        let texts = self.texts.iter().map(String::as_str);
        self.indexer.index_text_seq(self.document_id, self.attribute, texts);
        Ok(())
        self.indexer
            .index_text_seq(self.document_id, self.attribute, texts);
        Ok(None)
    }
}

@@ -251,11 +273,12 @@ pub struct MapIndexer<'a> {
}

impl<'a> ser::SerializeMap for MapIndexer<'a> {
    type Ok = ();
    type Ok = Option<usize>;
    type Error = SerializerError;

    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        let text = key.serialize(ConvertToString)?;
        self.texts.push(text);
@@ -263,7 +286,8 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {
    }

    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        let text = value.serialize(ConvertToString)?;
        self.texts.push(text);
@@ -272,8 +296,9 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {

    fn end(self) -> Result<Self::Ok, Self::Error> {
        let texts = self.texts.iter().map(String::as_str);
        self.indexer.index_text_seq(self.document_id, self.attribute, texts);
        Ok(())
        self.indexer
            .index_text_seq(self.document_id, self.attribute, texts);
        Ok(None)
    }
}

@@ -285,7 +310,7 @@ pub struct StructSerializer<'a> {
}

impl<'a> ser::SerializeStruct for StructSerializer<'a> {
    type Ok = ();
    type Ok = Option<usize>;
    type Error = SerializerError;

    fn serialize_field<T: ?Sized>(
@@ -293,7 +318,8 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
        key: &'static str,
        value: &T,
    ) -> Result<(), Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        let key_text = key.to_owned();
        let value_text = value.serialize(ConvertToString)?;
@@ -304,8 +330,9 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {

    fn end(self) -> Result<Self::Ok, Self::Error> {
        let texts = self.texts.iter().map(String::as_str);
        self.indexer.index_text_seq(self.document_id, self.attribute, texts);
        Ok(())
        self.indexer
            .index_text_seq(self.document_id, self.attribute, texts);
        Ok(None)
    }
}

@@ -317,11 +344,12 @@ pub struct TupleIndexer<'a> {
}

impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
    type Ok = ();
    type Ok = Option<usize>;
    type Error = SerializerError;

    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where T: Serialize
    where
        T: Serialize,
    {
        let text = value.serialize(ConvertToString)?;
        self.texts.push(text);
@@ -330,7 +358,8 @@ impl<'a> ser::SerializeTuple for TupleIndexer<'a> {

    fn end(self) -> Result<Self::Ok, Self::Error> {
        let texts = self.texts.iter().map(String::as_str);
        self.indexer.index_text_seq(self.document_id, self.attribute, texts);
        Ok(())
        self.indexer
            .index_text_seq(self.document_id, self.attribute, texts);
        Ok(None)
    }
}
@ -15,32 +15,29 @@ mod extract_document_id;
|
||||
mod indexer;
|
||||
mod serializer;
|
||||
|
||||
pub use self::deserializer::{Deserializer, DeserializerError};
|
||||
pub use self::extract_document_id::{extract_document_id, compute_document_id, value_to_string};
|
||||
pub use self::convert_to_string::ConvertToString;
|
||||
pub use self::convert_to_number::ConvertToNumber;
|
||||
pub use self::convert_to_string::ConvertToString;
|
||||
pub use self::deserializer::{Deserializer, DeserializerError};
|
||||
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
|
||||
pub use self::indexer::Indexer;
|
||||
pub use self::serializer::Serializer;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::{fmt, error::Error};
|
||||
use std::{error::Error, fmt};
|
||||
|
||||
use meilidb_core::DocumentId;
|
||||
use meilidb_schema::SchemaAttr;
|
||||
use rmp_serde::encode::Error as RmpError;
|
||||
use serde_json::Error as SerdeJsonError;
|
||||
use serde::ser;
|
||||
use serde_json::Error as SerdeJsonError;
|
||||
|
||||
use crate::number::ParseNumberError;
|
||||
use crate::{DocumentId, ParseNumberError};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SerializerError {
|
||||
DocumentIdNotFound,
|
||||
InvalidDocumentIdType,
|
||||
RmpError(RmpError),
|
||||
RocksDbError(rocksdb::Error),
|
||||
SerdeJsonError(SerdeJsonError),
|
||||
ParseNumberError(ParseNumberError),
|
||||
Zlmdb(zlmdb::Error),
|
||||
SerdeJson(SerdeJsonError),
|
||||
ParseNumber(ParseNumberError),
|
||||
UnserializableType { type_name: &'static str },
|
||||
UnindexableType { type_name: &'static str },
|
||||
UnrankableType { type_name: &'static str },
|
||||
@ -57,26 +54,25 @@ impl fmt::Display for SerializerError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
SerializerError::DocumentIdNotFound => {
|
||||
write!(f, "serialized document does not have an id according to the schema")
|
||||
},
|
||||
f.write_str("serialized document does not have an id according to the schema")
|
||||
}
|
||||
SerializerError::InvalidDocumentIdType => {
|
||||
write!(f, "document identifier can only be of type string or number")
|
||||
},
|
||||
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
||||
SerializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
|
||||
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
|
||||
SerializerError::ParseNumberError(e) => {
|
||||
f.write_str("document identifier can only be of type string or number")
|
||||
}
|
||||
SerializerError::Zlmdb(e) => write!(f, "zlmdb related error: {}", e),
|
||||
SerializerError::SerdeJson(e) => write!(f, "serde json error: {}", e),
|
||||
SerializerError::ParseNumber(e) => {
|
||||
write!(f, "error while trying to parse a number: {}", e)
|
||||
},
|
||||
}
|
||||
SerializerError::UnserializableType { type_name } => {
|
||||
write!(f, "{} are not a serializable type", type_name)
|
||||
},
|
||||
write!(f, "{} is not a serializable type", type_name)
|
||||
}
|
||||
SerializerError::UnindexableType { type_name } => {
|
||||
write!(f, "{} are not an indexable type", type_name)
|
||||
},
|
||||
write!(f, "{} is not an indexable type", type_name)
|
||||
}
|
||||
SerializerError::UnrankableType { type_name } => {
|
||||
write!(f, "{} types can not be used for ranking", type_name)
|
||||
},
|
||||
}
|
||||
SerializerError::Custom(s) => f.write_str(s),
|
||||
}
|
||||
}
|
||||
@ -90,27 +86,21 @@ impl From<String> for SerializerError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<RmpError> for SerializerError {
|
||||
fn from(error: RmpError) -> SerializerError {
|
||||
SerializerError::RmpError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SerdeJsonError> for SerializerError {
|
||||
fn from(error: SerdeJsonError) -> SerializerError {
|
||||
SerializerError::SerdeJsonError(error)
|
||||
SerializerError::SerdeJson(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rocksdb::Error> for SerializerError {
|
||||
fn from(error: rocksdb::Error) -> SerializerError {
|
||||
SerializerError::RocksDbError(error)
|
||||
impl From<zlmdb::Error> for SerializerError {
|
||||
fn from(error: zlmdb::Error) -> SerializerError {
|
||||
SerializerError::Zlmdb(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseNumberError> for SerializerError {
|
||||
fn from(error: ParseNumberError) -> SerializerError {
|
||||
SerializerError::ParseNumberError(error)
|
||||
SerializerError::ParseNumber(error)
|
||||
}
|
||||
}
|
||||
|
||||
@ -129,3 +119,9 @@ impl RamDocumentStore {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RamDocumentStore {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
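The thread running through these hunks is the associated `Ok` type changing from `()` to `Option<usize>`: each indexing serializer can now report how many words it indexed for a field. The sequence indexers above return `Ok(None)`; a `Some(count)` is produced on the plain-text path and consumed as sketched below, mirroring the `serialize_value` change later in this diff (same names):

// Sketch (names taken from this diff): a Some(count) returned by the
// indexing serializer becomes a per-(document, attribute) word count entry.
if let Some(number_of_words) = value.serialize(indexer)? {
    documents_fields_counts.insert((document_id, attribute), number_of_words as u64);
}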
@@ -1,14 +1,17 @@
use meilidb_core::DocumentId;
use meilidb_schema::Schema;
use meilidb_schema::{Schema, SchemaAttr};
use serde::ser;
use std::collections::HashMap;

use crate::indexer::Indexer as RawIndexer;
use crate::ranked_map::RankedMap;
use super::{RamDocumentStore, SerializerError, ConvertToString, ConvertToNumber, Indexer};
use crate::raw_indexer::RawIndexer;
use crate::serde::RamDocumentStore;
use crate::{DocumentId, RankedMap};

use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};

pub struct Serializer<'a> {
    pub schema: &'a Schema,
    pub document_store: &'a mut RamDocumentStore,
    pub document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
    pub indexer: &'a mut RawIndexer,
    pub ranked_map: &'a mut RankedMap,
    pub document_id: DocumentId,
@@ -52,13 +55,18 @@ impl<'a> ser::Serializer for Serializer<'a> {
    }

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "Option" })
        Err(SerializerError::UnserializableType {
            type_name: "Option",
        })
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        Err(SerializerError::UnserializableType { type_name: "Option" })
        Err(SerializerError::UnserializableType {
            type_name: "Option",
        })
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
@@ -66,25 +74,29 @@ impl<'a> ser::Serializer for Serializer<'a> {
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "unit struct" })
        Err(SerializerError::UnserializableType {
            type_name: "unit struct",
        })
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str
    ) -> Result<Self::Ok, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "unit variant" })
        _variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "unit variant",
        })
    }

    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        value.serialize(self)
    }
@@ -94,15 +106,20 @@ impl<'a> ser::Serializer for Serializer<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        Err(SerializerError::UnserializableType { type_name: "newtype variant" })
        Err(SerializerError::UnserializableType {
            type_name: "newtype variant",
        })
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        Err(SerializerError::UnserializableType { type_name: "sequence" })
        Err(SerializerError::UnserializableType {
            type_name: "sequence",
        })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
@@ -112,10 +129,11 @@ impl<'a> ser::Serializer for Serializer<'a> {
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleStruct, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "tuple struct" })
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "tuple struct",
        })
    }

    fn serialize_tuple_variant(
@@ -123,10 +141,11 @@ impl<'a> ser::Serializer for Serializer<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeTupleVariant, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "tuple variant" })
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "tuple variant",
        })
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
@@ -134,6 +153,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
            schema: self.schema,
            document_id: self.document_id,
            document_store: self.document_store,
            document_fields_counts: self.document_fields_counts,
            indexer: self.indexer,
            ranked_map: self.ranked_map,
            current_key_name: None,
@@ -143,13 +163,13 @@ impl<'a> ser::Serializer for Serializer<'a> {
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStruct, Self::Error>
    {
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        Ok(StructSerializer {
            schema: self.schema,
            document_id: self.document_id,
            document_store: self.document_store,
            document_fields_counts: self.document_fields_counts,
            indexer: self.indexer,
            ranked_map: self.ranked_map,
        })
@@ -160,10 +180,11 @@ impl<'a> ser::Serializer for Serializer<'a> {
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize
    ) -> Result<Self::SerializeStructVariant, Self::Error>
    {
        Err(SerializerError::UnserializableType { type_name: "struct variant" })
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        Err(SerializerError::UnserializableType {
            type_name: "struct variant",
        })
    }
}

@@ -171,6 +192,7 @@ pub struct MapSerializer<'a> {
    schema: &'a Schema,
    document_id: DocumentId,
    document_store: &'a mut RamDocumentStore,
    document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
    indexer: &'a mut RawIndexer,
    ranked_map: &'a mut RankedMap,
    current_key_name: Option<String>,
@@ -181,7 +203,8 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
    type Error = SerializerError;

    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        let key = key.serialize(ConvertToString)?;
        self.current_key_name = Some(key);
@@ -189,7 +212,8 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
    }

    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        let key = self.current_key_name.take().unwrap();
        self.serialize_entry(&key, value)
@@ -200,7 +224,9 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
        key: &K,
        value: &V,
    ) -> Result<(), Self::Error>
    where K: ser::Serialize, V: ser::Serialize,
    where
        K: ser::Serialize,
        V: ser::Serialize,
    {
        let key = key.serialize(ConvertToString)?;

@@ -208,6 +234,7 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
            self.schema,
            self.document_id,
            self.document_store,
            self.document_fields_counts,
            self.indexer,
            self.ranked_map,
            &key,
@@ -224,6 +251,7 @@ pub struct StructSerializer<'a> {
    schema: &'a Schema,
    document_id: DocumentId,
    document_store: &'a mut RamDocumentStore,
    document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
    indexer: &'a mut RawIndexer,
    ranked_map: &'a mut RankedMap,
}
@@ -237,12 +265,14 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
        key: &'static str,
        value: &T,
    ) -> Result<(), Self::Error>
    where T: ser::Serialize,
    where
        T: ser::Serialize,
    {
        serialize_value(
            self.schema,
            self.document_id,
            self.document_store,
            self.document_fields_counts,
            self.indexer,
            self.ranked_map,
            key,
@@ -259,22 +289,30 @@ fn serialize_value<T: ?Sized>(
    schema: &Schema,
    document_id: DocumentId,
    document_store: &mut RamDocumentStore,
    documents_fields_counts: &mut HashMap<(DocumentId, SchemaAttr), u64>,
    indexer: &mut RawIndexer,
    ranked_map: &mut RankedMap,
    key: &str,
    value: &T,
) -> Result<(), SerializerError>
where T: ser::Serialize,
where
    T: ser::Serialize,
{
    if let Some(attribute) = schema.attribute(key) {
        let props = schema.props(attribute);

        let serialized = rmp_serde::to_vec_named(value)?;
        let serialized = serde_json::to_vec(value)?;
        document_store.set_document_field(document_id, attribute, serialized);

        if props.is_indexed() {
            let indexer = Indexer { attribute, indexer, document_id };
            value.serialize(indexer)?;
            let indexer = Indexer {
                attribute,
                indexer,
                document_id,
            };
            if let Some(number_of_words) = value.serialize(indexer)? {
                documents_fields_counts.insert((document_id, attribute), number_of_words as u64);
            }
        }

        if props.is_ranked() {
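Those counts do not stay in RAM: once an addition is applied they land in the `DocumentsFieldsCounts` store introduced later in this diff, where they can be read back per field. A sketch, assuming an open read transaction (an `env.read_txn()` method on the zlmdb environment is an assumption, not shown in this diff):

// Sketch: reading a per-field word count back after the update is applied.
let reader = env.read_txn()?; // assumed zlmdb API
let count = documents_fields_counts.document_field_count(&reader, document_id, attribute)?;
println!("field indexed {} words", count.unwrap_or(0));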
@@ -1,34 +0,0 @@
use std::error::Error;
use fst::Set;
use sdset::SetBuf;
use crate::DocIndex;

pub trait Store {
    type Error: Error;

    fn words(&self) -> Result<&Set, Self::Error>;
    fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;

    fn synonyms(&self) -> Result<&Set, Self::Error>;
    fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>;
}

impl<T> Store for &'_ T where T: Store {
    type Error = T::Error;

    fn words(&self) -> Result<&Set, Self::Error> {
        (*self).words()
    }

    fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
        (*self).word_indexes(word)
    }

    fn synonyms(&self) -> Result<&Set, Self::Error> {
        (*self).synonyms()
    }

    fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
        (*self).alternatives_to(word)
    }
}
49  meilidb-core/src/store/docs_words.rs  Normal file
@@ -0,0 +1,49 @@
use super::BEU64;
use crate::DocumentId;
use std::sync::Arc;
use zlmdb::types::{ByteSlice, OwnedType};
use zlmdb::Result as ZResult;

#[derive(Copy, Clone)]
pub struct DocsWords {
    pub(crate) docs_words: zlmdb::Database<OwnedType<BEU64>, ByteSlice>,
}

impl DocsWords {
    pub fn put_doc_words(
        self,
        writer: &mut zlmdb::RwTxn,
        document_id: DocumentId,
        words: &fst::Set,
    ) -> ZResult<()> {
        let document_id = BEU64::new(document_id.0);
        let bytes = words.as_fst().as_bytes();
        self.docs_words.put(writer, &document_id, bytes)
    }

    pub fn del_doc_words(
        self,
        writer: &mut zlmdb::RwTxn,
        document_id: DocumentId,
    ) -> ZResult<bool> {
        let document_id = BEU64::new(document_id.0);
        self.docs_words.delete(writer, &document_id)
    }

    pub fn doc_words(
        self,
        reader: &zlmdb::RoTxn,
        document_id: DocumentId,
    ) -> ZResult<Option<fst::Set>> {
        let document_id = BEU64::new(document_id.0);
        match self.docs_words.get(reader, &document_id)? {
            Some(bytes) => {
                let len = bytes.len();
                let bytes = Arc::from(bytes);
                let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
                Ok(Some(fst::Set::from(fst)))
            }
            None => Ok(None),
        }
    }
}
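`DocsWords` stores each document's word set as the raw bytes of an `fst::Set` and rebuilds the set on read via `Fst::from_shared_bytes` (the fst 0.3-era API used throughout this diff). A usage sketch; the transaction methods on the zlmdb `Env` are assumed:

// Sketch: round-tripping a document's word set through LMDB.
let words = fst::Set::from_iter(vec!["hello", "world"]).unwrap();

let mut writer = env.write_txn()?; // assumed zlmdb API
docs_words.put_doc_words(&mut writer, DocumentId(42), &words)?;
writer.commit()?;

let reader = env.read_txn()?; // assumed zlmdb API
let stored = docs_words.doc_words(&reader, DocumentId(42))?;
assert!(stored.is_some());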
74  meilidb-core/src/store/documents_fields.rs  Normal file
@@ -0,0 +1,74 @@
use meilidb_schema::SchemaAttr;
use zlmdb::types::{ByteSlice, OwnedType};
use zlmdb::Result as ZResult;

use super::DocumentAttrKey;
use crate::DocumentId;

#[derive(Copy, Clone)]
pub struct DocumentsFields {
    pub(crate) documents_fields: zlmdb::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
}

impl DocumentsFields {
    pub fn put_document_field(
        self,
        writer: &mut zlmdb::RwTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
        value: &[u8],
    ) -> ZResult<()> {
        let key = DocumentAttrKey::new(document_id, attribute);
        self.documents_fields.put(writer, &key, value)
    }

    pub fn del_all_document_fields(
        self,
        writer: &mut zlmdb::RwTxn,
        document_id: DocumentId,
    ) -> ZResult<usize> {
        let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
        let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
        self.documents_fields.delete_range(writer, start..=end)
    }

    pub fn document_attribute<'txn>(
        self,
        reader: &'txn zlmdb::RoTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
    ) -> ZResult<Option<&'txn [u8]>> {
        let key = DocumentAttrKey::new(document_id, attribute);
        self.documents_fields.get(reader, &key)
    }

    pub fn document_fields<'txn>(
        self,
        reader: &'txn zlmdb::RoTxn,
        document_id: DocumentId,
    ) -> ZResult<DocumentFieldsIter<'txn>> {
        let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
        let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
        let iter = self.documents_fields.range(reader, start..=end)?;
        Ok(DocumentFieldsIter { iter })
    }
}

pub struct DocumentFieldsIter<'txn> {
    iter: zlmdb::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
}

impl<'txn> Iterator for DocumentFieldsIter<'txn> {
    type Item = ZResult<(SchemaAttr, &'txn [u8])>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.iter.next() {
            Some(Ok((key, bytes))) => {
                let attr = SchemaAttr(key.attr.get());
                Some(Ok((attr, bytes)))
            }
            Some(Err(e)) => Some(Err(e)),
            None => None,
        }
    }
}
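Because the key is `(docid, attr)` with both halves big-endian, all fields of one document are contiguous in the LMDB btree, so `document_fields` is a plain range scan. A sketch of walking one document's stored fields (read transaction assumed):

// Sketch: every field of document 42, in SchemaAttr order.
for result in documents_fields.document_fields(&reader, DocumentId(42))? {
    let (attr, bytes) = result?;
    // values are the raw serialized bytes (serde_json in this diff)
    println!("attr {} -> {} bytes", attr.0, bytes.len());
}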
141  meilidb-core/src/store/documents_fields_counts.rs  Normal file
@@ -0,0 +1,141 @@
use super::DocumentAttrKey;
use crate::DocumentId;
use meilidb_schema::SchemaAttr;
use zlmdb::types::OwnedType;
use zlmdb::Result as ZResult;

#[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts {
    pub(crate) documents_fields_counts: zlmdb::Database<OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}

impl DocumentsFieldsCounts {
    pub fn put_document_field_count(
        self,
        writer: &mut zlmdb::RwTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
        value: u64,
    ) -> ZResult<()> {
        let key = DocumentAttrKey::new(document_id, attribute);
        self.documents_fields_counts.put(writer, &key, &value)
    }

    pub fn del_all_document_fields_counts(
        self,
        writer: &mut zlmdb::RwTxn,
        document_id: DocumentId,
    ) -> ZResult<usize> {
        let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
        let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
        self.documents_fields_counts
            .delete_range(writer, start..=end)
    }

    pub fn document_field_count(
        self,
        reader: &zlmdb::RoTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
    ) -> ZResult<Option<u64>> {
        let key = DocumentAttrKey::new(document_id, attribute);
        match self.documents_fields_counts.get(reader, &key)? {
            Some(count) => Ok(Some(count)),
            None => Ok(None),
        }
    }

    pub fn document_fields_counts<'txn>(
        self,
        reader: &'txn zlmdb::RoTxn,
        document_id: DocumentId,
    ) -> ZResult<DocumentFieldsCountsIter<'txn>> {
        let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
        let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
        let iter = self.documents_fields_counts.range(reader, start..=end)?;
        Ok(DocumentFieldsCountsIter { iter })
    }

    pub fn documents_ids<'txn>(
        self,
        reader: &'txn zlmdb::RoTxn,
    ) -> ZResult<DocumentsIdsIter<'txn>> {
        let iter = self.documents_fields_counts.iter(reader)?;
        Ok(DocumentsIdsIter {
            last_seen_id: None,
            iter,
        })
    }

    pub fn all_documents_fields_counts<'txn>(
        self,
        reader: &'txn zlmdb::RoTxn,
    ) -> ZResult<AllDocumentsFieldsCountsIter<'txn>> {
        let iter = self.documents_fields_counts.iter(reader)?;
        Ok(AllDocumentsFieldsCountsIter { iter })
    }
}

pub struct DocumentFieldsCountsIter<'txn> {
    iter: zlmdb::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}

impl Iterator for DocumentFieldsCountsIter<'_> {
    type Item = ZResult<(SchemaAttr, u64)>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.iter.next() {
            Some(Ok((key, count))) => {
                let attr = SchemaAttr(key.attr.get());
                Some(Ok((attr, count)))
            }
            Some(Err(e)) => Some(Err(e)),
            None => None,
        }
    }
}

pub struct DocumentsIdsIter<'txn> {
    last_seen_id: Option<DocumentId>,
    iter: zlmdb::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}

impl Iterator for DocumentsIdsIter<'_> {
    type Item = ZResult<DocumentId>;

    fn next(&mut self) -> Option<Self::Item> {
        for result in &mut self.iter {
            match result {
                Ok((key, _)) => {
                    let document_id = DocumentId(key.docid.get());
                    if Some(document_id) != self.last_seen_id {
                        self.last_seen_id = Some(document_id);
                        return Some(Ok(document_id));
                    }
                }
                Err(e) => return Some(Err(e)),
            }
        }
        None
    }
}

pub struct AllDocumentsFieldsCountsIter<'txn> {
    iter: zlmdb::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
}

impl<'r> Iterator for AllDocumentsFieldsCountsIter<'r> {
    type Item = ZResult<(DocumentId, SchemaAttr, u64)>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.iter.next() {
            Some(Ok((key, count))) => {
                let docid = DocumentId(key.docid.get());
                let attr = SchemaAttr(key.attr.get());
                Some(Ok((docid, attr, count)))
            }
            Some(Err(e)) => Some(Err(e)),
            None => None,
        }
    }
}
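`DocumentsIdsIter` gets distinct document ids almost for free: the big-endian `DocumentAttrKey` ordering groups every `(docid, attr)` pair by document, so skipping consecutive repeats of `last_seen_id` is enough to deduplicate. A sketch (read transaction assumed):

// Sketch: listing each indexed document id exactly once.
for result in documents_fields_counts.documents_ids(&reader)? {
    let document_id = result?;
    println!("document {}", document_id.0);
}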
101  meilidb-core/src/store/main.rs  Normal file
@@ -0,0 +1,101 @@
use crate::RankedMap;
use meilidb_schema::Schema;
use std::sync::Arc;
use zlmdb::types::{ByteSlice, OwnedType, Serde, Str};
use zlmdb::Result as ZResult;

const CUSTOMS_KEY: &str = "customs-key";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const SCHEMA_KEY: &str = "schema";
const SYNONYMS_KEY: &str = "synonyms";
const WORDS_KEY: &str = "words";

#[derive(Copy, Clone)]
pub struct Main {
    pub(crate) main: zlmdb::DynDatabase,
}

impl Main {
    pub fn put_words_fst(self, writer: &mut zlmdb::RwTxn, fst: &fst::Set) -> ZResult<()> {
        let bytes = fst.as_fst().as_bytes();
        self.main.put::<Str, ByteSlice>(writer, WORDS_KEY, bytes)
    }

    pub fn words_fst(self, reader: &zlmdb::RoTxn) -> ZResult<Option<fst::Set>> {
        match self.main.get::<Str, ByteSlice>(reader, WORDS_KEY)? {
            Some(bytes) => {
                let len = bytes.len();
                let bytes = Arc::from(bytes);
                let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
                Ok(Some(fst::Set::from(fst)))
            }
            None => Ok(None),
        }
    }

    pub fn put_schema(self, writer: &mut zlmdb::RwTxn, schema: &Schema) -> ZResult<()> {
        self.main
            .put::<Str, Serde<Schema>>(writer, SCHEMA_KEY, schema)
    }

    pub fn schema(self, reader: &zlmdb::RoTxn) -> ZResult<Option<Schema>> {
        self.main.get::<Str, Serde<Schema>>(reader, SCHEMA_KEY)
    }

    pub fn put_ranked_map(self, writer: &mut zlmdb::RwTxn, ranked_map: &RankedMap) -> ZResult<()> {
        self.main
            .put::<Str, Serde<RankedMap>>(writer, RANKED_MAP_KEY, &ranked_map)
    }

    pub fn ranked_map(self, reader: &zlmdb::RoTxn) -> ZResult<Option<RankedMap>> {
        self.main
            .get::<Str, Serde<RankedMap>>(reader, RANKED_MAP_KEY)
    }

    pub fn put_synonyms_fst(self, writer: &mut zlmdb::RwTxn, fst: &fst::Set) -> ZResult<()> {
        let bytes = fst.as_fst().as_bytes();
        self.main.put::<Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)
    }

    pub fn synonyms_fst(self, reader: &zlmdb::RoTxn) -> ZResult<Option<fst::Set>> {
        match self.main.get::<Str, ByteSlice>(reader, SYNONYMS_KEY)? {
            Some(bytes) => {
                let len = bytes.len();
                let bytes = Arc::from(bytes);
                let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
                Ok(Some(fst::Set::from(fst)))
            }
            None => Ok(None),
        }
    }

    pub fn put_number_of_documents<F>(self, writer: &mut zlmdb::RwTxn, f: F) -> ZResult<u64>
    where
        F: Fn(u64) -> u64,
    {
        let new = self.number_of_documents(writer).map(f)?;
        self.main
            .put::<Str, OwnedType<u64>>(writer, NUMBER_OF_DOCUMENTS_KEY, &new)?;
        Ok(new)
    }

    pub fn number_of_documents(self, reader: &zlmdb::RoTxn) -> ZResult<u64> {
        match self
            .main
            .get::<Str, OwnedType<u64>>(reader, NUMBER_OF_DOCUMENTS_KEY)?
        {
            Some(value) => Ok(value),
            None => Ok(0),
        }
    }

    pub fn put_customs(self, writer: &mut zlmdb::RwTxn, customs: &[u8]) -> ZResult<()> {
        self.main
            .put::<Str, ByteSlice>(writer, CUSTOMS_KEY, customs)
    }

    pub fn customs<'txn>(self, reader: &'txn zlmdb::RoTxn) -> ZResult<Option<&'txn [u8]>> {
        self.main.get::<Str, ByteSlice>(reader, CUSTOMS_KEY)
    }
}
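`put_number_of_documents` is a read-modify-write helper: the closure receives the current counter (0 when absent) and its return value is persisted, all inside the caller's write transaction, which is what keeps the counter consistent with the rest of an update. A sketch (transaction acquisition assumed):

// Sketch: bump the document counter as part of an update's write txn.
let mut writer = env.write_txn()?; // assumed zlmdb API
let new_total = main.put_number_of_documents(&mut writer, |old| old + inserted_len)?;
writer.commit()?;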
325  meilidb-core/src/store/mod.rs  Normal file
@@ -0,0 +1,325 @@
mod docs_words;
mod documents_fields;
mod documents_fields_counts;
mod main;
mod postings_lists;
mod synonyms;
mod updates;
mod updates_results;

pub use self::docs_words::DocsWords;
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
pub use self::documents_fields_counts::{
    DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter,
};
pub use self::main::Main;
pub use self::postings_lists::PostingsLists;
pub use self::synonyms::Synonyms;
pub use self::updates::Updates;
pub use self::updates_results::UpdatesResults;

use std::collections::HashSet;

use meilidb_schema::{Schema, SchemaAttr};
use serde::de;
use zerocopy::{AsBytes, FromBytes};
use zlmdb::Result as ZResult;

use crate::criterion::Criteria;
use crate::serde::Deserializer;
use crate::{query_builder::QueryBuilder, update, DocumentId, Error, MResult};

type BEU64 = zerocopy::U64<byteorder::BigEndian>;
type BEU16 = zerocopy::U16<byteorder::BigEndian>;

#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentAttrKey {
    docid: BEU64,
    attr: BEU16,
}

impl DocumentAttrKey {
    fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey {
        DocumentAttrKey {
            docid: BEU64::new(docid.0),
            attr: BEU16::new(attr.0),
        }
    }
}

fn main_name(name: &str) -> String {
    format!("store-{}", name)
}

fn postings_lists_name(name: &str) -> String {
    format!("store-{}-postings-lists", name)
}

fn documents_fields_name(name: &str) -> String {
    format!("store-{}-documents-fields", name)
}

fn documents_fields_counts_name(name: &str) -> String {
    format!("store-{}-documents-fields-counts", name)
}

fn synonyms_name(name: &str) -> String {
    format!("store-{}-synonyms", name)
}

fn docs_words_name(name: &str) -> String {
    format!("store-{}-docs-words", name)
}

fn updates_name(name: &str) -> String {
    format!("store-{}-updates", name)
}

fn updates_results_name(name: &str) -> String {
    format!("store-{}-updates-results", name)
}

#[derive(Clone)]
pub struct Index {
    pub main: Main,
    pub postings_lists: PostingsLists,
    pub documents_fields: DocumentsFields,
    pub documents_fields_counts: DocumentsFieldsCounts,
    pub synonyms: Synonyms,
    pub docs_words: DocsWords,

    pub updates: Updates,
    pub updates_results: UpdatesResults,
    updates_notifier: crossbeam_channel::Sender<()>,
}

impl Index {
    pub fn document<T: de::DeserializeOwned>(
        &self,
        reader: &zlmdb::RoTxn,
        attributes: Option<&HashSet<&str>>,
        document_id: DocumentId,
    ) -> MResult<Option<T>> {
        let schema = self.main.schema(reader)?;
        let schema = schema.ok_or(Error::SchemaMissing)?;

        let attributes = match attributes {
            Some(attributes) => attributes
                .iter()
                .map(|name| schema.attribute(name))
                .collect(),
            None => None,
        };

        let mut deserializer = Deserializer {
            document_id,
            reader,
            documents_fields: self.documents_fields,
            schema: &schema,
            attributes: attributes.as_ref(),
        };

        // TODO: currently we return an error if all document fields are missing,
        // returning None would have been better
        Ok(T::deserialize(&mut deserializer).map(Some)?)
    }

    pub fn document_attribute<T: de::DeserializeOwned>(
        &self,
        reader: &zlmdb::RoTxn,
        document_id: DocumentId,
        attribute: SchemaAttr,
    ) -> MResult<Option<T>> {
        let bytes = self
            .documents_fields
            .document_attribute(reader, document_id, attribute)?;
        match bytes {
            Some(bytes) => Ok(Some(serde_json::from_slice(bytes)?)),
            None => Ok(None),
        }
    }

    pub fn schema_update(&self, writer: &mut zlmdb::RwTxn, schema: Schema) -> MResult<u64> {
        let _ = self.updates_notifier.send(());
        update::push_schema_update(writer, self.updates, self.updates_results, schema)
    }

    pub fn customs_update(&self, writer: &mut zlmdb::RwTxn, customs: Vec<u8>) -> ZResult<u64> {
        let _ = self.updates_notifier.send(());
        update::push_customs_update(writer, self.updates, self.updates_results, customs)
    }

    pub fn documents_addition<D>(&self) -> update::DocumentsAddition<D> {
        update::DocumentsAddition::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    pub fn documents_deletion(&self) -> update::DocumentsDeletion {
        update::DocumentsDeletion::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    pub fn synonyms_addition(&self) -> update::SynonymsAddition {
        update::SynonymsAddition::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    pub fn synonyms_deletion(&self) -> update::SynonymsDeletion {
        update::SynonymsDeletion::new(
            self.updates,
            self.updates_results,
            self.updates_notifier.clone(),
        )
    }

    pub fn current_update_id(&self, reader: &zlmdb::RoTxn) -> MResult<Option<u64>> {
        match self.updates.last_update_id(reader)? {
            Some((id, _)) => Ok(Some(id)),
            None => Ok(None),
        }
    }

    pub fn update_status(
        &self,
        reader: &zlmdb::RoTxn,
        update_id: u64,
    ) -> MResult<update::UpdateStatus> {
        update::update_status(reader, self.updates, self.updates_results, update_id)
    }

    pub fn query_builder(&self) -> QueryBuilder {
        QueryBuilder::new(
            self.main,
            self.postings_lists,
            self.documents_fields_counts,
            self.synonyms,
        )
    }

    pub fn query_builder_with_criteria<'c, 'f, 'd>(
        &self,
        criteria: Criteria<'c>,
    ) -> QueryBuilder<'c, 'f, 'd> {
        QueryBuilder::with_criteria(
            self.main,
            self.postings_lists,
            self.documents_fields_counts,
            self.synonyms,
            criteria,
        )
    }
}

pub fn create(
    env: &zlmdb::Env,
    name: &str,
    updates_notifier: crossbeam_channel::Sender<()>,
) -> MResult<Index> {
    // create all the store names
    let main_name = main_name(name);
    let postings_lists_name = postings_lists_name(name);
    let documents_fields_name = documents_fields_name(name);
    let documents_fields_counts_name = documents_fields_counts_name(name);
    let synonyms_name = synonyms_name(name);
    let docs_words_name = docs_words_name(name);
    let updates_name = updates_name(name);
    let updates_results_name = updates_results_name(name);

    // open all the stores
    let main = env.create_dyn_database(Some(&main_name))?;
    let postings_lists = env.create_database(Some(&postings_lists_name))?;
    let documents_fields = env.create_database(Some(&documents_fields_name))?;
    let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
    let synonyms = env.create_database(Some(&synonyms_name))?;
    let docs_words = env.create_database(Some(&docs_words_name))?;
    let updates = env.create_database(Some(&updates_name))?;
    let updates_results = env.create_database(Some(&updates_results_name))?;

    Ok(Index {
        main: Main { main },
        postings_lists: PostingsLists { postings_lists },
        documents_fields: DocumentsFields { documents_fields },
        documents_fields_counts: DocumentsFieldsCounts {
            documents_fields_counts,
        },
        synonyms: Synonyms { synonyms },
        docs_words: DocsWords { docs_words },
        updates: Updates { updates },
        updates_results: UpdatesResults { updates_results },
        updates_notifier,
    })
}

pub fn open(
    env: &zlmdb::Env,
    name: &str,
    updates_notifier: crossbeam_channel::Sender<()>,
) -> MResult<Option<Index>> {
    // create all the store names
    let main_name = main_name(name);
    let postings_lists_name = postings_lists_name(name);
    let documents_fields_name = documents_fields_name(name);
    let documents_fields_counts_name = documents_fields_counts_name(name);
    let synonyms_name = synonyms_name(name);
    let docs_words_name = docs_words_name(name);
    let updates_name = updates_name(name);
    let updates_results_name = updates_results_name(name);

    // open all the stores
    let main = match env.open_dyn_database(Some(&main_name))? {
        Some(main) => main,
        None => return Ok(None),
    };
    let postings_lists = match env.open_database(Some(&postings_lists_name))? {
        Some(postings_lists) => postings_lists,
        None => return Ok(None),
    };
    let documents_fields = match env.open_database(Some(&documents_fields_name))? {
        Some(documents_fields) => documents_fields,
        None => return Ok(None),
    };
    let documents_fields_counts = match env.open_database(Some(&documents_fields_counts_name))? {
        Some(documents_fields_counts) => documents_fields_counts,
        None => return Ok(None),
    };
    let synonyms = match env.open_database(Some(&synonyms_name))? {
        Some(synonyms) => synonyms,
        None => return Ok(None),
    };
    let docs_words = match env.open_database(Some(&docs_words_name))? {
        Some(docs_words) => docs_words,
        None => return Ok(None),
    };
    let updates = match env.open_database(Some(&updates_name))? {
        Some(updates) => updates,
        None => return Ok(None),
    };
    let updates_results = match env.open_database(Some(&updates_results_name))? {
        Some(updates_results) => updates_results,
        None => return Ok(None),
    };

    Ok(Some(Index {
        main: Main { main },
        postings_lists: PostingsLists { postings_lists },
        documents_fields: DocumentsFields { documents_fields },
        documents_fields_counts: DocumentsFieldsCounts {
            documents_fields_counts,
        },
        synonyms: Synonyms { synonyms },
        docs_words: DocsWords { docs_words },
        updates: Updates { updates },
        updates_results: UpdatesResults { updates_results },
        updates_notifier,
    }))
}
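Putting the module together: `create` (or `open`) wires eight named LMDB databases into one `Index`, every sub-store is `Copy`, and mutations go through the update queue. A minimal sketch of standing an index up, assuming the zlmdb `Env` and a `Schema` are built elsewhere and that the channel's receiver drives an update loop:

// Sketch: create an index, push a schema update, and note the update id.
let (sender, _receiver) = crossbeam_channel::unbounded();
let index = store::create(&env, "example", sender)?;

let mut writer = env.write_txn()?; // assumed zlmdb API
let update_id = index.schema_update(&mut writer, schema)?;
writer.commit()?;
println!("schema update enqueued as {}", update_id);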
37  meilidb-core/src/store/postings_lists.rs  Normal file
@@ -0,0 +1,37 @@
use crate::DocIndex;
use sdset::{Set, SetBuf};
use std::borrow::Cow;
use zlmdb::types::{ByteSlice, CowSlice};
use zlmdb::Result as ZResult;

#[derive(Copy, Clone)]
pub struct PostingsLists {
    pub(crate) postings_lists: zlmdb::Database<ByteSlice, CowSlice<DocIndex>>,
}

impl PostingsLists {
    pub fn put_postings_list(
        self,
        writer: &mut zlmdb::RwTxn,
        word: &[u8],
        words_indexes: &Set<DocIndex>,
    ) -> ZResult<()> {
        self.postings_lists.put(writer, word, words_indexes)
    }

    pub fn del_postings_list(self, writer: &mut zlmdb::RwTxn, word: &[u8]) -> ZResult<bool> {
        self.postings_lists.delete(writer, word)
    }

    pub fn postings_list<'txn>(
        self,
        reader: &'txn zlmdb::RoTxn,
        word: &[u8],
    ) -> ZResult<Option<Cow<'txn, Set<DocIndex>>>> {
        match self.postings_lists.get(reader, word)? {
            Some(Cow::Borrowed(slice)) => Ok(Some(Cow::Borrowed(Set::new_unchecked(slice)))),
            Some(Cow::Owned(vec)) => Ok(Some(Cow::Owned(SetBuf::new_unchecked(vec)))),
            None => Ok(None),
        }
    }
}
36  meilidb-core/src/store/synonyms.rs  Normal file
@@ -0,0 +1,36 @@
use std::sync::Arc;
use zlmdb::types::ByteSlice;
use zlmdb::Result as ZResult;

#[derive(Copy, Clone)]
pub struct Synonyms {
    pub(crate) synonyms: zlmdb::Database<ByteSlice, ByteSlice>,
}

impl Synonyms {
    pub fn put_synonyms(
        self,
        writer: &mut zlmdb::RwTxn,
        word: &[u8],
        synonyms: &fst::Set,
    ) -> ZResult<()> {
        let bytes = synonyms.as_fst().as_bytes();
        self.synonyms.put(writer, word, bytes)
    }

    pub fn del_synonyms(self, writer: &mut zlmdb::RwTxn, word: &[u8]) -> ZResult<bool> {
        self.synonyms.delete(writer, word)
    }

    pub fn synonyms(self, reader: &zlmdb::RoTxn, word: &[u8]) -> ZResult<Option<fst::Set>> {
        match self.synonyms.get(reader, word)? {
            Some(bytes) => {
                let len = bytes.len();
                let bytes = Arc::from(bytes);
                let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
                Ok(Some(fst::Set::from(fst)))
            }
            None => Ok(None),
        }
    }
}
81  meilidb-core/src/store/updates.rs  Normal file
@@ -0,0 +1,81 @@
use super::BEU64;
use crate::update::Update;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use zlmdb::types::OwnedType;
use zlmdb::{BytesDecode, BytesEncode, Result as ZResult};

pub struct SerdeJson<T>(std::marker::PhantomData<T>);

impl<T> BytesEncode for SerdeJson<T>
where
    T: Serialize,
{
    type EItem = T;

    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
        serde_json::to_vec(item).map(Cow::Owned).ok()
    }
}

impl<'a, T: 'a> BytesDecode<'a> for SerdeJson<T>
where
    T: Deserialize<'a> + Clone,
{
    type DItem = T;

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        serde_json::from_slice(bytes).ok()
    }
}

#[derive(Copy, Clone)]
pub struct Updates {
    pub(crate) updates: zlmdb::Database<OwnedType<BEU64>, SerdeJson<Update>>,
}

impl Updates {
    // TODO do not trigger deserialize if possible
    pub fn last_update_id(self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, Update)>> {
        match self.updates.last(reader)? {
            Some((key, data)) => Ok(Some((key.get(), data))),
            None => Ok(None),
        }
    }

    // TODO do not trigger deserialize if possible
    fn first_update_id(self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, Update)>> {
        match self.updates.first(reader)? {
            Some((key, data)) => Ok(Some((key.get(), data))),
            None => Ok(None),
        }
    }

    // TODO do not trigger deserialize if possible
    pub fn contains(self, reader: &zlmdb::RoTxn, update_id: u64) -> ZResult<bool> {
        let update_id = BEU64::new(update_id);
        self.updates.get(reader, &update_id).map(|v| v.is_some())
    }

    pub fn put_update(
        self,
        writer: &mut zlmdb::RwTxn,
        update_id: u64,
        update: &Update,
    ) -> ZResult<()> {
        // TODO prefer using serde_json?
        let update_id = BEU64::new(update_id);
        self.updates.put(writer, &update_id, update)
    }

    pub fn pop_front(self, writer: &mut zlmdb::RwTxn) -> ZResult<Option<(u64, Update)>> {
        match self.first_update_id(writer)? {
            Some((update_id, update)) => {
                let key = BEU64::new(update_id);
                self.updates.delete(writer, &key)?;
                Ok(Some((update_id, update)))
            }
            None => Ok(None),
        }
    }
}
37  meilidb-core/src/store/updates_results.rs  Normal file
@@ -0,0 +1,37 @@
use super::BEU64;
use crate::update::UpdateResult;
use zlmdb::types::{OwnedType, Serde};
use zlmdb::Result as ZResult;

#[derive(Copy, Clone)]
pub struct UpdatesResults {
    pub(crate) updates_results: zlmdb::Database<OwnedType<BEU64>, Serde<UpdateResult>>,
}

impl UpdatesResults {
    pub fn last_update_id(self, reader: &zlmdb::RoTxn) -> ZResult<Option<(u64, UpdateResult)>> {
        match self.updates_results.last(reader)? {
            Some((key, data)) => Ok(Some((key.get(), data))),
            None => Ok(None),
        }
    }

    pub fn put_update_result(
        self,
        writer: &mut zlmdb::RwTxn,
        update_id: u64,
        update_result: &UpdateResult,
    ) -> ZResult<()> {
        let update_id = BEU64::new(update_id);
        self.updates_results.put(writer, &update_id, update_result)
    }

    pub fn update_result(
        self,
        reader: &zlmdb::RoTxn,
        update_id: u64,
    ) -> ZResult<Option<UpdateResult>> {
        let update_id = BEU64::new(update_id);
        self.updates_results.get(reader, &update_id)
    }
}
25  meilidb-core/src/update/customs_update.rs  Normal file
@@ -0,0 +1,25 @@
use crate::store;
use crate::update::{next_update_id, Update};
use zlmdb::Result as ZResult;

pub fn apply_customs_update(
    writer: &mut zlmdb::RwTxn,
    main_store: store::Main,
    customs: &[u8],
) -> ZResult<()> {
    main_store.put_customs(writer, customs)
}

pub fn push_customs_update(
    writer: &mut zlmdb::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    customs: Vec<u8>,
) -> ZResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::Customs(customs);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}
194  meilidb-core/src/update/documents_addition.rs  Normal file
@@ -0,0 +1,194 @@
use std::collections::{HashMap, HashSet};

use fst::{set::OpBuilder, SetBuilder};
use sdset::{duo::Union, SetOperation};
use serde::Serialize;

use crate::raw_indexer::RawIndexer;
use crate::serde::{extract_document_id, RamDocumentStore, Serializer};
use crate::store;
use crate::update::{apply_documents_deletion, next_update_id, Update};
use crate::{Error, MResult, RankedMap};

pub struct DocumentsAddition<D> {
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: crossbeam_channel::Sender<()>,
    documents: Vec<D>,
}

impl<D> DocumentsAddition<D> {
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: crossbeam_channel::Sender<()>,
    ) -> DocumentsAddition<D> {
        DocumentsAddition {
            updates_store,
            updates_results_store,
            updates_notifier,
            documents: Vec::new(),
        }
    }

    pub fn update_document(&mut self, document: D) {
        self.documents.push(document);
    }

    pub fn finalize(self, writer: &mut zlmdb::RwTxn) -> MResult<u64>
    where
        D: serde::Serialize,
    {
        let _ = self.updates_notifier.send(());
        let update_id = push_documents_addition(
            writer,
            self.updates_store,
            self.updates_results_store,
            self.documents,
        )?;
        Ok(update_id)
    }
}

impl<D> Extend<D> for DocumentsAddition<D> {
    fn extend<T: IntoIterator<Item = D>>(&mut self, iter: T) {
        self.documents.extend(iter)
    }
}

pub fn push_documents_addition<D: serde::Serialize>(
    writer: &mut zlmdb::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    addition: Vec<D>,
) -> MResult<u64> {
    let mut values = Vec::with_capacity(addition.len());
    for add in addition {
        let vec = serde_json::to_vec(&add)?;
        let add = serde_json::from_slice(&vec)?;
        values.push(add);
    }

    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::DocumentsAddition(values);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}

pub fn apply_documents_addition(
    writer: &mut zlmdb::RwTxn,
    main_store: store::Main,
    documents_fields_store: store::DocumentsFields,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
    mut ranked_map: RankedMap,
    addition: Vec<serde_json::Value>,
) -> MResult<()> {
    let mut document_ids = HashSet::new();
    let mut document_store = RamDocumentStore::new();
    let mut document_fields_counts = HashMap::new();
    let mut indexer = RawIndexer::new();

    let schema = match main_store.schema(writer)? {
        Some(schema) => schema,
        None => return Err(Error::SchemaMissing),
    };

    let identifier = schema.identifier_name();

    for document in addition {
        let document_id = match extract_document_id(identifier, &document)? {
            Some(id) => id,
            None => return Err(Error::MissingDocumentId),
        };

        // 1. store the document id for future deletion
        document_ids.insert(document_id);

        // 2. index the document fields in ram stores
        let serializer = Serializer {
            schema: &schema,
            document_store: &mut document_store,
            document_fields_counts: &mut document_fields_counts,
            indexer: &mut indexer,
            ranked_map: &mut ranked_map,
            document_id,
        };

        document.serialize(serializer)?;
    }

    // 1. remove the previous documents match indexes
    let documents_to_insert = document_ids.iter().cloned().collect();
    apply_documents_deletion(
        writer,
        main_store,
        documents_fields_store,
        documents_fields_counts_store,
        postings_lists_store,
        docs_words_store,
        ranked_map.clone(),
        documents_to_insert,
    )?;

    // 2. insert new document attributes in the database
    for ((id, attr), value) in document_store.into_inner() {
        documents_fields_store.put_document_field(writer, id, attr, &value)?;
    }

    // 3. insert new document attributes counts
    for ((id, attr), count) in document_fields_counts {
        documents_fields_counts_store.put_document_field_count(writer, id, attr, count)?;
    }

    let indexed = indexer.build();
    let mut delta_words_builder = SetBuilder::memory();

    for (word, delta_set) in indexed.words_doc_indexes {
        delta_words_builder.insert(&word).unwrap();

        let set = match postings_lists_store.postings_list(writer, &word)? {
            Some(set) => Union::new(&set, &delta_set).into_set_buf(),
            None => delta_set,
        };

        postings_lists_store.put_postings_list(writer, &word, &set)?;
    }

    for (id, words) in indexed.docs_words {
        docs_words_store.put_doc_words(writer, id, &words)?;
    }

    let delta_words = delta_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    let words = match main_store.words_fst(writer)? {
        Some(words) => {
            let op = OpBuilder::new()
                .add(words.stream())
                .add(delta_words.stream())
                .r#union();

            let mut words_builder = SetBuilder::memory();
            words_builder.extend_stream(op).unwrap();
            words_builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        }
        None => delta_words,
    };

    main_store.put_words_fst(writer, &words)?;
    main_store.put_ranked_map(writer, &ranked_map)?;

    let inserted_documents_len = document_ids.len() as u64;
    main_store.put_number_of_documents(writer, |old| old + inserted_documents_len)?;

    Ok(())
}
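The merge step deserves a closer look: for every word, the freshly indexed `DocIndex` set is unioned with the postings list already on disk, so an addition only ever grows a word's postings (stale entries were removed by the `apply_documents_deletion` call just before). A self-contained sketch of that union with sdset, using plain integers in place of `DocIndex`:

// Sketch: sdset's Union over two sorted sets, as used for postings lists.
use sdset::{duo::Union, Set, SetOperation};

let existing = Set::new(&[1u32, 3, 5]).unwrap();
let fresh = Set::new(&[2u32, 3]).unwrap();
let merged = Union::new(existing, fresh).into_set_buf();
assert_eq!(merged.as_slice(), &[1, 2, 3, 5]);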
188
meilidb-core/src/update/documents_deletion.rs
Normal file
188
meilidb-core/src/update/documents_deletion.rs
Normal file
@ -0,0 +1,188 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
|
||||
use fst::{SetBuilder, Streamer};
|
||||
use meilidb_schema::Schema;
|
||||
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
|
||||
|
||||
use crate::serde::extract_document_id;
|
||||
use crate::store;
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{DocumentId, Error, MResult, RankedMap};
|
||||
|
||||
pub struct DocumentsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
documents: Vec<DocumentId>,
|
||||
}
|
||||
|
||||
impl DocumentsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
) -> DocumentsDeletion {
|
||||
DocumentsDeletion {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_document_by_id(&mut self, document_id: DocumentId) {
|
||||
self.documents.push(document_id);
|
||||
}
|
||||
|
||||
pub fn delete_document<D>(&mut self, schema: &Schema, document: D) -> MResult<()>
|
||||
where
|
||||
D: serde::Serialize,
|
||||
{
|
||||
let identifier = schema.identifier_name();
|
||||
        let document_id = match extract_document_id(identifier, &document)? {
            Some(id) => id,
            None => return Err(Error::MissingDocumentId),
        };

        self.delete_document_by_id(document_id);

        Ok(())
    }

    pub fn finalize(self, writer: &mut zlmdb::RwTxn) -> MResult<u64> {
        let _ = self.updates_notifier.send(());
        let update_id = push_documents_deletion(
            writer,
            self.updates_store,
            self.updates_results_store,
            self.documents,
        )?;
        Ok(update_id)
    }
}

impl Extend<DocumentId> for DocumentsDeletion {
    fn extend<T: IntoIterator<Item = DocumentId>>(&mut self, iter: T) {
        self.documents.extend(iter)
    }
}

pub fn push_documents_deletion(
    writer: &mut zlmdb::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    deletion: Vec<DocumentId>,
) -> MResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::DocumentsDeletion(deletion);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}

pub fn apply_documents_deletion(
    writer: &mut zlmdb::RwTxn,
    main_store: store::Main,
    documents_fields_store: store::DocumentsFields,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
    mut ranked_map: RankedMap,
    deletion: Vec<DocumentId>,
) -> MResult<()> {
    let idset = SetBuf::from_dirty(deletion);

    let schema = match main_store.schema(writer)? {
        Some(schema) => schema,
        None => return Err(Error::SchemaMissing),
    };

    // collect the ranked attributes according to the schema
    let ranked_attrs: Vec<_> = schema
        .iter()
        .filter_map(
            |(_, attr, prop)| {
                if prop.is_ranked() {
                    Some(attr)
                } else {
                    None
                }
            },
        )
        .collect();

    let mut words_document_ids = HashMap::new();
    for id in idset {
        // remove all the ranked attributes from the ranked_map
        for ranked_attr in &ranked_attrs {
            ranked_map.remove(id, *ranked_attr);
        }

        if let Some(words) = docs_words_store.doc_words(writer, id)? {
            let mut stream = words.stream();
            while let Some(word) = stream.next() {
                let word = word.to_vec();
                words_document_ids
                    .entry(word)
                    .or_insert_with(Vec::new)
                    .push(id);
            }
        }
    }

    let mut deleted_documents = HashSet::new();
    let mut removed_words = BTreeSet::new();
    for (word, document_ids) in words_document_ids {
        let document_ids = SetBuf::from_dirty(document_ids);

        if let Some(doc_indexes) = postings_lists_store.postings_list(writer, &word)? {
            let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
            let doc_indexes = op.into_set_buf();

            if !doc_indexes.is_empty() {
                postings_lists_store.put_postings_list(writer, &word, &doc_indexes)?;
            } else {
                postings_lists_store.del_postings_list(writer, &word)?;
                removed_words.insert(word);
            }
        }

        for id in document_ids {
            documents_fields_counts_store.del_all_document_fields_counts(writer, id)?;
            if documents_fields_store.del_all_document_fields(writer, id)? != 0 {
                deleted_documents.insert(id);
            }
        }
    }

    let deleted_documents_len = deleted_documents.len() as u64;
    for id in deleted_documents {
        docs_words_store.del_doc_words(writer, id)?;
    }

    let removed_words = fst::Set::from_iter(removed_words).unwrap();
    let words = match main_store.words_fst(writer)? {
        Some(words_set) => {
            let op = fst::set::OpBuilder::new()
                .add(words_set.stream())
                .add(removed_words.stream())
                .difference();

            let mut words_builder = SetBuilder::memory();
            words_builder.extend_stream(op).unwrap();
            words_builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        }
        None => fst::Set::default(),
    };

    main_store.put_words_fst(writer, &words)?;
    main_store.put_ranked_map(writer, &ranked_map)?;

    main_store.put_number_of_documents(writer, |old| old - deleted_documents_len)?;

    Ok(())
}
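
For orientation, a minimal sketch (not part of the changeset) of how this reworked deletion API is driven end to end. It assumes the DocumentsDeletion constructor mirrors the SynonymsAddition::new shown further down, and that the caller already owns the stores, the notifier channel, and an open zlmdb write transaction:

// Sketch only: assumes this lives inside meilidb-core, where the
// types above are in scope, and that `DocumentsDeletion::new` takes
// the same three arguments as `SynonymsAddition::new` below.
use crate::update::DocumentsDeletion;
use crate::{store, DocumentId, MResult};

fn enqueue_deletion(
    writer: &mut zlmdb::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: crossbeam_channel::Sender<()>,
    ids: Vec<DocumentId>,
) -> MResult<u64> {
    let mut deletion =
        DocumentsDeletion::new(updates_store, updates_results_store, updates_notifier);
    // the Extend<DocumentId> impl above lets us enqueue many ids at once
    deletion.extend(ids);
    // finalize pushes an Update::DocumentsDeletion into the updates store
    // and returns the id that can later be polled with update_status
    deletion.finalize(writer)
}
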
meilidb-core/src/update/mod.rs (new file, 223 lines)
@@ -0,0 +1,223 @@
mod customs_update;
mod documents_addition;
mod documents_deletion;
mod schema_update;
mod synonyms_addition;
mod synonyms_deletion;

pub use self::customs_update::{apply_customs_update, push_customs_update};
pub use self::documents_addition::{apply_documents_addition, DocumentsAddition};
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
pub use self::schema_update::{apply_schema_update, push_schema_update};
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};

use std::cmp;
use std::collections::BTreeMap;
use std::time::{Duration, Instant};

use log::debug;
use serde::{Deserialize, Serialize};
use zlmdb::Result as ZResult;

use crate::{store, DocumentId, MResult, RankedMap};
use meilidb_schema::Schema;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Update {
    Schema(Schema),
    Customs(Vec<u8>),
    DocumentsAddition(Vec<serde_json::Value>),
    DocumentsDeletion(Vec<DocumentId>),
    SynonymsAddition(BTreeMap<String, Vec<String>>),
    SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateType {
    Schema { schema: Schema },
    Customs,
    DocumentsAddition { number: usize },
    DocumentsDeletion { number: usize },
    SynonymsAddition { number: usize },
    SynonymsDeletion { number: usize },
}

#[derive(Clone, Serialize, Deserialize)]
pub struct DetailedDuration {
    pub main: Duration,
}

#[derive(Clone, Serialize, Deserialize)]
pub struct UpdateResult {
    pub update_id: u64,
    pub update_type: UpdateType,
    pub result: Result<(), String>,
    pub detailed_duration: DetailedDuration,
}

#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateStatus {
    Enqueued,
    Processed(UpdateResult),
    Unknown,
}

pub fn update_status(
    reader: &zlmdb::RoTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    update_id: u64,
) -> MResult<UpdateStatus> {
    match updates_results_store.update_result(reader, update_id)? {
        Some(result) => Ok(UpdateStatus::Processed(result)),
        None => {
            if updates_store.contains(reader, update_id)? {
                Ok(UpdateStatus::Enqueued)
            } else {
                Ok(UpdateStatus::Unknown)
            }
        }
    }
}

pub fn next_update_id(
    writer: &mut zlmdb::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
) -> ZResult<u64> {
    let last_update_id = updates_store.last_update_id(writer)?;
    let last_update_id = last_update_id.map(|(n, _)| n);

    let last_update_results_id = updates_results_store.last_update_id(writer)?;
    let last_update_results_id = last_update_results_id.map(|(n, _)| n);

    let max_update_id = cmp::max(last_update_id, last_update_results_id);
    let new_update_id = max_update_id.map_or(0, |n| n + 1);

    Ok(new_update_id)
}

pub fn update_task(
    writer: &mut zlmdb::RwTxn,
    index: store::Index,
) -> MResult<Option<UpdateResult>> {
    let (update_id, update) = match index.updates.pop_front(writer)? {
        Some(value) => value,
        None => return Ok(None),
    };

    debug!("Processing update number {}", update_id);

    let (update_type, result, duration) = match update {
        Update::Schema(schema) => {
            let start = Instant::now();

            let update_type = UpdateType::Schema {
                schema: schema.clone(),
            };
            let result = apply_schema_update(writer, index.main, &schema);

            (update_type, result, start.elapsed())
        }
        Update::Customs(customs) => {
            let start = Instant::now();

            let update_type = UpdateType::Customs;
            let result = apply_customs_update(writer, index.main, &customs).map_err(Into::into);

            (update_type, result, start.elapsed())
        }
        Update::DocumentsAddition(documents) => {
            let start = Instant::now();

            let ranked_map = match index.main.ranked_map(writer)? {
                Some(ranked_map) => ranked_map,
                None => RankedMap::default(),
            };

            let update_type = UpdateType::DocumentsAddition {
                number: documents.len(),
            };

            let result = apply_documents_addition(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                ranked_map,
                documents,
            );

            (update_type, result, start.elapsed())
        }
        Update::DocumentsDeletion(documents) => {
            let start = Instant::now();

            let ranked_map = match index.main.ranked_map(writer)? {
                Some(ranked_map) => ranked_map,
                None => RankedMap::default(),
            };

            let update_type = UpdateType::DocumentsDeletion {
                number: documents.len(),
            };

            let result = apply_documents_deletion(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                ranked_map,
                documents,
            );

            (update_type, result, start.elapsed())
        }
        Update::SynonymsAddition(synonyms) => {
            let start = Instant::now();

            let update_type = UpdateType::SynonymsAddition {
                number: synonyms.len(),
            };

            let result = apply_synonyms_addition(writer, index.main, index.synonyms, synonyms);

            (update_type, result, start.elapsed())
        }
        Update::SynonymsDeletion(synonyms) => {
            let start = Instant::now();

            let update_type = UpdateType::SynonymsDeletion {
                number: synonyms.len(),
            };

            let result = apply_synonyms_deletion(writer, index.main, index.synonyms, synonyms);

            (update_type, result, start.elapsed())
        }
    };

    debug!(
        "Processed update number {} {:?} {:?}",
        update_id, update_type, result
    );

    let detailed_duration = DetailedDuration { main: duration };
    let status = UpdateResult {
        update_id,
        update_type,
        result: result.map_err(|e| e.to_string()),
        detailed_duration,
    };

    index
        .updates_results
        .put_update_result(writer, update_id, &status)?;

    Ok(Some(status))
}
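
A small sketch (again, not part of the changeset) of how a consumer could drain the queue with update_task; it assumes store::Index is a cheaply copyable handle, like the individual store types that are passed by value throughout this module:

// Sketch only: drains every enqueued update inside one write transaction,
// assuming `store::Index` is `Copy` like the other store handles.
use crate::update::{update_task, UpdateResult};
use crate::{store, MResult};

fn process_pending_updates(
    writer: &mut zlmdb::RwTxn,
    index: store::Index,
) -> MResult<Vec<UpdateResult>> {
    let mut results = Vec::new();
    // update_task pops one update, applies it and records its
    // UpdateResult; None means the queue is empty
    while let Some(result) = update_task(writer, index)? {
        results.push(result);
    }
    Ok(results)
}
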
meilidb-core/src/update/schema_update.rs (new file, 31 lines)
@@ -0,0 +1,31 @@
use crate::update::{next_update_id, Update};
use crate::{error::UnsupportedOperation, store, MResult};
use meilidb_schema::Schema;

pub fn apply_schema_update(
    writer: &mut zlmdb::RwTxn,
    main_store: store::Main,
    new_schema: &Schema,
) -> MResult<()> {
    if main_store.schema(writer)?.is_some() {
        return Err(UnsupportedOperation::SchemaAlreadyExists.into());
    }

    main_store
        .put_schema(writer, new_schema)
        .map_err(Into::into)
}

pub fn push_schema_update(
    writer: &mut zlmdb::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    schema: Schema,
) -> MResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::Schema(schema);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}
meilidb-core/src/update/synonyms_addition.rs (new file, 118 lines)
@@ -0,0 +1,118 @@
use std::collections::BTreeMap;

use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf;

use crate::automaton::normalize_str;
use crate::update::{next_update_id, Update};
use crate::{store, MResult};

pub struct SynonymsAddition {
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: crossbeam_channel::Sender<()>,
    synonyms: BTreeMap<String, Vec<String>>,
}

impl SynonymsAddition {
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: crossbeam_channel::Sender<()>,
    ) -> SynonymsAddition {
        SynonymsAddition {
            updates_store,
            updates_results_store,
            updates_notifier,
            synonyms: BTreeMap::new(),
        }
    }

    pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
    where
        S: AsRef<str>,
        T: AsRef<str>,
        I: IntoIterator<Item = T>,
    {
        let synonym = normalize_str(synonym.as_ref());
        let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
        self.synonyms
            .entry(synonym)
            .or_insert_with(Vec::new)
            .extend(alternatives);
    }

    pub fn finalize(self, writer: &mut zlmdb::RwTxn) -> MResult<u64> {
        let _ = self.updates_notifier.send(());
        let update_id = push_synonyms_addition(
            writer,
            self.updates_store,
            self.updates_results_store,
            self.synonyms,
        )?;
        Ok(update_id)
    }
}

pub fn push_synonyms_addition(
    writer: &mut zlmdb::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    addition: BTreeMap<String, Vec<String>>,
) -> MResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::SynonymsAddition(addition);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}

pub fn apply_synonyms_addition(
    writer: &mut zlmdb::RwTxn,
    main_store: store::Main,
    synonyms_store: store::Synonyms,
    addition: BTreeMap<String, Vec<String>>,
) -> MResult<()> {
    let mut synonyms_builder = SetBuilder::memory();

    for (word, alternatives) in addition {
        synonyms_builder.insert(&word).unwrap();

        let alternatives = {
            let alternatives = SetBuf::from_dirty(alternatives);
            let mut alternatives_builder = SetBuilder::memory();
            alternatives_builder.extend_iter(alternatives).unwrap();
            let bytes = alternatives_builder.into_inner().unwrap();
            fst::Set::from_bytes(bytes).unwrap()
        };

        synonyms_store.put_synonyms(writer, word.as_bytes(), &alternatives)?;
    }

    let delta_synonyms = synonyms_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    let synonyms = match main_store.synonyms_fst(writer)? {
        Some(synonyms) => {
            let op = OpBuilder::new()
                .add(synonyms.stream())
                .add(delta_synonyms.stream())
                .r#union();

            let mut synonyms_builder = SetBuilder::memory();
            synonyms_builder.extend_stream(op).unwrap();
            synonyms_builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        }
        None => delta_synonyms,
    };

    main_store.put_synonyms_fst(writer, &synonyms)?;

    Ok(())
}
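
A usage sketch for the builder above, under the same assumptions as the earlier ones; note that add_synonym normalizes the synonym and lowercases the alternatives itself, so raw user input can be passed straight through:

// Sketch only: enqueues one synonym with two alternatives.
use crate::update::SynonymsAddition;
use crate::{store, MResult};

fn add_street_synonyms(
    writer: &mut zlmdb::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: crossbeam_channel::Sender<()>,
) -> MResult<u64> {
    let mut addition =
        SynonymsAddition::new(updates_store, updates_results_store, updates_notifier);
    // "STREET" is normalized and the alternatives are lowercased
    // by add_synonym itself
    addition.add_synonym("STREET", vec!["st", "str"]);
    addition.finalize(writer)
}
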
meilidb-core/src/update/synonyms_deletion.rs
@@ -1,21 +1,32 @@
 use std::collections::BTreeMap;
-use std::iter::FromIterator;
-use std::sync::Arc;
 
-use fst::{SetBuilder, set::OpBuilder};
-use meilidb_core::normalize_str;
+use fst::{set::OpBuilder, SetBuilder};
 use sdset::SetBuf;
 
-use crate::database::{Error, Index, index::Cache};
+use crate::automaton::normalize_str;
+use crate::update::{next_update_id, Update};
+use crate::{store, MResult};
 
-pub struct SynonymsDeletion<'a> {
-    index: &'a Index,
+pub struct SynonymsDeletion {
+    updates_store: store::Updates,
+    updates_results_store: store::UpdatesResults,
+    updates_notifier: crossbeam_channel::Sender<()>,
     synonyms: BTreeMap<String, Option<Vec<String>>>,
 }
 
-impl<'a> SynonymsDeletion<'a> {
-    pub fn new(index: &'a Index) -> SynonymsDeletion<'a> {
-        SynonymsDeletion { index, synonyms: BTreeMap::new() }
+impl SynonymsDeletion {
+    pub fn new(
+        updates_store: store::Updates,
+        updates_results_store: store::UpdatesResults,
+        updates_notifier: crossbeam_channel::Sender<()>,
+    ) -> SynonymsDeletion {
+        SynonymsDeletion {
+            updates_store,
+            updates_results_store,
+            updates_notifier,
+            synonyms: BTreeMap::new(),
+        }
     }
 
     pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
@@ -24,9 +35,10 @@ impl<'a> SynonymsDeletion<'a> {
     }
 
     pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
-    where S: AsRef<str>,
-          T: AsRef<str>,
-          I: Iterator<Item=T>,
+    where
+        S: AsRef<str>,
+        T: AsRef<str>,
+        I: Iterator<Item = T>,
     {
         let synonym = normalize_str(synonym.as_ref());
         let value = self.synonyms.entry(synonym).or_insert(None);
@@ -37,26 +49,44 @@ impl<'a> SynonymsDeletion<'a> {
         }
     }
 
-    pub fn finalize(self) -> Result<u64, Error> {
-        self.index.push_synonyms_deletion(self.synonyms)
+    pub fn finalize(self, writer: &mut zlmdb::RwTxn) -> MResult<u64> {
+        let _ = self.updates_notifier.send(());
+        let update_id = push_synonyms_deletion(
+            writer,
+            self.updates_store,
+            self.updates_results_store,
+            self.synonyms,
+        )?;
+        Ok(update_id)
     }
 }
 
-pub fn apply_synonyms_deletion(
-    index: &Index,
+pub fn push_synonyms_deletion(
+    writer: &mut zlmdb::RwTxn,
+    updates_store: store::Updates,
+    updates_results_store: store::UpdatesResults,
     deletion: BTreeMap<String, Option<Vec<String>>>,
-) -> Result<(), Error>
-{
-    let ref_index = index.as_ref();
-    let synonyms = ref_index.synonyms_index;
-    let main = ref_index.main_index;
+) -> MResult<u64> {
+    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
+
+    let update = Update::SynonymsDeletion(deletion);
+    updates_store.put_update(writer, last_update_id, &update)?;
+
+    Ok(last_update_id)
+}
+
+pub fn apply_synonyms_deletion(
+    writer: &mut zlmdb::RwTxn,
+    main_store: store::Main,
+    synonyms_store: store::Synonyms,
+    deletion: BTreeMap<String, Option<Vec<String>>>,
+) -> MResult<()> {
     let mut delete_whole_synonym_builder = SetBuilder::memory();
 
     for (synonym, alternatives) in deletion {
         match alternatives {
             Some(alternatives) => {
-                let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?;
+                let prev_alternatives = synonyms_store.synonyms(writer, synonym.as_bytes())?;
                 let prev_alternatives = match prev_alternatives {
                     Some(alternatives) => alternatives,
                     None => continue,
@@ -66,9 +96,7 @@ pub fn apply_synonyms_deletion(
                     let alternatives = SetBuf::from_dirty(alternatives);
                     let mut builder = SetBuilder::memory();
                     builder.extend_iter(alternatives).unwrap();
-                    builder.into_inner()
-                        .and_then(fst::Set::from_bytes)
-                        .unwrap()
+                    builder.into_inner().and_then(fst::Set::from_bytes).unwrap()
                 };
 
                 let op = OpBuilder::new()
@@ -81,19 +109,21 @@ pub fn apply_synonyms_deletion(
                     let len = builder.get_ref().len();
                     builder.extend_stream(op).unwrap();
                     let is_empty = len == builder.get_ref().len();
-                    let alternatives = builder.into_inner().unwrap();
+                    let bytes = builder.into_inner().unwrap();
+                    let alternatives = fst::Set::from_bytes(bytes).unwrap();
 
                     (alternatives, is_empty)
                 };
 
                 if empty_alternatives {
                     delete_whole_synonym_builder.insert(synonym.as_bytes())?;
                 } else {
-                    synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
+                    synonyms_store.put_synonyms(writer, synonym.as_bytes(), &alternatives)?;
                 }
-            },
+            }
             None => {
                 delete_whole_synonym_builder.insert(&synonym).unwrap();
-                synonyms.del_alternatives_of(synonym.as_bytes())?;
+                synonyms_store.del_synonyms(writer, synonym.as_bytes())?;
             }
         }
     }
@@ -103,7 +133,7 @@ pub fn apply_synonyms_deletion(
         .and_then(fst::Set::from_bytes)
         .unwrap();
 
-    let synonyms = match main.synonyms_set()? {
+    let synonyms = match main_store.synonyms_fst(writer)? {
         Some(synonyms) => {
             let op = OpBuilder::new()
                 .add(synonyms.stream())
@@ -116,22 +146,11 @@ pub fn apply_synonyms_deletion(
                 .into_inner()
                 .and_then(fst::Set::from_bytes)
                 .unwrap()
-        },
+        }
         None => fst::Set::default(),
     };
 
-    main.set_synonyms_set(&synonyms)?;
-
-    // update the "consistent" view of the Index
-    let cache = ref_index.cache;
-    let words = Arc::new(main.words_set()?.unwrap_or_default());
-    let ranked_map = cache.ranked_map.clone();
-    let synonyms = Arc::new(synonyms);
-    let schema = cache.schema.clone();
-    let number_of_documents = cache.number_of_documents;
-
-    let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
-    index.cache.store(Arc::new(cache));
+    main_store.put_synonyms_fst(writer, &synonyms)?;
 
     Ok(())
 }
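
Finally, a sketch of polling an update id returned by one of the finalize methods, using the update_status function introduced in update/mod.rs above:

// Sketch only: reports whether a given update has been applied successfully.
use crate::update::{update_status, UpdateStatus};
use crate::{store, MResult};

fn is_processed(
    reader: &zlmdb::RoTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    update_id: u64,
) -> MResult<bool> {
    match update_status(reader, updates_store, updates_results_store, update_id)? {
        // Processed carries the UpdateResult; its inner `result` tells
        // whether the apply_* function succeeded
        UpdateStatus::Processed(result) => Ok(result.result.is_ok()),
        UpdateStatus::Enqueued | UpdateStatus::Unknown => Ok(false),
    }
}
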
meilidb-data/Cargo.toml (deleted file)
@@ -1,39 +0,0 @@
[package]
name = "meilidb-data"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"

[dependencies]
arc-swap = "0.4.2"
bincode = "1.1.4"
crossbeam-channel = "0.3.9"
deunicode = "1.0.0"
hashbrown = { version = "0.6.0", features = ["serde"] }
log = "0.4.6"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] }
rocksdb = "0.12.3"
sdset = "0.3.2"
serde = { version = "1.0.99", features = ["derive"] }
serde_json = "1.0.40"
siphasher = "0.3.0"
zerocopy = "0.2.8"

[dependencies.rmp-serde]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"

[dependencies.rmpv]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
features = ["with-serde"]

[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"

[dev-dependencies]
tempfile = "3.1.0"
@@ -1,113 +0,0 @@
use std::sync::Arc;
use crossbeam_channel::{unbounded, Sender, Receiver};
use rocksdb::{DBVector, IteratorMode, Direction};
use crate::RocksDbResult;

#[derive(Clone)]
pub struct CfTree {
    index: Arc<CfTreeInner>,
    sender: Option<Sender<()>>,
}

struct CfTreeInner {
    db: Arc<rocksdb::DB>,
    name: String,
}

impl CfTree {
    pub fn create(db: Arc<rocksdb::DB>, name: String) -> RocksDbResult<CfTree> {
        let mut options = rocksdb::Options::default();
        options.create_missing_column_families(true);

        let _cf = db.create_cf(&name, &options)?;
        let index = Arc::new(CfTreeInner { db, name });

        Ok(CfTree { index, sender: None })
    }

    pub fn create_with_subcription(
        db: Arc<rocksdb::DB>,
        name: String,
    ) -> RocksDbResult<(CfTree, Receiver<()>)>
    {
        let mut options = rocksdb::Options::default();
        options.create_missing_column_families(true);

        let _cf = db.create_cf(&name, &options)?;
        let index = Arc::new(CfTreeInner { db, name });
        let (sender, receiver) = unbounded();

        Ok((CfTree { index, sender: Some(sender) }, receiver))
    }

    pub fn insert<K, V>(&self, key: K, value: V) -> RocksDbResult<()>
    where K: AsRef<[u8]>,
          V: AsRef<[u8]>,
    {
        let cf = self.index.db.cf_handle(&self.index.name).unwrap();
        let result = self.index.db.put_cf(cf, key, value);

        if let Some(sender) = &self.sender {
            let _err = sender.send(());
        }

        result
    }

    pub fn get<K>(&self, key: K) -> RocksDbResult<Option<DBVector>>
    where K: AsRef<[u8]>,
    {
        let cf = self.index.db.cf_handle(&self.index.name).unwrap();
        self.index.db.get_cf(cf, key)
    }

    pub fn remove<K>(&self, key: K) -> RocksDbResult<()>
    where K: AsRef<[u8]>
    {
        let cf = self.index.db.cf_handle(&self.index.name).unwrap();
        self.index.db.delete_cf(cf, key)
    }

    /// Start and end key range is inclusive on both bounds.
    pub fn range<KS, KE>(&self, start: KS, end: KE) -> RocksDbResult<CfIter>
    where KS: AsRef<[u8]>,
          KE: AsRef<[u8]>,
    {
        let cf = self.index.db.cf_handle(&self.index.name).unwrap();

        let mut iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
        iter.set_mode(IteratorMode::From(start.as_ref(), Direction::Forward));

        let end_bound = Box::from(end.as_ref());
        Ok(CfIter { iter, end_bound: Some(end_bound) })
    }

    pub fn iter(&self) -> RocksDbResult<CfIter> {
        let cf = self.index.db.cf_handle(&self.index.name).unwrap();
        let iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
        Ok(CfIter { iter, end_bound: None })
    }

    pub fn last_key(&self) -> RocksDbResult<Option<Box<[u8]>>> {
        let cf = self.index.db.cf_handle(&self.index.name).unwrap();
        let mut iter = self.index.db.iterator_cf(cf, IteratorMode::End)?;
        Ok(iter.next().map(|(key, _)| key))
    }
}

pub struct CfIter<'a> {
    iter: rocksdb::DBIterator<'a>,
    end_bound: Option<Box<[u8]>>,
}

impl Iterator for CfIter<'_> {
    type Item = (Box<[u8]>, Box<[u8]>);

    fn next(&mut self) -> Option<Self::Item> {
        match (self.iter.next(), &self.end_bound) {
            (Some((ref key, _)), Some(end_bound)) if key > end_bound => None,
            (Some(entry), _) => Some(entry),
            (None, _) => None,
        }
    }
}
@@ -1,73 +0,0 @@
use std::{error, fmt};
use crate::serde::SerializerError;

#[derive(Debug)]
pub enum Error {
    SchemaDiffer,
    SchemaMissing,
    WordIndexMissing,
    MissingDocumentId,
    RocksDbError(rocksdb::Error),
    FstError(fst::Error),
    RmpDecodeError(rmp_serde::decode::Error),
    RmpEncodeError(rmp_serde::encode::Error),
    BincodeError(bincode::Error),
    SerializerError(SerializerError),
}

impl From<rocksdb::Error> for Error {
    fn from(error: rocksdb::Error) -> Error {
        Error::RocksDbError(error)
    }
}

impl From<fst::Error> for Error {
    fn from(error: fst::Error) -> Error {
        Error::FstError(error)
    }
}

impl From<rmp_serde::decode::Error> for Error {
    fn from(error: rmp_serde::decode::Error) -> Error {
        Error::RmpDecodeError(error)
    }
}

impl From<rmp_serde::encode::Error> for Error {
    fn from(error: rmp_serde::encode::Error) -> Error {
        Error::RmpEncodeError(error)
    }
}

impl From<bincode::Error> for Error {
    fn from(error: bincode::Error) -> Error {
        Error::BincodeError(error)
    }
}

impl From<SerializerError> for Error {
    fn from(error: SerializerError) -> Error {
        Error::SerializerError(error)
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;
        match self {
            SchemaDiffer => write!(f, "schemas differ"),
            SchemaMissing => write!(f, "this index does not have a schema"),
            WordIndexMissing => write!(f, "this index does not have a word index"),
            MissingDocumentId => write!(f, "document id is missing"),
            RocksDbError(e) => write!(f, "RocksDB error; {}", e),
            FstError(e) => write!(f, "fst error; {}", e),
            RmpDecodeError(e) => write!(f, "rmp decode error; {}", e),
            RmpEncodeError(e) => write!(f, "rmp encode error; {}", e),
            BincodeError(e) => write!(f, "bincode error; {}", e),
            SerializerError(e) => write!(f, "serializer error; {}", e),
        }
    }
}

impl error::Error for Error { }
@@ -1,12 +0,0 @@
use std::ops::Deref;

#[derive(Clone)]
pub struct CustomSettingsIndex(pub(crate) crate::CfTree);

impl Deref for CustomSettingsIndex {
    type Target = crate::CfTree;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
@@ -1,33 +0,0 @@
use std::sync::Arc;
use meilidb_core::DocumentId;
use crate::database::Error;

#[derive(Clone)]
pub struct DocsWordsIndex(pub crate::CfTree);

impl DocsWordsIndex {
    pub fn doc_words(&self, id: DocumentId) -> Result<Option<fst::Set>, Error> {
        let key = id.0.to_be_bytes();
        match self.0.get(key)? {
            Some(bytes) => {
                let len = bytes.len();
                let value = Arc::from(bytes.as_ref());
                let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
                Ok(Some(fst::Set::from(fst)))
            },
            None => Ok(None)
        }
    }

    pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> {
        let key = id.0.to_be_bytes();
        self.0.insert(key, words.as_fst().as_bytes())?;
        Ok(())
    }

    pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> {
        let key = id.0.to_be_bytes();
        self.0.remove(key)?;
        Ok(())
    }
}
@@ -1,90 +0,0 @@
use std::convert::TryInto;

use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rocksdb::DBVector;

use crate::document_attr_key::DocumentAttrKey;
use crate::RocksDbResult;

fn document_fields_range(id: DocumentId) -> ([u8; 10], [u8; 10]) {
    let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
    let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();

    (start, end)
}

#[derive(Clone)]
pub struct DocumentsIndex(pub(crate) crate::CfTree);

impl DocumentsIndex {
    pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<Option<DBVector>> {
        let key = DocumentAttrKey::new(id, attr).to_be_bytes();
        self.0.get(key)
    }

    pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> RocksDbResult<()> {
        let key = DocumentAttrKey::new(id, attr).to_be_bytes();
        self.0.insert(key, value)?;
        Ok(())
    }

    pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<()> {
        let key = DocumentAttrKey::new(id, attr).to_be_bytes();
        self.0.remove(key)?;
        Ok(())
    }

    pub fn del_all_document_fields(&self, id: DocumentId) -> RocksDbResult<usize> {
        let (start, end) = document_fields_range(id);

        let mut count = 0;
        for (key, _) in self.0.range(start, end)? {
            self.0.remove(key)?;
            count += 1;
        }

        Ok(count)
    }

    pub fn document_fields(&self, id: DocumentId) -> RocksDbResult<DocumentFieldsIter> {
        let (start, end) = document_fields_range(id);

        let iter = self.0.range(start, end)?;
        Ok(DocumentFieldsIter(iter))
    }

    pub fn len(&self) -> RocksDbResult<u64> {
        let mut last_document_id = None;
        let mut count = 0;

        for (key, _) in self.0.iter()? {
            let array = key.as_ref().try_into().unwrap();
            let document_id = DocumentAttrKey::from_be_bytes(array).document_id;

            if Some(document_id) != last_document_id {
                last_document_id = Some(document_id);
                count += 1;
            }
        }

        Ok(count)
    }
}

pub struct DocumentFieldsIter<'a>(crate::CfIter<'a>);

impl Iterator for DocumentFieldsIter<'_> {
    type Item = (SchemaAttr, Box<[u8]>);

    fn next(&mut self) -> Option<Self::Item> {
        match self.0.next() {
            Some((key, value)) => {
                let array = key.as_ref().try_into().unwrap();
                let key = DocumentAttrKey::from_be_bytes(array);
                Some((key.attribute, value))
            },
            None => None,
        }
    }
}
@@ -1,102 +0,0 @@
use std::sync::Arc;
use std::convert::TryInto;

use meilidb_schema::Schema;

use crate::ranked_map::RankedMap;
use crate::database::Error;

const SCHEMA_KEY: &str = "schema";
const WORDS_KEY: &str = "words";
const SYNONYMS_KEY: &str = "synonyms";
const RANKED_MAP_KEY: &str = "ranked-map";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";

#[derive(Clone)]
pub struct MainIndex(pub(crate) crate::CfTree);

impl MainIndex {
    pub fn schema(&self) -> Result<Option<Schema>, Error> {
        match self.0.get(SCHEMA_KEY)? {
            Some(bytes) => {
                let schema = Schema::read_from_bin(bytes.as_ref())?;
                Ok(Some(schema))
            },
            None => Ok(None),
        }
    }

    pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> {
        let mut bytes = Vec::new();
        schema.write_to_bin(&mut bytes)?;
        self.0.insert(SCHEMA_KEY, bytes)?;
        Ok(())
    }

    pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
        match self.0.get(WORDS_KEY)? {
            Some(bytes) => {
                let len = bytes.len();
                let value = Arc::from(bytes.as_ref());
                let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
                Ok(Some(fst::Set::from(fst)))
            },
            None => Ok(None),
        }
    }

    pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
        self.0.insert(WORDS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
    }

    pub fn synonyms_set(&self) -> Result<Option<fst::Set>, Error> {
        match self.0.get(SYNONYMS_KEY)? {
            Some(bytes) => {
                let len = bytes.len();
                let value = Arc::from(bytes.as_ref());
                let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
                Ok(Some(fst::Set::from(fst)))
            },
            None => Ok(None),
        }
    }

    pub fn set_synonyms_set(&self, value: &fst::Set) -> Result<(), Error> {
        self.0.insert(SYNONYMS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
    }

    pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
        match self.0.get(RANKED_MAP_KEY)? {
            Some(bytes) => {
                let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
                Ok(Some(ranked_map))
            },
            None => Ok(None),
        }
    }

    pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> {
        let mut bytes = Vec::new();
        value.write_to_bin(&mut bytes)?;
        self.0.insert(RANKED_MAP_KEY, bytes)?;
        Ok(())
    }

    pub fn number_of_documents(&self) -> Result<u64, Error> {
        match self.0.get(NUMBER_OF_DOCUMENTS_KEY)? {
            Some(bytes) => {
                let array = (*bytes).try_into().unwrap();
                Ok(u64::from_be_bytes(array))
            },
            None => Ok(0),
        }
    }

    pub fn set_number_of_documents<F>(&self, f: F) -> Result<u64, Error>
    where F: FnOnce(u64) -> u64,
    {
        let new = self.number_of_documents().map(f)?;
        self.0.insert(NUMBER_OF_DOCUMENTS_KEY, new.to_be_bytes())?;
        Ok(new)
    }
}
@@ -1,487 +0,0 @@
use std::collections::{HashSet, BTreeMap};
use std::convert::TryInto;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::thread;
use std::time::{Duration, Instant};

use arc_swap::{ArcSwap, ArcSwapOption, Guard};
use crossbeam_channel::Receiver;
use meilidb_core::criterion::Criteria;
use meilidb_core::{DocIndex, Store, DocumentId, QueryBuilder};
use meilidb_schema::Schema;
use sdset::SetBuf;
use serde::{de, Serialize, Deserialize};

use crate::CfTree;
use crate::ranked_map::RankedMap;
use crate::serde::{Deserializer, DeserializerError};

pub use self::custom_settings_index::CustomSettingsIndex;
use self::docs_words_index::DocsWordsIndex;
use self::documents_index::DocumentsIndex;
use self::main_index::MainIndex;
use self::synonyms_index::SynonymsIndex;
use self::words_index::WordsIndex;

use crate::RocksDbResult;
use crate::database::{
    Error,
    DocumentsAddition, DocumentsDeletion,
    SynonymsAddition, SynonymsDeletion,
    apply_documents_addition, apply_documents_deletion,
    apply_synonyms_addition, apply_synonyms_deletion,
};

mod custom_settings_index;
mod docs_words_index;
mod documents_index;
mod main_index;
mod synonyms_index;
mod words_index;

#[derive(Deserialize)]
enum UpdateOwned {
    DocumentsAddition(Vec<rmpv::Value>),
    DocumentsDeletion(Vec<DocumentId>),
    SynonymsAddition(BTreeMap<String, Vec<String>>),
    SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}

#[derive(Serialize)]
enum Update {
    DocumentsAddition(Vec<rmpv::Value>),
    DocumentsDeletion(Vec<DocumentId>),
    SynonymsAddition(BTreeMap<String, Vec<String>>),
    SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}

#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateType {
    DocumentsAddition { number: usize },
    DocumentsDeletion { number: usize },
    SynonymsAddition { number: usize },
    SynonymsDeletion { number: usize },
}

#[derive(Clone, Serialize, Deserialize)]
pub struct DetailedDuration {
    main: Duration,
}

#[derive(Clone, Serialize, Deserialize)]
pub struct UpdateStatus {
    pub update_id: u64,
    pub update_type: UpdateType,
    pub result: Result<(), String>,
    pub detailed_duration: DetailedDuration,
}

fn spawn_update_system(index: Index, subscription: Receiver<()>) -> thread::JoinHandle<()> {
    thread::spawn(move || {
        let mut subscription = subscription.into_iter();

        loop {
            while let Some((key, _)) = index.updates_index.iter().unwrap().next() {
                let update_id = key.as_ref().try_into().map(u64::from_be_bytes).unwrap();

                let updates = &index.updates_index;
                let results = &index.updates_results_index;

                let update = updates.get(&key).unwrap().unwrap();

                let (update_type, result, duration) = match rmp_serde::from_read_ref(&update).unwrap() {
                    UpdateOwned::DocumentsAddition(documents) => {
                        let update_type = UpdateType::DocumentsAddition { number: documents.len() };
                        let ranked_map = index.cache.load().ranked_map.clone();
                        let start = Instant::now();
                        let result = apply_documents_addition(&index, ranked_map, documents);
                        (update_type, result, start.elapsed())
                    },
                    UpdateOwned::DocumentsDeletion(documents) => {
                        let update_type = UpdateType::DocumentsDeletion { number: documents.len() };
                        let ranked_map = index.cache.load().ranked_map.clone();
                        let start = Instant::now();
                        let result = apply_documents_deletion(&index, ranked_map, documents);
                        (update_type, result, start.elapsed())
                    },
                    UpdateOwned::SynonymsAddition(synonyms) => {
                        let update_type = UpdateType::SynonymsAddition { number: synonyms.len() };
                        let start = Instant::now();
                        let result = apply_synonyms_addition(&index, synonyms);
                        (update_type, result, start.elapsed())
                    },
                    UpdateOwned::SynonymsDeletion(synonyms) => {
                        let update_type = UpdateType::SynonymsDeletion { number: synonyms.len() };
                        let start = Instant::now();
                        let result = apply_synonyms_deletion(&index, synonyms);
                        (update_type, result, start.elapsed())
                    },
                };

                let detailed_duration = DetailedDuration { main: duration };
                let status = UpdateStatus {
                    update_id,
                    update_type,
                    result: result.map_err(|e| e.to_string()),
                    detailed_duration,
                };

                if let Some(callback) = &*index.update_callback.load() {
                    (callback)(status.clone());
                }

                let value = bincode::serialize(&status).unwrap();
                results.insert(&key, value).unwrap();
                updates.remove(&key).unwrap();
            }

            // this subscription is just used to block
            // the loop until a new update is inserted
            subscription.next();
        }
    })
}

fn last_update_id(
    update_index: &crate::CfTree,
    update_results_index: &crate::CfTree,
) -> RocksDbResult<u64>
{
    let uikey = match update_index.last_key()? {
        Some(key) => Some(key.as_ref().try_into().map(u64::from_be_bytes).unwrap()),
        None => None,
    };

    let urikey = match update_results_index.last_key()? {
        Some(key) => Some(key.as_ref().try_into().map(u64::from_be_bytes).unwrap()),
        None => None,
    };

    Ok(uikey.max(urikey).unwrap_or(0))
}

#[derive(Copy, Clone)]
pub struct IndexStats {
    pub number_of_words: usize,
    pub number_of_documents: u64,
    pub number_attrs_in_ranked_map: usize,
}

#[derive(Clone)]
pub struct Index {
    pub(crate) cache: Arc<ArcSwap<Cache>>,

    // TODO this will be a snapshot in the future
    main_index: MainIndex,
    synonyms_index: SynonymsIndex,
    words_index: WordsIndex,
    docs_words_index: DocsWordsIndex,
    documents_index: DocumentsIndex,
    custom_settings_index: CustomSettingsIndex,

    // used by the update system
    updates_id: Arc<AtomicU64>,
    updates_index: crate::CfTree,
    updates_results_index: crate::CfTree,
    update_callback: Arc<ArcSwapOption<Box<dyn Fn(UpdateStatus) + Send + Sync + 'static>>>,
}

pub(crate) struct Cache {
    pub words: Arc<fst::Set>,
    pub synonyms: Arc<fst::Set>,
    pub schema: Schema,
    pub ranked_map: RankedMap,
    pub number_of_documents: u64,
}

impl Index {
    pub fn new(db: Arc<rocksdb::DB>, name: &str) -> Result<Index, Error> {
        Index::new_raw(db, name, None)
    }

    pub fn with_schema(db: Arc<rocksdb::DB>, name: &str, schema: Schema) -> Result<Index, Error> {
        Index::new_raw(db, name, Some(schema))
    }

    fn new_raw(db: Arc<rocksdb::DB>, name: &str, schema: Option<Schema>) -> Result<Index, Error> {
        let main_index = CfTree::create(db.clone(), name.to_string()).map(MainIndex)?;
        let synonyms_index = CfTree::create(db.clone(), format!("{}-synonyms", name)).map(SynonymsIndex)?;
        let words_index = CfTree::create(db.clone(), format!("{}-words", name)).map(WordsIndex)?;
        let docs_words_index = CfTree::create(db.clone(), format!("{}-docs-words", name)).map(DocsWordsIndex)?;
        let documents_index = CfTree::create(db.clone(), format!("{}-documents", name)).map(DocumentsIndex)?;
        let custom_settings_index = CfTree::create(db.clone(), format!("{}-custom", name)).map(CustomSettingsIndex)?;
        let (updates_index, subscription) = CfTree::create_with_subcription(db.clone(), format!("{}-updates", name))?;
        let updates_results_index = CfTree::create(db.clone(), format!("{}-updates-results", name))?;

        let words = match main_index.words_set()? {
            Some(words) => Arc::new(words),
            None => Arc::new(fst::Set::default()),
        };

        let synonyms = match main_index.synonyms_set()? {
            Some(synonyms) => Arc::new(synonyms),
            None => Arc::new(fst::Set::default()),
        };

        let schema = match (schema, main_index.schema()?) {
            (Some(ref expected), Some(ref current)) if current != expected => {
                return Err(Error::SchemaDiffer)
            },
            (Some(expected), Some(_)) => expected,
            (Some(expected), None) => {
                main_index.set_schema(&expected)?;
                expected
            },
            (None, Some(current)) => current,
            (None, None) => return Err(Error::SchemaMissing),
        };

        let ranked_map = match main_index.ranked_map()? {
            Some(map) => map,
            None => RankedMap::default(),
        };

        let number_of_documents = documents_index.len()?;

        let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
        let cache = Arc::new(ArcSwap::from_pointee(cache));

        let last_update_id = last_update_id(&updates_index, &updates_results_index)?;
        let updates_id = Arc::new(AtomicU64::new(last_update_id + 1));

        let index = Index {
            cache,
            main_index,
            synonyms_index,
            words_index,
            docs_words_index,
            documents_index,
            custom_settings_index,
            updates_id,
            updates_index,
            updates_results_index,
            update_callback: Arc::new(ArcSwapOption::empty()),
        };

        let _handle = spawn_update_system(index.clone(), subscription);

        Ok(index)
    }

    pub fn set_update_callback<F>(&self, callback: F)
    where F: Fn(UpdateStatus) + Send + Sync + 'static
    {
        self.update_callback.store(Some(Arc::new(Box::new(callback))));
    }

    pub fn unset_update_callback(&self) {
        self.update_callback.store(None);
    }

    pub fn stats(&self) -> RocksDbResult<IndexStats> {
        let cache = self.cache.load();
        Ok(IndexStats {
            number_of_words: cache.words.len(),
            number_of_documents: cache.number_of_documents,
            number_attrs_in_ranked_map: cache.ranked_map.len(),
        })
    }

    pub fn query_builder(&self) -> QueryBuilder<RefIndex> {
        let ref_index = self.as_ref();
        QueryBuilder::new(ref_index)
    }

    pub fn query_builder_with_criteria<'c>(
        &self,
        criteria: Criteria<'c>,
    ) -> QueryBuilder<'c, RefIndex>
    {
        let ref_index = self.as_ref();
        QueryBuilder::with_criteria(ref_index, criteria)
    }

    pub fn as_ref(&self) -> RefIndex {
        RefIndex {
            cache: self.cache.load(),
            main_index: &self.main_index,
            synonyms_index: &self.synonyms_index,
            words_index: &self.words_index,
            docs_words_index: &self.docs_words_index,
            documents_index: &self.documents_index,
            custom_settings_index: &self.custom_settings_index,
        }
    }

    pub fn schema(&self) -> Schema {
        self.cache.load().schema.clone()
    }

    pub fn custom_settings(&self) -> CustomSettingsIndex {
        self.custom_settings_index.clone()
    }

    pub fn number_of_documents(&self) -> u64 {
        self.cache.load().number_of_documents
    }

    pub fn documents_addition<D>(&self) -> DocumentsAddition<D> {
        DocumentsAddition::new(self)
    }

    pub fn documents_deletion(&self) -> DocumentsDeletion {
        DocumentsDeletion::new(self)
    }

    pub fn synonyms_addition(&self) -> SynonymsAddition {
        SynonymsAddition::new(self)
    }

    pub fn synonyms_deletion(&self) -> SynonymsDeletion {
        SynonymsDeletion::new(self)
    }

    pub fn update_status(
        &self,
        update_id: u64,
    ) -> Result<Option<UpdateStatus>, Error>
    {
        let update_id = update_id.to_be_bytes();
        match self.updates_results_index.get(update_id)? {
            Some(value) => {
                let value = bincode::deserialize(&value)?;
                Ok(Some(value))
            },
            None => Ok(None),
        }
    }

    pub fn update_status_blocking(
        &self,
        update_id: u64,
    ) -> Result<UpdateStatus, Error>
    {
        // if we find the update result return it now
        if let Some(result) = self.update_status(update_id)? {
            return Ok(result)
        }

        loop {
            if self.updates_results_index.get(&update_id.to_be_bytes())?.is_some() { break }
            std::thread::sleep(Duration::from_millis(300));
        }

        // the thread has been unblocked, it means that the update result
        // has been inserted in the tree, retrieve it
        Ok(self.update_status(update_id)?.unwrap())
    }

    pub fn document<T>(
        &self,
        fields: Option<&HashSet<&str>>,
        id: DocumentId,
    ) -> Result<Option<T>, DeserializerError>
    where T: de::DeserializeOwned,
    {
        let schema = self.schema();
        let fields = match fields {
            Some(fields) => fields.into_iter().map(|name| schema.attribute(name)).collect(),
            None => None,
        };

        let mut deserializer = Deserializer {
            document_id: id,
            index: &self,
            fields: fields.as_ref(),
        };

        // TODO: currently we return an error if all document fields are missing,
        // returning None would have been better
        T::deserialize(&mut deserializer).map(Some)
    }
}

impl Index {
    pub(crate) fn push_documents_addition<D>(&self, addition: Vec<D>) -> Result<u64, Error>
    where D: serde::Serialize
    {
        let mut values = Vec::with_capacity(addition.len());
        for add in addition {
            let vec = rmp_serde::to_vec_named(&add)?;
            let add = rmp_serde::from_read(&vec[..])?;
            values.push(add);
        }

        let addition = Update::DocumentsAddition(values);
        let update = rmp_serde::to_vec_named(&addition)?;
        self.raw_push_update(update)
    }

    pub(crate) fn push_documents_deletion(
        &self,
        deletion: Vec<DocumentId>,
    ) -> Result<u64, Error>
    {
        let deletion = Update::DocumentsDeletion(deletion);
        let update = rmp_serde::to_vec_named(&deletion)?;
        self.raw_push_update(update)
    }

    pub(crate) fn push_synonyms_addition(
        &self,
        addition: BTreeMap<String, Vec<String>>,
    ) -> Result<u64, Error>
    {
        let addition = Update::SynonymsAddition(addition);
        let update = rmp_serde::to_vec_named(&addition)?;
        self.raw_push_update(update)
    }

    pub(crate) fn push_synonyms_deletion(
        &self,
        deletion: BTreeMap<String, Option<Vec<String>>>,
    ) -> Result<u64, Error>
    {
        let deletion = Update::SynonymsDeletion(deletion);
        let update = rmp_serde::to_vec_named(&deletion)?;
        self.raw_push_update(update)
    }

    fn raw_push_update(&self, raw_update: Vec<u8>) -> Result<u64, Error> {
        let update_id = self.updates_id.fetch_add(1, Ordering::SeqCst);
        let update_id_array = update_id.to_be_bytes();
        self.updates_index.insert(update_id_array, raw_update)?;
        Ok(update_id)
    }
}

pub struct RefIndex<'a> {
    pub(crate) cache: Guard<'static, Arc<Cache>>,
    pub main_index: &'a MainIndex,
    pub synonyms_index: &'a SynonymsIndex,
    pub words_index: &'a WordsIndex,
    pub docs_words_index: &'a DocsWordsIndex,
    pub documents_index: &'a DocumentsIndex,
    pub custom_settings_index: &'a CustomSettingsIndex,
}

impl Store for RefIndex<'_> {
    type Error = Error;

    fn words(&self) -> Result<&fst::Set, Self::Error> {
        Ok(&self.cache.words)
    }

    fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
        Ok(self.words_index.doc_indexes(word)?)
    }

    fn synonyms(&self) -> Result<&fst::Set, Self::Error> {
        Ok(&self.cache.synonyms)
    }

    fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, Self::Error> {
        Ok(self.synonyms_index.alternatives_to(word)?)
    }
}
@@ -1,21 +0,0 @@
use crate::RocksDbResult;

#[derive(Clone)]
pub struct SynonymsIndex(pub(crate) crate::CfTree);

impl SynonymsIndex {
    pub fn alternatives_to(&self, word: &[u8]) -> RocksDbResult<Option<fst::Set>> {
        match self.0.get(word)? {
            Some(vector) => Ok(Some(fst::Set::from_bytes(vector.to_vec()).unwrap())),
            None => Ok(None),
        }
    }

    pub fn set_alternatives_to(&self, word: &[u8], value: Vec<u8>) -> RocksDbResult<()> {
        self.0.insert(word, value).map(drop)
    }

    pub fn del_alternatives_of(&self, word: &[u8]) -> RocksDbResult<()> {
        self.0.remove(word).map(drop)
    }
}
@@ -1,45 +0,0 @@
use meilidb_core::DocIndex;
use sdset::{Set, SetBuf};
use zerocopy::{LayoutVerified, AsBytes};
use crate::RocksDbResult;

#[derive(Clone)]
pub struct WordsIndex(pub(crate) crate::CfTree);

impl WordsIndex {
    pub fn doc_indexes(&self, word: &[u8]) -> RocksDbResult<Option<SetBuf<DocIndex>>> {
        // we must force an allocation to make the memory aligned
        match self.0.get(word)? {
            Some(bytes) => {
                let vec = match LayoutVerified::new_slice(bytes.as_ref()) {
                    Some(layout) => layout.into_slice().to_vec(),
                    None => {
                        let len = bytes.as_ref().len();
                        let count = len / std::mem::size_of::<DocIndex>();
                        let mut buf: Vec<DocIndex> = Vec::with_capacity(count);
                        unsafe {
                            let src = bytes.as_ref().as_ptr();
                            let dst = buf.as_mut_ptr() as *mut u8;
                            std::ptr::copy_nonoverlapping(src, dst, len);
                            buf.set_len(count);
                        }
                        buf
                    }
                };

                let setbuf = SetBuf::new_unchecked(vec);

                Ok(Some(setbuf))
            },
            None => Ok(None),
        }
    }

    pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> RocksDbResult<()> {
        self.0.insert(word, set.as_bytes()).map(drop)
    }

    pub fn del_doc_indexes(&self, word: &[u8]) -> RocksDbResult<()> {
        self.0.remove(word).map(drop)
    }
}
@@ -1,115 +0,0 @@
use std::collections::hash_map::Entry;
use std::collections::{HashSet, HashMap};
use std::path::Path;
use std::sync::Arc;
use std::sync::RwLock;
use meilidb_schema::Schema;

mod error;
mod index;
mod update;

pub use self::error::Error;
pub use self::index::{Index, CustomSettingsIndex};

pub use self::update::DocumentsAddition;
pub use self::update::DocumentsDeletion;
pub use self::update::SynonymsAddition;
pub use self::update::SynonymsDeletion;

use self::update::apply_documents_addition;
use self::update::apply_documents_deletion;
use self::update::apply_synonyms_addition;
use self::update::apply_synonyms_deletion;

const INDEXES_KEY: &str = "indexes";

fn load_indexes(tree: &rocksdb::DB) -> Result<HashSet<String>, Error> {
    match tree.get(INDEXES_KEY)? {
        Some(bytes) => Ok(bincode::deserialize(&bytes)?),
        None => Ok(HashSet::new())
    }
}

pub struct Database {
    cache: RwLock<HashMap<String, Index>>,
    inner: Arc<rocksdb::DB>,
}

impl Database {
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
        let cache = RwLock::new(HashMap::new());

        let mut options = rocksdb::Options::default();
        options.create_if_missing(true);

        let cfs = rocksdb::DB::list_cf(&options, &path).unwrap_or_default();
        let inner = Arc::new(rocksdb::DB::open_cf(&options, path, cfs)?);

        let indexes = load_indexes(&inner)?;
        let database = Database { cache, inner };

        for index in indexes {
            database.open_index(&index)?;
        }

        Ok(database)
    }

    pub fn indexes(&self) -> Result<HashSet<String>, Error> {
        load_indexes(&self.inner)
    }

    fn set_indexes(&self, value: &HashSet<String>) -> Result<(), Error> {
        let bytes = bincode::serialize(value)?;
        self.inner.put(INDEXES_KEY, bytes)?;
        Ok(())
    }

    pub fn open_index(&self, name: &str) -> Result<Option<Index>, Error> {
        {
            let cache = self.cache.read().unwrap();
            if let Some(index) = cache.get(name).cloned() {
                return Ok(Some(index))
            }
        }

        let mut cache = self.cache.write().unwrap();
        let index = match cache.entry(name.to_string()) {
            Entry::Occupied(occupied) => {
                occupied.get().clone()
            },
            Entry::Vacant(vacant) => {
                if !self.indexes()?.contains(name) {
                    return Ok(None)
                }

                let index = Index::new(self.inner.clone(), name)?;
                vacant.insert(index).clone()
            },
        };

        Ok(Some(index))
    }

    pub fn create_index(&self, name: &str, schema: Schema) -> Result<Index, Error> {
        let mut cache = self.cache.write().unwrap();

        let index = match cache.entry(name.to_string()) {
            Entry::Occupied(occupied) => {
                occupied.get().clone()
            },
            Entry::Vacant(vacant) => {
                let index = Index::with_schema(self.inner.clone(), name, schema)?;

                let mut indexes = self.indexes()?;
                indexes.insert(name.to_string());
                self.set_indexes(&indexes)?;

                vacant.insert(index).clone()
            },
        };

        Ok(index)
    }
}
@@ -1,139 +0,0 @@
use std::collections::HashSet;
use std::sync::Arc;

use fst::{SetBuilder, set::OpBuilder};
use sdset::{SetOperation, duo::Union};
use serde::Serialize;

use crate::RankedMap;
use crate::database::{Error, Index, index::Cache, apply_documents_deletion};
use crate::indexer::Indexer;
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};

pub struct DocumentsAddition<'a, D> {
    index: &'a Index,
    documents: Vec<D>,
}

impl<'a, D> DocumentsAddition<'a, D> {
    pub fn new(index: &'a Index) -> DocumentsAddition<'a, D> {
        DocumentsAddition { index, documents: Vec::new() }
    }

    pub fn update_document(&mut self, document: D) {
        self.documents.push(document);
    }

    pub fn finalize(self) -> Result<u64, Error>
    where D: serde::Serialize
    {
        self.index.push_documents_addition(self.documents)
    }
}

pub fn apply_documents_addition(
    index: &Index,
    mut ranked_map: RankedMap,
    addition: Vec<rmpv::Value>,
) -> Result<(), Error>
{
    let mut document_ids = HashSet::new();
    let mut document_store = RamDocumentStore::new();
    let mut indexer = Indexer::new();

    let schema = &index.schema();
    let identifier = schema.identifier_name();

    for document in addition {
        let document_id = match extract_document_id(identifier, &document)? {
            Some(id) => id,
            None => return Err(Error::MissingDocumentId),
        };

        // 1. store the document id for future deletion
        document_ids.insert(document_id);

        // 2. index the document fields in ram stores
        let serializer = Serializer {
            schema,
            document_store: &mut document_store,
            indexer: &mut indexer,
            ranked_map: &mut ranked_map,
            document_id,
        };

        document.serialize(serializer)?;
    }

    let ref_index = index.as_ref();
    let docs_words = ref_index.docs_words_index;
    let documents = ref_index.documents_index;
    let main = ref_index.main_index;
    let words = ref_index.words_index;

    // 1. remove the previous versions of the documents we are about to insert
    let documents_to_insert = document_ids.iter().cloned().collect();
    apply_documents_deletion(index, ranked_map.clone(), documents_to_insert)?;

    // 2. insert new document attributes in the database
    for ((id, attr), value) in document_store.into_inner() {
        documents.set_document_field(id, attr, value)?;
    }

    let indexed = indexer.build();
    let mut delta_words_builder = SetBuilder::memory();

    for (word, delta_set) in indexed.words_doc_indexes {
        delta_words_builder.insert(&word).unwrap();

        let set = match words.doc_indexes(&word)? {
            Some(set) => Union::new(&set, &delta_set).into_set_buf(),
            None => delta_set,
        };

        words.set_doc_indexes(&word, &set)?;
    }

    for (id, words) in indexed.docs_words {
        docs_words.set_doc_words(id, &words)?;
    }

    let delta_words = delta_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    let words = match main.words_set()? {
        Some(words) => {
            let op = OpBuilder::new()
                .add(words.stream())
                .add(delta_words.stream())
                .r#union();

            let mut words_builder = SetBuilder::memory();
            words_builder.extend_stream(op).unwrap();
            words_builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        },
        None => delta_words,
    };

    main.set_words_set(&words)?;
    main.set_ranked_map(&ranked_map)?;

    let inserted_documents_len = document_ids.len() as u64;
    let number_of_documents = main.set_number_of_documents(|old| old + inserted_documents_len)?;

    // update the "consistent" view of the Index
    let cache = ref_index.cache;
    let words = Arc::new(words);
    let synonyms = cache.synonyms.clone();
    let schema = cache.schema.clone();

    let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
    index.cache.store(Arc::new(cache));

    Ok(())
}
@@ -1,150 +0,0 @@
use std::collections::{HashMap, HashSet, BTreeSet};
use std::sync::Arc;

use fst::{SetBuilder, Streamer};
use meilidb_core::DocumentId;
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};

use crate::RankedMap;
use crate::serde::extract_document_id;

use crate::database::{Index, Error, index::Cache};

pub struct DocumentsDeletion<'a> {
    index: &'a Index,
    documents: Vec<DocumentId>,
}

impl<'a> DocumentsDeletion<'a> {
    pub fn new(index: &'a Index) -> DocumentsDeletion<'a> {
        DocumentsDeletion { index, documents: Vec::new() }
    }

    pub fn delete_document_by_id(&mut self, document_id: DocumentId) {
        self.documents.push(document_id);
    }

    pub fn delete_document<D>(&mut self, document: D) -> Result<(), Error>
    where D: serde::Serialize,
    {
        let schema = self.index.schema();
        let identifier = schema.identifier_name();
        let document_id = match extract_document_id(identifier, &document)? {
            Some(id) => id,
            None => return Err(Error::MissingDocumentId),
        };

        self.delete_document_by_id(document_id);

        Ok(())
    }

    pub fn finalize(self) -> Result<u64, Error> {
        self.index.push_documents_deletion(self.documents)
    }
}

impl Extend<DocumentId> for DocumentsDeletion<'_> {
    fn extend<T: IntoIterator<Item=DocumentId>>(&mut self, iter: T) {
        self.documents.extend(iter)
    }
}

pub fn apply_documents_deletion(
    index: &Index,
    mut ranked_map: RankedMap,
    deletion: Vec<DocumentId>,
) -> Result<(), Error>
{
    let ref_index = index.as_ref();
    let schema = index.schema();
    let docs_words = ref_index.docs_words_index;
    let documents = ref_index.documents_index;
    let main = ref_index.main_index;
    let words = ref_index.words_index;

    let idset = SetBuf::from_dirty(deletion);

    // collect the ranked attributes according to the schema
    let ranked_attrs: Vec<_> = schema.iter()
        .filter_map(|(_, attr, prop)| {
            if prop.is_ranked() { Some(attr) } else { None }
        })
        .collect();

    let mut words_document_ids = HashMap::new();
    for id in idset {
        // remove all the ranked attributes from the ranked_map
        for ranked_attr in &ranked_attrs {
            ranked_map.remove(id, *ranked_attr);
        }

        if let Some(words) = docs_words.doc_words(id)? {
            let mut stream = words.stream();
            while let Some(word) = stream.next() {
                let word = word.to_vec();
                words_document_ids.entry(word).or_insert_with(Vec::new).push(id);
            }
        }
    }

    let mut deleted_documents = HashSet::new();
    let mut removed_words = BTreeSet::new();
    for (word, document_ids) in words_document_ids {
        let document_ids = SetBuf::from_dirty(document_ids);

        if let Some(doc_indexes) = words.doc_indexes(&word)? {
            let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
            let doc_indexes = op.into_set_buf();

            if !doc_indexes.is_empty() {
                words.set_doc_indexes(&word, &doc_indexes)?;
            } else {
                words.del_doc_indexes(&word)?;
                removed_words.insert(word);
            }
        }

        for id in document_ids {
            if documents.del_all_document_fields(id)? != 0 {
                deleted_documents.insert(id);
            }
            docs_words.del_doc_words(id)?;
        }
    }

    let removed_words = fst::Set::from_iter(removed_words).unwrap();
    let words = match main.words_set()? {
        Some(words_set) => {
            let op = fst::set::OpBuilder::new()
                .add(words_set.stream())
                .add(removed_words.stream())
                .difference();

            let mut words_builder = SetBuilder::memory();
            words_builder.extend_stream(op).unwrap();
            words_builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        },
        None => fst::Set::default(),
    };

    main.set_words_set(&words)?;
    main.set_ranked_map(&ranked_map)?;

    let deleted_documents_len = deleted_documents.len() as u64;
    let number_of_documents = main.set_number_of_documents(|old| old - deleted_documents_len)?;

    // update the "consistent" view of the Index
    let cache = ref_index.cache;
    let words = Arc::new(words);
    let synonyms = cache.synonyms.clone();
    let schema = cache.schema.clone();

    let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
    index.cache.store(Arc::new(cache));

    Ok(())
}
@@ -1,9 +0,0 @@
mod documents_addition;
mod documents_deletion;
mod synonyms_addition;
mod synonyms_deletion;

pub use self::documents_addition::{DocumentsAddition, apply_documents_addition};
pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion};
pub use self::synonyms_addition::{SynonymsAddition, apply_synonyms_addition};
pub use self::synonyms_deletion::{SynonymsDeletion, apply_synonyms_deletion};
@@ -1,94 +0,0 @@
use std::collections::BTreeMap;
use std::sync::Arc;

use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use sdset::SetBuf;

use crate::database::{Error, Index, index::Cache};

pub struct SynonymsAddition<'a> {
    index: &'a Index,
    synonyms: BTreeMap<String, Vec<String>>,
}

impl<'a> SynonymsAddition<'a> {
    pub fn new(index: &'a Index) -> SynonymsAddition<'a> {
        SynonymsAddition { index, synonyms: BTreeMap::new() }
    }

    pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
    where S: AsRef<str>,
          T: AsRef<str>,
          I: IntoIterator<Item=T>,
    {
        let synonym = normalize_str(synonym.as_ref());
        let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
        self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
    }

    pub fn finalize(self) -> Result<u64, Error> {
        self.index.push_synonyms_addition(self.synonyms)
    }
}

pub fn apply_synonyms_addition(
    index: &Index,
    addition: BTreeMap<String, Vec<String>>,
) -> Result<(), Error>
{
    let ref_index = index.as_ref();
    let synonyms = ref_index.synonyms_index;
    let main = ref_index.main_index;

    let mut synonyms_builder = SetBuilder::memory();

    for (synonym, alternatives) in addition {
        synonyms_builder.insert(&synonym).unwrap();

        let alternatives = {
            let alternatives = SetBuf::from_dirty(alternatives);
            let mut alternatives_builder = SetBuilder::memory();
            alternatives_builder.extend_iter(alternatives).unwrap();
            alternatives_builder.into_inner().unwrap()
        };
        synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
    }

    let delta_synonyms = synonyms_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    let synonyms = match main.synonyms_set()? {
        Some(synonyms) => {
            let op = OpBuilder::new()
                .add(synonyms.stream())
                .add(delta_synonyms.stream())
                .r#union();

            let mut synonyms_builder = SetBuilder::memory();
            synonyms_builder.extend_stream(op).unwrap();
            synonyms_builder
                .into_inner()
                .and_then(fst::Set::from_bytes)
                .unwrap()
        },
        None => delta_synonyms,
    };

    main.set_synonyms_set(&synonyms)?;

    // update the "consistent" view of the Index
    let cache = ref_index.cache;
    let words = Arc::new(main.words_set()?.unwrap_or_default());
    let ranked_map = cache.ranked_map.clone();
    let synonyms = Arc::new(synonyms);
    let schema = cache.schema.clone();
    let number_of_documents = cache.number_of_documents;

    let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
    index.cache.store(Arc::new(cache));

    Ok(())
}
@@ -1,69 +0,0 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;

#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocumentAttrKey {
    pub document_id: DocumentId,
    pub attribute: SchemaAttr,
}

impl DocumentAttrKey {
    pub fn new(document_id: DocumentId, attribute: SchemaAttr) -> DocumentAttrKey {
        DocumentAttrKey { document_id, attribute }
    }

    pub fn to_be_bytes(self) -> [u8; 10] {
        let mut output = [0u8; 10];

        let document_id = self.document_id.0.to_be_bytes();
        let attribute = self.attribute.0.to_be_bytes();

        unsafe {
            use std::{mem::size_of, ptr::copy_nonoverlapping};

            let output = output.as_mut_ptr();
            copy_nonoverlapping(document_id.as_ptr(), output, size_of::<u64>());

            let output = output.add(size_of::<u64>());
            copy_nonoverlapping(attribute.as_ptr(), output, size_of::<u16>());
        }

        output
    }

    pub fn from_be_bytes(bytes: [u8; 10]) -> DocumentAttrKey {
        let document_id;
        let attribute;

        unsafe {
            use std::ptr::read_unaligned;

            let pointer = bytes.as_ptr() as *const _;
            let document_id_bytes = read_unaligned(pointer);
            document_id = u64::from_be_bytes(document_id_bytes);

            // `pointer` is inferred as `*const [u8; 8]`,
            // so `add(1)` steps over the 8 document id bytes
            let pointer = pointer.add(1) as *const _;
            let attribute_bytes = read_unaligned(pointer);
            attribute = u16::from_be_bytes(attribute_bytes);
        }

        DocumentAttrKey {
            document_id: DocumentId(document_id),
            attribute: SchemaAttr(attribute),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn to_from_be_bytes() {
        let document_id = DocumentId(67578308);
        let schema_attr = SchemaAttr(3456);
        let x = DocumentAttrKey::new(document_id, schema_attr);

        assert_eq!(x, DocumentAttrKey::from_be_bytes(x.to_be_bytes()));
    }
}
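The big-endian encoding above is what makes these keys sort correctly inside the key-value store: byte-lexicographic order on the 10-byte key equals numeric order on `(document_id, attribute)`, so all attributes of one document are stored contiguously. A small self-contained sketch of that property using only safe std helpers (names are illustrative, not from the source):

fn be_key(document_id: u64, attribute: u16) -> [u8; 10] {
    let mut key = [0u8; 10];
    key[..8].copy_from_slice(&document_id.to_be_bytes());
    key[8..].copy_from_slice(&attribute.to_be_bytes());
    key
}

fn demo() {
    // byte order of the keys matches numeric order of the pairs
    assert!(be_key(1, 500) < be_key(2, 0));
    assert!(be_key(2, 3) < be_key(2, 4));
}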
@@ -1,15 +0,0 @@
mod cf_tree;
mod database;
mod document_attr_key;
mod indexer;
mod number;
mod ranked_map;
mod serde;

pub use self::cf_tree::{CfTree, CfIter};
pub use self::database::{Database, Index, CustomSettingsIndex};
pub use self::number::Number;
pub use self::ranked_map::RankedMap;
pub use self::serde::{compute_document_id, extract_document_id, value_to_string};

pub type RocksDbResult<T> = Result<T, rocksdb::Error>;
@@ -1,132 +0,0 @@
use std::collections::HashSet;
use std::io::Cursor;
use std::{fmt, error::Error};

use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::decode::{Deserializer as RmpDeserializer, ReadReader};
use rmp_serde::decode::{Error as RmpError};
use serde::{de, forward_to_deserialize_any};

use crate::database::Index;

#[derive(Debug)]
pub enum DeserializerError {
    RmpError(RmpError),
    RocksDbError(rocksdb::Error),
    Custom(String),
}

impl de::Error for DeserializerError {
    fn custom<T: fmt::Display>(msg: T) -> Self {
        DeserializerError::Custom(msg.to_string())
    }
}

impl fmt::Display for DeserializerError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            DeserializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
            DeserializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
            DeserializerError::Custom(s) => f.write_str(s),
        }
    }
}

impl Error for DeserializerError {}

impl From<RmpError> for DeserializerError {
    fn from(error: RmpError) -> DeserializerError {
        DeserializerError::RmpError(error)
    }
}

impl From<rocksdb::Error> for DeserializerError {
    fn from(error: rocksdb::Error) -> DeserializerError {
        DeserializerError::RocksDbError(error)
    }
}

pub struct Deserializer<'a> {
    pub document_id: DocumentId,
    pub index: &'a Index,
    pub fields: Option<&'a HashSet<SchemaAttr>>,
}

impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
    type Error = DeserializerError;

    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where V: de::Visitor<'de>
    {
        self.deserialize_map(visitor)
    }

    forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
        tuple_struct struct enum identifier ignored_any
    }

    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where V: de::Visitor<'de>
    {
        let schema = self.index.schema();
        let documents = self.index.as_ref().documents_index;

        let iter = documents
            .document_fields(self.document_id)?
            .filter_map(|(attr, value)| {
                let is_displayed = schema.props(attr).is_displayed();
                if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
                    let attribute_name = schema.attribute_name(attr);
                    Some((attribute_name, Value::new(value)))
                } else {
                    None
                }
            });

        let map_deserializer = de::value::MapDeserializer::new(iter);
        let result = visitor.visit_map(map_deserializer).map_err(DeserializerError::from);

        result
    }
}

struct Value<A>(RmpDeserializer<ReadReader<Cursor<A>>>) where A: AsRef<[u8]>;

impl<A> Value<A> where A: AsRef<[u8]>
{
    fn new(value: A) -> Value<A> {
        Value(RmpDeserializer::new(Cursor::new(value)))
    }
}

impl<'de, A> de::IntoDeserializer<'de, RmpError> for Value<A>
where A: AsRef<[u8]>,
{
    type Deserializer = Self;

    fn into_deserializer(self) -> Self::Deserializer {
        self
    }
}

impl<'de, 'a, A> de::Deserializer<'de> for Value<A>
where A: AsRef<[u8]>,
{
    type Error = RmpError;

    fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
    where V: de::Visitor<'de>
    {
        self.0.deserialize_any(visitor)
    }

    forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
        tuple_struct map struct enum identifier ignored_any
    }
}
@@ -1,96 +0,0 @@
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;

use serde_json::json;
use meilidb_data::Database;
use meilidb_schema::{Schema, SchemaBuilder, DISPLAYED, INDEXED};

fn simple_schema() -> Schema {
    let mut builder = SchemaBuilder::with_identifier("objectId");
    builder.new_attribute("objectId", DISPLAYED | INDEXED);
    builder.new_attribute("title", DISPLAYED | INDEXED);
    builder.build()
}

#[test]
fn insert_delete_document() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let database = Database::open(&tmp_dir).unwrap();

    let has_been_updated = Arc::new(AtomicBool::new(false));

    let schema = simple_schema();
    let index = database.create_index("hello", schema).unwrap();

    let has_been_updated_clone = has_been_updated.clone();
    index.set_update_callback(move |_| has_been_updated_clone.store(true, Relaxed));

    let doc1 = json!({ "objectId": 123, "title": "hello" });

    let mut addition = index.documents_addition();
    addition.update_document(&doc1);
    let update_id = addition.finalize().unwrap();
    let status = index.update_status_blocking(update_id).unwrap();
    assert!(has_been_updated.swap(false, Relaxed));
    assert!(status.result.is_ok());
    assert_eq!(index.number_of_documents(), 1);

    let docs = index.query_builder().query("hello", 0..10).unwrap();
    assert_eq!(docs.len(), 1);
    assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));

    let mut deletion = index.documents_deletion();
    deletion.delete_document(&doc1).unwrap();
    let update_id = deletion.finalize().unwrap();
    let status = index.update_status_blocking(update_id).unwrap();
    assert!(has_been_updated.swap(false, Relaxed));
    assert!(status.result.is_ok());
    assert_eq!(index.number_of_documents(), 0);

    let docs = index.query_builder().query("hello", 0..10).unwrap();
    assert_eq!(docs.len(), 0);
}

#[test]
fn replace_document() {
    let tmp_dir = tempfile::tempdir().unwrap();
    let database = Database::open(&tmp_dir).unwrap();

    let has_been_updated = Arc::new(AtomicBool::new(false));

    let schema = simple_schema();
    let index = database.create_index("hello", schema).unwrap();

    let has_been_updated_clone = has_been_updated.clone();
    index.set_update_callback(move |_| has_been_updated_clone.store(true, Relaxed));

    let doc1 = json!({ "objectId": 123, "title": "hello" });
    let doc2 = json!({ "objectId": 123, "title": "coucou" });

    let mut addition = index.documents_addition();
    addition.update_document(&doc1);
    let update_id = addition.finalize().unwrap();
    let status = index.update_status_blocking(update_id).unwrap();
    assert!(has_been_updated.swap(false, Relaxed));
    assert!(status.result.is_ok());
    assert_eq!(index.number_of_documents(), 1);

    let docs = index.query_builder().query("hello", 0..10).unwrap();
    assert_eq!(docs.len(), 1);
    assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));

    let mut addition = index.documents_addition();
    addition.update_document(&doc2);
    let update_id = addition.finalize().unwrap();
    let status = index.update_status_blocking(update_id).unwrap();
    assert!(has_been_updated.swap(false, Relaxed));
    assert!(status.result.is_ok());
    assert_eq!(index.number_of_documents(), 1);

    let docs = index.query_builder().query("hello", 0..10).unwrap();
    assert_eq!(docs.len(), 0);

    let docs = index.query_builder().query("coucou", 0..10).unwrap();
    assert_eq!(docs.len(), 1);
    assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc2));
}
@@ -6,7 +6,7 @@ edition = "2018"

 [dependencies]
 bincode = "1.1.2"
-linked-hash-map = { version = "0.5.2", features = ["serde_impl"] }
+indexmap = { version = "1.1.0", features = ["serde-1"] }
 serde = { version = "1.0.91", features = ["derive"] }
 serde_json = { version = "1.0.39", features = ["preserve_order"] }
 toml = { version = "0.5.0", features = ["preserve_order"] }
@@ -1,27 +1,37 @@
-use std::collections::{HashMap, BTreeMap};
-use std::io::{Read, Write};
-use std::error::Error;
-use std::{fmt, u16};
+use std::collections::{BTreeMap, HashMap};
 use std::ops::BitOr;
 use std::sync::Arc;
+use std::{fmt, u16};

-use serde::{Serialize, Deserialize};
-use linked_hash_map::LinkedHashMap;
+use indexmap::IndexMap;
+use serde::{Deserialize, Serialize};

-pub const DISPLAYED: SchemaProps = SchemaProps { displayed: true, indexed: false, ranked: false };
-pub const INDEXED: SchemaProps = SchemaProps { displayed: false, indexed: true, ranked: false };
-pub const RANKED: SchemaProps = SchemaProps { displayed: false, indexed: false, ranked: true };
+pub const DISPLAYED: SchemaProps = SchemaProps {
+    displayed: true,
+    indexed: false,
+    ranked: false,
+};
+pub const INDEXED: SchemaProps = SchemaProps {
+    displayed: false,
+    indexed: true,
+    ranked: false,
+};
+pub const RANKED: SchemaProps = SchemaProps {
+    displayed: false,
+    indexed: false,
+    ranked: true,
+};

 #[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct SchemaProps {
     #[serde(default)]
-    displayed: bool,
+    pub displayed: bool,
+
     #[serde(default)]
-    indexed: bool,
+    pub indexed: bool,
+
     #[serde(default)]
-    ranked: bool,
+    pub ranked: bool,
 }

 impl SchemaProps {
@@ -53,14 +63,14 @@ impl BitOr for SchemaProps {
 #[derive(Serialize, Deserialize)]
 pub struct SchemaBuilder {
     identifier: String,
-    attributes: LinkedHashMap<String, SchemaProps>,
+    attributes: IndexMap<String, SchemaProps>,
 }

 impl SchemaBuilder {
     pub fn with_identifier<S: Into<String>>(name: S) -> SchemaBuilder {
         SchemaBuilder {
             identifier: name.into(),
-            attributes: LinkedHashMap::new(),
+            attributes: IndexMap::new(),
         }
     }

@@ -82,7 +92,13 @@ impl SchemaBuilder {
         }

         let identifier = self.identifier;
-        Schema { inner: Arc::new(InnerSchema { identifier, attrs, props }) }
+        Schema {
+            inner: Arc::new(InnerSchema {
+                identifier,
+                attrs,
+                props,
+            }),
+        }
     }
 }

@@ -99,62 +115,23 @@ struct InnerSchema {
 }

 impl Schema {
-    pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<dyn Error>> {
-        let mut buffer = Vec::new();
-        reader.read_to_end(&mut buffer)?;
-        let builder: SchemaBuilder = toml::from_slice(&buffer)?;
-        Ok(builder.build())
-    }
-
-    pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<dyn Error>> {
+    fn to_builder(&self) -> SchemaBuilder {
         let identifier = self.inner.identifier.clone();
         let attributes = self.attributes_ordered();
-        let builder = SchemaBuilder { identifier, attributes };
-
-        let string = toml::to_string_pretty(&builder)?;
-        writer.write_all(string.as_bytes())?;
-
-        Ok(())
-    }
-
-    pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<dyn Error>> {
-        let mut buffer = Vec::new();
-        reader.read_to_end(&mut buffer)?;
-        let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
-        Ok(builder.build())
-    }
-
-    pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<dyn Error>> {
-        let identifier = self.inner.identifier.clone();
-        let attributes = self.attributes_ordered();
-        let builder = SchemaBuilder { identifier, attributes };
-        let string = serde_json::to_string_pretty(&builder)?;
-        writer.write_all(string.as_bytes())?;
-
-        Ok(())
-    }
-
-    pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> {
-        let builder: SchemaBuilder = bincode::deserialize_from(reader)?;
-        Ok(builder.build())
-    }
-
-    pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
-        let identifier = self.inner.identifier.clone();
-        let attributes = self.attributes_ordered();
-        let builder = SchemaBuilder { identifier, attributes };
-
-        bincode::serialize_into(writer, &builder)
+        SchemaBuilder {
+            identifier,
+            attributes,
+        }
     }

-    fn attributes_ordered(&self) -> LinkedHashMap<String, SchemaProps> {
+    fn attributes_ordered(&self) -> IndexMap<String, SchemaProps> {
         let mut ordered = BTreeMap::new();
         for (name, attr) in &self.inner.attrs {
             let (_, props) = self.inner.props[attr.0 as usize];
             ordered.insert(attr.0, (name, props));
         }

-        let mut attributes = LinkedHashMap::with_capacity(ordered.len());
+        let mut attributes = IndexMap::with_capacity(ordered.len());
         for (_, (name, props)) in ordered {
             attributes.insert(name.clone(), props);
         }
@@ -180,17 +157,34 @@ impl Schema {
         name
     }

-    pub fn iter<'a>(&'a self) -> impl Iterator<Item=(&str, SchemaAttr, SchemaProps)> + 'a {
-        self.inner.props.iter()
-            .map(move |(name, prop)| {
-                let attr = self.inner.attrs.get(name).unwrap();
-                (name.as_str(), *attr, *prop)
-            })
+    pub fn iter<'a>(&'a self) -> impl Iterator<Item = (&str, SchemaAttr, SchemaProps)> + 'a {
+        self.inner.props.iter().map(move |(name, prop)| {
+            let attr = self.inner.attrs.get(name).unwrap();
+            (name.as_str(), *attr, *prop)
+        })
     }
 }

-#[derive(Serialize, Deserialize)]
-#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
+impl Serialize for Schema {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::ser::Serializer,
+    {
+        self.to_builder().serialize(serializer)
+    }
+}
+
+impl<'de> Deserialize<'de> for Schema {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::de::Deserializer<'de>,
+    {
+        let builder = SchemaBuilder::deserialize(deserializer)?;
+        Ok(builder.build())
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
 pub struct SchemaAttr(pub u16);

 impl SchemaAttr {
@@ -235,9 +229,8 @@ mod tests {
         let schema = builder.build();

         let mut buffer = Vec::new();
-
-        schema.write_to_bin(&mut buffer)?;
-        let schema2 = Schema::read_from_bin(buffer.as_slice())?;
+        bincode::serialize_into(&mut buffer, &schema)?;
+        let schema2 = bincode::deserialize_from(buffer.as_slice())?;

         assert_eq!(schema, schema2);

@@ -252,10 +245,9 @@
         builder.new_attribute("gamma", INDEXED);
         let schema = builder.build();

-        let mut buffer = Vec::new();
-        schema.to_toml(&mut buffer)?;
+        let buffer = toml::to_vec(&schema)?;
+        let schema2 = toml::from_slice(buffer.as_slice())?;

-        let schema2 = Schema::from_toml(buffer.as_slice())?;
         assert_eq!(schema, schema2);

         let data = r#"
@@ -271,7 +263,7 @@
             [attributes."gamma"]
             indexed = true
         "#;
-        let schema2 = Schema::from_toml(data.as_bytes())?;
+        let schema2 = toml::from_str(data)?;
         assert_eq!(schema, schema2);

         Ok(())
@@ -285,10 +277,9 @@
         builder.new_attribute("gamma", INDEXED);
         let schema = builder.build();

-        let mut buffer = Vec::new();
-        schema.to_json(&mut buffer)?;
+        let buffer = serde_json::to_vec(&schema)?;
+        let schema2 = serde_json::from_slice(buffer.as_slice())?;

-        let schema2 = Schema::from_json(buffer.as_slice())?;
         assert_eq!(schema, schema2);

         let data = r#"
@@ -307,7 +298,7 @@
             }
         }
         }"#;
-        let schema2 = Schema::from_json(data.as_bytes())?;
+        let schema2 = serde_json::from_str(data)?;
         assert_eq!(schema, schema2);

         Ok(())
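With the hand-written `Serialize`/`Deserialize` impls above, a `Schema` now round-trips through any serde format directly, replacing the removed `to_toml`/`from_json`-style helpers. A sketch of that, assuming `serde_json` is available as a dependency and `Schema` implements `PartialEq` as the tests suggest:

use meilidb_schema::{Schema, SchemaBuilder, DISPLAYED, INDEXED};

fn roundtrip() -> serde_json::Result<()> {
    let mut builder = SchemaBuilder::with_identifier("id");
    builder.new_attribute("alpha", DISPLAYED);
    builder.new_attribute("beta", DISPLAYED | INDEXED);
    let schema = builder.build();

    // Serialize goes through `to_builder()`, Deserialize through `SchemaBuilder::build()`
    let json = serde_json::to_string(&schema)?;
    let schema2: Schema = serde_json::from_str(&json)?;
    assert_eq!(schema, schema2);
    Ok(())
}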
@@ -1,17 +1,17 @@
-use std::iter::Peekable;
-use slice_group_by::StrGroupBy;
 use self::SeparatorCategory::*;
+use slice_group_by::StrGroupBy;
+use std::iter::Peekable;

 pub fn is_cjk(c: char) -> bool {
-    (c >= '\u{2e80}' && c <= '\u{2eff}') ||
-    (c >= '\u{2f00}' && c <= '\u{2fdf}') ||
-    (c >= '\u{3040}' && c <= '\u{309f}') ||
-    (c >= '\u{30a0}' && c <= '\u{30ff}') ||
-    (c >= '\u{3100}' && c <= '\u{312f}') ||
-    (c >= '\u{3200}' && c <= '\u{32ff}') ||
-    (c >= '\u{3400}' && c <= '\u{4dbf}') ||
-    (c >= '\u{4e00}' && c <= '\u{9fff}') ||
-    (c >= '\u{f900}' && c <= '\u{faff}')
+    (c >= '\u{2e80}' && c <= '\u{2eff}')
+        || (c >= '\u{2f00}' && c <= '\u{2fdf}')
+        || (c >= '\u{3040}' && c <= '\u{309f}')
+        || (c >= '\u{30a0}' && c <= '\u{30ff}')
+        || (c >= '\u{3100}' && c <= '\u{312f}')
+        || (c >= '\u{3200}' && c <= '\u{32ff}')
+        || (c >= '\u{3400}' && c <= '\u{4dbf}')
+        || (c >= '\u{4e00}' && c <= '\u{9fff}')
+        || (c >= '\u{f900}' && c <= '\u{faff}')
 }

 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@@ -22,7 +22,11 @@ enum SeparatorCategory {

 impl SeparatorCategory {
     fn merge(self, other: SeparatorCategory) -> SeparatorCategory {
-        if let (Soft, Soft) = (self, other) { Soft } else { Hard }
+        if let (Soft, Soft) = (self, other) {
+            Soft
+        } else {
+            Hard
+        }
     }

     fn to_usize(self) -> usize {
@@ -39,8 +43,8 @@ fn is_separator(c: char) -> bool {

 fn classify_separator(c: char) -> Option<SeparatorCategory> {
     match c {
-        ' ' | '\'' | '"' => Some(Soft),
-        '.' | ';' | ',' | '!' | '?' | '-' | '(' | ')' => Some(Hard),
+        ' ' | '-' | '_' | '\'' | ':' | '"' => Some(Soft),
+        '.' | ';' | ',' | '!' | '?' | '(' | ')' => Some(Hard),
         _ => None,
     }
 }
@@ -79,7 +83,7 @@ fn chars_count_index((n, _): (usize, usize), (i, c): (usize, char)) -> (usize, usize) {
     (n + 1, i + c.len_utf8())
 }

-pub fn split_query_string(query: &str) -> impl Iterator<Item=&str> {
+pub fn split_query_string(query: &str) -> impl Iterator<Item = &str> {
     Tokenizer::new(query).map(|t| t.word)
 }

@@ -100,9 +104,10 @@ impl<'a> Tokenizer<'a> {
     pub fn new(string: &str) -> Tokenizer {
         // skip every separator and set `char_index`
        // to the number of char trimmed
-        let (count, index) = string.char_indices()
-            .take_while(|(_, c)| is_separator(*c))
-            .fold((0, 0), chars_count_index);
+        let (count, index) = string
+            .char_indices()
+            .take_while(|(_, c)| is_separator(*c))
+            .fold((0, 0), chars_count_index);

         Tokenizer {
             inner: &string[index..],
@@ -122,10 +127,11 @@ impl<'a> Iterator for Tokenizer<'a> {
             let (count, index) = string.char_indices().fold((0, 0), chars_count_index);

             if !is_str_word(string) {
-                self.word_index += string.chars()
-                    .filter_map(classify_separator)
-                    .fold(Soft, |a, x| a.merge(x))
-                    .to_usize();
+                self.word_index += string
+                    .chars()
+                    .filter_map(classify_separator)
+                    .fold(Soft, |a, x| a.merge(x))
+                    .to_usize();
                 self.char_index += count;
                 self.inner = &self.inner[index..];
                 continue;
@@ -153,7 +159,8 @@ impl<'a> Iterator for Tokenizer<'a> {
 }

 pub struct SeqTokenizer<'a, I>
-where I: Iterator<Item=&'a str>,
+where
+    I: Iterator<Item = &'a str>,
 {
     inner: I,
     current: Option<Peekable<Tokenizer<'a>>>,
@@ -162,13 +169,14 @@ where I: Iterator<Item=&'a str>,
 }

 impl<'a, I> SeqTokenizer<'a, I>
-where I: Iterator<Item=&'a str>,
+where
+    I: Iterator<Item = &'a str>,
 {
     pub fn new(mut iter: I) -> SeqTokenizer<'a, I> {
         let current = iter.next().map(|s| Tokenizer::new(s).peekable());
         SeqTokenizer {
             inner: iter,
-            current: current,
+            current,
             word_offset: 0,
             char_offset: 0,
         }
@@ -176,7 +184,8 @@ where I: Iterator<Item=&'a str>,
 }

 impl<'a, I> Iterator for SeqTokenizer<'a, I>
-where I: Iterator<Item=&'a str>,
+where
+    I: Iterator<Item = &'a str>,
 {
     type Item = Token<'a>;

@@ -202,15 +211,15 @@ where I: Iterator<Item=&'a str>,
                     }

                     Some(token)
-                },
+                }
                 None => {
                     // no more words in this text we must
                     // start tokenizing the next text
                     self.current = self.inner.next().map(|s| Tokenizer::new(s).peekable());
                     self.next()
-                },
+                }
             }
-        },
+        }
         // no more texts available
         None => None,
     }
@@ -225,12 +234,26 @@ mod tests {
     fn easy() {
         let mut tokenizer = Tokenizer::new("salut");

-        assert_eq!(tokenizer.next(), Some(Token { word: "salut", word_index: 0, char_index: 0 }));
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "salut",
+                word_index: 0,
+                char_index: 0
+            })
+        );
         assert_eq!(tokenizer.next(), None);

         let mut tokenizer = Tokenizer::new("yo ");

-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "yo",
+                word_index: 0,
+                char_index: 0
+            })
+        );
         assert_eq!(tokenizer.next(), None);
     }

@@ -238,19 +261,82 @@ mod tests {
     fn hard() {
         let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe (ouch)");

-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "ouch", word_index: 17, char_index: 18 }));
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "yo",
+                word_index: 0,
+                char_index: 4
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "lolo",
+                word_index: 1,
+                char_index: 7
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "aïe",
+                word_index: 9,
+                char_index: 13
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "ouch",
+                word_index: 17,
+                char_index: 18
+            })
+        );
         assert_eq!(tokenizer.next(), None);

         let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");

-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "wtf", word_index: 16, char_index: 12 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 18 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 32, char_index: 24 }));
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "yo",
+                word_index: 0,
+                char_index: 0
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "lolo",
+                word_index: 8,
+                char_index: 5
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "wtf",
+                word_index: 16,
+                char_index: 12
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "lol",
+                word_index: 17,
+                char_index: 18
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "aïe",
+                word_index: 25,
+                char_index: 24
+            })
+        );
         assert_eq!(tokenizer.next(), None);
     }

@@ -258,18 +344,74 @@ mod tests {
     fn hard_long_chars() {
         let mut tokenizer = Tokenizer::new(" .? yo 😂. aïe");

-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "😂", word_index: 1, char_index: 7 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 10 }));
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "yo",
+                word_index: 0,
+                char_index: 4
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "😂",
+                word_index: 1,
+                char_index: 7
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "aïe",
+                word_index: 9,
+                char_index: 10
+            })
+        );
         assert_eq!(tokenizer.next(), None);

         let mut tokenizer = Tokenizer::new("yo ! lolo ? 😱 - lol . 😣 ,");

-        assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "😱", word_index: 16, char_index: 12 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 16 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 22 }));
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "yo",
+                word_index: 0,
+                char_index: 0
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "lolo",
+                word_index: 8,
+                char_index: 5
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "😱",
+                word_index: 16,
+                char_index: 12
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "lol",
+                word_index: 17,
+                char_index: 16
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "😣",
+                word_index: 25,
+                char_index: 22
+            })
+        );
         assert_eq!(tokenizer.next(), None);
     }

@@ -277,19 +419,82 @@ mod tests {
     fn hard_kanjis() {
         let mut tokenizer = Tokenizer::new("\u{2ec4}lolilol\u{2ec7}");

-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 1, char_index: 1 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 2, char_index: 8 }));
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "\u{2ec4}",
+                word_index: 0,
+                char_index: 0
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "lolilol",
+                word_index: 1,
+                char_index: 1
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "\u{2ec7}",
+                word_index: 2,
+                char_index: 8
+            })
+        );
         assert_eq!(tokenizer.next(), None);

         let mut tokenizer = Tokenizer::new("\u{2ec4}\u{2ed3}\u{2ef2} lolilol - hello \u{2ec7}");

-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ed3}", word_index: 1, char_index: 1 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ef2}", word_index: 2, char_index: 2 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 3, char_index: 4 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "hello", word_index: 11, char_index: 14 }));
-        assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 12, char_index: 23 }));
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "\u{2ec4}",
+                word_index: 0,
+                char_index: 0
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "\u{2ed3}",
+                word_index: 1,
+                char_index: 1
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "\u{2ef2}",
+                word_index: 2,
+                char_index: 2
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "lolilol",
+                word_index: 3,
+                char_index: 4
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "hello",
+                word_index: 4,
+                char_index: 14
+            })
+        );
+        assert_eq!(
+            tokenizer.next(),
+            Some(Token {
+                word: "\u{2ec7}",
+                word_index: 5,
+                char_index: 23
+            })
+        );
         assert_eq!(tokenizer.next(), None);
     }
 }
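The changed expectations in those tests follow directly from `classify_separator` and `merge` above: a soft separator advances `word_index` by 1 and a hard one by 8 (the distance `to_usize` encodes, judging from the test values), so moving '-' from hard to soft is what shifts `lol` from index 24 to 17. A short sketch of that behaviour; the `meilidb_tokenizer::Tokenizer` import path and the public `Token` fields are assumptions drawn from the tests:

use meilidb_tokenizer::Tokenizer; // assumed crate-root export

fn demo() {
    let mut tokens = Tokenizer::new("hello! world");
    // "!" is a hard separator, so the next word_index jumps by 8
    let first = tokens.next().unwrap();
    assert_eq!((first.word, first.word_index), ("hello", 0));
    let second = tokens.next().unwrap();
    assert_eq!((second.word, second.word_index), ("world", 8));
}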
@@ -1,28 +0,0 @@
[package]
edition = "2018"
name = "meilidb"
version = "0.3.1"
authors = ["Kerollmops <renault.cle@gmail.com>"]

[dependencies]
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-data = { path = "../meilidb-data", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }

[dev-dependencies]
csv = "1.0.7"
diskus = "0.5.0"
env_logger = "0.6.1"
jemallocator = "0.1.9"
linked-hash-map = "0.5.2"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
quickcheck = "0.8.2"
rand = "0.6.5"
rand_xorshift = "0.1.1"
rustyline = { version = "5.0.0", default-features = false }
serde = { version = "1.0.91", features = ["derive"] }
serde_json = "1.0.39"
structopt = "0.2.15"
sysinfo = "0.8.4"
tempfile = "3.0.7"
termcolor = "1.0.4"
@@ -1,214 +0,0 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

use std::collections::{HashMap, HashSet};
use std::io::{self, BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::error::Error;
use std::fs::File;

use diskus::Walk;
use sysinfo::{SystemExt, ProcessExt};
use serde::{Serialize, Deserialize};
use structopt::StructOpt;

use meilidb_data::Database;
use meilidb_schema::Schema;

#[derive(Debug, StructOpt)]
pub struct Opt {
    /// The destination where the database must be created.
    #[structopt(parse(from_os_str))]
    pub database_path: PathBuf,

    /// The csv file to index.
    #[structopt(parse(from_os_str))]
    pub csv_data_path: PathBuf,

    /// The path to the schema.
    #[structopt(long = "schema", parse(from_os_str))]
    pub schema_path: PathBuf,

    /// The file with the synonyms.
    #[structopt(long = "synonyms", parse(from_os_str))]
    pub synonyms: Option<PathBuf>,

    /// The path to the list of stop words (one per line).
    #[structopt(long = "stop-words", parse(from_os_str))]
    pub stop_words: Option<PathBuf>,

    #[structopt(long = "update-group-size")]
    pub update_group_size: Option<usize>,
}

#[derive(Serialize, Deserialize)]
struct Document(HashMap<String, String>);

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Synonym {
    OneWay(SynonymOneWay),
    MultiWay { synonyms: Vec<String> },
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SynonymOneWay {
    pub search_terms: String,
    pub synonyms: Synonyms,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Synonyms {
    Multiple(Vec<String>),
    Single(String),
}

fn read_synonyms(path: &Path) -> Result<Vec<Synonym>, Box<dyn Error>> {
    let file = File::open(path)?;
    let synonyms = serde_json::from_reader(file)?;
    Ok(synonyms)
}

fn index(
    schema: Schema,
    database_path: &Path,
    csv_data_path: &Path,
    update_group_size: Option<usize>,
    stop_words: &HashSet<String>,
    synonyms: Vec<Synonym>,
) -> Result<Database, Box<dyn Error>>
{
    let database = Database::open(database_path)?;

    let mut wtr = csv::Writer::from_path("./stats.csv").unwrap();
    wtr.write_record(&["NumberOfDocuments", "DiskUsed", "MemoryUsed"])?;

    let mut system = sysinfo::System::new();

    let index = database.create_index("test", schema.clone())?;

    let mut synonyms_adder = index.synonyms_addition();
    for synonym in synonyms {
        match synonym {
            Synonym::OneWay(SynonymOneWay { search_terms, synonyms }) => {
                let alternatives = match synonyms {
                    Synonyms::Multiple(alternatives) => alternatives,
                    Synonyms::Single(alternative) => vec![alternative],
                };
                synonyms_adder.add_synonym(search_terms, alternatives);
            },
            Synonym::MultiWay { mut synonyms } => {
                // make every word a synonym of all the others by rotating the list
                for _ in 0..synonyms.len() {
                    if let Some((synonym, alternatives)) = synonyms.split_first() {
                        synonyms_adder.add_synonym(synonym, alternatives);
                    }
                    synonyms.rotate_left(1);
                }
            },
        }
    }
    synonyms_adder.finalize()?;

    let mut rdr = csv::Reader::from_path(csv_data_path)?;
    let mut raw_record = csv::StringRecord::new();
    let headers = rdr.headers()?.clone();

    let mut i = 0;
    let mut end_of_file = false;

    while !end_of_file {
        let mut update = index.documents_addition();

        loop {
            end_of_file = !rdr.read_record(&mut raw_record)?;
            if end_of_file { break }

            let document: Document = match raw_record.deserialize(Some(&headers)) {
                Ok(document) => document,
                Err(e) => {
                    eprintln!("{:?}", e);
                    continue;
                }
            };

            update.update_document(document);

            print!("\rindexing document {}", i);
            i += 1;

            if let Some(group_size) = update_group_size {
                if i % group_size == 0 { break }
            }
        }

        println!();

        println!("committing update...");
        update.finalize()?;

        // write stats
        let directory_size = Walk::new(&[database_path.to_owned()], 4).run();
        system.refresh_all();
        let memory = system.get_process(sysinfo::get_current_pid()).unwrap().memory(); // in kb
        wtr.write_record(&[i.to_string(), directory_size.to_string(), memory.to_string()])?;
        wtr.flush()?;
    }

    Ok(database)
}

fn retrieve_stop_words(path: &Path) -> io::Result<HashSet<String>> {
    let f = File::open(path)?;
    let reader = BufReader::new(f);
    let mut words = HashSet::new();

    for line in reader.lines() {
        let line = line?;
        let word = line.trim().to_string();
        words.insert(word);
    }

    Ok(words)
}

fn main() -> Result<(), Box<dyn Error>> {
    let _ = env_logger::init();
    let opt = Opt::from_args();

    let schema = {
        let file = File::open(&opt.schema_path)?;
        Schema::from_toml(file)?
    };

    let stop_words = match opt.stop_words {
        Some(ref path) => retrieve_stop_words(path)?,
        None => HashSet::new(),
    };

    let synonyms = match opt.synonyms {
        Some(ref path) => read_synonyms(path)?,
        None => Vec::new(),
    };

    let start = Instant::now();
    let result = index(
        schema,
        &opt.database_path,
        &opt.csv_data_path,
        opt.update_group_size,
        &stop_words,
        synonyms,
    );

    if let Err(e) = result {
        return Err(e.into())
    }

    println!("database created in {:.2?} at: {:?}", start.elapsed(), opt.database_path);
    Ok(())
}
@ -1,229 +0,0 @@
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
use std::collections::btree_map::{BTreeMap, Entry};
|
||||
use std::collections::HashSet;
|
||||
use std::error::Error;
|
||||
use std::io::{self, Write};
|
||||
use std::iter::FromIterator;
|
||||
use std::path::PathBuf;
|
||||
use std::time::{Instant, Duration};
|
||||
|
||||
use linked_hash_map::LinkedHashMap;
|
||||
use rustyline::{Editor, Config};
|
||||
use structopt::StructOpt;
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
|
||||
use meilidb_core::Highlight;
|
||||
use meilidb_data::Database;
|
||||
use meilidb_schema::SchemaAttr;
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
pub struct Opt {
|
||||
/// The destination where the database must be created
|
||||
#[structopt(parse(from_os_str))]
|
||||
pub database_path: PathBuf,
|
||||
|
||||
#[structopt(long = "fetch-timeout-ms")]
|
||||
pub fetch_timeout_ms: Option<u64>,
|
||||
|
||||
/// Fields that must be displayed.
|
||||
pub displayed_fields: Vec<String>,
|
||||
|
||||
/// The number of returned results
|
||||
#[structopt(short = "n", long = "number-results", default_value = "10")]
|
||||
pub number_results: usize,
|
||||
|
||||
/// The number of characters before and after the first match
|
||||
#[structopt(short = "C", long = "context", default_value = "35")]
|
||||
pub char_context: usize,
|
||||
}
|
||||
|
||||
type Document = LinkedHashMap<String, String>;
|
||||
|
||||
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
|
||||
let mut stdout = StandardStream::stdout(ColorChoice::Always);
|
||||
let mut highlighted = false;
|
||||
|
||||
for range in ranges.windows(2) {
|
||||
let [start, end] = match range { [start, end] => [*start, *end], _ => unreachable!() };
|
||||
if highlighted {
|
||||
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?;
|
||||
}
|
||||
write!(&mut stdout, "{}", &text[start..end])?;
|
||||
stdout.reset()?;
|
||||
highlighted = !highlighted;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
|
||||
let mut byte_index = 0;
|
||||
let mut byte_length = 0;
|
||||
|
||||
for (n, (i, c)) in text.char_indices().enumerate() {
|
||||
if n == index {
|
||||
byte_index = i;
|
||||
}
|
||||
|
||||
if n + 1 == index + length {
|
||||
byte_length = i - byte_index + c.len_utf8();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
(byte_index, byte_length)
|
||||
}
|
||||
|
||||
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
|
||||
let mut byte_indexes = BTreeMap::new();
|
||||
|
||||
for highlight in highlights {
|
||||
let char_index = highlight.char_index as usize;
|
||||
let char_length = highlight.char_length as usize;
|
||||
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
|
||||
|
||||
match byte_indexes.entry(byte_index) {
|
||||
Entry::Vacant(entry) => { entry.insert(byte_length); },
|
||||
Entry::Occupied(mut entry) => {
|
||||
if *entry.get() < byte_length {
|
||||
entry.insert(byte_length);
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
let mut title_areas = Vec::new();
|
||||
title_areas.push(0);
|
||||
for (byte_index, length) in byte_indexes {
|
||||
title_areas.push(byte_index);
|
||||
title_areas.push(byte_index + length);
|
||||
}
|
||||
title_areas.push(text.len());
|
||||
title_areas.sort_unstable();
|
||||
title_areas
|
||||
}
|
||||
|
||||
/// note: highlights must have been sorted by `char_index` and `char_length` before being passed.
///
/// ```no_run
/// doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));
///
/// let highlights = doc.highlights.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
///
/// let (text, highlights) = crop_text(&text, highlights, 35);
/// ```
fn crop_text(
    text: &str,
    highlights: impl IntoIterator<Item=Highlight>,
    context: usize,
) -> (String, Vec<Highlight>)
{
    let mut highlights = highlights.into_iter().peekable();

    // Center the crop window on the first highlight, with `context`
    // characters of slack on each side.
    let char_index = highlights.peek().map(|m| m.char_index as usize).unwrap_or(0);
    let start = char_index.saturating_sub(context);
    let text = text.chars().skip(start).take(context * 2).collect();

    // Keep only the highlights that fit inside the cropped window and
    // rebase their indexes onto the new string.
    let highlights = highlights
        .take_while(|m| {
            (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
        })
        .map(|highlight| {
            Highlight { char_index: highlight.char_index - start as u16, ..highlight }
        })
        .collect();

    (text, highlights)
}

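// Worked example (hypothetical values): with the first highlight at
// char_index 40 and a context of 35, the window starts at char 5 and spans
// 70 characters; any highlight ending past char 75 is dropped by the
// `take_while`, and the kept ones get 5 subtracted from their `char_index`.
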
fn main() -> Result<(), Box<dyn Error>> {
    let _ = env_logger::init();
    let opt = Opt::from_args();

    let start = Instant::now();
    let database = Database::open(&opt.database_path)?;

    let index = database.open_index("test")?.unwrap();
    let schema = index.schema();

    println!("database prepared for you in {:.2?}", start.elapsed());

    let fields = opt.displayed_fields.iter().map(String::as_str);
    let fields = HashSet::from_iter(fields);

    let config = Config::builder().auto_add_history(true).build();
    let mut readline = Editor::<()>::with_config(config);
    let _ = readline.load_history("query-history.txt");

    for result in readline.iter("Searching for: ") {
        match result {
            Ok(query) => {
                let start_total = Instant::now();

                let builder = match opt.fetch_timeout_ms {
                    Some(timeout_ms) => {
                        let timeout = Duration::from_millis(timeout_ms);
                        index.query_builder().with_fetch_timeout(timeout)
                    },
                    None => index.query_builder(),
                };
                let documents = builder.query(&query, 0..opt.number_results)?;

                let mut retrieve_duration = Duration::default();

                let number_of_documents = documents.len();
                for mut doc in documents {
                    // `crop_text` and `create_highlight_areas` both expect
                    // highlights sorted by position, then by length.
                    doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));

                    let start_retrieve = Instant::now();
                    let result = index.document::<Document>(Some(&fields), doc.id);
                    retrieve_duration += start_retrieve.elapsed();

                    match result {
                        Ok(Some(document)) => {
                            for (name, text) in document {
                                print!("{}: ", name);

                                let attr = schema.attribute(&name).unwrap();
                                let highlights = doc.highlights.iter()
                                    .filter(|m| SchemaAttr::new(m.attribute) == attr)
                                    .cloned();
                                let (text, highlights) = crop_text(&text, highlights, opt.char_context);
                                let areas = create_highlight_areas(&text, &highlights);
                                display_highlights(&text, &areas)?;
                                println!();
                            }
                        },
                        Ok(None) => eprintln!("missing document"),
                        Err(e) => eprintln!("{}", e),
                    }

                    let mut matching_attributes = HashSet::new();
                    for highlight in doc.highlights {
                        let attr = SchemaAttr::new(highlight.attribute);
                        let name = schema.attribute_name(attr);
                        matching_attributes.insert(name);
                    }

                    let matching_attributes = Vec::from_iter(matching_attributes);
                    println!("matching in: {:?}", matching_attributes);

                    println!();
                }

                eprintln!("document field retrieve took {:.2?}", retrieve_duration);
                eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
            },
            Err(err) => {
                println!("Error: {:?}", err);
                break
            }
        }
    }

    readline.save_history("query-history.txt").unwrap();
    Ok(())
}

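// Hedged example session (shape reconstructed from the println!/eprintln!
// calls above; the query, timings, and counts are made up):
//
//     $ query-database test.mdb title
//     database prepared for you in 8.42ms
//     Searching for: subway
//     title: subway stories        ("subway" is rendered in yellow)
//     matching in: ["title"]
//
//     document field retrieve took 412.00µs
//     ===== Found 1 results in 1.23ms =====
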
@ -1,3 +0,0 @@
mod sort_by_attr;

pub use self::sort_by_attr::SortByAttr;