Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-07-18 04:11:07 +00:00)

Compare commits (113 commits)
Author | SHA1 | Date | |
---|---|---|---|
685016bfec | |||
d30e5f6231 | |||
e854d67a55 | |||
23a89732a5 | |||
3a1f41ebdb | |||
f873761a27 | |||
ebf620c7f9 | |||
8b92bc3421 | |||
70a5aa61e9 | |||
a76169042f | |||
c9c3cfcee9 | |||
2e60ac5359 | |||
2dd7751e09 | |||
26bdabcdec | |||
fc8c7ed77e | |||
521c96354f | |||
9788779894 | |||
9b965764ab | |||
9a5a543311 | |||
b18fb868e8 | |||
c734af55c0 | |||
810b328ad2 | |||
0a8039d8d8 | |||
e51704c09a | |||
623a9012d5 | |||
b9a185634f | |||
b46889b5f0 | |||
ef9a0c07db | |||
3a6f3947c9 | |||
5c5f41d755 | |||
6803a8fad0 | |||
8e4b362e4d | |||
acb5e624c6 | |||
a98949ff1d | |||
f355280250 | |||
cee8d6a8d9 | |||
27326ea069 | |||
7bbe5aca5b | |||
1c4afe6d0f | |||
2d8f9a9849 | |||
3f41681b18 | |||
64791815fa | |||
8a36571a74 | |||
d18e775bec | |||
78381f1818 | |||
7f33a01ae1 | |||
d07d14d33a | |||
540d7886ab | |||
5a5d10af52 | |||
f95d077ef8 | |||
05dd99936f | |||
c086625773 | |||
dc17bebf4a | |||
026464b2e4 | |||
bd42158a70 | |||
df066f4321 | |||
69832e8c70 | |||
95eb6ad09a | |||
f3fc0bed45 | |||
5dd6b697b9 | |||
b7d170c7d1 | |||
7541172d12 | |||
85bf5d113c | |||
89fd397903 | |||
d8392f2f18 | |||
36b74f0efe | |||
68c0a36b00 | |||
a127b72a74 | |||
5782fb9e52 | |||
20319f7974 | |||
c4087e2ec2 | |||
b1d1f2f627 | |||
62fe6a8263 | |||
d88c10f3b4 | |||
00f49990c7 | |||
89f30ad47e | |||
3b1cbed238 | |||
4571b80a49 | |||
de2b8672d4 | |||
ccded7b429 | |||
1d4e98410a | |||
e493b27ef1 | |||
70589c136f | |||
1c3620a7d4 | |||
c2cc0704d7 | |||
2a50e08bb8 | |||
6b326a45d7 | |||
b73874bf24 | |||
95c8ad0f80 | |||
996763cc52 | |||
6a8171d335 | |||
2f32586dab | |||
db898001eb | |||
c2a12b661a | |||
f51c49db93 | |||
1be5b0f327 | |||
a136c62208 | |||
cc461b1331 | |||
dbe5363672 | |||
45d4361e7d | |||
b28c44cc6b | |||
b709a7a30a | |||
64c25bdb40 | |||
c230f244be | |||
02af4ff113 | |||
4dff8a215e | |||
41065305aa | |||
e9dce3ce81 | |||
ff7dde7522 | |||
a226fd23c3 | |||
776673ebae | |||
32d2cc3aea | |||
8a17fcdda5 |
1 .gitignore (vendored)
@ -1,5 +1,4 @@
|
||||
/target
|
||||
Cargo.lock
|
||||
**/*.csv
|
||||
**/*.json_lines
|
||||
**/*.rs.bk
|
||||
|
2519 Cargo.lock (generated)
File diff suppressed because it is too large
@ -1,8 +1,10 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"meilidb-core",
|
||||
"meilidb-http",
|
||||
"meilidb-schema",
|
||||
"meilidb-tokenizer",
|
||||
"meilidb-types",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
|
26 LICENSE
@ -1,13 +1,21 @@
|
||||
“Commons Clause” License Condition v1.0
|
||||
MIT License
|
||||
|
||||
The Software is provided to you by the Licensor under the License, as defined below, subject to the following condition.
|
||||
Copyright (c) [year] [fullname]
|
||||
|
||||
Without limiting other conditions in the License, the grant of rights under the License will not include, and the License does not grant to you, the right to Sell the Software.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
For purposes of the foregoing, “Sell” means practicing any or all of the rights granted to you under the License to provide to third parties, for a fee or other consideration (including without limitation fees for hosting or consulting/ support services related to the Software), a product or service whose value derives, entirely or substantially, from the functionality of the Software. Any license notice or attribution required by the License must also include this Commons Clause License Condition notice.
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
Software: MeiliDB
|
||||
|
||||
License: MIT
|
||||
|
||||
Licensor: MEILI SAS
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
101 README.md
@ -4,7 +4,9 @@
|
||||
[](https://deps.rs/repo/github/meilisearch/MeiliDB)
|
||||
[](https://commonsclause.com/)
|
||||
|
||||
A _full-text search database_ based on the fast [LMDB key-value store](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database).
|
||||
Ultra relevant and instant full-text search API.
|
||||
|
||||
MeiliSearch is a powerful, fast, open-source search engine that is easy to use and deploy. The search and indexation are fully customizable and handle features like typo-tolerance, filters, and ranking.
|
||||
|
||||
## Features
|
||||
|
||||
@ -27,16 +29,91 @@ You can [read the deep dive](deep-dive.md) if you want more information on the e
|
||||
|
||||
We will be proud if you submit issues and pull requests. You can help grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good place to start!
|
||||
|
||||
The project is still only a library, meaning that no binary is provided yet. To get started, you can check the examples, which are made to work with the data located in the `datasets/` folder.
|
||||
[](https://crates.meilisearch.com)
|
||||
|
||||
MeiliDB will become a binary in the near future, so you will be able to use it as a database out of the box. We should be able to query it using HTTP. This is our current goal, [see the milestones](https://github.com/meilisearch/MeiliDB/milestones). In the end, the binary will be a bunch of network protocols and wrappers around the library, which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
|
||||
> Meili helps the Rust community find crates on [crates.meilisearch.com](https://crates.meilisearch.com)
|
||||
|
||||
|
||||
|
||||
## Quick Start
|
||||
|
||||
You can deploy your own instant, relevant, and typo-tolerant MeiliDB search engine yourself too.
|
||||
Something similar to the demo above can be achieved by following these three little steps.
|
||||
You will need to create your own web front end to make it pretty, though.
|
||||
|
||||
### Deploy the Server
|
||||
|
||||
If you have not installed Rust and its package manager `cargo` yet, go to [the installation page](https://www.rust-lang.org/tools/install).<br/>
|
||||
You can deploy the server on your own machine; it will listen for HTTP requests on port 8080 by default.
|
||||
|
||||
```bash
|
||||
rustup override set nightly
|
||||
cargo run --release
|
||||
```
|
||||
|
||||
For more logs during the execution, run:
|
||||
```bash
|
||||
RUST_LOG=info cargo run --release
|
||||
```
|
||||
|
||||
### Create an Index and Upload Some Documents
|
||||
|
||||
MeiliDB can serve multiple indexes, each with different kinds of documents;
|
||||
therefore, you must create an index before sending documents to it.
|
||||
|
||||
```bash
|
||||
curl -i -X POST 'http://127.0.0.1:8080/indexes/movies'
|
||||
```
|
||||
|
||||
Now that the server knows about our brand new index, we can send it data.
|
||||
We provide a small dataset for you; it is available in the `datasets/` directory.
|
||||
|
||||
```bash
|
||||
curl -i -X POST 'http://127.0.0.1:8080/indexes/movies/documents' \
|
||||
--header 'content-type: application/json' \
|
||||
--data @datasets/movies/movies.json
|
||||
```
|
||||
|
||||
### Search for Documents
|
||||
|
||||
The search engine is now aware of our documents and can serve them via the HTTP server.
|
||||
The [`jq` command line tool](https://stedolan.github.io/jq/) can greatly help you read the server responses.
|
||||
|
||||
```bash
|
||||
curl 'http://127.0.0.1:8080/indexes/movies/search?q=botman'
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "29751",
|
||||
"title": "Batman Unmasked: The Psychology of the Dark Knight",
|
||||
"poster": "https://image.tmdb.org/t/p/w1280/jjHu128XLARc2k4cJrblAvZe0HE.jpg",
|
||||
"overview": "Delve into the world of Batman and the vigilante justice tha",
|
||||
"release_date": "2008-07-15"
|
||||
},
|
||||
{
|
||||
"id": "471474",
|
||||
"title": "Batman: Gotham by Gaslight",
|
||||
"poster": "https://image.tmdb.org/t/p/w1280/7souLi5zqQCnpZVghaXv0Wowi0y.jpg",
|
||||
"overview": "ve Victorian Age Gotham City, Batman begins his war on crime",
|
||||
"release_date": "2018-01-12"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 2,
|
||||
"processingTimeMs": 1,
|
||||
"query": "botman"
|
||||
}
|
||||
```
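For instance, a small `jq` filter can pull out only the titles from the response above. This is just a convenience sketch; the field names match the sample response shown here, so adapt them to your own documents:

```bash
# keep only the titles of the matched documents
curl -s 'http://127.0.0.1:8080/indexes/movies/search?q=botman' | jq '.hits[].title'
```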
|
||||
|
||||
|
||||
|
||||
## Performances
|
||||
|
||||
With a database composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
|
||||
So more than _300 000_ fields indexed for _35 million_ stored we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
|
||||
With a dataset composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
|
||||
So, with more than _300 000_ fields indexed out of _35 million_ stored, we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
|
||||
|
||||
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real user queries.
|
||||
|
||||
@ -51,20 +128,24 @@ Requests/sec: 2806.46
|
||||
Transfer/sec: 759.17KB
|
||||
```
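As a rough sketch, an invocation along these lines produces that kind of load; the thread and connection counts, the Lua script name, and the target URL are illustrative, not the exact benchmark setup:

```bash
# 4 threads, 100 open connections, 30 seconds, queries driven by a Lua script
wrk -t4 -c100 -d30s -s queries.lua 'http://127.0.0.1:8080/indexes/movies/search?q=botman'
```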
|
||||
|
||||
We also indexed a dataset containing about _12 million_ city names in _24 minutes_ on a machine with _8 cores_, _64 GB of RAM_, and a _300 GB NVMe_ SSD.<br/>
|
||||
The resulting database was _16 GB_, and search results took between _30 ms_ and _4 seconds_ for short prefix queries.
|
||||
|
||||
### Notes
|
||||
|
||||
With Rust 1.32, the default allocator was [changed to the system allocator](https://blog.rust-lang.org/2019/01/17/Rust-1.32.0.html#jemalloc-is-removed-by-default).
|
||||
We have seen much better performance when [using jemalloc as the global allocator](https://github.com/alexcrichton/jemallocator#documentation).
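A minimal sketch of how a binary can opt back into jemalloc with the `jemallocator` crate linked above (the crate version is an assumption; check crates.io):

```rust
// Cargo.toml: jemallocator = "0.3"   (assumed version)
use jemallocator::Jemalloc;

// Route every allocation through jemalloc instead of the system allocator.
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;

fn main() {
    // the rest of the program is unchanged
}
```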
|
||||
|
||||
## Usage and examples
|
||||
## Usage and Examples
|
||||
|
||||
Currently, MeiliDB does not provide an HTTP server, but you can run the example binary.
|
||||
MeiliDB also provides an example binary that is mostly used for feature testing.
|
||||
Note that the example binary indexes data faster, as it reads CSV files directly rather than JSON HTTP payloads.
|
||||
|
||||
The _index_ subcommand creates an index and injects documents into it. Using the command line below, the index will be named _movies_ and the _19 700_ movies in the `datasets/` folder will be injected into MeiliDB.
|
||||
|
||||
```bash
|
||||
cargo run --release --example from_file -- \
|
||||
index example.mdb datasets/movies/data.csv \
|
||||
index example.mdb datasets/movies/movies.csv \
|
||||
--schema datasets/movies/schema.toml
|
||||
```
|
||||
|
||||
@ -72,8 +153,8 @@ Once the first command is done, you can query the freshly created _movies_ index
|
||||
|
||||
```bash
|
||||
cargo run --release --example from_file -- \
|
||||
search example.mdb
|
||||
--number 4 \
|
||||
search example.mdb \
|
||||
--number-results 4 \
|
||||
--filter '!adult' \
|
||||
id popularity adult original_title
|
||||
```
|
||||
|
@ -42,11 +42,11 @@ jobs:
|
||||
displayName: 'Build MeiliDB'
|
||||
- task: CopyFiles@2
|
||||
inputs:
|
||||
contents: '$(System.DefaultWorkingDirectory)/target/release/libmeilidb.rlib'
|
||||
contents: '$(System.DefaultWorkingDirectory)/target/release/meilidb-http'
|
||||
targetFolder: $(Build.ArtifactStagingDirectory)
|
||||
displayName: 'Copy build'
|
||||
- task: PublishBuildArtifacts@1
|
||||
inputs:
|
||||
artifactName: libmeilidb.rlib
|
||||
artifactName: meilidb
|
||||
displayName: 'Upload artifacts'
|
||||
|
||||
|
19654 datasets/movies/movies.json (new file)
File diff suppressed because it is too large
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "meilidb-core"
|
||||
version = "0.1.0"
|
||||
version = "0.7.0"
|
||||
authors = ["Kerollmops <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
|
||||
@ -8,14 +8,18 @@ edition = "2018"
|
||||
arc-swap = "0.4.3"
|
||||
bincode = "1.1.4"
|
||||
byteorder = "1.3.2"
|
||||
crossbeam-channel = "0.3.9"
|
||||
chrono = { version = "0.4.9", features = ["serde"] }
|
||||
crossbeam-channel = "0.4.0"
|
||||
deunicode = "1.0.0"
|
||||
env_logger = "0.7.0"
|
||||
fst = { version = "0.3.5", default-features = false }
|
||||
hashbrown = { version = "0.6.0", features = ["serde"] }
|
||||
heed = "0.1.0"
|
||||
heed = "0.5.0"
|
||||
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
|
||||
log = "0.4.8"
|
||||
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
|
||||
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
||||
meilidb-schema = { path = "../meilidb-schema", version = "0.6.0" }
|
||||
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.6.0" }
|
||||
meilidb-types = { path = "../meilidb-types", version = "0.1.0" }
|
||||
once_cell = "1.2.0"
|
||||
ordered-float = { version = "1.0.2", features = ["serde"] }
|
||||
sdset = "0.3.3"
|
||||
@ -25,15 +29,6 @@ siphasher = "0.3.0"
|
||||
slice-group-by = "0.2.6"
|
||||
zerocopy = "0.2.8"
|
||||
|
||||
[dependencies.levenshtein_automata]
|
||||
git = "https://github.com/Kerollmops/levenshtein-automata.git"
|
||||
branch = "arc-byte-slice"
|
||||
features = ["fst_automaton"]
|
||||
|
||||
[dependencies.fst]
|
||||
git = "https://github.com/Kerollmops/fst.git"
|
||||
branch = "arc-byte-slice"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.3"
|
||||
csv = "1.0.7"
|
||||
|
@ -12,17 +12,18 @@ use serde::{Deserialize, Serialize};
|
||||
use structopt::StructOpt;
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
|
||||
use meilidb_core::{Database, Highlight, UpdateResult};
|
||||
use meilidb_core::{Database, Highlight, ProcessedUpdateResult};
|
||||
use meilidb_schema::SchemaAttr;
|
||||
|
||||
const INDEX_NAME: &str = "default";
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct IndexCommand {
|
||||
/// The destination where the database must be created.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_name: String,
|
||||
|
||||
/// The csv file to index.
|
||||
#[structopt(parse(from_os_str))]
|
||||
csv_data_path: PathBuf,
|
||||
@ -40,10 +41,13 @@ struct IndexCommand {
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct SearchCommand {
|
||||
/// The destination where the database must be created.
|
||||
/// The path of the database to work with.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_name: String,
|
||||
|
||||
/// Timeout after which the search will return results.
|
||||
#[structopt(long)]
|
||||
fetch_timeout_ms: Option<u64>,
|
||||
@ -65,10 +69,21 @@ struct SearchCommand {
|
||||
displayed_fields: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
struct ShowUpdatesCommand {
|
||||
/// The path of the database to work with.
|
||||
#[structopt(parse(from_os_str))]
|
||||
database_path: PathBuf,
|
||||
|
||||
#[structopt(long, default_value = "default")]
|
||||
index_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
enum Command {
|
||||
Index(IndexCommand),
|
||||
Search(SearchCommand),
|
||||
ShowUpdates(ShowUpdatesCommand),
|
||||
}
|
||||
|
||||
impl Command {
|
||||
@ -76,6 +91,7 @@ impl Command {
|
||||
match self {
|
||||
Command::Index(command) => &command.database_path,
|
||||
Command::Search(command) => &command.database_path,
|
||||
Command::ShowUpdates(command) => &command.database_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -88,14 +104,14 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
|
||||
let start = Instant::now();
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |update: UpdateResult| sender.send(update.update_id).unwrap();
|
||||
let index = match database.open_index(INDEX_NAME) {
|
||||
let update_fn =
|
||||
move |_name: &str, update: ProcessedUpdateResult| sender.send(update.update_id).unwrap();
|
||||
let index = match database.open_index(&command.index_name) {
|
||||
Some(index) => index,
|
||||
None => database.create_index(INDEX_NAME).unwrap(),
|
||||
None => database.create_index(&command.index_name).unwrap(),
|
||||
};
|
||||
|
||||
let done = database.set_update_callback(INDEX_NAME, Box::new(update_fn));
|
||||
assert!(done, "could not set the index update function");
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let env = &database.env;
|
||||
|
||||
@ -179,8 +195,9 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
|
||||
);
|
||||
|
||||
if let Some(path) = command.compact_to_path {
|
||||
fs::create_dir_all(&path)?;
|
||||
let start = Instant::now();
|
||||
let _file = database.copy_and_compact_to_path(&path)?;
|
||||
let _file = database.copy_and_compact_to_path(path.join("data.mdb"))?;
|
||||
println!(
|
||||
"database compacted in {:.2?} at: {:?}",
|
||||
start.elapsed(),
|
||||
@ -201,7 +218,11 @@ fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if highlighted {
|
||||
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?;
|
||||
stdout.set_color(
|
||||
ColorSpec::new()
|
||||
.set_fg(Some(Color::Yellow))
|
||||
.set_underline(true),
|
||||
)?;
|
||||
}
|
||||
write!(&mut stdout, "{}", &text[start..end])?;
|
||||
stdout.reset()?;
|
||||
@ -297,12 +318,13 @@ fn crop_text(
|
||||
fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<dyn Error>> {
|
||||
let env = &database.env;
|
||||
let index = database
|
||||
.open_index(INDEX_NAME)
|
||||
.open_index(&command.index_name)
|
||||
.expect("Could not find index");
|
||||
|
||||
let reader = env.read_txn().unwrap();
|
||||
let schema = index.main.schema(&reader)?;
|
||||
reader.abort();
|
||||
|
||||
let schema = schema.ok_or(meilidb_core::Error::SchemaMissing)?;
|
||||
|
||||
let fields = command.displayed_fields.iter().map(String::as_str);
|
||||
@ -418,6 +440,23 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn show_updates_command(
|
||||
command: ShowUpdatesCommand,
|
||||
database: Database,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
let env = &database.env;
|
||||
let index = database
|
||||
.open_index(&command.index_name)
|
||||
.expect("Could not find index");
|
||||
|
||||
let reader = env.read_txn().unwrap();
|
||||
let updates = index.all_updates_status(&reader)?;
|
||||
println!("{:#?}", updates);
|
||||
reader.abort();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
env_logger::init();
|
||||
|
||||
@ -427,5 +466,6 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
match opt {
|
||||
Command::Index(command) => index_command(command, database),
|
||||
Command::Search(command) => search_command(command, database),
|
||||
Command::ShowUpdates(command) => show_updates_command(command, database),
|
||||
}
|
||||
}
|
||||
|
@ -4,63 +4,99 @@ use std::path::Path;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::{fs, thread};
|
||||
|
||||
use crossbeam_channel::Receiver;
|
||||
use crossbeam_channel::{Receiver, Sender};
|
||||
use heed::types::{Str, Unit};
|
||||
use heed::{CompactionOption, Result as ZResult};
|
||||
use log::{debug, error};
|
||||
use log::debug;
|
||||
|
||||
use crate::{store, update, Index, MResult};
|
||||
|
||||
pub type BoxUpdateFn = Box<dyn Fn(update::UpdateResult) + Send + Sync + 'static>;
|
||||
pub type BoxUpdateFn = Box<dyn Fn(&str, update::ProcessedUpdateResult) + Send + Sync + 'static>;
|
||||
type ArcSwapFn = arc_swap::ArcSwapOption<BoxUpdateFn>;
|
||||
|
||||
pub struct Database {
|
||||
pub env: heed::Env,
|
||||
common_store: heed::PolyDatabase,
|
||||
indexes_store: heed::Database<Str, Unit>,
|
||||
indexes: RwLock<HashMap<String, (Index, Arc<ArcSwapFn>, thread::JoinHandle<()>)>>,
|
||||
indexes: RwLock<HashMap<String, (Index, thread::JoinHandle<()>)>>,
|
||||
update_fn: Arc<ArcSwapFn>,
|
||||
}
|
||||
|
||||
fn update_awaiter(receiver: Receiver<()>, env: heed::Env, update_fn: Arc<ArcSwapFn>, index: Index) {
|
||||
for () in receiver {
|
||||
// consume all updates in order (oldest first)
|
||||
macro_rules! r#break_try {
|
||||
($expr:expr, $msg:tt) => {
|
||||
match $expr {
|
||||
core::result::Result::Ok(val) => val,
|
||||
core::result::Result::Err(err) => {
|
||||
log::error!(concat!($msg, ": {}"), err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub enum UpdateEvent {
|
||||
NewUpdate,
|
||||
MustStop,
|
||||
}
|
||||
|
||||
pub type UpdateEvents = Receiver<UpdateEvent>;
|
||||
pub type UpdateEventsEmitter = Sender<UpdateEvent>;
|
||||
|
||||
fn update_awaiter(
|
||||
receiver: UpdateEvents,
|
||||
env: heed::Env,
|
||||
index_name: &str,
|
||||
update_fn: Arc<ArcSwapFn>,
|
||||
index: Index,
|
||||
) {
|
||||
let mut receiver = receiver.into_iter();
|
||||
while let Some(UpdateEvent::NewUpdate) = receiver.next() {
|
||||
loop {
|
||||
let mut writer = match env.write_txn() {
|
||||
Ok(writer) => writer,
|
||||
Err(e) => {
|
||||
error!("LMDB writer transaction begin failed: {}", e);
|
||||
break;
|
||||
}
|
||||
};
|
||||
// instantiate a main/parent transaction
|
||||
let mut writer = break_try!(env.write_txn(), "LMDB write transaction begin failed");
|
||||
|
||||
match update::update_task(&mut writer, index.clone()) {
|
||||
Ok(Some(status)) => {
|
||||
match status.result {
|
||||
Ok(_) => {
|
||||
if let Err(e) = writer.commit() {
|
||||
error!("update transaction failed: {}", e)
|
||||
}
|
||||
}
|
||||
Err(_) => writer.abort(),
|
||||
}
|
||||
|
||||
if let Some(ref callback) = *update_fn.load() {
|
||||
(callback)(status);
|
||||
}
|
||||
}
|
||||
// no more updates to handle for now
|
||||
Ok(None) => {
|
||||
// retrieve the update that needs to be processed
|
||||
let result = index.updates.pop_front(&mut writer);
|
||||
let (update_id, update) = match break_try!(result, "pop front update failed") {
|
||||
Some(value) => value,
|
||||
None => {
|
||||
debug!("no more updates");
|
||||
writer.abort();
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("update task failed: {}", e);
|
||||
writer.abort()
|
||||
}
|
||||
};
|
||||
|
||||
// instantiate a nested transaction
|
||||
let result = env.nested_write_txn(&mut writer);
|
||||
let mut nested_writer = break_try!(result, "LMDB nested write transaction failed");
|
||||
|
||||
// try to apply the update to the database using the nested transaction
|
||||
let result = update::update_task(&mut nested_writer, index.clone(), update_id, update);
|
||||
let status = break_try!(result, "update task failed");
|
||||
|
||||
// commit the nested transaction if the update was successful, abort it otherwise
|
||||
if status.error.is_none() {
|
||||
break_try!(nested_writer.commit(), "commit nested transaction failed");
|
||||
} else {
|
||||
nested_writer.abort()
|
||||
}
|
||||
|
||||
// write the result of the update in the updates-results store
|
||||
let updates_results = index.updates_results;
|
||||
let result = updates_results.put_update_result(&mut writer, update_id, &status);
|
||||
|
||||
// always commit the main/parent transaction, even if the update was unsuccessful
|
||||
break_try!(result, "update result store commit failed");
|
||||
break_try!(writer.commit(), "update parent transaction failed");
|
||||
|
||||
// call the user callback when the update and the result are written consistently
|
||||
if let Some(ref callback) = *update_fn.load() {
|
||||
(callback)(index_name, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("update loop system stopped");
|
||||
}
|
||||
|
||||
impl Database {
|
||||
@ -72,8 +108,9 @@ impl Database {
|
||||
.max_dbs(3000)
|
||||
.open(path)?;
|
||||
|
||||
let common_store = env.create_dyn_database(Some("common"))?;
|
||||
let common_store = env.create_poly_database(Some("common"))?;
|
||||
let indexes_store = env.create_database::<Str, Unit>(Some("indexes"))?;
|
||||
let update_fn = Arc::new(ArcSwapFn::empty());
|
||||
|
||||
// list all indexes that needs to be opened
|
||||
let mut must_open = Vec::new();
|
||||
@ -99,21 +136,27 @@ impl Database {
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let update_fn = Arc::new(ArcSwapFn::empty());
|
||||
|
||||
let env_clone = env.clone();
|
||||
let index_clone = index.clone();
|
||||
let name_clone = index_name.clone();
|
||||
let update_fn_clone = update_fn.clone();
|
||||
|
||||
let handle = thread::spawn(move || {
|
||||
update_awaiter(receiver, env_clone, update_fn_clone, index_clone)
|
||||
update_awaiter(
|
||||
receiver,
|
||||
env_clone,
|
||||
&name_clone,
|
||||
update_fn_clone,
|
||||
index_clone,
|
||||
)
|
||||
});
|
||||
|
||||
// send an update notification to make sure that
|
||||
// possible pre-boot updates are consumed
|
||||
sender.send(()).unwrap();
|
||||
sender.send(UpdateEvent::NewUpdate).unwrap();
|
||||
|
||||
let result = indexes.insert(index_name, (index, update_fn, handle));
|
||||
let result = indexes.insert(index_name, (index, handle));
|
||||
assert!(
|
||||
result.is_none(),
|
||||
"The index should not have been already open"
|
||||
@ -125,6 +168,7 @@ impl Database {
|
||||
common_store,
|
||||
indexes_store,
|
||||
indexes: RwLock::new(indexes),
|
||||
update_fn,
|
||||
})
|
||||
}
|
||||
|
||||
@ -151,43 +195,56 @@ impl Database {
|
||||
|
||||
let env_clone = self.env.clone();
|
||||
let index_clone = index.clone();
|
||||
|
||||
let no_update_fn = Arc::new(ArcSwapFn::empty());
|
||||
let no_update_fn_clone = no_update_fn.clone();
|
||||
let name_clone = name.to_owned();
|
||||
let update_fn_clone = self.update_fn.clone();
|
||||
|
||||
let handle = thread::spawn(move || {
|
||||
update_awaiter(receiver, env_clone, no_update_fn_clone, index_clone)
|
||||
update_awaiter(
|
||||
receiver,
|
||||
env_clone,
|
||||
&name_clone,
|
||||
update_fn_clone,
|
||||
index_clone,
|
||||
)
|
||||
});
|
||||
|
||||
writer.commit()?;
|
||||
entry.insert((index.clone(), no_update_fn, handle));
|
||||
entry.insert((index.clone(), handle));
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_update_callback(&self, name: impl AsRef<str>, update_fn: BoxUpdateFn) -> bool {
|
||||
let indexes_lock = self.indexes.read().unwrap();
|
||||
match indexes_lock.get(name.as_ref()) {
|
||||
Some((_, current_update_fn, _)) => {
|
||||
let update_fn = Some(Arc::new(update_fn));
|
||||
current_update_fn.swap(update_fn);
|
||||
true
|
||||
pub fn delete_index(&self, name: impl AsRef<str>) -> MResult<bool> {
|
||||
let name = name.as_ref();
|
||||
let mut indexes_lock = self.indexes.write().unwrap();
|
||||
|
||||
match indexes_lock.remove_entry(name) {
|
||||
Some((name, (index, handle))) => {
|
||||
// remove the index name from the list of indexes
|
||||
// and clear all the LMDB dbi
|
||||
let mut writer = self.env.write_txn()?;
|
||||
self.indexes_store.delete(&mut writer, &name)?;
|
||||
store::clear(&mut writer, &index)?;
|
||||
writer.commit()?;
|
||||
|
||||
// join the update loop thread to ensure it is stopped
|
||||
handle.join().unwrap();
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
None => false,
|
||||
None => Ok(false),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unset_update_callback(&self, name: impl AsRef<str>) -> bool {
|
||||
let indexes_lock = self.indexes.read().unwrap();
|
||||
match indexes_lock.get(name.as_ref()) {
|
||||
Some((_, current_update_fn, _)) => {
|
||||
current_update_fn.swap(None);
|
||||
true
|
||||
}
|
||||
None => false,
|
||||
}
|
||||
pub fn set_update_callback(&self, update_fn: BoxUpdateFn) {
|
||||
let update_fn = Some(Arc::new(update_fn));
|
||||
self.update_fn.swap(update_fn);
|
||||
}
|
||||
|
||||
pub fn unset_update_callback(&self) {
|
||||
self.update_fn.swap(None);
|
||||
}
|
||||
|
||||
pub fn copy_and_compact_to_path<P: AsRef<Path>>(&self, path: P) -> ZResult<File> {
|
||||
@ -203,3 +260,585 @@ impl Database {
|
||||
self.common_store
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::update::{ProcessedUpdateResult, UpdateStatus};
|
||||
use crate::DocumentId;
|
||||
use serde::de::IgnoredAny;
|
||||
use std::sync::mpsc;
|
||||
|
||||
#[test]
|
||||
fn valid_updates() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let database = Database::open_or_create(dir.path()).unwrap();
|
||||
let env = &database.env;
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
|
||||
sender.send(update.update_id).unwrap()
|
||||
};
|
||||
let index = database.create_index("test").unwrap();
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let schema = {
|
||||
let data = r#"
|
||||
identifier = "id"
|
||||
|
||||
[attributes."name"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."description"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
"#;
|
||||
toml::from_str(data).unwrap()
|
||||
};
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let _update_id = index.schema_update(&mut writer, schema).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
let doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"name": "Marvin",
|
||||
"description": "My name is Marvin",
|
||||
});
|
||||
|
||||
let doc2 = serde_json::json!({
|
||||
"id": 234,
|
||||
"name": "Kevin",
|
||||
"description": "My name is Kevin",
|
||||
});
|
||||
|
||||
additions.update_document(doc1);
|
||||
additions.update_document(doc2);
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = additions.finalize(&mut writer).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.into_iter().find(|id| *id == update_id);
|
||||
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_updates() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let database = Database::open_or_create(dir.path()).unwrap();
|
||||
let env = &database.env;
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
|
||||
sender.send(update.update_id).unwrap()
|
||||
};
|
||||
let index = database.create_index("test").unwrap();
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let schema = {
|
||||
let data = r#"
|
||||
identifier = "id"
|
||||
|
||||
[attributes."name"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."description"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
"#;
|
||||
toml::from_str(data).unwrap()
|
||||
};
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let _update_id = index.schema_update(&mut writer, schema).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
let doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"name": "Marvin",
|
||||
"description": "My name is Marvin",
|
||||
});
|
||||
|
||||
let doc2 = serde_json::json!({
|
||||
"name": "Kevin",
|
||||
"description": "My name is Kevin",
|
||||
});
|
||||
|
||||
additions.update_document(doc1);
|
||||
additions.update_document(doc2);
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = additions.finalize(&mut writer).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.into_iter().find(|id| *id == update_id);
|
||||
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignored_words_too_long() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let database = Database::open_or_create(dir.path()).unwrap();
|
||||
let env = &database.env;
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
|
||||
sender.send(update.update_id).unwrap()
|
||||
};
|
||||
let index = database.create_index("test").unwrap();
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let schema = {
|
||||
let data = r#"
|
||||
identifier = "id"
|
||||
|
||||
[attributes."name"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
"#;
|
||||
toml::from_str(data).unwrap()
|
||||
};
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let _update_id = index.schema_update(&mut writer, schema).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
let doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"name": "s̷̡̢̡̧̺̜̞͕͉͉͕̜͔̟̼̥̝͍̟̖͔͔̪͉̲̹̝̣̖͎̞̤̥͓͎̭̩͕̙̩̿̀̋̅̈́̌́̏̍̄̽͂̆̾̀̿̕̚̚͜͠͠ͅͅļ̵̨̨̨̰̦̻̳̖̳͚̬̫͚̦͖͈̲̫̣̩̥̻̙̦̱̼̠̖̻̼̘̖͉̪̜̠̙͖̙̩͔̖̯̩̲̿̽͋̔̿̍̓͂̍̿͊͆̃͗̔̎͐͌̾̆͗́̆̒̔̾̅̚̚͜͜ͅͅī̵̛̦̅̔̓͂͌̾́͂͛̎̋͐͆̽̂̋̋́̾̀̉̓̏̽́̑̀͒̇͋͛̈́̃̉̏͊̌̄̽̿̏̇͘̕̚̕p̶̧̛̛̖̯̗͕̝̗̭̱͙̖̗̟̟̐͆̊̂͐̋̓̂̈́̓͊̆͌̾̾͐͋͗͌̆̿̅͆̈́̈́̉͋̍͊͗̌̓̅̈̎̇̃̎̈́̉̐̋͑̃͘̕͘d̴̢̨̛͕̘̯͖̭̮̝̝̐̊̈̅̐̀͒̀́̈́̀͌̽͛͆͑̀̽̿͛̃̋̇̎̀́̂́͘͠͝ǫ̵̨̛̮̩̘͚̬̯̖̱͍̼͑͑̓̐́̑̿̈́̔͌̂̄͐͝ģ̶̧̜͇̣̭̺̪̺̖̻͖̮̭̣̙̻͒͊͗̓̓͒̀̀ͅ",
|
||||
});
|
||||
|
||||
additions.update_document(doc1);
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = additions.finalize(&mut writer).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.into_iter().find(|id| *id == update_id);
|
||||
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn add_schema_attributes_at_end() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let database = Database::open_or_create(dir.path()).unwrap();
|
||||
let env = &database.env;
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
|
||||
sender.send(update.update_id).unwrap()
|
||||
};
|
||||
let index = database.create_index("test").unwrap();
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let schema = {
|
||||
let data = r#"
|
||||
identifier = "id"
|
||||
|
||||
[attributes."name"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."description"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
"#;
|
||||
toml::from_str(data).unwrap()
|
||||
};
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let _update_id = index.schema_update(&mut writer, schema).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
let doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"name": "Marvin",
|
||||
"description": "My name is Marvin",
|
||||
});
|
||||
|
||||
let doc2 = serde_json::json!({
|
||||
"id": 234,
|
||||
"name": "Kevin",
|
||||
"description": "My name is Kevin",
|
||||
});
|
||||
|
||||
additions.update_document(doc1);
|
||||
additions.update_document(doc2);
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let _update_id = additions.finalize(&mut writer).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
let schema = {
|
||||
let data = r#"
|
||||
identifier = "id"
|
||||
|
||||
[attributes."name"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."description"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."age"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."sex"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
"#;
|
||||
toml::from_str(data).unwrap()
|
||||
};
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = index.schema_update(&mut writer, schema).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.iter().find(|id| *id == update_id);
|
||||
|
||||
// check if it has been accepted
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
reader.abort();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
let doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"name": "Marvin",
|
||||
"description": "My name is Marvin",
|
||||
"age": 21,
|
||||
"sex": "Male",
|
||||
});
|
||||
|
||||
let doc2 = serde_json::json!({
|
||||
"id": 234,
|
||||
"name": "Kevin",
|
||||
"description": "My name is Kevin",
|
||||
"age": 23,
|
||||
"sex": "Male",
|
||||
});
|
||||
|
||||
additions.update_document(doc1);
|
||||
additions.update_document(doc2);
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = additions.finalize(&mut writer).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.iter().find(|id| *id == update_id);
|
||||
|
||||
// check if it has been accepted
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
|
||||
// even try to search for a document
|
||||
let results = index.query_builder().query(&reader, "21 ", 0..20).unwrap();
|
||||
assert_matches!(results.len(), 1);
|
||||
|
||||
reader.abort();
|
||||
|
||||
// try to introduce attributes in the middle of the schema
|
||||
let schema = {
|
||||
let data = r#"
|
||||
identifier = "id"
|
||||
|
||||
[attributes."name"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."description"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."city"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."age"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."sex"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
"#;
|
||||
toml::from_str(data).unwrap()
|
||||
};
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = index.schema_update(&mut writer, schema).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.iter().find(|id| *id == update_id);
|
||||
|
||||
// check if it has been accepted
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deserialize_documents() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let database = Database::open_or_create(dir.path()).unwrap();
|
||||
let env = &database.env;
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
|
||||
sender.send(update.update_id).unwrap()
|
||||
};
|
||||
let index = database.create_index("test").unwrap();
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let schema = {
|
||||
let data = r#"
|
||||
identifier = "id"
|
||||
|
||||
[attributes."name"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."description"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
"#;
|
||||
toml::from_str(data).unwrap()
|
||||
};
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let _update_id = index.schema_update(&mut writer, schema).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
// DocumentId(7900334843754999545)
|
||||
let doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"name": "Marvin",
|
||||
"description": "My name is Marvin",
|
||||
});
|
||||
|
||||
// DocumentId(8367468610878465872)
|
||||
let doc2 = serde_json::json!({
|
||||
"id": 234,
|
||||
"name": "Kevin",
|
||||
"description": "My name is Kevin",
|
||||
});
|
||||
|
||||
additions.update_document(doc1);
|
||||
additions.update_document(doc2);
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = additions.finalize(&mut writer).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.into_iter().find(|id| *id == update_id);
|
||||
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
|
||||
let document: Option<IgnoredAny> = index.document(&reader, None, DocumentId(25)).unwrap();
|
||||
assert!(document.is_none());
|
||||
|
||||
let document: Option<IgnoredAny> = index
|
||||
.document(&reader, None, DocumentId(7900334843754999545))
|
||||
.unwrap();
|
||||
assert!(document.is_some());
|
||||
|
||||
let document: Option<IgnoredAny> = index
|
||||
.document(&reader, None, DocumentId(8367468610878465872))
|
||||
.unwrap();
|
||||
assert!(document.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_document_update() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let database = Database::open_or_create(dir.path()).unwrap();
|
||||
let env = &database.env;
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
|
||||
sender.send(update.update_id).unwrap()
|
||||
};
|
||||
let index = database.create_index("test").unwrap();
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
let schema = {
|
||||
let data = r#"
|
||||
identifier = "id"
|
||||
|
||||
[attributes."id"]
|
||||
displayed = true
|
||||
|
||||
[attributes."name"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
|
||||
[attributes."description"]
|
||||
displayed = true
|
||||
indexed = true
|
||||
"#;
|
||||
toml::from_str(data).unwrap()
|
||||
};
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let _update_id = index.schema_update(&mut writer, schema).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
// DocumentId(7900334843754999545)
|
||||
let doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"name": "Marvin",
|
||||
"description": "My name is Marvin",
|
||||
});
|
||||
|
||||
// DocumentId(8367468610878465872)
|
||||
let doc2 = serde_json::json!({
|
||||
"id": 234,
|
||||
"name": "Kevin",
|
||||
"description": "My name is Kevin",
|
||||
});
|
||||
|
||||
additions.update_document(doc1);
|
||||
additions.update_document(doc2);
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = additions.finalize(&mut writer).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.iter().find(|id| *id == update_id);
|
||||
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
|
||||
let document: Option<IgnoredAny> = index.document(&reader, None, DocumentId(25)).unwrap();
|
||||
assert!(document.is_none());
|
||||
|
||||
let document: Option<IgnoredAny> = index
|
||||
.document(&reader, None, DocumentId(7900334843754999545))
|
||||
.unwrap();
|
||||
assert!(document.is_some());
|
||||
|
||||
let document: Option<IgnoredAny> = index
|
||||
.document(&reader, None, DocumentId(8367468610878465872))
|
||||
.unwrap();
|
||||
assert!(document.is_some());
|
||||
|
||||
reader.abort();
|
||||
|
||||
let mut partial_additions = index.documents_partial_addition();
|
||||
|
||||
// DocumentId(7900334843754999545)
|
||||
let partial_doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"description": "I am the new Marvin",
|
||||
});
|
||||
|
||||
// DocumentId(8367468610878465872)
|
||||
let partial_doc2 = serde_json::json!({
|
||||
"id": 234,
|
||||
"description": "I am the new Kevin",
|
||||
});
|
||||
|
||||
partial_additions.update_document(partial_doc1);
|
||||
partial_additions.update_document(partial_doc2);
|
||||
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
let update_id = partial_additions.finalize(&mut writer).unwrap();
|
||||
writer.commit().unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.iter().find(|id| *id == update_id);
|
||||
|
||||
let reader = env.read_txn().unwrap();
|
||||
let result = index.update_status(&reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
|
||||
let document: Option<serde_json::Value> = index
|
||||
.document(&reader, None, DocumentId(7900334843754999545))
|
||||
.unwrap();
|
||||
|
||||
let new_doc1 = serde_json::json!({
|
||||
"id": 123,
|
||||
"name": "Marvin",
|
||||
"description": "I am the new Marvin",
|
||||
});
|
||||
assert_eq!(document, Some(new_doc1));
|
||||
|
||||
let document: Option<serde_json::Value> = index
|
||||
.document(&reader, None, DocumentId(8367468610878465872))
|
||||
.unwrap();
|
||||
|
||||
let new_doc2 = serde_json::json!({
|
||||
"id": 234,
|
||||
"name": "Kevin",
|
||||
"description": "I am the new Kevin",
|
||||
});
|
||||
assert_eq!(document, Some(new_doc2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delete_index() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let database = Database::open_or_create(dir.path()).unwrap();
|
||||
let _index = database.create_index("test").unwrap();
|
||||
|
||||
let deleted = database.delete_index("test").unwrap();
|
||||
assert!(deleted);
|
||||
|
||||
let result = database.open_index("test");
|
||||
assert!(result.is_none());
|
||||
}
|
||||
}
|
||||
|
@ -97,7 +97,7 @@ pub enum UnsupportedOperation {
|
||||
SchemaAlreadyExists,
|
||||
CannotUpdateSchemaIdentifier,
|
||||
CannotReorderSchemaAttribute,
|
||||
CannotIntroduceNewSchemaAttribute,
|
||||
CanOnlyIntroduceNewSchemaAttributesAtEnd,
|
||||
CannotRemoveSchemaAttribute,
|
||||
}
|
||||
|
||||
@ -108,8 +108,8 @@ impl fmt::Display for UnsupportedOperation {
|
||||
SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"),
|
||||
CannotUpdateSchemaIdentifier => write!(f, "Cannot update the identifier of a schema"),
|
||||
CannotReorderSchemaAttribute => write!(f, "Cannot reorder the attributes of a schema"),
|
||||
CannotIntroduceNewSchemaAttribute => {
|
||||
write!(f, "Cannot introduce new attributes in a schema")
|
||||
CanOnlyIntroduceNewSchemaAttributesAtEnd => {
|
||||
write!(f, "Can only introduce new attributes at end of a schema")
|
||||
}
|
||||
CannotRemoveSchemaAttribute => write!(f, "Cannot remove attributes from a schema"),
|
||||
}
|
||||
|
134 meilidb-core/src/levenshtein.rs (new file)
@ -0,0 +1,134 @@
|
||||
use std::cmp::min;
|
||||
use std::collections::BTreeMap;
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
// A simple wrapper around vec so we can get contiguous but index it like it's 2D array.
|
||||
struct N2Array<T> {
|
||||
y_size: usize,
|
||||
buf: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T: Clone> N2Array<T> {
|
||||
fn new(x: usize, y: usize, value: T) -> N2Array<T> {
|
||||
N2Array {
|
||||
y_size: y,
|
||||
buf: vec![value; x * y],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Index<(usize, usize)> for N2Array<T> {
|
||||
type Output = T;
|
||||
|
||||
#[inline]
|
||||
fn index(&self, (x, y): (usize, usize)) -> &T {
|
||||
&self.buf[(x * self.y_size) + y]
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> IndexMut<(usize, usize)> for N2Array<T> {
|
||||
#[inline]
|
||||
fn index_mut(&mut self, (x, y): (usize, usize)) -> &mut T {
|
||||
&mut self.buf[(x * self.y_size) + y]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn prefix_damerau_levenshtein(source: &[u8], target: &[u8]) -> (u32, usize) {
|
||||
let (n, m) = (source.len(), target.len());
|
||||
|
||||
assert!(
|
||||
n <= m,
|
||||
"the source string must be shorter than the target one"
|
||||
);
|
||||
|
||||
if n == 0 {
|
||||
return (m as u32, 0);
|
||||
}
|
||||
if m == 0 {
|
||||
return (n as u32, 0);
|
||||
}
|
||||
|
||||
if n == m && source == target {
|
||||
return (0, m);
|
||||
}
|
||||
|
||||
let inf = n + m;
|
||||
let mut matrix = N2Array::new(n + 2, m + 2, 0);
|
||||
|
||||
matrix[(0, 0)] = inf;
|
||||
for i in 0..n + 1 {
|
||||
matrix[(i + 1, 0)] = inf;
|
||||
matrix[(i + 1, 1)] = i;
|
||||
}
|
||||
for j in 0..m + 1 {
|
||||
matrix[(0, j + 1)] = inf;
|
||||
matrix[(1, j + 1)] = j;
|
||||
}
|
||||
|
||||
let mut last_row = BTreeMap::new();
|
||||
|
||||
for (row, char_s) in source.iter().enumerate() {
|
||||
let mut last_match_col = 0;
|
||||
let row = row + 1;
|
||||
|
||||
for (col, char_t) in target.iter().enumerate() {
|
||||
let col = col + 1;
|
||||
let last_match_row = *last_row.get(&char_t).unwrap_or(&0);
|
||||
let cost = if char_s == char_t { 0 } else { 1 };
|
||||
|
||||
let dist_add = matrix[(row, col + 1)] + 1;
|
||||
let dist_del = matrix[(row + 1, col)] + 1;
|
||||
let dist_sub = matrix[(row, col)] + cost;
|
||||
let dist_trans = matrix[(last_match_row, last_match_col)]
|
||||
+ (row - last_match_row - 1)
|
||||
+ 1
|
||||
+ (col - last_match_col - 1);
|
||||
|
||||
let dist = min(min(dist_add, dist_del), min(dist_sub, dist_trans));
|
||||
|
||||
matrix[(row + 1, col + 1)] = dist;
|
||||
|
||||
if cost == 0 {
|
||||
last_match_col = col;
|
||||
}
|
||||
}
|
||||
|
||||
last_row.insert(char_s, row);
|
||||
}
|
||||
|
||||
let mut minimum = (u32::max_value(), 0);
|
||||
|
||||
for x in n..=m {
|
||||
let dist = matrix[(n + 1, x + 1)] as u32;
|
||||
if dist < minimum.0 {
|
||||
minimum = (dist, x)
|
||||
}
|
||||
}
|
||||
|
||||
minimum
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn matched_length() {
|
||||
let query = "Levenste";
|
||||
let text = "Levenshtein";
|
||||
|
||||
let (dist, length) = prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());
|
||||
assert_eq!(dist, 1);
|
||||
assert_eq!(&text[..length], "Levenshte");
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn matched_length_panic() {
|
||||
let query = "Levenshtein";
|
||||
let text = "Levenste";
|
||||
|
||||
// this function will panic if source is longer than target
|
||||
prefix_damerau_levenshtein(query.as_bytes(), text.as_bytes());
|
||||
}
|
||||
}
|
@ -7,6 +7,7 @@ pub mod criterion;
|
||||
mod database;
|
||||
mod distinct_map;
|
||||
mod error;
|
||||
mod levenshtein;
|
||||
mod number;
|
||||
mod query_builder;
|
||||
mod ranked_map;
|
||||
@ -23,80 +24,8 @@ pub use self::number::{Number, ParseNumberError};
|
||||
pub use self::ranked_map::RankedMap;
|
||||
pub use self::raw_document::RawDocument;
|
||||
pub use self::store::Index;
|
||||
pub use self::update::{UpdateResult, UpdateStatus, UpdateType};
|
||||
|
||||
use ::serde::{Deserialize, Serialize};
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
||||
/// Represent an internally generated document unique identifier.
|
||||
///
|
||||
/// It is used to inform the database the document you want to deserialize.
|
||||
/// Helpful for custom ranking.
|
||||
#[derive(
|
||||
Debug,
|
||||
Copy,
|
||||
Clone,
|
||||
Eq,
|
||||
PartialEq,
|
||||
PartialOrd,
|
||||
Ord,
|
||||
Hash,
|
||||
Serialize,
|
||||
Deserialize,
|
||||
AsBytes,
|
||||
FromBytes,
|
||||
)]
|
||||
#[repr(C)]
|
||||
pub struct DocumentId(pub u64);
|
||||
|
||||
/// This structure represent the position of a word
|
||||
/// in a document and its attributes.
|
||||
///
|
||||
/// This is stored in the map, generated at index time,
|
||||
/// extracted and interpreted at search time.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
pub struct DocIndex {
|
||||
/// The document identifier where the word was found.
|
||||
pub document_id: DocumentId,
|
||||
|
||||
/// The attribute in the document where the word was found
|
||||
/// along with the index in it.
|
||||
pub attribute: u16,
|
||||
pub word_index: u16,
|
||||
|
||||
/// The position in bytes where the word was found
|
||||
/// along with the length of it.
|
||||
///
|
||||
/// It informs on the original word area in the text indexed
|
||||
/// without needing to run the tokenizer again.
|
||||
pub char_index: u16,
|
||||
pub char_length: u16,
|
||||
}
|
||||
|
||||
/// This structure represent a matching word with informations
|
||||
/// on the location of the word in the document.
|
||||
///
|
||||
/// The order of the field is important because it defines
|
||||
/// the way these structures are ordered between themselves.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Highlight {
|
||||
/// The attribute in the document where the word was found
|
||||
/// along with the index in it.
|
||||
pub attribute: u16,
|
||||
|
||||
/// The position in bytes where the word was found.
|
||||
///
|
||||
/// It informs on the original word area in the text indexed
|
||||
/// without needing to run the tokenizer again.
|
||||
pub char_index: u16,
|
||||
|
||||
/// The length in bytes of the found word.
|
||||
///
|
||||
/// It informs on the original word area in the text indexed
|
||||
/// without needing to run the tokenizer again.
|
||||
pub char_length: u16,
|
||||
}
|
||||
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
|
||||
pub use meilidb_types::{DocIndex, DocumentId, Highlight};
|
||||
|
||||
#[doc(hidden)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
|
@ -1,9 +1,9 @@
|
||||
use hashbrown::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::mem;
|
||||
use std::ops::Range;
|
||||
use std::rc::Rc;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{cmp, mem};
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use sdset::SetBuf;
|
||||
@ -11,6 +11,7 @@ use slice_group_by::{GroupBy, GroupByMut};
|
||||
|
||||
use crate::automaton::{Automaton, AutomatonGroup, AutomatonProducer, QueryEnhancer};
|
||||
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
||||
use crate::levenshtein::prefix_damerau_levenshtein;
|
||||
use crate::raw_document::{raw_documents_from, RawDocument};
|
||||
use crate::{criterion::Criteria, Document, DocumentId, Highlight, TmpMatch};
|
||||
use crate::{reordered_attrs::ReorderedAttrs, store, MResult};
|
||||
@ -162,6 +163,7 @@ fn fetch_raw_documents(
|
||||
index,
|
||||
is_exact,
|
||||
query_len,
|
||||
query,
|
||||
..
|
||||
} = automaton;
|
||||
let dfa = automaton.dfa();
|
||||
@ -176,6 +178,12 @@ fn fetch_raw_documents(
|
||||
let distance = dfa.eval(input).to_u8();
|
||||
let is_exact = *is_exact && distance == 0 && input.len() == *query_len;
|
||||
|
||||
let covered_area = if *query_len > input.len() {
|
||||
input.len()
|
||||
} else {
|
||||
prefix_damerau_levenshtein(query.as_bytes(), input).1
|
||||
};
|
||||
|
||||
let doc_indexes = match postings_lists_store.postings_list(reader, input)? {
|
||||
Some(doc_indexes) => doc_indexes,
|
||||
None => continue,
|
||||
@ -194,10 +202,13 @@ fn fetch_raw_documents(
|
||||
is_exact,
|
||||
};
|
||||
|
||||
let covered_area = u16::try_from(covered_area).unwrap_or(u16::max_value());
|
||||
let covered_area = cmp::min(covered_area, di.char_length);
|
||||
|
||||
let highlight = Highlight {
|
||||
attribute: di.attribute,
|
||||
char_index: di.char_index,
|
||||
char_length: u16::try_from(*query_len).unwrap_or(u16::max_value()),
|
||||
char_length: covered_area,
|
||||
};
|
||||
|
||||
tmp_matches.push((di.document_id, id, match_, highlight));
|
||||
|
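Rough illustration of the covered-area change in the hunk above: the highlighted length is now the number of bytes of the stored word actually covered by the (possibly prefix or typo-tolerant) query, clamped to the stored word length, instead of the raw query length. The covered_bytes function below is a simplified stand-in for prefix_damerau_levenshtein; names and values are made up.

use std::cmp;
use std::convert::TryFrom;

// Stand-in: the real code returns (distance, covered_bytes); here we only
// measure the length of the common prefix.
fn covered_bytes(query: &[u8], input: &[u8]) -> usize {
    query.iter().zip(input).take_while(|(a, b)| a == b).count()
}

fn main() {
    let query = "eteindr";   // prefix query, 7 bytes
    let input = "eteindre";  // stored word, 8 bytes
    let stored_char_length: u16 = 8;

    let covered_area = if query.len() > input.len() {
        input.len()
    } else {
        covered_bytes(query.as_bytes(), input.as_bytes())
    };

    let covered_area = u16::try_from(covered_area).unwrap_or(u16::max_value());
    let covered_area = cmp::min(covered_area, stored_char_length);

    // The highlight now spans the 7 covered bytes, not the full stored word.
    assert_eq!(covered_area, 7);
    println!("highlight length: {}", covered_area);
}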
@ -7,10 +7,13 @@ use meilidb_schema::SchemaAttr;
|
||||
use meilidb_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
|
||||
use sdset::SetBuf;
|
||||
|
||||
const WORD_LENGTH_LIMIT: usize = 80;
|
||||
|
||||
type Word = Vec<u8>; // TODO make it be a SmallVec
|
||||
|
||||
pub struct RawIndexer {
|
||||
word_limit: usize, // the maximum number of indexed words
|
||||
stop_words: fst::Set,
|
||||
words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
|
||||
docs_words: HashMap<DocumentId, Vec<Word>>,
|
||||
}
|
||||
@ -21,13 +24,14 @@ pub struct Indexed {
|
||||
}
|
||||
|
||||
impl RawIndexer {
|
||||
pub fn new() -> RawIndexer {
|
||||
RawIndexer::with_word_limit(1000)
|
||||
pub fn new(stop_words: fst::Set) -> RawIndexer {
|
||||
RawIndexer::with_word_limit(stop_words, 1000)
|
||||
}
|
||||
|
||||
pub fn with_word_limit(limit: usize) -> RawIndexer {
|
||||
pub fn with_word_limit(stop_words: fst::Set, limit: usize) -> RawIndexer {
|
||||
RawIndexer {
|
||||
word_limit: limit,
|
||||
stop_words,
|
||||
words_doc_indexes: BTreeMap::new(),
|
||||
docs_words: HashMap::new(),
|
||||
}
|
||||
@ -35,89 +39,40 @@ impl RawIndexer {
|
||||
|
||||
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) -> usize {
|
||||
let mut number_of_words = 0;
|
||||
let lowercase_text = text.to_lowercase();
|
||||
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
|
||||
|
||||
// TODO compute the deunicoded version after the cjk check
|
||||
let next = if !lowercase_text.contains(is_cjk) && lowercase_text != deunicoded {
|
||||
Some(deunicoded)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let iter = Some(lowercase_text).into_iter().chain(next);
|
||||
|
||||
for text in iter {
|
||||
// we must not count 2 times the same words
|
||||
number_of_words = 0;
|
||||
|
||||
for token in Tokenizer::new(&text) {
|
||||
let must_continue = index_token(
|
||||
token,
|
||||
id,
|
||||
attr,
|
||||
self.word_limit,
|
||||
&mut self.words_doc_indexes,
|
||||
&mut self.docs_words,
|
||||
);
|
||||
|
||||
if !must_continue {
|
||||
break;
|
||||
}
|
||||
|
||||
number_of_words += 1;
|
||||
}
|
||||
}
|
||||
|
||||
number_of_words
|
||||
}
|
||||
|
||||
pub fn index_text_seq<'a, I, IT>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
|
||||
where
|
||||
I: IntoIterator<Item = &'a str, IntoIter = IT>,
|
||||
IT: Iterator<Item = &'a str> + Clone,
|
||||
{
|
||||
// TODO serialize this to one call to the SeqTokenizer loop
|
||||
|
||||
let lowercased: Vec<_> = iter.into_iter().map(str::to_lowercase).collect();
|
||||
let iter = lowercased.iter().map(|t| t.as_str());
|
||||
|
||||
for token in SeqTokenizer::new(iter) {
|
||||
for token in Tokenizer::new(text) {
|
||||
let must_continue = index_token(
|
||||
token,
|
||||
id,
|
||||
attr,
|
||||
self.word_limit,
|
||||
&self.stop_words,
|
||||
&mut self.words_doc_indexes,
|
||||
&mut self.docs_words,
|
||||
);
|
||||
|
||||
number_of_words += 1;
|
||||
|
||||
if !must_continue {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let deunicoded: Vec<_> = lowercased
|
||||
.into_iter()
|
||||
.map(|lowercase_text| {
|
||||
if lowercase_text.contains(is_cjk) {
|
||||
return lowercase_text;
|
||||
}
|
||||
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
|
||||
if lowercase_text != deunicoded {
|
||||
deunicoded
|
||||
} else {
|
||||
lowercase_text
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let iter = deunicoded.iter().map(|t| t.as_str());
|
||||
number_of_words
|
||||
}
|
||||
|
||||
pub fn index_text_seq<'a, I>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
|
||||
where
|
||||
I: IntoIterator<Item = &'a str>,
|
||||
{
|
||||
let iter = iter.into_iter();
|
||||
for token in SeqTokenizer::new(iter) {
|
||||
let must_continue = index_token(
|
||||
token,
|
||||
id,
|
||||
attr,
|
||||
self.word_limit,
|
||||
&self.stop_words,
|
||||
&mut self.words_doc_indexes,
|
||||
&mut self.docs_words,
|
||||
);
|
||||
@ -152,17 +107,12 @@ impl RawIndexer {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RawIndexer {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
fn index_token(
|
||||
token: Token,
|
||||
id: DocumentId,
|
||||
attr: SchemaAttr,
|
||||
word_limit: usize,
|
||||
stop_words: &fst::Set,
|
||||
words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
|
||||
docs_words: &mut HashMap<DocumentId, Vec<Word>>,
|
||||
) -> bool {
|
||||
@ -170,16 +120,41 @@ fn index_token(
|
||||
return false;
|
||||
}
|
||||
|
||||
match token_to_docindex(id, attr, token) {
|
||||
Some(docindex) => {
|
||||
let word = Vec::from(token.word);
|
||||
words_doc_indexes
|
||||
.entry(word.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(docindex);
|
||||
docs_words.entry(id).or_insert_with(Vec::new).push(word);
|
||||
let lower = token.word.to_lowercase();
|
||||
let token = Token {
|
||||
word: &lower,
|
||||
..token
|
||||
};
|
||||
|
||||
if !stop_words.contains(&token.word) {
|
||||
match token_to_docindex(id, attr, token) {
|
||||
Some(docindex) => {
|
||||
let word = Vec::from(token.word);
|
||||
|
||||
if word.len() <= WORD_LENGTH_LIMIT {
|
||||
words_doc_indexes
|
||||
.entry(word.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(docindex);
|
||||
docs_words.entry(id).or_insert_with(Vec::new).push(word);
|
||||
|
||||
if !lower.contains(is_cjk) {
|
||||
let unidecoded = deunicode_with_tofu(&lower, "");
|
||||
if unidecoded != lower && !unidecoded.is_empty() {
|
||||
let word = Vec::from(unidecoded);
|
||||
if word.len() <= WORD_LENGTH_LIMIT {
|
||||
words_doc_indexes
|
||||
.entry(word.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(docindex);
|
||||
docs_words.entry(id).or_insert_with(Vec::new).push(word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None => return false,
|
||||
}
|
||||
None => return false,
|
||||
}
|
||||
|
||||
true
|
||||
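Standalone sketch of the indexing rule the hunk above implements: lowercase the token, drop it if it is a stop word, then index both the lowercased form and, when it differs and is non-empty, its deunicoded form, each subject to the word length limit. The deunicode function and the in-memory maps are simplified stand-ins for deunicode_with_tofu and the real stores.

use std::collections::{BTreeMap, BTreeSet};

const WORD_LENGTH_LIMIT: usize = 80;

// Stand-in for deunicode_with_tofu: only strips the accents used below.
fn deunicode(word: &str) -> String {
    word.chars()
        .map(|c| match c {
            'é' | 'è' | 'ê' => 'e',
            other => other,
        })
        .collect()
}

fn index_word(word: &str, stop_words: &BTreeSet<&str>, index: &mut BTreeMap<Vec<u8>, usize>) {
    let lower = word.to_lowercase();
    if stop_words.contains(lower.as_str()) {
        return;
    }
    if lower.len() <= WORD_LENGTH_LIMIT {
        *index.entry(lower.clone().into_bytes()).or_insert(0) += 1;
    }
    let unidecoded = deunicode(&lower);
    if unidecoded != lower && !unidecoded.is_empty() && unidecoded.len() <= WORD_LENGTH_LIMIT {
        *index.entry(unidecoded.into_bytes()).or_insert(0) += 1;
    }
}

fn main() {
    let stop_words: BTreeSet<&str> = vec!["de", "la"].into_iter().collect();
    let mut index = BTreeMap::new();

    for word in vec!["Éteindre", "de", "aspirateur"] {
        index_word(word, &stop_words, &mut index);
    }

    // "de" was dropped; "éteindre" is indexed in both accented and plain form.
    assert!(index.contains_key("éteindre".as_bytes()));
    assert!(index.contains_key("eteindre".as_bytes()));
    assert!(!index.contains_key("de".as_bytes()));
}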
@ -207,7 +182,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn strange_apostrophe() {
|
||||
let mut indexer = RawIndexer::new();
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let attr = SchemaAttr(0);
|
||||
@ -222,16 +197,14 @@ mod tests {
|
||||
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
|
||||
// with the ugly apostrophe...
|
||||
assert!(words_doc_indexes
|
||||
.get(&"l’éteindre".to_owned().into_bytes())
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn strange_apostrophe_in_sequence() {
|
||||
let mut indexer = RawIndexer::new();
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let attr = SchemaAttr(0);
|
||||
@ -246,10 +219,53 @@ mod tests {
|
||||
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
|
||||
// with the ugly apostrophe...
|
||||
assert!(words_doc_indexes
|
||||
.get(&"l’éteindre".to_owned().into_bytes())
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_stop_words() {
|
||||
let stop_words = sdset::SetBuf::from_dirty(vec!["l", "j", "ai", "de"]);
|
||||
let stop_words = fst::Set::from_iter(stop_words).unwrap();
|
||||
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let attr = SchemaAttr(0);
|
||||
let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
|
||||
indexer.index_text(docid, attr, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
|
||||
assert!(words_doc_indexes.get(&b"l"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"j"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"de"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
assert!(words_doc_indexes
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_empty_unidecode() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let attr = SchemaAttr(0);
|
||||
let text = "🇯🇵";
|
||||
indexer.index_text(docid, attr, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
|
||||
assert!(words_doc_indexes
|
||||
.get(&"🇯🇵".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
}
|
||||
|
@ -63,13 +63,14 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
||||
where
|
||||
V: de::Visitor<'de>,
|
||||
{
|
||||
self.deserialize_map(visitor)
|
||||
self.deserialize_option(visitor)
|
||||
}
|
||||
|
||||
forward_to_deserialize_any! {
|
||||
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
|
||||
bytes byte_buf option unit unit_struct newtype_struct seq tuple
|
||||
tuple_struct struct enum identifier ignored_any
|
||||
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||
where
|
||||
V: de::Visitor<'de>,
|
||||
{
|
||||
self.deserialize_map(visitor)
|
||||
}
|
||||
|
||||
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||
@ -104,16 +105,29 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
||||
}
|
||||
});
|
||||
|
||||
let map_deserializer = de::value::MapDeserializer::new(iter);
|
||||
let result = visitor
|
||||
.visit_map(map_deserializer)
|
||||
.map_err(DeserializerError::from);
|
||||
let mut iter = iter.peekable();
|
||||
|
||||
let result = match iter.peek() {
|
||||
Some(_) => {
|
||||
let map_deserializer = de::value::MapDeserializer::new(iter);
|
||||
visitor
|
||||
.visit_some(map_deserializer)
|
||||
.map_err(DeserializerError::from)
|
||||
}
|
||||
None => visitor.visit_none(),
|
||||
};
|
||||
|
||||
match error.take() {
|
||||
Some(error) => Err(error.into()),
|
||||
None => result,
|
||||
}
|
||||
}
|
||||
|
||||
forward_to_deserialize_any! {
|
||||
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
|
||||
bytes byte_buf unit unit_struct newtype_struct seq tuple
|
||||
tuple_struct struct enum identifier ignored_any
|
||||
}
|
||||
}
|
||||
|
||||
struct Value(SerdeJsonDeserializer<SerdeJsonIoRead<Cursor<Vec<u8>>>>);
|
||||
|
@ -7,8 +7,8 @@ use crate::{DocumentId, RankedMap};
|
||||
|
||||
use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};
|
||||
|
||||
pub struct Serializer<'a> {
|
||||
pub txn: &'a mut heed::RwTxn,
|
||||
pub struct Serializer<'a, 'b> {
|
||||
pub txn: &'a mut heed::RwTxn<'b>,
|
||||
pub schema: &'a Schema,
|
||||
pub document_store: DocumentsFields,
|
||||
pub document_fields_counts: DocumentsFieldsCounts,
|
||||
@ -17,15 +17,15 @@ pub struct Serializer<'a> {
|
||||
pub document_id: DocumentId,
|
||||
}
|
||||
|
||||
impl<'a> ser::Serializer for Serializer<'a> {
|
||||
impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
|
||||
type Ok = ();
|
||||
type Error = SerializerError;
|
||||
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeMap = MapSerializer<'a>;
|
||||
type SerializeStruct = StructSerializer<'a>;
|
||||
type SerializeMap = MapSerializer<'a, 'b>;
|
||||
type SerializeStruct = StructSerializer<'a, 'b>;
|
||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||
|
||||
forward_to_unserializable_type! {
|
||||
@ -190,8 +190,8 @@ impl<'a> ser::Serializer for Serializer<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MapSerializer<'a> {
|
||||
txn: &'a mut heed::RwTxn,
|
||||
pub struct MapSerializer<'a, 'b> {
|
||||
txn: &'a mut heed::RwTxn<'b>,
|
||||
schema: &'a Schema,
|
||||
document_id: DocumentId,
|
||||
document_store: DocumentsFields,
|
||||
@ -201,7 +201,7 @@ pub struct MapSerializer<'a> {
|
||||
current_key_name: Option<String>,
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeMap for MapSerializer<'a> {
|
||||
impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
|
||||
type Ok = ();
|
||||
type Error = SerializerError;
|
||||
|
||||
@ -253,8 +253,8 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StructSerializer<'a> {
|
||||
txn: &'a mut heed::RwTxn,
|
||||
pub struct StructSerializer<'a, 'b> {
|
||||
txn: &'a mut heed::RwTxn<'b>,
|
||||
schema: &'a Schema,
|
||||
document_id: DocumentId,
|
||||
document_store: DocumentsFields,
|
||||
@ -263,7 +263,7 @@ pub struct StructSerializer<'a> {
|
||||
ranked_map: &'a mut RankedMap,
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
|
||||
impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
|
||||
type Ok = ();
|
||||
type Error = SerializerError;
|
||||
|
||||
|
@ -39,7 +39,7 @@ impl DocsWords {
|
||||
match self.docs_words.get(reader, &document_id)? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let bytes = Arc::from(bytes);
|
||||
let bytes = Arc::new(bytes.to_owned());
|
||||
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ impl DocumentsFields {
|
||||
) -> ZResult<usize> {
|
||||
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
|
||||
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
|
||||
self.documents_fields.delete_range(writer, start..=end)
|
||||
self.documents_fields.delete_range(writer, &(start..=end))
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
|
||||
@ -53,7 +53,7 @@ impl DocumentsFields {
|
||||
) -> ZResult<DocumentFieldsIter<'txn>> {
|
||||
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
|
||||
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
|
||||
let iter = self.documents_fields.range(reader, start..=end)?;
|
||||
let iter = self.documents_fields.range(reader, &(start..=end))?;
|
||||
Ok(DocumentFieldsIter { iter })
|
||||
}
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ impl DocumentsFieldsCounts {
|
||||
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
|
||||
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
|
||||
self.documents_fields_counts
|
||||
.delete_range(writer, start..=end)
|
||||
.delete_range(writer, &(start..=end))
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
|
||||
@ -56,7 +56,7 @@ impl DocumentsFieldsCounts {
|
||||
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
|
||||
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
|
||||
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
|
||||
let iter = self.documents_fields_counts.range(reader, start..=end)?;
|
||||
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
|
||||
Ok(DocumentFieldsCountsIter { iter })
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
|
||||
const RANKED_MAP_KEY: &str = "ranked-map";
|
||||
const SCHEMA_KEY: &str = "schema";
|
||||
const SYNONYMS_KEY: &str = "synonyms";
|
||||
const STOP_WORDS_KEY: &str = "stop-words";
|
||||
const WORDS_KEY: &str = "words";
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
@ -17,6 +18,10 @@ pub struct Main {
|
||||
}
|
||||
|
||||
impl Main {
|
||||
pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
|
||||
self.main.clear(writer)
|
||||
}
|
||||
|
||||
pub fn put_words_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> {
|
||||
let bytes = fst.as_fst().as_bytes();
|
||||
self.main.put::<Str, ByteSlice>(writer, WORDS_KEY, bytes)
|
||||
@ -26,7 +31,7 @@ impl Main {
|
||||
match self.main.get::<Str, ByteSlice>(reader, WORDS_KEY)? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let bytes = Arc::from(bytes);
|
||||
let bytes = Arc::new(bytes.to_owned());
|
||||
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
}
|
||||
@ -63,7 +68,25 @@ impl Main {
|
||||
match self.main.get::<Str, ByteSlice>(reader, SYNONYMS_KEY)? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let bytes = Arc::from(bytes);
|
||||
let bytes = Arc::new(bytes.to_owned());
|
||||
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_stop_words_fst(self, writer: &mut heed::RwTxn, fst: &fst::Set) -> ZResult<()> {
|
||||
let bytes = fst.as_fst().as_bytes();
|
||||
self.main
|
||||
.put::<Str, ByteSlice>(writer, STOP_WORDS_KEY, bytes)
|
||||
}
|
||||
|
||||
pub fn stop_words_fst(self, reader: &heed::RoTxn) -> ZResult<Option<fst::Set>> {
|
||||
match self.main.get::<Str, ByteSlice>(reader, STOP_WORDS_KEY)? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let bytes = Arc::new(bytes.to_owned());
|
||||
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
}
|
||||
|
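Sketch of the round trip the two methods above implement, assuming the fst 0.3 API already used elsewhere in this diff: the stop words are kept as a sorted fst::Set, serialized to raw bytes under STOP_WORDS_KEY, and rebuilt from those bytes on read.

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Building the set requires the words to be inserted in sorted order.
    let stop_words = fst::Set::from_iter(vec!["ai", "de", "j", "l"])?;

    // What put_stop_words_fst writes into the main store.
    let bytes: Vec<u8> = stop_words.as_fst().as_bytes().to_vec();

    // What stop_words_fst rebuilds when reading the entry back.
    let restored = fst::Set::from_bytes(bytes)?;
    assert!(restored.contains("de"));
    assert!(!restored.contains("aspirateur"));
    Ok(())
}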
@ -22,10 +22,11 @@ use std::collections::HashSet;
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use meilidb_schema::{Schema, SchemaAttr};
|
||||
use serde::de;
|
||||
use serde::de::{self, Deserialize};
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
||||
use crate::criterion::Criteria;
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::serde::Deserializer;
|
||||
use crate::{query_builder::QueryBuilder, update, DocumentId, Error, MResult};
|
||||
|
||||
@ -91,7 +92,7 @@ pub struct Index {
|
||||
|
||||
pub updates: Updates,
|
||||
pub updates_results: UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
@ -120,9 +121,7 @@ impl Index {
|
||||
attributes: attributes.as_ref(),
|
||||
};
|
||||
|
||||
// TODO: currently we return an error if all document fields are missing,
|
||||
// returning None would have been better
|
||||
Ok(T::deserialize(&mut deserializer).map(Some)?)
|
||||
Ok(Option::<T>::deserialize(&mut deserializer)?)
|
||||
}
|
||||
|
||||
pub fn document_attribute<T: de::DeserializeOwned>(
|
||||
@ -141,12 +140,12 @@ impl Index {
|
||||
}
|
||||
|
||||
pub fn schema_update(&self, writer: &mut heed::RwTxn, schema: Schema) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(());
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
update::push_schema_update(writer, self.updates, self.updates_results, schema)
|
||||
}
|
||||
|
||||
pub fn customs_update(&self, writer: &mut heed::RwTxn, customs: Vec<u8>) -> ZResult<u64> {
|
||||
let _ = self.updates_notifier.send(());
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
update::push_customs_update(writer, self.updates, self.updates_results, customs)
|
||||
}
|
||||
|
||||
@ -158,6 +157,14 @@ impl Index {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn documents_partial_addition<D>(&self) -> update::DocumentsAddition<D> {
|
||||
update::DocumentsAddition::new_partial(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn documents_deletion(&self) -> update::DocumentsDeletion {
|
||||
update::DocumentsDeletion::new(
|
||||
self.updates,
|
||||
@ -167,7 +174,7 @@ impl Index {
|
||||
}
|
||||
|
||||
pub fn clear_all(&self, writer: &mut heed::RwTxn) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(());
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
update::push_clear_all(writer, self.updates, self.updates_results)
|
||||
}
|
||||
|
||||
@ -187,6 +194,22 @@ impl Index {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn stop_words_addition(&self) -> update::StopWordsAddition {
|
||||
update::StopWordsAddition::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn stop_words_deletion(&self) -> update::StopWordsDeletion {
|
||||
update::StopWordsDeletion::new(
|
||||
self.updates,
|
||||
self.updates_results,
|
||||
self.updates_notifier.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn current_update_id(&self, reader: &heed::RoTxn) -> MResult<Option<u64>> {
|
||||
match self.updates.last_update_id(reader)? {
|
||||
Some((id, _)) => Ok(Some(id)),
|
||||
@ -198,10 +221,38 @@ impl Index {
|
||||
&self,
|
||||
reader: &heed::RoTxn,
|
||||
update_id: u64,
|
||||
) -> MResult<update::UpdateStatus> {
|
||||
) -> MResult<Option<update::UpdateStatus>> {
|
||||
update::update_status(reader, self.updates, self.updates_results, update_id)
|
||||
}
|
||||
|
||||
pub fn all_updates_status(&self, reader: &heed::RoTxn) -> MResult<Vec<update::UpdateStatus>> {
|
||||
let mut updates = Vec::new();
|
||||
let mut last_update_result_id = 0;
|
||||
|
||||
// retrieve all updates results
|
||||
if let Some((last_id, _)) = self.updates_results.last_update_id(reader)? {
|
||||
updates.reserve(last_id as usize);
|
||||
|
||||
for id in 0..=last_id {
|
||||
if let Some(update) = self.update_status(reader, id)? {
|
||||
updates.push(update);
|
||||
last_update_result_id = id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// retrieve all enqueued updates
|
||||
if let Some((last_id, _)) = self.updates.last_update_id(reader)? {
|
||||
for id in last_update_result_id + 1..=last_id {
|
||||
if let Some(update) = self.update_status(reader, id)? {
|
||||
updates.push(update);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(updates)
|
||||
}
|
||||
|
||||
pub fn query_builder(&self) -> QueryBuilder {
|
||||
QueryBuilder::new(
|
||||
self.main,
|
||||
@ -228,7 +279,7 @@ impl Index {
|
||||
pub fn create(
|
||||
env: &heed::Env,
|
||||
name: &str,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> MResult<Index> {
|
||||
// create all the store names
|
||||
let main_name = main_name(name);
|
||||
@ -241,7 +292,7 @@ pub fn create(
|
||||
let updates_results_name = updates_results_name(name);
|
||||
|
||||
// open all the stores
|
||||
let main = env.create_dyn_database(Some(&main_name))?;
|
||||
let main = env.create_poly_database(Some(&main_name))?;
|
||||
let postings_lists = env.create_database(Some(&postings_lists_name))?;
|
||||
let documents_fields = env.create_database(Some(&documents_fields_name))?;
|
||||
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
|
||||
@ -268,7 +319,7 @@ pub fn create(
|
||||
pub fn open(
|
||||
env: &heed::Env,
|
||||
name: &str,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> MResult<Option<Index>> {
|
||||
// create all the store names
|
||||
let main_name = main_name(name);
|
||||
@ -281,7 +332,7 @@ pub fn open(
|
||||
let updates_results_name = updates_results_name(name);
|
||||
|
||||
// open all the stores
|
||||
let main = match env.open_dyn_database(Some(&main_name))? {
|
||||
let main = match env.open_poly_database(Some(&main_name))? {
|
||||
Some(main) => main,
|
||||
None => return Ok(None),
|
||||
};
|
||||
@ -328,3 +379,19 @@ pub fn open(
|
||||
updates_notifier,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn clear(writer: &mut heed::RwTxn, index: &Index) -> MResult<()> {
|
||||
// send a stop event to the update loop of the index
|
||||
index.updates_notifier.send(UpdateEvent::MustStop).unwrap();
|
||||
|
||||
// clear all the stores
|
||||
index.main.clear(writer)?;
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.documents_fields.clear(writer)?;
|
||||
index.documents_fields_counts.clear(writer)?;
|
||||
index.synonyms.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
index.updates.clear(writer)?;
|
||||
index.updates_results.clear(writer)?;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -22,11 +22,15 @@ impl Synonyms {
|
||||
self.synonyms.delete(writer, word)
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
|
||||
self.synonyms.clear(writer)
|
||||
}
|
||||
|
||||
pub fn synonyms(self, reader: &heed::RoTxn, word: &[u8]) -> ZResult<Option<fst::Set>> {
|
||||
match self.synonyms.get(reader, word)? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let bytes = Arc::from(bytes);
|
||||
let bytes = Arc::new(bytes.to_owned());
|
||||
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap();
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
}
|
||||
|
@ -26,9 +26,9 @@ impl Updates {
|
||||
}
|
||||
|
||||
// TODO do not trigger deserialize if possible
|
||||
pub fn contains(self, reader: &heed::RoTxn, update_id: u64) -> ZResult<bool> {
|
||||
pub fn get(self, reader: &heed::RoTxn, update_id: u64) -> ZResult<Option<Update>> {
|
||||
let update_id = BEU64::new(update_id);
|
||||
self.updates.get(reader, &update_id).map(|v| v.is_some())
|
||||
self.updates.get(reader, &update_id)
|
||||
}
|
||||
|
||||
pub fn put_update(
|
||||
@ -52,4 +52,8 @@ impl Updates {
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
|
||||
self.updates.clear(writer)
|
||||
}
|
||||
}
|
||||
|
@ -1,15 +1,18 @@
|
||||
use super::BEU64;
|
||||
use crate::update::UpdateResult;
|
||||
use heed::types::{OwnedType, SerdeBincode};
|
||||
use crate::update::ProcessedUpdateResult;
|
||||
use heed::types::{OwnedType, SerdeJson};
|
||||
use heed::Result as ZResult;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct UpdatesResults {
|
||||
pub(crate) updates_results: heed::Database<OwnedType<BEU64>, SerdeBincode<UpdateResult>>,
|
||||
pub(crate) updates_results: heed::Database<OwnedType<BEU64>, SerdeJson<ProcessedUpdateResult>>,
|
||||
}
|
||||
|
||||
impl UpdatesResults {
|
||||
pub fn last_update_id(self, reader: &heed::RoTxn) -> ZResult<Option<(u64, UpdateResult)>> {
|
||||
pub fn last_update_id(
|
||||
self,
|
||||
reader: &heed::RoTxn,
|
||||
) -> ZResult<Option<(u64, ProcessedUpdateResult)>> {
|
||||
match self.updates_results.last(reader)? {
|
||||
Some((key, data)) => Ok(Some((key.get(), data))),
|
||||
None => Ok(None),
|
||||
@ -20,7 +23,7 @@ impl UpdatesResults {
|
||||
self,
|
||||
writer: &mut heed::RwTxn,
|
||||
update_id: u64,
|
||||
update_result: &UpdateResult,
|
||||
update_result: &ProcessedUpdateResult,
|
||||
) -> ZResult<()> {
|
||||
let update_id = BEU64::new(update_id);
|
||||
self.updates_results.put(writer, &update_id, update_result)
|
||||
@ -30,8 +33,12 @@ impl UpdatesResults {
|
||||
self,
|
||||
reader: &heed::RoTxn,
|
||||
update_id: u64,
|
||||
) -> ZResult<Option<UpdateResult>> {
|
||||
) -> ZResult<Option<ProcessedUpdateResult>> {
|
||||
let update_id = BEU64::new(update_id);
|
||||
self.updates_results.get(reader, &update_id)
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
|
||||
self.updates_results.clear(writer)
|
||||
}
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ pub fn push_clear_all(
|
||||
updates_results_store: store::UpdatesResults,
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
let update = Update::ClearAll;
|
||||
let update = Update::clear_all();
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
|
@ -18,7 +18,7 @@ pub fn push_customs_update(
|
||||
) -> ZResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::Customs(customs);
|
||||
let update = Update::customs(customs);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
|
@ -2,10 +2,11 @@ use std::collections::HashMap;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use sdset::{duo::Union, SetOperation};
|
||||
use serde::Serialize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::{extract_document_id, serialize_value, Serializer};
|
||||
use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer};
|
||||
use crate::store;
|
||||
use crate::update::{apply_documents_deletion, next_update_id, Update};
|
||||
use crate::{Error, MResult, RankedMap};
|
||||
@ -13,21 +14,37 @@ use crate::{Error, MResult, RankedMap};
|
||||
pub struct DocumentsAddition<D> {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
documents: Vec<D>,
|
||||
is_partial: bool,
|
||||
}
|
||||
|
||||
impl<D> DocumentsAddition<D> {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsAddition<D> {
|
||||
DocumentsAddition {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
is_partial: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_partial(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsAddition<D> {
|
||||
DocumentsAddition {
|
||||
updates_store,
|
||||
updates_results_store,
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
is_partial: true,
|
||||
}
|
||||
}
|
||||
|
||||
@ -39,12 +56,13 @@ impl<D> DocumentsAddition<D> {
|
||||
where
|
||||
D: serde::Serialize,
|
||||
{
|
||||
let _ = self.updates_notifier.send(());
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_documents_addition(
|
||||
writer,
|
||||
self.updates_store,
|
||||
self.updates_results_store,
|
||||
self.documents,
|
||||
self.is_partial,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
@ -61,6 +79,7 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
addition: Vec<D>,
|
||||
is_partial: bool,
|
||||
) -> MResult<u64> {
|
||||
let mut values = Vec::with_capacity(addition.len());
|
||||
for add in addition {
|
||||
@ -71,23 +90,27 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
||||
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::DocumentsAddition(values);
|
||||
let update = if is_partial {
|
||||
Update::documents_partial(values)
|
||||
} else {
|
||||
Update::documents_addition(values)
|
||||
};
|
||||
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_documents_addition(
|
||||
writer: &mut heed::RwTxn,
|
||||
pub fn apply_documents_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b>,
|
||||
main_store: store::Main,
|
||||
documents_fields_store: store::DocumentsFields,
|
||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
addition: Vec<serde_json::Value>,
|
||||
addition: Vec<HashMap<String, serde_json::Value>>,
|
||||
) -> MResult<()> {
|
||||
let mut documents_additions = HashMap::new();
|
||||
let mut indexer = RawIndexer::new();
|
||||
|
||||
let schema = match main_store.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
@ -124,7 +147,14 @@ pub fn apply_documents_addition(
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
let stop_words = match main_store.stop_words_fst(writer)? {
|
||||
Some(stop_words) => stop_words,
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
// 3. index the documents fields in the stores
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
for (document_id, document) in documents_additions {
|
||||
let serializer = Serializer {
|
||||
txn: writer,
|
||||
@ -144,7 +174,103 @@ pub fn apply_documents_addition(
|
||||
main_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
ranked_map,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn apply_documents_partial_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b>,
|
||||
main_store: store::Main,
|
||||
documents_fields_store: store::DocumentsFields,
|
||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
addition: Vec<HashMap<String, serde_json::Value>>,
|
||||
) -> MResult<()> {
|
||||
let mut documents_additions = HashMap::new();
|
||||
|
||||
let schema = match main_store.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
let identifier = schema.identifier_name();
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
for mut document in addition {
|
||||
let document_id = match extract_document_id(identifier, &document)? {
|
||||
Some(id) => id,
|
||||
None => return Err(Error::MissingDocumentId),
|
||||
};
|
||||
|
||||
let mut deserializer = Deserializer {
|
||||
document_id,
|
||||
reader: writer,
|
||||
documents_fields: documents_fields_store,
|
||||
schema: &schema,
|
||||
attributes: None,
|
||||
};
|
||||
|
||||
// retrieve the old document and
|
||||
// update the new one with missing keys found in the old one
|
||||
let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?;
|
||||
if let Some(old_document) = result {
|
||||
for (key, value) in old_document {
|
||||
document.entry(key).or_insert(value);
|
||||
}
|
||||
}
|
||||
|
||||
documents_additions.insert(document_id, document);
|
||||
}
|
||||
|
||||
// 2. remove the documents posting lists
|
||||
let number_of_inserted_documents = documents_additions.len();
|
||||
let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
|
||||
apply_documents_deletion(
|
||||
writer,
|
||||
main_store,
|
||||
documents_fields_store,
|
||||
documents_fields_counts_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
documents_ids,
|
||||
)?;
|
||||
|
||||
let mut ranked_map = match main_store.ranked_map(writer)? {
|
||||
Some(ranked_map) => ranked_map,
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
let stop_words = match main_store.stop_words_fst(writer)? {
|
||||
Some(stop_words) => stop_words,
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
// 3. index the documents fields in the stores
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
for (document_id, document) in documents_additions {
|
||||
let serializer = Serializer {
|
||||
txn: writer,
|
||||
schema: &schema,
|
||||
document_store: documents_fields_store,
|
||||
document_fields_counts: documents_fields_counts_store,
|
||||
indexer: &mut indexer,
|
||||
ranked_map: &mut ranked_map,
|
||||
document_id,
|
||||
};
|
||||
|
||||
document.serialize(serializer)?;
|
||||
}
|
||||
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
main_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)
|
||||
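The core of the partial addition above is the merge step: fields missing from the new (partial) document are filled in from the stored one before re-serialization. A standalone sketch with made-up field names, using serde_json as in the diff:

use std::collections::HashMap;

fn main() {
    // Previously stored version of the document.
    let mut old_document: HashMap<String, serde_json::Value> = HashMap::new();
    old_document.insert("title".to_string(), serde_json::json!("old title"));
    old_document.insert("year".to_string(), serde_json::json!(1999));

    // The partial addition only carries the fields that change.
    let mut document: HashMap<String, serde_json::Value> = HashMap::new();
    document.insert("title".to_string(), serde_json::json!("new title"));

    // Same merge as in apply_documents_partial_addition: keep the new values,
    // fill in everything else from the old document.
    for (key, value) in old_document {
        document.entry(key).or_insert(value);
    }

    assert_eq!(document["title"], serde_json::json!("new title"));
    assert_eq!(document["year"], serde_json::json!(1999));
}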
@ -173,49 +299,58 @@ pub fn reindex_all_documents(
|
||||
}
|
||||
|
||||
// 2. remove the documents posting lists
|
||||
let number_of_inserted_documents = documents_ids_to_reindex.len();
|
||||
main_store.put_words_fst(writer, &fst::Set::default())?;
|
||||
main_store.put_ranked_map(writer, &ranked_map)?;
|
||||
main_store.put_number_of_documents(writer, |_| 0)?;
|
||||
postings_lists_store.clear(writer)?;
|
||||
docs_words_store.clear(writer)?;
|
||||
|
||||
// 3. re-index one document by one document (otherwise we make the borrow checker unhappy)
|
||||
let mut indexer = RawIndexer::new();
|
||||
let mut ram_store = HashMap::new();
|
||||
// 3. re-index chunks of documents (otherwise we make the borrow checker unhappy)
|
||||
for documents_ids in documents_ids_to_reindex.chunks(100) {
|
||||
let stop_words = match main_store.stop_words_fst(writer)? {
|
||||
Some(stop_words) => stop_words,
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
for document_id in documents_ids_to_reindex {
|
||||
for result in documents_fields_store.document_fields(writer, document_id)? {
|
||||
let (attr, bytes) = result?;
|
||||
let value: serde_json::Value = serde_json::from_slice(bytes)?;
|
||||
ram_store.insert((document_id, attr), value);
|
||||
let number_of_inserted_documents = documents_ids.len();
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
let mut ram_store = HashMap::new();
|
||||
|
||||
for document_id in documents_ids {
|
||||
for result in documents_fields_store.document_fields(writer, *document_id)? {
|
||||
let (attr, bytes) = result?;
|
||||
let value: serde_json::Value = serde_json::from_slice(bytes)?;
|
||||
ram_store.insert((document_id, attr), value);
|
||||
}
|
||||
|
||||
for ((docid, attr), value) in ram_store.drain() {
|
||||
serialize_value(
|
||||
writer,
|
||||
attr,
|
||||
schema.props(attr),
|
||||
*docid,
|
||||
documents_fields_store,
|
||||
documents_fields_counts_store,
|
||||
&mut indexer,
|
||||
&mut ranked_map,
|
||||
&value,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
for ((docid, attr), value) in ram_store.drain() {
|
||||
serialize_value(
|
||||
writer,
|
||||
attr,
|
||||
schema.props(attr),
|
||||
docid,
|
||||
documents_fields_store,
|
||||
documents_fields_counts_store,
|
||||
&mut indexer,
|
||||
&mut ranked_map,
|
||||
&value,
|
||||
)?;
|
||||
}
|
||||
// 4. write the new index in the main store
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
main_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)?;
|
||||
}
|
||||
|
||||
// 4. write the new index in the main store
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
main_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_documents_addition_index(
|
||||
@ -223,7 +358,7 @@ pub fn write_documents_addition_index(
|
||||
main_store: store::Main,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
ranked_map: RankedMap,
|
||||
ranked_map: &RankedMap,
|
||||
number_of_inserted_documents: usize,
|
||||
indexer: RawIndexer,
|
||||
) -> MResult<()> {
|
||||
@ -268,7 +403,7 @@ pub fn write_documents_addition_index(
|
||||
};
|
||||
|
||||
main_store.put_words_fst(writer, &words)?;
|
||||
main_store.put_ranked_map(writer, &ranked_map)?;
|
||||
main_store.put_ranked_map(writer, ranked_map)?;
|
||||
main_store.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;
|
||||
|
||||
Ok(())
|
||||
|
@ -4,6 +4,7 @@ use fst::{SetBuilder, Streamer};
|
||||
use meilidb_schema::Schema;
|
||||
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
|
||||
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::serde::extract_document_id;
|
||||
use crate::store;
|
||||
use crate::update::{next_update_id, Update};
|
||||
@ -12,7 +13,7 @@ use crate::{DocumentId, Error, MResult, RankedMap};
|
||||
pub struct DocumentsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
documents: Vec<DocumentId>,
|
||||
}
|
||||
|
||||
@ -20,7 +21,7 @@ impl DocumentsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> DocumentsDeletion {
|
||||
DocumentsDeletion {
|
||||
updates_store,
|
||||
@ -50,7 +51,7 @@ impl DocumentsDeletion {
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(());
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_documents_deletion(
|
||||
writer,
|
||||
self.updates_store,
|
||||
@ -75,7 +76,7 @@ pub fn push_documents_deletion(
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::DocumentsDeletion(deletion);
|
||||
let update = Update::documents_deletion(deletion);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
|
@ -3,21 +3,28 @@ mod customs_update;
|
||||
mod documents_addition;
|
||||
mod documents_deletion;
|
||||
mod schema_update;
|
||||
mod stop_words_addition;
|
||||
mod stop_words_deletion;
|
||||
mod synonyms_addition;
|
||||
mod synonyms_deletion;
|
||||
|
||||
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
||||
pub use self::customs_update::{apply_customs_update, push_customs_update};
|
||||
pub use self::documents_addition::{apply_documents_addition, DocumentsAddition};
|
||||
pub use self::documents_addition::{
|
||||
apply_documents_addition, apply_documents_partial_addition, DocumentsAddition,
|
||||
};
|
||||
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
||||
pub use self::schema_update::{apply_schema_update, push_schema_update};
|
||||
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
|
||||
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
|
||||
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
|
||||
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
|
||||
|
||||
use std::cmp;
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
use std::time::Instant;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use heed::Result as ZResult;
|
||||
use log::debug;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -26,45 +33,173 @@ use crate::{store, DocumentId, MResult};
|
||||
use meilidb_schema::Schema;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum Update {
|
||||
ClearAll,
|
||||
Schema(Schema),
|
||||
Customs(Vec<u8>),
|
||||
DocumentsAddition(Vec<serde_json::Value>),
|
||||
DocumentsDeletion(Vec<DocumentId>),
|
||||
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
||||
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
||||
pub struct Update {
|
||||
data: UpdateData,
|
||||
enqueued_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Update {
|
||||
fn clear_all() -> Update {
|
||||
Update {
|
||||
data: UpdateData::ClearAll,
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn schema(data: Schema) -> Update {
|
||||
Update {
|
||||
data: UpdateData::Schema(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn customs(data: Vec<u8>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::Customs(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_addition(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsAddition(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_partial(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsPartial(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_deletion(data: Vec<DocumentId>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsDeletion(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn synonyms_addition(data: BTreeMap<String, Vec<String>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::SynonymsAddition(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn synonyms_deletion(data: BTreeMap<String, Option<Vec<String>>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::SynonymsDeletion(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn stop_words_addition(data: BTreeSet<String>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::StopWordsAddition(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn stop_words_deletion(data: BTreeSet<String>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::StopWordsDeletion(data),
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateData {
|
||||
ClearAll,
|
||||
Schema(Schema),
|
||||
Customs(Vec<u8>),
|
||||
DocumentsAddition(Vec<HashMap<String, serde_json::Value>>),
|
||||
DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
|
||||
DocumentsDeletion(Vec<DocumentId>),
|
||||
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
||||
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
||||
StopWordsAddition(BTreeSet<String>),
|
||||
StopWordsDeletion(BTreeSet<String>),
|
||||
}
|
||||
|
||||
impl UpdateData {
|
||||
pub fn update_type(&self) -> UpdateType {
|
||||
match self {
|
||||
UpdateData::ClearAll => UpdateType::ClearAll,
|
||||
UpdateData::Schema(_) => UpdateType::Schema,
|
||||
UpdateData::Customs(_) => UpdateType::Customs,
|
||||
UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
|
||||
number: deletion.len(),
|
||||
},
|
||||
UpdateData::SynonymsAddition(addition) => UpdateType::SynonymsAddition {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::SynonymsDeletion(deletion) => UpdateType::SynonymsDeletion {
|
||||
number: deletion.len(),
|
||||
},
|
||||
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
|
||||
number: addition.len(),
|
||||
},
|
||||
UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion {
|
||||
number: deletion.len(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "name")]
|
||||
pub enum UpdateType {
|
||||
ClearAll,
|
||||
Schema { schema: Schema },
|
||||
Schema,
|
||||
Customs,
|
||||
DocumentsAddition { number: usize },
|
||||
DocumentsPartial { number: usize },
|
||||
DocumentsDeletion { number: usize },
|
||||
SynonymsAddition { number: usize },
|
||||
SynonymsDeletion { number: usize },
|
||||
StopWordsAddition { number: usize },
|
||||
StopWordsDeletion { number: usize },
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct DetailedDuration {
|
||||
pub main: Duration,
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ProcessedUpdateResult {
|
||||
pub update_id: u64,
|
||||
#[serde(rename = "type")]
|
||||
pub update_type: UpdateType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<String>,
|
||||
pub duration: f64, // in seconds
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
pub processed_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct UpdateResult {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct EnqueuedUpdateResult {
|
||||
pub update_id: u64,
|
||||
pub update_type: UpdateType,
|
||||
pub result: Result<(), String>,
|
||||
pub detailed_duration: DetailedDuration,
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", tag = "status")]
|
||||
pub enum UpdateStatus {
|
||||
Enqueued,
|
||||
Processed(UpdateResult),
|
||||
Unknown,
|
||||
Enqueued {
|
||||
#[serde(flatten)]
|
||||
content: EnqueuedUpdateResult,
|
||||
},
|
||||
Processed {
|
||||
#[serde(flatten)]
|
||||
content: ProcessedUpdateResult,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn update_status(
|
||||
@ -72,16 +207,19 @@ pub fn update_status(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
update_id: u64,
|
||||
) -> MResult<UpdateStatus> {
|
||||
) -> MResult<Option<UpdateStatus>> {
|
||||
match updates_results_store.update_result(reader, update_id)? {
|
||||
Some(result) => Ok(UpdateStatus::Processed(result)),
|
||||
None => {
|
||||
if updates_store.contains(reader, update_id)? {
|
||||
Ok(UpdateStatus::Enqueued)
|
||||
} else {
|
||||
Ok(UpdateStatus::Unknown)
|
||||
}
|
||||
}
|
||||
Some(result) => Ok(Some(UpdateStatus::Processed { content: result })),
|
||||
None => match updates_store.get(reader, update_id)? {
|
||||
Some(update) => Ok(Some(UpdateStatus::Enqueued {
|
||||
content: EnqueuedUpdateResult {
|
||||
update_id,
|
||||
update_type: update.data.update_type(),
|
||||
enqueued_at: update.enqueued_at,
|
||||
},
|
||||
})),
|
||||
None => Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
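Condensed sketch (hypothetical field set; serde and serde_json as dependencies) of the serialization pattern UpdateStatus uses above: the enum is internally tagged by a "status" field and the per-variant payload is flattened into the same JSON object.

use serde::Serialize;

#[derive(Serialize)]
struct Processed {
    update_id: u64,
    duration: f64,
}

#[derive(Serialize)]
#[serde(rename_all = "camelCase", tag = "status")]
enum Status {
    Enqueued { update_id: u64 },
    Processed {
        #[serde(flatten)]
        content: Processed,
    },
}

fn main() {
    let enqueued = Status::Enqueued { update_id: 8 };
    let processed = Status::Processed {
        content: Processed { update_id: 7, duration: 0.42 },
    };

    // {"status":"enqueued","update_id":8}
    println!("{}", serde_json::to_string(&enqueued).unwrap());
    // {"status":"processed","update_id":7,"duration":0.42}
    println!("{}", serde_json::to_string(&processed).unwrap());
}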
@ -102,16 +240,18 @@ pub fn next_update_id(
|
||||
Ok(new_update_id)
|
||||
}
|
||||
|
||||
pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Option<UpdateResult>> {
|
||||
let (update_id, update) = match index.updates.pop_front(writer)? {
|
||||
Some(value) => value,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
pub fn update_task<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b>,
|
||||
index: store::Index,
|
||||
update_id: u64,
|
||||
update: Update,
|
||||
) -> MResult<ProcessedUpdateResult> {
|
||||
debug!("Processing update number {}", update_id);
|
||||
|
||||
let (update_type, result, duration) = match update {
|
||||
Update::ClearAll => {
|
||||
let Update { enqueued_at, data } = update;
|
||||
|
||||
let (update_type, result, duration) = match data {
|
||||
UpdateData::ClearAll => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::ClearAll;
|
||||
@ -126,12 +266,10 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
Update::Schema(schema) => {
|
||||
UpdateData::Schema(schema) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::Schema {
|
||||
schema: schema.clone(),
|
||||
};
|
||||
let update_type = UpdateType::Schema;
|
||||
let result = apply_schema_update(
|
||||
writer,
|
||||
&schema,
|
||||
@ -144,7 +282,7 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
Update::Customs(customs) => {
|
||||
UpdateData::Customs(customs) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::Customs;
|
||||
@ -152,7 +290,7 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
Update::DocumentsAddition(documents) => {
|
||||
UpdateData::DocumentsAddition(documents) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::DocumentsAddition {
|
||||
@ -171,7 +309,26 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
Update::DocumentsDeletion(documents) => {
|
||||
UpdateData::DocumentsPartial(documents) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::DocumentsPartial {
|
||||
number: documents.len(),
|
||||
};
|
||||
|
||||
let result = apply_documents_partial_addition(
|
||||
writer,
|
||||
index.main,
|
||||
index.documents_fields,
|
||||
index.documents_fields_counts,
|
||||
index.postings_lists,
|
||||
index.docs_words,
|
||||
documents,
|
||||
);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::DocumentsDeletion(documents) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::DocumentsDeletion {
|
||||
@ -190,7 +347,7 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
Update::SynonymsAddition(synonyms) => {
|
||||
UpdateData::SynonymsAddition(synonyms) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::SynonymsAddition {
|
||||
@ -201,7 +358,7 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
Update::SynonymsDeletion(synonyms) => {
|
||||
UpdateData::SynonymsDeletion(synonyms) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::SynonymsDeletion {
|
||||
@ -210,6 +367,37 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
|
||||
let result = apply_synonyms_deletion(writer, index.main, index.synonyms, synonyms);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::StopWordsAddition(stop_words) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::StopWordsAddition {
|
||||
number: stop_words.len(),
|
||||
};
|
||||
|
||||
let result =
|
||||
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::StopWordsDeletion(stop_words) => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::StopWordsDeletion {
|
||||
number: stop_words.len(),
|
||||
};
|
||||
|
||||
let result = apply_stop_words_deletion(
|
||||
writer,
|
||||
index.main,
|
||||
index.documents_fields,
|
||||
index.documents_fields_counts,
|
||||
index.postings_lists,
|
||||
index.docs_words,
|
||||
stop_words,
|
||||
);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
};
|
||||
@ -219,17 +407,14 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
||||
update_id, update_type, result
|
||||
);
|
||||
|
||||
let detailed_duration = DetailedDuration { main: duration };
|
||||
let status = UpdateResult {
|
||||
let status = ProcessedUpdateResult {
|
||||
update_id,
|
||||
update_type,
|
||||
result: result.map_err(|e| e.to_string()),
|
||||
detailed_duration,
|
||||
error: result.map_err(|e| e.to_string()).err(),
|
||||
duration: duration.as_secs_f64(),
|
||||
enqueued_at,
|
||||
processed_at: Utc::now(),
|
||||
};
|
||||
|
||||
index
|
||||
.updates_results
|
||||
.put_update_result(writer, update_id, &status)?;
|
||||
|
||||
Ok(Some(status))
|
||||
Ok(status)
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ pub fn apply_schema_update(
|
||||
docs_words_store: store::DocsWords,
|
||||
) -> MResult<()> {
|
||||
use UnsupportedOperation::{
|
||||
CannotIntroduceNewSchemaAttribute, CannotRemoveSchemaAttribute,
|
||||
CanOnlyIntroduceNewSchemaAttributesAtEnd, CannotRemoveSchemaAttribute,
|
||||
CannotReorderSchemaAttribute, CannotUpdateSchemaIdentifier,
|
||||
};
|
||||
|
||||
@ -33,7 +33,12 @@ pub fn apply_schema_update(
|
||||
need_full_reindexing = true;
|
||||
}
|
||||
}
|
||||
Diff::NewAttr { .. } => return Err(CannotIntroduceNewSchemaAttribute.into()),
|
||||
Diff::NewAttr { pos, .. } => {
|
||||
// new attribute not at the end of the schema
|
||||
if pos < old_schema.number_of_attributes() {
|
||||
return Err(CanOnlyIntroduceNewSchemaAttributesAtEnd.into());
|
||||
}
|
||||
}
|
||||
Diff::RemovedAttr { .. } => return Err(CannotRemoveSchemaAttribute.into()),
|
||||
}
|
||||
}
|
||||
@ -63,7 +68,7 @@ pub fn push_schema_update(
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::Schema(schema);
|
||||
let update = Update::schema(schema);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
|
meilidb-core/src/update/stop_words_addition.rs (new file, 117 lines)
@ -0,0 +1,117 @@
use std::collections::BTreeSet;

use fst::{set::OpBuilder, SetBuilder};

use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::{next_update_id, Update};
use crate::{store, MResult};

pub struct StopWordsAddition {
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: UpdateEventsEmitter,
    stop_words: BTreeSet<String>,
}

impl StopWordsAddition {
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: UpdateEventsEmitter,
    ) -> StopWordsAddition {
        StopWordsAddition {
            updates_store,
            updates_results_store,
            updates_notifier,
            stop_words: BTreeSet::new(),
        }
    }

    pub fn add_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
        let stop_word = normalize_str(stop_word.as_ref());
        self.stop_words.insert(stop_word);
    }

    pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
        let update_id = push_stop_words_addition(
            writer,
            self.updates_store,
            self.updates_results_store,
            self.stop_words,
        )?;
        Ok(update_id)
    }
}

pub fn push_stop_words_addition(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    addition: BTreeSet<String>,
) -> MResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::stop_words_addition(addition);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}

pub fn apply_stop_words_addition(
    writer: &mut heed::RwTxn,
    main_store: store::Main,
    postings_lists_store: store::PostingsLists,
    addition: BTreeSet<String>,
) -> MResult<()> {
    let mut stop_words_builder = SetBuilder::memory();

    for word in addition {
        stop_words_builder.insert(&word).unwrap();
        // we remove every posting list associated with a new stop word
        postings_lists_store.del_postings_list(writer, word.as_bytes())?;
    }

    // create the new delta stop words fst
    let delta_stop_words = stop_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    // we also need to remove all the stop words from the main words fst
    if let Some(word_fst) = main_store.words_fst(writer)? {
        let op = OpBuilder::new()
            .add(&word_fst)
            .add(&delta_stop_words)
            .difference();

        let mut word_fst_builder = SetBuilder::memory();
        word_fst_builder.extend_stream(op).unwrap();
        let word_fst = word_fst_builder
            .into_inner()
            .and_then(fst::Set::from_bytes)
            .unwrap();

        main_store.put_words_fst(writer, &word_fst)?;
    }

    // now we add all of these stop words to the stop words fst of the main store
    let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();

    let op = OpBuilder::new()
        .add(&stop_words_fst)
        .add(&delta_stop_words)
        .r#union();

    let mut stop_words_builder = SetBuilder::memory();
    stop_words_builder.extend_stream(op).unwrap();
    let stop_words_fst = stop_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    main_store.put_stop_words_fst(writer, &stop_words_fst)?;

    Ok(())
}
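A minimal usage sketch for the builder above; `index.stop_words_addition()` is an assumed accessor, named by analogy with the `documents_deletion()` builder used elsewhere in this change, and the surrounding `env`/`index` bindings are illustrative only.

// hedged sketch, not part of the diff: enqueue a stop-words addition
let mut addition = index.stop_words_addition();
addition.add_stop_word("the");
addition.add_stop_word("of");

let mut writer = env.write_txn()?;
let update_id = addition.finalize(&mut writer)?;
writer.commit()?;
// the update is applied later by `update_task`, which routes it
// to `apply_stop_words_addition`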
meilidb-core/src/update/stop_words_deletion.rs (new file, 113 lines)
@ -0,0 +1,113 @@
use std::collections::BTreeSet;

use fst::{set::OpBuilder, SetBuilder};

use crate::automaton::normalize_str;
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{store, MResult};

pub struct StopWordsDeletion {
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    updates_notifier: UpdateEventsEmitter,
    stop_words: BTreeSet<String>,
}

impl StopWordsDeletion {
    pub fn new(
        updates_store: store::Updates,
        updates_results_store: store::UpdatesResults,
        updates_notifier: UpdateEventsEmitter,
    ) -> StopWordsDeletion {
        StopWordsDeletion {
            updates_store,
            updates_results_store,
            updates_notifier,
            stop_words: BTreeSet::new(),
        }
    }

    pub fn delete_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
        let stop_word = normalize_str(stop_word.as_ref());
        self.stop_words.insert(stop_word);
    }

    pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
        let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
        let update_id = push_stop_words_deletion(
            writer,
            self.updates_store,
            self.updates_results_store,
            self.stop_words,
        )?;
        Ok(update_id)
    }
}

pub fn push_stop_words_deletion(
    writer: &mut heed::RwTxn,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    deletion: BTreeSet<String>,
) -> MResult<u64> {
    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;

    let update = Update::stop_words_deletion(deletion);
    updates_store.put_update(writer, last_update_id, &update)?;

    Ok(last_update_id)
}

pub fn apply_stop_words_deletion(
    writer: &mut heed::RwTxn,
    main_store: store::Main,
    documents_fields_store: store::DocumentsFields,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
    deletion: BTreeSet<String>,
) -> MResult<()> {
    let mut stop_words_builder = SetBuilder::memory();

    for word in deletion {
        stop_words_builder.insert(&word).unwrap();
    }

    // create the new delta stop words fst
    let delta_stop_words = stop_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    // now we delete all of these stop words from the main store
    let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();

    let op = OpBuilder::new()
        .add(&stop_words_fst)
        .add(&delta_stop_words)
        .difference();

    let mut stop_words_builder = SetBuilder::memory();
    stop_words_builder.extend_stream(op).unwrap();
    let stop_words_fst = stop_words_builder
        .into_inner()
        .and_then(fst::Set::from_bytes)
        .unwrap();

    main_store.put_stop_words_fst(writer, &stop_words_fst)?;

    // now that we have set up the stop words
    // let's reindex everything...
    reindex_all_documents(
        writer,
        main_store,
        documents_fields_store,
        documents_fields_counts_store,
        postings_lists_store,
        docs_words_store,
    )?;

    Ok(())
}
@ -4,13 +4,14 @@ use fst::{set::OpBuilder, SetBuilder};
|
||||
use sdset::SetBuf;
|
||||
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct SynonymsAddition {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
synonyms: BTreeMap<String, Vec<String>>,
|
||||
}
|
||||
|
||||
@ -18,7 +19,7 @@ impl SynonymsAddition {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> SynonymsAddition {
|
||||
SynonymsAddition {
|
||||
updates_store,
|
||||
@ -43,7 +44,7 @@ impl SynonymsAddition {
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(());
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_synonyms_addition(
|
||||
writer,
|
||||
self.updates_store,
|
||||
@ -62,7 +63,7 @@ pub fn push_synonyms_addition(
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::SynonymsAddition(addition);
|
||||
let update = Update::synonyms_addition(addition);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
|
@ -5,13 +5,14 @@ use fst::{set::OpBuilder, SetBuilder};
|
||||
use sdset::SetBuf;
|
||||
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
|
||||
pub struct SynonymsDeletion {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
synonyms: BTreeMap<String, Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
@ -19,7 +20,7 @@ impl SynonymsDeletion {
|
||||
pub fn new(
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: crossbeam_channel::Sender<()>,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
) -> SynonymsDeletion {
|
||||
SynonymsDeletion {
|
||||
updates_store,
|
||||
@ -50,7 +51,7 @@ impl SynonymsDeletion {
|
||||
}
|
||||
|
||||
pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
|
||||
let _ = self.updates_notifier.send(());
|
||||
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
||||
let update_id = push_synonyms_deletion(
|
||||
writer,
|
||||
self.updates_store,
|
||||
@ -69,7 +70,7 @@ pub fn push_synonyms_deletion(
|
||||
) -> MResult<u64> {
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = Update::SynonymsDeletion(deletion);
|
||||
let update = Update::synonyms_deletion(deletion);
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
|
||||
Ok(last_update_id)
|
||||
|
meilidb-http/Cargo.toml (new file, 56 lines)
@ -0,0 +1,56 @@
[package]
name = "meilidb-http"
version = "0.2.0"
authors = [
    "Quentin de Quelen <quentin@dequelen.me>",
    "Clément Renault <clement@meilisearch.com>",
]
edition = "2018"

[dependencies]
bincode = "1.2.0"
chrono = { version = "0.4.9", features = ["serde"] }
crossbeam-channel = "0.4.0"
env_logger = "0.7.1"
envconfig = "0.5.1"
envconfig_derive = "0.5.1"
heed = "0.5.0"
http = "0.1.19"
indexmap = { version = "1.3.0", features = ["serde-1"] }
jemallocator = "0.3.2"
log = "0.4.8"
main_error = "0.1.0"
meilidb-core = { path = "../meilidb-core", version = "0.7.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.6.0" }
pretty-bytes = "0.2.2"
rand = "0.7.2"
rayon = "1.2.0"
serde = { version = "1.0.101", features = ["derive"] }
serde_json = { version = "1.0.41", features = ["preserve_order"] }
structopt = "0.3.3"
sysinfo = "0.9.5"
walkdir = "2.2.9"

[dependencies.async-compression]
default-features = false
features = ["stream", "gzip", "zlib", "brotli", "zstd"]
version = "=0.1.0-alpha.7"

[dependencies.tide]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"

[dependencies.tide-log]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"

[dependencies.tide-slog]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"

[dependencies.tide-compression]
git = "https://github.com/rustasync/tide"
rev = "e77709370bb24cf776fe6da902467c35131535b1"

[build-dependencies]
vergen = "3.0.4"
meilidb-http/build.rs (new file, 10 lines)
@ -0,0 +1,10 @@
use vergen::{generate_cargo_keys, ConstantsFlags};

fn main() {
    // Set up the flags, toggling off the 'SEMVER_FROM_CARGO_PKG' flag
    let mut flags = ConstantsFlags::all();
    flags.toggle(ConstantsFlags::SEMVER_FROM_CARGO_PKG);

    // Generate the 'cargo:' key output using the toggled flags
    generate_cargo_keys(flags).expect("Unable to generate the cargo keys!");
}
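For context, a sketch of how the generated `cargo:` keys are typically consumed in the crate; the exact `VERGEN_*` variable names are an assumption based on vergen 3.x defaults, not part of this diff.

// hedged sketch, not part of the diff
pub fn build_info() -> String {
    format!(
        "commit {} built at {}",
        env!("VERGEN_SHA_SHORT"),
        env!("VERGEN_BUILD_TIMESTAMP"),
    )
}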
meilidb-http/src/data.rs (new file, 153 lines)
@ -0,0 +1,153 @@
|
||||
use std::collections::HashMap;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use heed::types::{SerdeBincode, Str};
|
||||
use log::*;
|
||||
use meilidb_core::{Database, MResult};
|
||||
use sysinfo::Pid;
|
||||
|
||||
use crate::option::Opt;
|
||||
use crate::routes::index::index_update_callback;
|
||||
|
||||
pub type FreqsMap = HashMap<String, usize>;
|
||||
type SerdeFreqsMap = SerdeBincode<FreqsMap>;
|
||||
type SerdeDatetime = SerdeBincode<DateTime<Utc>>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Data {
|
||||
inner: Arc<DataInner>,
|
||||
}
|
||||
|
||||
impl Deref for Data {
|
||||
type Target = DataInner;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DataInner {
|
||||
pub db: Arc<Database>,
|
||||
pub db_path: String,
|
||||
pub admin_token: Option<String>,
|
||||
pub server_pid: Pid,
|
||||
}
|
||||
|
||||
impl DataInner {
|
||||
pub fn is_indexing(&self, reader: &heed::RoTxn, index: &str) -> MResult<Option<bool>> {
|
||||
match self.db.open_index(&index) {
|
||||
Some(index) => index.current_update_id(&reader).map(|u| Some(u.is_some())),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn last_update(
|
||||
&self,
|
||||
reader: &heed::RoTxn,
|
||||
index_name: &str,
|
||||
) -> MResult<Option<DateTime<Utc>>> {
|
||||
let key = format!("last-update-{}", index_name);
|
||||
match self
|
||||
.db
|
||||
.common_store()
|
||||
.get::<Str, SerdeDatetime>(&reader, &key)?
|
||||
{
|
||||
Some(datetime) => Ok(Some(datetime)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_last_update(&self, writer: &mut heed::RwTxn, index_name: &str) -> MResult<()> {
|
||||
let key = format!("last-update-{}", index_name);
|
||||
self.db
|
||||
.common_store()
|
||||
.put::<Str, SerdeDatetime>(writer, &key, &Utc::now())
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn fields_frequency(
|
||||
&self,
|
||||
reader: &heed::RoTxn,
|
||||
index_name: &str,
|
||||
) -> MResult<Option<FreqsMap>> {
|
||||
let key = format!("fields-frequency-{}", index_name);
|
||||
match self
|
||||
.db
|
||||
.common_store()
|
||||
.get::<Str, SerdeFreqsMap>(&reader, &key)?
|
||||
{
|
||||
Some(freqs) => Ok(Some(freqs)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_stats(&self, writer: &mut heed::RwTxn, index_name: &str) -> MResult<()> {
|
||||
let index = match self.db.open_index(&index_name) {
|
||||
Some(index) => index,
|
||||
None => {
|
||||
error!("Impossible to retrieve index {}", index_name);
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let schema = match index.main.schema(&writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
let all_documents_fields = index
|
||||
.documents_fields_counts
|
||||
.all_documents_fields_counts(&writer)?;
|
||||
|
||||
// count fields frequencies
|
||||
let mut fields_frequency = HashMap::<_, usize>::new();
|
||||
for result in all_documents_fields {
|
||||
let (_, attr, _) = result?;
|
||||
*fields_frequency.entry(attr).or_default() += 1;
|
||||
}
|
||||
|
||||
// convert attributes to their names
|
||||
let frequency: HashMap<_, _> = fields_frequency
|
||||
.into_iter()
|
||||
.map(|(a, c)| (schema.attribute_name(a).to_owned(), c))
|
||||
.collect();
|
||||
|
||||
let key = format!("fields-frequency-{}", index_name);
|
||||
self.db
|
||||
.common_store()
|
||||
.put::<Str, SerdeFreqsMap>(writer, &key, &frequency)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Data {
|
||||
pub fn new(opt: Opt) -> Data {
|
||||
let db_path = opt.database_path.clone();
|
||||
let admin_token = opt.admin_token.clone();
|
||||
let server_pid = sysinfo::get_current_pid().unwrap();
|
||||
|
||||
let db = Arc::new(Database::open_or_create(opt.database_path.clone()).unwrap());
|
||||
|
||||
let inner_data = DataInner {
|
||||
db: db.clone(),
|
||||
db_path,
|
||||
admin_token,
|
||||
server_pid,
|
||||
};
|
||||
|
||||
let data = Data {
|
||||
inner: Arc::new(inner_data),
|
||||
};
|
||||
|
||||
let callback_context = data.clone();
|
||||
db.set_update_callback(Box::new(move |index_name, status| {
|
||||
index_update_callback(&index_name, &callback_context, status);
|
||||
}));
|
||||
|
||||
data
|
||||
}
|
||||
}
|
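A sketch of how these helpers are typically wired together from the update callback; the real `index_update_callback` lives in the routes module and is not shown in this excerpt, so the helper below is illustrative only.

// hedged sketch, not part of the diff
fn refresh_index_meta(data: &Data, index_name: &str) -> MResult<()> {
    let mut writer = data.db.env.write_txn()?;
    data.compute_stats(&mut writer, index_name)?;
    data.set_last_update(&mut writer, index_name)?;
    writer.commit()?;
    Ok(())
}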
meilidb-http/src/error.rs (new file, 126 lines)
@ -0,0 +1,126 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
use http::status::StatusCode;
|
||||
use log::{error, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tide::response::IntoResponse;
|
||||
use tide::Response;
|
||||
|
||||
pub type SResult<T> = Result<T, ResponseError>;
|
||||
|
||||
pub enum ResponseError {
|
||||
Internal(String),
|
||||
BadRequest(String),
|
||||
InvalidToken(String),
|
||||
NotFound(String),
|
||||
IndexNotFound(String),
|
||||
DocumentNotFound(String),
|
||||
MissingHeader(String),
|
||||
BadParameter(String, String),
|
||||
OpenIndex(String),
|
||||
CreateIndex(String),
|
||||
Maintenance,
|
||||
}
|
||||
|
||||
impl ResponseError {
|
||||
pub fn internal(message: impl Display) -> ResponseError {
|
||||
ResponseError::Internal(message.to_string())
|
||||
}
|
||||
|
||||
pub fn bad_request(message: impl Display) -> ResponseError {
|
||||
ResponseError::BadRequest(message.to_string())
|
||||
}
|
||||
|
||||
pub fn invalid_token(message: impl Display) -> ResponseError {
|
||||
ResponseError::InvalidToken(message.to_string())
|
||||
}
|
||||
|
||||
pub fn not_found(message: impl Display) -> ResponseError {
|
||||
ResponseError::NotFound(message.to_string())
|
||||
}
|
||||
|
||||
pub fn index_not_found(message: impl Display) -> ResponseError {
|
||||
ResponseError::IndexNotFound(message.to_string())
|
||||
}
|
||||
|
||||
pub fn document_not_found(message: impl Display) -> ResponseError {
|
||||
ResponseError::DocumentNotFound(message.to_string())
|
||||
}
|
||||
|
||||
pub fn missing_header(message: impl Display) -> ResponseError {
|
||||
ResponseError::MissingHeader(message.to_string())
|
||||
}
|
||||
|
||||
pub fn bad_parameter(name: impl Display, message: impl Display) -> ResponseError {
|
||||
ResponseError::BadParameter(name.to_string(), message.to_string())
|
||||
}
|
||||
|
||||
pub fn open_index(message: impl Display) -> ResponseError {
|
||||
ResponseError::OpenIndex(message.to_string())
|
||||
}
|
||||
|
||||
pub fn create_index(message: impl Display) -> ResponseError {
|
||||
ResponseError::CreateIndex(message.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoResponse for ResponseError {
|
||||
fn into_response(self) -> Response {
|
||||
match self {
|
||||
ResponseError::Internal(err) => {
|
||||
error!("internal server error: {}", err);
|
||||
error(
|
||||
String::from("Internal server error"),
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
)
|
||||
}
|
||||
ResponseError::BadRequest(err) => {
|
||||
warn!("bad request: {}", err);
|
||||
error(err, StatusCode::BAD_REQUEST)
|
||||
}
|
||||
ResponseError::InvalidToken(err) => {
|
||||
error(format!("Invalid Token: {}", err), StatusCode::FORBIDDEN)
|
||||
}
|
||||
ResponseError::NotFound(err) => error(err, StatusCode::NOT_FOUND),
|
||||
ResponseError::IndexNotFound(index) => {
|
||||
error(format!("Index {} not found", index), StatusCode::NOT_FOUND)
|
||||
}
|
||||
ResponseError::DocumentNotFound(id) => error(
|
||||
format!("Document with id {} not found", id),
|
||||
StatusCode::NOT_FOUND,
|
||||
),
|
||||
ResponseError::MissingHeader(header) => error(
|
||||
format!("Header {} is missing", header),
|
||||
StatusCode::UNAUTHORIZED,
|
||||
),
|
||||
ResponseError::BadParameter(param, e) => error(
|
||||
format!("Url parameter {} error: {}", param, e),
|
||||
StatusCode::BAD_REQUEST,
|
||||
),
|
||||
ResponseError::CreateIndex(err) => error(
|
||||
format!("Impossible to create index; {}", err),
|
||||
StatusCode::BAD_REQUEST,
|
||||
),
|
||||
ResponseError::OpenIndex(err) => error(
|
||||
format!("Impossible to open index; {}", err),
|
||||
StatusCode::BAD_REQUEST,
|
||||
),
|
||||
ResponseError::Maintenance => error(
|
||||
String::from("Server is in maintenance, please try again later"),
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct ErrorMessage {
|
||||
message: String,
|
||||
}
|
||||
|
||||
fn error(message: String, status: StatusCode) -> Response {
|
||||
let message = ErrorMessage { message };
|
||||
tide::response::json(message)
|
||||
.with_status(status)
|
||||
.into_response()
|
||||
}
|
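A short sketch of how a handler is meant to use these helpers, returning `SResult` so that any `ResponseError` is converted into an HTTP response through `IntoResponse`; the route and index name below are illustrative, not part of the diff.

// hedged sketch, not part of the diff
pub async fn example_handler(ctx: tide::Context<Data>) -> SResult<tide::Response> {
    let index = ctx
        .state()
        .db
        .open_index("movies")
        .ok_or(ResponseError::index_not_found("movies"))?;
    // ... use `index`, mapping lower-level failures with `ResponseError::internal`
    let _ = index;
    Ok(tide::response::json("ok"))
}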
meilidb-http/src/helpers/meilidb.rs (new file, 570 lines)
@ -0,0 +1,570 @@
|
||||
use crate::routes::setting::{RankingOrdering, SettingBody};
|
||||
use indexmap::IndexMap;
|
||||
use log::*;
|
||||
use meilidb_core::criterion::*;
|
||||
use meilidb_core::Highlight;
|
||||
use meilidb_core::{Index, RankedMap};
|
||||
use meilidb_schema::{Schema, SchemaAttr};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::convert::From;
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
SearchDocuments(String),
|
||||
RetrieveDocument(u64, String),
|
||||
DocumentNotFound(u64),
|
||||
CropFieldWrongType(String),
|
||||
AttributeNotFoundOnDocument(String),
|
||||
AttributeNotFoundOnSchema(String),
|
||||
MissingFilterValue,
|
||||
UnknownFilteredAttribute,
|
||||
Internal(String),
|
||||
}
|
||||
|
||||
impl error::Error for Error {}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use Error::*;
|
||||
|
||||
match self {
|
||||
SearchDocuments(err) => write!(f, "impossible to search documents; {}", err),
|
||||
RetrieveDocument(id, err) => write!(
|
||||
f,
|
||||
"impossible to retrieve the document with id: {}; {}",
|
||||
id, err
|
||||
),
|
||||
DocumentNotFound(id) => write!(f, "document {} not found", id),
|
||||
CropFieldWrongType(field) => {
|
||||
write!(f, "the field {} cannot be cropped it's not a string", field)
|
||||
}
|
||||
AttributeNotFoundOnDocument(field) => {
|
||||
write!(f, "field {} is not found on document", field)
|
||||
}
|
||||
AttributeNotFoundOnSchema(field) => write!(f, "field {} is not found on schema", field),
|
||||
MissingFilterValue => f.write_str("a filter doesn't have a value to compare it with"),
|
||||
UnknownFilteredAttribute => {
|
||||
f.write_str("a filter is specifying an unknown schema attribute")
|
||||
}
|
||||
Internal(err) => write!(f, "internal error; {}", err),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<meilidb_core::Error> for Error {
|
||||
fn from(error: meilidb_core::Error) -> Self {
|
||||
Error::Internal(error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait IndexSearchExt {
|
||||
fn new_search(&self, query: String) -> SearchBuilder;
|
||||
}
|
||||
|
||||
impl IndexSearchExt for Index {
|
||||
fn new_search(&self, query: String) -> SearchBuilder {
|
||||
SearchBuilder {
|
||||
index: self,
|
||||
query,
|
||||
offset: 0,
|
||||
limit: 20,
|
||||
attributes_to_crop: None,
|
||||
attributes_to_retrieve: None,
|
||||
attributes_to_search_in: None,
|
||||
attributes_to_highlight: None,
|
||||
filters: None,
|
||||
timeout: Duration::from_millis(30),
|
||||
matches: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SearchBuilder<'a> {
|
||||
index: &'a Index,
|
||||
query: String,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_crop: Option<HashMap<String, usize>>,
|
||||
attributes_to_retrieve: Option<HashSet<String>>,
|
||||
attributes_to_search_in: Option<HashSet<String>>,
|
||||
attributes_to_highlight: Option<HashSet<String>>,
|
||||
filters: Option<String>,
|
||||
timeout: Duration,
|
||||
matches: bool,
|
||||
}
|
||||
|
||||
impl<'a> SearchBuilder<'a> {
|
||||
pub fn offset(&mut self, value: usize) -> &SearchBuilder {
|
||||
self.offset = value;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn limit(&mut self, value: usize) -> &SearchBuilder {
|
||||
self.limit = value;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn attributes_to_crop(&mut self, value: HashMap<String, usize>) -> &SearchBuilder {
|
||||
self.attributes_to_crop = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn attributes_to_retrieve(&mut self, value: HashSet<String>) -> &SearchBuilder {
|
||||
self.attributes_to_retrieve = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_retrievable_field(&mut self, value: String) -> &SearchBuilder {
|
||||
let attributes_to_retrieve = self.attributes_to_retrieve.get_or_insert(HashSet::new());
|
||||
attributes_to_retrieve.insert(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn attributes_to_search_in(&mut self, value: HashSet<String>) -> &SearchBuilder {
|
||||
self.attributes_to_search_in = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_attribute_to_search_in(&mut self, value: String) -> &SearchBuilder {
|
||||
let attributes_to_search_in = self.attributes_to_search_in.get_or_insert(HashSet::new());
|
||||
attributes_to_search_in.insert(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn attributes_to_highlight(&mut self, value: HashSet<String>) -> &SearchBuilder {
|
||||
self.attributes_to_highlight = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn filters(&mut self, value: String) -> &SearchBuilder {
|
||||
self.filters = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn timeout(&mut self, value: Duration) -> &SearchBuilder {
|
||||
self.timeout = value;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn get_matches(&mut self) -> &SearchBuilder {
|
||||
self.matches = true;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn search(&self, reader: &heed::RoTxn) -> Result<SearchResult, Error> {
|
||||
let schema = self.index.main.schema(reader);
|
||||
let schema = schema.map_err(|e| Error::Internal(e.to_string()))?;
|
||||
let schema = match schema {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::Internal(String::from("missing schema"))),
|
||||
};
|
||||
|
||||
let ranked_map = self.index.main.ranked_map(reader);
|
||||
let ranked_map = ranked_map.map_err(|e| Error::Internal(e.to_string()))?;
|
||||
let ranked_map = ranked_map.unwrap_or_default();
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
// Change criteria
|
||||
let mut query_builder = match self.get_criteria(reader, &ranked_map, &schema)? {
|
||||
Some(criteria) => self.index.query_builder_with_criteria(criteria),
|
||||
None => self.index.query_builder(),
|
||||
};
|
||||
|
||||
// Filter searchable fields
|
||||
if let Some(fields) = &self.attributes_to_search_in {
|
||||
for attribute in fields.iter().filter_map(|f| schema.attribute(f)) {
|
||||
query_builder.add_searchable_attribute(attribute.0);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(filters) = &self.filters {
|
||||
let mut split = filters.split(':');
|
||||
match (split.next(), split.next()) {
|
||||
(Some(_), None) | (Some(_), Some("")) => return Err(Error::MissingFilterValue),
|
||||
(Some(attr), Some(value)) => {
|
||||
let ref_reader = reader;
|
||||
let ref_index = &self.index;
|
||||
let value = value.trim().to_lowercase();
|
||||
|
||||
let attr = match schema.attribute(attr) {
|
||||
Some(attr) => attr,
|
||||
None => return Err(Error::UnknownFilteredAttribute),
|
||||
};
|
||||
|
||||
query_builder.with_filter(move |id| {
|
||||
let attr = attr;
|
||||
let index = ref_index;
|
||||
let reader = ref_reader;
|
||||
|
||||
match index.document_attribute::<Value>(reader, id, attr) {
|
||||
Ok(Some(Value::String(s))) => s.to_lowercase() == value,
|
||||
Ok(Some(Value::Bool(b))) => {
|
||||
(value == "true" && b) || (value == "false" && !b)
|
||||
}
|
||||
Ok(Some(Value::Array(a))) => {
|
||||
a.into_iter().any(|s| s.as_str() == Some(&value))
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
});
|
||||
}
|
||||
(_, _) => (),
|
||||
}
|
||||
}
|
||||
|
||||
query_builder.with_fetch_timeout(self.timeout);
|
||||
|
||||
let docs =
|
||||
query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));
|
||||
|
||||
let mut hits = Vec::with_capacity(self.limit);
|
||||
for doc in docs.map_err(|e| Error::SearchDocuments(e.to_string()))? {
|
||||
// retrieve the content of the document from the kv store
|
||||
let mut fields: Option<HashSet<&str>> = None;
|
||||
if let Some(attributes_to_retrieve) = &self.attributes_to_retrieve {
|
||||
let mut set = HashSet::new();
|
||||
for field in attributes_to_retrieve {
|
||||
set.insert(field.as_str());
|
||||
}
|
||||
fields = Some(set);
|
||||
}
|
||||
|
||||
let document: IndexMap<String, Value> = self
|
||||
.index
|
||||
.document(reader, fields.as_ref(), doc.id)
|
||||
.map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))?
|
||||
.ok_or(Error::DocumentNotFound(doc.id.0))?;
|
||||
|
||||
let mut formatted = document.clone();
|
||||
let mut matches = doc.highlights.clone();
|
||||
|
||||
// Crops fields if needed
|
||||
if let Some(fields) = &self.attributes_to_crop {
|
||||
crop_document(&mut formatted, &mut matches, &schema, fields);
|
||||
}
|
||||
|
||||
// Transform to readable matches
|
||||
let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema);
|
||||
|
||||
if !self.matches {
|
||||
if let Some(attributes_to_highlight) = &self.attributes_to_highlight {
|
||||
formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight);
|
||||
}
|
||||
}
|
||||
|
||||
let matches_info = if self.matches { Some(matches) } else { None };
|
||||
|
||||
let hit = SearchHit {
|
||||
document,
|
||||
formatted,
|
||||
matches_info,
|
||||
};
|
||||
|
||||
hits.push(hit);
|
||||
}
|
||||
|
||||
let time_ms = start.elapsed().as_millis() as usize;
|
||||
|
||||
let results = SearchResult {
|
||||
hits,
|
||||
offset: self.offset,
|
||||
limit: self.limit,
|
||||
processing_time_ms: time_ms,
|
||||
query: self.query.to_string(),
|
||||
};
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub fn get_criteria(
|
||||
&self,
|
||||
reader: &heed::RoTxn,
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
) -> Result<Option<Criteria<'a>>, Error> {
|
||||
let current_settings = match self.index.main.customs(reader).unwrap() {
|
||||
Some(bytes) => bincode::deserialize(bytes).unwrap(),
|
||||
None => SettingBody::default(),
|
||||
};
|
||||
|
||||
let ranking_rules = ¤t_settings.ranking_rules;
|
||||
let ranking_order = ¤t_settings.ranking_order;
|
||||
|
||||
if let Some(ranking_rules) = ranking_rules {
|
||||
let mut builder = CriteriaBuilder::with_capacity(7 + ranking_rules.len());
|
||||
if let Some(ranking_rules_order) = ranking_order {
|
||||
for rule in ranking_rules_order {
|
||||
match rule.as_str() {
|
||||
"_sum_of_typos" => builder.push(SumOfTypos),
|
||||
"_number_of_words" => builder.push(NumberOfWords),
|
||||
"_word_proximity" => builder.push(WordsProximity),
|
||||
"_sum_of_words_attribute" => builder.push(SumOfWordsAttribute),
|
||||
"_sum_of_words_position" => builder.push(SumOfWordsPosition),
|
||||
"_exact" => builder.push(Exact),
|
||||
_ => {
|
||||
let order = match ranking_rules.get(rule.as_str()) {
|
||||
Some(o) => o,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let custom_ranking = match order {
|
||||
RankingOrdering::Asc => {
|
||||
SortByAttr::lower_is_better(&ranked_map, &schema, &rule)
|
||||
.unwrap()
|
||||
}
|
||||
RankingOrdering::Dsc => {
|
||||
SortByAttr::higher_is_better(&ranked_map, &schema, &rule)
|
||||
.unwrap()
|
||||
}
|
||||
};
|
||||
|
||||
builder.push(custom_ranking);
|
||||
}
|
||||
}
|
||||
}
|
||||
builder.push(DocumentId);
|
||||
return Ok(Some(builder.build()));
|
||||
} else {
|
||||
builder.push(SumOfTypos);
|
||||
builder.push(NumberOfWords);
|
||||
builder.push(WordsProximity);
|
||||
builder.push(SumOfWordsAttribute);
|
||||
builder.push(SumOfWordsPosition);
|
||||
builder.push(Exact);
|
||||
for (rule, order) in ranking_rules.iter() {
|
||||
let custom_ranking = match order {
|
||||
RankingOrdering::Asc => {
|
||||
SortByAttr::lower_is_better(&ranked_map, &schema, &rule).unwrap()
|
||||
}
|
||||
RankingOrdering::Dsc => {
|
||||
SortByAttr::higher_is_better(&ranked_map, &schema, &rule).unwrap()
|
||||
}
|
||||
};
|
||||
builder.push(custom_ranking);
|
||||
}
|
||||
builder.push(DocumentId);
|
||||
return Ok(Some(builder.build()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Serialize, Deserialize)]
|
||||
pub struct MatchPosition {
|
||||
pub start: usize,
|
||||
pub length: usize,
|
||||
}
|
||||
|
||||
impl Ord for MatchPosition {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
match self.start.cmp(&other.start) {
|
||||
Ordering::Equal => self.length.cmp(&other.length),
|
||||
_ => self.start.cmp(&other.start),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type HighlightInfos = HashMap<String, Value>;
|
||||
pub type MatchesInfos = HashMap<String, Vec<MatchPosition>>;
|
||||
// pub type RankingInfos = HashMap<String, u64>;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SearchHit {
|
||||
#[serde(flatten)]
|
||||
pub document: IndexMap<String, Value>,
|
||||
#[serde(rename = "_formatted", skip_serializing_if = "IndexMap::is_empty")]
|
||||
pub formatted: IndexMap<String, Value>,
|
||||
#[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")]
|
||||
pub matches_info: Option<MatchesInfos>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SearchResult {
|
||||
pub hits: Vec<SearchHit>,
|
||||
pub offset: usize,
|
||||
pub limit: usize,
|
||||
pub processing_time_ms: usize,
|
||||
pub query: String,
|
||||
// pub parsed_query: String,
|
||||
// pub params: Option<String>,
|
||||
}
|
||||
|
||||
fn crop_text(
|
||||
text: &str,
|
||||
matches: impl IntoIterator<Item = Highlight>,
|
||||
context: usize,
|
||||
) -> (String, Vec<Highlight>) {
|
||||
let mut matches = matches.into_iter().peekable();
|
||||
|
||||
let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0);
|
||||
let start = char_index.saturating_sub(context);
|
||||
let text = text.chars().skip(start).take(context * 2).collect();
|
||||
|
||||
let matches = matches
|
||||
.take_while(|m| (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2))
|
||||
.map(|match_| Highlight {
|
||||
char_index: match_.char_index - start as u16,
|
||||
..match_
|
||||
})
|
||||
.collect();
|
||||
|
||||
(text, matches)
|
||||
}
|
||||
|
||||
fn crop_document(
|
||||
document: &mut IndexMap<String, Value>,
|
||||
matches: &mut Vec<Highlight>,
|
||||
schema: &Schema,
|
||||
fields: &HashMap<String, usize>,
|
||||
) {
|
||||
matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
|
||||
|
||||
for (field, length) in fields {
|
||||
let attribute = match schema.attribute(field) {
|
||||
Some(attribute) => attribute,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let selected_matches = matches
|
||||
.iter()
|
||||
.filter(|m| SchemaAttr::new(m.attribute) == attribute)
|
||||
.cloned();
|
||||
|
||||
if let Some(Value::String(ref mut original_text)) = document.get_mut(field) {
|
||||
let (cropped_text, cropped_matches) =
|
||||
crop_text(original_text, selected_matches, *length);
|
||||
|
||||
*original_text = cropped_text;
|
||||
|
||||
matches.retain(|m| SchemaAttr::new(m.attribute) != attribute);
|
||||
matches.extend_from_slice(&cropped_matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn calculate_matches(
|
||||
matches: Vec<Highlight>,
|
||||
attributes_to_retrieve: Option<HashSet<String>>,
|
||||
schema: &Schema,
|
||||
) -> MatchesInfos {
|
||||
let mut matches_result: HashMap<String, Vec<MatchPosition>> = HashMap::new();
|
||||
for m in matches.iter() {
|
||||
let attribute = schema
|
||||
.attribute_name(SchemaAttr::new(m.attribute))
|
||||
.to_string();
|
||||
if let Some(attributes_to_retrieve) = attributes_to_retrieve.clone() {
|
||||
if !attributes_to_retrieve.contains(attribute.as_str()) {
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Some(pos) = matches_result.get_mut(&attribute) {
|
||||
pos.push(MatchPosition {
|
||||
start: m.char_index as usize,
|
||||
length: m.char_length as usize,
|
||||
});
|
||||
} else {
|
||||
let mut positions = Vec::new();
|
||||
positions.push(MatchPosition {
|
||||
start: m.char_index as usize,
|
||||
length: m.char_length as usize,
|
||||
});
|
||||
matches_result.insert(attribute, positions);
|
||||
}
|
||||
}
|
||||
for (_, val) in matches_result.iter_mut() {
|
||||
val.sort_unstable();
|
||||
val.dedup();
|
||||
}
|
||||
matches_result
|
||||
}
|
||||
|
||||
fn calculate_highlights(
|
||||
document: &IndexMap<String, Value>,
|
||||
matches: &MatchesInfos,
|
||||
attributes_to_highlight: &HashSet<String>,
|
||||
) -> IndexMap<String, Value> {
|
||||
let mut highlight_result = IndexMap::new();
|
||||
|
||||
for (attribute, matches) in matches.iter() {
|
||||
if attributes_to_highlight.contains(attribute) {
|
||||
if let Some(Value::String(value)) = document.get(attribute) {
|
||||
let value: Vec<_> = value.chars().collect();
|
||||
let mut highlighted_value = String::new();
|
||||
let mut index = 0;
|
||||
for m in matches {
|
||||
if m.start >= index {
|
||||
let before = value.get(index..m.start);
|
||||
let highlighted = value.get(m.start..(m.start + m.length));
|
||||
if let (Some(before), Some(highlighted)) = (before, highlighted) {
|
||||
highlighted_value.extend(before);
|
||||
highlighted_value.push_str("<em>");
|
||||
highlighted_value.extend(highlighted);
|
||||
highlighted_value.push_str("</em>");
|
||||
index = m.start + m.length;
|
||||
} else {
|
||||
error!("value: {:?}; index: {:?}, match: {:?}", value, index, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
highlighted_value.extend(value[index..].iter());
|
||||
highlight_result.insert(attribute.to_string(), Value::String(highlighted_value));
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
highlight_result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn calculate_highlights() {
|
||||
let data = r#"{
|
||||
"title": "Fondation (Isaac ASIMOV)",
|
||||
"description": "En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la Fondation."
|
||||
}"#;
|
||||
|
||||
let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();
|
||||
let mut attributes_to_highlight = HashSet::new();
|
||||
attributes_to_highlight.insert("title".to_string());
|
||||
attributes_to_highlight.insert("description".to_string());
|
||||
|
||||
let mut matches = HashMap::new();
|
||||
|
||||
let mut m = Vec::new();
|
||||
m.push(MatchPosition {
|
||||
start: 0,
|
||||
length: 9,
|
||||
});
|
||||
matches.insert("title".to_string(), m);
|
||||
|
||||
let mut m = Vec::new();
|
||||
m.push(MatchPosition {
|
||||
start: 510,
|
||||
length: 9,
|
||||
});
|
||||
matches.insert("description".to_string(), m);
|
||||
let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);
|
||||
|
||||
let mut result_expected = IndexMap::new();
|
||||
result_expected.insert(
|
||||
"title".to_string(),
|
||||
Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()),
|
||||
);
|
||||
result_expected.insert("description".to_string(), Value::String("En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la <em>Fondation</em>.".to_string()));
|
||||
|
||||
assert_eq!(result, result_expected);
|
||||
}
|
||||
}
|
meilidb-http/src/helpers/mod.rs (new file, 2 lines)
@ -0,0 +1,2 @@
pub mod meilidb;
pub mod tide;
meilidb-http/src/helpers/tide.rs (new file, 118 lines)
@ -0,0 +1,118 @@
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::models::token::*;
|
||||
use crate::Data;
|
||||
use chrono::Utc;
|
||||
use heed::types::{SerdeBincode, Str};
|
||||
use meilidb_core::Index;
|
||||
use serde_json::Value;
|
||||
use tide::Context;
|
||||
|
||||
pub trait ContextExt {
|
||||
fn is_allowed(&self, acl: ACL) -> SResult<()>;
|
||||
fn header(&self, name: &str) -> Result<String, ResponseError>;
|
||||
fn url_param(&self, name: &str) -> Result<String, ResponseError>;
|
||||
fn index(&self) -> Result<Index, ResponseError>;
|
||||
fn identifier(&self) -> Result<String, ResponseError>;
|
||||
}
|
||||
|
||||
impl ContextExt for Context<Data> {
|
||||
fn is_allowed(&self, acl: ACL) -> SResult<()> {
|
||||
let admin_token = match &self.state().admin_token {
|
||||
Some(admin_token) => admin_token,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
let user_api_key = self.header("X-Meili-API-Key")?;
|
||||
if user_api_key == *admin_token {
|
||||
return Ok(());
|
||||
}
|
||||
let request_index: Option<String> = None; //self.param::<String>("index").ok();
|
||||
|
||||
let db = &self.state().db;
|
||||
let env = &db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, user_api_key);
|
||||
|
||||
let token_config = db
|
||||
.common_store()
|
||||
.get::<Str, SerdeBincode<Token>>(&reader, &token_key)
|
||||
.map_err(ResponseError::internal)?
|
||||
.ok_or(ResponseError::not_found(format!(
|
||||
"token key: {}",
|
||||
token_key
|
||||
)))?;
|
||||
|
||||
if token_config.revoked {
|
||||
return Err(ResponseError::invalid_token("token revoked"));
|
||||
}
|
||||
|
||||
if let Some(index) = request_index {
|
||||
if !token_config
|
||||
.indexes
|
||||
.iter()
|
||||
.any(|r| match_wildcard(&r, &index))
|
||||
{
|
||||
return Err(ResponseError::invalid_token(
|
||||
"token is not allowed to access to this index",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if token_config.expires_at < Utc::now() {
|
||||
return Err(ResponseError::invalid_token("token expired"));
|
||||
}
|
||||
|
||||
if token_config.acl.contains(&ACL::All) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !token_config.acl.contains(&acl) {
|
||||
return Err(ResponseError::invalid_token("token do not have this ACL"));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn header(&self, name: &str) -> Result<String, ResponseError> {
|
||||
let header = self
|
||||
.headers()
|
||||
.get(name)
|
||||
.ok_or(ResponseError::missing_header(name))?
|
||||
.to_str()
|
||||
.map_err(|_| ResponseError::missing_header(name))?
|
||||
.to_string();
|
||||
Ok(header)
|
||||
}
|
||||
|
||||
fn url_param(&self, name: &str) -> Result<String, ResponseError> {
|
||||
let param = self
|
||||
.param::<String>(name)
|
||||
.map_err(|e| ResponseError::bad_parameter(name, e))?;
|
||||
Ok(param)
|
||||
}
|
||||
|
||||
fn index(&self) -> Result<Index, ResponseError> {
|
||||
let index_name = self.url_param("index")?;
|
||||
let index = self
|
||||
.state()
|
||||
.db
|
||||
.open_index(&index_name)
|
||||
.ok_or(ResponseError::index_not_found(index_name))?;
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
fn identifier(&self) -> Result<String, ResponseError> {
|
||||
let name = self
|
||||
.param::<Value>("identifier")
|
||||
.as_ref()
|
||||
.map(meilidb_core::serde::value_to_string)
|
||||
.map_err(|e| ResponseError::bad_parameter("identifier", e))?
|
||||
.ok_or(ResponseError::bad_parameter(
|
||||
"identifier",
|
||||
"missing parameter",
|
||||
))?;
|
||||
|
||||
Ok(name)
|
||||
}
|
||||
}
|
meilidb-http/src/lib.rs (new file, 11 lines)
@ -0,0 +1,11 @@
#[macro_use]
extern crate envconfig_derive;

pub mod data;
pub mod error;
pub mod helpers;
pub mod models;
pub mod option;
pub mod routes;

pub use self::data::Data;
meilidb-http/src/main.rs (new file, 44 lines)
@ -0,0 +1,44 @@
use http::header::HeaderValue;
use log::info;
use main_error::MainError;
use tide::middleware::{CorsMiddleware, CorsOrigin};
use tide_log::RequestLogger;

use meilidb_http::data::Data;
use meilidb_http::option::Opt;
use meilidb_http::routes;
use meilidb_http::routes::index::index_update_callback;

#[cfg(not(target_os = "macos"))]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

pub fn main() -> Result<(), MainError> {
    env_logger::init();

    let opt = Opt::new();
    let data = Data::new(opt.clone());

    let data_cloned = data.clone();
    data.db.set_update_callback(Box::new(move |name, status| {
        index_update_callback(name, &data_cloned, status);
    }));

    let mut app = tide::App::with_state(data);

    app.middleware(
        CorsMiddleware::new()
            .allow_origin(CorsOrigin::from("*"))
            .allow_methods(HeaderValue::from_static("GET, POST, OPTIONS")),
    );
    app.middleware(RequestLogger::new());
    app.middleware(tide_compression::Compression::new());
    app.middleware(tide_compression::Decompression::new());

    routes::load_routes(&mut app);

    info!("Server HTTP enabled");
    app.run(opt.http_addr)?;

    Ok(())
}
meilidb-http/src/models/mod.rs (new file, 3 lines)
@ -0,0 +1,3 @@
pub mod schema;
pub mod token;
pub mod update_operation;
meilidb-http/src/models/schema.rs (new file, 118 lines)
@ -0,0 +1,118 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use meilidb_schema::{Schema, SchemaBuilder, SchemaProps};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum FieldProperties {
|
||||
Identifier,
|
||||
Indexed,
|
||||
Displayed,
|
||||
Ranked,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SchemaBody(IndexMap<String, HashSet<FieldProperties>>);
|
||||
|
||||
impl From<Schema> for SchemaBody {
|
||||
fn from(value: Schema) -> SchemaBody {
|
||||
let mut map = IndexMap::new();
|
||||
for (name, _attr, props) in value.iter() {
|
||||
let old_properties = map.entry(name.to_owned()).or_insert(HashSet::new());
|
||||
if props.is_indexed() {
|
||||
old_properties.insert(FieldProperties::Indexed);
|
||||
}
|
||||
if props.is_displayed() {
|
||||
old_properties.insert(FieldProperties::Displayed);
|
||||
}
|
||||
if props.is_ranked() {
|
||||
old_properties.insert(FieldProperties::Ranked);
|
||||
}
|
||||
}
|
||||
let old_properties = map
|
||||
.entry(value.identifier_name().to_string())
|
||||
.or_insert(HashSet::new());
|
||||
old_properties.insert(FieldProperties::Identifier);
|
||||
old_properties.insert(FieldProperties::Displayed);
|
||||
SchemaBody(map)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Schema> for SchemaBody {
|
||||
fn into(self) -> Schema {
|
||||
let mut identifier = "documentId".to_string();
|
||||
let mut attributes = IndexMap::new();
|
||||
for (field, properties) in self.0 {
|
||||
let mut indexed = false;
|
||||
let mut displayed = false;
|
||||
let mut ranked = false;
|
||||
for property in properties {
|
||||
match property {
|
||||
FieldProperties::Indexed => indexed = true,
|
||||
FieldProperties::Displayed => displayed = true,
|
||||
FieldProperties::Ranked => ranked = true,
|
||||
FieldProperties::Identifier => identifier = field.clone(),
|
||||
}
|
||||
}
|
||||
attributes.insert(
|
||||
field,
|
||||
SchemaProps {
|
||||
indexed,
|
||||
displayed,
|
||||
ranked,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let mut builder = SchemaBuilder::with_identifier(identifier);
|
||||
for (field, props) in attributes {
|
||||
builder.new_attribute(field, props);
|
||||
}
|
||||
builder.build()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_schema_body_conversion() {
|
||||
let schema_body = r#"
|
||||
{
|
||||
"id": ["identifier", "indexed", "displayed"],
|
||||
"title": ["indexed", "displayed"],
|
||||
"date": ["displayed"]
|
||||
}
|
||||
"#;
|
||||
|
||||
let schema_builder = r#"
|
||||
{
|
||||
"identifier": "id",
|
||||
"attributes": {
|
||||
"id": {
|
||||
"indexed": true,
|
||||
"displayed": true
|
||||
},
|
||||
"title": {
|
||||
"indexed": true,
|
||||
"displayed": true
|
||||
},
|
||||
"date": {
|
||||
"displayed": true
|
||||
}
|
||||
}
|
||||
}
|
||||
"#;
|
||||
|
||||
let schema_body: SchemaBody = serde_json::from_str(schema_body).unwrap();
|
||||
let schema_builder: SchemaBuilder = serde_json::from_str(schema_builder).unwrap();
|
||||
|
||||
let schema_from_body: Schema = schema_body.into();
|
||||
let schema_from_builder: Schema = schema_builder.build();
|
||||
|
||||
assert_eq!(schema_from_body, schema_from_builder);
|
||||
}
|
||||
}
|
meilidb-http/src/models/token.rs (new file, 72 lines)
@ -0,0 +1,72 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

pub const TOKEN_PREFIX_KEY: &str = "_token_";

#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum ACL {
    IndexesRead,
    IndexesWrite,
    DocumentsRead,
    DocumentsWrite,
    SettingsRead,
    SettingsWrite,
    Admin,
    #[serde(rename = "*")]
    All,
}

pub type Wildcard = String;

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Token {
    pub key: String,
    pub description: String,
    pub acl: Vec<ACL>,
    pub indexes: Vec<Wildcard>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
    pub expires_at: DateTime<Utc>,
    pub revoked: bool,
}

fn cleanup_wildcard(input: &str) -> (bool, &str, bool) {
    let first = input.chars().next().filter(|&c| c == '*').is_some();
    let last = input.chars().last().filter(|&c| c == '*').is_some();
    let bound_last = std::cmp::max(input.len().saturating_sub(last as usize), first as usize);
    let output = input.get(first as usize..bound_last).unwrap();
    (first, output, last)
}

pub fn match_wildcard(pattern: &str, input: &str) -> bool {
    let (first, pattern, last) = cleanup_wildcard(pattern);

    match (first, last) {
        (false, false) => pattern == input,
        (true, false) => input.ends_with(pattern),
        (false, true) => input.starts_with(pattern),
        (true, true) => input.contains(pattern),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_match_wildcard() {
        assert!(match_wildcard("*", "qqq"));
        assert!(match_wildcard("*", ""));
        assert!(match_wildcard("*ab", "qqqab"));
        assert!(match_wildcard("*ab*", "qqqabqq"));
        assert!(match_wildcard("ab*", "abqqq"));
        assert!(match_wildcard("**", "ab"));
        assert!(match_wildcard("ab", "ab"));
        assert!(match_wildcard("ab*", "ab"));
        assert!(match_wildcard("*ab", "ab"));
        assert!(match_wildcard("*ab*", "ab"));
        assert!(match_wildcard("*😆*", "ab😆dsa"));
    }
}
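For clarity, a sketch of the check these fields support; the same logic is spread across `ContextExt::is_allowed` in helpers/tide.rs, so the helper below is only illustrative.

// hedged sketch, not part of the diff
fn token_grants(token: &Token, acl: &ACL, index: &str) -> bool {
    !token.revoked
        && token.expires_at > Utc::now()
        && (token.acl.contains(&ACL::All) || token.acl.contains(acl))
        && token.indexes.iter().any(|pattern| match_wildcard(pattern, index))
}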
meilidb-http/src/models/update_operation.rs (new file, 33 lines)
@ -0,0 +1,33 @@
use std::fmt;

#[allow(dead_code)]
#[derive(Debug)]
pub enum UpdateOperation {
    ClearAllDocuments,
    DocumentsAddition,
    DocumentsDeletion,
    SynonymsAddition,
    SynonymsDeletion,
    StopWordsAddition,
    StopWordsDeletion,
    Schema,
    Config,
}

impl fmt::Display for UpdateOperation {
    fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
        use UpdateOperation::*;

        match self {
            ClearAllDocuments => write!(f, "ClearAllDocuments"),
            DocumentsAddition => write!(f, "DocumentsAddition"),
            DocumentsDeletion => write!(f, "DocumentsDeletion"),
            SynonymsAddition => write!(f, "SynonymsAddition"),
            SynonymsDeletion => write!(f, "SynonymsDeletion"),
            StopWordsAddition => write!(f, "StopWordsAddition"),
            StopWordsDeletion => write!(f, "StopWordsDeletion"),
            Schema => write!(f, "Schema"),
            Config => write!(f, "Config"),
        }
    }
}
meilidb-http/src/option.rs (new file, 56 lines)
@ -0,0 +1,56 @@
use envconfig::Envconfig;
use structopt::StructOpt;

#[derive(Debug, Clone, StructOpt, Envconfig)]
struct Vars {
    /// The destination where the database must be created.
    #[structopt(long)]
    #[envconfig(from = "MEILI_DATABASE_PATH")]
    pub database_path: Option<String>,

    /// The addr on which the http server will listen.
    #[structopt(long)]
    #[envconfig(from = "MEILI_HTTP_ADDR")]
    pub http_addr: Option<String>,

    #[structopt(long)]
    #[envconfig(from = "MEILI_ADMIN_TOKEN")]
    pub admin_token: Option<String>,
}

#[derive(Clone, Debug)]
pub struct Opt {
    pub database_path: String,
    pub http_addr: String,
    pub admin_token: Option<String>,
}

impl Default for Opt {
    fn default() -> Self {
        Opt {
            database_path: String::from("/tmp/meilidb"),
            http_addr: String::from("127.0.0.1:8080"),
            admin_token: None,
        }
    }
}

impl Opt {
    pub fn new() -> Self {
        let default = Self::default();
        let args = Vars::from_args();
        let env = Vars::init().unwrap();

        Self {
            database_path: env
                .database_path
                .or(args.database_path)
                .unwrap_or(default.database_path),
            http_addr: env
                .http_addr
                .or(args.http_addr)
                .unwrap_or(default.http_addr),
            admin_token: env.admin_token.or(args.admin_token).or(default.admin_token),
        }
    }
}
251
meilidb-http/src/routes/document.rs
Normal file
@@ -0,0 +1,251 @@
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use http::StatusCode;
|
||||
use indexmap::IndexMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use tide::querystring::ContextExt as QSContextExt;
|
||||
use tide::response::IntoResponse;
|
||||
use tide::{Context, Response};
|
||||
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::models::token::ACL::*;
|
||||
use crate::Data;
|
||||
|
||||
pub async fn get_document(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(DocumentsRead)?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
|
||||
let identifier = ctx.identifier()?;
|
||||
let document_id = meilidb_core::serde::compute_document_id(identifier.clone());
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let response = index
|
||||
.document::<IndexMap<String, Value>>(&reader, None, document_id)
|
||||
.map_err(ResponseError::internal)?
|
||||
.ok_or(ResponseError::document_not_found(&identifier))?;
|
||||
|
||||
if response.is_empty() {
|
||||
return Err(ResponseError::document_not_found(identifier));
|
||||
}
|
||||
|
||||
Ok(tide::response::json(response))
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexUpdateResponse {
|
||||
pub update_id: u64,
|
||||
}
|
||||
|
||||
pub async fn delete_document(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(DocumentsWrite)?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
let identifier = ctx.identifier()?;
|
||||
let document_id = meilidb_core::serde::compute_document_id(identifier.clone());
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let mut documents_deletion = index.documents_deletion();
|
||||
documents_deletion.delete_document_by_id(document_id);
|
||||
let update_id = documents_deletion
|
||||
.finalize(&mut writer)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
#[derive(Default, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct BrowseQuery {
|
||||
offset: Option<usize>,
|
||||
limit: Option<usize>,
|
||||
attributes_to_retrieve: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn browse_documents(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(DocumentsRead)?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
let query: BrowseQuery = ctx.url_query().unwrap_or(BrowseQuery::default());
|
||||
|
||||
let offset = query.offset.unwrap_or(0);
|
||||
let limit = query.limit.unwrap_or(20);
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let documents_ids: Result<BTreeSet<_>, _> =
|
||||
match index.documents_fields_counts.documents_ids(&reader) {
|
||||
Ok(documents_ids) => documents_ids.skip(offset).take(limit).collect(),
|
||||
Err(e) => return Err(ResponseError::internal(e)),
|
||||
};
|
||||
|
||||
let documents_ids = match documents_ids {
|
||||
Ok(documents_ids) => documents_ids,
|
||||
Err(e) => return Err(ResponseError::internal(e)),
|
||||
};
|
||||
|
||||
let mut response_body = Vec::<IndexMap<String, Value>>::new();
|
||||
|
||||
if let Some(attributes) = query.attributes_to_retrieve {
|
||||
let attributes = attributes.split(',').collect::<HashSet<&str>>();
|
||||
for document_id in documents_ids {
|
||||
if let Ok(Some(document)) = index.document(&reader, Some(&attributes), document_id) {
|
||||
response_body.push(document);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for document_id in documents_ids {
|
||||
if let Ok(Some(document)) = index.document(&reader, None, document_id) {
|
||||
response_body.push(document);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if response_body.is_empty() {
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::NO_CONTENT)
|
||||
.into_response())
|
||||
} else {
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::OK)
|
||||
.into_response())
|
||||
}
|
||||
}
|
||||
|
||||
fn infered_schema(document: &IndexMap<String, Value>) -> Option<meilidb_schema::Schema> {
|
||||
use meilidb_schema::{SchemaBuilder, DISPLAYED, INDEXED};
|
||||
|
||||
let mut identifier = None;
|
||||
for key in document.keys() {
|
||||
if identifier.is_none() && key.to_lowercase().contains("id") {
|
||||
identifier = Some(key);
|
||||
}
|
||||
}
|
||||
|
||||
match identifier {
|
||||
Some(identifier) => {
|
||||
let mut builder = SchemaBuilder::with_identifier(identifier);
|
||||
for key in document.keys() {
|
||||
builder.new_attribute(key, DISPLAYED | INDEXED);
|
||||
}
|
||||
Some(builder.build())
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn update_multiple_documents(mut ctx: Context<Data>, is_partial: bool) -> SResult<Response> {
|
||||
ctx.is_allowed(DocumentsWrite)?;
|
||||
|
||||
let data: Vec<IndexMap<String, Value>> =
|
||||
ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let current_schema = index
|
||||
.main
|
||||
.schema(&writer)
|
||||
.map_err(ResponseError::internal)?;
|
||||
if current_schema.is_none() {
|
||||
match data.first().and_then(infered_schema) {
|
||||
Some(schema) => {
|
||||
index
|
||||
.schema_update(&mut writer, schema)
|
||||
.map_err(ResponseError::internal)?;
|
||||
}
|
||||
None => return Err(ResponseError::bad_request("Could not infer a schema")),
|
||||
}
|
||||
}
|
||||
|
||||
let mut document_addition = if is_partial {
|
||||
index.documents_partial_addition()
|
||||
} else {
|
||||
index.documents_addition()
|
||||
};
|
||||
|
||||
for document in data {
|
||||
document_addition.update_document(document);
|
||||
}
|
||||
|
||||
let update_id = document_addition
|
||||
.finalize(&mut writer)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
pub async fn add_or_replace_multiple_documents(ctx: Context<Data>) -> SResult<Response> {
|
||||
update_multiple_documents(ctx, false).await
|
||||
}
|
||||
|
||||
pub async fn add_or_update_multiple_documents(ctx: Context<Data>) -> SResult<Response> {
|
||||
update_multiple_documents(ctx, true).await
|
||||
}
|
||||
|
||||
pub async fn delete_multiple_documents(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(DocumentsWrite)?;
|
||||
|
||||
let data: Vec<Value> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let mut documents_deletion = index.documents_deletion();
|
||||
|
||||
for identifier in data {
|
||||
if let Some(identifier) = meilidb_core::serde::value_to_string(&identifier) {
|
||||
documents_deletion
|
||||
.delete_document_by_id(meilidb_core::serde::compute_document_id(identifier));
|
||||
}
|
||||
}
|
||||
|
||||
let update_id = documents_deletion
|
||||
.finalize(&mut writer)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
pub async fn clear_all_documents(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(DocumentsWrite)?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
let update_id = index
|
||||
.clear_all(&mut writer)
|
||||
.map_err(ResponseError::internal)?;
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
79
meilidb-http/src/routes/health.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::models::token::ACL::*;
|
||||
use crate::Data;
|
||||
|
||||
use heed::types::{Str, Unit};
|
||||
use serde::Deserialize;
|
||||
use tide::Context;
|
||||
|
||||
const UNHEALTHY_KEY: &str = "_is_unhealthy";
|
||||
|
||||
pub async fn get_health(ctx: Context<Data>) -> SResult<()> {
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let common_store = ctx.state().db.common_store();
|
||||
|
||||
if let Ok(Some(_)) = common_store.get::<Str, Unit>(&reader, UNHEALTHY_KEY) {
|
||||
return Err(ResponseError::Maintenance);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn set_healthy(ctx: Context<Data>) -> SResult<()> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let common_store = ctx.state().db.common_store();
|
||||
match common_store.delete::<Str>(&mut writer, UNHEALTHY_KEY) {
|
||||
Ok(_) => (),
|
||||
Err(e) => return Err(ResponseError::internal(e)),
|
||||
}
|
||||
|
||||
if let Err(e) = writer.commit() {
|
||||
return Err(ResponseError::internal(e));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn set_unhealthy(ctx: Context<Data>) -> SResult<()> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let common_store = ctx.state().db.common_store();
|
||||
|
||||
if let Err(e) = common_store.put::<Str, Unit>(&mut writer, UNHEALTHY_KEY, &()) {
|
||||
return Err(ResponseError::internal(e));
|
||||
}
|
||||
|
||||
if let Err(e) = writer.commit() {
|
||||
return Err(ResponseError::internal(e));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Clone)]
|
||||
struct HealthBody {
|
||||
health: bool,
|
||||
}
|
||||
|
||||
pub async fn change_healthyness(mut ctx: Context<Data>) -> SResult<()> {
|
||||
let body: HealthBody = ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
|
||||
if body.health {
|
||||
set_healthy(ctx).await
|
||||
} else {
|
||||
set_unhealthy(ctx).await
|
||||
}
|
||||
}
|
195
meilidb-http/src/routes/index.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
use http::StatusCode;
|
||||
use meilidb_core::ProcessedUpdateResult;
|
||||
use meilidb_schema::Schema;
|
||||
use serde_json::json;
|
||||
use tide::response::IntoResponse;
|
||||
use tide::{Context, Response};
|
||||
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::models::schema::SchemaBody;
|
||||
use crate::models::token::ACL::*;
|
||||
use crate::routes::document::IndexUpdateResponse;
|
||||
use crate::Data;
|
||||
|
||||
pub async fn list_indexes(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(IndexesRead)?;
|
||||
let list = ctx
|
||||
.state()
|
||||
.db
|
||||
.indexes_names()
|
||||
.map_err(ResponseError::internal)?;
|
||||
Ok(tide::response::json(list))
|
||||
}
|
||||
|
||||
pub async fn get_index_schema(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(IndexesRead)?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let schema = index
|
||||
.main
|
||||
.schema(&reader)
|
||||
.map_err(ResponseError::create_index)?;
|
||||
|
||||
match schema {
|
||||
Some(schema) => {
|
||||
let schema = SchemaBody::from(schema);
|
||||
Ok(tide::response::json(schema))
|
||||
}
|
||||
None => Ok(
|
||||
tide::response::json(json!({ "message": "missing index schema" }))
|
||||
.with_status(StatusCode::NOT_FOUND)
|
||||
.into_response(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_index(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(IndexesWrite)?;
|
||||
|
||||
let index_name = ctx.url_param("index")?;
|
||||
|
||||
let body = ctx.body_bytes().await.map_err(ResponseError::bad_request)?;
|
||||
let schema: Option<Schema> = if body.is_empty() {
|
||||
None
|
||||
} else {
|
||||
serde_json::from_slice::<SchemaBody>(&body)
|
||||
.map_err(ResponseError::bad_request)
|
||||
.map(|s| Some(s.into()))?
|
||||
};
|
||||
|
||||
let db = &ctx.state().db;
|
||||
|
||||
let created_index = match db.create_index(&index_name) {
|
||||
Ok(index) => index,
|
||||
Err(e) => return Err(ResponseError::create_index(e)),
|
||||
};
|
||||
|
||||
let env = &db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
match schema {
|
||||
Some(schema) => {
|
||||
let update_id = created_index
|
||||
.schema_update(&mut writer, schema.clone())
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::CREATED)
|
||||
.into_response())
|
||||
}
|
||||
None => Ok(Response::new(tide::Body::empty())
|
||||
.with_status(StatusCode::NO_CONTENT)
|
||||
.into_response()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn update_schema(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(IndexesWrite)?;
|
||||
|
||||
let index_name = ctx.url_param("index")?;
|
||||
|
||||
let schema = ctx
|
||||
.body_json::<SchemaBody>()
|
||||
.await
|
||||
.map_err(ResponseError::bad_request)?;
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let index = db
|
||||
.open_index(&index_name)
|
||||
.ok_or(ResponseError::index_not_found(index_name))?;
|
||||
|
||||
let schema: meilidb_schema::Schema = schema.into();
|
||||
let update_id = index
|
||||
.schema_update(&mut writer, schema.clone())
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
pub async fn get_update_status(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(IndexesRead)?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let update_id = ctx
|
||||
.param::<u64>("update_id")
|
||||
.map_err(|e| ResponseError::bad_parameter("update_id", e))?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
let status = index
|
||||
.update_status(&reader, update_id)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
let response = match status {
|
||||
Some(status) => tide::response::json(status)
|
||||
.with_status(StatusCode::OK)
|
||||
.into_response(),
|
||||
None => tide::response::json(json!({ "message": "unknown update id" }))
|
||||
.with_status(StatusCode::NOT_FOUND)
|
||||
.into_response(),
|
||||
};
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub async fn get_all_updates_status(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(IndexesRead)?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
let all_status = index
|
||||
.all_updates_status(&reader)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
let response = tide::response::json(all_status)
|
||||
.with_status(StatusCode::OK)
|
||||
.into_response();
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub async fn delete_index(ctx: Context<Data>) -> SResult<StatusCode> {
|
||||
ctx.is_allowed(IndexesWrite)?;
|
||||
let index_name = ctx.url_param("index")?;
|
||||
|
||||
let found = ctx
|
||||
.state()
|
||||
.db
|
||||
.delete_index(&index_name)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
if found {
|
||||
Ok(StatusCode::NO_CONTENT)
|
||||
} else {
|
||||
Ok(StatusCode::NOT_FOUND)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_update_callback(index_name: &str, data: &Data, _status: ProcessedUpdateResult) {
|
||||
let env = &data.db.env;
|
||||
let mut writer = env.write_txn().unwrap();
|
||||
|
||||
data.compute_stats(&mut writer, &index_name).unwrap();
|
||||
data.set_last_update(&mut writer, &index_name).unwrap();
|
||||
|
||||
writer.commit().unwrap();
|
||||
}
|
189
meilidb-http/src/routes/key.rs
Normal file
@@ -0,0 +1,189 @@
|
||||
use chrono::serde::ts_seconds;
|
||||
use chrono::{DateTime, Utc};
|
||||
use heed::types::{SerdeBincode, Str};
|
||||
use http::StatusCode;
|
||||
use rand::seq::SliceRandom;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tide::response::IntoResponse;
|
||||
use tide::{Context, Response};
|
||||
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::models::token::ACL::*;
|
||||
use crate::models::token::*;
|
||||
use crate::Data;
|
||||
|
||||
fn generate_api_key() -> String {
|
||||
let mut rng = rand::thread_rng();
|
||||
let sample = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
|
||||
sample
|
||||
.choose_multiple(&mut rng, 40)
|
||||
.map(|c| *c as char)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub async fn list(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let common_store = db.common_store();
|
||||
|
||||
let mut response: Vec<Token> = Vec::new();
|
||||
|
||||
let iter = common_store
|
||||
.prefix_iter::<Str, SerdeBincode<Token>>(&reader, TOKEN_PREFIX_KEY)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
for result in iter {
|
||||
let (_, token) = result.map_err(ResponseError::internal)?;
|
||||
response.push(token);
|
||||
}
|
||||
|
||||
Ok(tide::response::json(response))
|
||||
}
|
||||
|
||||
pub async fn get(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
let request_key = ctx.url_param("key")?;
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, request_key);
|
||||
|
||||
let token_config = db
|
||||
.common_store()
|
||||
.get::<Str, SerdeBincode<Token>>(&reader, &token_key)
|
||||
.map_err(ResponseError::internal)?
|
||||
.ok_or(ResponseError::not_found(format!(
|
||||
"token key: {}",
|
||||
token_key
|
||||
)))?;
|
||||
|
||||
Ok(tide::response::json(token_config))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct CreatedRequest {
|
||||
description: String,
|
||||
acl: Vec<ACL>,
|
||||
indexes: Vec<Wildcard>,
|
||||
#[serde(with = "ts_seconds")]
|
||||
expires_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
pub async fn create(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
|
||||
let data: CreatedRequest = ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
|
||||
let key = generate_api_key();
|
||||
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, key);
|
||||
|
||||
let token_definition = Token {
|
||||
key,
|
||||
description: data.description,
|
||||
acl: data.acl,
|
||||
indexes: data.indexes,
|
||||
expires_at: data.expires_at,
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
revoked: false,
|
||||
};
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
db.common_store()
|
||||
.put::<Str, SerdeBincode<Token>>(&mut writer, &token_key, &token_definition)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
Ok(tide::response::json(token_definition)
|
||||
.with_status(StatusCode::CREATED)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct UpdatedRequest {
|
||||
description: Option<String>,
|
||||
acl: Option<Vec<ACL>>,
|
||||
indexes: Option<Vec<Wildcard>>,
|
||||
}
|
||||
|
||||
pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
let request_key = ctx.url_param("key")?;
|
||||
|
||||
let data: UpdatedRequest = ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let common_store = db.common_store();
|
||||
|
||||
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, request_key);
|
||||
|
||||
let mut token_config = common_store
|
||||
.get::<Str, SerdeBincode<Token>>(&writer, &token_key)
|
||||
.map_err(ResponseError::internal)?
|
||||
.ok_or(ResponseError::not_found(format!(
|
||||
"token key: {}",
|
||||
token_key
|
||||
)))?;
|
||||
|
||||
// apply the modifications
|
||||
if let Some(description) = data.description {
|
||||
token_config.description = description;
|
||||
}
|
||||
|
||||
if let Some(acl) = data.acl {
|
||||
token_config.acl = acl;
|
||||
}
|
||||
|
||||
if let Some(indexes) = data.indexes {
|
||||
token_config.indexes = indexes;
|
||||
}
|
||||
|
||||
token_config.updated_at = Utc::now();
|
||||
|
||||
common_store
|
||||
.put::<Str, SerdeBincode<Token>>(&mut writer, &token_key, &token_config)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
Ok(tide::response::json(token_config)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
pub async fn delete(ctx: Context<Data>) -> SResult<StatusCode> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
let request_key = ctx.url_param("key")?;
|
||||
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let common_store = db.common_store();
|
||||
|
||||
let token_key = format!("{}{}", TOKEN_PREFIX_KEY, request_key);
|
||||
|
||||
common_store
|
||||
.delete::<Str>(&mut writer, &token_key)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
Ok(StatusCode::ACCEPTED)
|
||||
}
|
112
meilidb-http/src/routes/mod.rs
Normal file
@@ -0,0 +1,112 @@
|
||||
use crate::data::Data;
|
||||
|
||||
pub mod document;
|
||||
pub mod health;
|
||||
pub mod index;
|
||||
pub mod key;
|
||||
pub mod search;
|
||||
pub mod setting;
|
||||
pub mod stats;
|
||||
pub mod stop_words;
|
||||
pub mod synonym;
|
||||
|
||||
pub fn load_routes(app: &mut tide::App<Data>) {
|
||||
app.at("").nest(|router| {
|
||||
router.at("/indexes").nest(|router| {
|
||||
router.at("/").get(index::list_indexes);
|
||||
|
||||
router.at("/search").post(search::search_multi_index);
|
||||
|
||||
router.at("/:index").nest(|router| {
|
||||
router.at("/search").get(search::search_with_url_query);
|
||||
|
||||
router.at("/updates").nest(|router| {
|
||||
router.at("/").get(index::get_all_updates_status);
|
||||
|
||||
router.at("/:update_id").get(index::get_update_status);
|
||||
});
|
||||
|
||||
router
|
||||
.at("/")
|
||||
.get(index::get_index_schema)
|
||||
.post(index::create_index)
|
||||
.put(index::update_schema)
|
||||
.delete(index::delete_index);
|
||||
|
||||
router.at("/documents").nest(|router| {
|
||||
router
|
||||
.at("/")
|
||||
.get(document::browse_documents)
|
||||
.post(document::add_or_replace_multiple_documents)
|
||||
.put(document::add_or_update_multiple_documents)
|
||||
.delete(document::clear_all_documents);
|
||||
|
||||
router.at("/:identifier").nest(|router| {
|
||||
router
|
||||
.at("/")
|
||||
.get(document::get_document)
|
||||
.delete(document::delete_document);
|
||||
});
|
||||
|
||||
router
|
||||
.at("/delete")
|
||||
.post(document::delete_multiple_documents);
|
||||
});
|
||||
|
||||
router.at("/synonym").nest(|router| {
|
||||
router.at("/").get(synonym::list).post(synonym::create);
|
||||
|
||||
router
|
||||
.at("/:synonym")
|
||||
.get(synonym::get)
|
||||
.put(synonym::update)
|
||||
.delete(synonym::delete);
|
||||
|
||||
router.at("/batch").post(synonym::batch_write);
|
||||
router.at("/clear").post(synonym::clear);
|
||||
});
|
||||
|
||||
router.at("/stop-words").nest(|router| {
|
||||
router
|
||||
.at("/")
|
||||
.get(stop_words::list)
|
||||
.put(stop_words::add)
|
||||
.delete(stop_words::delete);
|
||||
});
|
||||
|
||||
router
|
||||
.at("/settings")
|
||||
.get(setting::get)
|
||||
.post(setting::update);
|
||||
});
|
||||
});
|
||||
|
||||
router.at("/keys").nest(|router| {
|
||||
router.at("/").get(key::list).post(key::create);
|
||||
|
||||
router
|
||||
.at("/:key")
|
||||
.get(key::get)
|
||||
.put(key::update)
|
||||
.delete(key::delete);
|
||||
});
|
||||
});
|
||||
|
||||
// Private
|
||||
app.at("").nest(|router| {
|
||||
router
|
||||
.at("/health")
|
||||
.get(health::get_health)
|
||||
.post(health::set_healthy)
|
||||
.put(health::change_healthyness)
|
||||
.delete(health::set_unhealthy);
|
||||
|
||||
router.at("/stats").get(stats::get_stats);
|
||||
router.at("/stats/:index").get(stats::index_stat);
|
||||
router.at("/version").get(stats::get_version);
|
||||
router.at("/sys-info").get(stats::get_sys_info);
|
||||
router
|
||||
.at("/sys-info/pretty")
|
||||
.get(stats::get_sys_info_pretty);
|
||||
});
|
||||
}
|
250
meilidb-http/src/routes/search.rs
Normal file
@@ -0,0 +1,250 @@
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::time::Duration;
|
||||
|
||||
use meilidb_core::Index;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tide::querystring::ContextExt as QSContextExt;
|
||||
use tide::{Context, Response};
|
||||
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::meilidb::{Error, IndexSearchExt, SearchHit};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::Data;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct SearchQuery {
|
||||
q: String,
|
||||
offset: Option<usize>,
|
||||
limit: Option<usize>,
|
||||
attributes_to_retrieve: Option<String>,
|
||||
attributes_to_search_in: Option<String>,
|
||||
attributes_to_crop: Option<String>,
|
||||
crop_length: Option<usize>,
|
||||
attributes_to_highlight: Option<String>,
|
||||
filters: Option<String>,
|
||||
timeout_ms: Option<u64>,
|
||||
matches: Option<bool>,
|
||||
}
|
||||
|
||||
pub async fn search_with_url_query(ctx: Context<Data>) -> SResult<Response> {
|
||||
// ctx.is_allowed(DocumentsRead)?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let schema = index
|
||||
.main
|
||||
.schema(&reader)
|
||||
.map_err(ResponseError::internal)?
|
||||
.ok_or(ResponseError::open_index("No Schema found"))?;
|
||||
|
||||
let query: SearchQuery = ctx
|
||||
.url_query()
|
||||
.map_err(|_| ResponseError::bad_request("invalid query parameter"))?;
|
||||
|
||||
let mut search_builder = index.new_search(query.q.clone());
|
||||
|
||||
if let Some(offset) = query.offset {
|
||||
search_builder.offset(offset);
|
||||
}
|
||||
if let Some(limit) = query.limit {
|
||||
search_builder.limit(limit);
|
||||
}
|
||||
|
||||
if let Some(attributes_to_retrieve) = query.attributes_to_retrieve {
|
||||
for attr in attributes_to_retrieve.split(',') {
|
||||
search_builder.add_retrievable_field(attr.to_string());
|
||||
}
|
||||
}
|
||||
if let Some(attributes_to_search_in) = query.attributes_to_search_in {
|
||||
for attr in attributes_to_search_in.split(',') {
|
||||
search_builder.add_attribute_to_search_in(attr.to_string());
|
||||
}
|
||||
}
|
||||
if let Some(attributes_to_crop) = query.attributes_to_crop {
|
||||
let crop_length = query.crop_length.unwrap_or(200);
|
||||
if attributes_to_crop == "*" {
|
||||
let attributes_to_crop = schema
|
||||
.iter()
|
||||
.map(|(attr, ..)| (attr.to_string(), crop_length))
|
||||
.collect();
|
||||
search_builder.attributes_to_crop(attributes_to_crop);
|
||||
} else {
|
||||
let attributes_to_crop = attributes_to_crop
|
||||
.split(',')
|
||||
.map(|r| (r.to_string(), crop_length))
|
||||
.collect();
|
||||
search_builder.attributes_to_crop(attributes_to_crop);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(attributes_to_highlight) = query.attributes_to_highlight {
|
||||
let attributes_to_highlight = if attributes_to_highlight == "*" {
|
||||
schema.iter().map(|(attr, ..)| attr.to_string()).collect()
|
||||
} else {
|
||||
attributes_to_highlight
|
||||
.split(',')
|
||||
.map(ToString::to_string)
|
||||
.collect()
|
||||
};
|
||||
|
||||
search_builder.attributes_to_highlight(attributes_to_highlight);
|
||||
}
|
||||
|
||||
if let Some(filters) = query.filters {
|
||||
search_builder.filters(filters);
|
||||
}
|
||||
|
||||
if let Some(timeout_ms) = query.timeout_ms {
|
||||
search_builder.timeout(Duration::from_millis(timeout_ms));
|
||||
}
|
||||
|
||||
if let Some(matches) = query.matches {
|
||||
if matches {
|
||||
search_builder.get_matches();
|
||||
}
|
||||
}
|
||||
|
||||
let response = match search_builder.search(&reader) {
|
||||
Ok(response) => response,
|
||||
Err(Error::Internal(message)) => return Err(ResponseError::Internal(message)),
|
||||
Err(others) => return Err(ResponseError::bad_request(others)),
|
||||
};
|
||||
|
||||
Ok(tide::response::json(response))
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct SearchMultiBody {
|
||||
indexes: HashSet<String>,
|
||||
query: String,
|
||||
offset: Option<usize>,
|
||||
limit: Option<usize>,
|
||||
attributes_to_retrieve: Option<HashSet<String>>,
|
||||
attributes_to_search_in: Option<HashSet<String>>,
|
||||
attributes_to_crop: Option<HashMap<String, usize>>,
|
||||
attributes_to_highlight: Option<HashSet<String>>,
|
||||
filters: Option<String>,
|
||||
timeout_ms: Option<u64>,
|
||||
matches: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct SearchMultiBodyResponse {
|
||||
hits: HashMap<String, Vec<SearchHit>>,
|
||||
offset: usize,
|
||||
hits_per_page: usize,
|
||||
processing_time_ms: usize,
|
||||
query: String,
|
||||
}
|
||||
|
||||
pub async fn search_multi_index(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
// ctx.is_allowed(DocumentsRead)?;
|
||||
let body = ctx
|
||||
.body_json::<SearchMultiBody>()
|
||||
.await
|
||||
.map_err(ResponseError::bad_request)?;
|
||||
|
||||
let mut index_list = body.clone().indexes;
|
||||
|
||||
for index in index_list.clone() {
|
||||
if index == "*" {
|
||||
index_list = ctx
|
||||
.state()
|
||||
.db
|
||||
.indexes_names()
|
||||
.map_err(ResponseError::internal)?
|
||||
.into_iter()
|
||||
.collect();
|
||||
}
|
||||
}
|
||||
|
||||
let mut offset = 0;
|
||||
let mut count = 20;
|
||||
|
||||
if let Some(body_offset) = body.offset {
|
||||
if let Some(limit) = body.limit {
|
||||
offset = body_offset;
|
||||
count = limit;
|
||||
}
|
||||
}
|
||||
|
||||
let offset = offset;
|
||||
let count = count;
|
||||
let db = &ctx.state().db;
|
||||
let par_body = body.clone();
|
||||
let responses_per_index: Vec<SResult<_>> = index_list
|
||||
.into_par_iter()
|
||||
.map(move |index_name| {
|
||||
let index: Index = db
|
||||
.open_index(&index_name)
|
||||
.ok_or(ResponseError::index_not_found(&index_name))?;
|
||||
|
||||
let mut search_builder = index.new_search(par_body.query.clone());
|
||||
|
||||
search_builder.offset(offset);
|
||||
search_builder.limit(count);
|
||||
|
||||
if let Some(attributes_to_retrieve) = par_body.attributes_to_retrieve.clone() {
|
||||
search_builder.attributes_to_retrieve(attributes_to_retrieve);
|
||||
}
|
||||
if let Some(attributes_to_search_in) = par_body.attributes_to_search_in.clone() {
|
||||
search_builder.attributes_to_search_in(attributes_to_search_in);
|
||||
}
|
||||
if let Some(attributes_to_crop) = par_body.attributes_to_crop.clone() {
|
||||
search_builder.attributes_to_crop(attributes_to_crop);
|
||||
}
|
||||
if let Some(attributes_to_highlight) = par_body.attributes_to_highlight.clone() {
|
||||
search_builder.attributes_to_highlight(attributes_to_highlight);
|
||||
}
|
||||
if let Some(filters) = par_body.filters.clone() {
|
||||
search_builder.filters(filters);
|
||||
}
|
||||
if let Some(timeout_ms) = par_body.timeout_ms {
|
||||
search_builder.timeout(Duration::from_millis(timeout_ms));
|
||||
}
|
||||
if let Some(matches) = par_body.matches {
|
||||
if matches {
|
||||
search_builder.get_matches();
|
||||
}
|
||||
}
|
||||
|
||||
let env = &db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let response = search_builder
|
||||
.search(&reader)
|
||||
.map_err(ResponseError::internal)?;
|
||||
Ok((index_name, response))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut hits_map = HashMap::new();
|
||||
|
||||
let mut max_query_time = 0;
|
||||
|
||||
for response in responses_per_index {
|
||||
if let Ok((index_name, response)) = response {
|
||||
if response.processing_time_ms > max_query_time {
|
||||
max_query_time = response.processing_time_ms;
|
||||
}
|
||||
hits_map.insert(index_name, response.hits);
|
||||
}
|
||||
}
|
||||
|
||||
let response = SearchMultiBodyResponse {
|
||||
hits: hits_map,
|
||||
offset,
|
||||
hits_per_page: count,
|
||||
processing_time_ms: max_query_time,
|
||||
query: body.query,
|
||||
};
|
||||
|
||||
Ok(tide::response::json(response))
|
||||
}
|
93
meilidb-http/src/routes/setting.rs
Normal file
@@ -0,0 +1,93 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use http::StatusCode;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tide::response::IntoResponse;
|
||||
use tide::{Context, Response};
|
||||
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::models::token::ACL::*;
|
||||
use crate::routes::document::IndexUpdateResponse;
|
||||
use crate::Data;
|
||||
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct SettingBody {
|
||||
pub stop_words: Option<StopWords>,
|
||||
pub ranking_order: Option<RankingOrder>,
|
||||
pub distinct_field: Option<DistinctField>,
|
||||
pub ranking_rules: Option<RankingRules>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum RankingOrdering {
|
||||
Asc,
|
||||
Dsc,
|
||||
}
|
||||
|
||||
pub type StopWords = HashSet<String>;
|
||||
pub type RankingOrder = Vec<String>;
|
||||
pub type DistinctField = String;
|
||||
pub type RankingRules = HashMap<String, RankingOrdering>;
|
||||
|
||||
pub async fn get(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsRead)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let settings = match index.main.customs(&reader).unwrap() {
|
||||
Some(bytes) => bincode::deserialize(bytes).unwrap(),
|
||||
None => SettingBody::default(),
|
||||
};
|
||||
|
||||
Ok(tide::response::json(settings))
|
||||
}
|
||||
|
||||
pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsWrite)?;
|
||||
|
||||
let settings: SettingBody = ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let mut current_settings = match index.main.customs(&writer).unwrap() {
|
||||
Some(bytes) => bincode::deserialize(bytes).unwrap(),
|
||||
None => SettingBody::default(),
|
||||
};
|
||||
|
||||
if let Some(stop_words) = settings.stop_words {
|
||||
current_settings.stop_words = Some(stop_words);
|
||||
}
|
||||
|
||||
if let Some(ranking_order) = settings.ranking_order {
|
||||
current_settings.ranking_order = Some(ranking_order);
|
||||
}
|
||||
|
||||
if let Some(distinct_field) = settings.distinct_field {
|
||||
current_settings.distinct_field = Some(distinct_field);
|
||||
}
|
||||
|
||||
if let Some(ranking_rules) = settings.ranking_rules {
|
||||
current_settings.ranking_rules = Some(ranking_rules);
|
||||
}
|
||||
|
||||
let bytes = bincode::serialize(¤t_settings).unwrap();
|
||||
|
||||
let update_id = index
|
||||
.customs_update(&mut writer, bytes)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
334
meilidb-http/src/routes/stats.rs
Normal file
@@ -0,0 +1,334 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use pretty_bytes::converter::convert;
|
||||
use serde::Serialize;
|
||||
use sysinfo::{NetworkExt, Pid, ProcessExt, ProcessorExt, System, SystemExt};
|
||||
use tide::{Context, Response};
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::models::token::ACL::*;
|
||||
use crate::Data;
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct IndexStatsResponse {
|
||||
number_of_documents: u64,
|
||||
is_indexing: bool,
|
||||
last_update: Option<DateTime<Utc>>,
|
||||
fields_frequency: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
pub async fn index_stat(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
let index_name = ctx.url_param("index")?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let number_of_documents = index
|
||||
.main
|
||||
.number_of_documents(&reader)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
let fields_frequency = ctx
|
||||
.state()
|
||||
.fields_frequency(&reader, &index_name)
|
||||
.map_err(ResponseError::internal)?
|
||||
.unwrap_or_default();
|
||||
|
||||
let is_indexing = ctx
|
||||
.state()
|
||||
.is_indexing(&reader, &index_name)
|
||||
.map_err(ResponseError::internal)?
|
||||
.ok_or(ResponseError::not_found("Index not found"))?;
|
||||
|
||||
let last_update = ctx
|
||||
.state()
|
||||
.last_update(&reader, &index_name)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
let response = IndexStatsResponse {
|
||||
number_of_documents,
|
||||
is_indexing,
|
||||
last_update,
|
||||
fields_frequency,
|
||||
};
|
||||
Ok(tide::response::json(response))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct StatsResult {
|
||||
database_size: u64,
|
||||
indexes: HashMap<String, IndexStatsResponse>,
|
||||
}
|
||||
|
||||
pub async fn get_stats(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
let mut index_list = HashMap::new();
|
||||
|
||||
if let Ok(indexes_set) = ctx.state().db.indexes_names() {
|
||||
for index_name in indexes_set {
|
||||
let db = &ctx.state().db;
|
||||
let env = &db.env;
|
||||
|
||||
let index = db.open_index(&index_name).unwrap();
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let number_of_documents = index
|
||||
.main
|
||||
.number_of_documents(&reader)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
let fields_frequency = ctx
|
||||
.state()
|
||||
.fields_frequency(&reader, &index_name)
|
||||
.map_err(ResponseError::internal)?
|
||||
.unwrap_or_default();
|
||||
|
||||
let is_indexing = ctx
|
||||
.state()
|
||||
.is_indexing(&reader, &index_name)
|
||||
.map_err(ResponseError::internal)?
|
||||
.ok_or(ResponseError::not_found("Index not found"))?;
|
||||
|
||||
let last_update = ctx
|
||||
.state()
|
||||
.last_update(&reader, &index_name)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
let response = IndexStatsResponse {
|
||||
number_of_documents,
|
||||
is_indexing,
|
||||
last_update,
|
||||
fields_frequency,
|
||||
};
|
||||
index_list.insert(index_name, response);
|
||||
}
|
||||
}
|
||||
|
||||
let database_size = WalkDir::new(ctx.state().db_path.clone())
|
||||
.into_iter()
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter_map(|entry| entry.metadata().ok())
|
||||
.filter(|metadata| metadata.is_file())
|
||||
.fold(0, |acc, m| acc + m.len());
|
||||
|
||||
let response = StatsResult {
|
||||
database_size,
|
||||
indexes: index_list,
|
||||
};
|
||||
|
||||
Ok(tide::response::json(response))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct VersionResponse {
|
||||
commit_sha: String,
|
||||
build_date: String,
|
||||
pkg_version: String,
|
||||
}
|
||||
|
||||
pub async fn get_version(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
let response = VersionResponse {
|
||||
commit_sha: env!("VERGEN_SHA").to_string(),
|
||||
build_date: env!("VERGEN_BUILD_TIMESTAMP").to_string(),
|
||||
pkg_version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
};
|
||||
|
||||
Ok(tide::response::json(response))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(crate) struct SysGlobal {
|
||||
total_memory: u64,
|
||||
used_memory: u64,
|
||||
total_swap: u64,
|
||||
used_swap: u64,
|
||||
input_data: u64,
|
||||
output_data: u64,
|
||||
}
|
||||
|
||||
impl SysGlobal {
|
||||
fn new() -> SysGlobal {
|
||||
SysGlobal {
|
||||
total_memory: 0,
|
||||
used_memory: 0,
|
||||
total_swap: 0,
|
||||
used_swap: 0,
|
||||
input_data: 0,
|
||||
output_data: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(crate) struct SysProcess {
|
||||
memory: u64,
|
||||
cpu: f32,
|
||||
}
|
||||
|
||||
impl SysProcess {
|
||||
fn new() -> SysProcess {
|
||||
SysProcess {
|
||||
memory: 0,
|
||||
cpu: 0.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(crate) struct SysInfo {
|
||||
memory_usage: f64,
|
||||
processor_usage: Vec<f32>,
|
||||
global: SysGlobal,
|
||||
process: SysProcess,
|
||||
}
|
||||
|
||||
impl SysInfo {
|
||||
fn new() -> SysInfo {
|
||||
SysInfo {
|
||||
memory_usage: 0.0,
|
||||
processor_usage: Vec::new(),
|
||||
global: SysGlobal::new(),
|
||||
process: SysProcess::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn report(pid: Pid) -> SysInfo {
|
||||
let mut sys = System::new();
|
||||
let mut info = SysInfo::new();
|
||||
|
||||
info.memory_usage = sys.get_used_memory() as f64 / sys.get_total_memory() as f64 * 100.0;
|
||||
|
||||
for processor in sys.get_processor_list() {
|
||||
info.processor_usage.push(processor.get_cpu_usage() * 100.0);
|
||||
}
|
||||
|
||||
info.global.total_memory = sys.get_total_memory();
|
||||
info.global.used_memory = sys.get_used_memory();
|
||||
info.global.total_swap = sys.get_total_swap();
|
||||
info.global.used_swap = sys.get_used_swap();
|
||||
info.global.input_data = sys.get_network().get_income();
|
||||
info.global.output_data = sys.get_network().get_outcome();
|
||||
|
||||
if let Some(process) = sys.get_process(pid) {
|
||||
info.process.memory = process.memory();
|
||||
info.process.cpu = process.cpu_usage() * 100.0;
|
||||
}
|
||||
|
||||
sys.refresh_all();
|
||||
|
||||
info
|
||||
}
|
||||
|
||||
pub async fn get_sys_info(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
Ok(tide::response::json(report(ctx.state().server_pid)))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(crate) struct SysGlobalPretty {
|
||||
total_memory: String,
|
||||
used_memory: String,
|
||||
total_swap: String,
|
||||
used_swap: String,
|
||||
input_data: String,
|
||||
output_data: String,
|
||||
}
|
||||
|
||||
impl SysGlobalPretty {
|
||||
fn new() -> SysGlobalPretty {
|
||||
SysGlobalPretty {
|
||||
total_memory: "None".to_owned(),
|
||||
used_memory: "None".to_owned(),
|
||||
total_swap: "None".to_owned(),
|
||||
used_swap: "None".to_owned(),
|
||||
input_data: "None".to_owned(),
|
||||
output_data: "None".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(crate) struct SysProcessPretty {
|
||||
memory: String,
|
||||
cpu: String,
|
||||
}
|
||||
|
||||
impl SysProcessPretty {
|
||||
fn new() -> SysProcessPretty {
|
||||
SysProcessPretty {
|
||||
memory: "None".to_owned(),
|
||||
cpu: "None".to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub(crate) struct SysInfoPretty {
|
||||
memory_usage: String,
|
||||
processor_usage: Vec<String>,
|
||||
global: SysGlobalPretty,
|
||||
process: SysProcessPretty,
|
||||
}
|
||||
|
||||
impl SysInfoPretty {
|
||||
fn new() -> SysInfoPretty {
|
||||
SysInfoPretty {
|
||||
memory_usage: "None".to_owned(),
|
||||
processor_usage: Vec::new(),
|
||||
global: SysGlobalPretty::new(),
|
||||
process: SysProcessPretty::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn report_pretty(pid: Pid) -> SysInfoPretty {
|
||||
let mut sys = System::new();
|
||||
let mut info = SysInfoPretty::new();
|
||||
|
||||
info.memory_usage = format!(
|
||||
"{:.1} %",
|
||||
sys.get_used_memory() as f64 / sys.get_total_memory() as f64 * 100.0
|
||||
);
|
||||
|
||||
for processor in sys.get_processor_list() {
|
||||
info.processor_usage
|
||||
.push(format!("{:.1} %", processor.get_cpu_usage() * 100.0));
|
||||
}
|
||||
|
||||
info.global.total_memory = convert(sys.get_total_memory() as f64 * 1024.0);
|
||||
info.global.used_memory = convert(sys.get_used_memory() as f64 * 1024.0);
|
||||
info.global.total_swap = convert(sys.get_total_swap() as f64 * 1024.0);
|
||||
info.global.used_swap = convert(sys.get_used_swap() as f64 * 1024.0);
|
||||
info.global.input_data = convert(sys.get_network().get_income() as f64);
|
||||
info.global.output_data = convert(sys.get_network().get_outcome() as f64);
|
||||
|
||||
if let Some(process) = sys.get_process(pid) {
|
||||
info.process.memory = convert(process.memory() as f64 * 1024.0);
|
||||
info.process.cpu = format!("{:.1} %", process.cpu_usage() * 100.0);
|
||||
}
|
||||
|
||||
sys.refresh_all();
|
||||
|
||||
info
|
||||
}
|
||||
|
||||
pub async fn get_sys_info_pretty(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(Admin)?;
|
||||
Ok(tide::response::json(report_pretty(ctx.state().server_pid)))
|
||||
}
|
82
meilidb-http/src/routes/stop_words.rs
Normal file
@@ -0,0 +1,82 @@
|
||||
use http::StatusCode;
|
||||
use tide::response::IntoResponse;
|
||||
use tide::{Context, Response};
|
||||
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::models::token::ACL::*;
|
||||
use crate::routes::document::IndexUpdateResponse;
|
||||
use crate::Data;
|
||||
|
||||
pub async fn list(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsRead)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let stop_words_fst = index
|
||||
.main
|
||||
.stop_words_fst(&reader)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
let stop_words = stop_words_fst
|
||||
.unwrap_or_default()
|
||||
.stream()
|
||||
.into_strs()
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
Ok(tide::response::json(stop_words))
|
||||
}
|
||||
|
||||
pub async fn add(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsRead)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let data: Vec<String> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let mut stop_words_addition = index.stop_words_addition();
|
||||
for stop_word in data {
|
||||
stop_words_addition.add_stop_word(stop_word);
|
||||
}
|
||||
|
||||
let update_id = stop_words_addition
|
||||
.finalize(&mut writer)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
||||
|
||||
pub async fn delete(mut ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsRead)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let data: Vec<String> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let mut stop_words_deletion = index.stop_words_deletion();
|
||||
for stop_word in data {
|
||||
stop_words_deletion.delete_stop_word(stop_word);
|
||||
}
|
||||
|
||||
let update_id = stop_words_deletion
|
||||
.finalize(&mut writer)
|
||||
.map_err(ResponseError::internal)?;
|
||||
|
||||
writer.commit().map_err(ResponseError::internal)?;
|
||||
|
||||
let response_body = IndexUpdateResponse { update_id };
|
||||
Ok(tide::response::json(response_body)
|
||||
.with_status(StatusCode::ACCEPTED)
|
||||
.into_response())
|
||||
}
|
235
meilidb-http/src/routes/synonym.rs
Normal file
@@ -0,0 +1,235 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use http::StatusCode;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tide::response::IntoResponse;
|
||||
use tide::{Context, Response};
|
||||
|
||||
use crate::error::{ResponseError, SResult};
|
||||
use crate::helpers::tide::ContextExt;
|
||||
use crate::models::token::ACL::*;
|
||||
use crate::routes::document::IndexUpdateResponse;
|
||||
use crate::Data;
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum Synonym {
|
||||
OneWay(SynonymOneWay),
|
||||
MultiWay { synonyms: Vec<String> },
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct SynonymOneWay {
|
||||
pub input: String,
|
||||
pub synonyms: Vec<String>,
|
||||
}
|
||||
|
||||
pub type Synonyms = Vec<Synonym>;
|
||||
|
||||
pub async fn list(ctx: Context<Data>) -> SResult<Response> {
|
||||
ctx.is_allowed(SettingsRead)?;
|
||||
let index = ctx.index()?;
|
||||
|
||||
let env = &ctx.state().db.env;
|
||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||
|
||||
let synonyms_fst = index
|
||||
.main
        .synonyms_fst(&reader)
        .map_err(ResponseError::internal)?;

    let synonyms_fst = synonyms_fst.unwrap_or_default();
    let synonyms_list = synonyms_fst.stream().into_strs().unwrap();

    let mut response = HashMap::new();

    let index_synonyms = &index.synonyms;

    for synonym in synonyms_list {
        let alternative_list = index_synonyms
            .synonyms(&reader, synonym.as_bytes())
            .unwrap()
            .unwrap()
            .stream()
            .into_strs()
            .unwrap();
        response.insert(synonym, alternative_list);
    }

    Ok(tide::response::json(response))
}

pub async fn get(ctx: Context<Data>) -> SResult<Response> {
    ctx.is_allowed(SettingsRead)?;
    let synonym = ctx.url_param("synonym")?;
    let index = ctx.index()?;

    let env = &ctx.state().db.env;
    let reader = env.read_txn().map_err(ResponseError::internal)?;

    let synonym_list = index
        .synonyms
        .synonyms(&reader, synonym.as_bytes())
        .unwrap()
        .unwrap()
        .stream()
        .into_strs()
        .unwrap();

    Ok(tide::response::json(synonym_list))
}

pub async fn create(mut ctx: Context<Data>) -> SResult<Response> {
    ctx.is_allowed(SettingsWrite)?;

    let data: Synonym = ctx.body_json().await.map_err(ResponseError::bad_request)?;

    let index = ctx.index()?;

    let env = &ctx.state().db.env;
    let mut writer = env.write_txn().map_err(ResponseError::internal)?;

    let mut synonyms_addition = index.synonyms_addition();

    match data.clone() {
        Synonym::OneWay(content) => {
            synonyms_addition.add_synonym(content.input, content.synonyms.into_iter())
        }
        Synonym::MultiWay { mut synonyms } => {
            if synonyms.len() > 1 {
                for _ in 0..synonyms.len() {
                    let (first, elems) = synonyms.split_first().unwrap();
                    synonyms_addition.add_synonym(first, elems.iter());
                    synonyms.rotate_left(1);
                }
            }
        }
    }

    let update_id = synonyms_addition
        .finalize(&mut writer)
        .map_err(ResponseError::internal)?;

    writer.commit().map_err(ResponseError::internal)?;

    let response_body = IndexUpdateResponse { update_id };
    Ok(tide::response::json(response_body)
        .with_status(StatusCode::CREATED)
        .into_response())
}

pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
    ctx.is_allowed(SettingsWrite)?;
    let synonym = ctx.url_param("synonym")?;
    let index = ctx.index()?;
    let data: Vec<String> = ctx.body_json().await.map_err(ResponseError::bad_request)?;

    let env = &ctx.state().db.env;
    let mut writer = env.write_txn().map_err(ResponseError::internal)?;

    let mut synonyms_addition = index.synonyms_addition();
    synonyms_addition.add_synonym(synonym.clone(), data.clone().into_iter());
    let update_id = synonyms_addition
        .finalize(&mut writer)
        .map_err(ResponseError::internal)?;

    writer.commit().map_err(ResponseError::internal)?;

    let response_body = IndexUpdateResponse { update_id };
    Ok(tide::response::json(response_body)
        .with_status(StatusCode::ACCEPTED)
        .into_response())
}

pub async fn delete(ctx: Context<Data>) -> SResult<Response> {
    ctx.is_allowed(SettingsWrite)?;
    let synonym = ctx.url_param("synonym")?;
    let index = ctx.index()?;

    let env = &ctx.state().db.env;
    let mut writer = env.write_txn().map_err(ResponseError::internal)?;

    let mut synonyms_deletion = index.synonyms_deletion();
    synonyms_deletion.delete_all_alternatives_of(synonym);
    let update_id = synonyms_deletion
        .finalize(&mut writer)
        .map_err(ResponseError::internal)?;

    writer.commit().map_err(ResponseError::internal)?;

    let response_body = IndexUpdateResponse { update_id };
    Ok(tide::response::json(response_body)
        .with_status(StatusCode::ACCEPTED)
        .into_response())
}

pub async fn batch_write(mut ctx: Context<Data>) -> SResult<Response> {
    ctx.is_allowed(SettingsWrite)?;

    let data: Synonyms = ctx.body_json().await.map_err(ResponseError::bad_request)?;

    let index = ctx.index()?;

    let env = &ctx.state().db.env;
    let mut writer = env.write_txn().map_err(ResponseError::internal)?;

    let mut synonyms_addition = index.synonyms_addition();
    for raw in data {
        match raw {
            Synonym::OneWay(content) => {
                synonyms_addition.add_synonym(content.input, content.synonyms.into_iter())
            }
            Synonym::MultiWay { mut synonyms } => {
                if synonyms.len() > 1 {
                    for _ in 0..synonyms.len() {
                        let (first, elems) = synonyms.split_first().unwrap();
                        synonyms_addition.add_synonym(first, elems.iter());
                        synonyms.rotate_left(1);
                    }
                }
            }
        }
    }
    let update_id = synonyms_addition
        .finalize(&mut writer)
        .map_err(ResponseError::internal)?;

    writer.commit().map_err(ResponseError::internal)?;

    let response_body = IndexUpdateResponse { update_id };
    Ok(tide::response::json(response_body)
        .with_status(StatusCode::ACCEPTED)
        .into_response())
}

pub async fn clear(ctx: Context<Data>) -> SResult<Response> {
    ctx.is_allowed(SettingsWrite)?;
    let index = ctx.index()?;

    let env = &ctx.state().db.env;
    let mut writer = env.write_txn().map_err(ResponseError::internal)?;

    let synonyms_fst = index
        .main
        .synonyms_fst(&writer)
        .map_err(ResponseError::internal)?;

    let synonyms_fst = synonyms_fst.unwrap_or_default();
    let synonyms_list = synonyms_fst.stream().into_strs().unwrap();

    let mut synonyms_deletion = index.synonyms_deletion();
    for synonym in synonyms_list {
        synonyms_deletion.delete_all_alternatives_of(synonym);
    }
    let update_id = synonyms_deletion
        .finalize(&mut writer)
        .map_err(ResponseError::internal)?;

    writer.commit().map_err(ResponseError::internal)?;

    let response_body = IndexUpdateResponse { update_id };
    Ok(tide::response::json(response_body)
        .with_status(StatusCode::ACCEPTED)
        .into_response())
}
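
The `Synonym::MultiWay` branch used by `create` and `batch_write` above registers every word of a group against all the others: it repeatedly splits off the first element, adds it with the remaining words as alternatives, then rotates the vector by one, so after a full rotation each word has been in front exactly once. A minimal standalone sketch of that pairing logic follows; the `Recorder` type and the sample words are hypothetical stand-ins for the real `SynonymsAddition` builder and request data.

// Stand-in for SynonymsAddition: it only records which (word, alternatives)
// pairs the rotation loop above would register.
struct Recorder {
    calls: Vec<(String, Vec<String>)>,
}

impl Recorder {
    fn add_synonym(&mut self, word: &str, alternatives: impl Iterator<Item = String>) {
        self.calls.push((word.to_owned(), alternatives.collect()));
    }
}

fn main() {
    let mut synonyms = vec!["car".to_string(), "auto".to_string(), "vehicle".to_string()];
    let mut recorder = Recorder { calls: Vec::new() };

    if synonyms.len() > 1 {
        // One full rotation: every word reaches the first position exactly once.
        for _ in 0..synonyms.len() {
            let (first, elems) = synonyms.split_first().unwrap();
            recorder.add_synonym(first, elems.iter().cloned());
            synonyms.rotate_left(1);
        }
    }

    // Prints:
    // car -> ["auto", "vehicle"]
    // auto -> ["vehicle", "car"]
    // vehicle -> ["car", "auto"]
    for (word, alternatives) in &recorder.calls {
        println!("{} -> {:?}", word, alternatives);
    }
}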

meilidb-schema/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "meilidb-schema"
-version = "0.1.0"
+version = "0.6.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"

meilidb-schema/src/lib.rs
@@ -22,7 +22,7 @@ pub const RANKED: SchemaProps = SchemaProps {
    ranked: true,
};

-#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaProps {
    #[serde(default)]
    pub displayed: bool,

@@ -60,6 +60,36 @@ impl BitOr for SchemaProps {
    }
}

+impl fmt::Debug for SchemaProps {
+    #[allow(non_camel_case_types)]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        #[derive(Debug)]
+        struct DISPLAYED;
+
+        #[derive(Debug)]
+        struct INDEXED;
+
+        #[derive(Debug)]
+        struct RANKED;
+
+        let mut debug_set = f.debug_set();
+
+        if self.displayed {
+            debug_set.entry(&DISPLAYED);
+        }
+
+        if self.indexed {
+            debug_set.entry(&INDEXED);
+        }
+
+        if self.ranked {
+            debug_set.entry(&RANKED);
+        }
+
+        debug_set.finish()
+    }
+}
+
#[derive(Serialize, Deserialize)]
pub struct SchemaBuilder {
    identifier: String,

@@ -102,12 +132,12 @@ impl SchemaBuilder {
    }
}

-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Clone, PartialEq, Eq)]
pub struct Schema {
    inner: Arc<InnerSchema>,
}

-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Clone, PartialEq, Eq)]
struct InnerSchema {
    identifier: String,
    attrs: HashMap<String, SchemaAttr>,

@@ -139,6 +169,10 @@ impl Schema {
        attributes
    }

+    pub fn number_of_attributes(&self) -> usize {
+        self.inner.attrs.len()
+    }
+
    pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
        let (_, props) = self.inner.props[attr.0 as usize];
        props

@@ -184,6 +218,16 @@ impl<'de> Deserialize<'de> for Schema {
    }
}

+impl fmt::Debug for Schema {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let builder = self.to_builder();
+        f.debug_struct("Schema")
+            .field("identifier", &builder.identifier)
+            .field("attributes", &builder.attributes)
+            .finish()
+    }
+}
+
#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct SchemaAttr(pub u16);

@@ -447,4 +491,43 @@ mod tests {

        Ok(())
    }
+
+    #[test]
+    fn debug_output() {
+        use std::fmt::Write as _;
+
+        let mut builder = SchemaBuilder::with_identifier("id");
+        builder.new_attribute("alpha", DISPLAYED);
+        builder.new_attribute("beta", DISPLAYED | INDEXED);
+        builder.new_attribute("gamma", INDEXED);
+        let schema = builder.build();
+
+        let mut output = String::new();
+        let _ = write!(&mut output, "{:#?}", schema);
+
+        let expected = r#"Schema {
+    identifier: "id",
+    attributes: {
+        "alpha": {
+            DISPLAYED,
+        },
+        "beta": {
+            DISPLAYED,
+            INDEXED,
+        },
+        "gamma": {
+            INDEXED,
+        },
+    },
+}"#;
+
+        assert_eq!(output, expected);
+
+        let mut output = String::new();
+        let _ = write!(&mut output, "{:?}", schema);
+
+        let expected = r#"Schema { identifier: "id", attributes: {"alpha": {DISPLAYED}, "beta": {DISPLAYED, INDEXED}, "gamma": {INDEXED}} }"#;
+
+        assert_eq!(output, expected);
+    }
}

meilidb-tokenizer/Cargo.toml
@@ -1,8 +1,9 @@
[package]
name = "meilidb-tokenizer"
-version = "0.1.0"
+version = "0.6.1"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"

[dependencies]
+deunicode = "1.0.0"
slice-group-by = "0.2.4"

meilidb-tokenizer/src/lib.rs
@@ -1,4 +1,5 @@
use self::SeparatorCategory::*;
+use deunicode::deunicode_char;
use slice_group_by::StrGroupBy;
use std::iter::Peekable;

@@ -43,7 +44,10 @@ fn is_separator(c: char) -> bool {

fn classify_separator(c: char) -> Option<SeparatorCategory> {
    match c {
-        ' ' | '-' | '_' | '\'' | ':' | '"' => Some(Soft),
+        c if c.is_whitespace() => Some(Soft), // whitespaces
+        c if deunicode_char(c) == Some("'") => Some(Soft), // quotes
+        c if deunicode_char(c) == Some("\"") => Some(Soft), // double quotes
+        '-' | '_' | '\'' | ':' | '/' | '\\' => Some(Soft),
        '.' | ';' | ',' | '!' | '?' | '(' | ')' => Some(Hard),
        _ => None,
    }

17 meilidb-types/Cargo.toml Normal file
@@ -0,0 +1,17 @@
[package]
name = "meilidb-types"
version = "0.1.0"
authors = ["Clément Renault <renault.cle@gmail.com>"]
edition = "2018"

[dependencies.zerocopy]
version = "0.2.8"
optional = true

[dependencies.serde]
version = "1.0.101"
features = ["derive"]
optional = true

[features]
default = ["serde", "zerocopy"]

65 meilidb-types/src/lib.rs Normal file
@@ -0,0 +1,65 @@
#[cfg(feature = "zerocopy")]
use zerocopy::{AsBytes, FromBytes};

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// Represent an internally generated document unique identifier.
///
/// It is used to inform the database the document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[repr(C)]
pub struct DocumentId(pub u64);

/// This structure represent the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
#[repr(C)]
pub struct DocIndex {
    /// The document identifier where the word was found.
    pub document_id: DocumentId,

    /// The attribute in the document where the word was found
    /// along with the index in it.
    pub attribute: u16,
    pub word_index: u16,

    /// The position in bytes where the word was found
    /// along with the length of it.
    ///
    /// It informs on the original word area in the text indexed
    /// without needing to run the tokenizer again.
    pub char_index: u16,
    pub char_length: u16,
}

/// This structure represent a matching word with informations
/// on the location of the word in the document.
///
/// The order of the field is important because it defines
/// the way these structures are ordered between themselves.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Highlight {
    /// The attribute in the document where the word was found
    /// along with the index in it.
    pub attribute: u16,

    /// The position in bytes where the word was found.
    ///
    /// It informs on the original word area in the text indexed
    /// without needing to run the tokenizer again.
    pub char_index: u16,

    /// The length in bytes of the found word.
    ///
    /// It informs on the original word area in the text indexed
    /// without needing to run the tokenizer again.
    pub char_length: u16,
}

BIN misc/crates-io-demo.gif Normal file
Binary file not shown. After: Size: 7.2 MiB