Rename MeiliDB into MeiliSearch

This commit is contained in:
Clément Renault
2019-11-26 11:06:55 +01:00
parent 58eaf78dc4
commit 7cc096e0a2
94 changed files with 126 additions and 126 deletions

View File

@@ -14,11 +14,11 @@ jobs:
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
artifact_name: meilidb-http
asset_name: meilidb-http-linux-amd64
artifact_name: meilisearch-http
asset_name: meilisearch-http-linux-amd64
- os: macos-latest
artifact_name: meilidb-http
asset_name: meilidb-http-macos-amd64
artifact_name: meilisearch-http
asset_name: meilisearch-http-macos-amd64
steps:
- uses: hecrj/setup-rust-action@master

View File

@@ -17,4 +17,4 @@ jobs:
name: getmeili/meilisearch
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
tags: true
tags: true

View File

@@ -5,7 +5,7 @@ name: Cargo test
jobs:
check:
name: MeiliDB
name: MeiliSearch
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1

20
Cargo.lock generated
View File

@@ -930,7 +930,7 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "meilidb-core"
name = "meilisearch-core"
version = "0.8.0"
dependencies = [
"arc-swap 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -948,9 +948,9 @@ dependencies = [
"indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"meilidb-schema 0.8.0",
"meilidb-tokenizer 0.8.0",
"meilidb-types 0.8.0",
"meilisearch-schema 0.8.0",
"meilisearch-tokenizer 0.8.0",
"meilisearch-types 0.8.0",
"once_cell 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rustyline 5.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -967,7 +967,7 @@ dependencies = [
]
[[package]]
name = "meilidb-http"
name = "meilisearch-http"
version = "0.8.0"
dependencies = [
"async-compression 0.1.0-alpha.7 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -982,8 +982,8 @@ dependencies = [
"jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"main_error 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"meilidb-core 0.8.0",
"meilidb-schema 0.8.0",
"meilisearch-core 0.8.0",
"meilisearch-schema 0.8.0",
"pretty-bytes 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1003,7 +1003,7 @@ dependencies = [
]
[[package]]
name = "meilidb-schema"
name = "meilisearch-schema"
version = "0.8.0"
dependencies = [
"bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1014,7 +1014,7 @@ dependencies = [
]
[[package]]
name = "meilidb-tokenizer"
name = "meilisearch-tokenizer"
version = "0.8.0"
dependencies = [
"deunicode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1022,7 +1022,7 @@ dependencies = [
]
[[package]]
name = "meilidb-types"
name = "meilisearch-types"
version = "0.8.0"
dependencies = [
"serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@@ -1,10 +1,10 @@
[workspace]
members = [
"meilidb-core",
"meilidb-http",
"meilidb-schema",
"meilidb-tokenizer",
"meilidb-types",
"meilisearch-core",
"meilisearch-http",
"meilisearch-schema",
"meilisearch-tokenizer",
"meilisearch-types",
]
[profile.release]

View File

@@ -23,7 +23,7 @@ RUN apk update --quiet
RUN apk add libressl
RUN apk add build-base
COPY --from=compiler /meilisearch/target/release/meilidb-http .
COPY --from=compiler /meilisearch/target/release/meilisearch-http .
ENV MEILI_HTTP_ADDR 0.0.0.0:8080
CMD ./meilidb-http
CMD ./meilisearch-http

View File

@@ -1,7 +1,7 @@
# MeiliDB
# MeiliSearch
[![Build Status](https://github.com/meilisearch/MeiliDB/workflows/Cargo%20test/badge.svg)](https://github.com/meilisearch/MeiliDB/actions)
[![dependency status](https://deps.rs/repo/github/meilisearch/MeiliDB/status.svg)](https://deps.rs/repo/github/meilisearch/MeiliDB)
[![Build Status](https://github.com/meilisearch/MeiliSearch/workflows/Cargo%20test/badge.svg)](https://github.com/meilisearch/MeiliSearch/actions)
[![dependency status](https://deps.rs/repo/github/meilisearch/MeiliSearch/status.svg)](https://deps.rs/repo/github/meilisearch/MeiliSearch)
[![License](https://img.shields.io/badge/license-commons%20clause-lightgrey)](https://commonsclause.com/)
Ultra relevant and instant full-text search API.
@@ -10,24 +10,24 @@ MeiliSearch is a powerful, fast, open-source, easy to use and deploy search engi
## Features
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L107-L113) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
- Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L283), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L265-L270) and [filter](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L246-L259) returned documents based on context defined rules
- Searches for [concatenated](https://github.com/meilisearch/MeiliDB/pull/164) and [splitted query words](https://github.com/meilisearch/MeiliDB/pull/232) to improve the search quality.
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-schema/src/lib.rs#L265-L279)
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-tokenizer/src/lib.rs) can index latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/lib.rs#L66-L88), useful to highlight matched words in results
- Accepts query time search config like the [searchable attributes](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L272-L275)
- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/store/mod.rs#L143-L173)
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/criterion/mod.rs#L107-L113) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
- Support [ranged queries](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/query_builder.rs#L283), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/query_builder.rs#L265-L270) and [filter](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/query_builder.rs#L246-L259) returned documents based on context defined rules
- Searches for [concatenated](https://github.com/meilisearch/MeiliSearch/pull/164) and [splitted query words](https://github.com/meilisearch/MeiliSearch/pull/232) to improve the search quality.
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-schema/src/lib.rs#L265-L279)
- The [default tokenizer](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-tokenizer/src/lib.rs) can index latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/lib.rs#L66-L88), useful to highlight matched words in results
- Accepts query time search config like the [searchable attributes](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/query_builder.rs#L272-L275)
- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/store/mod.rs#L143-L173)
It uses [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performances.
It uses [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliSearch/issues/82) and provides great performances.
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries or you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
We will be glad if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
We will be glad if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliSearch/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
[![crates.io demo gif](misc/crates-io-demo.gif)](https://crates.meilisearch.com)
@@ -37,7 +37,7 @@ We will be glad if you submit issues and pull requests. You can help to grow thi
## Quick Start
You can deploy your own instant, relevant and typo-tolerant MeiliDB search engine by yourself too.
You can deploy your own instant, relevant and typo-tolerant MeiliSearch search engine by yourself too.
Something similar to the demo above can be achieve by following these little three steps first.
You will need to create your own web front display to make it pretty though.
@@ -57,7 +57,7 @@ RUST_LOG=info cargo run --release
### Create an Index and Upload Some Documents
MeiliDB can serve multiple indexes, with different kinds of documents,
MeiliSearch can serve multiple indexes, with different kinds of documents,
therefore, it is required to create the index before sending documents to it.
```bash
@@ -137,10 +137,10 @@ We have seen much better performances when [using jemalloc as the global allocat
## Usage and Examples
MeiliDB also provides an example binary that is mostly used for features testing.
MeiliSearch also provides an example binary that is mostly used for features testing.
Notice that the example binary is faster to index data as it does read direct CSV files and not JSON HTTP payloads.
The _index_ subcommand has been made to create an index and inject documents into it. Using the command line below, the index will be named _movies_ and the _19 700_ movies of the `datasets/` will be injected in MeiliDB.
The _index_ subcommand has been made to create an index and inject documents into it. Using the command line below, the index will be named _movies_ and the _19 700_ movies of the `datasets/` will be injected in MeiliSearch.
```bash
cargo run --release --example from_file -- \

View File

@@ -1 +1 @@
_datas in movies.csv are from https://www.themoviedb.org/_
_datas in movies.csv are from https://www.themoviedb.org/_

View File

@@ -1,8 +1,8 @@
# A deep dive in MeiliDB
# A deep dive in MeiliSearch
On the 15 of May 2019.
MeiliDB is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
MeiliSearch is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
<!-- MarkdownTOC autolink="true" -->
@@ -22,7 +22,7 @@ MeiliDB is a full text search engine based on a final state transducer named [fs
## Where is the data stored?
MeiliDB is entirely backed by a key-value store like any good database (i.e. Postgres, MySQL). This brings a great flexibility in the way documents can be stored and updates handled along time.
MeiliSearch is entirely backed by a key-value store like any good database (i.e. Postgres, MySQL). This brings a great flexibility in the way documents can be stored and updates handled along time.
[sled will brings some](https://github.com/spacejam/sled/tree/434533332a3f485e6d2e467023be0a0b55d3a1af#plans) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent.
@@ -34,7 +34,7 @@ It contain the inverted word index, the schema and the documents fields.
### The inverted word index
[The inverted word index](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs) is a sled Tree dedicated to store and give access to all documents that contains a specific word. The information stored under the word is simply a big ordered array of where in the document the word has been found. In other word, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L35-L51).
[The inverted word index](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/words_index.rs) is a sled Tree dedicated to store and give access to all documents that contains a specific word. The information stored under the word is simply a big ordered array of where in the document the word has been found. In other word, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/lib.rs#L35-L51).
#### A final state transducer
@@ -42,27 +42,27 @@ _...also abbreviated fst_
This is the first entry point of the engine, you can read more about how it work with the beautiful blog post of @BurntSushi, [Index 1,600,000,000 Keys with Automata and Rust](https://blog.burntsushi.net/transducers/).
To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want, in MeiliDB [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want, in MeiliSearch [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
#### Document indexes
The `fst` will only return the words that match with the search automaton but the goal of the search engine is to retrieve all matches in all the documents when a query is made. You want it to return some sort of position in an attribute in a document, an information about where the given word matched.
To make it possible we retrieve all of the `DocIndex` corresponding to all the matching words in the fst, we use the [`WordsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding the words.
To make it possible we retrieve all of the `DocIndex` corresponding to all the matching words in the fst, we use the [`WordsIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding the words.
### The schema
The schema is a data structure that represents which documents attributes should be stored and which should be indexed. It is stored under a the [`MainIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/main_index.rs#L12) Tree and given to MeiliDB only at the creation of an index.
The schema is a data structure that represents which documents attributes should be stored and which should be indexed. It is stored under a the [`MainIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/main_index.rs#L12) Tree and given to MeiliSearch only at the creation of an index.
Each document attribute is associated to a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/schema.rs#L186).
Each document attribute is associated to a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/schema.rs#L186).
In the future, this schema type could be given along with updates, the database could be able to handled a new schema and reindex the database according to the new one.
### Document attributes
When the engine handle a query the result that the requester want is a document, not only the [`Matches`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L62-L88) associated to it, fields of the original document must be returned too.
When the engine handle a query the result that the requester want is a document, not only the [`Matches`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/lib.rs#L62-L88) associated to it, fields of the original document must be returned too.
So MeiliDB again uses the power of the underlying key-value store and save the documents attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/documents_index.rs#L11).
So MeiliSearch again uses the power of the underlying key-value store and save the documents attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/documents_index.rs#L11).
When a document field is saved in the key-value store its value is binary encoded using [message pack](https://github.com/3Hren/msgpack-rust), so a document must be serializable using serde.
@@ -70,26 +70,26 @@ When a document field is saved in the key-value store its value is binary encode
## How is a request processed?
Now that we have our inverted index we are able to return results based on a query. In the MeiliDB universe a query is a simple string containing words.
Now that we have our inverted index we are able to return results based on a query. In the MeiliSearch universe a query is a simple string containing words.
### Query lexemes
The first step to be able to call the underlying structures is to split the query in words, for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language, this is the hard part.
The first step to be able to call the underlying structures is to split the query in words, for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language, this is the hard part.
### Automatons and query index
So to query the fst we need an automaton, in MeiliDB we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), this automaton is constructed using a string and a maximum distance. According to the [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/automaton.rs#L59-L78) with different settings.
So to query the fst we need an automaton, in MeiliSearch we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), this automaton is constructed using a string and a maximum distance. According to the [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/automaton.rs#L59-L78) with different settings.
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst set. The `Stream` is able to return all the matching words. We use these words to find the whole list of `DocIndexes` associated.
With all these informations it is possible [to reconstruct a list of all the `DocIndexes` associated](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L103-L130) with the words queried.
With all these informations it is possible [to reconstruct a list of all the `DocIndexes` associated](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/query_builder.rs#L103-L130) with the words queried.
### Sort by criteria
Now that we are able to get a big list of [DocIndexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L21-L36) it is not enough to sort them by criteria, we need more informations like the levenshtein distance or the fact that a query word match exactly the word stored in the fst. So [we stuff it a little bit](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L86-L93), and aggregate all these [Matches](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L47-L74) for each document. This way it will be easy to sort a simple vector of document using a bunch of functions.
Now that we are able to get a big list of [DocIndexes](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L21-L36) it is not enough to sort them by criteria, we need more informations like the levenshtein distance or the fact that a query word match exactly the word stored in the fst. So [we stuff it a little bit](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L86-L93), and aggregate all these [Matches](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L47-L74) for each document. This way it will be easy to sort a simple vector of document using a bunch of functions.
With this big list of documents and associated matches [we are able to sort only the part of the slice that we want](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L160-L188) using bucket sorting. [Each criterion](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/criterion/mod.rs#L95-L101) is evaluated on each subslice without copy, thanks to [GroupByMut](https://docs.rs/slice-group-by/0.2.4/slice_group_by/) which, I hope [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
With this big list of documents and associated matches [we are able to sort only the part of the slice that we want](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/query_builder.rs#L160-L188) using bucket sorting. [Each criterion](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/criterion/mod.rs#L95-L101) is evaluated on each subslice without copy, thanks to [GroupByMut](https://docs.rs/slice-group-by/0.2.4/slice_group_by/) which, I hope [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
Note that it is possible to customize the criteria used by using the `QueryBuilder::with_criteria` constructor, this way you can implement some custom ranking based on the document attributes using the appropriate structure and the [`document` method](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/index.rs#L86).
Note that it is possible to customize the criteria used by using the `QueryBuilder::with_criteria` constructor, this way you can implement some custom ranking based on the document attributes using the appropriate structure and the [`document` method](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/index.rs#L86).
At this point, MeiliDB work is over 🎉
At this point, MeiliSearch work is over 🎉

View File

@@ -1,2 +0,0 @@
pub mod meilidb;
pub mod tide;

View File

@@ -1,5 +1,5 @@
[package]
name = "meilidb-core"
name = "meilisearch-core"
version = "0.8.0"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
@@ -17,9 +17,9 @@ hashbrown = { version = "0.6.0", features = ["serde"] }
heed = "0.5.0"
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
log = "0.4.8"
meilidb-schema = { path = "../meilidb-schema", version = "0.8.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.8.0" }
meilidb-types = { path = "../meilidb-types", version = "0.8.0" }
meilisearch-schema = { path = "../meilisearch-schema", version = "0.8.0" }
meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.8.0" }
meilisearch-types = { path = "../meilisearch-types", version = "0.8.0" }
once_cell = "1.2.0"
ordered-float = { version = "1.0.2", features = ["serde"] }
sdset = "0.3.3"

View File

@@ -12,8 +12,8 @@ use serde::{Deserialize, Serialize};
use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilidb_core::{Database, Highlight, ProcessedUpdateResult};
use meilidb_schema::SchemaAttr;
use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
use meilisearch_schema::SchemaAttr;
#[derive(Debug, StructOpt)]
struct IndexCommand {
@@ -124,7 +124,7 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
match index.main.schema(&writer)? {
Some(current_schema) => {
if current_schema != schema {
return Err(meilidb_core::Error::SchemaDiffer.into());
return Err(meilisearch_core::Error::SchemaDiffer.into());
}
writer.abort();
}
@@ -325,7 +325,7 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
let schema = index.main.schema(&reader)?;
reader.abort();
let schema = schema.ok_or(meilidb_core::Error::SchemaMissing)?;
let schema = schema.ok_or(meilisearch_core::Error::SchemaMissing)?;
let fields = command.displayed_fields.iter().map(String::as_str);
let fields = HashSet::from_iter(fields);

View File

@@ -6,7 +6,7 @@ use std::{cmp, vec};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::DFA;
use meilidb_tokenizer::{is_cjk, split_query_string};
use meilisearch_tokenizer::{is_cjk, split_query_string};
use crate::error::MResult;
use crate::store;

View File

@@ -1,6 +1,6 @@
use std::cmp::Ordering;
use meilidb_schema::SchemaAttr;
use meilisearch_schema::SchemaAttr;
use sdset::Set;
use slice_group_by::GroupBy;

View File

@@ -4,7 +4,7 @@ use std::fmt;
use crate::criterion::Criterion;
use crate::{RankedMap, RawDocument};
use meilidb_schema::{Schema, SchemaAttr};
use meilisearch_schema::{Schema, SchemaAttr};
/// An helper struct that permit to sort documents by
/// some of their stored attributes.
@@ -23,7 +23,7 @@ use meilidb_schema::{Schema, SchemaAttr};
///
/// ```ignore
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
/// use meilisearch::rank::criterion::*;
///
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
///

View File

@@ -25,7 +25,7 @@ pub use self::ranked_map::RankedMap;
pub use self::raw_document::RawDocument;
pub use self::store::Index;
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
pub use meilidb_types::{DocIndex, DocumentId, Highlight};
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
#[doc(hidden)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]

View File

@@ -695,7 +695,7 @@ mod tests {
use std::iter::FromIterator;
use fst::{IntoStreamer, Set};
use meilidb_schema::SchemaAttr;
use meilisearch_schema::SchemaAttr;
use sdset::SetBuf;
use tempfile::TempDir;
@@ -1672,7 +1672,7 @@ mod tests {
#[test]
fn deunicoded_synonyms() {
let mut store = TempDatabase::from_iter(vec![
("telephone", &[doc_index(0, 0)][..]), // meilidb indexes the unidecoded
("telephone", &[doc_index(0, 0)][..]), // meilisearch indexes the unidecoded
("téléphone", &[doc_index(0, 0)][..]), // and the original words on the same DocIndex
("iphone", &[doc_index(1, 0)][..]),
]);

View File

@@ -1,7 +1,7 @@
use std::io::{Read, Write};
use hashbrown::HashMap;
use meilidb_schema::SchemaAttr;
use meilisearch_schema::SchemaAttr;
use serde::{Deserialize, Serialize};
use crate::{DocumentId, Number};

View File

@@ -1,7 +1,7 @@
use std::fmt;
use std::sync::Arc;
use meilidb_schema::SchemaAttr;
use meilisearch_schema::SchemaAttr;
use sdset::SetBuf;
use slice_group_by::GroupBy;

View File

@@ -3,8 +3,8 @@ use std::convert::TryFrom;
use crate::{DocIndex, DocumentId};
use deunicode::deunicode_with_tofu;
use meilidb_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
use meilisearch_schema::SchemaAttr;
use meilisearch_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
use sdset::SetBuf;
const WORD_LENGTH_LIMIT: usize = 80;

View File

@@ -2,7 +2,7 @@ use std::collections::HashSet;
use std::io::Cursor;
use std::{error::Error, fmt};
use meilidb_schema::{Schema, SchemaAttr};
use meilisearch_schema::{Schema, SchemaAttr};
use serde::{de, forward_to_deserialize_any};
use serde_json::de::IoRead as SerdeJsonIoRead;
use serde_json::Deserializer as SerdeJsonDeserializer;

View File

@@ -1,4 +1,4 @@
use meilidb_schema::SchemaAttr;
use meilisearch_schema::SchemaAttr;
use serde::ser;
use serde::Serialize;

View File

@@ -1,4 +1,4 @@
use meilidb_schema::{Schema, SchemaAttr, SchemaProps};
use meilisearch_schema::{Schema, SchemaAttr, SchemaProps};
use serde::ser;
use crate::raw_indexer::RawIndexer;

View File

@@ -1,6 +1,6 @@
use heed::types::{ByteSlice, OwnedType};
use heed::Result as ZResult;
use meilidb_schema::SchemaAttr;
use meilisearch_schema::SchemaAttr;
use super::DocumentAttrKey;
use crate::DocumentId;

View File

@@ -2,7 +2,7 @@ use super::DocumentAttrKey;
use crate::DocumentId;
use heed::types::OwnedType;
use heed::Result as ZResult;
use meilidb_schema::SchemaAttr;
use meilisearch_schema::SchemaAttr;
#[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts {

View File

@@ -2,7 +2,7 @@ use crate::RankedMap;
use chrono::{DateTime, Utc};
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
use heed::Result as ZResult;
use meilidb_schema::Schema;
use meilisearch_schema::Schema;
use std::collections::HashMap;
use std::sync::Arc;

View File

@@ -21,7 +21,7 @@ pub use self::updates_results::UpdatesResults;
use std::collections::HashSet;
use heed::Result as ZResult;
use meilidb_schema::{Schema, SchemaAttr};
use meilisearch_schema::{Schema, SchemaAttr};
use serde::de::{self, Deserialize};
use zerocopy::{AsBytes, FromBytes};

View File

@@ -1,7 +1,7 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use fst::{SetBuilder, Streamer};
use meilidb_schema::Schema;
use meilisearch_schema::Schema;
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
use crate::database::{UpdateEvent, UpdateEventsEmitter};

View File

@@ -30,7 +30,7 @@ use log::debug;
use serde::{Deserialize, Serialize};
use crate::{store, DocumentId, MResult};
use meilidb_schema::Schema;
use meilisearch_schema::Schema;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Update {

View File

@@ -1,4 +1,4 @@
use meilidb_schema::{Diff, Schema};
use meilisearch_schema::{Diff, Schema};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
@@ -21,7 +21,7 @@ pub fn apply_schema_update(
let mut need_full_reindexing = false;
if let Some(old_schema) = main_store.schema(writer)? {
for diff in meilidb_schema::diff(&old_schema, new_schema) {
for diff in meilisearch_schema::diff(&old_schema, new_schema) {
match diff {
Diff::IdentChange { .. } => return Err(CannotUpdateSchemaIdentifier.into()),
Diff::AttrMove { .. } => return Err(CannotReorderSchemaAttribute.into()),

View File

@@ -1,5 +1,5 @@
[package]
name = "meilidb-http"
name = "meilisearch-http"
version = "0.8.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
@@ -18,8 +18,8 @@ indexmap = { version = "1.3.0", features = ["serde-1"] }
isahc = "0.7.6"
log = "0.4.8"
main_error = "0.1.0"
meilidb-core = { path = "../meilidb-core", version = "0.8.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.8.0" }
meilisearch-core = { path = "../meilisearch-core", version = "0.8.0" }
meilisearch-schema = { path = "../meilisearch-schema", version = "0.8.0" }
pretty-bytes = "0.2.2"
rand = "0.7.2"
rayon = "1.2.0"

View File

@@ -5,7 +5,7 @@ use std::sync::Arc;
use chrono::{DateTime, Utc};
use heed::types::{SerdeBincode, Str};
use log::error;
use meilidb_core::{Database, Error as MError, MResult};
use meilisearch_core::{Database, Error as MError, MResult};
use sysinfo::Pid;
use crate::option::Opt;

View File

@@ -1,10 +1,10 @@
use crate::routes::setting::{RankingOrdering, SettingBody};
use indexmap::IndexMap;
use log::error;
use meilidb_core::criterion::*;
use meilidb_core::Highlight;
use meilidb_core::{Index, RankedMap};
use meilidb_schema::{Schema, SchemaAttr};
use meilisearch_core::criterion::*;
use meilisearch_core::Highlight;
use meilisearch_core::{Index, RankedMap};
use meilisearch_schema::{Schema, SchemaAttr};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::cmp::Ordering;
@@ -57,8 +57,8 @@ impl fmt::Display for Error {
}
}
impl From<meilidb_core::Error> for Error {
fn from(error: meilidb_core::Error) -> Self {
impl From<meilisearch_core::Error> for Error {
fn from(error: meilisearch_core::Error) -> Self {
Error::Internal(error.to_string())
}
}

View File

@@ -0,0 +1,2 @@
pub mod meilisearch;
pub mod tide;

View File

@@ -3,7 +3,7 @@ use crate::models::token::*;
use crate::Data;
use chrono::Utc;
use heed::types::{SerdeBincode, Str};
use meilidb_core::Index;
use meilisearch_core::Index;
use serde_json::Value;
use tide::Context;
@@ -106,7 +106,7 @@ impl ContextExt for Context<Data> {
let name = self
.param::<Value>("identifier")
.as_ref()
.map(meilidb_core::serde::value_to_string)
.map(meilisearch_core::serde::value_to_string)
.map_err(|e| ResponseError::bad_parameter("identifier", e))?
.ok_or(ResponseError::bad_parameter(
"identifier",

View File

@@ -8,10 +8,10 @@ use structopt::StructOpt;
use tide::middleware::{CorsMiddleware, CorsOrigin};
use tide_log::RequestLogger;
use meilidb_http::data::Data;
use meilidb_http::option::Opt;
use meilidb_http::routes;
use meilidb_http::routes::index::index_update_callback;
use meilisearch_http::data::Data;
use meilisearch_http::option::Opt;
use meilisearch_http::routes;
use meilisearch_http::routes::index::index_update_callback;
mod analytics;

View File

@@ -1,7 +1,7 @@
use std::collections::HashSet;
use indexmap::IndexMap;
use meilidb_schema::{Schema, SchemaBuilder, SchemaProps};
use meilisearch_schema::{Schema, SchemaBuilder, SchemaProps};
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]

View File

@@ -3,7 +3,7 @@ use structopt::StructOpt;
#[derive(Debug, Clone, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[structopt(long, env = "MEILI_DB_PATH", default_value = "/tmp/meilidb")]
#[structopt(long, env = "MEILI_DB_PATH", default_value = "/tmp/meilisearch")]
pub db_path: String,
/// The address on which the http server will listen.

View File

@@ -19,7 +19,7 @@ pub async fn get_document(ctx: Context<Data>) -> SResult<Response> {
let index = ctx.index()?;
let identifier = ctx.identifier()?;
let document_id = meilidb_core::serde::compute_document_id(identifier.clone());
let document_id = meilisearch_core::serde::compute_document_id(identifier.clone());
let env = &ctx.state().db.env;
let reader = env.read_txn().map_err(ResponseError::internal)?;
@@ -47,7 +47,7 @@ pub async fn delete_document(ctx: Context<Data>) -> SResult<Response> {
let index = ctx.index()?;
let identifier = ctx.identifier()?;
let document_id = meilidb_core::serde::compute_document_id(identifier.clone());
let document_id = meilisearch_core::serde::compute_document_id(identifier.clone());
let env = &ctx.state().db.env;
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
@@ -117,8 +117,8 @@ pub async fn get_all_documents(ctx: Context<Data>) -> SResult<Response> {
Ok(tide::response::json(response_body))
}
fn infered_schema(document: &IndexMap<String, Value>) -> Option<meilidb_schema::Schema> {
use meilidb_schema::{SchemaBuilder, DISPLAYED, INDEXED};
fn infered_schema(document: &IndexMap<String, Value>) -> Option<meilisearch_schema::Schema> {
use meilisearch_schema::{SchemaBuilder, DISPLAYED, INDEXED};
let mut identifier = None;
for key in document.keys() {
@@ -206,9 +206,9 @@ pub async fn delete_multiple_documents(mut ctx: Context<Data>) -> SResult<Respon
let mut documents_deletion = index.documents_deletion();
for identifier in data {
if let Some(identifier) = meilidb_core::serde::value_to_string(&identifier) {
if let Some(identifier) = meilisearch_core::serde::value_to_string(&identifier) {
documents_deletion
.delete_document_by_id(meilidb_core::serde::compute_document_id(identifier));
.delete_document_by_id(meilisearch_core::serde::compute_document_id(identifier));
}
}

View File

@@ -1,8 +1,8 @@
use chrono::{DateTime, Utc};
use http::StatusCode;
use log::error;
use meilidb_core::ProcessedUpdateResult;
use meilidb_schema::{Schema, SchemaBuilder};
use meilisearch_core::ProcessedUpdateResult;
use meilisearch_schema::{Schema, SchemaBuilder};
use rand::seq::SliceRandom;
use serde::{Deserialize, Serialize};
use serde_json::json;

View File

@@ -2,14 +2,14 @@ use std::collections::HashMap;
use std::collections::HashSet;
use std::time::Duration;
use meilidb_core::Index;
use meilisearch_core::Index;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use serde::{Deserialize, Serialize};
use tide::querystring::ContextExt as QSContextExt;
use tide::{Context, Response};
use crate::error::{ResponseError, SResult};
use crate::helpers::meilidb::{Error, IndexSearchExt, SearchHit};
use crate::helpers::meilisearch::{Error, IndexSearchExt, SearchHit};
use crate::helpers::tide::ContextExt;
use crate::Data;

View File

@@ -1,5 +1,5 @@
[package]
name = "meilidb-schema"
name = "meilisearch-schema"
version = "0.8.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"

View File

@@ -1,5 +1,5 @@
[package]
name = "meilidb-tokenizer"
name = "meilisearch-tokenizer"
version = "0.8.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"

View File

@@ -1,5 +1,5 @@
[package]
name = "meilidb-types"
name = "meilisearch-types"
version = "0.8.0"
authors = ["Clément Renault <renault.cle@gmail.com>"]
edition = "2018"

View File

@@ -1,6 +1,6 @@
# Typo and Ranking rules
This is an explanation of the default rules used in MeiliDB.
This is an explanation of the default rules used in MeiliSearch.
First we have to explain some terms that are used in this reading.
@@ -44,7 +44,7 @@ This means that "satuday", which is 7 characters long, use the second rule and e
## Ranking rules
All documents that have been aggregated using the typo rules above can now be sorted. MeiliDB uses a bucket sort.
All documents that have been aggregated using the typo rules above can now be sorted. MeiliSearch uses a bucket sort.
What is a bucket sort? We sort all the documents with the first rule, for all documents that can't be separated we create a group and sort it using the second rule, and so on.