Compare commits


457 Commits
v0.1 ... v0.4.1

Author SHA1 Message Date
87e5998489 Merge pull request #194 from meilisearch/set-code-public
Set code public
2019-09-19 18:25:13 +02:00
d7d1b6ff02 chore: reformat tests 2019-09-19 18:08:25 +02:00
7073b42afa feat: get update status Enqueued / Processed / Unknown 2019-09-19 18:08:14 +02:00
120d209e66 chore: set public SchemaProps values 2019-09-19 12:43:36 +02:00
62e981c6b8 chore: set public the main duration on update status 2019-09-19 12:43:36 +02:00
941302a4be chore: export ranked map 2019-09-19 12:43:36 +02:00
20f423268e chore: re-export database::Error type 2019-09-19 12:43:36 +02:00
522013425b chore: export a getter for synonyms 2019-09-19 12:43:35 +02:00
e3c413759f chore: implement deref on CommonIndex 2019-09-19 12:43:35 +02:00
6ed97d1c19 chore: re-export UpdateType/DetailedDuration/UpdateStatus 2019-09-19 12:43:35 +02:00
53ad1fc068 chore: split tests into multiples files 2019-09-19 12:43:35 +02:00
1e2ef06c5c Merge pull request #196 from meilisearch/fix-cf-handle-creation
Create the Column Family only when it doesn't already exist
2019-09-19 12:29:50 +02:00
9db86f13f3 fix: Only create the Column Family when it doesn't already exist 2019-09-19 12:02:34 +02:00
369461e635 Merge pull request #195 from meilisearch/update-readme
Update the README
2019-09-19 12:01:09 +02:00
d2d22ac76d doc: Update the README and refer to examples instead of the main binary 2019-09-19 12:00:34 +02:00
a5a19fc9dd Merge pull request #193 from meilisearch/get-documents-id
Add a method to get an iterator over all documents ids
2019-09-18 16:09:30 +02:00
a36c991897 feat: add a method to get an iterator over all documents ids 2019-09-18 15:41:06 +02:00
4f71219e17 Merge pull request #192 from meilisearch/bump-dependencies
Bump dependencies
2019-09-18 15:10:15 +02:00
69e0bae75e chore: Bump dependencies 2019-09-18 14:42:23 +02:00
1b18679950 Merge pull request #191 from meilisearch/typed-settings
Typed settings
2019-09-18 14:04:07 +02:00
e1c119b5a8 chore: add test for custom settings 2019-09-18 12:22:26 +02:00
03709910fd feat: add typed index custom settings for common uses 2019-09-18 12:22:21 +02:00
8fdb330195 Merge pull request #190 from meilisearch/bump-dependencies-versions
Bump dependency
2019-09-18 10:29:22 +02:00
59ae6458dc chore: bump dependencies 2019-09-17 18:50:44 +02:00
c10b701b9a Merge pull request #189 from meilisearch/documents-fields-repartition
Add the documents fields repartition into stats
2019-09-17 16:23:49 +02:00
80caa8b60d feat: add the documents fields repartition into stats 2019-09-17 15:56:13 +02:00
97cf5cca2a Merge pull request #188 from meilisearch/delete-index
Delete an index
2019-09-17 14:25:38 +02:00
3e76dc718b feat: delete an index and all it's associated data 2019-09-17 13:29:56 +02:00
5a17b5a63b Merge pull request #187 from meilisearch/export-snapshots
Re-export rocksdb snapshot function
2019-09-17 12:54:14 +02:00
5bc5185ac5 feat: re-export rocksdb snapshot function 2019-09-17 11:37:17 +02:00
3712fa7c24 Merge pull request #186 from meilisearch/common-db-tree
feat: expose a common DB tree for the database
2019-09-16 19:08:52 +02:00
918cc235a4 feat: expose a common DB tree for the database 2019-09-16 16:05:05 +02:00
8d24e54fa1 Merge pull request #185 from meilisearch/serde-schema
Implement De/Serialize on schema
2019-09-16 15:18:02 +02:00
35b7b58ff7 feat: Remove the Schema to/from_toml/json/bin methods 2019-09-16 14:50:38 +02:00
ffc29a319f feat: Implement De/Serialize on schema 2019-09-16 14:50:37 +02:00
ba3ac5ea7b chore: Create an internal Schema::to_builder method 2019-09-16 14:50:37 +02:00
ee6a54fe4c feat: Replace the linked-hash-map dependency by indexmap 2019-09-16 14:50:37 +02:00
f6ff79085e Merge pull request #184 from meilisearch/unify-update-types
Unify the Update and UpdateOwned types
2019-09-16 14:00:12 +02:00
bcd38c7d5a feat: Unify the Update and UpdateOwned types 2019-09-16 12:33:08 +02:00
aaeb25828f Merge pull request #183 from meilisearch/number-of-documents
Compute the number of documents on updates
2019-09-14 16:32:18 +02:00
af26c39482 test: Improve the tests of the number of documents counting 2019-09-14 15:29:46 +02:00
2006259a23 feat: Improve the number of documents counting 2019-09-14 15:26:41 +02:00
707e2f4d77 feat: Update the number of documents in the KV 2019-09-14 15:26:39 +02:00
8d8aed36a8 feat: Count the number of deleted/inserted documents 2019-09-14 15:24:39 +02:00
2658ef0176 Merge pull request #182 from meilisearch/replace-sled-by-rocksdb
Replace sled by RocksDB
2019-09-14 11:32:26 +02:00
400d542fef feat: Update the README to reflect the kv store update 2019-09-12 16:28:23 +02:00
f46868407c feat: Make RocksDB works seemlessly like sled 2019-09-05 18:43:10 +02:00
e3fa07077c feat: Introduce the CfTree and CfIter types 2019-09-05 14:53:09 +02:00
e5763e73eb chore: Prefer using const names to avoid typos 2019-09-05 13:22:53 +02:00
fd880e0a0e Merge pull request #175 from meilisearch/moving-back-to-sled
Moving back to sled
2019-09-05 13:14:48 +02:00
e33cc89846 feat: Introduce update callbacks 2019-09-05 11:48:26 +02:00
f40b373f9f feat: Introduce the UpdateStatus type 2019-09-05 11:48:26 +02:00
cd8535d410 feat: Introduce the update_status/_blocking functions 2019-09-05 11:48:25 +02:00
f07b99fe97 fix: Make the tests work with the new update system 2019-09-05 11:48:25 +02:00
f45a00df3b fix: Cloned ArcSwaps are unsynchronized versions 2019-09-05 11:46:02 +02:00
cd864c40bc feat: Make the update update serialization be based on message pack 2019-09-05 11:46:02 +02:00
91b44a2759 chore: Change the Box<Error> to be marked dyn 2019-09-05 11:46:01 +02:00
d8cd8c5def chore: Move the updates in their own module 2019-09-05 11:46:01 +02:00
b0be06540a chore: Simplify the update application 2019-09-05 11:46:01 +02:00
4deee93a55 feat: Introduce synonyms deletion using the update system 2019-09-05 11:33:11 +02:00
451c0a6d03 feat: Introduce synonyms addition using the update system 2019-09-05 11:33:10 +02:00
0db3e6c58c feat: Introduce documents deletion using the update system 2019-09-05 11:33:10 +02:00
f83d6df4ef feat: Introduce documents addition using the update system 2019-09-05 11:33:10 +02:00
5a9e25c315 feat: Introduce the UpdatesIndex type 2019-09-05 11:14:11 +02:00
50e3c2c3de chore: Upgrade the meilidb-data dependencies 2019-09-05 10:49:46 +02:00
093ee9732f Merge pull request #180 from meilisearch/store-every-document
Change the STORED attribute property by DISPLAYED
2019-09-04 14:45:00 +02:00
333189ee51 fix: Change every stored schema property by displayed 2019-09-04 11:16:36 +02:00
50b8a66794 feat: Change the STORED attribute property by DISPLAYED 2019-09-03 11:14:20 +02:00
8be3fc1a66 Merge pull request #179 from meilisearch/deunicode-before-tokenize
Improve the tokenizer by split after deunicode
2019-09-02 17:20:30 +02:00
b5503989f9 feat: Improve the tokenizer by split after deunicode 2019-09-02 16:54:54 +02:00
5b8bc09826 Merge pull request #176 from meilisearch/no-more-hanging-threads
Replace the rayon::scope by always checking time
2019-09-01 20:02:03 +02:00
c8ee21f227 feat: Replace the rayon::scope by always checking time 2019-09-01 18:52:38 +02:00
a420fbf1e8 Merge pull request #174 from meilisearch/arc-fst-sets
Do not clone probably large fst::Sets, Arc them
2019-08-30 14:52:28 +02:00
ca34c28335 feat: Do not clone probably large fst::Sets, Arc them 2019-08-30 14:37:28 +02:00
3e1b81c4ce Merge pull request #173 from meilisearch/fix-ranked-map-set
Use the right ranked-map key name
2019-08-30 14:21:14 +02:00
9b353dfda6 chore: Use const names to avoid typos 2019-08-30 12:36:10 +02:00
d8dcc6f34b fix: Use the right ranked-map key name 2019-08-30 12:21:00 +02:00
fba1272a3e Merge pull request #172 from meilisearch/expose-internal-functions
Expose some internal functions
2019-08-29 15:26:42 +02:00
e20a038970 fix: Expose some internal functions 2019-08-29 15:11:51 +02:00
6f34dccc89 Merge pull request #171 from meilisearch/stringify-document-id
Transform identifiers fields into a string before hashing it
2019-08-29 13:42:46 +02:00
f5b0eb044a fix: Transform the identifier value into a string before hashing it 2019-08-29 11:41:20 +02:00
bae86e978e Merge pull request #170 from meilisearch/async-word-index-fetching-with-rayon-scope
Async word index fetching with rayon scope
2019-08-28 14:37:38 +02:00
8030a822ab test: Add a way to setup the fetch timeout of the query-database example 2019-08-28 13:42:20 +02:00
9c5ec110e5 feat: Introduce a way to enable or disable query timeouts 2019-08-28 13:24:34 +02:00
67302d09f3 feat: Multiword rewrite while there is time 2019-08-19 11:12:23 +02:00
7dc9ea78fa feat: Make the automaton DFA construction lazy 2019-08-19 11:12:23 +02:00
0ee56314fb feat: Try to simplify Store trait bound with a rayon scope 2019-08-19 11:10:54 +02:00
b7b60b5fe5 feat: Introduce a new thread to avoid waiting on doc indexes fetchs 2019-08-16 16:35:19 +02:00
d9c9fafd78 feat: Fetch doc indexes while there is time 2019-08-16 15:01:25 +02:00
bb0a79c577 feat: Process automatons in the order they were sort 2019-08-16 12:25:35 +02:00
81d44a0854 feat: Order automatons by importance 2019-08-16 12:19:34 +02:00
ebc95cb8f2 feat: Display the documents fields in the order they were declared 2019-08-16 11:25:42 +02:00
a488c00a2e feat: Use RustyLine in the query-database example 2019-08-16 11:25:42 +02:00
bf3c2c3725 feat: Move the multi-word rewriting algorithm into its own function 2019-08-16 11:25:42 +02:00
89df496f0c feat: Separate highlights from matches to make the code easier to follow 2019-08-16 11:25:42 +02:00
9959f2e952 feat: Move the RawDocument type to its own module 2019-08-16 11:25:42 +02:00
795557c046 feat: Remove query splitting from the automaton generation 2019-08-16 11:25:42 +02:00
225a3bf184 test: Produce tests that work with the new cumulative word index system 2019-08-16 11:25:42 +02:00
e65d7418b7 feat: Remove the query index from the Automaton type 2019-08-16 11:25:42 +02:00
f478bbf826 feat: Introduce the QueryEnhancer in the query synonym system 2019-08-16 11:25:42 +02:00
5e691c2140 feat: Introduce the QueryEnhancer type 2019-08-16 11:25:42 +02:00
e0cadaa68d Merge pull request #165 from meilisearch/reorder-schema-attributes
Reorder schema attributes
2019-07-01 16:12:33 +02:00
9175e4686b feat: Collect TmpMatches only on tests, producing data useful for tests 2019-07-01 14:55:47 +02:00
e8afca614c chore: Little clean ups of meilidb-core 2019-07-01 14:34:06 +02:00
4f4b630ae9 fix: Make the examples compile with the new Highlight type 2019-07-01 12:06:17 +02:00
6b6db2f8e6 feat: Introduce the Highlight type to simplify the data oriented design 2019-07-01 12:06:16 +02:00
b7ed22bc59 feat: Introduce on the fly attributes reordering with meilidb-core 2019-07-01 12:03:31 +02:00
97cc3c7cce Merge pull request #166 from meilisearch/split-query-words
Split query words
2019-06-28 18:30:13 +02:00
f5d52396f5 feat: Support query words splits 2019-06-28 18:04:35 +02:00
9cc154da05 chore: Rewrite tests to use iterators and be easily testable 2019-06-28 18:04:35 +02:00
5aa49d232c feat: Rewrite Automaton generation related code 2019-06-28 18:04:35 +02:00
1cb42cbb30 Merge pull request #164 from meilisearch/concat-query-words
Support query words concatenation
2019-06-28 18:03:49 +02:00
9f320590d3 feat: Support query words concatenation 2019-06-27 10:14:17 +02:00
1b0fd2e0ba Merge pull request #160 from meilisearch/synonyms
Support all types of synonyms
2019-06-26 14:59:45 +02:00
b249b2a81b feat: Support removing specific synonym alternatives 2019-06-26 10:45:51 +02:00
0a5d4eb7ed feat: Normalize synonym strings and query strings to search for synonyms 2019-06-26 10:45:51 +02:00
3dcbc737f3 feat: Make synonyms be not considered like exact matches 2019-06-26 10:45:51 +02:00
43f11e929d fix: Do not trigger a synonym when its not the last word and is a prefix 2019-06-26 10:45:51 +02:00
8f2a551cca feat: Trigger synonym replacement only when the last word is tipped 2019-06-26 10:45:50 +02:00
8f044c6853 fix: Only create non-prefix DFA when generating synonyms alternatives 2019-06-26 10:45:50 +02:00
a76c00a787 feat: Create types to edit synonyms and keep them in the database 2019-06-26 10:45:50 +02:00
0633f16b4d feat: Make multi-word support multi-word synonyms 2019-06-26 10:45:50 +02:00
59fafb8b30 feat: Support one word has multi-word alternatives 2019-06-26 10:45:50 +02:00
d2bd99cc2a fix: Append DocIndexes when building InMemorySetStore from an Iterator 2019-06-26 10:45:50 +02:00
62930ecc4e feat: Deduplicate automatons when synonyms produce duplicated ones 2019-06-26 10:45:49 +02:00
6cb57aa8a4 feat: Unique word has multi-word synonyms basically work 2019-06-26 10:45:49 +02:00
9861c3878e tests: Add more tests about synonyms 2019-06-26 10:45:49 +02:00
707d7b062b feat: Made query handle synonyms via the Store 2019-06-26 10:45:49 +02:00
18736bdcd0 feat: Introduce the synonyms concept to the Store trait 2019-06-26 10:45:49 +02:00
e8b2e86007 feat: Introduce a basic way to handle synonyms 2019-06-26 10:45:48 +02:00
ae8b4f56f2 Merge pull request #163 from meilisearch/export-compute-docid
Expose a function to compute the DocumentId from an Hashable value
2019-06-25 12:25:38 +02:00
28a0074497 feat: Expose a function to compute the DocumentId from an Hashable value 2019-06-25 11:21:12 +02:00
71c039db09 Merge pull request #162 from meilisearch/trustful-hash
Prefer using a reliable SipHash to compute document ids
2019-06-22 11:51:52 +02:00
15646c258b fix: Prefer using a reliable SipHash to compute document ids 2019-06-22 11:22:21 +02:00
25a5605b35 Merge pull request #161 from meilisearch/remove-tide
Remove tide as it break compilation on the latest nightly
2019-06-18 14:04:47 +02:00
b630e32c6a fix: Remove tide as it break compilation on the latest nightly 2019-06-18 13:40:46 +02:00
c39254bf98 Merge pull request #159 from meilisearch/create-specific-schema-crate
Move the Schema to its own workspace crate
2019-06-03 09:17:14 +02:00
994a0e78f1 feat: Move the Schema to its own workspace crate 2019-05-29 15:37:28 +02:00
ab2ca15c5c Merge pull request #158 from meilisearch/moving-back-to-rocksdb
Moving back to RocksDB
2019-05-29 14:56:55 +02:00
07f447c457 feat: Force RocksDB compaction 2019-05-28 17:38:59 +02:00
62c8f1ba04 feat: Fix the index opening when index already exists 2019-05-26 11:36:47 +02:00
e08edc2d6b feat: Introduce some stats to ease debugging 2019-05-25 12:12:24 +02:00
a147c09b06 feat: Make more functions accessible on the custom settings 2019-05-24 14:37:04 +02:00
9fca74443e feat: Wrap the database index access to improve usability 2019-05-24 14:26:05 +02:00
6f258f71d5 feat: Implement some convenient accessors for custom settings 2019-05-23 15:43:41 +02:00
ce61c16dbe feat: Disable all the default RocksDB compression features 2019-05-23 15:35:53 +02:00
4c973238a1 feat: Introduce a basic RocksDB based version 2019-05-23 14:57:29 +02:00
3a8da82792 Merge pull request #157 from meilisearch/update-readme
Fix some badly spelled sentences
2019-05-22 14:01:33 +02:00
f10da122ff doc: Fix some badly spelled sentences 2019-05-22 11:41:03 +02:00
ec20a8cacb Merge pull request #156 from meilisearch/clippy-pass
Do a little clippy pass
2019-05-22 11:33:55 +02:00
102fb506db chore: Do a little clippy pass 2019-05-22 11:00:58 +02:00
34ba520f44 Merge pull request #155 from meilisearch/update-sdset
Use safest SetBuf constructor instead of new_unchecked
2019-05-21 18:23:39 +02:00
fa099555c0 feat: Use safest SetBuf constructor instead of new_unchecked 2019-05-21 18:15:48 +02:00
8387c5b14e Merge pull request #153 from meilisearch/example-expose-system-stats
Output more informations from the examples on document injection
2019-05-21 16:50:25 +02:00
5040095228 feat: Output more informations from the examples on document injection 2019-05-21 16:37:17 +02:00
788fae59a1 Merge pull request #154 from meilisearch/reintroduce-sort-by-attr
Reintroduce the `SortByAttr` custom criterion
2019-05-21 16:32:12 +02:00
e042f44e0d feat: Reintroduce the SortByAttr custom criterion 2019-05-21 16:22:23 +02:00
b1fc3e5cec Merge pull request #152 from meilisearch/documents-deletion-updates-ranked-map
Remove the documents from the ranked map on documents deletion
2019-05-21 13:59:21 +02:00
d7b1b7a2a9 feat: Remove the documents from the ranked map on documents deletion 2019-05-21 13:33:42 +02:00
97744ad24f Merge pull request #151 from meilisearch/expose-sled-compression-factor
Expose the sled compression setting
2019-05-20 15:03:43 +02:00
2e79b2a871 feat: Expose the sled compression setting 2019-05-20 14:41:15 +02:00
349f0f7068 Merge pull request #148 from meilisearch/split-fst-docindexes
Split fst doc-indexes
2019-05-20 14:24:48 +02:00
94f9587db1 feat: Implement Debug on RawDocument for more convenience 2019-05-20 11:21:41 +02:00
6df8f62022 test: Add more test to some criteria 2019-05-20 11:21:40 +02:00
8c71473498 feat: Introduce the Criterion::name to allow better debugging 2019-05-20 11:21:40 +02:00
08d89053da feat: Introduce a little simple http server for demo 2019-05-16 17:09:41 +02:00
4b36fa0739 test: Add tests about additions and deletions of documents 2019-05-16 13:44:21 +02:00
921b063a71 feat: Make the DocumentsDeletion public interface to take serde types 2019-05-16 12:04:08 +02:00
3de633c869 feat: Reexport sled to reduce user level library incompatibilities 2019-05-16 12:04:08 +02:00
021f0545eb doc: Update the deep-dive explanation text 2019-05-16 12:04:08 +02:00
b701eb85b8 doc: Update the README features links 2019-05-16 12:04:08 +02:00
4e80378a77 chore: Rename the ebay example into kaggle 2019-05-16 12:04:07 +02:00
830d2f28b9 feat: Introduce a custom tree for user custom settings 2019-05-16 12:04:07 +02:00
c5ba34d0b0 chore: Replace crate only public interface to be completely public 2019-05-16 12:04:07 +02:00
2e31bb519a chore: Split the database structure internal types 2019-05-16 12:04:07 +02:00
169bd4cb39 feat: Store all documents words by document rather than by attribute 2019-05-15 15:42:13 +02:00
aa90f22865 feat: Remove the Index dependency of the Serializer 2019-05-15 15:42:12 +02:00
9bba90c47e fix: Fix a bug in the Database open-index method 2019-05-15 15:42:12 +02:00
2844cb5bca fix: Make the examples compile 2019-05-15 15:42:12 +02:00
dff81bb161 feat: Prefer set/del methods instead of set with an Option type 2019-05-15 15:42:12 +02:00
1f2abce7c3 feat: Introduce the DocumentsDeletion type 2019-05-15 15:42:11 +02:00
e67ada8823 feat: Introduce the DocumentsAddition type 2019-05-15 15:42:11 +02:00
42e39f6eb5 feat: Introduce a simplified version of the Store trait 2019-05-15 15:42:11 +02:00
f317a7a322 feat: implement open/create_index on the Database type 2019-05-15 15:42:11 +02:00
8434ecbb43 feat: Introduce the RankedMap real type 2019-05-15 15:42:10 +02:00
0c18026240 feat: Introduce Tree wrappers for each index component 2019-05-15 15:42:10 +02:00
6eb25687f8 feat: Handle word doc-indexes sled tree errors 2019-05-15 15:42:10 +02:00
737db5668b chore: Remove the WriteToBytes trait 2019-05-15 15:42:10 +02:00
f16e0333e4 chore: Remove the SharedData/Cursor types 2019-05-15 15:42:09 +02:00
27ffcaabe9 chore: Remove the DocIndexes type 2019-05-15 15:42:09 +02:00
db031a5b95 chore: Remove the DocIds type 2019-05-15 15:42:09 +02:00
2e9fbd07cd chore: Remove most of the warnings 2019-05-15 15:42:09 +02:00
74acf83464 chore: Remove the NewIndexEvent type 2019-05-15 15:42:08 +02:00
3dc057ca9c feat: Introduce the new Index system 2019-05-15 15:42:08 +02:00
e142339106 Merge pull request #150 from felixonmars/patch-1
chore: Fix some typos
2019-05-06 15:00:53 +02:00
39038750a8 chore: Fix some typos 2019-05-06 20:12:33 +08:00
f68733bf11 Merge pull request #149 from meilisearch/ci-only-nightly
Update ci with rust nightly only
2019-05-02 15:43:53 +02:00
85edb3e90c Update ci with rust nightly only 2019-05-02 11:43:45 +02:00
d7ce6d016b Merge pull request #147 from meilisearch/moving-to-sled
Make the repository a workspace and move to sled
2019-04-29 15:21:02 +02:00
9023a12ad4 feat: Introduce the unrankable error variant 2019-04-29 14:32:04 +02:00
0547671246 feat: Take ranked attributes into account 2019-04-29 14:32:04 +02:00
068f1bc202 feat: Index unidecoded words 2019-04-29 14:32:04 +02:00
7035f76077 squash-me: Make better measurements of the retrieving spent time 2019-04-29 14:32:04 +02:00
f0268d49fe fix: Always lowercase indexed tokens 2019-04-29 14:32:04 +02:00
7dbf5d6319 fix: Make the examples build 2019-04-29 14:32:03 +02:00
ed6b6038ee feat: Finalize index merging on document insertion 2019-04-29 14:32:03 +02:00
ad24ef8a25 feat: Index words of structs, maps and tuples 2019-04-29 14:32:03 +02:00
645bab7748 feat: Index documents using the Serializer struct 2019-04-29 14:32:03 +02:00
abd7d1de48 feat: Introduce the extract_document_id function 2019-04-29 14:32:03 +02:00
ea0ee070ef feat: Introduce the Serializer
Which will serialize documents fields as message pack in the kv-store
2019-04-29 14:32:03 +02:00
2a69170f14 feat: Introduce the DocumentsDeletion type 2019-04-29 14:32:02 +02:00
725e7b4229 chore: Move the Deserializer into the the serde module 2019-04-29 14:32:02 +02:00
187e6740bd feat: Allow users to construct query builders from database indexes 2019-04-29 14:32:02 +02:00
4b40d5b0d4 feat: Introduce the Index struct 2019-04-29 14:32:02 +02:00
ee2bad20c7 feat: Store the RankedMap into the inner sled tree 2019-04-29 14:32:02 +02:00
b7805fee93 feat: Store already opened indexes and word indexes 2019-04-29 14:32:02 +02:00
0104e93ba9 feat: Introduce index events to update the WordIndex 2019-04-29 14:32:02 +02:00
25a4961453 feat: Introduce the Indexer struct 2019-04-29 14:32:01 +02:00
7338e522bd squash-me: Add set/get/del_document_attribute to Index methods 2019-04-29 14:32:01 +02:00
58c020a2e1 feat: Store the word index into the database index 2019-04-29 14:32:01 +02:00
f7eced03fd chore: Using a fork of the fst library that support Arc<[u8]> 2019-04-29 14:32:01 +02:00
9be7c02461 chore: Update sled to 0.22.1 2019-04-29 14:32:01 +02:00
9483f2df60 feat: Introduce a custom Error type 2019-04-29 14:32:01 +02:00
f17a05c342 feat: Introduce the RankedMap type 2019-04-29 14:32:00 +02:00
e41c551757 feat: Introduce the Number type 2019-04-29 14:32:00 +02:00
95dfbd1fe0 feat: Introduce the meilidb-data schema module 2019-04-29 14:32:00 +02:00
287d5dee4d feat: Introduce the meilidb-data workspace member 2019-04-29 14:32:00 +02:00
77405cc103 chore: Remove the database module from meilidb 2019-04-29 14:32:00 +02:00
abf7191eec feat: Make the Tokenizer able to support tokenizing sequences 2019-04-29 14:32:00 +02:00
c6bb2b6f9c chore: Make the debug symbols available for release binaries 2019-04-29 14:31:59 +02:00
acede0f3e8 fix: Correctly assert the DocIndex memory size 2019-04-29 14:31:59 +02:00
e56106cbdc chore: Update the toml dependency 2019-04-29 14:31:59 +02:00
87f9528791 feat: Use the new Tokenizer 2019-04-29 14:31:59 +02:00
397522f277 fet: Move meilidb example into the meilidb workspace 2019-04-29 14:31:59 +02:00
a745819ddf feat: Simplify the Tokenizer to use the LinearStrGroupBy type 2019-04-29 14:31:37 +02:00
5d5bcf7011 feat: Remove the FilterFunc alias type 2019-04-29 14:31:37 +02:00
19e67dcf0b feat: Move query splitting into the tokenizer workspace 2019-04-29 14:31:37 +02:00
1897da5348 feat: Move tokenizer things into the meilidb-tokenizer workspace 2019-04-29 14:31:37 +02:00
d8cbb03c42 chore: Update the .gitignore file 2019-04-29 14:31:36 +02:00
bc227bef21 chore: Add a nightly feature to meilidb-core 2019-04-29 14:31:36 +02:00
3bcb1dc802 chore: Allow the activation of the meilidb-core i128 feature 2019-04-29 14:31:36 +02:00
d0786b4156 chore: Move the SortByAttr into meilidb 2019-04-29 14:31:36 +02:00
14790eeae3 chore: Move index related things to the meilidb-core workspace member 2019-04-29 14:31:35 +02:00
3056b351fa Merge pull request #143 from ndudnicz/examples-movies
doc: add a new +19k movies example dataset
2019-04-15 10:11:38 +02:00
52fca57114 doc: add a new +19k movies example dataset 2019-04-13 21:11:28 +02:00
ee7a570b2f doc: Fix a little typo 2019-03-24 16:45:33 +01:00
61dcf72e04 Merge pull request #131 from meilisearch/update-readme
Add a Features section to the readme
2019-03-24 16:44:00 +01:00
bace8ad510 doc: Add a features section to the readme 2019-03-24 16:28:19 +01:00
e0b759839d Merge pull request #129 from meilisearch/ci-badge
Add CI badge
2019-03-10 22:46:57 +01:00
05b0a3e7d2 Add CI badge 2019-03-10 21:38:04 +01:00
2518037b91 Merge pull request #128 from meilisearch/azure-pipeline
Azure pipeline
2019-03-10 17:38:47 +01:00
3e452f362c Replace TravisCI by Azure CI 2019-03-10 15:46:59 +01:00
4900544574 Merge pull request #126 from Kerollmops/searchable-attributes
Searchable attributes
2019-03-05 17:11:15 +01:00
858589dc6b feat: Limit the QueryBuilder to search only into some attributes 2019-03-05 16:34:29 +01:00
915f2e70a3 Merge pull request #125 from Kerollmops/limit-memory-usage
Limit memory usage
2019-03-05 16:17:56 +01:00
aae301878c fix: Flush the database after each WriteBatch injected 2019-03-05 14:55:57 +01:00
383a49b44f fix: Compact the whole database for each WriteBatch injected 2019-03-05 14:55:57 +01:00
a45cc4b618 fix: Reduce the size of the DocIndex type 2019-03-05 14:55:57 +01:00
aef7d7825f Merge pull request #124 from Kerollmops/version-bump
Bump version to 0.3.2
2019-02-25 14:22:02 +01:00
f28ce661af chore: Bump version to 0.3.2 2019-02-25 13:56:23 +01:00
74eb9c8d0f Merge pull request #122 from Kerollmops/query-builder-no-view-dep
Remove the DatabaseView dependencies from the QueryBuilder
2019-02-24 16:56:12 +01:00
d664221c64 feat: Remove the DatabaseView dependencies from the QueryBuilder 2019-02-24 16:25:28 +01:00
58bff3d4ac Merge pull request #123 from Kerollmops/update-deps
Update all the dependencies
2019-02-24 16:24:47 +01:00
2c206eb98c chore: Update all the dependencies 2019-02-24 16:00:03 +01:00
19724e5af9 Merge pull request #121 from Kerollmops/no-cjk-unidecode
Do not save unidecoded cjk kanjis
2019-02-23 22:34:47 +01:00
c9e0ad132c feat: Do not save unidecoded cjk kanjis 2019-02-23 19:11:54 +01:00
24f265a963 Merge pull request #120 from Kerollmops/custom-log10-function
Optimize the SumOfTypos criterion
2019-02-23 19:01:12 +01:00
f8a743ee00 feat: Optimize the SumOfTypos criterion 2019-02-23 18:36:45 +01:00
64971de7ed Merge pull request #119 from Kerollmops/dont-be-hurry
Fix the tokenizer (next time don't be so hurry to merge)
2019-02-23 17:07:42 +01:00
a960c325f3 feat: Make query strings support cjk kanjis 2019-02-23 14:57:13 +01:00
a799470997 fix: Change the tokenizer to mesure cjk chars positions 2019-02-22 23:06:42 +01:00
10414791a2 fix: Remove debug println from the tokenizer 2019-02-22 22:34:37 +01:00
743974e60d Merge pull request #118 from Kerollmops/tokenizer-support-kanjis
Make the Tokenizer support Kanjis
2019-02-22 20:16:55 +01:00
0e267cae4b feat: Make the Tokenizer support Kanjis 2019-02-22 19:37:19 +01:00
12a352ae2f Merge pull request #117 from Kerollmops/tokenizer-support-parentheses
Make the tokenizer support parentheses
2019-02-22 19:36:15 +01:00
5070b27728 feat: Make the tokenizer support parentheses
Interpreting them as hard ponctuation (like a dot).
2019-02-22 18:18:17 +01:00
7a6b734078 Merge pull request #116 from Kerollmops/raw-field-value-getter
Allow users to retrieve the raw field value of a document
2019-02-22 18:02:46 +01:00
24823da6f7 feat: Allow users to retrieve the raw field value of a document 2019-02-22 15:30:20 +01:00
8701cb3a8f Merge pull request #115 from qdequele/database-path
Add accessor for database path and index path
2019-02-22 15:11:40 +01:00
315fc1fbe3 feat: Add accessor for database and index path 2019-02-22 13:49:04 +01:00
23833bac10 Merge pull request #114 from Kerollmops/hot-fix-ranked-attribute
Do not error when an attribute is registered for ranking
2019-02-21 23:17:10 +01:00
8235b6efc9 fix: Do not error when an attribute is registered for ranking 2019-02-21 20:14:08 +01:00
7f937eea5a Merge pull request #113 from Kerollmops/hot-fix-query-builder
Remove the QueryBuilder boxed criteria default static restriction
2019-02-21 20:11:10 +01:00
a1cf634ac1 feat: Remove the QueryBuilder boxed criteria default static restriction 2019-02-21 19:26:22 +01:00
c86472e997 Merge pull request #112 from Kerollmops/bump-version
Bump version to 0.3.1
2019-02-21 15:18:37 +01:00
26cb398a6f chore: Bump version to 0.3.1 2019-02-21 14:52:40 +01:00
f6e664d298 Merge pull request #111 from qdequele/config
Add a config per index
2019-02-21 14:39:37 +01:00
9437cecf87 chore: Use Default derive on Config struct 2019-02-21 14:01:55 +01:00
13309511b3 chore: Use serde derive lowercase on RankingOrdering 2019-02-21 14:01:55 +01:00
1941cb16c0 feat: Add Config.update_with(_) method to merge 2 config 2019-02-21 14:01:55 +01:00
55823c5d5d feat: add admin key on config 2019-02-21 14:01:55 +01:00
4721da1679 feat: Add access key on config 2019-02-21 14:01:55 +01:00
482f750231 chore: Set config field pub 2019-02-21 14:01:55 +01:00
d5119db165 feat: Allow to retrieve config from Database and DatabaseView 2019-02-21 14:01:55 +01:00
37578ed74f feat: store config into database 2019-02-20 14:07:19 +01:00
f5992ce822 Merge pull request #109 from Kerollmops/implement-text-cropping
Introduce text cropping that shows the first matches
2019-02-18 19:40:30 +01:00
badb0035c5 feat: Introduce text cropping that shows the first match 2019-02-18 18:59:50 +01:00
4bc14aa261 Merge pull request #108 from Kerollmops/refactor-index
Refactor the Index and Updates
2019-02-18 18:59:20 +01:00
a0c4ec0be0 feat: Introduce the updated_documents methods 2019-02-18 18:01:40 +01:00
264fffa826 feat: Replace the elapsed dependency by std::time::Instant 2019-02-17 16:37:45 +01:00
bddb37e44f feat: Move SharedData to its own module 2019-02-17 16:37:45 +01:00
6393b0cbc0 feat: Prefer binary to exponential search 2019-02-17 16:37:45 +01:00
a8df438814 feat: Implement WriteToBytes/FromSharedDataCursor 2019-02-17 16:37:44 +01:00
8014857ebf feat: Introduce the WriteToBytes trait 2019-02-17 16:37:44 +01:00
9e7261a48f feat: Introduce the FromSharedDataCursor trait 2019-02-17 16:37:44 +01:00
c4e70d0475 feat: Introduce the SharedDataCursor type 2019-02-17 16:37:44 +01:00
cbb0aaa217 feat: Introduce the Index structure along with the Events types 2019-02-17 16:36:47 +01:00
ce50e74491 Merge pull request #107 from Kerollmops/update-dependencies
Update dependencies
2019-02-13 16:05:51 +01:00
e103e1c277 chore: Replace the crossbeam::ArcCell by arc-swap::ArcSwap 2019-02-13 15:19:02 +01:00
64929fe5dc chore: Update slice-group-by to 0.2 2019-02-13 15:06:34 +01:00
b108f1e6c9 Merge pull request #106 from Kerollmops/fix-criterion
Fix the SumOfTypos and WordsProximity criteria
2019-02-12 22:06:32 +01:00
58b417e045 feat: Replace the linear_group_by by the new linear_group method 2019-02-12 21:23:36 +01:00
2e5a616d8e fix: Compute the proximity on the words with the min distance 2019-02-12 21:22:45 +01:00
092d446a7e chore: Update the slice-group-by dependency 2019-02-12 21:22:45 +01:00
85a1f126bf fix: Make the SumOfTypos criterion use a more clever algorithm 2019-02-12 21:22:42 +01:00
cf58cf86da Merge pull request #105 from Kerollmops/custom-ranking-field-into-hashmap
Save the custom ranking field into an HashMap
2019-02-11 17:36:26 +01:00
db6210c7ee feat: Introduce the Number type 2019-02-11 16:58:44 +01:00
83cd071827 feat: Introduce the SortByAttr custom ranking helper 2019-02-11 16:55:31 +01:00
084c3a95b6 feat: Add a new ranked attribute to the schema 2019-02-11 16:55:30 +01:00
78908aa34e Merge pull request #103 from Kerollmops/ranking-typo-rules
Add a reading on the default typos and ranking rules
2019-02-11 15:05:04 +01:00
cf27706f91 doc: Add a reading on the default typos and ranking rules 2019-02-11 11:58:17 +01:00
d3f53a7fd6 Merge pull request #104 from Kerollmops/update-readme
Update the Redame wrk stats
2019-02-10 14:53:15 +01:00
508af5613f doc: Update the Redame wrk stats 2019-02-10 14:05:21 +01:00
c615c31016 Merge pull request #101 from Kerollmops/version-bump
Bump version to 0.3.0
2019-02-07 15:26:38 +01:00
908b28790b chore: Bump version to 0.3.0 2019-02-07 14:51:39 +01:00
4c0279729b Merge pull request #100 from qdequele/master
Allow users to manage multiple database indexes
2019-02-07 14:49:52 +01:00
96dfac5b33 feat: Allow users to manage multiple database indexes 2019-02-07 13:05:55 +01:00
8576218b51 Merge pull request #99 from Kerollmops/simplify-transactional-update
Remove the lifetime restriction for Database Updates
2019-02-06 18:19:45 +01:00
1c1f9201b8 feat: Remove the lifetime restriction for Database Updates 2019-02-06 18:03:41 +01:00
4398b88a3a Merge pull request #98 from Kerollmops/updates-with-transactions
Change updates to be handled using the RocksDB WriteBatch feature
2019-02-06 16:13:47 +01:00
73e79f5ca4 chore: Make travis build with Rust 1.32 2019-02-06 15:58:48 +01:00
1bfd51d6e9 feat: Change updates to be handled using the RocksDB WriteBatch feature 2019-02-06 15:58:47 +01:00
0d2daf27f2 Merge pull request #97 from Kerollmops/remove-hashbrown-stop-words
Remove the hashbrown dependency for library users
2019-02-03 17:31:08 +01:00
87f0d8cf3c feat: Remove the hashbrown dependency for library users 2019-02-03 12:22:50 +01:00
06d5a10902 Merge pull request #96 from Kerollmops/chore
Make some little changes
2019-02-03 11:55:06 +01:00
94b89c5439 chore: Make the Document from_raw method private 2019-02-03 11:24:44 +01:00
c5e951be09 chore: Move the deseserializer into the serde module 2019-02-03 11:24:44 +01:00
66ae5c8161 chore: Clarify some QueryBuilder comments 2019-02-03 11:24:44 +01:00
8438e2202f Merge pull request #95 from Kerollmops/fix-querybuilder-with-criteria
Make the QueryBuilder with_criteria use FilterFunc
2019-02-03 11:24:17 +01:00
7a6166d229 feat: Make the QueryBuilder with_criteria use FilterFunc 2019-02-03 10:55:16 +01:00
d46fa4b215 Merge pull request #94 from Kerollmops/data-oriented
Introduce Data Oriented design into the search algorithm
2019-02-02 15:40:10 +01:00
2bd5b4ab86 feat: Remove useless WordsProximity criterion benchmark 2019-02-02 15:12:54 +01:00
5efbc5ceb3 feat: Introduce the revisited SortBy criterion 2019-02-02 14:42:12 +01:00
2e905bac08 chore: Remove Attribute and WordArea structures 2019-02-02 14:40:15 +01:00
4c0ad5f964 feat: Simplify the Criterion Trait by removing the DatabaseView param 2019-02-02 14:40:15 +01:00
455cbf3bf4 feat: Make the search algorithm become fully data oriented 2019-02-02 14:40:14 +01:00
a3a28c56fa feat: Replace compressed Match fields by uncompressed ones 2019-02-02 14:40:14 +01:00
b0b3175641 Merge pull request #93 from Kerollmops/slice-group-by
Use the GroupBy/Mut Traits of the slice-group-by library
2019-01-30 17:52:27 +01:00
c2f0df3f73 feat: Use the GroupBy/Mut Traits of the slice-group-by library 2019-01-30 16:54:52 +01:00
820f1f9ac6 Merge pull request #91 from Kerollmops/warn-reused-document-id
Emit warnings when a document id is reused
2019-01-28 21:05:42 +01:00
337aee5b65 chore: Emit warnings when a document id is reused 2019-01-28 16:11:55 +01:00
810dfdf656 Merge pull request #90 from Kerollmops/version-bump
Bump version to 0.2.1
2019-01-25 17:08:53 +01:00
f016652fca chore: Bump version to 0.2.1 2019-01-25 16:41:08 +01:00
6c99ebe3fa Merge pull request #89 from Kerollmops/no-more-compaction
Remove the manual compaction triggering
2019-01-25 16:40:08 +01:00
94d357985f feat: Remove the manual compaction triggering 2019-01-25 16:05:56 +01:00
fbc698567a Merge pull request #87 from Kerollmops/measure-index-loading
Display index loading times
2019-01-24 14:07:11 +01:00
aa9db14c09 chore: Display index loading times 2019-01-23 11:19:44 +01:00
61e83a1c21 Merge pull request #86 from Kerollmops/measure-indexation
Display timings of indexation operations
2019-01-16 13:32:44 +01:00
1316be5b09 chore: Display timings of indexation operations 2019-01-16 11:45:33 +01:00
4e8b0383dd Merge pull request #85 from Kerollmops/debug-more-stats
Display more stats infos
2019-01-15 14:20:28 +01:00
4fa10753c1 chore: Display more stats infos 2019-01-14 21:18:46 +01:00
2473e289e8 Merge pull request #84 from qdequele/create-server-example
Example HTTP server example can use stopwords
2019-01-14 18:55:58 +01:00
e0e5e87ed3 feat: HTTP server example can use stopwords 2019-01-14 18:21:58 +01:00
b13e61f40a Merge pull request #83 from qdequele/create-server-example
Create an example of HTTP server managing multiple databases
2019-01-14 14:35:33 +01:00
c023cb3065 feat: Create an example for HTTP server managing multiple databases 2019-01-14 13:39:54 +01:00
0a3d069fbc Merge pull request #79 from qdequele/master
Schema can be de/serialized from a json format
2019-01-12 21:50:02 +01:00
fa062ce2cf feat: Schema can be de/serialized from a json format 2019-01-12 21:05:48 +01:00
cdc6e47bf5 Merge pull request #81 from Kerollmops/update-readme
Simplify the examples command lines
2019-01-12 13:43:42 +01:00
d5f44838be doc: Simplify the examples command lines 2019-01-12 12:56:11 +01:00
5939f6e68a Merge pull request #80 from Kerollmops/version-bump
Bump version to 0.2.0
2019-01-12 12:52:08 +01:00
97edc987f8 chore: Bump version to 0.2.0 2019-01-12 12:18:29 +01:00
e4e50cecce Merge pull request #77 from Kerollmops/update-dependencies
Update the quickcheck dev-dependency
2019-01-10 22:09:44 +01:00
77e0c19749 chore: Update the quickcheck dev-dependency 2019-01-10 21:25:32 +01:00
251bccbbc3 Merge pull request #76 from Kerollmops/update-readme
Update readme
2019-01-10 21:20:39 +01:00
f7561f8552 doc: Update examples usages 2019-01-10 21:14:01 +01:00
05fd7e87ec doc: Add some wrk stats to the Readme 2019-01-10 21:13:54 +01:00
446d6a5455 Merge pull request #75 from Kerollmops/binary-group-by-mut-query-builder
Introduce binary group by in the query builder
2019-01-10 21:10:31 +01:00
78786a0007 feat: Introduce binary group by in the query builder 2019-01-10 20:13:40 +01:00
3d820a27ee Merge pull request #74 from Kerollmops/same-document-update-shadowed
Make multiple document updates shadow themselves
2019-01-10 15:57:49 +01:00
ac347d788c feat: Make multiple document updates shadow themselves 2019-01-10 15:25:24 +01:00
5627f15d41 Merge pull request #73 from Kerollmops/module-for-attribute-wordarea
Module for attribute wordarea
2019-01-10 15:23:03 +01:00
e31afc2da2 chore: Move the WordArea type to its own module 2019-01-10 13:37:22 +01:00
77c252e12a chore: Move the Attribute type to its own module 2019-01-10 11:59:42 +01:00
30c9c053c2 Merge pull request #72 from Kerollmops/wordarea-char-index
Make WordArea be based on char index and length
2019-01-09 20:53:59 +01:00
b53ef08d05 feat: Make WordArea be based on char index and length 2019-01-09 20:14:08 +01:00
86bfb173ef Merge pull request #70 from Kerollmops/fix-assert-new-attribute
Remove assert on Attribute::new()
2019-01-09 11:09:18 +01:00
8e5f834625 chore: remove assert on Attribute::new() 2019-01-08 18:46:55 +01:00
563b021679 Merge pull request #69 from tpayet/patch-1
Update README.md
2019-01-08 18:45:10 +01:00
681f721b1d Correct README typos 2019-01-08 17:09:48 +01:00
8a7c061539 Update README.md 2019-01-08 17:09:48 +01:00
8c781a4d05 Merge pull request #67 from Kerollmops/reintroduce-stop-words
Reintroduce stop words
2019-01-07 13:29:23 +01:00
de59ea495d feat: Log some update steps 2019-01-06 22:49:12 +01:00
966eda8ae5 feat: Do the sum of typos using usizes 2019-01-06 22:49:12 +01:00
32f8908d71 feat: Reintroduce stopwords for the serializer 2019-01-06 22:49:11 +01:00
a2f5e8aa25 Merge pull request #66 from Kerollmops/revert-precompute-query-index-groups
Revert precompute query index groups
2019-01-06 22:38:44 +01:00
f00b978801 Revert "feat: Pre-compute matches query index groups"
This reverts commit 039a9a4cc7.
2019-01-06 21:54:49 +01:00
a78b5d225f Revert "feat: Allow Matches to be constructed"
This reverts commit d21406a939.
2019-01-06 21:44:53 +01:00
f32a59720d Revert "feat: Introducing the Matches as_matches method"
This reverts commit ef7ba96d4a.
2019-01-06 21:44:53 +01:00
2cc5fbde1a Revert "feat: Introduce multiple Iterator impl for Matches"
This reverts commit c594597a01.
2019-01-06 21:44:53 +01:00
34d2850d28 Revert "feat: Prefer using ranges and not using unreachable!"
This reverts commit d899b86603.
2019-01-06 21:44:51 +01:00
023f62b0ce Merge pull request #65 from Kerollmops/logging
Add a little bit of logging
2019-01-06 15:55:48 +01:00
7f35b971f0 feat: Log the total number of documents to rank 2019-01-06 15:02:53 +01:00
3418adb06a feat: Add log libraries dependencies 2019-01-06 15:02:53 +01:00
510426c05c Merge pull request #64 from Kerollmops/precompute-query-index-groups
Precompute query index groups
2019-01-06 14:59:04 +01:00
c74caa0f82 feat: Sum usizes instead of little u16/u32 2019-01-06 13:54:14 +01:00
d899b86603 feat: Prefer using ranges and not using unreachable! 2019-01-06 13:54:14 +01:00
0d07af3caf fix: Filter and count the exact matching words 2019-01-06 13:54:13 +01:00
c594597a01 feat: Introduce multiple Iterator impl for Matches 2019-01-06 13:54:13 +01:00
ef7ba96d4a feat: Introducing the Matches as_matches method 2019-01-06 13:54:13 +01:00
d21406a939 feat: Allow Matches to be constructed 2019-01-06 13:54:13 +01:00
039a9a4cc7 feat: Pre-compute matches query index groups 2019-01-06 11:11:55 +01:00
40ab9e7a55 Merge pull request #63 from Kerollmops/update-rocksdb
Update RocksDB to Titan
2019-01-06 10:37:54 +01:00
d21abb50fa chore: Update RocksDB to Titan 2019-01-05 12:47:03 +01:00
3dd5e2445a Merge pull request #62 from Kerollmops/test-document-key-attr
Add tests to DocumentKeyAttr
2019-01-02 22:20:37 +01:00
7f5e6c5b6e test: Add test to the DocumentKeyAttr slice repr 2019-01-02 21:48:58 +01:00
e6d3840f12 Merge pull request #61 from Kerollmops/update-remove-kv-attributes
UpdateBuilder handles document attributes deletion
2019-01-02 18:20:14 +01:00
c05fab783a fix: Write and Read DocumentKeyAttr in big endian 2019-01-02 17:53:53 +01:00
95dc6fe904 feat: Rework the UpdateBuilder struct 2019-01-02 17:53:52 +01:00
b2e9ae4136 Merge pull request #60 from Kerollmops/improve-perfs
Improve performances
2019-01-01 17:03:41 +01:00
b070778d44 feat: Use the jemalloc global allocator in examples 2019-01-01 16:37:15 +01:00
6731025003 chore: Update group-by 2019-01-01 16:27:39 +01:00
04544c1531 feat: Expose nightly features of some dependencies 2019-01-01 16:27:08 +01:00
9dd68b4eaa Merge pull request #58 from Kerollmops/clean-up
Clean up some database functions
2019-01-01 11:43:27 +01:00
1d67012aa5 chore: Clean up some database functions 2019-01-01 01:40:20 +01:00
e723e01ec8 Merge pull request #57 from Kerollmops/clippy-pass
Clippy pass
2018-12-31 23:46:18 +01:00
7845292ea8 chore: Clippy pass 2018-12-31 23:20:30 +01:00
521df85c0d Merge pull request #55 from Kerollmops/add-benchmarks
Add benchmarks
2018-12-31 21:48:38 +01:00
dfa19582a2 test: Add benchmarks to mesure the words proximity criterion 2018-12-31 21:18:42 +01:00
87ec95f7a0 test: Add benchmarks to mesure the database 2018-12-31 21:18:37 +01:00
76ef2cceeb Merge pull request #49 from Kerollmops/serialize-any-map
Serialize any map
2018-12-31 21:11:17 +01:00
20b5a6a06e doc: Add examples for runtime defined data and Schema 2018-12-31 20:44:33 +01:00
a842e647f7 Merge pull request #56 from Kerollmops/new-index-struct
New Index structure
2018-12-31 19:55:18 +01:00
21bb38c3b0 test: Add more tests for updates ingestion 2018-12-31 19:27:21 +01:00
64d53ee1bd chore: Rework the data module structures
being able to be constructed from SharedData
2018-12-31 19:27:21 +01:00
c022fa3fca chore: Move serde related structs to their module 2018-12-31 19:26:28 +01:00
0080bf486f feat: Introduce the new Index structure
replacing the old ugly Blob system
2018-12-31 19:26:27 +01:00
6bd779f9ae feat: Improve the deserialization time of a Blob 2018-12-31 13:15:37 +01:00
a18401f47e Merge pull request #53 from Kerollmops/query-builder-filter
Distinct/QueryBuilder filtering
2018-12-29 23:11:43 +01:00
7132c3be89 feat: Allow filtering on QueryBuilder 2018-12-29 22:30:41 +01:00
aa3d059363 feat: Allow filtering on DistinctQueryBuilder 2018-12-29 22:30:41 +01:00
e2a9dbc404 feat: Introduce filtering methods for Distinct/QueryBuilder 2018-12-29 22:30:40 +01:00
a0a11faee5 Merge pull request #54 from Kerollmops/arccell-instead-of-rwlock
Prefer using ArcCell instead of RWLock for database updates
2018-12-29 22:29:35 +01:00
36ef9581aa feat: Return the database view for each update 2018-12-29 21:07:01 +01:00
f4b04dfb72 feat: Prefer doing DatabaseView updates atomically 2018-12-29 20:52:00 +01:00
cf5d56e63a Merge pull request #52 from Kerollmops/schema-toml
Schema can be de/serialized from a toml format
2018-12-28 19:59:40 +01:00
8412c14b5b feat: Schema can be toml de/serialized 2018-12-28 19:24:50 +01:00
70772eca5c Merge pull request #51 from Kerollmops/wordarea-attribute-fallible
Make the Attribute and WordArea errors recoverable
2018-12-28 18:26:19 +01:00
b27f632e14 feat: Make the Attribute and WordArea errors recoverable 2018-12-28 16:15:22 +01:00
e3bfb866e5 Merge pull request #46 from Kerollmops/schema-considers-id
Schema considers document ids
2018-12-27 12:26:57 +01:00
fa238f21ef feat: Move Database to its own module 2018-12-27 11:21:47 +01:00
444a4c1af7 feat: Make the schema consider document ids 2018-12-27 11:21:47 +01:00
2e5c5fad33 Merge pull request #45 from Kerollmops/index-length-in-docindex
Introduce the WordArea struct
2018-12-24 17:08:20 +01:00
b32c96cdc9 feat: Introduce a WordArea struct
Useful to highlight matching areas in the original text.
2018-12-24 15:58:46 +01:00
62521262e8 Merge pull request #44 from Kerollmops/real-document-id-type
Create a real DocumentId type
2018-12-24 15:41:47 +01:00
4ebae7784c feat: Create a strong DocumentId type
Forcing it to be something internal will permit to avoid possible miss comparisons to be done with other types.
2018-12-24 12:42:24 +01:00
a756ca5e3f Merge pull request #39 from Kerollmops/readme-badges
Add badges to the README
2018-12-19 14:42:54 +01:00
aa104fa253 doc: Add some funny badges to the README 2018-12-19 12:00:29 +01:00
114 changed files with 28425 additions and 4315 deletions

.gitignore (3 changed lines)

@@ -1,6 +1,7 @@
/rocksdb
/target
/Cargo.lock
meilidb/Cargo.lock
meilidb-core/Cargo.lock
**/*.rs.bk
**/*.csv
**/*.json_lines

.travis.yml (deleted)

@ -1,22 +0,0 @@
language: rust
cache: cargo

branches:
  only:
    - master

matrix:
  fast_finish: true
  include:
    # Test crates on their minimum Rust versions.
    - rust: 1.31.0
      name: "meilidb on 1.31.0"
      script: ./ci/meilidb.sh
    # Test crates on nightly Rust.
    - rust: nightly
      name: "meilidb on nightly"
      script: ./ci/meilidb.sh

Cargo.toml

@@ -1,39 +1,11 @@
[package]
edition = "2018"
name = "meilidb"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
[workspace]
members = [
"meilidb",
"meilidb-core",
"meilidb-data",
"meilidb-schema",
"meilidb-tokenizer",
]
[dependencies]
bincode = "1.0"
byteorder = "1.2"
fst = "0.3"
hashbrown = "0.1"
lazy_static = "1.1"
levenshtein_automata = { version = "0.1", features = ["fst_automaton"] }
linked-hash-map = { version = "0.5", features = ["serde_impl"] }
sdset = "0.3"
serde = "1.0"
serde_derive = "1.0"
unidecode = "0.3"
[dependencies.rocksdb]
git = "https://github.com/pingcap/rust-rocksdb.git"
rev = "c2eb140"
[dependencies.group-by]
git = "https://github.com/Kerollmops/group-by.git"
rev = "cab857b"
[features]
default = ["simd"]
i128 = ["bincode/i128", "byteorder/i128"]
simd = ["rocksdb/sse"]
portable = ["rocksdb/portable"]
nightly = []
[dev-dependencies]
csv = "1.0"
elapsed = "0.1"
structopt = "0.2"
tempfile = "3.0"
[profile.release]
debug = true

README.md

@@ -1,47 +1,82 @@
# MeiliDB
[![Build Status](https://dev.azure.com/thomas0884/thomas/_apis/build/status/meilisearch.MeiliDB?branchName=master)](https://dev.azure.com/thomas0884/thomas/_build/latest?definitionId=1&branchName=master)
[![dependency status](https://deps.rs/repo/github/Kerollmops/MeiliDB/status.svg)](https://deps.rs/repo/github/Kerollmops/MeiliDB)
[![License](https://img.shields.io/github/license/Kerollmops/MeiliDB.svg)](https://github.com/Kerollmops/MeiliDB)
[![Rust 1.31+](https://img.shields.io/badge/rust-1.31+-lightgray.svg)](
https://www.rust-lang.org)
A _full-text search database_ using a key-value store internally.
It uses [RocksDB](https://github.com/facebook/rocksdb) like a classic database, to store documents and internal data. The key-value store power allow us to handle updates and queries with small memory and CPU overheads.
## Features
You can [read the deep dive](deep-dive.md) if you want more informations on the engine, it describes the whole process of generating updates and handling queries.
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L95-L101) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L22-L29) and can apply them in any custom order
- Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L146), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L68) and [filter](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L57) returned documents based on context defined rules
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/examples/movies/schema-movies.toml)
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-tokenizer/src/lib.rs#L99) can index latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/lib.rs#L117-L120), useful to highlight matched words in results
- Accepts query time search config like the [searchable fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L79)
- Supports run time indexing (incremental indexing)
We will be proud if you send pull requests to help us grow this project, you can start with [issues tagged "good-first-issue"](https://github.com/Kerollmops/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) to start !
At the moment this is a library only, this means that binaries are not part of this repository but since I'm still nice I have made some examples for you in the `examples/` folder that works with the data located in the `misc/` folder.
In a near future MeiliDB we be a binary like any database: updated and queried using some kind of protocol. It is the final goal, [see the milestones](https://github.com/Kerollmops/MeiliDB/milestones). MeiliDB will just be a bunch of network and protocols functions wrapping the library which itself will be published to https://crates.io, following the same update cycle.
It uses [RocksDB](https://github.com/facebook/rocksdb) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performances.
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries or you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
We will be proud if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
The project is only a library yet. It means that there is no binary provided yet. To get started, you can check the examples wich are made to work with the data located in the `misc/` folder.
MeiliDB will be a binary in a near future so you will be able to use it as a database out-of-the-box. We should be able to query it using a [to-be-defined](https://github.com/meilisearch/MeiliDB/issues/38) protocol. This is our current goal, [see the milestones](https://github.com/meilisearch/MeiliDB/milestones). In the end, the binary will be a bunch of network protocols and wrappers around the library - which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
## Performances
_these informations have been made with a version dated of october 2018, we must update them_
With a database composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
So more than _300 000_ fields indexed for _35 million_ stored we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
We made some tests on remote machines and found that we can handle with a dataset of near 280k products, on a server that cost 5$/month with 1vCPU and 1GB of ram and on the same index and with a simple query:
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real users queries.
- near 190 users with an average response time of 90ms
- 150 users with an average response time of 70ms
- 100 users with an average response time of 45ms
Network is mesured, servers are located in amsterdam and tests are made between two different datacenters.
```
Running 10s test @ http://localhost:2230
2 threads and 25 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 9.52ms 7.61ms 99.25ms 84.58%
Req/Sec 1.41k 119.11 1.78k 64.50%
28080 requests in 10.01s, 7.42MB read
Requests/sec: 2806.46
Transfer/sec: 759.17KB
```
### Notes
The default Rust allocator has recently been [changed to use the system allocator](https://github.com/rust-lang/rust/pull/51241/).
We have seen much better performances when [using jemalloc as the global allocator](https://github.com/alexcrichton/jemallocator#documentation).
## Usage and examples
MeiliDB work with an index like most of the search engines.
So to test the library you can create one by indexing a simple csv file.
Currently MeiliDB do not provide an http server but you can run these two examples to try it out.
It creates an index named _movies_ and insert _19 700_ (in batches of _1000_) movies into it.
```bash
cargo run --release --example create-database -- test.mdb misc/kaggle.csv
cargo run --release --example create-database -- \
--schema examples/movies/schema-movies.toml \
--update-group-size 1000 \
movies.mdb \
examples/movies/movies.csv
```
Once the command finished indexing the database should have been saved under the `test.mdb` folder.
Now you can easily run the `query-database` example to check what is stored in it.
Once this is done, you can query this database using the second binary example.
```bash
cargo run --release --example query-database -- test.mdb
cargo run --release --example query-database -- \
movies.mdb \
--fetch-timeout-ms 50 \
-n 4 \
id title overview release_date poster
```

azure-pipelines.yml (new file, 47 lines added)

@@ -0,0 +1,47 @@
---
trigger:
  branches:
    include: [ master ]
pr: [ master ]

jobs:
- job: test
  pool:
    vmImage: 'Ubuntu 16.04'
  container: tpayet/chiquitita:latest
  steps:
  - script: |
      curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
    displayName: 'Install rustc'
  - script: |
      $HOME/.cargo/bin/cargo check
    displayName: 'Check MeiliDB'
  - script: |
      $HOME/.cargo/bin/cargo test
    displayName: 'Test MeiliDB'

- job: build
  dependsOn:
  - test
  condition: succeeded()
  pool:
    vmImage: 'Ubuntu 16.04'
  container: tpayet/chiquitita:latest
  steps:
  - script: |
      curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
    displayName: 'Install rustc'
  - script: |
      $HOME/.cargo/bin/cargo build --release
    displayName: 'Build MeiliDB'
  - task: CopyFiles@2
    inputs:
      contents: '$(System.DefaultWorkingDirectory)/target/release/libmeilidb.rlib'
      targetFolder: $(Build.ArtifactStagingDirectory)
    displayName: 'Copy build'
  - task: PublishBuildArtifacts@1
    inputs:
      artifactName: libmeilidb.rlib
    displayName: 'Upload artifacts'

deep-dive.md

@@ -1,28 +1,22 @@
# A deep dive in MeiliDB
On the 9 of december 2018.
MeiliDB is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [RocksDB](https://github.com/facebook/rocksdb). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the data as an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
On the 15 of May 2019.
MeiliDB is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
<!-- MarkdownTOC autolink="true" -->
- [Where is the data stored?](#where-is-the-data-stored)
- [What does the key-value store contain?](#what-does-the-key-value-store-contain)
- [The blob type](#the-blob-type)
- [The inverted word index](#the-inverted-word-index)
- [A final state transducer](#a-final-state-transducer)
- [Document indexes](#document-indexes)
- [Document ids](#document-ids)
- [The schema](#the-schema)
- [Document attributes](#document-attributes)
- [How is an update handled?](#how-is-an-update-handled)
- [The merge operation is CPU consuming](#the-merge-operation-is-cpu-consuming)
- [How is a request processed?](#how-is-a-request-processed)
- [Query lexemes](#query-lexemes)
- [Automatons and query index](#automatons-and-query-index)
- [Sort by criteria](#sort-by-criteria)
- [Retrieve original documents](#retrieve-original-documents)
<!-- /MarkdownTOC -->
@ -30,21 +24,17 @@ MeiliDB is a full text search engine based on a final state transducer named [fs
MeiliDB is entirely backed by a key-value store, like any good database (e.g. Postgres, MySQL). This brings great flexibility in the way documents can be stored and updates handled over time.
[RocksDB brings some](https://rocksdb.org/blog/2015/02/27/write-batch-with-index.html) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent; for example we use SST files and the key-value store's ability to load them in one go to manage updates.
Note that SST files have the same restriction as the fst: keys must be added in order at creation.
[sled will bring some](https://github.com/spacejam/sled/tree/434533332a3f485e6d2e467023be0a0b55d3a1af#plans) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent.
## What does the key-value store contain?
It contains the blob, the schema and the stored document attributes.
It contains the inverted word index, the schema and the document fields.
### The blob type
### The inverted word index
[The Blob type](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/blob/mod.rs#L16-L19) is a data structure that indicates if an update is a positive or a negative one. In the case where the update is considered positive, the blob will contain [an fst map and the associated document indexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/blob/positive/blob.rs#L15-L18). In the other case it will only contain [all the document ids](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/blob/negative/blob.rs#L12-L14) that must be considered removed.
The Blob type [is stored under the "*data-index*" entry](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/update/positive/update.rs#L497-L499) and marked as [a merge operation](https://github.com/facebook/rocksdb/wiki/Merge-Operator-Implementation) in the key-value store.
[The inverted word index](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs) is a sled Tree dedicated to storing and giving access to all the documents that contain a specific word. The information stored under a word is simply a big ordered array of the places where the word has been found in the documents; in other words, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L35-L51).
#### A final state transducer
@ -52,89 +42,54 @@ _...also abbreviated fst_
This is the first entry point of the engine; you can read more about how it works in the beautiful blog post by @BurntSushi, [Index 1,600,000,000 Keys with Automata and Rust](https://blog.burntsushi.net/transducers/).
In short, it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index, each associated with a value that, for the moment, can only be a `u64`. When you want to search in it you can provide any automaton you want; in MeiliDB [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
Note that the number under each word is auto-incremented: each new word gets a number greater than the previous one.
Another powerful feature of `fst` is that it can nearly avoid using RAM and be streamed to disk, for example; the catch is that the keys must always be added in lexicographic order, so you must sort them beforehand. For the moment MeiliDB uses a [BTreeMap](https://github.com/Kerollmops/raptor-rs/blob/8abdb0a228e2808fe1814a6a0641a4b72d158579/src/metadata/doc_indexes.rs#L107-L112) for that.
In short, it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want; in MeiliDB [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
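As a minimal, self-contained sketch of that idea using the `fst` crate (the word list is made up, and a real search would drive the stream with a levenshtein automaton instead of dumping every key):
```rust
use fst::{Set, Streamer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Keys must be provided in lexicographic order, as explained above.
    let set = Set::from_iter(vec!["crab", "crate", "rust"])?;

    // Stream every key back out of the transducer.
    let mut stream = set.stream();
    while let Some(word) = stream.next() {
        println!("{}", String::from_utf8_lossy(word));
    }
    Ok(())
}
```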
#### Document indexes
As specified above, the `fst` can only store a number corresponding to a word, a `u64`, but the goal of the search engine is to retrieve a match in a document when a query is made. You want it to return some sort of position in an attribute in a document, some information about where the given word matched.
The `fst` will only return the words that match the search automaton, but the goal of the search engine is to retrieve all the matches in all the documents when a query is made. You want it to return some sort of position in an attribute in a document, some information about where the given word matched.
To make that possible, a custom data structure has been developed; the document indexes are composed of two arrays, the ranges array and all the docindexes corresponding to a given range, where each range identifies a word number. The [DocIndexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/data/doc_indexes.rs#L23) type is designed to be streamed when constructed, consuming a minimal amount of RAM, like the fst. Another advantage is that the slices are accessible in `O(1)` when you know the number associated with the word.
#### Document ids
This is a simple ordered list of all the document ids which must be considered deleted. It is used with [the sdset library](https://docs.rs/sdset/0.3.0/sdset/duo/struct.DifferenceByKey.html), the docindexes and the `DifferenceByKey` operation builder when merging blobs.
When a blob represents a negative update it only contains this simple slice of deleted document ids.
To make that possible we retrieve all of the `DocIndex` entries corresponding to all the matching words in the fst; we use the [`WordsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding to the words.
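Here is a rough, purely illustrative sketch of that lookup, with a plain `BTreeMap` standing in for the sled Tree and invented values in the same shape as the `DocIndex` fields:
```rust
use std::collections::BTreeMap;

// Same fields as meilidb-core's DocIndex; the values below are invented.
#[derive(Debug, Clone, Copy)]
struct DocIndex {
    document_id: u64,
    attribute: u16,
    word_index: u16,
    char_index: u16,
    char_length: u16,
}

fn main() {
    // Stand-in for the WordsIndex Tree: word -> ordered array of DocIndex.
    let mut words_index: BTreeMap<&str, Vec<DocIndex>> = BTreeMap::new();
    words_index.insert("subway", vec![
        DocIndex { document_id: 7, attribute: 1, word_index: 3, char_index: 18, char_length: 6 },
    ]);

    // A word returned by the fst gives direct access to all of its positions.
    if let Some(doc_indexes) = words_index.get("subway") {
        for di in doc_indexes {
            println!("doc {} attr {} word {}", di.document_id, di.attribute, di.word_index);
        }
    }
}
```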
### The schema
The schema is a data structure that represents which document attributes should be stored and which should be indexed. It is stored under the "_data-schema_" entry and given to MeiliDB only at creation.
The schema is a data structure that represents which document attributes should be stored and which should be indexed. It is stored under the [`MainIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/main_index.rs#L12) Tree and given to MeiliDB only at the creation of an index.
Each document attribute is associated with a unique 32 bit number named `SchemaAttr`.
Each document attribute is associated with a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/schema.rs#L186).
In the future this schema type could be given along with updates and probably differ from the original; the database could then handle this new document structure and reindex it.
In the future, this schema type could be given along with updates; the database could then handle the new schema and reindex the database according to it.
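To picture it, here is a purely hypothetical stand-in; the real `SchemaAttr` and attribute properties live in meilidb-data:
```rust
// Conceptual sketch only: a schema maps attribute names to small numbers
// and records whether each attribute is stored and/or indexed.
#[derive(Debug, Clone, Copy)]
struct SchemaAttr(u16);

#[derive(Debug)]
struct AttrProps {
    stored: bool,
    indexed: bool,
}

fn main() {
    let attributes = vec![
        ("id",       SchemaAttr(0), AttrProps { stored: true, indexed: false }),
        ("title",    SchemaAttr(1), AttrProps { stored: true, indexed: true }),
        ("overview", SchemaAttr(2), AttrProps { stored: true, indexed: true }),
    ];
    for (name, attr, props) in &attributes {
        println!("{} -> {:?} ({:?})", name, attr, props);
    }
}
```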
### Document attributes
When the engine handles a query, what the requester wants back is a document; not only the [match](https://github.com/Kerollmops/MeiliDB/blob/fc2cdf92596fc002ce278e3aa8718640ac44724d/src/lib.rs#L51-L79) associated with it, the fields of the original document must be returned too.
When the engine handles a query, what the requester wants back is a document; not only the [`Matches`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L62-L88) associated with it, the fields of the original document must be returned too.
So MeiliDB again uses the power of the underlying key-value store and saves the document attributes marked as _STORE_. The key is prefixed by "_doc_" followed by the 64 bit document id in bytes and the schema attribute number in bytes corresponding to the stored document attribute.
So MeiliDB again uses the power of the underlying key-value store and saves the document attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/documents_index.rs#L11).
When a document field is saved in the key-value store its value is binary encoded using the [bincode](https://docs.rs/bincode/) library, so a document must be serializable with serde.
## How is an update handled?
First of all, an update in MeiliDB is nothing more than [a RocksDB SST file](https://github.com/facebook/rocksdb/wiki/Creating-and-Ingesting-SST-files). It contains the blob and all the document attributes binary encoded as described above. Note that the blob is stored under the "_data-index_" key marked as [a merge operation](https://github.com/facebook/rocksdb/wiki/Merge-Operator-Implementation).
### The merge operation is CPU consuming
When [the database ingests an update](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/mod.rs#L108-L145) it gives the SST file to the underlying RocksDB; once it has been ingested there is a "_data-index_" entry available. We can request it, but the key-value store will call a function first: a merge operation is performed.
This merge operation is done on multiple blobs, as you will have understood, and computes a [PositiveBlob](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/blob/positive/blob.rs#L15); this type contains the fst and the document indexes structures, allowing us to search for documents. These two data structures can be considered the inverted index.
The computation time of this merge matters; RocksDB doesn't keep the previous merged result, it will call our merge operation each time until it decides to do a compaction. So [we must force this compaction earlier](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/mod.rs#L129-L131) when we receive an update, to reduce this cost.
This way, when we request the "_data-index_" value it gives us the previously merged positive blob without any other merge overhead.
When a document field is saved in the key-value store its value is binary encoded using [MessagePack](https://github.com/3Hren/msgpack-rust), so a document must be serializable with serde.
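As a small sketch of that round-trip (the struct and its fields are invented; the point is only that any serde-serializable type works):
```rust
use serde::{Deserialize, Serialize};

// Any serde-serializable document can be stored; the concrete format
// (bincode before, MessagePack now) is a detail of the storage layer.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Movie {
    id: String,
    title: String,
    overview: String,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let movie = Movie {
        id: "255".into(),
        title: "Interstellar".into(),
        overview: "A team travels through a wormhole.".into(),
    };

    // Encode and decode one stored value, shown here with bincode for brevity.
    let bytes = bincode::serialize(&movie)?;
    let decoded: Movie = bincode::deserialize(&bytes)?;
    assert_eq!(decoded, movie);
    Ok(())
}
```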
## How is a request processed?
Now that we have our "_data-index_" we are able to return results based on a query. In the MeiliDB universe a query is a string.
Now that we have our inverted index we are able to return results based on a query. In the MeiliDB universe a query is a simple string containing words.
### Query lexemes
The first step, to be able to call the underlying structures, is to split the query into words; for that we use a [custom tokenizer](https://github.com/Kerollmops/MeiliDB/blob/fc2cdf92596fc002ce278e3aa8718640ac44724d/src/tokenizer/mod.rs) that is not finished for the moment, [there is an open issue](https://github.com/Kerollmops/MeiliDB/issues/3). Note that a tokenizer is specialized for a human language; this is the hard part.
The first step, to be able to call the underlying structures, is to split the query into words; for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language; this is the hard part.
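A deliberately naive stand-in for that step, ignoring all the language-specific work the real tokenizer does:
```rust
// Split on non-alphanumeric characters and lowercase every word.
fn naive_tokenize(query: &str) -> Vec<String> {
    query
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(|word| word.to_lowercase())
        .collect()
}

fn main() {
    let words = naive_tokenize("The new-york subway");
    assert_eq!(words, vec!["the", "new", "york", "subway"]);
}
```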
### Automatons and query index
So to query the fst we need an automaton; in MeiliDB we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), which is constructed from a string and a maximum distance. Following [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/Kerollmops/MeiliDB/blob/fc2cdf92596fc002ce278e3aa8718640ac44724d/src/automaton.rs#L62-L75) with different settings.
So to query the fst we need an automaton; in MeiliDB we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), which is constructed from a string and a maximum distance. Following [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/automaton.rs#L59-L78) with different settings.
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst map, which lets us know [which automaton returned a word according to its index](https://github.com/Kerollmops/MeiliDB/blob/fc2cdf92596fc002ce278e3aa8718640ac44724d/src/metadata/ops.rs#L111). The `Stream` is able to return all the numbers associated with the words. We use these numbers to find the whole list of associated `DocIndexes` and do the union set operation.
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst set. The `Stream` is able to return all the matching words. We use these words to find the whole list of associated `DocIndexes`.
With all this information it is possible [to reconstruct a list of all the DocIndexes associated](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L62-L99) with the queried words.
With all this information it is possible [to reconstruct a list of all the `DocIndexes` associated](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L103-L130) with the queried words.
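Here is a small sketch of what such an automaton accepts, using the same `levenshtein_automata` crate (the query word is invented):
```rust
use levenshtein_automata::{Distance, LevenshteinAutomatonBuilder};

fn main() {
    // A one-typo automaton, the kind of setting used for medium-length query words.
    let builder = LevenshteinAutomatonBuilder::new(1, false);
    let dfa = builder.build_dfa("subway");

    // "sybway" is one substitution away from "subway", so it is accepted.
    match dfa.eval("sybway") {
        Distance::Exact(d) => println!("accepted with {} typo(s)", d),
        Distance::AtLeast(_) => println!("rejected"),
    }
}
```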
### Sort by criteria
Now that we are able to get a big list of [DocIndexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L21-L36), sorting them by criteria is not enough on its own; we need more information, like the levenshtein distance or the fact that a query word matches exactly the word stored in the fst. So [we stuff it a little bit](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L86-L93), and aggregate all these [Matches](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L47-L74) for each document. This way it will be easy to sort a simple vector of documents using a bunch of functions.
With this big list of documents and associated matches [we are able to sort only the part of the slice that we want](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L108-L119) using bucket sorting. [Each criterion](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/criterion/mod.rs#L75-L87) is evaluated on each subslice without copying, thanks to [GroupByMut](https://github.com/Kerollmops/group-by/blob/cab857bae01463dbd0edb99b0e0d7f3624e6c6f5/src/lib.rs#L180-L185) which, I hope, [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
Note that it is possible to customize the criteria used via the `QueryBuilder::with_criteria` constructor; this way you can implement custom ranking based on the document attributes, using the appropriate structure and the `retrieve_document` method.
### Retrieve original documents
The [DatabaseView](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/database_view.rs#L18-L24) structure that you must have created to be able to query the database has [two functions](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/database_view.rs#L60-L76) that allow you to retrieve a full (or partial) document according to the schema you specified at creation time (i.e. the _STORED_ attributes).
As you can see, these functions force the created type `T` to implement [the serde Deserialize trait](https://docs.rs/serde/1.0.81/serde/trait.Deserialize.html); MeiliDB will use the `bincode::deserialize` function for each attribute to construct your type and return it to you.
With this big list of documents and associated matches [we are able to sort only the part of the slice that we want](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L160-L188) using bucket sorting. [Each criterion](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/criterion/mod.rs#L95-L101) is evaluated on each subslice without copying, thanks to [GroupByMut](https://docs.rs/slice-group-by/0.2.4/slice_group_by/) which, I hope, [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
Note that it is possible to customize the criteria used via the `QueryBuilder::with_criteria` constructor; this way you can implement custom ranking based on the document attributes, using the appropriate structure and the [`document` method](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/index.rs#L86).
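To illustrate the grouping primitive this relies on, here is a tiny sketch with the same `slice_group_by` crate (the data is invented):
```rust
use slice_group_by::GroupBy;

fn main() {
    // Matches are sorted beforehand, so equal adjacent query indices form one
    // group: the shape each criterion evaluates without copying anything.
    let query_index: &[u32] = &[0, 0, 1, 2, 2, 2];
    for group in query_index.linear_group() {
        println!("query word {} matched {} time(s)", group[0], group.len());
    }
}
```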
At this point, MeiliDB's work is over 🎉


@ -1,98 +0,0 @@
use std::collections::hash_map::DefaultHasher;
use std::path::{Path, PathBuf};
use std::hash::{Hash, Hasher};
use std::error::Error;
use serde_derive::{Serialize, Deserialize};
use structopt::StructOpt;
use meilidb::database::schema::{Schema, SchemaBuilder, STORED, INDEXED};
use meilidb::database::update::PositiveUpdateBuilder;
use meilidb::tokenizer::DefaultBuilder;
use meilidb::database::Database;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
/// The csv file to index.
#[structopt(parse(from_os_str))]
pub csv_data_path: PathBuf,
}
#[derive(Debug, Serialize, Deserialize)]
struct Document<'a> {
id: &'a str,
title: &'a str,
description: &'a str,
image: &'a str,
}
fn calculate_hash<T: Hash>(t: &T) -> u64 {
let mut s = DefaultHasher::new();
t.hash(&mut s);
s.finish()
}
fn create_schema() -> Schema {
let mut schema = SchemaBuilder::new();
schema.new_attribute("id", STORED);
schema.new_attribute("title", STORED | INDEXED);
schema.new_attribute("description", STORED | INDEXED);
schema.new_attribute("image", STORED);
schema.build()
}
fn index(schema: Schema, database_path: &Path, csv_data_path: &Path) -> Result<Database, Box<Error>> {
let database = Database::create(database_path, schema.clone())?;
println!("start indexing...");
let tokenizer_builder = DefaultBuilder::new();
let update_path = tempfile::NamedTempFile::new()?;
let mut update = PositiveUpdateBuilder::new(update_path.path(), schema, tokenizer_builder);
let mut rdr = csv::Reader::from_path(csv_data_path)?;
let mut raw_record = csv::StringRecord::new();
let headers = rdr.headers()?.clone();
while rdr.read_record(&mut raw_record)? {
let document: Document = match raw_record.deserialize(Some(&headers)) {
Ok(document) => document,
Err(e) => {
eprintln!("{:?}", e);
continue;
}
};
let document_id = calculate_hash(&document.id);
update.update(document_id, &document).unwrap();
}
let mut update = update.build()?;
update.set_move(true);
database.ingest_update_file(update)?;
Ok(database)
}
fn main() -> Result<(), Box<Error>> {
let opt = Opt::from_args();
let schema = create_schema();
let (elapsed, result) = elapsed::measure_time(|| {
index(schema, &opt.database_path, &opt.csv_data_path)
});
if let Err(e) = result {
return Err(e.into())
}
println!("database created in {} at: {:?}", elapsed, opt.database_path);
Ok(())
}


@ -0,0 +1,19 @@
# This schema has been generated ...
# The order in which the attributes are declared is important,
# it specify the attribute xxx...
identifier = "id"
[attributes.id]
displayed = true
[attributes.title]
displayed = true
indexed = true
[attributes.description]
displayed = true
indexed = true
[attributes.image]
displayed = true


@ -0,0 +1 @@
_data in movies.csv comes from https://www.themoviedb.org/_

19700
examples/movies/movies.csv Normal file

File diff suppressed because it is too large


@ -0,0 +1,21 @@
# This schema has been generated ...
# The order in which the attributes are declared is important,
# it specify the attribute xxx...
identifier = "id"
[attributes.id]
displayed = true
[attributes.title]
displayed = true
indexed = true
[attributes.overview]
displayed = true
indexed = true
[attributes.release_date]
displayed = true
[attributes.poster]
displayed = true


@ -1,68 +0,0 @@
use std::io::{self, Write};
use std::path::PathBuf;
use std::error::Error;
use serde_derive::{Serialize, Deserialize};
use structopt::StructOpt;
use meilidb::database::Database;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
/// The number of returned results
#[structopt(short = "n", long = "number-results", default_value = "10")]
pub number_results: usize,
}
#[derive(Debug, Serialize, Deserialize)]
struct Document {
id: String,
title: String,
description: String,
image: String,
}
fn main() -> Result<(), Box<Error>> {
let opt = Opt::from_args();
let (elapsed, result) = elapsed::measure_time(|| Database::open(&opt.database_path));
let database = result?;
println!("database prepared for you in {}", elapsed);
let mut buffer = String::new();
let input = io::stdin();
loop {
print!("Searching for: ");
io::stdout().flush()?;
if input.read_line(&mut buffer)? == 0 { break }
let view = database.view();
let (elapsed, documents) = elapsed::measure_time(|| {
let builder = view.query_builder().unwrap();
builder.query(&buffer, 0..opt.number_results)
});
let mut full_documents = Vec::with_capacity(documents.len());
for document in documents {
match view.retrieve_document::<Document>(document.id) {
Ok(document) => full_documents.push(document),
Err(e) => eprintln!("{}", e),
}
}
println!("{:#?}", full_documents);
println!("Found {} results in {}", full_documents.len(), elapsed);
buffer.clear();
}
Ok(())
}

34
meilidb-core/Cargo.toml Normal file

@ -0,0 +1,34 @@
[package]
name = "meilidb-core"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
byteorder = "1.3.1"
deunicode = "1.0.0"
hashbrown = "0.6.0"
lazy_static = "1.2.0"
log = "0.4.6"
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
rayon = "1.2.0"
sdset = "0.3.2"
serde = { version = "1.0.88", features = ["derive"] }
slice-group-by = "0.2.6"
zerocopy = "0.2.8"
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dependencies.levenshtein_automata]
git = "https://github.com/Kerollmops/levenshtein-automata.git"
branch = "arc-byte-slice"
features = ["fst_automaton"]
[dev-dependencies]
assert_matches = "1.3"
[features]
i128 = ["byteorder/i128"]
nightly = ["hashbrown/nightly", "slice-group-by/nightly"]


@ -0,0 +1,44 @@
use lazy_static::lazy_static;
use levenshtein_automata::{
LevenshteinAutomatonBuilder as LevBuilder,
DFA,
};
lazy_static! {
static ref LEVDIST0: LevBuilder = LevBuilder::new(0, false);
static ref LEVDIST1: LevBuilder = LevBuilder::new(1, false);
static ref LEVDIST2: LevBuilder = LevBuilder::new(2, false);
}
#[derive(Copy, Clone)]
enum PrefixSetting {
Prefix,
NoPrefix,
}
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
use self::PrefixSetting::{Prefix, NoPrefix};
match query.len() {
0 ..= 4 => match setting {
Prefix => LEVDIST0.build_prefix_dfa(query),
NoPrefix => LEVDIST0.build_dfa(query),
},
5 ..= 8 => match setting {
Prefix => LEVDIST1.build_prefix_dfa(query),
NoPrefix => LEVDIST1.build_dfa(query),
},
_ => match setting {
Prefix => LEVDIST2.build_prefix_dfa(query),
NoPrefix => LEVDIST2.build_dfa(query),
},
}
}
pub fn build_prefix_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::Prefix)
}
pub fn build_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}


@ -0,0 +1,16 @@
use std::cmp::Ordering;
use crate::criterion::Criterion;
use crate::RawDocument;
#[derive(Debug, Clone, Copy)]
pub struct DocumentId;
impl Criterion for DocumentId {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
lhs.id.cmp(&rhs.id)
}
fn name(&self) -> &'static str {
"DocumentId"
}
}


@ -0,0 +1,65 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
let mut count = 0;
let mut index = 0;
for group in query_index.linear_group() {
let len = group.len();
count += is_exact[index..index + len].contains(&true) as usize;
index += len;
}
count
}
#[derive(Debug, Clone, Copy)]
pub struct Exact;
impl Criterion for Exact {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let is_exact = lhs.is_exact();
number_exact_matches(query_index, is_exact)
};
let rhs = {
let query_index = rhs.query_index();
let is_exact = rhs.is_exact();
number_exact_matches(query_index, is_exact)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"Exact"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: "Soulier bleu"
// doc1: "souliereres rouge"
#[test]
fn easy_case() {
let query_index0 = &[0];
let is_exact0 = &[true];
let query_index1 = &[0];
let is_exact1 = &[false];
let doc0 = number_exact_matches(query_index0, is_exact0);
let doc1 = number_exact_matches(query_index1, is_exact1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}


@ -0,0 +1,120 @@
mod sum_of_typos;
mod number_of_words;
mod words_proximity;
mod sum_of_words_attribute;
mod sum_of_words_position;
mod exact;
mod document_id;
use std::cmp::Ordering;
use crate::RawDocument;
pub use self::{
sum_of_typos::SumOfTypos,
number_of_words::NumberOfWords,
words_proximity::WordsProximity,
sum_of_words_attribute::SumOfWordsAttribute,
sum_of_words_position::SumOfWordsPosition,
exact::Exact,
document_id::DocumentId,
};
pub trait Criterion: Send + Sync {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
fn name(&self) -> &'static str;
#[inline]
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
self.evaluate(lhs, rhs) == Ordering::Equal
}
}
impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
(**self).evaluate(lhs, rhs)
}
fn name(&self) -> &'static str {
(**self).name()
}
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
(**self).eq(lhs, rhs)
}
}
impl<T: Criterion + ?Sized> Criterion for Box<T> {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
(**self).evaluate(lhs, rhs)
}
fn name(&self) -> &'static str {
(**self).name()
}
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
(**self).eq(lhs, rhs)
}
}
#[derive(Default)]
pub struct CriteriaBuilder<'a> {
inner: Vec<Box<dyn Criterion + 'a>>
}
impl<'a> CriteriaBuilder<'a>
{
pub fn new() -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::new() }
}
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::with_capacity(capacity) }
}
pub fn reserve(&mut self, additional: usize) {
self.inner.reserve(additional)
}
pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
where C: Criterion,
{
self.push(criterion);
self
}
pub fn push<C: 'a>(&mut self, criterion: C)
where C: Criterion,
{
self.inner.push(Box::new(criterion));
}
pub fn build(self) -> Criteria<'a> {
Criteria { inner: self.inner }
}
}
pub struct Criteria<'a> {
inner: Vec<Box<dyn Criterion + 'a>>,
}
impl<'a> Default for Criteria<'a> {
fn default() -> Self {
CriteriaBuilder::with_capacity(7)
.add(SumOfTypos)
.add(NumberOfWords)
.add(WordsProximity)
.add(SumOfWordsAttribute)
.add(SumOfWordsPosition)
.add(Exact)
.add(DocumentId)
.build()
}
}
impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
fn as_ref(&self) -> &[Box<dyn Criterion + 'a>] {
&self.inner
}
}


@ -0,0 +1,31 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn number_of_query_words(query_index: &[u32]) -> usize {
query_index.linear_group().count()
}
#[derive(Debug, Clone, Copy)]
pub struct NumberOfWords;
impl Criterion for NumberOfWords {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
number_of_query_words(query_index)
};
let rhs = {
let query_index = rhs.query_index();
number_of_query_words(query_index)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"NumberOfWords"
}
}


@ -0,0 +1,116 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
// This function is a wrong logarithmic 10 function.
// It is safe to panic on input number higher than 3,
// the number of typos is never bigger than that.
#[inline]
fn custom_log10(n: u8) -> f32 {
match n {
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
_ => panic!("invalid number"),
}
}
#[inline]
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
let mut number_words: usize = 0;
let mut sum_typos = 0.0;
let mut index = 0;
for group in query_index.linear_group() {
sum_typos += custom_log10(distance[index]);
number_words += 1;
index += group.len();
}
(number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfTypos;
impl Criterion for SumOfTypos {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let distance = lhs.distance();
sum_matches_typos(query_index, distance)
};
let rhs = {
let query_index = rhs.query_index();
let distance = rhs.distance();
sum_matches_typos(query_index, distance)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"SumOfTypos"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "Geox CEO"
//
// doc0: "Geox SpA: CEO and Executive"
// doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
#[test]
fn one_typo_reference() {
let query_index0 = &[0, 1];
let distance0 = &[0, 0];
let query_index1 = &[0, 1];
let distance1 = &[1, 0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchette"
//
// doc0: "bouton manchette"
// doc1: "bouton"
#[test]
fn no_typo() {
let query_index0 = &[0, 1];
let distance0 = &[0, 0];
let query_index1 = &[0];
let distance1 = &[0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchztte"
//
// doc0: "bouton manchette"
// doc1: "bouton"
#[test]
fn one_typo() {
let query_index0 = &[0, 1];
let distance0 = &[0, 1];
let query_index1 = &[0];
let distance1 = &[0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}


@ -0,0 +1,64 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
let mut sum_attributes = 0;
let mut index = 0;
for group in query_index.linear_group() {
sum_attributes += attribute[index] as usize;
index += group.len();
}
sum_attributes
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsAttribute;
impl Criterion for SumOfWordsAttribute {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let attribute = lhs.attribute();
sum_matches_attributes(query_index, attribute)
};
let rhs = {
let query_index = rhs.query_index();
let attribute = rhs.attribute();
sum_matches_attributes(query_index, attribute)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"SumOfWordsAttribute"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: { 0. "Soulier bleu", 1. "bla bla bla" }
// doc1: { 0. "Botte rouge", 1. "Soulier en cuir" }
#[test]
fn title_vs_description() {
let query_index0 = &[0];
let attribute0 = &[0];
let query_index1 = &[0];
let attribute1 = &[1];
let doc0 = sum_matches_attributes(query_index0, attribute0);
let doc1 = sum_matches_attributes(query_index1, attribute1);
assert_eq!(doc0.cmp(&doc1), Ordering::Less);
}
}


@ -0,0 +1,64 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
let mut sum_word_index = 0;
let mut index = 0;
for group in query_index.linear_group() {
sum_word_index += word_index[index] as usize;
index += group.len();
}
sum_word_index
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsPosition;
impl Criterion for SumOfWordsPosition {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let word_index = lhs.word_index();
sum_matches_attribute_index(query_index, word_index)
};
let rhs = {
let query_index = rhs.query_index();
let word_index = rhs.word_index();
sum_matches_attribute_index(query_index, word_index)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"SumOfWordsPosition"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: "Soulier bleu"
// doc1: "Botte rouge et soulier noir"
#[test]
fn easy_case() {
let query_index0 = &[0];
let word_index0 = &[0];
let query_index1 = &[0];
let word_index1 = &[3];
let doc0 = sum_matches_attribute_index(query_index0, word_index0);
let doc1 = sum_matches_attribute_index(query_index1, word_index1);
assert_eq!(doc0.cmp(&doc1), Ordering::Less);
}
}


@ -0,0 +1,155 @@
use std::cmp::{self, Ordering};
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
const MAX_DISTANCE: u16 = 8;
#[inline]
fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) {
(a.clone(), b.clone())
}
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)
} else {
cmp::min(lhs - rhs, MAX_DISTANCE) + 1
}
}
fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
if lattr != rattr { return MAX_DISTANCE }
index_proximity(lwi, rwi)
}
fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
let mut min_prox = u16::max_value();
for a in lattr.iter().zip(lwi) {
for b in rattr.iter().zip(rwi) {
let a = clone_tuple(a);
let b = clone_tuple(b);
min_prox = cmp::min(min_prox, attribute_proximity(a, b));
}
}
min_prox
}
fn matches_proximity(
query_index: &[u32],
distance: &[u8],
attribute: &[u16],
word_index: &[u16],
) -> u16
{
let mut query_index_groups = query_index.linear_group();
let mut proximity = 0;
let mut index = 0;
let get_attr_wi = |index: usize, group_len: usize| {
// retrieve the first distance group (with the lowest values)
let len = distance[index..index + group_len].linear_group().next().unwrap().len();
let rattr = &attribute[index..index + len];
let rwi = &word_index[index..index + len];
(rattr, rwi)
};
let mut last = query_index_groups.next().map(|group| {
let attr_wi = get_attr_wi(index, group.len());
index += group.len();
attr_wi
});
// iter by windows of size 2
while let (Some(lhs), Some(rhs)) = (last, query_index_groups.next()) {
let attr_wi = get_attr_wi(index, rhs.len());
proximity += min_proximity(lhs, attr_wi);
last = Some(attr_wi);
index += rhs.len();
}
proximity
}
#[derive(Debug, Clone, Copy)]
pub struct WordsProximity;
impl Criterion for WordsProximity {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let distance = lhs.distance();
let attribute = lhs.attribute();
let word_index = lhs.word_index();
matches_proximity(query_index, distance, attribute, word_index)
};
let rhs = {
let query_index = rhs.query_index();
let distance = rhs.distance();
let attribute = rhs.attribute();
let word_index = rhs.word_index();
matches_proximity(query_index, distance, attribute, word_index)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"WordsProximity"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn three_different_attributes() {
// "soup" "of the" "the day"
//
// { id: 0, attr: 0, attr_index: 0 }
// { id: 1, attr: 1, attr_index: 0 }
// { id: 2, attr: 1, attr_index: 1 }
// { id: 2, attr: 2, attr_index: 0 }
// { id: 3, attr: 3, attr_index: 1 }
let query_index = &[0, 1, 2, 2, 3];
let distance = &[0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 2, 3];
let word_index = &[0, 0, 1, 0, 1];
// soup -> of = 8
// + of -> the = 1
// + the -> day = 8 (not 1)
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 17);
}
#[test]
fn two_different_attributes() {
// "soup day" "soup of the day"
//
// { id: 0, attr: 0, attr_index: 0 }
// { id: 0, attr: 1, attr_index: 0 }
// { id: 1, attr: 1, attr_index: 1 }
// { id: 2, attr: 1, attr_index: 2 }
// { id: 3, attr: 0, attr_index: 1 }
// { id: 3, attr: 1, attr_index: 3 }
let query_index = &[0, 0, 1, 2, 3, 3];
let distance = &[0, 0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 1, 0, 1];
let word_index = &[0, 0, 1, 2, 1, 3];
// soup -> of = 1
// + of -> the = 1
// + the -> day = 1
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 3);
}
}


@ -1,5 +1,4 @@
use std::hash::Hash;
use hashbrown::HashMap;
pub struct DistinctMap<K> {
@ -12,7 +11,7 @@ impl<K: Hash + Eq> DistinctMap<K> {
pub fn new(limit: usize) -> Self {
DistinctMap {
inner: HashMap::new(),
limit: limit,
limit,
len: 0,
}
}
@ -31,7 +30,7 @@ pub struct BufferedDistinctMap<'a, K> {
impl<'a, K: Hash + Eq> BufferedDistinctMap<'a, K> {
pub fn new(internal: &'a mut DistinctMap<K>) -> BufferedDistinctMap<'a, K> {
BufferedDistinctMap {
internal: internal,
internal,
inner: HashMap::new(),
len: 0,
}

144
meilidb-core/src/lib.rs Normal file

@ -0,0 +1,144 @@
#![feature(checked_duration_since)]
#[cfg(test)]
#[macro_use] extern crate assert_matches;
mod automaton;
mod distinct_map;
mod query_builder;
mod query_enhancer;
mod raw_document;
mod reordered_attrs;
mod store;
pub mod criterion;
use serde::{Serialize, Deserialize};
use zerocopy::{AsBytes, FromBytes};
use self::raw_document::raw_documents_from;
pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str};
pub use self::raw_document::RawDocument;
pub use self::store::Store;
/// Represent an internally generated document unique identifier.
///
/// It is used to inform the database the document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentId(pub u64);
/// This structure represent the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocIndex {
/// The document identifier where the word was found.
pub document_id: DocumentId,
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
pub word_index: u16,
/// The position in bytes where the word was found
/// along with the length of it.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
pub char_length: u16,
}
/// This structure represent a matching word with informations
/// on the location of the word in the document.
///
/// The order of the field is important because it defines
/// the way these structures are ordered between themselves.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Highlight {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
/// The position in bytes where the word was found.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
/// The length in bytes of the found word.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_length: u16,
}
#[doc(hidden)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TmpMatch {
pub query_index: u32,
pub distance: u8,
pub attribute: u16,
pub word_index: u16,
pub is_exact: bool,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Document {
pub id: DocumentId,
pub highlights: Vec<Highlight>,
#[cfg(test)]
pub matches: Vec<TmpMatch>,
}
impl Document {
#[cfg(not(test))]
fn from_raw(raw: RawDocument) -> Document {
Document { id: raw.id, highlights: raw.highlights }
}
#[cfg(test)]
fn from_raw(raw: RawDocument) -> Document {
let len = raw.query_index().len();
let mut matches = Vec::with_capacity(len);
let query_index = raw.query_index();
let distance = raw.distance();
let attribute = raw.attribute();
let word_index = raw.word_index();
let is_exact = raw.is_exact();
for i in 0..len {
let match_ = TmpMatch {
query_index: query_index[i],
distance: distance[i],
attribute: attribute[i],
word_index: word_index[i],
is_exact: is_exact[i],
};
matches.push(match_);
}
Document { id: raw.id, matches, highlights: raw.highlights }
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::mem;
#[test]
fn docindex_mem_size() {
assert_eq!(mem::size_of::<DocIndex>(), 16);
}
}

File diff suppressed because it is too large


@ -0,0 +1,398 @@
use std::ops::Range;
use std::cmp::Ordering::{Less, Greater, Equal};
/// Return `true` if the specified range can accept the given replacements words.
/// Returns `false` if the replacements words are already present in the original query
/// or if there is fewer replacement words than the range to replace.
//
//
// ## Ignored because already present in original
//
// new york city subway
// -------- ^^^^
// / \
// [new york city]
//
//
// ## Ignored because smaller than the original
//
// new york city subway
// -------------
// \ /
// [new york]
//
//
// ## Accepted because bigger than the original
//
// NYC subway
// ---
// / \
// / \
// / \
// / \
// / \
// [new york city]
//
fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
where S: AsRef<str>,
T: AsRef<str>,
{
if words.len() <= range.len() {
// there is fewer or equal replacement words
// than there is already in the replaced range
return false
}
// retrieve the part to rewrite but with the length
// of the replacement part
let original = query.iter().skip(range.start).take(words.len());
// check if the original query doesn't already contain
// the replacement words
!original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref))
}
type Origin = usize;
type RealLength = usize;
struct FakeIntervalTree {
intervals: Vec<(Range<usize>, (Origin, RealLength))>,
}
impl FakeIntervalTree {
fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end));
FakeIntervalTree { intervals }
}
fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> {
let element = self.intervals.binary_search_by(|(r, _)| {
if point >= r.start {
if point < r.end { Equal } else { Less }
} else { Greater }
});
let n = match element { Ok(n) => n, Err(n) => n };
match self.intervals.get(n) {
Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)),
_otherwise => None,
}
}
}
pub struct QueryEnhancerBuilder<'a, S> {
query: &'a [S],
origins: Vec<usize>,
real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>,
}
impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
// we initialize origins query indices based on their positions
let origins: Vec<_> = (0..query.len() + 1).collect();
let real_to_origin = origins.iter().map(|&o| (o..o+1, (o, 1))).collect();
QueryEnhancerBuilder { query, origins, real_to_origin }
}
/// Update the final real to origin query indices mapping.
///
/// `range` is the original words range that this `replacement` words replace
/// and `real` is the first real query index of these replacement words.
pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
where T: AsRef<str>,
{
// check if the range of original words
// can be rewritten with the replacement words
if rewrite_range_with(self.query, range.clone(), replacement) {
// this range can be replaced so we need to
// modify the origins accordingly
let offset = replacement.len() - range.len();
let previous_padding = self.origins[range.end - 1];
let current_offset = (self.origins[range.end] - 1) - previous_padding;
let diff = offset.saturating_sub(current_offset);
self.origins[range.end] += diff;
for r in &mut self.origins[range.end + 1..] {
*r += diff;
}
}
// we need to store the real number and origins relations
// this way it will be possible to know by how many
// we need to pad real query indices
let real_range = real..real + replacement.len().max(range.len());
let real_length = replacement.len();
self.real_to_origin.push((real_range, (range.start, real_length)));
}
pub fn build(self) -> QueryEnhancer {
QueryEnhancer {
origins: self.origins,
real_to_origin: FakeIntervalTree::new(self.real_to_origin),
}
}
}
pub struct QueryEnhancer {
origins: Vec<usize>,
real_to_origin: FakeIntervalTree,
}
impl QueryEnhancer {
/// Returns the query indices to use to replace this real query index.
pub fn replacement(&self, real: u32) -> Range<u32> {
let real = real as usize;
// query the fake interval tree with the real query index
let (range, (origin, real_length)) =
self.real_to_origin
.query(real)
.expect("real has never been declared");
// if `real` is the end bound of the range
if (range.start + real_length - 1) == real {
let mut count = range.len();
let mut new_origin = origin;
for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
let len = slice[1] - slice[0];
count = count.saturating_sub(len);
if count == 0 { new_origin = origin + i; break }
}
let n = real - range.start;
let start = self.origins[origin];
let end = self.origins[new_origin + 1];
let remaining = (end - start) - n;
Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
} else {
// just return the origin along with
// the real position of the word
let n = real as usize - range.start;
let origin = self.origins[origin];
Range { start: (origin + n) as u32, end: (origin + n + 1) as u32 }
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn original_unmodified() {
let query = ["new", "york", "city", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// new york = new york city
builder.declare(0..2, 4, &["new", "york", "city"]);
// ^ 4 5 6
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // new
assert_eq!(enhancer.replacement(1), 1..2); // york
assert_eq!(enhancer.replacement(2), 2..3); // city
assert_eq!(enhancer.replacement(3), 3..4); // subway
assert_eq!(enhancer.replacement(4), 0..1); // new
assert_eq!(enhancer.replacement(5), 1..2); // york
assert_eq!(enhancer.replacement(6), 2..3); // city
}
#[test]
fn simple_growing() {
let query = ["new", "york", "subway"];
// 0 1 2
let mut builder = QueryEnhancerBuilder::new(&query);
// new york = new york city
builder.declare(0..2, 3, &["new", "york", "city"]);
// ^ 3 4 5
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // new
assert_eq!(enhancer.replacement(1), 1..3); // york
assert_eq!(enhancer.replacement(2), 3..4); // subway
assert_eq!(enhancer.replacement(3), 0..1); // new
assert_eq!(enhancer.replacement(4), 1..2); // york
assert_eq!(enhancer.replacement(5), 2..3); // city
}
#[test]
fn same_place_growings() {
let query = ["NY", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NY = new york
builder.declare(0..1, 2, &["new", "york"]);
// ^ 2 3
// NY = new york city
builder.declare(0..1, 4, &["new", "york", "city"]);
// ^ 4 5 6
// NY = NYC
builder.declare(0..1, 7, &["NYC"]);
// ^ 7
// NY = new york city
builder.declare(0..1, 8, &["new", "york", "city"]);
// ^ 8 9 10
// subway = underground train
builder.declare(1..2, 11, &["underground", "train"]);
// ^ 11 12
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..3); // NY
assert_eq!(enhancer.replacement(1), 3..5); // subway
assert_eq!(enhancer.replacement(2), 0..1); // new
assert_eq!(enhancer.replacement(3), 1..3); // york
assert_eq!(enhancer.replacement(4), 0..1); // new
assert_eq!(enhancer.replacement(5), 1..2); // york
assert_eq!(enhancer.replacement(6), 2..3); // city
assert_eq!(enhancer.replacement(7), 0..3); // NYC
assert_eq!(enhancer.replacement(8), 0..1); // new
assert_eq!(enhancer.replacement(9), 1..2); // york
assert_eq!(enhancer.replacement(10), 2..3); // city
assert_eq!(enhancer.replacement(11), 3..4); // underground
assert_eq!(enhancer.replacement(12), 4..5); // train
}
#[test]
fn bigger_growing() {
let query = ["NYC", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(0..1, 2, &["new", "york", "city"]);
// ^ 2 3 4
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..3); // NYC
assert_eq!(enhancer.replacement(1), 3..4); // subway
assert_eq!(enhancer.replacement(2), 0..1); // new
assert_eq!(enhancer.replacement(3), 1..2); // york
assert_eq!(enhancer.replacement(4), 2..3); // city
}
#[test]
fn middle_query_growing() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..6); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
}
#[test]
fn end_query_growing() {
let query = ["NYC", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(1..2, 2, &["underground", "train"]);
// ^ 2 3
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // NYC
assert_eq!(enhancer.replacement(1), 1..3); // subway
assert_eq!(enhancer.replacement(2), 1..2); // underground
assert_eq!(enhancer.replacement(3), 2..3); // train
}
#[test]
fn multiple_growings() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
// subway = underground train
builder.declare(3..4, 7, &["underground", "train"]);
// ^ 7 8
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..7); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
assert_eq!(enhancer.replacement(7), 5..6); // underground
assert_eq!(enhancer.replacement(8), 6..7); // train
}
#[test]
fn multiple_probable_growings() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
// subway = underground train
builder.declare(3..4, 7, &["underground", "train"]);
// ^ 7 8
// great awesome = good
builder.declare(0..2, 9, &["good"]);
// ^ 9
// awesome NYC = NY
builder.declare(1..3, 10, &["NY"]);
// ^^ 10
// NYC subway = metro
builder.declare(2..4, 11, &["metro"]);
// ^^ 11
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..7); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
assert_eq!(enhancer.replacement(7), 5..6); // underground
assert_eq!(enhancer.replacement(8), 6..7); // train
assert_eq!(enhancer.replacement(9), 0..2); // good
assert_eq!(enhancer.replacement(10), 1..5); // NY
assert_eq!(enhancer.replacement(11), 2..5); // metro
}
}


@ -0,0 +1,141 @@
use std::sync::Arc;
use std::fmt;
use sdset::SetBuf;
use slice_group_by::GroupBy;
use crate::{TmpMatch, DocumentId, Highlight};
#[derive(Clone)]
pub struct RawDocument {
pub id: DocumentId,
pub matches: SharedMatches,
pub highlights: Vec<Highlight>,
}
impl RawDocument {
fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
RawDocument { id, matches, highlights }
}
pub fn query_index(&self) -> &[u32] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
}
pub fn distance(&self) -> &[u8] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
}
pub fn attribute(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
}
pub fn word_index(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
}
pub fn is_exact(&self) -> &[bool] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
}
}
impl fmt::Debug for RawDocument {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("RawDocument {\r\n")?;
f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "query_index", self.query_index()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "distance", self.distance()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "attribute", self.attribute()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "word_index", self.word_index()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "is_exact", self.is_exact()))?;
f.write_str("}")?;
Ok(())
}
}
pub fn raw_documents_from(
matches: SetBuf<(DocumentId, TmpMatch)>,
highlights: SetBuf<(DocumentId, Highlight)>,
) -> Vec<RawDocument>
{
let mut docs_ranges: Vec<(_, Range, _)> = Vec::new();
let mut matches2 = Matches::with_capacity(matches.len());
let matches = matches.linear_group_by_key(|(id, _)| *id);
let highlights = highlights.linear_group_by_key(|(id, _)| *id);
for (mgroup, hgroup) in matches.zip(highlights) {
debug_assert_eq!(mgroup[0].0, hgroup[0].0);
let document_id = mgroup[0].0;
let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0);
let end = start + mgroup.len();
let highlights = hgroup.iter().map(|(_, h)| *h).collect();
docs_ranges.push((document_id, Range { start, end }, highlights));
matches2.extend_from_slice(mgroup);
}
let matches = Arc::new(matches2);
docs_ranges.into_iter().map(|(id, range, highlights)| {
let matches = SharedMatches { range, matches: matches.clone() };
RawDocument::new(id, matches, highlights)
}).collect()
}
#[derive(Debug, Copy, Clone)]
struct Range {
start: usize,
end: usize,
}
#[derive(Clone)]
pub struct SharedMatches {
range: Range,
matches: Arc<Matches>,
}
#[derive(Clone)]
struct Matches {
query_index: Vec<u32>,
distance: Vec<u8>,
attribute: Vec<u16>,
word_index: Vec<u16>,
is_exact: Vec<bool>,
}
impl Matches {
fn with_capacity(cap: usize) -> Matches {
Matches {
query_index: Vec::with_capacity(cap),
distance: Vec::with_capacity(cap),
attribute: Vec::with_capacity(cap),
word_index: Vec::with_capacity(cap),
is_exact: Vec::with_capacity(cap),
}
}
fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch)]) {
for (_, match_) in matches {
self.query_index.push(match_.query_index);
self.distance.push(match_.distance);
self.attribute.push(match_.attribute);
self.word_index.push(match_.word_index);
self.is_exact.push(match_.is_exact);
}
}
}


@ -0,0 +1,24 @@
#[derive(Default, Clone)]
pub struct ReorderedAttrs {
count: usize,
reorders: Vec<Option<u16>>,
}
impl ReorderedAttrs {
pub fn new() -> ReorderedAttrs {
ReorderedAttrs { count: 0, reorders: Vec::new() }
}
pub fn insert_attribute(&mut self, attribute: u16) {
self.reorders.resize(attribute as usize + 1, None);
self.reorders[attribute as usize] = Some(self.count as u16);
self.count += 1;
}
pub fn get(&self, attribute: u16) -> Option<u16> {
match self.reorders.get(attribute as usize) {
Some(Some(attribute)) => Some(*attribute),
_ => None,
}
}
}
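For illustration, a minimal usage sketch of ReorderedAttrs (attribute numbers are arbitrary; attributes are assumed to be registered in increasing order, since the resize call above truncates the mapping otherwise):

let mut reordered = ReorderedAttrs::new();
reordered.insert_attribute(0); // schema attribute 0 becomes displayed position 0
reordered.insert_attribute(2); // schema attribute 2 becomes displayed position 1
assert_eq!(reordered.get(0), Some(0));
assert_eq!(reordered.get(2), Some(1));
assert_eq!(reordered.get(1), None); // attribute 1 was never registered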

34
meilidb-core/src/store.rs Normal file

@ -0,0 +1,34 @@
use std::error::Error;
use fst::Set;
use sdset::SetBuf;
use crate::DocIndex;
pub trait Store {
type Error: Error;
fn words(&self) -> Result<&Set, Self::Error>;
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;
fn synonyms(&self) -> Result<&Set, Self::Error>;
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>;
}
impl<T> Store for &'_ T where T: Store {
type Error = T::Error;
fn words(&self) -> Result<&Set, Self::Error> {
(*self).words()
}
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
(*self).word_indexes(word)
}
fn synonyms(&self) -> Result<&Set, Self::Error> {
(*self).synonyms()
}
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
(*self).alternatives_to(word)
}
}
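To illustrate the trait surface, a minimal in-memory implementation sketch (InMemoryStore and its Infallible error type are illustrative assumptions, not part of the diff):

use std::convert::Infallible;

struct InMemoryStore {
    words: Set,
    synonyms: Set,
}

impl Store for InMemoryStore {
    type Error = Infallible;
    fn words(&self) -> Result<&Set, Self::Error> {
        Ok(&self.words)
    }
    fn word_indexes(&self, _word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
        Ok(None) // this sketch stores no postings
    }
    fn synonyms(&self) -> Result<&Set, Self::Error> {
        Ok(&self.synonyms)
    }
    fn alternatives_to(&self, _word: &[u8]) -> Result<Option<Set>, Self::Error> {
        Ok(None)
    }
}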

41
meilidb-data/Cargo.toml Normal file

@ -0,0 +1,41 @@
[package]
name = "meilidb-data"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
arc-swap = "0.4.2"
bincode = "1.1.4"
crossbeam-channel = "0.3.9"
deunicode = "1.0.0"
hashbrown = { version = "0.6.0", features = ["serde"] }
log = "0.4.6"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] }
rocksdb = "0.12.3"
sdset = "0.3.2"
serde = { version = "1.0.99", features = ["derive"] }
serde_json = "1.0.40"
siphasher = "0.3.0"
zerocopy = "0.2.8"
[dependencies.rmp-serde]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
[dependencies.rmpv]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
features = ["with-serde"]
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dev-dependencies]
tempfile = "3.1.0"
maplit = "1.0.2"
big_s = "1.0.2"

126
meilidb-data/src/cf_tree.rs Normal file

@ -0,0 +1,126 @@
use std::sync::Arc;
use crossbeam_channel::{unbounded, Sender, Receiver};
use rocksdb::{DBVector, IteratorMode, Direction};
use crate::RocksDbResult;
#[derive(Clone)]
pub struct CfTree {
index: Arc<CfTreeInner>,
sender: Option<Sender<()>>,
}
struct CfTreeInner {
db: Arc<rocksdb::DB>,
name: String,
}
impl CfTree {
pub fn create(db: Arc<rocksdb::DB>, name: String) -> RocksDbResult<CfTree> {
let mut options = rocksdb::Options::default();
options.create_missing_column_families(true); // this doesn't work
if db.cf_handle(&name).is_none() {
let _cf = db.create_cf(&name, &options)?;
}
let index = Arc::new(CfTreeInner { db, name });
Ok(CfTree { index, sender: None })
}
pub fn create_with_subcription(
db: Arc<rocksdb::DB>,
name: String,
) -> RocksDbResult<(CfTree, Receiver<()>)>
{
let mut options = rocksdb::Options::default();
options.create_missing_column_families(true); // this doesn't work
if db.cf_handle(&name).is_none() {
let _cf = db.create_cf(&name, &options)?;
}
let index = Arc::new(CfTreeInner { db, name });
let (sender, receiver) = unbounded();
Ok((CfTree { index, sender: Some(sender) }, receiver))
}
pub fn insert<K, V>(&self, key: K, value: V) -> RocksDbResult<()>
where K: AsRef<[u8]>,
V: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let result = self.index.db.put_cf(cf, key, value);
if let Some(sender) = &self.sender {
let _err = sender.send(());
}
result
}
pub fn get<K>(&self, key: K) -> RocksDbResult<Option<DBVector>>
where K: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.get_cf(cf, key)
}
pub fn remove<K>(&self, key: K) -> RocksDbResult<()>
where K: AsRef<[u8]>
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.delete_cf(cf, key)
}
/// Start and end key range is inclusive on both bounds.
pub fn range<KS, KE>(&self, start: KS, end: KE) -> RocksDbResult<CfIter>
where KS: AsRef<[u8]>,
KE: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let mut iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
iter.set_mode(IteratorMode::From(start.as_ref(), Direction::Forward));
let end_bound = Box::from(end.as_ref());
Ok(CfIter { iter, end_bound: Some(end_bound) })
}
pub fn iter(&self) -> RocksDbResult<CfIter> {
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
Ok(CfIter { iter, end_bound: None })
}
pub fn last_key(&self) -> RocksDbResult<Option<Box<[u8]>>> {
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let mut iter = self.index.db.iterator_cf(cf, IteratorMode::End)?;
Ok(iter.next().map(|(key, _)| key))
}
pub fn prefix_iterator<P>(&self, prefix: P) -> RocksDbResult<rocksdb::DBIterator>
where P: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.prefix_iterator_cf(cf, prefix)
}
}
pub struct CfIter<'a> {
iter: rocksdb::DBIterator<'a>,
end_bound: Option<Box<[u8]>>,
}
impl Iterator for CfIter<'_> {
type Item = (Box<[u8]>, Box<[u8]>);
fn next(&mut self) -> Option<Self::Item> {
match (self.iter.next(), &self.end_bound) {
(Some((ref key, _)), Some(end_bound)) if key > end_bound => None,
(Some(entry), _) => Some(entry),
(None, _) => None,
}
}
}
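A hedged usage sketch of CfTree over a freshly opened database (the path, column family name and keys are illustrative; errors are propagated with ? for brevity):

let mut options = rocksdb::Options::default();
options.create_if_missing(true);
let db = Arc::new(rocksdb::DB::open(&options, "/tmp/cf-tree-example")?);
let tree = CfTree::create(db, "example".to_string())?;
tree.insert(b"key-1", b"value-1")?;
assert!(tree.get(b"key-1")?.is_some());
// the range is inclusive on both bounds
for (key, value) in tree.range(b"key-0", b"key-9")? {
    let _ = (key, value); // both sides come back as Box<[u8]>
}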


@ -0,0 +1,73 @@
use std::{error, fmt};
use crate::serde::SerializerError;
#[derive(Debug)]
pub enum Error {
SchemaDiffer,
SchemaMissing,
WordIndexMissing,
MissingDocumentId,
RocksDbError(rocksdb::Error),
FstError(fst::Error),
RmpDecodeError(rmp_serde::decode::Error),
RmpEncodeError(rmp_serde::encode::Error),
BincodeError(bincode::Error),
SerializerError(SerializerError),
}
impl From<rocksdb::Error> for Error {
fn from(error: rocksdb::Error) -> Error {
Error::RocksDbError(error)
}
}
impl From<fst::Error> for Error {
fn from(error: fst::Error) -> Error {
Error::FstError(error)
}
}
impl From<rmp_serde::decode::Error> for Error {
fn from(error: rmp_serde::decode::Error) -> Error {
Error::RmpDecodeError(error)
}
}
impl From<rmp_serde::encode::Error> for Error {
fn from(error: rmp_serde::encode::Error) -> Error {
Error::RmpEncodeError(error)
}
}
impl From<bincode::Error> for Error {
fn from(error: bincode::Error) -> Error {
Error::BincodeError(error)
}
}
impl From<SerializerError> for Error {
fn from(error: SerializerError) -> Error {
Error::SerializerError(error)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
SchemaDiffer => write!(f, "schemas differ"),
SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"),
RocksDbError(e) => write!(f, "RocksDB error; {}", e),
FstError(e) => write!(f, "fst error; {}", e),
RmpDecodeError(e) => write!(f, "rmp decode error; {}", e),
RmpEncodeError(e) => write!(f, "rmp encode error; {}", e),
BincodeError(e) => write!(f, "bincode error; {}", e),
SerializerError(e) => write!(f, "serializer error; {}", e),
}
}
}
impl error::Error for Error { }
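The From conversions above are what let the various storage and serialization errors bubble up with ?; a small hypothetical helper shows the intent:

fn roundtrip(value: &u64) -> Result<u64, Error> {
    let bytes = bincode::serialize(value)?; // bincode::Error converts into Error
    let back = bincode::deserialize(&bytes)?; // and so does the decode error
    Ok(back)
}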


@ -0,0 +1,77 @@
use std::ops::Deref;
use serde::de::DeserializeOwned;
use serde::Serialize;
use super::Error;
use std::marker::PhantomData;
#[derive(Clone)]
pub struct CommonIndex(pub crate::CfTree);
impl Deref for CommonIndex {
type Target = crate::CfTree;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl CommonIndex {
pub fn get<T, K>(&self, key: K) -> Result<Option<T>, Error>
where T: DeserializeOwned,
K: AsRef<[u8]>,
{
let raw = match self.0.get(key)? {
Some(raw) => raw,
None => return Ok(None),
};
let data = bincode::deserialize(&raw)?;
Ok(Some(data))
}
pub fn set<T, K>(&self, key: K, data: &T) -> Result<(), Error>
where T: Serialize,
K: AsRef<[u8]>,
{
let raw = bincode::serialize(data)?;
self.0.insert(key, &raw)?;
Ok(())
}
pub fn prefix_iterator<T, P>(&self, prefix: P) -> Result<SerializedIterator<T>, Error>
where T: DeserializeOwned,
P: AsRef<[u8]>,
{
let iter = self.0.prefix_iterator(prefix)?;
Ok(SerializedIterator { iter, _marker: PhantomData })
}
}
pub struct SerializedIterator<'a, T> {
iter: rocksdb::DBIterator<'a>,
_marker: PhantomData<T>,
}
impl<T> Iterator for SerializedIterator<'_, T>
where T: DeserializeOwned,
{
type Item = (String, T);
fn next(&mut self) -> Option<Self::Item> {
let (raw_key, raw_value) = match self.iter.next() {
Some((key, value)) => (key, value),
None => return None,
};
let value: T = match bincode::deserialize(&raw_value) {
Ok(data) => data,
Err(_) => return None,
};
let key = match std::str::from_utf8(&raw_key) {
Ok(key) => key.to_string(),
Err(_) => return None,
};
Some((key, value))
}
}
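An illustrative round trip through CommonIndex (the key and the u64 payload are arbitrary; tree is assumed to be a crate::CfTree obtained elsewhere):

let common = CommonIndex(tree);
common.set("documents-count", &42u64)?;
let count: Option<u64> = common.get("documents-count")?;
assert_eq!(count, Some(42));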


@ -0,0 +1,89 @@
use serde::de::DeserializeOwned;
use serde::{Serialize, Deserialize};
use std::collections::{HashMap, HashSet};
use std::ops::Deref;
use super::Error;
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RankingOrdering {
Asc,
Dsc
}
pub type StopWords = HashSet<String>;
pub type RankingOrder = Vec<String>;
pub type DistinctField = String;
pub type RankingRules = HashMap<String, RankingOrdering>;
const STOP_WORDS_KEY: &str = "stop-words";
const RANKING_ORDER_KEY: &str = "ranking-order";
const DISTINCT_FIELD_KEY: &str = "distinct-field";
const RANKING_RULES_KEY: &str = "ranking-rules";
#[derive(Clone)]
pub struct CustomSettingsIndex(pub(crate) crate::CfTree);
impl Deref for CustomSettingsIndex {
type Target = crate::CfTree;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl CustomSettingsIndex {
fn get<K, T>(&self, key: K) -> Result<Option<T>, Error>
where K: AsRef<[u8]>,
T: DeserializeOwned,
{
let setting = self.0.get(key)?;
let raw = match setting {
Some(raw) => raw,
None => return Ok(None)
};
Ok(Some(bincode::deserialize(&raw)?))
}
fn set<K, T>(&self, key: K, data: &T) -> Result<(), Error>
where K: AsRef<[u8]>,
T: Serialize,
{
let raw = bincode::serialize(data)?;
self.0.insert(key, &raw)?;
Ok(())
}
pub fn get_stop_words(&self) -> Result<Option<StopWords>, Error> {
self.get(STOP_WORDS_KEY)
}
pub fn get_ranking_order(&self) -> Result<Option<RankingOrder>, Error> {
self.get(RANKING_ORDER_KEY)
}
pub fn get_distinct_field(&self) -> Result<Option<DistinctField>, Error> {
self.get(DISTINCT_FIELD_KEY)
}
pub fn get_ranking_rules(&self) -> Result<Option<RankingRules>, Error> {
self.get(RANKING_RULES_KEY)
}
pub fn set_stop_words(&self, value: &StopWords) -> Result<(), Error> {
self.set(STOP_WORDS_KEY, value)
}
pub fn set_ranking_order(&self, value: &RankingOrder) -> Result<(), Error> {
self.set(RANKING_ORDER_KEY, value)
}
pub fn set_distinct_field(&self, value: &DistinctField) -> Result<(), Error> {
self.set(DISTINCT_FIELD_KEY, value)
}
pub fn set_ranking_rules(&self, value: &RankingRules) -> Result<(), Error> {
self.set(RANKING_RULES_KEY, value)
}
}
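A sketch of the typed settings round trip (the stop words are illustrative; index is assumed to be an open Index, defined later in this diff):

let custom_settings = index.custom_settings();
let mut stop_words = StopWords::new();
stop_words.insert("the".to_string());
stop_words.insert("a".to_string());
custom_settings.set_stop_words(&stop_words)?;
assert_eq!(custom_settings.get_stop_words()?, Some(stop_words));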


@ -0,0 +1,33 @@
use std::sync::Arc;
use meilidb_core::DocumentId;
use crate::database::Error;
#[derive(Clone)]
pub struct DocsWordsIndex(pub crate::CfTree);
impl DocsWordsIndex {
pub fn doc_words(&self, id: DocumentId) -> Result<Option<fst::Set>, Error> {
let key = id.0.to_be_bytes();
match self.0.get(key)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None)
}
}
pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> {
let key = id.0.to_be_bytes();
self.0.insert(key, words.as_fst().as_bytes())?;
Ok(())
}
pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> {
let key = id.0.to_be_bytes();
self.0.remove(key)?;
Ok(())
}
}
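A sketch storing the sorted word set of one document and reading it back (DocumentId(1) and the words are arbitrary; docs_words is assumed to be a DocsWordsIndex):

let words = fst::Set::from_iter(vec!["apple", "metro"]).unwrap();
docs_words.set_doc_words(DocumentId(1), &words)?;
let stored = docs_words.doc_words(DocumentId(1))?.expect("just stored");
assert_eq!(stored.len(), 2);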


@ -0,0 +1,146 @@
use std::convert::TryInto;
use std::collections::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::{Schema, SchemaAttr};
use rocksdb::DBVector;
use crate::document_attr_key::DocumentAttrKey;
use crate::RocksDbResult;
fn document_fields_range(id: DocumentId) -> ([u8; 10], [u8; 10]) {
let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();
(start, end)
}
#[derive(Clone)]
pub struct DocumentsIndex(pub(crate) crate::CfTree);
impl DocumentsIndex {
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<Option<DBVector>> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.get(key)
}
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> RocksDbResult<()> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.insert(key, value)?;
Ok(())
}
pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<()> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.remove(key)?;
Ok(())
}
pub fn del_all_document_fields(&self, id: DocumentId) -> RocksDbResult<usize> {
let (start, end) = document_fields_range(id);
let mut count = 0;
for (key, _) in self.0.range(start, end)? {
self.0.remove(key)?;
count += 1;
}
Ok(count)
}
pub fn document_fields(&self, id: DocumentId) -> RocksDbResult<DocumentFieldsIter> {
let (start, end) = document_fields_range(id);
let iter = self.0.range(start, end)?;
Ok(DocumentFieldsIter(iter))
}
pub fn documents_ids(&self) -> RocksDbResult<DocumentsIdsIter> {
let iter = DocumentsKeysIter(self.0.iter()?);
Ok(DocumentsIdsIter { inner: iter, last: None })
}
pub fn documents_fields_repartition(&self, schema: Schema) -> RocksDbResult<HashMap<String, u64>> {
let iter = self.0.iter()?;
let mut repartition_attributes_id = HashMap::new();
for key in DocumentsKeysIter(iter) {
let counter = repartition_attributes_id.entry(key.attribute).or_insert(0);
*counter += 1u64;
}
let mut repartition_with_attribute_name = HashMap::new();
for (key, val) in repartition_attributes_id {
repartition_with_attribute_name.insert(schema.attribute_name(key).to_owned(), val);
}
Ok(repartition_with_attribute_name)
}
pub fn len(&self) -> RocksDbResult<u64> {
let mut last_document_id = None;
let mut count = 0;
for (key, _) in self.0.iter()? {
let array = key.as_ref().try_into().unwrap();
let document_id = DocumentAttrKey::from_be_bytes(array).document_id;
if Some(document_id) != last_document_id {
last_document_id = Some(document_id);
count += 1;
}
}
Ok(count)
}
}
pub struct DocumentFieldsIter<'a>(crate::CfIter<'a>);
impl Iterator for DocumentFieldsIter<'_> {
type Item = (SchemaAttr, Box<[u8]>);
fn next(&mut self) -> Option<Self::Item> {
match self.0.next() {
Some((key, value)) => {
let array = key.as_ref().try_into().unwrap();
let key = DocumentAttrKey::from_be_bytes(array);
Some((key.attribute, value))
},
None => None,
}
}
}
pub struct DocumentsKeysIter<'a>(crate::CfIter<'a>);
impl Iterator for DocumentsKeysIter<'_> {
type Item = DocumentAttrKey;
fn next(&mut self) -> Option<Self::Item> {
match self.0.next() {
Some((key, _)) => {
let array = key.as_ref().try_into().unwrap();
let key = DocumentAttrKey::from_be_bytes(array);
Some(key)
},
None => None,
}
}
}
pub struct DocumentsIdsIter<'a> {
inner: DocumentsKeysIter<'a>,
last: Option<DocumentId>,
}
impl Iterator for DocumentsIdsIter<'_> {
type Item = DocumentId;
fn next(&mut self) -> Option<Self::Item> {
for DocumentAttrKey { document_id, .. } in &mut self.inner {
if self.last != Some(document_id) {
self.last = Some(document_id);
return Some(document_id)
}
}
None
}
}
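A short usage sketch of the per-document field storage (ids, attributes and payloads are illustrative; documents is assumed to be a DocumentsIndex):

let id = DocumentId(7);
documents.set_document_field(id, SchemaAttr(0), b"Amsterdam".to_vec())?;
documents.set_document_field(id, SchemaAttr(1), b"745000".to_vec())?;
// fields of one document are contiguous thanks to the 10 byte key prefix
let attrs: Vec<_> = documents.document_fields(id)?.map(|(attr, _)| attr).collect();
assert_eq!(attrs, vec![SchemaAttr(0), SchemaAttr(1)]);
assert_eq!(documents.len()?, 1); // one distinct document id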


@ -0,0 +1,101 @@
use std::sync::Arc;
use std::convert::TryInto;
use meilidb_schema::Schema;
use crate::ranked_map::RankedMap;
use crate::database::Error;
const SCHEMA_KEY: &str = "schema";
const WORDS_KEY: &str = "words";
const SYNONYMS_KEY: &str = "synonyms";
const RANKED_MAP_KEY: &str = "ranked-map";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
#[derive(Clone)]
pub struct MainIndex(pub(crate) crate::CfTree);
impl MainIndex {
pub fn schema(&self) -> Result<Option<Schema>, Error> {
match self.0.get(SCHEMA_KEY)? {
Some(bytes) => {
let schema = bincode::deserialize_from(bytes.as_ref())?;
Ok(Some(schema))
},
None => Ok(None),
}
}
pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> {
let bytes = bincode::serialize(schema)?;
self.0.insert(SCHEMA_KEY, bytes)?;
Ok(())
}
pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
match self.0.get(WORDS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None),
}
}
pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
self.0.insert(WORDS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
}
pub fn synonyms_set(&self) -> Result<Option<fst::Set>, Error> {
match self.0.get(SYNONYMS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None),
}
}
pub fn set_synonyms_set(&self, value: &fst::Set) -> Result<(), Error> {
self.0.insert(SYNONYMS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
}
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
match self.0.get(RANKED_MAP_KEY)? {
Some(bytes) => {
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
Ok(Some(ranked_map))
},
None => Ok(None),
}
}
pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> {
let mut bytes = Vec::new();
value.write_to_bin(&mut bytes)?;
self.0.insert(RANKED_MAP_KEY, bytes)?;
Ok(())
}
pub fn number_of_documents(&self) -> Result<u64, Error> {
match self.0.get(NUMBER_OF_DOCUMENTS_KEY)? {
Some(bytes) => {
let array = (*bytes).try_into().unwrap();
Ok(u64::from_be_bytes(array))
},
None => Ok(0),
}
}
pub fn set_number_of_documents<F>(&self, f: F) -> Result<u64, Error>
where F: FnOnce(u64) -> u64,
{
let new = self.number_of_documents().map(f)?;
self.0.insert(NUMBER_OF_DOCUMENTS_KEY, new.to_be_bytes())?;
Ok(new)
}
}
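The document counter is updated through a closure so callers can apply a delta to the stored value; a small sketch (main is assumed to be the MainIndex of an index):

let after_addition = main.set_number_of_documents(|old| old + 3)?;
assert_eq!(after_addition, main.number_of_documents()?);
let after_deletion = main.set_number_of_documents(|old| old - 1)?;
assert_eq!(after_deletion, after_addition - 1);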


@ -0,0 +1,507 @@
use std::collections::{HashMap, HashSet, BTreeMap};
use std::convert::TryInto;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::thread;
use std::time::{Duration, Instant};
use arc_swap::{ArcSwap, ArcSwapOption, Guard};
use crossbeam_channel::Receiver;
use meilidb_core::criterion::Criteria;
use meilidb_core::{DocIndex, Store, DocumentId, QueryBuilder};
use meilidb_schema::Schema;
use sdset::SetBuf;
use serde::{de, Serialize, Deserialize};
use crate::CfTree;
use crate::ranked_map::RankedMap;
use crate::serde::{Deserializer, DeserializerError};
pub use self::custom_settings_index::{CustomSettingsIndex, RankingOrdering, StopWords, RankingOrder, DistinctField, RankingRules};
pub use self::common_index::CommonIndex;
pub use self::documents_index::DocumentsIdsIter;
use self::docs_words_index::DocsWordsIndex;
use self::documents_index::DocumentsIndex;
use self::main_index::MainIndex;
use self::synonyms_index::SynonymsIndex;
use self::words_index::WordsIndex;
use crate::RocksDbResult;
use crate::database::{
Error,
DocumentsAddition, DocumentsDeletion,
SynonymsAddition, SynonymsDeletion,
apply_documents_addition, apply_documents_deletion,
apply_synonyms_addition, apply_synonyms_deletion,
};
mod common_index;
mod custom_settings_index;
mod docs_words_index;
mod documents_index;
mod main_index;
mod synonyms_index;
mod words_index;
#[derive(Serialize, Deserialize)]
enum Update {
DocumentsAddition(Vec<rmpv::Value>),
DocumentsDeletion(Vec<DocumentId>),
SynonymsAddition(BTreeMap<String, Vec<String>>),
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}
#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateType {
DocumentsAddition { number: usize },
DocumentsDeletion { number: usize },
SynonymsAddition { number: usize },
SynonymsDeletion { number: usize },
}
#[derive(Clone, Serialize, Deserialize)]
pub struct DetailedDuration {
pub main: Duration,
}
#[derive(Clone, Serialize, Deserialize)]
pub struct UpdateResult {
pub update_id: u64,
pub update_type: UpdateType,
pub result: Result<(), String>,
pub detailed_duration: DetailedDuration,
}
#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateStatus {
Enqueued,
Processed(UpdateResult),
Unknown,
}
fn spawn_update_system(index: Index, subscription: Receiver<()>) -> thread::JoinHandle<()> {
thread::spawn(move || {
let mut subscription = subscription.into_iter();
loop {
while let Some((key, _)) = index.updates_index.iter().unwrap().next() {
let update_id = key.as_ref().try_into().map(u64::from_be_bytes).unwrap();
let updates = &index.updates_index;
let results = &index.updates_results_index;
let update = updates.get(&key).unwrap().unwrap();
let (update_type, result, duration) = match rmp_serde::from_read_ref(&update).unwrap() {
Update::DocumentsAddition(documents) => {
let update_type = UpdateType::DocumentsAddition { number: documents.len() };
let ranked_map = index.cache.load().ranked_map.clone();
let start = Instant::now();
let result = apply_documents_addition(&index, ranked_map, documents);
(update_type, result, start.elapsed())
},
Update::DocumentsDeletion(documents) => {
let update_type = UpdateType::DocumentsDeletion { number: documents.len() };
let ranked_map = index.cache.load().ranked_map.clone();
let start = Instant::now();
let result = apply_documents_deletion(&index, ranked_map, documents);
(update_type, result, start.elapsed())
},
Update::SynonymsAddition(synonyms) => {
let update_type = UpdateType::SynonymsAddition { number: synonyms.len() };
let start = Instant::now();
let result = apply_synonyms_addition(&index, synonyms);
(update_type, result, start.elapsed())
},
Update::SynonymsDeletion(synonyms) => {
let update_type = UpdateType::SynonymsDeletion { number: synonyms.len() };
let start = Instant::now();
let result = apply_synonyms_deletion(&index, synonyms);
(update_type, result, start.elapsed())
},
};
let detailed_duration = DetailedDuration { main: duration };
let status = UpdateResult {
update_id,
update_type,
result: result.map_err(|e| e.to_string()),
detailed_duration,
};
if let Some(callback) = &*index.update_callback.load() {
(callback)(status.clone());
}
let value = bincode::serialize(&status).unwrap();
results.insert(&key, value).unwrap();
updates.remove(&key).unwrap();
}
// this subscription is just used to block
// the loop until a new update is inserted
subscription.next();
}
})
}
fn last_update_id(
update_index: &crate::CfTree,
update_results_index: &crate::CfTree,
) -> RocksDbResult<u64>
{
let uikey = match update_index.last_key()? {
Some(key) => Some(key.as_ref().try_into().map(u64::from_be_bytes).unwrap()),
None => None,
};
let urikey = match update_results_index.last_key()? {
Some(key) => Some(key.as_ref().try_into().map(u64::from_be_bytes).unwrap()),
None => None,
};
Ok(uikey.max(urikey).unwrap_or(0))
}
#[derive(Clone)]
pub struct IndexStats {
pub number_of_words: usize,
pub number_of_documents: u64,
pub number_attrs_in_ranked_map: usize,
pub documents_fields_repartition: HashMap<String, u64>,
}
#[derive(Clone)]
pub struct Index {
pub(crate) cache: Arc<ArcSwap<Cache>>,
// TODO this will be a snapshot in the future
main_index: MainIndex,
synonyms_index: SynonymsIndex,
words_index: WordsIndex,
docs_words_index: DocsWordsIndex,
documents_index: DocumentsIndex,
custom_settings_index: CustomSettingsIndex,
// used by the update system
updates_id: Arc<AtomicU64>,
updates_index: crate::CfTree,
updates_results_index: crate::CfTree,
update_callback: Arc<ArcSwapOption<Box<dyn Fn(UpdateResult) + Send + Sync + 'static>>>,
}
pub(crate) struct Cache {
pub words: Arc<fst::Set>,
pub synonyms: Arc<fst::Set>,
pub schema: Schema,
pub ranked_map: RankedMap,
pub number_of_documents: u64,
}
impl Index {
pub fn new(db: Arc<rocksdb::DB>, name: &str) -> Result<Index, Error> {
Index::new_raw(db, name, None)
}
pub fn with_schema(db: Arc<rocksdb::DB>, name: &str, schema: Schema) -> Result<Index, Error> {
Index::new_raw(db, name, Some(schema))
}
fn new_raw(db: Arc<rocksdb::DB>, name: &str, schema: Option<Schema>) -> Result<Index, Error> {
let main_index = CfTree::create(db.clone(), name.to_string()).map(MainIndex)?;
let synonyms_index = CfTree::create(db.clone(), format!("{}-synonyms", name)).map(SynonymsIndex)?;
let words_index = CfTree::create(db.clone(), format!("{}-words", name)).map(WordsIndex)?;
let docs_words_index = CfTree::create(db.clone(), format!("{}-docs-words", name)).map(DocsWordsIndex)?;
let documents_index = CfTree::create(db.clone(), format!("{}-documents", name)).map(DocumentsIndex)?;
let custom_settings_index = CfTree::create(db.clone(), format!("{}-custom", name)).map(CustomSettingsIndex)?;
let (updates_index, subscription) = CfTree::create_with_subcription(db.clone(), format!("{}-updates", name))?;
let updates_results_index = CfTree::create(db.clone(), format!("{}-updates-results", name))?;
let words = match main_index.words_set()? {
Some(words) => Arc::new(words),
None => Arc::new(fst::Set::default()),
};
let synonyms = match main_index.synonyms_set()? {
Some(synonyms) => Arc::new(synonyms),
None => Arc::new(fst::Set::default()),
};
let schema = match (schema, main_index.schema()?) {
(Some(ref expected), Some(ref current)) if current != expected => {
return Err(Error::SchemaDiffer)
},
(Some(expected), Some(_)) => expected,
(Some(expected), None) => {
main_index.set_schema(&expected)?;
expected
},
(None, Some(current)) => current,
(None, None) => return Err(Error::SchemaMissing),
};
let ranked_map = match main_index.ranked_map()? {
Some(map) => map,
None => RankedMap::default(),
};
let number_of_documents = documents_index.len()?;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
let cache = Arc::new(ArcSwap::from_pointee(cache));
let last_update_id = last_update_id(&updates_index, &updates_results_index)?;
let updates_id = Arc::new(AtomicU64::new(last_update_id + 1));
let index = Index {
cache,
main_index,
synonyms_index,
words_index,
docs_words_index,
documents_index,
custom_settings_index,
updates_id,
updates_index,
updates_results_index,
update_callback: Arc::new(ArcSwapOption::empty()),
};
let _handle = spawn_update_system(index.clone(), subscription);
Ok(index)
}
pub fn set_update_callback<F>(&self, callback: F)
where F: Fn(UpdateResult) + Send + Sync + 'static
{
self.update_callback.store(Some(Arc::new(Box::new(callback))));
}
pub fn unset_update_callback(&self) {
self.update_callback.store(None);
}
pub fn stats(&self) -> RocksDbResult<IndexStats> {
let cache = self.cache.load();
let documents_fields_repartition = self.documents_index.documents_fields_repartition(cache.schema.clone())?;
Ok(IndexStats {
number_of_words: cache.words.len(),
number_of_documents: cache.number_of_documents,
number_attrs_in_ranked_map: cache.ranked_map.len(),
documents_fields_repartition,
})
}
pub fn query_builder(&self) -> QueryBuilder<RefIndex> {
let ref_index = self.as_ref();
QueryBuilder::new(ref_index)
}
pub fn query_builder_with_criteria<'c>(
&self,
criteria: Criteria<'c>,
) -> QueryBuilder<'c, RefIndex>
{
let ref_index = self.as_ref();
QueryBuilder::with_criteria(ref_index, criteria)
}
pub fn as_ref(&self) -> RefIndex {
RefIndex {
cache: self.cache.load(),
main_index: &self.main_index,
synonyms_index: &self.synonyms_index,
words_index: &self.words_index,
docs_words_index: &self.docs_words_index,
documents_index: &self.documents_index,
custom_settings_index: &self.custom_settings_index,
}
}
pub fn schema(&self) -> Schema {
self.cache.load().schema.clone()
}
pub fn ranked_map(&self) -> RankedMap {
self.cache.load().ranked_map.clone()
}
pub fn synonyms_index(&self) -> SynonymsIndex {
self.synonyms_index.clone()
}
pub fn synonyms_set(&self) -> Arc<fst::Set> {
self.cache.load().synonyms.clone()
}
pub fn custom_settings(&self) -> CustomSettingsIndex {
self.custom_settings_index.clone()
}
pub fn number_of_documents(&self) -> u64 {
self.cache.load().number_of_documents
}
pub fn documents_addition<D>(&self) -> DocumentsAddition<D> {
DocumentsAddition::new(self)
}
pub fn documents_deletion(&self) -> DocumentsDeletion {
DocumentsDeletion::new(self)
}
pub fn synonyms_addition(&self) -> SynonymsAddition {
SynonymsAddition::new(self)
}
pub fn synonyms_deletion(&self) -> SynonymsDeletion {
SynonymsDeletion::new(self)
}
pub fn update_status(
&self,
update_id: u64,
) -> Result<UpdateStatus, Error>
{
let update_id = update_id.to_be_bytes();
match self.updates_results_index.get(update_id)? {
Some(value) => {
let value = bincode::deserialize(&value)?;
Ok(UpdateStatus::Processed(value))
},
None => {
match self.updates_index.get(update_id)? {
Some(_) => Ok(UpdateStatus::Enqueued),
None => Ok(UpdateStatus::Unknown),
}
}
}
}
pub fn update_status_blocking(
&self,
update_id: u64,
) -> Result<UpdateResult, Error>
{
loop {
if let Some(value) = self.updates_results_index.get(&update_id.to_be_bytes())? {
let value = bincode::deserialize(&value)?;
return Ok(value)
}
std::thread::sleep(Duration::from_millis(300));
}
}
pub fn documents_ids(&self) -> Result<DocumentsIdsIter, Error> {
Ok(self.documents_index.documents_ids()?)
}
pub fn document<T>(
&self,
fields: Option<&HashSet<&str>>,
id: DocumentId,
) -> Result<Option<T>, DeserializerError>
where T: de::DeserializeOwned,
{
let schema = self.schema();
let fields = match fields {
Some(fields) => fields.into_iter().map(|name| schema.attribute(name)).collect(),
None => None,
};
let mut deserializer = Deserializer {
document_id: id,
index: &self,
fields: fields.as_ref(),
};
// TODO: currently we return an error if all document fields are missing,
// returning None would have been better
T::deserialize(&mut deserializer).map(Some)
}
}
impl Index {
pub(crate) fn push_documents_addition<D>(&self, addition: Vec<D>) -> Result<u64, Error>
where D: serde::Serialize
{
let mut values = Vec::with_capacity(addition.len());
for add in addition {
let vec = rmp_serde::to_vec_named(&add)?;
let add = rmp_serde::from_read(&vec[..])?;
values.push(add);
}
let addition = Update::DocumentsAddition(values);
let update = rmp_serde::to_vec_named(&addition)?;
self.raw_push_update(update)
}
pub(crate) fn push_documents_deletion(
&self,
deletion: Vec<DocumentId>,
) -> Result<u64, Error>
{
let deletion = Update::DocumentsDeletion(deletion);
let update = rmp_serde::to_vec_named(&deletion)?;
self.raw_push_update(update)
}
pub(crate) fn push_synonyms_addition(
&self,
addition: BTreeMap<String, Vec<String>>,
) -> Result<u64, Error>
{
let addition = Update::SynonymsAddition(addition);
let update = rmp_serde::to_vec_named(&addition)?;
self.raw_push_update(update)
}
pub(crate) fn push_synonyms_deletion(
&self,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> Result<u64, Error>
{
let deletion = Update::SynonymsDeletion(deletion);
let update = rmp_serde::to_vec_named(&deletion)?;
self.raw_push_update(update)
}
fn raw_push_update(&self, raw_update: Vec<u8>) -> Result<u64, Error> {
let update_id = self.updates_id.fetch_add(1, Ordering::SeqCst);
let update_id_array = update_id.to_be_bytes();
self.updates_index.insert(update_id_array, raw_update)?;
Ok(update_id)
}
}
pub struct RefIndex<'a> {
pub(crate) cache: Guard<'static, Arc<Cache>>,
pub main_index: &'a MainIndex,
pub synonyms_index: &'a SynonymsIndex,
pub words_index: &'a WordsIndex,
pub docs_words_index: &'a DocsWordsIndex,
pub documents_index: &'a DocumentsIndex,
pub custom_settings_index: &'a CustomSettingsIndex,
}
impl Store for RefIndex<'_> {
type Error = Error;
fn words(&self) -> Result<&fst::Set, Self::Error> {
Ok(&self.cache.words)
}
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
Ok(self.words_index.doc_indexes(word)?)
}
fn synonyms(&self) -> Result<&fst::Set, Self::Error> {
Ok(&self.cache.synonyms)
}
fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, Self::Error> {
Ok(self.synonyms_index.alternatives_to(word)?)
}
}
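Putting the pieces together, a hedged end-to-end sketch that pushes an update and waits for the background thread to process it (the Movie type and its fields are illustrative only):

#[derive(serde::Serialize)]
struct Movie {
    id: u64,
    title: String,
}

fn add_one_movie(index: &Index) -> Result<(), Error> {
    let mut addition = index.documents_addition();
    addition.update_document(Movie { id: 1, title: "Carol".to_string() });
    let update_id = addition.finalize()?;
    // the update system runs in a dedicated thread, block until it has run
    let result = index.update_status_blocking(update_id)?;
    assert!(result.result.is_ok());
    Ok(())
}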


@ -0,0 +1,21 @@
use crate::RocksDbResult;
#[derive(Clone)]
pub struct SynonymsIndex(pub(crate) crate::CfTree);
impl SynonymsIndex {
pub fn alternatives_to(&self, word: &[u8]) -> RocksDbResult<Option<fst::Set>> {
match self.0.get(word)? {
Some(vector) => Ok(Some(fst::Set::from_bytes(vector.to_vec()).unwrap())),
None => Ok(None),
}
}
pub fn set_alternatives_to(&self, word: &[u8], value: Vec<u8>) -> RocksDbResult<()> {
self.0.insert(word, value).map(drop)
}
pub fn del_alternatives_of(&self, word: &[u8]) -> RocksDbResult<()> {
self.0.remove(word).map(drop)
}
}


@ -0,0 +1,45 @@
use meilidb_core::DocIndex;
use sdset::{Set, SetBuf};
use zerocopy::{LayoutVerified, AsBytes};
use crate::RocksDbResult;
#[derive(Clone)]
pub struct WordsIndex(pub(crate) crate::CfTree);
impl WordsIndex {
pub fn doc_indexes(&self, word: &[u8]) -> RocksDbResult<Option<SetBuf<DocIndex>>> {
// we must force an allocation to make the memory aligned
match self.0.get(word)? {
Some(bytes) => {
let vec = match LayoutVerified::new_slice(bytes.as_ref()) {
Some(layout) => layout.into_slice().to_vec(),
None => {
let len = bytes.as_ref().len();
let count = len / std::mem::size_of::<DocIndex>();
let mut buf: Vec<DocIndex> = Vec::with_capacity(count);
unsafe {
let src = bytes.as_ref().as_ptr();
let dst = buf.as_mut_ptr() as *mut u8;
std::ptr::copy_nonoverlapping(src, dst, len);
buf.set_len(count);
}
buf
}
};
let setbuf = SetBuf::new_unchecked(vec);
Ok(Some(setbuf))
},
None => Ok(None),
}
}
pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> RocksDbResult<()> {
self.0.insert(word, set.as_bytes()).map(drop)
}
pub fn del_doc_indexes(&self, word: &[u8]) -> RocksDbResult<()> {
self.0.remove(word).map(drop)
}
}
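A sketch of a postings round trip (words is assumed to be a WordsIndex and doc_indexes a sdset::Set<DocIndex> produced by the indexer):

words.set_doc_indexes(b"subway", &doc_indexes)?;
assert!(words.doc_indexes(b"subway")?.is_some());
words.del_doc_indexes(b"subway")?;
assert!(words.doc_indexes(b"subway")?.is_none());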


@ -0,0 +1,155 @@
use std::collections::hash_map::Entry;
use std::collections::{HashSet, HashMap};
use std::path::Path;
use std::sync::Arc;
use std::sync::RwLock;
use meilidb_schema::Schema;
mod error;
mod index;
mod update;
use crate::CfTree;
pub use self::error::Error;
pub use self::index::{
Index, CustomSettingsIndex, CommonIndex, RankingOrdering,
StopWords, RankingOrder, DistinctField, RankingRules,
UpdateType, DetailedDuration, UpdateResult, UpdateStatus
};
pub use self::update::DocumentsAddition;
pub use self::update::DocumentsDeletion;
pub use self::update::SynonymsAddition;
pub use self::update::SynonymsDeletion;
use self::update::apply_documents_addition;
use self::update::apply_documents_deletion;
use self::update::apply_synonyms_addition;
use self::update::apply_synonyms_deletion;
const INDEXES_KEY: &str = "indexes";
const COMMON_KEY: &str = "common-index";
fn load_indexes(tree: &rocksdb::DB) -> Result<HashSet<String>, Error> {
match tree.get(INDEXES_KEY)? {
Some(bytes) => Ok(bincode::deserialize(&bytes)?),
None => Ok(HashSet::new())
}
}
pub struct Database {
cache: RwLock<HashMap<String, Index>>,
inner: Arc<rocksdb::DB>,
common: Arc<CommonIndex>,
}
impl Database {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
let cache = RwLock::new(HashMap::new());
let mut options = rocksdb::Options::default();
options.create_if_missing(true);
let cfs = rocksdb::DB::list_cf(&options, &path).unwrap_or_default();
let inner = Arc::new(rocksdb::DB::open_cf(&options, path, cfs)?);
let common_tree = CfTree::create(inner.clone(), COMMON_KEY.to_owned())?;
let common = Arc::new(CommonIndex(common_tree));
let indexes = load_indexes(&inner)?;
let database = Database { cache, inner, common };
for index in indexes {
database.open_index(&index)?;
}
Ok(database)
}
pub fn indexes(&self) -> Result<HashSet<String>, Error> {
load_indexes(&self.inner)
}
fn set_indexes(&self, value: &HashSet<String>) -> Result<(), Error> {
let bytes = bincode::serialize(value)?;
self.inner.put(INDEXES_KEY, bytes)?;
Ok(())
}
pub fn open_index(&self, name: &str) -> Result<Option<Index>, Error> {
{
let cache = self.cache.read().unwrap();
if let Some(index) = cache.get(name).cloned() {
return Ok(Some(index))
}
}
let mut cache = self.cache.write().unwrap();
let index = match cache.entry(name.to_string()) {
Entry::Occupied(occupied) => {
occupied.get().clone()
},
Entry::Vacant(vacant) => {
if !self.indexes()?.contains(name) {
return Ok(None)
}
let index = Index::new(self.inner.clone(), name)?;
vacant.insert(index).clone()
},
};
Ok(Some(index))
}
pub fn create_index(&self, name: &str, schema: Schema) -> Result<Index, Error> {
let mut cache = self.cache.write().unwrap();
let index = match cache.entry(name.to_string()) {
Entry::Occupied(occupied) => {
occupied.get().clone()
},
Entry::Vacant(vacant) => {
let index = Index::with_schema(self.inner.clone(), name, schema)?;
let mut indexes = self.indexes()?;
indexes.insert(name.to_string());
self.set_indexes(&indexes)?;
vacant.insert(index).clone()
},
};
Ok(index)
}
pub fn delete_index(&self, name: &str) -> Result<(), Error> {
let mut cache = self.cache.write().unwrap();
self.inner.drop_cf(name)?;
let _ = self.inner.drop_cf(&format!("{}-synonyms", name));
let _ = self.inner.drop_cf(&format!("{}-words", name));
let _ = self.inner.drop_cf(&format!("{}-docs-words", name));
let _ = self.inner.drop_cf(&format!("{}-documents", name));
let _ = self.inner.drop_cf(&format!("{}-custom", name));
let _ = self.inner.drop_cf(&format!("{}-updates", name));
let _ = self.inner.drop_cf(&format!("{}-updates-results", name));
cache.remove(name);
if let Ok(mut index_list) = self.indexes() {
index_list.remove(name);
let _ = self.set_indexes(&index_list);
}
Ok(())
}
pub fn common_index(&self) -> Arc<CommonIndex> {
self.common.clone()
}
pub fn checkpoint_to<P>(&self, path: P) -> Result<(), Error>
where P: AsRef<Path>,
{
let checkpoint = rocksdb::checkpoint::Checkpoint::new(&self.inner)?;
Ok(checkpoint.create_checkpoint(path)?)
}
}
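A hedged sketch of opening a database and getting an index by name (the path and index name are illustrative; schema is assumed to be a meilidb_schema::Schema built elsewhere):

fn open_or_create(schema: Schema) -> Result<Index, Error> {
    let database = Database::open("/tmp/example.mdb")?;
    let index = match database.open_index("movies")? {
        Some(index) => index,
        None => database.create_index("movies", schema)?,
    };
    Ok(index)
}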


@ -0,0 +1,139 @@
use std::collections::HashSet;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use sdset::{SetOperation, duo::Union};
use serde::Serialize;
use crate::RankedMap;
use crate::database::{Error, Index, index::Cache, apply_documents_deletion};
use crate::indexer::Indexer;
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
pub struct DocumentsAddition<'a, D> {
index: &'a Index,
documents: Vec<D>,
}
impl<'a, D> DocumentsAddition<'a, D> {
pub fn new(index: &'a Index) -> DocumentsAddition<'a, D> {
DocumentsAddition { index, documents: Vec::new() }
}
pub fn update_document(&mut self, document: D) {
self.documents.push(document);
}
pub fn finalize(self) -> Result<u64, Error>
where D: serde::Serialize
{
self.index.push_documents_addition(self.documents)
}
}
pub fn apply_documents_addition(
index: &Index,
mut ranked_map: RankedMap,
addition: Vec<rmpv::Value>,
) -> Result<(), Error>
{
let mut document_ids = HashSet::new();
let mut document_store = RamDocumentStore::new();
let mut indexer = Indexer::new();
let schema = &index.schema();
let identifier = schema.identifier_name();
for document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
// 1. store the document id for future deletion
document_ids.insert(document_id);
// 2. index the document fields in ram stores
let serializer = Serializer {
schema,
document_store: &mut document_store,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
let ref_index = index.as_ref();
let docs_words = ref_index.docs_words_index;
let documents = ref_index.documents_index;
let main = ref_index.main_index;
let words = ref_index.words_index;
// 1. remove the previous documents match indexes
let documents_to_insert = document_ids.iter().cloned().collect();
apply_documents_deletion(index, ranked_map.clone(), documents_to_insert)?;
// 2. insert new document attributes in the database
for ((id, attr), value) in document_store.into_inner() {
documents.set_document_field(id, attr, value)?;
}
let indexed = indexer.build();
let mut delta_words_builder = SetBuilder::memory();
for (word, delta_set) in indexed.words_doc_indexes {
delta_words_builder.insert(&word).unwrap();
let set = match words.doc_indexes(&word)? {
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
None => delta_set,
};
words.set_doc_indexes(&word, &set)?;
}
for (id, words) in indexed.docs_words {
docs_words.set_doc_words(id, &words)?;
}
let delta_words = delta_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let words = match main.words_set()? {
Some(words) => {
let op = OpBuilder::new()
.add(words.stream())
.add(delta_words.stream())
.r#union();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => delta_words,
};
main.set_words_set(&words)?;
main.set_ranked_map(&ranked_map)?;
let inserted_documents_len = document_ids.len() as u64;
let number_of_documents = main.set_number_of_documents(|old| old + inserted_documents_len)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(words);
let synonyms = cache.synonyms.clone();
let schema = cache.schema.clone();
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}


@ -0,0 +1,150 @@
use std::collections::{HashMap, HashSet, BTreeSet};
use std::sync::Arc;
use fst::{SetBuilder, Streamer};
use meilidb_core::DocumentId;
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};
use crate::RankedMap;
use crate::serde::extract_document_id;
use crate::database::{Index, Error, index::Cache};
pub struct DocumentsDeletion<'a> {
index: &'a Index,
documents: Vec<DocumentId>,
}
impl<'a> DocumentsDeletion<'a> {
pub fn new(index: &'a Index) -> DocumentsDeletion<'a> {
DocumentsDeletion { index, documents: Vec::new() }
}
pub fn delete_document_by_id(&mut self, document_id: DocumentId) {
self.documents.push(document_id);
}
pub fn delete_document<D>(&mut self, document: D) -> Result<(), Error>
where D: serde::Serialize,
{
let schema = self.index.schema();
let identifier = schema.identifier_name();
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
self.delete_document_by_id(document_id);
Ok(())
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_documents_deletion(self.documents)
}
}
impl Extend<DocumentId> for DocumentsDeletion<'_> {
fn extend<T: IntoIterator<Item=DocumentId>>(&mut self, iter: T) {
self.documents.extend(iter)
}
}
pub fn apply_documents_deletion(
index: &Index,
mut ranked_map: RankedMap,
deletion: Vec<DocumentId>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let schema = index.schema();
let docs_words = ref_index.docs_words_index;
let documents = ref_index.documents_index;
let main = ref_index.main_index;
let words = ref_index.words_index;
let idset = SetBuf::from_dirty(deletion);
// collect the ranked attributes according to the schema
let ranked_attrs: Vec<_> = schema.iter()
.filter_map(|(_, attr, prop)| {
if prop.is_ranked() { Some(attr) } else { None }
})
.collect();
let mut words_document_ids = HashMap::new();
for id in idset {
// remove all the ranked attributes from the ranked_map
for ranked_attr in &ranked_attrs {
ranked_map.remove(id, *ranked_attr);
}
if let Some(words) = docs_words.doc_words(id)? {
let mut stream = words.stream();
while let Some(word) = stream.next() {
let word = word.to_vec();
words_document_ids.entry(word).or_insert_with(Vec::new).push(id);
}
}
}
let mut deleted_documents = HashSet::new();
let mut removed_words = BTreeSet::new();
for (word, document_ids) in words_document_ids {
let document_ids = SetBuf::from_dirty(document_ids);
if let Some(doc_indexes) = words.doc_indexes(&word)? {
let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
let doc_indexes = op.into_set_buf();
if !doc_indexes.is_empty() {
words.set_doc_indexes(&word, &doc_indexes)?;
} else {
words.del_doc_indexes(&word)?;
removed_words.insert(word);
}
}
for id in document_ids {
if documents.del_all_document_fields(id)? != 0 {
deleted_documents.insert(id);
}
docs_words.del_doc_words(id)?;
}
}
let removed_words = fst::Set::from_iter(removed_words).unwrap();
let words = match main.words_set()? {
Some(words_set) => {
let op = fst::set::OpBuilder::new()
.add(words_set.stream())
.add(removed_words.stream())
.difference();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => fst::Set::default(),
};
main.set_words_set(&words)?;
main.set_ranked_map(&ranked_map)?;
let deleted_documents_len = deleted_documents.len() as u64;
let number_of_documents = main.set_number_of_documents(|old| old - deleted_documents_len)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(words);
let synonyms = cache.synonyms.clone();
let schema = cache.schema.clone();
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}
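A small usage sketch of the deletion builder (ids are arbitrary; the deletion is only applied once the update thread picks it up):

let mut deletion = index.documents_deletion();
deletion.delete_document_by_id(DocumentId(1));
deletion.extend(vec![DocumentId(2), DocumentId(3)]);
let update_id = deletion.finalize()?;
let _status = index.update_status(update_id)?; // Enqueued until processed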


@ -0,0 +1,9 @@
mod documents_addition;
mod documents_deletion;
mod synonyms_addition;
mod synonyms_deletion;
pub use self::documents_addition::{DocumentsAddition, apply_documents_addition};
pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion};
pub use self::synonyms_addition::{SynonymsAddition, apply_synonyms_addition};
pub use self::synonyms_deletion::{SynonymsDeletion, apply_synonyms_deletion};


@ -0,0 +1,94 @@
use std::collections::BTreeMap;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use sdset::SetBuf;
use crate::database::{Error, Index, index::Cache};
pub struct SynonymsAddition<'a> {
index: &'a Index,
synonyms: BTreeMap<String, Vec<String>>,
}
impl<'a> SynonymsAddition<'a> {
pub fn new(index: &'a Index) -> SynonymsAddition<'a> {
SynonymsAddition { index, synonyms: BTreeMap::new() }
}
pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
where S: AsRef<str>,
T: AsRef<str>,
I: IntoIterator<Item=T>,
{
let synonym = normalize_str(synonym.as_ref());
let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_synonyms_addition(self.synonyms)
}
}
pub fn apply_synonyms_addition(
index: &Index,
addition: BTreeMap<String, Vec<String>>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let synonyms = ref_index.synonyms_index;
let main = ref_index.main_index;
let mut synonyms_builder = SetBuilder::memory();
for (synonym, alternatives) in addition {
synonyms_builder.insert(&synonym).unwrap();
let alternatives = {
let alternatives = SetBuf::from_dirty(alternatives);
let mut alternatives_builder = SetBuilder::memory();
alternatives_builder.extend_iter(alternatives).unwrap();
alternatives_builder.into_inner().unwrap()
};
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
}
let delta_synonyms = synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let synonyms = match main.synonyms_set()? {
Some(synonyms) => {
let op = OpBuilder::new()
.add(synonyms.stream())
.add(delta_synonyms.stream())
.r#union();
let mut synonyms_builder = SetBuilder::memory();
synonyms_builder.extend_stream(op).unwrap();
synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => delta_synonyms,
};
main.set_synonyms_set(&synonyms)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(main.words_set()?.unwrap_or_default());
let ranked_map = cache.ranked_map.clone();
let synonyms = Arc::new(synonyms);
let schema = cache.schema.clone();
let number_of_documents = cache.number_of_documents;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}
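A usage sketch of the synonyms addition builder (the strings are illustrative; the synonym is normalized and the alternatives are lowercased as shown above):

let mut addition = index.synonyms_addition();
addition.add_synonym("NYC", &["new york", "new york city"]);
addition.add_synonym("subway", &["underground", "metro"]);
let update_id = addition.finalize()?;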


@ -0,0 +1,137 @@
use std::collections::BTreeMap;
use std::iter::FromIterator;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use sdset::SetBuf;
use crate::database::{Error, Index, index::Cache};
pub struct SynonymsDeletion<'a> {
index: &'a Index,
synonyms: BTreeMap<String, Option<Vec<String>>>,
}
impl<'a> SynonymsDeletion<'a> {
pub fn new(index: &'a Index) -> SynonymsDeletion<'a> {
SynonymsDeletion { index, synonyms: BTreeMap::new() }
}
pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
let synonym = normalize_str(synonym.as_ref());
self.synonyms.insert(synonym, None);
}
pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
where S: AsRef<str>,
T: AsRef<str>,
I: Iterator<Item=T>,
{
let synonym = normalize_str(synonym.as_ref());
let value = self.synonyms.entry(synonym).or_insert(None);
let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
match value {
Some(v) => v.extend(alternatives),
None => *value = Some(Vec::from_iter(alternatives)),
}
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_synonyms_deletion(self.synonyms)
}
}
pub fn apply_synonyms_deletion(
index: &Index,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let synonyms = ref_index.synonyms_index;
let main = ref_index.main_index;
let mut delete_whole_synonym_builder = SetBuilder::memory();
for (synonym, alternatives) in deletion {
match alternatives {
Some(alternatives) => {
let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?;
let prev_alternatives = match prev_alternatives {
Some(alternatives) => alternatives,
None => continue,
};
let delta_alternatives = {
let alternatives = SetBuf::from_dirty(alternatives);
let mut builder = SetBuilder::memory();
builder.extend_iter(alternatives).unwrap();
builder.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
};
let op = OpBuilder::new()
.add(prev_alternatives.stream())
.add(delta_alternatives.stream())
.difference();
let (alternatives, empty_alternatives) = {
let mut builder = SetBuilder::memory();
let len = builder.get_ref().len();
builder.extend_stream(op).unwrap();
let is_empty = len == builder.get_ref().len();
let alternatives = builder.into_inner().unwrap();
(alternatives, is_empty)
};
if empty_alternatives {
delete_whole_synonym_builder.insert(synonym.as_bytes())?;
} else {
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
}
},
None => {
delete_whole_synonym_builder.insert(&synonym).unwrap();
synonyms.del_alternatives_of(synonym.as_bytes())?;
}
}
}
let delta_synonyms = delete_whole_synonym_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let synonyms = match main.synonyms_set()? {
Some(synonyms) => {
let op = OpBuilder::new()
.add(synonyms.stream())
.add(delta_synonyms.stream())
.difference();
let mut synonyms_builder = SetBuilder::memory();
synonyms_builder.extend_stream(op).unwrap();
synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => fst::Set::default(),
};
main.set_synonyms_set(&synonyms)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(main.words_set()?.unwrap_or_default());
let ranked_map = cache.ranked_map.clone();
let synonyms = Arc::new(synonyms);
let schema = cache.schema.clone();
let number_of_documents = cache.number_of_documents;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}
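A usage sketch of the deletion builder (strings are illustrative): one call removes specific alternatives, the other wipes a synonym entry entirely.

let mut deletion = index.synonyms_deletion();
deletion.delete_specific_alternatives_of("nyc", ["new york"].iter());
deletion.delete_all_alternatives_of("subway");
let update_id = deletion.finalize()?;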


@ -0,0 +1,69 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocumentAttrKey {
pub document_id: DocumentId,
pub attribute: SchemaAttr,
}
impl DocumentAttrKey {
pub fn new(document_id: DocumentId, attribute: SchemaAttr) -> DocumentAttrKey {
DocumentAttrKey { document_id, attribute }
}
pub fn to_be_bytes(self) -> [u8; 10] {
let mut output = [0u8; 10];
let document_id = self.document_id.0.to_be_bytes();
let attribute = self.attribute.0.to_be_bytes();
unsafe {
use std::{mem::size_of, ptr::copy_nonoverlapping};
let output = output.as_mut_ptr();
copy_nonoverlapping(document_id.as_ptr(), output, size_of::<u64>());
let output = output.add(size_of::<u64>());
copy_nonoverlapping(attribute.as_ptr(), output, size_of::<u16>());
}
output
}
pub fn from_be_bytes(bytes: [u8; 10]) -> DocumentAttrKey {
let document_id;
let attribute;
unsafe {
use std::ptr::read_unaligned;
let pointer = bytes.as_ptr() as *const _;
let document_id_bytes = read_unaligned(pointer);
document_id = u64::from_be_bytes(document_id_bytes);
let pointer = pointer.add(1) as *const _;
let attribute_bytes = read_unaligned(pointer);
attribute = u16::from_be_bytes(attribute_bytes);
}
DocumentAttrKey {
document_id: DocumentId(document_id),
attribute: SchemaAttr(attribute),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn to_from_be_bytes() {
let document_id = DocumentId(67578308);
let schema_attr = SchemaAttr(3456);
let x = DocumentAttrKey::new(document_id, schema_attr);
assert_eq!(x, DocumentAttrKey::from_be_bytes(x.to_be_bytes()));
}
}
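A worked example of the 10 byte key layout (8 big-endian document id bytes followed by 2 attribute bytes), which is what keeps all attributes of one document contiguous and ordered:

let key = DocumentAttrKey::new(DocumentId(1), SchemaAttr(2));
assert_eq!(key.to_be_bytes(), [0, 0, 0, 0, 0, 0, 0, 1, 0, 2]);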

208
meilidb-data/src/indexer.rs Normal file

@ -0,0 +1,208 @@
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;
use deunicode::deunicode_with_tofu;
use meilidb_core::{DocumentId, DocIndex};
use meilidb_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
use sdset::SetBuf;
type Word = Vec<u8>; // TODO make it be a SmallVec
pub struct Indexer {
word_limit: usize, // the maximum number of indexed words
words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
docs_words: HashMap<DocumentId, Vec<Word>>,
}
pub struct Indexed {
pub words_doc_indexes: BTreeMap<Word, SetBuf<DocIndex>>,
pub docs_words: HashMap<DocumentId, fst::Set>,
}
impl Indexer {
pub fn new() -> Indexer {
Indexer::with_word_limit(1000)
}
pub fn with_word_limit(limit: usize) -> Indexer {
Indexer {
word_limit: limit,
words_doc_indexes: BTreeMap::new(),
docs_words: HashMap::new(),
}
}
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
let lowercase_text = text.to_lowercase();
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
// TODO compute the deunicoded version after the cjk check
let next = if !lowercase_text.contains(is_cjk) && lowercase_text != deunicoded {
Some(deunicoded)
} else {
None
};
let iter = Some(lowercase_text).into_iter().chain(next);
for text in iter {
for token in Tokenizer::new(&text) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
}
}
pub fn index_text_seq<'a, I, IT>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
where I: IntoIterator<Item=&'a str, IntoIter=IT>,
IT: Iterator<Item = &'a str> + Clone,
{
// TODO serialize this to one call to the SeqTokenizer loop
let lowercased: Vec<_> = iter.into_iter().map(str::to_lowercase).collect();
let iter = lowercased.iter().map(|t| t.as_str());
for token in SeqTokenizer::new(iter) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
let deunicoded: Vec<_> = lowercased.into_iter().map(|lowercase_text| {
if lowercase_text.contains(is_cjk) { return lowercase_text }
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
if lowercase_text != deunicoded { deunicoded } else { lowercase_text }
}).collect();
let iter = deunicoded.iter().map(|t| t.as_str());
for token in SeqTokenizer::new(iter) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
}
pub fn build(self) -> Indexed {
let words_doc_indexes = self.words_doc_indexes
.into_iter()
.map(|(word, indexes)| (word, SetBuf::from_dirty(indexes)))
.collect();
let docs_words = self.docs_words
.into_iter()
.map(|(id, mut words)| {
words.sort_unstable();
words.dedup();
(id, fst::Set::from_iter(words).unwrap())
})
.collect();
Indexed { words_doc_indexes, docs_words }
}
}
fn index_token(
token: Token,
id: DocumentId,
attr: SchemaAttr,
word_limit: usize,
words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
docs_words: &mut HashMap<DocumentId, Vec<Word>>,
) -> bool
{
if token.word_index >= word_limit { return false }
match token_to_docindex(id, attr, token) {
Some(docindex) => {
let word = Vec::from(token.word);
words_doc_indexes.entry(word.clone()).or_insert_with(Vec::new).push(docindex);
docs_words.entry(id).or_insert_with(Vec::new).push(word);
},
None => return false,
}
true
}
fn token_to_docindex(id: DocumentId, attr: SchemaAttr, token: Token) -> Option<DocIndex> {
let word_index = u16::try_from(token.word_index).ok()?;
let char_index = u16::try_from(token.char_index).ok()?;
let char_length = u16::try_from(token.word.chars().count()).ok()?;
let docindex = DocIndex {
document_id: id,
attribute: attr.0,
word_index,
char_index,
char_length,
};
Some(docindex)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn strange_apostrophe() {
let mut indexer = Indexer::new();
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
indexer.index_text(docid, attr, text);
let Indexed { words_doc_indexes, .. } = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
// with the ugly apostrophe...
assert!(words_doc_indexes.get(&"l’éteindre".to_owned().into_bytes()).is_some());
}
#[test]
fn strange_apostrophe_in_sequence() {
let mut indexer = Indexer::new();
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"];
indexer.index_text_seq(docid, attr, text);
let Indexed { words_doc_indexes, .. } = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
// with the ugly apostrophe...
assert!(words_doc_indexes.get(&"l’éteindre".to_owned().into_bytes()).is_some());
}
}
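A small usage sketch for the indexer (illustrative, assuming it runs inside this crate where Indexer and Indexed are visible): index one short text, build, and inspect the two resulting structures.

use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;

fn main() {
    let mut indexer = Indexer::with_word_limit(1000);
    indexer.index_text(DocumentId(7), SchemaAttr(0), "Hello world, hello Bob");

    let Indexed { words_doc_indexes, docs_words } = indexer.build();

    // "hello" occurs at two distinct word positions in attribute 0 of document 7
    assert_eq!(words_doc_indexes[&b"hello"[..]].len(), 2);
    // the per-document fst contains the deduplicated word set
    assert!(docs_words[&DocumentId(7)].contains("hello"));
}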

meilidb-data/src/lib.rs (new file, 20 lines)

@@ -0,0 +1,20 @@
mod cf_tree;
mod database;
mod document_attr_key;
mod indexer;
mod number;
mod ranked_map;
mod serde;
pub use self::cf_tree::{CfTree, CfIter};
pub use self::database::{
Database, Index, CustomSettingsIndex, RankingOrdering,
StopWords, RankingOrder, DistinctField, RankingRules,
UpdateType, DetailedDuration, UpdateResult, UpdateStatus,
Error,
};
pub use self::number::Number;
pub use self::ranked_map::RankedMap;
pub use self::serde::{compute_document_id, extract_document_id, value_to_string};
pub type RocksDbResult<T> = Result<T, rocksdb::Error>;


@@ -0,0 +1,55 @@
use std::num::{ParseIntError, ParseFloatError};
use std::str::FromStr;
use std::fmt;
use ordered_float::OrderedFloat;
use serde::{Serialize, Deserialize};
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Number {
Unsigned(u64),
Signed(i64),
Float(OrderedFloat<f64>),
}
impl FromStr for Number {
type Err = ParseNumberError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let uint_error = match u64::from_str(s) {
Ok(unsigned) => return Ok(Number::Unsigned(unsigned)),
Err(error) => error,
};
let int_error = match i64::from_str(s) {
Ok(signed) => return Ok(Number::Signed(signed)),
Err(error) => error,
};
let float_error = match f64::from_str(s) {
Ok(float) => return Ok(Number::Float(OrderedFloat(float))),
Err(error) => error,
};
Err(ParseNumberError { uint_error, int_error, float_error })
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseNumberError {
uint_error: ParseIntError,
int_error: ParseIntError,
float_error: ParseFloatError,
}
impl fmt::Display for ParseNumberError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.uint_error == self.int_error {
write!(f, "can not parse number: {}, {}", self.uint_error, self.float_error)
} else {
write!(f, "can not parse number: {}, {}, {}",
self.uint_error, self.int_error, self.float_error)
}
}
}
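The parser tries the three representations in order, so a plain positive integer always becomes Number::Unsigned, a negative one Number::Signed, and anything else falls back to a float. A small illustrative sketch using the crate's public Number re-export:

use std::str::FromStr;
use meilidb_data::Number;
use ordered_float::OrderedFloat;

fn main() {
    assert_eq!(Number::from_str("42").unwrap(), Number::Unsigned(42));
    assert_eq!(Number::from_str("-7").unwrap(), Number::Signed(-7));
    assert_eq!(Number::from_str("3.14").unwrap(), Number::Float(OrderedFloat(3.14)));
    // none of the three parsers accept this, so all three errors are reported
    assert!(Number::from_str("not a number").is_err());
}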


@@ -0,0 +1,36 @@
use std::io::{Read, Write};
use hashbrown::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use crate::Number;
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>);
impl RankedMap {
pub fn len(&self) -> usize {
self.0.len()
}
pub fn insert(&mut self, document: DocumentId, attribute: SchemaAttr, number: Number) {
self.0.insert((document, attribute), number);
}
pub fn remove(&mut self, document: DocumentId, attribute: SchemaAttr) {
self.0.remove(&(document, attribute));
}
pub fn get(&self, document: DocumentId, attribute: SchemaAttr) -> Option<Number> {
self.0.get(&(document, attribute)).cloned()
}
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<RankedMap> {
bincode::deserialize_from(reader).map(RankedMap)
}
pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
bincode::serialize_into(writer, &self.0)
}
}
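A short, illustrative round-trip sketch for the ranked map, using the crate's public re-exports:

use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use meilidb_data::{Number, RankedMap};

fn main() -> bincode::Result<()> {
    let mut map = RankedMap::default();
    map.insert(DocumentId(1), SchemaAttr(2), Number::Unsigned(42));

    // serialize to a buffer and read it back
    let mut buffer = Vec::new();
    map.write_to_bin(&mut buffer)?;
    let map2 = RankedMap::read_from_bin(buffer.as_slice())?;

    assert_eq!(map2.get(DocumentId(1), SchemaAttr(2)), Some(Number::Unsigned(42)));
    Ok(())
}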


@@ -0,0 +1,180 @@
use std::str::FromStr;
use ordered_float::OrderedFloat;
use serde::ser;
use serde::Serialize;
use super::SerializerError;
use crate::Number;
pub struct ConvertToNumber;
impl ser::Serializer for ConvertToNumber {
type Ok = Number;
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, value: bool) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(u64::from(value)))
}
fn serialize_char(self, _value: char) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "char" })
}
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(i64::from(value)))
}
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(i64::from(value)))
}
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(i64::from(value)))
}
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(value))
}
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(u64::from(value)))
}
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(u64::from(value)))
}
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(u64::from(value)))
}
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(value))
}
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Float(OrderedFloat(f64::from(value))))
}
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Float(OrderedFloat(value)))
}
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
Ok(Number::from_str(value)?)
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnrankableType { type_name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnrankableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "map" })
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "struct variant" })
}
}
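This is the serializer used for ranked attributes: booleans, integers, floats and numeric strings are coerced into a Number, everything else fails with UnrankableType. An illustrative sketch, assuming it is compiled inside this module where ConvertToNumber, Number and OrderedFloat are in scope:

use serde::Serialize;

fn main() {
    assert_eq!(42u32.serialize(ConvertToNumber).unwrap(), Number::Unsigned(42));
    assert_eq!((-7i32).serialize(ConvertToNumber).unwrap(), Number::Signed(-7));
    assert_eq!("3.5".serialize(ConvertToNumber).unwrap(), Number::Float(OrderedFloat(3.5)));
    // sequences (like maps, structs and options) can not be used for ranking
    assert!(vec![1, 2, 3].serialize(ConvertToNumber).is_err());
}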


@@ -0,0 +1,176 @@
use serde::Serialize;
use serde::ser;
use super::SerializerError;
pub struct ConvertToString;
impl ser::Serializer for ConvertToString {
type Ok = String;
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "boolean" })
}
fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "map" })
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct variant" })
}
}


@@ -0,0 +1,132 @@
use std::collections::HashSet;
use std::io::Cursor;
use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::decode::{Deserializer as RmpDeserializer, ReadReader};
use rmp_serde::decode::{Error as RmpError};
use serde::{de, forward_to_deserialize_any};
use crate::database::Index;
#[derive(Debug)]
pub enum DeserializerError {
RmpError(RmpError),
RocksDbError(rocksdb::Error),
Custom(String),
}
impl de::Error for DeserializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
DeserializerError::Custom(msg.to_string())
}
}
impl fmt::Display for DeserializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DeserializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
DeserializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
DeserializerError::Custom(s) => f.write_str(s),
}
}
}
impl Error for DeserializerError {}
impl From<RmpError> for DeserializerError {
fn from(error: RmpError) -> DeserializerError {
DeserializerError::RmpError(error)
}
}
impl From<rocksdb::Error> for DeserializerError {
fn from(error: rocksdb::Error) -> DeserializerError {
DeserializerError::RocksDbError(error)
}
}
pub struct Deserializer<'a> {
pub document_id: DocumentId,
pub index: &'a Index,
pub fields: Option<&'a HashSet<SchemaAttr>>,
}
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
{
type Error = DeserializerError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
self.deserialize_map(visitor)
}
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct struct enum identifier ignored_any
}
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
let schema = self.index.schema();
let documents = self.index.as_ref().documents_index;
let iter = documents
.document_fields(self.document_id)?
.filter_map(|(attr, value)| {
let is_displayed = schema.props(attr).is_displayed();
if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
let attribute_name = schema.attribute_name(attr);
Some((attribute_name, Value::new(value)))
} else {
None
}
});
let map_deserializer = de::value::MapDeserializer::new(iter);
let result = visitor.visit_map(map_deserializer).map_err(DeserializerError::from);
result
}
}
struct Value<A>(RmpDeserializer<ReadReader<Cursor<A>>>) where A: AsRef<[u8]>;
impl<A> Value<A> where A: AsRef<[u8]>
{
fn new(value: A) -> Value<A> {
Value(RmpDeserializer::new(Cursor::new(value)))
}
}
impl<'de, A> de::IntoDeserializer<'de, RmpError> for Value<A>
where A: AsRef<[u8]>,
{
type Deserializer = Self;
fn into_deserializer(self) -> Self::Deserializer {
self
}
}
impl<'de, 'a, A> de::Deserializer<'de> for Value<A>
where A: AsRef<[u8]>,
{
type Error = RmpError;
fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
self.0.deserialize_any(visitor)
}
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct map struct enum identifier ignored_any
}
}


@@ -0,0 +1,273 @@
use std::hash::{Hash, Hasher};
use meilidb_core::DocumentId;
use serde::{ser, Serialize};
use serde_json::Value;
use siphasher::sip::SipHasher;
use super::{SerializerError, ConvertToString};
pub fn extract_document_id<D>(
identifier: &str,
document: &D,
) -> Result<Option<DocumentId>, SerializerError>
where D: serde::Serialize,
{
let serializer = ExtractDocumentId { identifier };
document.serialize(serializer)
}
pub fn value_to_string(value: &Value) -> Option<String> {
match value {
Value::Null => None,
Value::Bool(_) => None,
Value::Number(value) => Some(value.to_string()),
Value::String(value) => Some(value.to_string()),
Value::Array(_) => None,
Value::Object(_) => None,
}
}
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
let mut s = SipHasher::new();
t.hash(&mut s);
let hash = s.finish();
DocumentId(hash)
}
struct ExtractDocumentId<'a> {
identifier: &'a str,
}
impl<'a> ser::Serializer for ExtractDocumentId<'a> {
type Ok = Option<DocumentId>;
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ExtractDocumentIdMapSerializer<'a>;
type SerializeStruct = ExtractDocumentIdStructSerializer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, _value: &str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "str" })
}
fn serialize_bytes(self, _value: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
let serializer = ExtractDocumentIdMapSerializer {
identifier: self.identifier,
document_id: None,
current_key_name: None,
};
Ok(serializer)
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
let serializer = ExtractDocumentIdStructSerializer {
identifier: self.identifier,
document_id: None,
};
Ok(serializer)
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct variant" })
}
}
pub struct ExtractDocumentIdMapSerializer<'a> {
identifier: &'a str,
document_id: Option<DocumentId>,
current_key_name: Option<String>,
}
impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
type Ok = Option<DocumentId>;
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: Serialize,
{
let key = key.serialize(ConvertToString)?;
self.current_key_name = Some(key);
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: Serialize,
{
let key = self.current_key_name.take().unwrap();
self.serialize_entry(&key, value)
}
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V
) -> Result<(), Self::Error>
where K: Serialize, V: Serialize,
{
let key = key.serialize(ConvertToString)?;
if self.identifier == key {
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
match value_to_string(&value).map(|s| compute_document_id(&s)) {
Some(document_id) => self.document_id = Some(document_id),
None => return Err(SerializerError::InvalidDocumentIdType),
}
}
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(self.document_id)
}
}
pub struct ExtractDocumentIdStructSerializer<'a> {
identifier: &'a str,
document_id: Option<DocumentId>,
}
impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
type Ok = Option<DocumentId>;
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T
) -> Result<(), Self::Error>
where T: Serialize,
{
if self.identifier == key {
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
match value_to_string(&value).map(compute_document_id) {
Some(document_id) => self.document_id = Some(document_id),
None => return Err(SerializerError::InvalidDocumentIdType),
}
}
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(self.document_id)
}
}
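The identifier field is looked up by name, stringified, then hashed with SipHash into a DocumentId; string and number identifiers work, while null, booleans, arrays and objects are rejected. A small illustrative sketch with a JSON document, using the crate's public extract_document_id re-export:

use meilidb_data::extract_document_id;
use serde_json::json;

fn main() {
    let doc = json!({ "objectId": 123, "title": "hello" });

    // "objectId" is found, converted to the string "123", then hashed into a DocumentId
    assert!(extract_document_id("objectId", &doc).unwrap().is_some());

    // an identifier field that is absent simply yields None
    assert!(extract_document_id("uuid", &doc).unwrap().is_none());
}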


@@ -0,0 +1,336 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use serde::ser;
use serde::Serialize;
use crate::indexer::Indexer as RawIndexer;
use super::{SerializerError, ConvertToString};
pub struct Indexer<'a> {
pub attribute: SchemaAttr,
pub indexer: &'a mut RawIndexer,
pub document_id: DocumentId,
}
impl<'a> ser::Serializer for Indexer<'a> {
type Ok = ();
type Error = SerializerError;
type SerializeSeq = SeqIndexer<'a>;
type SerializeTuple = TupleIndexer<'a>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapIndexer<'a>;
type SerializeStruct = StructSerializer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "boolean" })
}
fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
self.indexer.index_text(self.document_id, self.attribute, text);
Ok(())
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.indexer.index_text(self.document_id, self.attribute, &text);
Ok(())
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
Err(SerializerError::UnindexableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
let indexer = SeqIndexer {
attribute: self.attribute,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
};
Ok(indexer)
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
let indexer = TupleIndexer {
attribute: self.attribute,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
};
Ok(indexer)
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
let indexer = MapIndexer {
attribute: self.attribute,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
};
Ok(indexer)
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "struct variant" })
}
}
pub struct SeqIndexer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
}
}
pub struct MapIndexer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeMap for MapIndexer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let text = key.serialize(ConvertToString)?;
self.texts.push(text);
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
}
}
pub struct StructSerializer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let key_text = key.to_owned();
let value_text = value.serialize(ConvertToString)?;
self.texts.push(key_text);
self.texts.push(value_text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
}
}
pub struct TupleIndexer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: Serialize
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
}
}


@@ -0,0 +1,131 @@
macro_rules! forward_to_unserializable_type {
($($ty:ident => $se_method:ident,)*) => {
$(
fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "$ty" })
}
)*
}
}
mod convert_to_number;
mod convert_to_string;
mod deserializer;
mod extract_document_id;
mod indexer;
mod serializer;
pub use self::deserializer::{Deserializer, DeserializerError};
pub use self::extract_document_id::{extract_document_id, compute_document_id, value_to_string};
pub use self::convert_to_string::ConvertToString;
pub use self::convert_to_number::ConvertToNumber;
pub use self::indexer::Indexer;
pub use self::serializer::Serializer;
use std::collections::BTreeMap;
use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::encode::Error as RmpError;
use serde_json::Error as SerdeJsonError;
use serde::ser;
use crate::number::ParseNumberError;
#[derive(Debug)]
pub enum SerializerError {
DocumentIdNotFound,
InvalidDocumentIdType,
RmpError(RmpError),
RocksDbError(rocksdb::Error),
SerdeJsonError(SerdeJsonError),
ParseNumberError(ParseNumberError),
UnserializableType { type_name: &'static str },
UnindexableType { type_name: &'static str },
UnrankableType { type_name: &'static str },
Custom(String),
}
impl ser::Error for SerializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
SerializerError::Custom(msg.to_string())
}
}
impl fmt::Display for SerializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
SerializerError::DocumentIdNotFound => {
write!(f, "serialized document does not have an id according to the schema")
},
SerializerError::InvalidDocumentIdType => {
write!(f, "document identifier can only be of type string or number")
},
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
SerializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
SerializerError::ParseNumberError(e) => {
write!(f, "error while trying to parse a number: {}", e)
},
SerializerError::UnserializableType { type_name } => {
write!(f, "{} are not a serializable type", type_name)
},
SerializerError::UnindexableType { type_name } => {
write!(f, "{} are not an indexable type", type_name)
},
SerializerError::UnrankableType { type_name } => {
write!(f, "{} types can not be used for ranking", type_name)
},
SerializerError::Custom(s) => f.write_str(s),
}
}
}
impl Error for SerializerError {}
impl From<String> for SerializerError {
fn from(value: String) -> SerializerError {
SerializerError::Custom(value)
}
}
impl From<RmpError> for SerializerError {
fn from(error: RmpError) -> SerializerError {
SerializerError::RmpError(error)
}
}
impl From<SerdeJsonError> for SerializerError {
fn from(error: SerdeJsonError) -> SerializerError {
SerializerError::SerdeJsonError(error)
}
}
impl From<rocksdb::Error> for SerializerError {
fn from(error: rocksdb::Error) -> SerializerError {
SerializerError::RocksDbError(error)
}
}
impl From<ParseNumberError> for SerializerError {
fn from(error: ParseNumberError) -> SerializerError {
SerializerError::ParseNumberError(error)
}
}
pub struct RamDocumentStore(BTreeMap<(DocumentId, SchemaAttr), Vec<u8>>);
impl RamDocumentStore {
pub fn new() -> RamDocumentStore {
RamDocumentStore(BTreeMap::new())
}
pub fn set_document_field(&mut self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) {
self.0.insert((id, attr), value);
}
pub fn into_inner(self) -> BTreeMap<(DocumentId, SchemaAttr), Vec<u8>> {
self.0
}
}


@@ -0,0 +1,287 @@
use meilidb_core::DocumentId;
use meilidb_schema::Schema;
use serde::ser;
use crate::indexer::Indexer as RawIndexer;
use crate::ranked_map::RankedMap;
use super::{RamDocumentStore, SerializerError, ConvertToString, ConvertToNumber, Indexer};
pub struct Serializer<'a> {
pub schema: &'a Schema,
pub document_store: &'a mut RamDocumentStore,
pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap,
pub document_id: DocumentId,
}
impl<'a> ser::Serializer for Serializer<'a> {
type Ok = ();
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapSerializer<'a>;
type SerializeStruct = StructSerializer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "str" })
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Ok(MapSerializer {
schema: self.schema,
document_id: self.document_id,
document_store: self.document_store,
indexer: self.indexer,
ranked_map: self.ranked_map,
current_key_name: None,
})
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Ok(StructSerializer {
schema: self.schema,
document_id: self.document_id,
document_store: self.document_store,
indexer: self.indexer,
ranked_map: self.ranked_map,
})
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct variant" })
}
}
pub struct MapSerializer<'a> {
schema: &'a Schema,
document_id: DocumentId,
document_store: &'a mut RamDocumentStore,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
current_key_name: Option<String>,
}
impl<'a> ser::SerializeMap for MapSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let key = key.serialize(ConvertToString)?;
self.current_key_name = Some(key);
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let key = self.current_key_name.take().unwrap();
self.serialize_entry(&key, value)
}
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V,
) -> Result<(), Self::Error>
where K: ser::Serialize, V: ser::Serialize,
{
let key = key.serialize(ConvertToString)?;
serialize_value(
self.schema,
self.document_id,
self.document_store,
self.indexer,
self.ranked_map,
&key,
value,
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
pub struct StructSerializer<'a> {
schema: &'a Schema,
document_id: DocumentId,
document_store: &'a mut RamDocumentStore,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where T: ser::Serialize,
{
serialize_value(
self.schema,
self.document_id,
self.document_store,
self.indexer,
self.ranked_map,
key,
value,
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
fn serialize_value<T: ?Sized>(
schema: &Schema,
document_id: DocumentId,
document_store: &mut RamDocumentStore,
indexer: &mut RawIndexer,
ranked_map: &mut RankedMap,
key: &str,
value: &T,
) -> Result<(), SerializerError>
where T: ser::Serialize,
{
if let Some(attribute) = schema.attribute(key) {
let props = schema.props(attribute);
let serialized = rmp_serde::to_vec_named(value)?;
document_store.set_document_field(document_id, attribute, serialized);
if props.is_indexed() {
let indexer = Indexer { attribute, indexer, document_id };
value.serialize(indexer)?;
}
if props.is_ranked() {
let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, attribute, number);
}
}
Ok(())
}
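An illustrative end-to-end sketch of this serializer (assuming it is compiled inside the meilidb-data crate, since not all of these types are re-exported): every attribute present in the schema is written to the RamDocumentStore, indexed attributes are additionally fed to the raw Indexer, and ranked attributes are recorded in the RankedMap.

use serde::Serialize;
use serde_json::json;
use meilidb_core::DocumentId;
use meilidb_schema::{SchemaBuilder, DISPLAYED, INDEXED, RANKED};
use crate::RankedMap;
use crate::indexer::Indexer as RawIndexer;
use crate::serde::{RamDocumentStore, Serializer, SerializerError};

fn main() -> Result<(), SerializerError> {
    let mut builder = SchemaBuilder::with_identifier("id");
    builder.new_attribute("id", DISPLAYED | RANKED);
    builder.new_attribute("title", DISPLAYED | INDEXED);
    let schema = builder.build();

    let mut document_store = RamDocumentStore::new();
    let mut indexer = RawIndexer::new();
    let mut ranked_map = RankedMap::default();

    let document = json!({ "id": 1, "title": "hello world" });
    document.serialize(Serializer {
        schema: &schema,
        document_store: &mut document_store,
        indexer: &mut indexer,
        ranked_map: &mut ranked_map,
        document_id: DocumentId(0),
    })?;

    assert_eq!(document_store.into_inner().len(), 2); // both fields are stored
    assert_eq!(ranked_map.len(), 1);                  // only "id" is ranked
    Ok(())
}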


@@ -0,0 +1,15 @@
use meilidb_data::{Database};
use meilidb_data::Index;
use meilidb_schema::{SchemaBuilder, DISPLAYED, INDEXED};
pub fn simple_index() -> Index {
let tmp_dir = tempfile::tempdir().unwrap();
let database = Database::open(&tmp_dir).unwrap();
let mut builder = SchemaBuilder::with_identifier("objectId");
builder.new_attribute("objectId", DISPLAYED | INDEXED);
builder.new_attribute("title", DISPLAYED | INDEXED);
let schema = builder.build();
database.create_index("hello", schema).unwrap()
}


@@ -0,0 +1,43 @@
#[macro_use] extern crate maplit;
mod common;
use big_s::S;
use meilidb_data::RankingOrdering;
#[test]
fn stop_words() {
let index = common::simple_index();
let stop_words = hashset!{ S("le"), S("la"), S("les"), };
index.custom_settings().set_stop_words(&stop_words).unwrap();
let ret_stop_words = index.custom_settings().get_stop_words().unwrap().unwrap();
assert_eq!(ret_stop_words, stop_words);
}
#[test]
fn ranking_order() {
let index = common::simple_index();
let ranking_order = vec![S("SumOfTypos"), S("NumberOfWords"), S("WordsProximity"), S("SumOfWordsAttribute"), S("SumOfWordsPosition"), S("Exact"), S("DocumentId")];
index.custom_settings().set_ranking_order(&ranking_order).unwrap();
let ret_ranking_orderer = index.custom_settings().get_ranking_order().unwrap().unwrap();
assert_eq!(ret_ranking_orderer, ranking_order);
}
#[test]
fn distinct_field() {
let index = common::simple_index();
let distinct_field = S("title");
index.custom_settings().set_distinct_field(&distinct_field).unwrap();
let ret_distinct_field = index.custom_settings().get_distinct_field().unwrap().unwrap();
assert_eq!(ret_distinct_field, distinct_field);
}
#[test]
fn ranking_rules() {
let index = common::simple_index();
let ranking_rules = hashmap!{ S("objectId") => RankingOrdering::Asc };
index.custom_settings().set_ranking_rules(&ranking_rules).unwrap();
let ret_ranking_rules = index.custom_settings().get_ranking_rules().unwrap().unwrap();
assert_eq!(ret_ranking_rules, ranking_rules);
}


@@ -0,0 +1,67 @@
#[macro_use] extern crate maplit;
mod common;
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;
use big_s::S;
use serde_json::json;
#[test]
fn database_stats() {
let index = common::simple_index();
let as_been_updated = Arc::new(AtomicBool::new(false));
let as_been_updated_clone = as_been_updated.clone();
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
let doc1 = json!({ "objectId": 123, "title": "hello" });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
let stats = index.stats().unwrap();
let repartition = hashmap!{
S("objectId") => 1u64,
S("title") => 1u64,
};
assert_eq!(stats.number_of_documents, 1);
assert_eq!(stats.documents_fields_repartition, repartition);
let doc2 = json!({ "objectId": 456, "title": "world" });
let mut addition = index.documents_addition();
addition.update_document(&doc2);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
let stats = index.stats().unwrap();
let repartition = hashmap!{
S("objectId") => 2u64,
S("title") => 2u64,
};
assert_eq!(stats.number_of_documents, 2);
assert_eq!(stats.documents_fields_repartition, repartition);
let doc3 = json!({ "objectId": 789 });
let mut addition = index.documents_addition();
addition.update_document(&doc3);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
let stats = index.stats().unwrap();
let repartition = hashmap!{
S("objectId") => 3u64,
S("title") => 2u64,
};
assert_eq!(stats.number_of_documents, 3);
assert_eq!(stats.documents_fields_repartition, repartition);
}


@@ -0,0 +1,99 @@
mod common;
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;
use serde_json::json;
#[test]
fn insert_delete_document() {
let index = common::simple_index();
let as_been_updated = Arc::new(AtomicBool::new(false));
let as_been_updated_clone = as_been_updated.clone();
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
let doc1 = json!({ "objectId": 123, "title": "hello" });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
let mut deletion = index.documents_deletion();
deletion.delete_document(&doc1).unwrap();
let update_id = deletion.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 0);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 0);
}
#[test]
fn replace_document() {
let index = common::simple_index();
let as_been_updated = Arc::new(AtomicBool::new(false));
let as_been_updated_clone = as_been_updated.clone();
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
let doc1 = json!({ "objectId": 123, "title": "hello" });
let doc2 = json!({ "objectId": 123, "title": "coucou" });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
let mut addition = index.documents_addition();
addition.update_document(&doc2);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 0);
let docs = index.query_builder().query("coucou", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc2));
}
#[test]
fn documents_ids() {
let index = common::simple_index();
let doc1 = json!({ "objectId": 123, "title": "hello" });
let doc2 = json!({ "objectId": 456, "title": "world" });
let doc3 = json!({ "objectId": 789 });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
addition.update_document(&doc2);
addition.update_document(&doc3);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(status.result.is_ok());
let documents_ids_count = index.documents_ids().unwrap().count();
assert_eq!(documents_ids_count, 3);
}

meilidb-schema/Cargo.toml (new file, 12 lines)

@@ -0,0 +1,12 @@
[package]
name = "meilidb-schema"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
bincode = "1.1.2"
indexmap = { version = "1.1.0", features = ["serde-1"] }
serde = { version = "1.0.91", features = ["derive"] }
serde_json = { version = "1.0.39", features = ["preserve_order"] }
toml = { version = "0.5.0", features = ["preserve_order"] }

meilidb-schema/src/lib.rs (new file, 285 lines)

@@ -0,0 +1,285 @@
use std::collections::{HashMap, BTreeMap};
use std::{fmt, u16};
use std::ops::BitOr;
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use indexmap::IndexMap;
pub const DISPLAYED: SchemaProps = SchemaProps { displayed: true, indexed: false, ranked: false };
pub const INDEXED: SchemaProps = SchemaProps { displayed: false, indexed: true, ranked: false };
pub const RANKED: SchemaProps = SchemaProps { displayed: false, indexed: false, ranked: true };
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaProps {
#[serde(default)]
pub displayed: bool,
#[serde(default)]
pub indexed: bool,
#[serde(default)]
pub ranked: bool,
}
impl SchemaProps {
pub fn is_displayed(self) -> bool {
self.displayed
}
pub fn is_indexed(self) -> bool {
self.indexed
}
pub fn is_ranked(self) -> bool {
self.ranked
}
}
impl BitOr for SchemaProps {
type Output = Self;
fn bitor(self, other: Self) -> Self::Output {
SchemaProps {
displayed: self.displayed | other.displayed,
indexed: self.indexed | other.indexed,
ranked: self.ranked | other.ranked,
}
}
}
#[derive(Serialize, Deserialize)]
pub struct SchemaBuilder {
identifier: String,
attributes: IndexMap<String, SchemaProps>,
}
impl SchemaBuilder {
pub fn with_identifier<S: Into<String>>(name: S) -> SchemaBuilder {
SchemaBuilder {
identifier: name.into(),
attributes: IndexMap::new(),
}
}
pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr {
let len = self.attributes.len();
if self.attributes.insert(name.into(), props).is_some() {
panic!("Field already inserted.")
}
SchemaAttr(len as u16)
}
pub fn build(self) -> Schema {
let mut attrs = HashMap::new();
let mut props = Vec::new();
for (i, (name, prop)) in self.attributes.into_iter().enumerate() {
attrs.insert(name.clone(), SchemaAttr(i as u16));
props.push((name, prop));
}
let identifier = self.identifier;
Schema { inner: Arc::new(InnerSchema { identifier, attrs, props }) }
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Schema {
inner: Arc<InnerSchema>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct InnerSchema {
identifier: String,
attrs: HashMap<String, SchemaAttr>,
props: Vec<(String, SchemaProps)>,
}
impl Schema {
fn to_builder(&self) -> SchemaBuilder {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
SchemaBuilder { identifier, attributes }
}
fn attributes_ordered(&self) -> IndexMap<String, SchemaProps> {
let mut ordered = BTreeMap::new();
for (name, attr) in &self.inner.attrs {
let (_, props) = self.inner.props[attr.0 as usize];
ordered.insert(attr.0, (name, props));
}
let mut attributes = IndexMap::with_capacity(ordered.len());
for (_, (name, props)) in ordered {
attributes.insert(name.clone(), props);
}
attributes
}
pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
let (_, props) = self.inner.props[attr.0 as usize];
props
}
pub fn identifier_name(&self) -> &str {
&self.inner.identifier
}
pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
self.inner.attrs.get(name.as_ref()).cloned()
}
pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
let (name, _) = &self.inner.props[attr.0 as usize];
name
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item=(&str, SchemaAttr, SchemaProps)> + 'a {
self.inner.props.iter()
.map(move |(name, prop)| {
let attr = self.inner.attrs.get(name).unwrap();
(name.as_str(), *attr, *prop)
})
}
}
impl Serialize for Schema {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: serde::ser::Serializer,
{
self.to_builder().serialize(serializer)
}
}
impl<'de> Deserialize<'de> for Schema {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: serde::de::Deserializer<'de>,
{
let builder = SchemaBuilder::deserialize(deserializer)?;
Ok(builder.build())
}
}
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct SchemaAttr(pub u16);
impl SchemaAttr {
pub const fn new(value: u16) -> SchemaAttr {
SchemaAttr(value)
}
pub const fn min() -> SchemaAttr {
SchemaAttr(u16::min_value())
}
pub const fn max() -> SchemaAttr {
SchemaAttr(u16::max_value())
}
pub fn next(self) -> Option<SchemaAttr> {
self.0.checked_add(1).map(SchemaAttr)
}
pub fn prev(self) -> Option<SchemaAttr> {
self.0.checked_sub(1).map(SchemaAttr)
}
}
impl fmt::Display for SchemaAttr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.0.fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
#[test]
fn serialize_deserialize() -> bincode::Result<()> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let mut buffer = Vec::new();
bincode::serialize_into(&mut buffer, &schema)?;
let schema2 = bincode::deserialize_from(buffer.as_slice())?;
assert_eq!(schema, schema2);
Ok(())
}
#[test]
fn serialize_deserialize_toml() -> Result<(), Box<dyn Error>> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let buffer = toml::to_vec(&schema)?;
let schema2 = toml::from_slice(buffer.as_slice())?;
assert_eq!(schema, schema2);
let data = r#"
identifier = "id"
[attributes."alpha"]
displayed = true
[attributes."beta"]
displayed = true
indexed = true
[attributes."gamma"]
indexed = true
"#;
let schema2 = toml::from_str(data)?;
assert_eq!(schema, schema2);
Ok(())
}
#[test]
fn serialize_deserialize_json() -> Result<(), Box<dyn Error>> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let buffer = serde_json::to_vec(&schema)?;
let schema2 = serde_json::from_slice(buffer.as_slice())?;
assert_eq!(schema, schema2);
let data = r#"
{
"identifier": "id",
"attributes": {
"alpha": {
"displayed": true
},
"beta": {
"displayed": true,
"indexed": true
},
"gamma": {
"indexed": true
}
}
}"#;
let schema2 = serde_json::from_str(data)?;
assert_eq!(schema, schema2);
Ok(())
}
}
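For orientation, here is a minimal usage sketch of the builder API defined above. It is not part of the committed file; the attribute names are illustrative, and DISPLAYED / INDEXED are the same SchemaProps flags used in the tests.
let mut builder = SchemaBuilder::with_identifier("id");
let title = builder.new_attribute("title", DISPLAYED | INDEXED);
builder.new_attribute("ingredients", INDEXED);
let schema = builder.build();
// attributes can be resolved by name, and names/props recovered from the attribute
assert_eq!(schema.attribute("title"), Some(title));
assert_eq!(schema.attribute_name(title), "title");
assert_eq!(schema.identifier_name(), "id");
let _title_props = schema.props(title);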


@ -0,0 +1,8 @@
[package]
name = "meilidb-tokenizer"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
slice-group-by = "0.2.4"


@ -0,0 +1,295 @@
use std::iter::Peekable;
use slice_group_by::StrGroupBy;
use self::SeparatorCategory::*;
pub fn is_cjk(c: char) -> bool {
(c >= '\u{2e80}' && c <= '\u{2eff}') ||
(c >= '\u{2f00}' && c <= '\u{2fdf}') ||
(c >= '\u{3040}' && c <= '\u{309f}') ||
(c >= '\u{30a0}' && c <= '\u{30ff}') ||
(c >= '\u{3100}' && c <= '\u{312f}') ||
(c >= '\u{3200}' && c <= '\u{32ff}') ||
(c >= '\u{3400}' && c <= '\u{4dbf}') ||
(c >= '\u{4e00}' && c <= '\u{9fff}') ||
(c >= '\u{f900}' && c <= '\u{faff}')
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum SeparatorCategory {
Soft,
Hard,
}
impl SeparatorCategory {
fn merge(self, other: SeparatorCategory) -> SeparatorCategory {
if let (Soft, Soft) = (self, other) { Soft } else { Hard }
}
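// a soft separator counts as a word distance of 1 and a hard one as 8,
// so words split by punctuation end up much farther apart in `word_index`
// than words split by plain spaces (see the `hard` tests below)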
fn to_usize(self) -> usize {
match self {
Soft => 1,
Hard => 8,
}
}
}
fn is_separator(c: char) -> bool {
classify_separator(c).is_some()
}
fn classify_separator(c: char) -> Option<SeparatorCategory> {
match c {
' ' | '\'' | '"' => Some(Soft),
'.' | ';' | ',' | '!' | '?' | '-' | '(' | ')' => Some(Hard),
_ => None,
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum CharCategory {
Separator(SeparatorCategory),
Cjk,
Other,
}
fn classify_char(c: char) -> CharCategory {
if let Some(category) = classify_separator(c) {
CharCategory::Separator(category)
} else if is_cjk(c) {
CharCategory::Cjk
} else {
CharCategory::Other
}
}
fn is_str_word(s: &str) -> bool {
!s.chars().any(is_separator)
}
fn same_group_category(a: char, b: char) -> bool {
match (classify_char(a), classify_char(b)) {
(CharCategory::Cjk, _) | (_, CharCategory::Cjk) => false,
(CharCategory::Separator(_), CharCategory::Separator(_)) => true,
(a, b) => a == b,
}
}
// fold the number of chars along with the index position
fn chars_count_index((n, _): (usize, usize), (i, c): (usize, char)) -> (usize, usize) {
(n + 1, i + c.len_utf8())
}
pub fn split_query_string(query: &str) -> impl Iterator<Item=&str> {
Tokenizer::new(query).map(|t| t.word)
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Token<'a> {
pub word: &'a str,
pub word_index: usize,
pub char_index: usize,
}
pub struct Tokenizer<'a> {
inner: &'a str,
word_index: usize,
char_index: usize,
}
impl<'a> Tokenizer<'a> {
pub fn new(string: &str) -> Tokenizer {
// skip every leading separator and set `char_index`
// to the number of chars skipped
let (count, index) = string.char_indices()
.take_while(|(_, c)| is_separator(*c))
.fold((0, 0), chars_count_index);
Tokenizer {
inner: &string[index..],
word_index: 0,
char_index: count,
}
}
}
impl<'a> Iterator for Tokenizer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
let mut iter = self.inner.linear_group_by(same_group_category).peekable();
while let (Some(string), next_string) = (iter.next(), iter.peek()) {
let (count, index) = string.char_indices().fold((0, 0), chars_count_index);
if !is_str_word(string) {
self.word_index += string.chars()
.filter_map(classify_separator)
.fold(Soft, |a, x| a.merge(x))
.to_usize();
self.char_index += count;
self.inner = &self.inner[index..];
continue;
}
let token = Token {
word: string,
word_index: self.word_index,
char_index: self.char_index,
};
if next_string.filter(|s| is_str_word(s)).is_some() {
self.word_index += 1;
}
self.char_index += count;
self.inner = &self.inner[index..];
return Some(token);
}
self.inner = "";
None
}
}
pub struct SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
inner: I,
current: Option<Peekable<Tokenizer<'a>>>,
word_offset: usize,
char_offset: usize,
}
impl<'a, I> SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
pub fn new(mut iter: I) -> SeqTokenizer<'a, I> {
let current = iter.next().map(|s| Tokenizer::new(s).peekable());
SeqTokenizer {
inner: iter,
current: current,
word_offset: 0,
char_offset: 0,
}
}
}
impl<'a, I> Iterator for SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
match &mut self.current {
Some(current) => {
match current.next() {
Some(token) => {
// we must apply the word and char offsets
// to the token before returning it
let token = Token {
word: token.word,
word_index: token.word_index + self.word_offset,
char_index: token.char_index + self.char_offset,
};
// if this is the last iteration on this text
// we must save the offsets for next texts
if current.peek().is_none() {
let hard_space = SeparatorCategory::Hard.to_usize();
self.word_offset = token.word_index + hard_space;
self.char_offset = token.char_index + hard_space;
}
Some(token)
},
None => {
// no more words in this text, we must
// start tokenizing the next text
self.current = self.inner.next().map(|s| Tokenizer::new(s).peekable());
self.next()
},
}
},
// no more texts available
None => None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn easy() {
let mut tokenizer = Tokenizer::new("salut");
assert_eq!(tokenizer.next(), Some(Token { word: "salut", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard() {
let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe (ouch)");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 }));
assert_eq!(tokenizer.next(), Some(Token { word: "ouch", word_index: 17, char_index: 18 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), Some(Token { word: "wtf", word_index: 16, char_index: 12 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 18 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 32, char_index: 24 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard_long_chars() {
let mut tokenizer = Tokenizer::new(" .? yo 😂. aïe");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😂", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 10 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? 😱 - lol . 😣 ,");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😱", word_index: 16, char_index: 12 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 16 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 22 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard_kanjis() {
let mut tokenizer = Tokenizer::new("\u{2ec4}lolilol\u{2ec7}");
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 1, char_index: 1 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 2, char_index: 8 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("\u{2ec4}\u{2ed3}\u{2ef2} lolilol - hello \u{2ec7}");
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ed3}", word_index: 1, char_index: 1 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ef2}", word_index: 2, char_index: 2 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 3, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "hello", word_index: 11, char_index: 14 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 12, char_index: 23 }));
assert_eq!(tokenizer.next(), None);
}
}
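A short sketch of how the two public entry points are meant to be used; it is not part of the committed file and the input strings are arbitrary examples.
// split_query_string drops the separators and yields only the words
let words: Vec<&str> = split_query_string("hello, world!").collect();
assert_eq!(words, vec!["hello", "world"]);
// SeqTokenizer chains several texts, keeping a hard-separator gap between them
let texts = vec!["hello world", "foo bar"].into_iter();
for token in SeqTokenizer::new(texts) {
    println!("{} (word {}, char {})", token.word, token.word_index, token.char_index);
}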

meilidb/Cargo.toml (new file, 29 lines)

@ -0,0 +1,29 @@
[package]
edition = "2018"
name = "meilidb"
version = "0.3.1"
authors = ["Kerollmops <renault.cle@gmail.com>"]
[dependencies]
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-data = { path = "../meilidb-data", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
[dev-dependencies]
csv = "1.0.7"
diskus = "0.5.0"
env_logger = "0.6.1"
indexmap = { version = "1.1.0", features = ["serde-1"] }
jemallocator = "0.3.2"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
quickcheck = "0.9.0"
rand = "0.7.2"
rand_xorshift = "0.2.0"
rustyline = { version = "5.0.0", default-features = false }
serde = { version = "1.0.91" , features = ["derive"] }
serde_json = "1.0.39"
structopt = "0.3.2"
sysinfo = "0.9.5"
tempfile = "3.0.7"
termcolor = "1.0.4"
toml = "0.5.3"


@ -0,0 +1,215 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::{HashMap, HashSet};
use std::io::{self, BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::error::Error;
use std::fs::{self, File};
use diskus::Walk;
use sysinfo::{SystemExt, ProcessExt};
use serde::{Serialize, Deserialize};
use structopt::StructOpt;
use meilidb_data::Database;
use meilidb_schema::Schema;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
/// The csv file to index.
#[structopt(parse(from_os_str))]
pub csv_data_path: PathBuf,
/// The path to the schema.
#[structopt(long = "schema", parse(from_os_str))]
pub schema_path: PathBuf,
/// The file with the synonyms.
#[structopt(long = "synonyms", parse(from_os_str))]
pub synonyms: Option<PathBuf>,
/// The path to the list of stop words (one by line).
#[structopt(long = "stop-words", parse(from_os_str))]
pub stop_words: Option<PathBuf>,
#[structopt(long = "update-group-size")]
pub update_group_size: Option<usize>,
}
#[derive(Serialize, Deserialize)]
struct Document (
HashMap<String, String>
);
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Synonym {
OneWay(SynonymOneWay),
MultiWay { synonyms: Vec<String> },
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SynonymOneWay {
pub search_terms: String,
pub synonyms: Synonyms,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Synonyms {
Multiple(Vec<String>),
Single(String),
}
fn read_synonyms(path: &Path) -> Result<Vec<Synonym>, Box<dyn Error>> {
let file = File::open(path)?;
let synonyms = serde_json::from_reader(file)?;
Ok(synonyms)
}
fn index(
schema: Schema,
database_path: &Path,
csv_data_path: &Path,
update_group_size: Option<usize>,
stop_words: &HashSet<String>,
synonyms: Vec<Synonym>,
) -> Result<Database, Box<dyn Error>>
{
let database = Database::open(database_path)?;
let mut wtr = csv::Writer::from_path("./stats.csv").unwrap();
wtr.write_record(&["NumberOfDocuments", "DiskUsed", "MemoryUsed"])?;
let mut system = sysinfo::System::new();
let index = database.create_index("test", schema.clone())?;
let mut synonyms_adder = index.synonyms_addition();
for synonym in synonyms {
match synonym {
Synonym::OneWay(SynonymOneWay { search_terms, synonyms }) => {
let alternatives = match synonyms {
Synonyms::Multiple(alternatives) => alternatives,
Synonyms::Single(alternative) => vec![alternative],
};
synonyms_adder.add_synonym(search_terms, alternatives);
},
Synonym::MultiWay { mut synonyms } => {
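// register every word of the group as a search term whose alternatives are
// the remaining words, by rotating the list one step per iteration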
for _ in 0..synonyms.len() {
if let Some((synonym, alternatives)) = synonyms.split_first() {
synonyms_adder.add_synonym(synonym, alternatives);
}
synonyms.rotate_left(1);
}
},
}
}
synonyms_adder.finalize()?;
let mut rdr = csv::Reader::from_path(csv_data_path)?;
let mut raw_record = csv::StringRecord::new();
let headers = rdr.headers()?.clone();
let mut i = 0;
let mut end_of_file = false;
while !end_of_file {
let mut update = index.documents_addition();
loop {
end_of_file = !rdr.read_record(&mut raw_record)?;
if end_of_file { break }
let document: Document = match raw_record.deserialize(Some(&headers)) {
Ok(document) => document,
Err(e) => {
eprintln!("{:?}", e);
continue;
}
};
update.update_document(document);
print!("\rindexing document {}", i);
i += 1;
if let Some(group_size) = update_group_size {
if i % group_size == 0 { break }
}
}
println!();
println!("committing update...");
update.finalize()?;
// write stats
let directory_size = Walk::new(&[database_path.to_owned()], 4).run();
system.refresh_all();
let pid = sysinfo::get_current_pid()?;
let memory = system.get_process(pid).unwrap().memory(); // in kb
wtr.write_record(&[i.to_string(), directory_size.to_string(), memory.to_string()])?;
wtr.flush()?;
}
Ok(database)
}
fn retrieve_stop_words(path: &Path) -> io::Result<HashSet<String>> {
let f = File::open(path)?;
let reader = BufReader::new(f);
let mut words = HashSet::new();
for line in reader.lines() {
let line = line?;
let word = line.trim().to_string();
words.insert(word);
}
Ok(words)
}
fn main() -> Result<(), Box<dyn Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let schema = {
let string = fs::read_to_string(&opt.schema_path)?;
toml::from_str(&string)?
};
let stop_words = match opt.stop_words {
Some(ref path) => retrieve_stop_words(path)?,
None => HashSet::new(),
};
let synonyms = match opt.synonyms {
Some(ref path) => read_synonyms(path)?,
None => Vec::new(),
};
let start = Instant::now();
let result = index(
schema,
&opt.database_path,
&opt.csv_data_path,
opt.update_group_size,
&stop_words,
synonyms,
);
if let Err(e) = result {
return Err(e.into())
}
println!("database created in {:.2?} at: {:?}", start.elapsed(), opt.database_path);
Ok(())
}


@ -0,0 +1,229 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::btree_map::{BTreeMap, Entry};
use std::collections::HashSet;
use std::error::Error;
use std::io::{self, Write};
use std::iter::FromIterator;
use std::path::PathBuf;
use std::time::{Instant, Duration};
use indexmap::IndexMap;
use rustyline::{Editor, Config};
use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilidb_core::Highlight;
use meilidb_data::Database;
use meilidb_schema::SchemaAttr;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
#[structopt(long = "fetch-timeout-ms")]
pub fetch_timeout_ms: Option<u64>,
/// Fields that must be displayed.
pub displayed_fields: Vec<String>,
/// The number of returned results
#[structopt(short = "n", long = "number-results", default_value = "10")]
pub number_results: usize,
/// The number of characters before and after the first match
#[structopt(short = "C", long = "context", default_value = "35")]
pub char_context: usize,
}
type Document = IndexMap<String, String>;
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
let mut stdout = StandardStream::stdout(ColorChoice::Always);
let mut highlighted = false;
for range in ranges.windows(2) {
let [start, end] = match range { [start, end] => [*start, *end], _ => unreachable!() };
if highlighted {
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?;
}
write!(&mut stdout, "{}", &text[start..end])?;
stdout.reset()?;
highlighted = !highlighted;
}
Ok(())
}
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
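// convert a (char index, char length) pair coming from the highlights into the
// equivalent (byte index, byte length) so the UTF-8 text can be sliced safely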
let mut byte_index = 0;
let mut byte_length = 0;
for (n, (i, c)) in text.char_indices().enumerate() {
if n == index {
byte_index = i;
}
if n + 1 == index + length {
byte_length = i - byte_index + c.len_utf8();
break;
}
}
(byte_index, byte_length)
}
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
let mut byte_indexes = BTreeMap::new();
for highlight in highlights {
let char_index = highlight.char_index as usize;
let char_length = highlight.char_length as usize;
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
match byte_indexes.entry(byte_index) {
Entry::Vacant(entry) => { entry.insert(byte_length); },
Entry::Occupied(mut entry) => {
if *entry.get() < byte_length {
entry.insert(byte_length);
}
},
}
}
let mut title_areas = Vec::new();
title_areas.push(0);
for (byte_index, length) in byte_indexes {
title_areas.push(byte_index);
title_areas.push(byte_index + length);
}
title_areas.push(text.len());
title_areas.sort_unstable();
title_areas
}
/// note: matches must have been sorted by `char_index` and `char_length` before being passed.
///
/// ```no_run
/// matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
///
/// let matches = matches.matches.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
///
/// let (text, matches) = crop_text(&text, matches, 35);
/// ```
fn crop_text(
text: &str,
highlights: impl IntoIterator<Item=Highlight>,
context: usize,
) -> (String, Vec<Highlight>)
{
let mut highlights = highlights.into_iter().peekable();
let char_index = highlights.peek().map(|m| m.char_index as usize).unwrap_or(0);
let start = char_index.saturating_sub(context);
let text = text.chars().skip(start).take(context * 2).collect();
let highlights = highlights
.take_while(|m| {
(m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
})
.map(|highlight| {
Highlight { char_index: highlight.char_index - start as u16, ..highlight }
})
.collect();
(text, highlights)
}
fn main() -> Result<(), Box<dyn Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let start = Instant::now();
let database = Database::open(&opt.database_path)?;
let index = database.open_index("test")?.unwrap();
let schema = index.schema();
println!("database prepared for you in {:.2?}", start.elapsed());
let fields = opt.displayed_fields.iter().map(String::as_str);
let fields = HashSet::from_iter(fields);
let config = Config::builder().auto_add_history(true).build();
let mut readline = Editor::<()>::with_config(config);
let _ = readline.load_history("query-history.txt");
for result in readline.iter("Searching for: ") {
match result {
Ok(query) => {
let start_total = Instant::now();
let builder = match opt.fetch_timeout_ms {
Some(timeout_ms) => {
let timeout = Duration::from_millis(timeout_ms);
index.query_builder().with_fetch_timeout(timeout)
},
None => index.query_builder(),
};
let documents = builder.query(&query, 0..opt.number_results)?;
let mut retrieve_duration = Duration::default();
let number_of_documents = documents.len();
for mut doc in documents {
doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));
let start_retrieve = Instant::now();
let result = index.document::<Document>(Some(&fields), doc.id);
retrieve_duration += start_retrieve.elapsed();
match result {
Ok(Some(document)) => {
for (name, text) in document {
print!("{}: ", name);
let attr = schema.attribute(&name).unwrap();
let highlights = doc.highlights.iter()
.filter(|m| SchemaAttr::new(m.attribute) == attr)
.cloned();
let (text, highlights) = crop_text(&text, highlights, opt.char_context);
let areas = create_highlight_areas(&text, &highlights);
display_highlights(&text, &areas)?;
println!();
}
},
Ok(None) => eprintln!("missing document"),
Err(e) => eprintln!("{}", e),
}
let mut matching_attributes = HashSet::new();
for highlight in doc.highlights {
let attr = SchemaAttr::new(highlight.attribute);
let name = schema.attribute_name(attr);
matching_attributes.insert(name);
}
let matching_attributes = Vec::from_iter(matching_attributes);
println!("matching in: {:?}", matching_attributes);
println!();
}
eprintln!("document field retrieve took {:.2?}", retrieve_duration);
eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
},
Err(err) => {
println!("Error: {:?}", err);
break
}
}
}
readline.save_history("query-history.txt").unwrap();
Ok(())
}

meilidb/src/lib.rs (new file, 3 lines)

@ -0,0 +1,3 @@
mod sort_by_attr;
pub use self::sort_by_attr::SortByAttr;

meilidb/src/sort_by_attr.rs (new file, 125 lines)

@ -0,0 +1,125 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use meilidb_core::{criterion::Criterion, RawDocument};
use meilidb_data::RankedMap;
use meilidb_schema::{Schema, SchemaAttr};
/// A helper struct that allows sorting documents by
/// some of their stored attributes.
///
/// # Note
///
/// If a document cannot be deserialized, it is considered [`None`][].
///
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
/// so you should check the [`Ord`] implementation of `Option`.
///
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
///
/// # Example
///
/// ```ignore
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
///
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
///
/// let builder = CriteriaBuilder::with_capacity(8)
/// .add(SumOfTypos)
/// .add(NumberOfWords)
/// .add(WordsProximity)
/// .add(SumOfWordsAttribute)
/// .add(SumOfWordsPosition)
/// .add(Exact)
/// .add(custom_ranking)
/// .add(DocumentId);
///
/// let criterion = builder.build();
///
/// ```
pub struct SortByAttr<'a> {
ranked_map: &'a RankedMap,
attr: SchemaAttr,
reversed: bool,
}
impl<'a> SortByAttr<'a> {
pub fn lower_is_better(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
SortByAttr::new(ranked_map, schema, attr_name, false)
}
pub fn higher_is_better(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
SortByAttr::new(ranked_map, schema, attr_name, true)
}
fn new(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
reversed: bool,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
let attr = match schema.attribute(attr_name) {
Some(attr) => attr,
None => return Err(SortByAttrError::AttributeNotFound),
};
if !schema.props(attr).is_ranked() {
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
}
Ok(SortByAttr { ranked_map, attr, reversed })
}
}
impl<'a> Criterion for SortByAttr<'a> {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = self.ranked_map.get(lhs.id, self.attr);
let rhs = self.ranked_map.get(rhs.id, self.attr);
match (lhs, rhs) {
(Some(lhs), Some(rhs)) => {
let order = lhs.cmp(&rhs);
if self.reversed { order.reverse() } else { order }
},
(None, Some(_)) => Ordering::Greater,
(Some(_), None) => Ordering::Less,
(None, None) => Ordering::Equal,
}
}
fn name(&self) -> &'static str {
"SortByAttr"
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortByAttrError {
AttributeNotFound,
AttributeNotRegisteredForRanking,
}
impl fmt::Display for SortByAttrError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use SortByAttrError::*;
match self {
AttributeNotFound => f.write_str("attribute not found in the schema"),
AttributeNotRegisteredForRanking => f.write_str("attribute not registered for ranking"),
}
}
}
impl Error for SortByAttrError { }
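A hedged sketch (not part of the file) of how the criterion is constructed and how its two error cases surface; `ranked_map` and `schema` are assumed to come from an already opened index, and "release_date" is an illustrative attribute name.
match SortByAttr::lower_is_better(&ranked_map, &schema, "release_date") {
    Ok(_criterion) => {
        // push the criterion into a query builder's criteria list
    }
    Err(SortByAttrError::AttributeNotFound) => {
        eprintln!("no attribute with that name in the schema");
    }
    Err(SortByAttrError::AttributeNotRegisteredForRanking) => {
        eprintln!("the attribute exists but is not ranked in its SchemaProps");
    }
}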


@ -95,7 +95,8 @@ or
other
ought
our
ours ourselves
ours
ourselves
out
over
own

misc/fr.stopwords.txt (new file, 163 lines)

@ -0,0 +1,163 @@
au
aux
avec
ce
ces
dans
de
des
du
elle
en
et
eux
il
je
la
le
leur
lui
ma
mais
me
même
mes
moi
mon
ne
nos
notre
nous
on
ou
par
pas
pour
qu
que
qui
sa
se
ses
son
sur
ta
te
tes
toi
ton
tu
un
une
vos
votre
vous
c
d
j
l
à
m
n
s
t
y
été
étée
étées
étés
étant
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent
ceci
celà
cet
cette
ici
ils
les
leurs
quel
quels
quelle
quelles
sans
soi


@ -1,90 +0,0 @@
use fst::Automaton;
use lazy_static::lazy_static;
use levenshtein_automata::{
LevenshteinAutomatonBuilder as LevBuilder,
DFA, Distance,
};
lazy_static! {
static ref LEVDIST0: LevBuilder = LevBuilder::new(0, false);
static ref LEVDIST1: LevBuilder = LevBuilder::new(1, false);
static ref LEVDIST2: LevBuilder = LevBuilder::new(2, false);
}
pub struct DfaExt {
query_len: usize,
automaton: DFA,
}
impl Automaton for DfaExt {
type State = <DFA as Automaton>::State;
fn start(&self) -> Self::State {
self.automaton.start()
}
fn is_match(&self, state: &Self::State) -> bool {
self.automaton.is_match(state)
}
fn can_match(&self, state: &Self::State) -> bool {
self.automaton.can_match(state)
}
fn will_always_match(&self, state: &Self::State) -> bool {
self.automaton.will_always_match(state)
}
fn accept(&self, state: &Self::State, byte: u8) -> Self::State {
self.automaton.accept(state, byte)
}
}
impl AutomatonExt for DfaExt {
fn eval<B: AsRef<[u8]>>(&self, s: B) -> Distance {
self.automaton.eval(s)
}
fn query_len(&self) -> usize {
self.query_len
}
}
enum PrefixSetting {
Prefix,
NoPrefix,
}
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DfaExt {
use self::PrefixSetting::{Prefix, NoPrefix};
let dfa = match query.len() {
0 ..= 4 => match setting {
Prefix => LEVDIST0.build_prefix_dfa(query),
NoPrefix => LEVDIST0.build_dfa(query),
},
5 ..= 8 => match setting {
Prefix => LEVDIST1.build_prefix_dfa(query),
NoPrefix => LEVDIST1.build_dfa(query),
},
_ => match setting {
Prefix => LEVDIST2.build_prefix_dfa(query),
NoPrefix => LEVDIST2.build_dfa(query),
},
};
DfaExt { query_len: query.len(), automaton: dfa }
}
pub fn build_prefix_dfa(query: &str) -> DfaExt {
build_dfa_with_setting(query, PrefixSetting::Prefix)
}
pub fn build_dfa(query: &str) -> DfaExt {
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}
pub trait AutomatonExt: Automaton {
fn eval<B: AsRef<[u8]>>(&self, s: B) -> Distance;
fn query_len(&self) -> usize;
}


@ -1,26 +0,0 @@
use std::io::{self, BufReader, BufRead};
use std::collections::HashSet;
use std::path::Path;
use std::fs::File;
#[derive(Debug)]
pub struct CommonWords(HashSet<String>);
impl CommonWords {
pub fn from_file<P>(path: P) -> io::Result<Self>
where P: AsRef<Path>
{
let file = File::open(path)?;
let file = BufReader::new(file);
let mut set = HashSet::new();
for line in file.lines().filter_map(|l| l.ok()) {
let word = line.trim().to_owned();
set.insert(word);
}
Ok(CommonWords(set))
}
pub fn contains(&self, word: &str) -> bool {
self.0.contains(word)
}
}


@ -1,59 +0,0 @@
use std::slice::from_raw_parts;
use std::error::Error;
use std::path::Path;
use std::sync::Arc;
use std::{io, mem};
use sdset::Set;
use fst::raw::MmapReadOnly;
use serde::ser::{Serialize, Serializer};
use crate::DocumentId;
use crate::data::Data;
#[derive(Default, Clone)]
pub struct DocIds {
data: Data,
}
impl DocIds {
pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
let mmap = MmapReadOnly::open_path(path)?;
let data = Data::Mmap(mmap);
Ok(DocIds { data })
}
pub fn from_bytes(vec: Vec<u8>) -> Result<Self, Box<Error>> {
// FIXME check if modulo DocumentId
let len = vec.len();
let data = Data::Shared {
bytes: Arc::new(vec),
offset: 0,
len: len
};
Ok(DocIds { data })
}
pub fn from_document_ids(vec: Vec<DocumentId>) -> Self {
DocIds::from_bytes(unsafe { mem::transmute(vec) }).unwrap()
}
pub fn contains(&self, doc: DocumentId) -> bool {
// FIXME prefer using the sdset::exponential_search function
self.doc_ids().binary_search(&doc).is_ok()
}
pub fn doc_ids(&self) -> &Set<DocumentId> {
let slice = &self.data;
let ptr = slice.as_ptr() as *const DocumentId;
let len = slice.len() / mem::size_of::<DocumentId>();
let slice = unsafe { from_raw_parts(ptr, len) };
Set::new_unchecked(slice)
}
}
impl Serialize for DocIds {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
self.data.as_ref().serialize(serializer)
}
}


@ -1,204 +0,0 @@
use std::slice::from_raw_parts;
use std::io::{self, Write};
use std::mem::size_of;
use std::ops::Index;
use std::path::Path;
use std::sync::Arc;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use fst::raw::MmapReadOnly;
use sdset::Set;
use crate::DocIndex;
use crate::data::Data;
#[derive(Debug)]
#[repr(C)]
struct Range {
start: u64,
end: u64,
}
#[derive(Clone, Default)]
pub struct DocIndexes {
ranges: Data,
indexes: Data,
}
impl DocIndexes {
pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
let mmap = MmapReadOnly::open_path(path)?;
DocIndexes::from_data(Data::Mmap(mmap))
}
pub fn from_bytes(vec: Vec<u8>) -> io::Result<Self> {
let len = vec.len();
DocIndexes::from_shared_bytes(Arc::new(vec), 0, len)
}
pub fn from_shared_bytes(bytes: Arc<Vec<u8>>, offset: usize, len: usize) -> io::Result<Self> {
let data = Data::Shared { bytes, offset, len };
DocIndexes::from_data(data)
}
fn from_data(data: Data) -> io::Result<Self> {
let ranges_len_offset = data.len() - size_of::<u64>();
let ranges_len = (&data[ranges_len_offset..]).read_u64::<LittleEndian>()?;
let ranges_len = ranges_len as usize;
let ranges_offset = ranges_len_offset - ranges_len;
let ranges = data.range(ranges_offset, ranges_len);
let indexes = data.range(0, ranges_offset);
Ok(DocIndexes { ranges, indexes })
}
pub fn to_vec(&self) -> Vec<u8> {
let capacity = self.indexes.len() + self.ranges.len() + size_of::<u64>();
let mut bytes = Vec::with_capacity(capacity);
bytes.extend_from_slice(&self.indexes);
bytes.extend_from_slice(&self.ranges);
bytes.write_u64::<LittleEndian>(self.ranges.len() as u64).unwrap();
bytes
}
pub fn get(&self, index: usize) -> Option<&Set<DocIndex>> {
self.ranges().get(index as usize).map(|Range { start, end }| {
let start = *start as usize;
let end = *end as usize;
let slice = &self.indexes()[start..end];
Set::new_unchecked(slice)
})
}
fn ranges(&self) -> &[Range] {
let slice = &self.ranges;
let ptr = slice.as_ptr() as *const Range;
let len = slice.len() / size_of::<Range>();
unsafe { from_raw_parts(ptr, len) }
}
fn indexes(&self) -> &[DocIndex] {
let slice = &self.indexes;
let ptr = slice.as_ptr() as *const DocIndex;
let len = slice.len() / size_of::<DocIndex>();
unsafe { from_raw_parts(ptr, len) }
}
}
impl Index<usize> for DocIndexes {
type Output = [DocIndex];
fn index(&self, index: usize) -> &Self::Output {
match self.get(index) {
Some(indexes) => indexes,
None => panic!("index {} out of range for a maximum of {} ranges", index, self.ranges().len()),
}
}
}
pub struct DocIndexesBuilder<W> {
ranges: Vec<Range>,
wtr: W,
}
impl DocIndexesBuilder<Vec<u8>> {
pub fn memory() -> Self {
DocIndexesBuilder::new(Vec::new())
}
}
impl<W: Write> DocIndexesBuilder<W> {
pub fn new(wtr: W) -> Self {
DocIndexesBuilder {
ranges: Vec::new(),
wtr: wtr,
}
}
pub fn insert(&mut self, indexes: &Set<DocIndex>) -> io::Result<()> {
let len = indexes.len() as u64;
let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
let range = Range { start, end: start + len };
self.ranges.push(range);
// write the values
let indexes = unsafe { into_u8_slice(indexes) };
self.wtr.write_all(indexes)
}
pub fn finish(self) -> io::Result<()> {
self.into_inner().map(drop)
}
pub fn into_inner(mut self) -> io::Result<W> {
// write the ranges
let ranges = unsafe { into_u8_slice(self.ranges.as_slice()) };
self.wtr.write_all(ranges)?;
// write the length of the ranges
let len = ranges.len() as u64;
self.wtr.write_u64::<LittleEndian>(len)?;
Ok(self.wtr)
}
}
unsafe fn into_u8_slice<T>(slice: &[T]) -> &[u8] {
let ptr = slice.as_ptr() as *const u8;
let len = slice.len() * size_of::<T>();
from_raw_parts(ptr, len)
}
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
#[test]
fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
let mut builder = DocIndexesBuilder::memory();
builder.insert(Set::new(&[a])?)?;
builder.insert(Set::new(&[a, b, c])?)?;
builder.insert(Set::new(&[a, c])?)?;
let bytes = builder.into_inner()?;
let docs = DocIndexes::from_bytes(bytes)?;
assert_eq!(docs.get(0), Some(Set::new(&[a])?));
assert_eq!(docs.get(1), Some(Set::new(&[a, b, c])?));
assert_eq!(docs.get(2), Some(Set::new(&[a, c])?));
assert_eq!(docs.get(3), None);
Ok(())
}
#[test]
fn serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
let mut builder = DocIndexesBuilder::memory();
builder.insert(Set::new(&[a])?)?;
builder.insert(Set::new(&[a, b, c])?)?;
builder.insert(Set::new(&[a, c])?)?;
let builder_bytes = builder.into_inner()?;
let docs = DocIndexes::from_bytes(builder_bytes.clone())?;
let bytes = docs.to_vec();
assert_eq!(builder_bytes, bytes);
Ok(())
}
}


@ -1,65 +0,0 @@
mod doc_ids;
mod doc_indexes;
use std::ops::Deref;
use std::sync::Arc;
use fst::raw::MmapReadOnly;
pub use self::doc_ids::DocIds;
pub use self::doc_indexes::{DocIndexes, DocIndexesBuilder};
#[derive(Clone)]
enum Data {
Shared {
bytes: Arc<Vec<u8>>,
offset: usize,
len: usize,
},
Mmap(MmapReadOnly),
}
impl Data {
pub fn range(&self, off: usize, l: usize) -> Data {
match self {
Data::Shared { bytes, offset, len } => {
assert!(off + l <= *len);
Data::Shared {
bytes: bytes.clone(),
offset: offset + off,
len: l,
}
},
Data::Mmap(mmap) => Data::Mmap(mmap.range(off, l)),
}
}
}
impl Default for Data {
fn default() -> Data {
Data::Shared {
bytes: Arc::default(),
offset: 0,
len: 0,
}
}
}
impl Deref for Data {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.as_ref()
}
}
impl AsRef<[u8]> for Data {
fn as_ref(&self) -> &[u8] {
match self {
Data::Shared { bytes, offset, len } => {
&bytes[*offset..offset + len]
},
Data::Mmap(m) => m.as_slice(),
}
}
}


@ -1,110 +0,0 @@
mod ops;
pub mod positive;
pub mod negative;
pub use self::positive::{PositiveBlob, PositiveBlobBuilder};
pub use self::negative::NegativeBlob;
pub use self::ops::OpBuilder;
use std::fmt;
use serde_derive::{Serialize, Deserialize};
use serde::ser::{Serialize, Serializer, SerializeTuple};
use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
#[derive(Debug)]
pub enum Blob {
Positive(PositiveBlob),
Negative(NegativeBlob),
}
impl Blob {
pub fn is_negative(&self) -> bool {
self.sign() == Sign::Negative
}
pub fn is_positive(&self) -> bool {
self.sign() == Sign::Positive
}
pub fn sign(&self) -> Sign {
match self {
Blob::Positive(_) => Sign::Positive,
Blob::Negative(_) => Sign::Negative,
}
}
}
impl Serialize for Blob {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
match self {
Blob::Positive(blob) => {
let mut tuple = serializer.serialize_tuple(2)?;
tuple.serialize_element(&Sign::Positive)?;
tuple.serialize_element(&blob)?;
tuple.end()
},
Blob::Negative(blob) => {
let mut tuple = serializer.serialize_tuple(2)?;
tuple.serialize_element(&Sign::Negative)?;
tuple.serialize_element(&blob)?;
tuple.end()
},
}
}
}
impl<'de> Deserialize<'de> for Blob {
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Blob, D::Error> {
struct TupleVisitor;
impl<'de> Visitor<'de> for TupleVisitor {
type Value = Blob;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a Blob struct")
}
#[inline]
fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
let sign = match seq.next_element()? {
Some(value) => value,
None => return Err(de::Error::invalid_length(0, &self)),
};
match sign {
Sign::Positive => {
let blob = match seq.next_element()? {
Some(value) => value,
None => return Err(de::Error::invalid_length(1, &self)),
};
Ok(Blob::Positive(blob))
},
Sign::Negative => {
let blob = match seq.next_element()? {
Some(value) => value,
None => return Err(de::Error::invalid_length(1, &self)),
};
Ok(Blob::Negative(blob))
},
}
}
}
deserializer.deserialize_tuple(2, TupleVisitor)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Sign {
Positive,
Negative,
}
impl Sign {
pub fn invert(self) -> Sign {
match self {
Sign::Positive => Sign::Negative,
Sign::Negative => Sign::Positive,
}
}
}


@ -1,67 +0,0 @@
use std::error::Error;
use std::path::Path;
use std::fmt;
use sdset::Set;
use serde::de::{self, Deserialize, Deserializer};
use serde::ser::{Serialize, Serializer};
use crate::data::DocIds;
use crate::DocumentId;
#[derive(Default)]
pub struct NegativeBlob {
doc_ids: DocIds,
}
impl NegativeBlob {
pub unsafe fn from_path<P>(doc_ids: P) -> Result<Self, Box<Error>>
where P: AsRef<Path>,
{
let doc_ids = DocIds::from_path(doc_ids)?;
Ok(NegativeBlob { doc_ids })
}
pub fn from_bytes(doc_ids: Vec<u8>) -> Result<Self, Box<Error>> {
let doc_ids = DocIds::from_bytes(doc_ids)?;
Ok(NegativeBlob { doc_ids })
}
pub fn from_raw(doc_ids: DocIds) -> Self {
NegativeBlob { doc_ids }
}
pub fn as_ids(&self) -> &DocIds {
&self.doc_ids
}
pub fn into_doc_ids(self) -> DocIds {
self.doc_ids
}
}
impl AsRef<Set<DocumentId>> for NegativeBlob {
fn as_ref(&self) -> &Set<DocumentId> {
self.as_ids().doc_ids()
}
}
impl fmt::Debug for NegativeBlob {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "NegativeBlob(")?;
f.debug_list().entries(self.as_ref().as_slice()).finish()?;
write!(f, ")")
}
}
impl Serialize for NegativeBlob {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
self.doc_ids.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for NegativeBlob {
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<NegativeBlob, D::Error> {
let bytes = Vec::deserialize(deserializer)?;
NegativeBlob::from_bytes(bytes).map_err(de::Error::custom)
}
}


@ -1,5 +0,0 @@
mod blob;
mod ops;
pub use self::blob::NegativeBlob;
pub use self::ops::OpBuilder;


@ -1,73 +0,0 @@
use sdset::multi::OpBuilder as SdOpBuilder;
use sdset::Set;
use crate::database::blob::NegativeBlob;
use crate::data::DocIds;
use crate::DocumentId;
pub struct OpBuilder<'a> {
inner: SdOpBuilder<'a, DocumentId>,
}
/// Do a set operation on multiple negative blobs.
impl<'a> OpBuilder<'a> {
pub fn new() -> Self {
Self { inner: SdOpBuilder::new() }
}
pub fn with_capacity(cap: usize) -> Self {
Self { inner: SdOpBuilder::with_capacity(cap) }
}
pub fn add(mut self, blob: &'a NegativeBlob) -> Self {
self.push(blob);
self
}
pub fn push(&mut self, blob: &'a NegativeBlob) {
let set = Set::new_unchecked(blob.as_ref());
self.inner.push(set);
}
pub fn union(self) -> Union<'a> {
Union::new(self.inner.union())
}
pub fn intersection(self) -> Intersection<'a> {
Intersection::new(self.inner.intersection())
}
pub fn difference(self) -> Difference<'a> {
Difference::new(self.inner.difference())
}
pub fn symmetric_difference(self) -> SymmetricDifference<'a> {
SymmetricDifference::new(self.inner.symmetric_difference())
}
}
macro_rules! logical_operation {
(struct $name:ident, $operation:ident) => {
pub struct $name<'a> {
op: sdset::multi::$name<'a, DocumentId>,
}
impl<'a> $name<'a> {
fn new(op: sdset::multi::$name<'a, DocumentId>) -> Self {
$name { op }
}
pub fn into_negative_blob(self) -> NegativeBlob {
let document_ids = sdset::SetOperation::into_set_buf(self.op);
let doc_ids = DocIds::from_document_ids(document_ids.into_vec());
NegativeBlob::from_raw(doc_ids)
}
}
}}
logical_operation!(struct Union, union);
logical_operation!(struct Intersection, intersection);
logical_operation!(struct Difference, difference);
logical_operation!(struct SymmetricDifference, symmetric_difference);


@ -1,109 +0,0 @@
use std::error::Error;
use fst::{IntoStreamer, Streamer};
use sdset::duo::DifferenceByKey;
use sdset::{Set, SetOperation};
use group_by::GroupBy;
use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob};
use crate::database::blob::{positive, negative};
fn blob_same_sign(a: &Blob, b: &Blob) -> bool {
a.sign() == b.sign()
}
fn unwrap_positive(blob: &Blob) -> &PositiveBlob {
match blob {
Blob::Positive(blob) => blob,
Blob::Negative(_) => panic!("called `unwrap_positive()` on a `Negative` value"),
}
}
fn unwrap_negative(blob: &Blob) -> &NegativeBlob {
match blob {
Blob::Negative(blob) => blob,
Blob::Positive(_) => panic!("called `unwrap_negative()` on a `Positive` value"),
}
}
pub struct OpBuilder {
blobs: Vec<Blob>,
}
impl OpBuilder {
pub fn new() -> OpBuilder {
OpBuilder { blobs: Vec::new() }
}
pub fn with_capacity(cap: usize) -> OpBuilder {
OpBuilder { blobs: Vec::with_capacity(cap) }
}
pub fn push(&mut self, blob: Blob) {
if self.blobs.is_empty() && blob.is_negative() { return }
self.blobs.push(blob);
}
pub fn merge(self) -> Result<PositiveBlob, Box<Error>> {
let groups = GroupBy::new(&self.blobs, blob_same_sign);
let mut aggregated = Vec::new();
for blobs in groups {
match blobs[0].sign() {
Sign::Positive => {
let mut op_builder = positive::OpBuilder::with_capacity(blobs.len());
for blob in blobs {
op_builder.push(unwrap_positive(blob));
}
let mut stream = op_builder.union().into_stream();
let mut builder = PositiveBlobBuilder::memory();
while let Some((input, doc_indexes)) = stream.next() {
// FIXME empty doc_indexes must be handled by OpBuilder
if !doc_indexes.is_empty() {
builder.insert(input, doc_indexes).unwrap();
}
}
let (map, doc_indexes) = builder.into_inner().unwrap();
let blob = PositiveBlob::from_bytes(map, doc_indexes).unwrap();
aggregated.push(Blob::Positive(blob));
},
Sign::Negative => {
let mut op_builder = negative::OpBuilder::with_capacity(blobs.len());
for blob in blobs {
op_builder.push(unwrap_negative(blob));
}
let blob = op_builder.union().into_negative_blob();
aggregated.push(Blob::Negative(blob));
},
}
}
let mut buffer = Vec::new();
aggregated.chunks(2).try_fold(PositiveBlob::default(), |base, slice| {
let negative = NegativeBlob::default();
let (positive, negative) = match slice {
[a, b] => (unwrap_positive(a), unwrap_negative(b)),
[a] => (unwrap_positive(a), &negative),
_ => unreachable!(),
};
let mut builder = PositiveBlobBuilder::memory();
let op_builder = positive::OpBuilder::new().add(&base).add(&positive);
let mut stream = op_builder.union().into_stream();
while let Some((input, doc_indexes)) = stream.next() {
let op = DifferenceByKey::new(doc_indexes, negative.as_ref(), |x| x.document_id, |x| *x);
buffer.clear();
op.extend_vec(&mut buffer);
if !buffer.is_empty() {
builder.insert(input, Set::new_unchecked(&buffer))?;
}
}
let (map, doc_indexes) = builder.into_inner()?;
PositiveBlob::from_bytes(map, doc_indexes)
})
}
}


@ -1,254 +0,0 @@
use std::fmt;
use std::io::Write;
use std::path::Path;
use std::error::Error;
use fst::{map, Map, Streamer, IntoStreamer};
use sdset::Set;
use crate::DocIndex;
use crate::data::{DocIndexes, DocIndexesBuilder};
use serde::ser::{Serialize, Serializer, SerializeTuple};
use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
#[derive(Default)]
pub struct PositiveBlob {
map: Map,
indexes: DocIndexes,
}
impl PositiveBlob {
pub unsafe fn from_paths<P, Q>(map: P, indexes: Q) -> Result<Self, Box<Error>>
where P: AsRef<Path>,
Q: AsRef<Path>,
{
let map = Map::from_path(map)?;
let indexes = DocIndexes::from_path(indexes)?;
Ok(PositiveBlob { map, indexes })
}
pub fn from_bytes(map: Vec<u8>, indexes: Vec<u8>) -> Result<Self, Box<Error>> {
let map = Map::from_bytes(map)?;
let indexes = DocIndexes::from_bytes(indexes)?;
Ok(PositiveBlob { map, indexes })
}
pub fn from_raw(map: Map, indexes: DocIndexes) -> Self {
PositiveBlob { map, indexes }
}
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[DocIndex]> {
self.map.get(key).map(|index| &self.indexes[index as usize])
}
pub fn as_map(&self) -> &Map {
&self.map
}
pub fn as_indexes(&self) -> &DocIndexes {
&self.indexes
}
pub fn explode(self) -> (Map, DocIndexes) {
(self.map, self.indexes)
}
}
impl fmt::Debug for PositiveBlob {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "PositiveBlob([")?;
let mut stream = self.into_stream();
let mut first = true;
while let Some((k, v)) = stream.next() {
if !first {
write!(f, ", ")?;
}
first = false;
write!(f, "({}, {:?})", String::from_utf8_lossy(k), v)?;
}
write!(f, "])")
}
}
impl<'m, 'a> IntoStreamer<'a> for &'m PositiveBlob {
type Item = (&'a [u8], &'a [DocIndex]);
/// The type of the stream to be constructed.
type Into = PositiveBlobStream<'m>;
/// Construct a stream from `Self`.
fn into_stream(self) -> Self::Into {
PositiveBlobStream {
map_stream: self.map.into_stream(),
doc_indexes: &self.indexes,
}
}
}
pub struct PositiveBlobStream<'m> {
map_stream: map::Stream<'m>,
doc_indexes: &'m DocIndexes,
}
impl<'m, 'a> Streamer<'a> for PositiveBlobStream<'m> {
type Item = (&'a [u8], &'a [DocIndex]);
fn next(&'a mut self) -> Option<Self::Item> {
match self.map_stream.next() {
Some((input, index)) => {
let doc_indexes = &self.doc_indexes[index as usize];
Some((input, doc_indexes))
},
None => None,
}
}
}
impl Serialize for PositiveBlob {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut tuple = serializer.serialize_tuple(2)?;
tuple.serialize_element(&self.map.as_fst().to_vec())?;
tuple.serialize_element(&self.indexes.to_vec())?;
tuple.end()
}
}
impl<'de> Deserialize<'de> for PositiveBlob {
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<PositiveBlob, D::Error> {
struct TupleVisitor;
impl<'de> Visitor<'de> for TupleVisitor {
type Value = PositiveBlob;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a PositiveBlob struct")
}
#[inline]
fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
let map = match seq.next_element()? {
Some(bytes) => match Map::from_bytes(bytes) {
Ok(value) => value,
Err(err) => return Err(de::Error::custom(err)),
},
None => return Err(de::Error::invalid_length(0, &self)),
};
let indexes = match seq.next_element()? {
Some(bytes) => match DocIndexes::from_bytes(bytes) {
Ok(value) => value,
Err(err) => return Err(de::Error::custom(err)),
},
None => return Err(de::Error::invalid_length(1, &self)),
};
Ok(PositiveBlob { map, indexes })
}
}
deserializer.deserialize_tuple(2, TupleVisitor)
}
}
pub struct PositiveBlobBuilder<W, X> {
map: fst::MapBuilder<W>,
indexes: DocIndexesBuilder<X>,
value: u64,
}
impl PositiveBlobBuilder<Vec<u8>, Vec<u8>> {
pub fn memory() -> Self {
PositiveBlobBuilder {
map: fst::MapBuilder::memory(),
indexes: DocIndexesBuilder::memory(),
value: 0,
}
}
}
impl<W: Write, X: Write> PositiveBlobBuilder<W, X> {
pub fn new(map: W, indexes: X) -> Result<Self, Box<Error>> {
Ok(PositiveBlobBuilder {
map: fst::MapBuilder::new(map)?,
indexes: DocIndexesBuilder::new(indexes),
value: 0,
})
}
/// If a key is inserted that is less than or equal to any previous key added,
/// then an error is returned. Similarly, if there was a problem writing
/// to the underlying writer, an error is returned.
// FIXME what if one write doesn't work but the other do ?
pub fn insert<K>(&mut self, key: K, doc_indexes: &Set<DocIndex>) -> Result<(), Box<Error>>
where K: AsRef<[u8]>,
{
self.map.insert(key, self.value)?;
self.indexes.insert(doc_indexes)?;
self.value += 1;
Ok(())
}
pub fn finish(self) -> Result<(), Box<Error>> {
self.into_inner().map(drop)
}
pub fn into_inner(self) -> Result<(W, X), Box<Error>> {
let map = self.map.into_inner()?;
let indexes = self.indexes.into_inner()?;
Ok((map, indexes))
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
#[test]
fn serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
let mut builder = PositiveBlobBuilder::memory();
builder.insert("aaa", Set::new(&[a])?)?;
builder.insert("aab", Set::new(&[a, b, c])?)?;
builder.insert("aac", Set::new(&[a, c])?)?;
let (map_bytes, indexes_bytes) = builder.into_inner()?;
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
assert_eq!(positive_blob.get("aaa"), Some(&[a][..]));
assert_eq!(positive_blob.get("aab"), Some(&[a, b, c][..]));
assert_eq!(positive_blob.get("aac"), Some(&[a, c][..]));
assert_eq!(positive_blob.get("aad"), None);
Ok(())
}
#[test]
fn serde_serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex { document_id: 0, attribute: 3, attribute_index: 11 };
let b = DocIndex { document_id: 1, attribute: 4, attribute_index: 21 };
let c = DocIndex { document_id: 2, attribute: 8, attribute_index: 2 };
let mut builder = PositiveBlobBuilder::memory();
builder.insert("aaa", Set::new(&[a])?)?;
builder.insert("aab", Set::new(&[a, b, c])?)?;
builder.insert("aac", Set::new(&[a, c])?)?;
let (map_bytes, indexes_bytes) = builder.into_inner()?;
let positive_blob = PositiveBlob::from_bytes(map_bytes, indexes_bytes)?;
let bytes = bincode::serialize(&positive_blob)?;
let positive_blob: PositiveBlob = bincode::deserialize(&bytes)?;
assert_eq!(positive_blob.get("aaa"), Some(&[a][..]));
assert_eq!(positive_blob.get("aab"), Some(&[a, b, c][..]));
assert_eq!(positive_blob.get("aac"), Some(&[a, c][..]));
assert_eq!(positive_blob.get("aad"), None);
Ok(())
}
}


@ -1,5 +0,0 @@
mod blob;
mod ops;
pub use self::blob::{PositiveBlob, PositiveBlobBuilder};
pub use self::ops::OpBuilder;


@ -1,128 +0,0 @@
use sdset::multi::OpBuilder as SdOpBuilder;
use sdset::{SetOperation, Set};
use crate::database::blob::PositiveBlob;
use crate::data::DocIndexes;
use crate::DocIndex;
pub struct OpBuilder<'m> {
// the operation on the maps is always a union.
map_op: fst::map::OpBuilder<'m>,
indexes: Vec<&'m DocIndexes>,
}
/// Do a set operation on multiple positive blobs.
impl<'m> OpBuilder<'m> {
pub fn new() -> Self {
Self {
map_op: fst::map::OpBuilder::new(),
indexes: Vec::new(),
}
}
pub fn with_capacity(cap: usize) -> Self {
Self {
map_op: fst::map::OpBuilder::new(), // TODO patch fst to add with_capacity
indexes: Vec::with_capacity(cap),
}
}
pub fn add(mut self, blob: &'m PositiveBlob) -> Self {
self.push(blob);
self
}
pub fn push(&mut self, blob: &'m PositiveBlob) {
self.map_op.push(blob.as_map());
self.indexes.push(blob.as_indexes());
}
pub fn union(self) -> Union<'m> {
Union::new(self.map_op.union(), self.indexes)
}
pub fn intersection(self) -> Intersection<'m> {
Intersection::new(self.map_op.union(), self.indexes)
}
pub fn difference(self) -> Difference<'m> {
Difference::new(self.map_op.union(), self.indexes)
}
pub fn symmetric_difference(self) -> SymmetricDifference<'m> {
SymmetricDifference::new(self.map_op.union(), self.indexes)
}
}
macro_rules! logical_operation {
(struct $name:ident, $operation:ident) => {
pub struct $name<'m> {
stream: fst::map::Union<'m>,
indexes: Vec<&'m DocIndexes>,
outs: Vec<DocIndex>,
}
impl<'m> $name<'m> {
fn new(stream: fst::map::Union<'m>, indexes: Vec<&'m DocIndexes>) -> Self {
$name {
stream: stream,
indexes: indexes,
outs: Vec::new(),
}
}
}
impl<'m, 'a> fst::Streamer<'a> for $name<'m> {
type Item = (&'a [u8], &'a Set<DocIndex>);
fn next(&'a mut self) -> Option<Self::Item> {
// loop {
// let (input, ivalues) = match self.stream.next() {
// Some(value) => value,
// None => return None,
// };
// self.outs.clear();
// let mut builder = SdOpBuilder::with_capacity(ivalues.len());
// for ivalue in ivalues {
// let indexes = self.indexes[ivalue.index];
// let indexes = indexes.get(ivalue.value).expect("BUG: could not find document indexes");
// let set = Set::new_unchecked(indexes);
// builder.push(set);
// }
// builder.$operation().extend_vec(&mut self.outs);
// if self.outs.is_empty() { continue }
// return Some((input, &self.outs))
// }
// FIXME make the above code compile
match self.stream.next() {
Some((input, ivalues)) => {
self.outs.clear();
let mut builder = SdOpBuilder::with_capacity(ivalues.len());
for ivalue in ivalues {
let doc_indexes = &self.indexes[ivalue.index][ivalue.value as usize];
let set = Set::new_unchecked(doc_indexes);
builder.push(set);
}
builder.$operation().extend_vec(&mut self.outs);
if self.outs.is_empty() { return None }
return Some((input, Set::new_unchecked(&self.outs)))
},
None => None
}
}
}
}}
logical_operation!(struct Union, union);
logical_operation!(struct Intersection, intersection);
logical_operation!(struct Difference, difference);
logical_operation!(struct SymmetricDifference, symmetric_difference);


@ -1,175 +0,0 @@
use std::error::Error;
use std::path::Path;
use std::ops::Deref;
use std::{fmt, marker};
use rocksdb::rocksdb_options::{ReadOptions, EnvOptions, ColumnFamilyOptions};
use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey, SstFileWriter};
use serde::de::DeserializeOwned;
use crate::database::{DocumentKey, DocumentKeyAttr};
use crate::database::{retrieve_data_schema, retrieve_data_index};
use crate::database::blob::positive::PositiveBlob;
use crate::database::deserializer::Deserializer;
use crate::database::schema::Schema;
use crate::rank::QueryBuilder;
use crate::DocumentId;
pub struct DatabaseView<D>
where D: Deref<Target=DB>
{
snapshot: Snapshot<D>,
blob: PositiveBlob,
schema: Schema,
}
impl<D> DatabaseView<D>
where D: Deref<Target=DB>
{
pub fn new(snapshot: Snapshot<D>) -> Result<DatabaseView<D>, Box<Error>> {
let schema = retrieve_data_schema(&snapshot)?;
let blob = retrieve_data_index(&snapshot)?;
Ok(DatabaseView { snapshot, blob, schema })
}
pub fn schema(&self) -> &Schema {
&self.schema
}
pub fn blob(&self) -> &PositiveBlob {
&self.blob
}
pub fn into_snapshot(self) -> Snapshot<D> {
self.snapshot
}
pub fn snapshot(&self) -> &Snapshot<D> {
&self.snapshot
}
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
Ok(self.snapshot.get(key)?)
}
pub fn dump_all<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<Error>> {
let path = path.as_ref().to_string_lossy();
let env_options = EnvOptions::new();
let column_family_options = ColumnFamilyOptions::new();
let mut file_writer = SstFileWriter::new(env_options, column_family_options);
file_writer.open(&path)?;
let mut iter = self.snapshot.iter();
iter.seek(SeekKey::Start);
for (key, value) in &mut iter {
file_writer.put(&key, &value)?;
}
file_writer.finish()?;
Ok(())
}
pub fn query_builder(&self) -> Result<QueryBuilder<D>, Box<Error>> {
QueryBuilder::new(self)
}
// TODO create an enum error type
pub fn retrieve_document<T>(&self, id: DocumentId) -> Result<T, Box<Error>>
where T: DeserializeOwned
{
let mut deserializer = Deserializer::new(&self.snapshot, &self.schema, id);
Ok(T::deserialize(&mut deserializer)?)
}
pub fn retrieve_documents<T, I>(&self, ids: I) -> DocumentIter<D, T, I::IntoIter>
where T: DeserializeOwned,
I: IntoIterator<Item=DocumentId>,
{
DocumentIter {
database_view: self,
document_ids: ids.into_iter(),
_phantom: marker::PhantomData,
}
}
}
impl<D> fmt::Debug for DatabaseView<D>
where D: Deref<Target=DB>
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut options = ReadOptions::new();
let lower = DocumentKey::new(0);
options.set_iterate_lower_bound(lower.as_ref());
let mut iter = self.snapshot.iter_opt(options);
iter.seek(SeekKey::Start);
let iter = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key));
if f.alternate() {
writeln!(f, "DatabaseView(")?;
} else {
write!(f, "DatabaseView(")?;
}
self.schema.fmt(f)?;
if f.alternate() {
writeln!(f, ",")?;
} else {
write!(f, ", ")?;
}
f.debug_list().entries(iter).finish()?;
write!(f, ")")
}
}
// TODO this is just an iter::Map !!!
pub struct DocumentIter<'a, D, T, I>
where D: Deref<Target=DB>
{
database_view: &'a DatabaseView<D>,
document_ids: I,
_phantom: marker::PhantomData<T>,
}
impl<'a, D, T, I> Iterator for DocumentIter<'a, D, T, I>
where D: Deref<Target=DB>,
T: DeserializeOwned,
I: Iterator<Item=DocumentId>,
{
type Item = Result<T, Box<Error>>;
fn size_hint(&self) -> (usize, Option<usize>) {
self.document_ids.size_hint()
}
fn next(&mut self) -> Option<Self::Item> {
match self.document_ids.next() {
Some(id) => Some(self.database_view.retrieve_document(id)),
None => None
}
}
}
impl<'a, D, T, I> ExactSizeIterator for DocumentIter<'a, D, T, I>
where D: Deref<Target=DB>,
T: DeserializeOwned,
I: ExactSizeIterator + Iterator<Item=DocumentId>,
{ }
impl<'a, D, T, I> DoubleEndedIterator for DocumentIter<'a, D, T, I>
where D: Deref<Target=DB>,
T: DeserializeOwned,
I: DoubleEndedIterator + Iterator<Item=DocumentId>,
{
fn next_back(&mut self) -> Option<Self::Item> {
match self.document_ids.next_back() {
Some(id) => Some(self.database_view.retrieve_document(id)),
None => None
}
}
}
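In practice a `DatabaseView` is obtained from `Database::view()` (defined later in this diff), and documents are read back through `retrieve_document` and `retrieve_documents`. A hedged sketch follows, assuming an already populated database and a `SimpleDoc` type deriving `Deserialize` like the one used in the `ingest_update_file` test further down; the `print_documents` helper is only for illustration.

fn print_documents(database: &Database) -> Result<(), Box<Error>> {
    let view = database.view();
    // a single document by id
    let doc: SimpleDoc = view.retrieve_document(0)?;
    println!("{:?}", doc);
    // several documents through the DocumentIter wrapper
    for doc in view.retrieve_documents::<SimpleDoc, _>(vec![0, 1]) {
        println!("{:?}", doc?);
    }
    Ok(())
}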


@@ -1,186 +0,0 @@
use std::error::Error;
use std::ops::Deref;
use std::fmt;
use rocksdb::rocksdb::{DB, Snapshot, SeekKey};
use rocksdb::rocksdb_options::ReadOptions;
use serde::forward_to_deserialize_any;
use serde::de::value::MapDeserializer;
use serde::de::{self, Visitor, IntoDeserializer};
use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
use crate::database::schema::Schema;
use crate::DocumentId;
pub struct Deserializer<'a, D>
where D: Deref<Target=DB>
{
snapshot: &'a Snapshot<D>,
schema: &'a Schema,
document_id: DocumentId,
}
impl<'a, D> Deserializer<'a, D>
where D: Deref<Target=DB>
{
pub fn new(snapshot: &'a Snapshot<D>, schema: &'a Schema, doc: DocumentId) -> Self {
Deserializer { snapshot, schema, document_id: doc }
}
}
impl<'de, 'a, 'b, D> de::Deserializer<'de> for &'b mut Deserializer<'a, D>
where D: Deref<Target=DB>
{
type Error = DeserializerError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
self.deserialize_map(visitor)
}
forward_to_deserialize_any! {
bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
bytes byte_buf unit_struct tuple_struct
identifier tuple ignored_any option newtype_struct enum struct
}
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
let mut options = ReadOptions::new();
let lower = DocumentKey::new(self.document_id);
let upper = lower.with_attribute_max();
options.set_iterate_lower_bound(lower.as_ref());
options.set_iterate_upper_bound(upper.as_ref());
let mut iter = self.snapshot.iter_opt(options);
iter.seek(SeekKey::Start);
if iter.kv().is_none() {
// FIXME return an error
}
let iter = iter.map(|(key, value)| {
// retrieve the schema attribute name
// from the schema attribute number
let document_key_attr = DocumentKeyAttr::from_bytes(&key);
let schema_attr = document_key_attr.attribute();
let attribute_name = self.schema.attribute_name(schema_attr);
(attribute_name, Value(value))
});
let map_deserializer = MapDeserializer::new(iter);
visitor.visit_map(map_deserializer)
}
}
struct Value(Vec<u8>);
impl<'de> IntoDeserializer<'de, DeserializerError> for Value {
type Deserializer = Self;
fn into_deserializer(self) -> Self::Deserializer {
self
}
}
macro_rules! forward_to_bincode_values {
($($ty:ident => $de_method:ident,)*) => {
$(
fn $de_method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
match bincode::deserialize::<$ty>(&self.0) {
Ok(val) => val.into_deserializer().$de_method(visitor),
Err(e) => Err(de::Error::custom(e)),
}
}
)*
}
}
impl<'de, 'a> de::Deserializer<'de> for Value {
type Error = DeserializerError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
self.0.into_deserializer().deserialize_any(visitor)
}
fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
self.deserialize_string(visitor)
}
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
match bincode::deserialize::<String>(&self.0) {
Ok(val) => val.into_deserializer().deserialize_string(visitor),
Err(e) => Err(de::Error::custom(e)),
}
}
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
self.deserialize_byte_buf(visitor)
}
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
match bincode::deserialize::<Vec<u8>>(&self.0) {
Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor),
Err(e) => Err(de::Error::custom(e)),
}
}
forward_to_bincode_values! {
char => deserialize_char,
bool => deserialize_bool,
u8 => deserialize_u8,
u16 => deserialize_u16,
u32 => deserialize_u32,
u64 => deserialize_u64,
i8 => deserialize_i8,
i16 => deserialize_i16,
i32 => deserialize_i32,
i64 => deserialize_i64,
f32 => deserialize_f32,
f64 => deserialize_f64,
}
forward_to_deserialize_any! {
unit seq map
unit_struct tuple_struct
identifier tuple ignored_any option newtype_struct enum struct
}
}
#[derive(Debug)]
pub enum DeserializerError {
Custom(String),
}
impl de::Error for DeserializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
DeserializerError::Custom(msg.to_string())
}
}
impl fmt::Display for DeserializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DeserializerError::Custom(s) => f.write_str(&s),
}
}
}
impl Error for DeserializerError {}
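The `Value` wrapper above only re-decodes raw bincode payloads: every stored attribute value is a bincode-encoded primitive that is deserialized again on demand. A small sketch of the round trip it relies on (plain bincode calls, outside the deserializer; the helper name is only for illustration):

fn bincode_roundtrip() -> bincode::Result<()> {
    // what gets written for a stored string attribute...
    let stored: Vec<u8> = bincode::serialize(&String::from("I am a title"))?;
    // ...and what the Value deserializer reads back from it
    let title: String = bincode::deserialize(&stored)?;
    assert_eq!(title, "I am a title");
    Ok(())
}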


@@ -1,118 +0,0 @@
use std::io::{Cursor, Read, Write};
use std::mem::size_of;
use std::fmt;
use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt};
use crate::database::schema::SchemaAttr;
use crate::DocumentId;
const DOC_KEY_LEN: usize = 4 + size_of::<u64>();
const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + size_of::<u32>();
#[derive(Copy, Clone)]
pub struct DocumentKey([u8; DOC_KEY_LEN]);
impl DocumentKey {
pub fn new(id: DocumentId) -> DocumentKey {
let mut buffer = [0; DOC_KEY_LEN];
let mut wtr = Cursor::new(&mut buffer[..]);
wtr.write_all(b"doc-").unwrap();
wtr.write_u64::<NativeEndian>(id).unwrap();
DocumentKey(buffer)
}
pub fn from_bytes(mut bytes: &[u8]) -> DocumentKey {
assert!(bytes.len() >= DOC_KEY_LEN);
assert_eq!(&bytes[..4], b"doc-");
let mut buffer = [0; DOC_KEY_LEN];
bytes.read_exact(&mut buffer).unwrap();
DocumentKey(buffer)
}
pub fn with_attribute(&self, attr: SchemaAttr) -> DocumentKeyAttr {
DocumentKeyAttr::new(self.document_id(), attr)
}
pub fn with_attribute_max(&self) -> DocumentKeyAttr {
DocumentKeyAttr::new(self.document_id(), SchemaAttr::max())
}
pub fn document_id(&self) -> DocumentId {
(&self.0[4..]).read_u64::<NativeEndian>().unwrap()
}
}
impl AsRef<[u8]> for DocumentKey {
fn as_ref(&self) -> &[u8] {
&self.0
}
}
impl fmt::Debug for DocumentKey {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("DocumentKey")
.field("document_id", &self.document_id())
.finish()
}
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]);
impl DocumentKeyAttr {
pub fn new(id: DocumentId, attr: SchemaAttr) -> DocumentKeyAttr {
let mut buffer = [0; DOC_KEY_ATTR_LEN];
let DocumentKey(raw_key) = DocumentKey::new(id);
let mut wtr = Cursor::new(&mut buffer[..]);
wtr.write_all(&raw_key).unwrap();
wtr.write_all(b"-").unwrap();
wtr.write_u32::<NativeEndian>(attr.as_u32()).unwrap();
DocumentKeyAttr(buffer)
}
pub fn from_bytes(mut bytes: &[u8]) -> DocumentKeyAttr {
assert!(bytes.len() >= DOC_KEY_ATTR_LEN);
assert_eq!(&bytes[..4], b"doc-");
let mut buffer = [0; DOC_KEY_ATTR_LEN];
bytes.read_exact(&mut buffer).unwrap();
DocumentKeyAttr(buffer)
}
pub fn document_id(&self) -> DocumentId {
(&self.0[4..]).read_u64::<NativeEndian>().unwrap()
}
pub fn attribute(&self) -> SchemaAttr {
let offset = 4 + size_of::<u64>() + 1;
let value = (&self.0[offset..]).read_u32::<NativeEndian>().unwrap();
SchemaAttr::new(value)
}
pub fn into_document_key(self) -> DocumentKey {
DocumentKey::new(self.document_id())
}
}
impl AsRef<[u8]> for DocumentKeyAttr {
fn as_ref(&self) -> &[u8] {
&self.0
}
}
impl fmt::Debug for DocumentKeyAttr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("DocumentKeyAttr")
.field("document_id", &self.document_id())
.field("attribute", &self.attribute().as_u32())
.finish()
}
}
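Both key types encode a fixed byte layout: `"doc-"` followed by a native-endian `u64` document id, and for `DocumentKeyAttr` an extra `"-"` plus a native-endian `u32` attribute number. A short sketch using only the constructors defined above (`key_layout_example` is just an illustrative name):

fn key_layout_example() {
    let key = DocumentKey::new(42);
    let bytes: &[u8] = key.as_ref();
    assert_eq!(bytes.len(), 4 + 8); // "doc-" + native-endian u64 id
    assert_eq!(key.document_id(), 42);

    let key_attr = key.with_attribute(SchemaAttr::new(1));
    let bytes: &[u8] = key_attr.as_ref();
    assert_eq!(bytes.len(), 4 + 8 + 1 + 4); // + "-" + native-endian u32 attribute
    assert_eq!(key_attr.document_id(), 42);
    assert_eq!(key_attr.attribute().as_u32(), 1);
}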


@@ -1,259 +0,0 @@
use std::sync::{Arc, Mutex, RwLock, RwLockReadGuard};
use std::error::Error;
use std::path::Path;
use std::ops::Deref;
use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions};
use rocksdb::rocksdb::{Writable, Snapshot};
use rocksdb::{DB, DBVector, MergeOperands};
pub use self::document_key::{DocumentKey, DocumentKeyAttr};
pub use self::database_view::{DatabaseView, DocumentIter};
use self::blob::positive::PositiveBlob;
use self::update::Update;
use self::schema::Schema;
use self::blob::Blob;
pub mod blob;
pub mod schema;
pub mod update;
mod document_key;
mod database_view;
mod deserializer;
const DATA_INDEX: &[u8] = b"data-index";
const DATA_SCHEMA: &[u8] = b"data-schema";
pub fn retrieve_data_schema<D>(snapshot: &Snapshot<D>) -> Result<Schema, Box<Error>>
where D: Deref<Target=DB>
{
match snapshot.get(DATA_SCHEMA)? {
Some(vector) => Ok(Schema::read_from(&*vector)?),
None => Err(String::from("BUG: no schema found in the database").into()),
}
}
pub fn retrieve_data_index<D>(snapshot: &Snapshot<D>) -> Result<PositiveBlob, Box<Error>>
where D: Deref<Target=DB>
{
match snapshot.get(DATA_INDEX)? {
Some(vector) => Ok(bincode::deserialize(&*vector)?),
None => Ok(PositiveBlob::default()),
}
}
pub struct Database {
// The DB is under a Mutex to synchronize update ingestions and to separate
// DB update locking from DatabaseView acquisition; in other words:
// "block readers for the minimum possible amount of time"
db: Mutex<Arc<DB>>,
// This view is updated each time the DB ingests an update
view: RwLock<DatabaseView<Arc<DB>>>,
}
impl Database {
pub fn create<P: AsRef<Path>>(path: P, schema: Schema) -> Result<Database, Box<Error>> {
let path = path.as_ref();
if path.exists() {
return Err(format!("File already exists at path: {}, cannot create database.",
path.display()).into())
}
let path = path.to_string_lossy();
let mut opts = DBOptions::new();
opts.create_if_missing(true);
// opts.error_if_exists(true); // FIXME pull request that
let mut cf_opts = ColumnFamilyOptions::new();
cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
let db = DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
let mut schema_bytes = Vec::new();
schema.write_to(&mut schema_bytes)?;
db.put(DATA_SCHEMA, &schema_bytes)?;
let db = Arc::new(db);
let snapshot = Snapshot::new(db.clone());
let view = RwLock::new(DatabaseView::new(snapshot)?);
Ok(Database { db: Mutex::new(db), view })
}
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
let path = path.as_ref().to_string_lossy();
let mut opts = DBOptions::new();
opts.create_if_missing(false);
let mut cf_opts = ColumnFamilyOptions::new();
cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
let db = DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
// FIXME create a generic function to do that !
let _schema = match db.get(DATA_SCHEMA)? {
Some(value) => Schema::read_from(&*value)?,
None => return Err(String::from("Database does not contain a schema").into()),
};
let db = Arc::new(db);
let snapshot = Snapshot::new(db.clone());
let view = RwLock::new(DatabaseView::new(snapshot)?);
Ok(Database { db: Mutex::new(db), view })
}
pub fn ingest_update_file(&self, update: Update) -> Result<(), Box<Error>> {
let snapshot = {
// We must hold the mutex here to ensure that update ingestions and compactions
// are done atomically and in the right order.
// This way an update ingestion blocks other update ingestions without blocking
// view creations while the "data-index" compaction is running.
let db = match self.db.lock() {
Ok(db) => db,
Err(e) => return Err(e.to_string().into()),
};
let move_update = update.can_be_moved();
let path = update.into_path_buf();
let path = path.to_string_lossy();
let mut options = IngestExternalFileOptions::new();
options.move_files(move_update);
let cf_handle = db.cf_handle("default").expect("\"default\" column family not found");
db.ingest_external_file_optimized(&cf_handle, &options, &[&path])?;
// Compact now so the merge operator is triggered only once,
// while ingesting the update, instead of on every search
db.compact_range(Some(DATA_INDEX), Some(DATA_INDEX));
Snapshot::new(db.clone())
};
// Here we will block the view creation for the minimum amount of time:
// updating the DatabaseView itself with the new database snapshot
let view = DatabaseView::new(snapshot)?;
match self.view.write() {
Ok(mut lock) => *lock = view,
Err(e) => return Err(e.to_string().into()),
}
Ok(())
}
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
self.view().get(key)
}
pub fn flush(&self) -> Result<(), Box<Error>> {
match self.db.lock() {
Ok(db) => Ok(db.flush(true)?),
Err(e) => Err(e.to_string().into()),
}
}
pub fn view(&self) -> RwLockReadGuard<DatabaseView<Arc<DB>>> {
self.view.read().unwrap()
}
}
fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
if key != DATA_INDEX {
panic!("The merge operator only supports \"data-index\" merging")
}
let capacity = {
let remaining = operands.size_hint().0;
let already_exist = usize::from(existing_value.is_some());
remaining + already_exist
};
let mut op = blob::OpBuilder::with_capacity(capacity);
if let Some(existing_value) = existing_value {
let blob = bincode::deserialize(existing_value).expect("BUG: could not deserialize data-index");
op.push(Blob::Positive(blob));
}
for bytes in operands {
let blob = bincode::deserialize(bytes).expect("BUG: could not deserialize blob");
op.push(blob);
}
let blob = op.merge().expect("BUG: could not merge blobs");
bincode::serialize(&blob).expect("BUG: could not serialize merged blob")
}
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
use serde_derive::{Serialize, Deserialize};
use tempfile::tempdir;
use crate::tokenizer::DefaultBuilder;
use crate::database::update::PositiveUpdateBuilder;
use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
#[test]
fn ingest_update_file() -> Result<(), Box<Error>> {
let dir = tempdir()?;
let rocksdb_path = dir.path().join("rocksdb.rdb");
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
struct SimpleDoc {
title: String,
description: String,
timestamp: u64,
}
let schema = {
let mut builder = SchemaBuilder::new();
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
builder.new_attribute("timestamp", STORED);
builder.build()
};
let database = Database::create(&rocksdb_path, schema.clone())?;
let tokenizer_builder = DefaultBuilder::new();
let update_path = dir.path().join("update.sst");
let doc0 = SimpleDoc {
title: String::from("I am a title"),
description: String::from("I am a description"),
timestamp: 1234567,
};
let doc1 = SimpleDoc {
title: String::from("I am the second title"),
description: String::from("I am the second description"),
timestamp: 7654321,
};
let mut update = {
let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder);
builder.update(0, &doc0).unwrap();
builder.update(1, &doc1).unwrap();
builder.build()?
};
update.set_move(true);
database.ingest_update_file(update)?;
let view = database.view();
let de_doc0: SimpleDoc = view.retrieve_document(0)?;
let de_doc1: SimpleDoc = view.retrieve_document(1)?;
assert_eq!(doc0, de_doc0);
assert_eq!(doc1, de_doc1);
Ok(dir.close()?)
}
}


@@ -1,172 +0,0 @@
use std::collections::{HashMap, BTreeMap};
use std::io::{Read, Write};
use std::{fmt, u32};
use std::path::Path;
use std::ops::BitOr;
use std::sync::Arc;
use std::fs::File;
use serde_derive::{Serialize, Deserialize};
use linked_hash_map::LinkedHashMap;
pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false };
pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true };
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaProps {
stored: bool,
indexed: bool,
}
impl SchemaProps {
pub fn is_stored(&self) -> bool {
self.stored
}
pub fn is_indexed(&self) -> bool {
self.indexed
}
}
impl BitOr for SchemaProps {
type Output = Self;
fn bitor(self, other: Self) -> Self::Output {
SchemaProps {
stored: self.stored | other.stored,
indexed: self.indexed | other.indexed,
}
}
}
pub struct SchemaBuilder {
attrs: LinkedHashMap<String, SchemaProps>,
}
impl SchemaBuilder {
pub fn new() -> SchemaBuilder {
SchemaBuilder { attrs: LinkedHashMap::new() }
}
pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr {
let len = self.attrs.len();
if self.attrs.insert(name.into(), props).is_some() {
panic!("Field already inserted.")
}
SchemaAttr(len as u32)
}
pub fn build(self) -> Schema {
let mut attrs = HashMap::new();
let mut props = Vec::new();
for (i, (name, prop)) in self.attrs.into_iter().enumerate() {
attrs.insert(name.clone(), SchemaAttr(i as u32));
props.push((name, prop));
}
Schema { inner: Arc::new(InnerSchema { attrs, props }) }
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Schema {
inner: Arc<InnerSchema>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct InnerSchema {
attrs: HashMap<String, SchemaAttr>,
props: Vec<(String, SchemaProps)>,
}
impl Schema {
pub fn open<P: AsRef<Path>>(path: P) -> bincode::Result<Schema> {
let file = File::open(path)?;
Schema::read_from(file)
}
pub fn read_from<R: Read>(reader: R) -> bincode::Result<Schema> {
let attrs = bincode::deserialize_from(reader)?;
let builder = SchemaBuilder { attrs };
Ok(builder.build())
}
pub fn write_to<W: Write>(&self, writer: W) -> bincode::Result<()> {
let mut ordered = BTreeMap::new();
for (name, field) in &self.inner.attrs {
let index = field.as_u32();
let (_, props) = self.inner.props[index as usize];
ordered.insert(index, (name, props));
}
let mut attrs = LinkedHashMap::with_capacity(ordered.len());
for (_, (name, props)) in ordered {
attrs.insert(name, props);
}
bincode::serialize_into(writer, &attrs)
}
pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
let index = attr.as_u32();
let (_, props) = self.inner.props[index as usize];
props
}
pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
self.inner.attrs.get(name.as_ref()).cloned()
}
pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
let index = attr.as_u32();
let (name, _) = &self.inner.props[index as usize];
name
}
}
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
pub struct SchemaAttr(u32);
impl SchemaAttr {
pub fn new(value: u32) -> SchemaAttr {
SchemaAttr(value)
}
pub fn max() -> SchemaAttr {
SchemaAttr(u32::MAX)
}
pub fn as_u32(&self) -> u32 {
self.0
}
}
impl fmt::Display for SchemaAttr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.0.fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn serialize_deserialize() -> bincode::Result<()> {
let mut builder = SchemaBuilder::new();
builder.new_attribute("alphabet", STORED);
builder.new_attribute("beta", STORED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let mut buffer = Vec::new();
schema.write_to(&mut buffer)?;
let schema2 = Schema::read_from(buffer.as_slice())?;
assert_eq!(schema, schema2);
Ok(())
}
}
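Putting the schema API together: attributes are declared in order, each declaration returns an increasing `SchemaAttr`, and properties combine with `|`. A brief sketch mirroring the test above (`schema_example` is only an illustrative name):

fn schema_example() {
    let mut builder = SchemaBuilder::new();
    let title = builder.new_attribute("title", STORED | INDEXED);
    let timestamp = builder.new_attribute("timestamp", STORED);
    let schema = builder.build();

    // attribute numbers follow declaration order
    assert_eq!(schema.attribute("title"), Some(title));
    assert_eq!(schema.attribute_name(timestamp), "timestamp");
    assert!(schema.props(title).is_indexed());
    assert!(!schema.props(timestamp).is_indexed());
}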


@@ -1,35 +0,0 @@
use std::path::PathBuf;
use std::error::Error;
mod negative;
mod positive;
pub use self::positive::{PositiveUpdateBuilder, NewState};
pub use self::negative::NegativeUpdateBuilder;
pub struct Update {
path: PathBuf,
can_be_moved: bool,
}
impl Update {
pub fn open<P: Into<PathBuf>>(path: P) -> Result<Update, Box<Error>> {
Ok(Update { path: path.into(), can_be_moved: false })
}
pub fn open_and_move<P: Into<PathBuf>>(path: P) -> Result<Update, Box<Error>> {
Ok(Update { path: path.into(), can_be_moved: true })
}
pub fn set_move(&mut self, can_be_moved: bool) {
self.can_be_moved = can_be_moved
}
pub fn can_be_moved(&self) -> bool {
self.can_be_moved
}
pub fn into_path_buf(self) -> PathBuf {
self.path
}
}


@@ -1,4 +0,0 @@
mod update;
mod unordered_builder;
pub use self::update::NegativeUpdateBuilder;


@@ -1,37 +0,0 @@
use std::collections::BTreeSet;
use std::io;
use byteorder::{NativeEndian, WriteBytesExt};
use crate::DocumentId;
pub struct UnorderedNegativeBlobBuilder<W> {
doc_ids: BTreeSet<DocumentId>, // TODO: prefer a linked-list
wrt: W,
}
impl UnorderedNegativeBlobBuilder<Vec<u8>> {
pub fn memory() -> Self {
UnorderedNegativeBlobBuilder::new(Vec::new())
}
}
impl<W: io::Write> UnorderedNegativeBlobBuilder<W> {
pub fn new(wrt: W) -> Self {
Self {
doc_ids: BTreeSet::new(),
wrt: wrt,
}
}
pub fn insert(&mut self, doc: DocumentId) -> bool {
self.doc_ids.insert(doc)
}
pub fn into_inner(mut self) -> io::Result<W> {
for id in self.doc_ids {
self.wrt.write_u64::<NativeEndian>(id)?;
}
Ok(self.wrt)
}
}
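The builder above simply accumulates document ids in a `BTreeSet`, so they are written out sorted and deduplicated as native-endian `u64`s. A small sketch (`negative_blob_bytes` is only an illustrative name):

fn negative_blob_bytes() -> std::io::Result<()> {
    let mut builder = UnorderedNegativeBlobBuilder::memory();
    builder.insert(2);
    builder.insert(1);
    builder.insert(2); // duplicates are collapsed by the BTreeSet
    let bytes = builder.into_inner()?;
    // two ids remain, each written as a native-endian u64
    assert_eq!(bytes.len(), 2 * std::mem::size_of::<u64>());
    Ok(())
}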


@@ -1,60 +0,0 @@
use std::path::PathBuf;
use std::error::Error;
use ::rocksdb::rocksdb_options;
use crate::database::update::negative::unordered_builder::UnorderedNegativeBlobBuilder;
use crate::database::blob::{Blob, NegativeBlob};
use crate::database::update::Update;
use crate::database::DocumentKey;
use crate::database::DATA_INDEX;
use crate::DocumentId;
pub struct NegativeUpdateBuilder {
path: PathBuf,
doc_ids: UnorderedNegativeBlobBuilder<Vec<u8>>,
}
impl NegativeUpdateBuilder {
pub fn new<P: Into<PathBuf>>(path: P) -> NegativeUpdateBuilder {
NegativeUpdateBuilder {
path: path.into(),
doc_ids: UnorderedNegativeBlobBuilder::memory(),
}
}
pub fn remove(&mut self, id: DocumentId) -> bool {
self.doc_ids.insert(id)
}
pub fn build(self) -> Result<Update, Box<Error>> {
let env_options = rocksdb_options::EnvOptions::new();
let column_family_options = rocksdb_options::ColumnFamilyOptions::new();
let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options);
file_writer.open(&self.path.to_string_lossy())?;
let bytes = self.doc_ids.into_inner()?;
let negative_blob = NegativeBlob::from_bytes(bytes)?;
let blob = Blob::Negative(negative_blob);
// write the data-index aka negative blob
let bytes = bincode::serialize(&blob)?;
file_writer.merge(DATA_INDEX, &bytes)?;
// FIXME remove this ugly thing !
// let Blob::Negative(negative_blob) = blob;
let negative_blob = match blob {
Blob::Negative(blob) => blob,
Blob::Positive(_) => unreachable!(),
};
for &document_id in negative_blob.as_ref().as_slice() {
let start = DocumentKey::new(document_id);
let end = start.with_attribute_max();
file_writer.delete_range(start.as_ref(), end.as_ref())?;
}
file_writer.finish()?;
Update::open(self.path)
}
}
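Deleting documents mirrors the insertion path: collect the ids to remove, build an SST update file, then ingest it through the database. A hedged sketch, assuming an existing `Database`; the `remove.sst` file name and the `remove_documents` helper are only for illustration.

fn remove_documents(database: &Database, dir: &std::path::Path) -> Result<(), Box<Error>> {
    let update_path = dir.join("remove.sst");
    let mut builder = NegativeUpdateBuilder::new(update_path);
    builder.remove(0);
    builder.remove(1);
    let update = builder.build()?;
    // ingestion triggers the merge operator, which subtracts these ids from the index
    database.ingest_update_file(update)?;
    Ok(())
}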


@@ -1,4 +0,0 @@
mod update;
mod unordered_builder;
pub use self::update::{PositiveUpdateBuilder, NewState};


@@ -1,49 +0,0 @@
#![allow(unused)]
use std::collections::BTreeMap;
use std::error::Error;
use std::io::Write;
use sdset::Set;
use crate::database::blob::positive::PositiveBlobBuilder;
use crate::DocIndex;
pub struct UnorderedPositiveBlobBuilder<W, X> {
builder: PositiveBlobBuilder<W, X>,
map: BTreeMap<Vec<u8>, Vec<DocIndex>>,
}
impl UnorderedPositiveBlobBuilder<Vec<u8>, Vec<u8>> {
pub fn memory() -> Self {
Self {
builder: PositiveBlobBuilder::memory(),
map: BTreeMap::new(),
}
}
}
impl<W: Write, X: Write> UnorderedPositiveBlobBuilder<W, X> {
pub fn new(map_wtr: W, doc_wtr: X) -> Result<Self, Box<Error>> {
Ok(UnorderedPositiveBlobBuilder {
builder: PositiveBlobBuilder::new(map_wtr, doc_wtr)?,
map: BTreeMap::new(),
})
}
pub fn insert<K: Into<Vec<u8>>>(&mut self, input: K, doc_index: DocIndex) {
self.map.entry(input.into()).or_insert_with(Vec::new).push(doc_index);
}
pub fn finish(self) -> Result<(), Box<Error>> {
self.into_inner().map(drop)
}
pub fn into_inner(mut self) -> Result<(W, X), Box<Error>> {
for (key, mut doc_indexes) in self.map {
doc_indexes.sort_unstable();
self.builder.insert(&key, Set::new_unchecked(&doc_indexes))?;
}
self.builder.into_inner()
}
}
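Its positive counterpart buffers word-to-`DocIndex` pairs in a `BTreeMap`, sorts each per-word list, then hands everything to the ordered `PositiveBlobBuilder`. A hedged sketch, assuming the same imports as the file above; the sample word and indexes are only for illustration.

fn positive_blob_sketch() -> Result<(), Box<Error>> {
    let mut builder = UnorderedPositiveBlobBuilder::memory();
    // the word "title" seen at two positions of attribute 0 in document 1
    builder.insert("title", DocIndex { document_id: 1, attribute: 0, attribute_index: 0 });
    builder.insert("title", DocIndex { document_id: 1, attribute: 0, attribute_index: 1 });
    // per-word lists are sorted here and pushed into the ordered builder
    let (_words_map, _doc_indexes) = builder.into_inner()?;
    // these two byte buffers are what PositiveBlob::from_bytes consumes (see the positive update below)
    Ok(())
}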


@@ -1,514 +0,0 @@
use std::collections::BTreeMap;
use std::path::PathBuf;
use std::error::Error;
use std::fmt;
use ::rocksdb::rocksdb_options;
use serde::ser::{self, Serialize};
use crate::database::update::positive::unordered_builder::UnorderedPositiveBlobBuilder;
use crate::database::blob::positive::PositiveBlob;
use crate::database::schema::{Schema, SchemaAttr};
use crate::tokenizer::TokenizerBuilder;
use crate::database::DocumentKeyAttr;
use crate::database::update::Update;
use crate::{DocumentId, DocIndex};
use crate::database::DATA_INDEX;
use crate::database::blob::Blob;
pub enum NewState {
Updated { value: Vec<u8> },
Removed,
}
pub struct PositiveUpdateBuilder<B> {
path: PathBuf,
schema: Schema,
tokenizer_builder: B,
builder: UnorderedPositiveBlobBuilder<Vec<u8>, Vec<u8>>,
new_states: BTreeMap<DocumentKeyAttr, NewState>,
}
impl<B> PositiveUpdateBuilder<B> {
pub fn new<P: Into<PathBuf>>(path: P, schema: Schema, tokenizer_builder: B) -> PositiveUpdateBuilder<B> {
PositiveUpdateBuilder {
path: path.into(),
schema: schema,
tokenizer_builder: tokenizer_builder,
builder: UnorderedPositiveBlobBuilder::memory(),
new_states: BTreeMap::new(),
}
}
pub fn update<T: Serialize>(&mut self, id: DocumentId, document: &T) -> Result<(), Box<Error>>
where B: TokenizerBuilder
{
let serializer = Serializer {
schema: &self.schema,
document_id: id,
tokenizer_builder: &self.tokenizer_builder,
builder: &mut self.builder,
new_states: &mut self.new_states
};
Ok(ser::Serialize::serialize(document, serializer)?)
}
// TODO value must be a field that can be indexed
pub fn update_field(&mut self, id: DocumentId, attr: SchemaAttr, value: String) {
let value = bincode::serialize(&value).unwrap();
self.new_states.insert(DocumentKeyAttr::new(id, attr), NewState::Updated { value });
}
pub fn remove_field(&mut self, id: DocumentId, attr: SchemaAttr) {
self.new_states.insert(DocumentKeyAttr::new(id, attr), NewState::Removed);
}
}
#[derive(Debug)]
pub enum SerializerError {
SchemaDontMatch { attribute: String },
UnserializableType { name: &'static str },
Custom(String),
}
impl ser::Error for SerializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
SerializerError::Custom(msg.to_string())
}
}
impl fmt::Display for SerializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
SerializerError::SchemaDontMatch { attribute } => {
write!(f, "serialized document try to specify the \
{:?} attribute that is not known by the schema", attribute)
},
SerializerError::UnserializableType { name } => {
write!(f, "Only struct and map types are considered valid documents and
can be serialized, not {} types directly.", name)
},
SerializerError::Custom(s) => f.write_str(&s),
}
}
}
impl Error for SerializerError {}
struct Serializer<'a, B> {
schema: &'a Schema,
tokenizer_builder: &'a B,
document_id: DocumentId,
builder: &'a mut UnorderedPositiveBlobBuilder<Vec<u8>, Vec<u8>>,
new_states: &'a mut BTreeMap<DocumentKeyAttr, NewState>,
}
macro_rules! forward_to_unserializable_type {
($($ty:ident => $se_method:ident,)*) => {
$(
fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
// stringify!($ty) yields the type name; a plain "$ty" literal would not be substituted
Err(SerializerError::UnserializableType { name: stringify!($ty) })
}
)*
}
}
impl<'a, B> ser::Serializer for Serializer<'a, B>
where B: TokenizerBuilder
{
type Ok = ();
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = StructSerializer<'a, B>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "str" })
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
// Ok(MapSerializer {
// schema: self.schema,
// document_id: self.document_id,
// new_states: self.new_states,
// })
Err(SerializerError::UnserializableType { name: "map" })
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Ok(StructSerializer {
schema: self.schema,
tokenizer_builder: self.tokenizer_builder,
document_id: self.document_id,
builder: self.builder,
new_states: self.new_states,
})
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
}
}
struct StructSerializer<'a, B> {
schema: &'a Schema,
tokenizer_builder: &'a B,
document_id: DocumentId,
builder: &'a mut UnorderedPositiveBlobBuilder<Vec<u8>, Vec<u8>>,
new_states: &'a mut BTreeMap<DocumentKeyAttr, NewState>,
}
impl<'a, B> ser::SerializeStruct for StructSerializer<'a, B>
where B: TokenizerBuilder
{
type Ok = ();
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T
) -> Result<(), Self::Error>
where T: Serialize,
{
match self.schema.attribute(key) {
Some(attr) => {
let props = self.schema.props(attr);
if props.is_stored() {
let value = bincode::serialize(value).unwrap();
let key = DocumentKeyAttr::new(self.document_id, attr);
self.new_states.insert(key, NewState::Updated { value });
}
if props.is_indexed() {
let serializer = IndexerSerializer {
builder: self.builder,
tokenizer_builder: self.tokenizer_builder,
document_id: self.document_id,
attribute: attr,
};
value.serialize(serializer)?;
}
Ok(())
},
None => Err(SerializerError::SchemaDontMatch { attribute: key.to_owned() }),
}
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
struct IndexerSerializer<'a, B> {
tokenizer_builder: &'a B,
builder: &'a mut UnorderedPositiveBlobBuilder<Vec<u8>, Vec<u8>>,
document_id: DocumentId,
attribute: SchemaAttr,
}
impl<'a, B> ser::Serializer for IndexerSerializer<'a, B>
where B: TokenizerBuilder
{
type Ok = ();
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
for (index, word) in self.tokenizer_builder.build(v) {
let doc_index = DocIndex {
document_id: self.document_id,
attribute: self.attribute.as_u32() as u8,
attribute_index: index as u32,
};
// index the lowercased representation of the word
let word_lower = word.to_lowercase();
// and also its unidecoded lowercased version when it differs
let word_unidecoded = unidecode::unidecode(word).to_lowercase();
if word_lower != word_unidecoded {
self.builder.insert(word_unidecoded, doc_index);
}
self.builder.insert(word_lower, doc_index);
}
Ok(())
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "seq" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnserializableType { name: "map" })
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
}
}
impl<B> PositiveUpdateBuilder<B> {
pub fn build(self) -> Result<Update, Box<Error>> {
let env_options = rocksdb_options::EnvOptions::new();
let column_family_options = rocksdb_options::ColumnFamilyOptions::new();
let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options);
file_writer.open(&self.path.to_string_lossy())?;
let (blob_fst_map, blob_doc_idx) = self.builder.into_inner()?;
let positive_blob = PositiveBlob::from_bytes(blob_fst_map, blob_doc_idx)?;
let blob = Blob::Positive(positive_blob);
// write the data-index aka positive blob
let bytes = bincode::serialize(&blob)?;
file_writer.merge(DATA_INDEX, &bytes)?;
// write all the documents fields updates
for (key, state) in self.new_states {
match state {
NewState::Updated { value } => {
file_writer.put(key.as_ref(), &value)?
},
NewState::Removed => file_writer.delete(key.as_ref())?,
}
}
file_writer.finish()?;
Update::open(self.path)
}
}


@@ -1,99 +0,0 @@
pub mod automaton;
pub mod database;
pub mod data;
pub mod rank;
pub mod tokenizer;
pub mod vec_read_only;
mod common_words;
pub use rocksdb;
pub use self::tokenizer::Tokenizer;
pub use self::common_words::CommonWords;
pub type DocumentId = u64;
/// This structure represents the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
#[repr(C)]
pub struct DocIndex {
/// The document identifier where the word was found.
pub document_id: DocumentId,
/// The attribute identifier in the document
/// where the word was found.
///
/// This is a `u8`, therefore a document
/// cannot have more than `2^8` attributes.
pub attribute: u8,
/// The index where the word was found in the attribute.
///
/// Only the first 1000 words are indexed.
pub attribute_index: u32,
}
/// This structure represents a matching word with information
/// on the location of the word in the document.
///
/// The order of the fields is important because it defines
/// the way these structures are ordered between themselves.
///
/// The word itself is not important.
// TODO do data oriented programming ? very arrays ?
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct Match {
/// The word index in the query sentence.
/// Same as the `attribute_index` but for the query words.
///
/// Used to retrieve the automaton that matches this word.
pub query_index: u32,
/// The distance between the word and the query word
/// (i.e. the Levenshtein distance).
pub distance: u8,
/// The attribute in which the word is located
/// (i.e. Title is 0, Description is 1).
///
/// This is a `u8`, therefore a document
/// cannot have more than `2^8` attributes.
pub attribute: u8,
/// Where this word is located in the attribute string
/// (i.e. at the start or the end of the attribute).
///
/// The index in the attribute is limited to a maximum of `2^32`
/// because we index only the first 1000 words
/// of an attribute.
pub attribute_index: u32,
/// Whether the matching word is an exact match or a prefix.
pub is_exact: bool,
}
impl Match {
pub fn zero() -> Self {
Match {
query_index: 0,
distance: 0,
attribute: 0,
attribute_index: 0,
is_exact: false,
}
}
pub fn max() -> Self {
Match {
query_index: u32::max_value(),
distance: u8::max_value(),
attribute: u8::max_value(),
attribute_index: u32::max_value(),
is_exact: true,
}
}
}
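Because `Ord` is derived, `Match` values compare field by field in declaration order: `query_index` first, then `distance`, then `attribute`, and so on, which is exactly why the field order matters. A small illustration (`match_ordering_example` is only an illustrative name):

fn match_ordering_example() {
    // with equal query_index, the lower Levenshtein distance sorts first
    let close = Match { distance: 0, ..Match::zero() };
    let far = Match { distance: 2, ..Match::zero() };
    assert!(close < far);

    // but query_index is compared before distance, so it dominates
    let later_word = Match { query_index: 1, distance: 0, ..Match::zero() };
    assert!(far < later_word);
}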

Some files were not shown because too many files have changed in this diff.