Compare commits


330 Commits
v0.2 ... v0.4.0

Author SHA1 Message Date
aaeb25828f Merge pull request #183 from meilisearch/number-of-documents
Compute the number of documents on updates
2019-09-14 16:32:18 +02:00
af26c39482 test: Improve the tests of the number of documents counting 2019-09-14 15:29:46 +02:00
2006259a23 feat: Improve the number of documents counting 2019-09-14 15:26:41 +02:00
707e2f4d77 feat: Update the number of documents in the KV 2019-09-14 15:26:39 +02:00
8d8aed36a8 feat: Count the number of deleted/inserted documents 2019-09-14 15:24:39 +02:00
2658ef0176 Merge pull request #182 from meilisearch/replace-sled-by-rocksdb
Replace sled by RocksDB
2019-09-14 11:32:26 +02:00
400d542fef feat: Update the README to reflect the kv store update 2019-09-12 16:28:23 +02:00
f46868407c feat: Make RocksDB works seemlessly like sled 2019-09-05 18:43:10 +02:00
e3fa07077c feat: Introduce the CfTree and CfIter types 2019-09-05 14:53:09 +02:00
e5763e73eb chore: Prefer using const names to avoid typos 2019-09-05 13:22:53 +02:00
fd880e0a0e Merge pull request #175 from meilisearch/moving-back-to-sled
Moving back to sled
2019-09-05 13:14:48 +02:00
e33cc89846 feat: Introduce update callbacks 2019-09-05 11:48:26 +02:00
f40b373f9f feat: Introduce the UpdateStatus type 2019-09-05 11:48:26 +02:00
cd8535d410 feat: Introduce the update_status/_blocking functions 2019-09-05 11:48:25 +02:00
f07b99fe97 fix: Make the tests work with the new update system 2019-09-05 11:48:25 +02:00
f45a00df3b fix: Cloned ArcSwaps are unsynchronized versions 2019-09-05 11:46:02 +02:00
cd864c40bc feat: Make the update update serialization be based on message pack 2019-09-05 11:46:02 +02:00
91b44a2759 chore: Change the Box<Error> to be marked dyn 2019-09-05 11:46:01 +02:00
d8cd8c5def chore: Move the updates in their own module 2019-09-05 11:46:01 +02:00
b0be06540a chore: Simplify the update application 2019-09-05 11:46:01 +02:00
4deee93a55 feat: Introduce synonyms deletion using the update system 2019-09-05 11:33:11 +02:00
451c0a6d03 feat: Introduce synonyms addition using the update system 2019-09-05 11:33:10 +02:00
0db3e6c58c feat: Introduce documents deletion using the update system 2019-09-05 11:33:10 +02:00
f83d6df4ef feat: Introduce documents addition using the update system 2019-09-05 11:33:10 +02:00
5a9e25c315 feat: Introduce the UpdatesIndex type 2019-09-05 11:14:11 +02:00
50e3c2c3de chore: Upgrade the meilidb-data dependencies 2019-09-05 10:49:46 +02:00
093ee9732f Merge pull request #180 from meilisearch/store-every-document
Change the STORED attribute property by DISPLAYED
2019-09-04 14:45:00 +02:00
333189ee51 fix: Change every stored schema property by displayed 2019-09-04 11:16:36 +02:00
50b8a66794 feat: Change the STORED attribute property by DISPLAYED 2019-09-03 11:14:20 +02:00
8be3fc1a66 Merge pull request #179 from meilisearch/deunicode-before-tokenize
Improve the tokenizer by split after deunicode
2019-09-02 17:20:30 +02:00
b5503989f9 feat: Improve the tokenizer by split after deunicode 2019-09-02 16:54:54 +02:00
5b8bc09826 Merge pull request #176 from meilisearch/no-more-hanging-threads
Replace the rayon::scope by always checking time
2019-09-01 20:02:03 +02:00
c8ee21f227 feat: Replace the rayon::scope by always checking time 2019-09-01 18:52:38 +02:00
a420fbf1e8 Merge pull request #174 from meilisearch/arc-fst-sets
Do not clone probably large fst::Sets, Arc them
2019-08-30 14:52:28 +02:00
ca34c28335 feat: Do not clone probably large fst::Sets, Arc them 2019-08-30 14:37:28 +02:00
3e1b81c4ce Merge pull request #173 from meilisearch/fix-ranked-map-set
Use the right ranked-map key name
2019-08-30 14:21:14 +02:00
9b353dfda6 chore: Use const names to avoid typos 2019-08-30 12:36:10 +02:00
d8dcc6f34b fix: Use the right ranked-map key name 2019-08-30 12:21:00 +02:00
fba1272a3e Merge pull request #172 from meilisearch/expose-internal-functions
Expose some internal functions
2019-08-29 15:26:42 +02:00
e20a038970 fix: Expose some internal functions 2019-08-29 15:11:51 +02:00
6f34dccc89 Merge pull request #171 from meilisearch/stringify-document-id
Transform identifiers fields into a string before hashing it
2019-08-29 13:42:46 +02:00
f5b0eb044a fix: Transform the identifier value into a string before hashing it 2019-08-29 11:41:20 +02:00
bae86e978e Merge pull request #170 from meilisearch/async-word-index-fetching-with-rayon-scope
Async word index fetching with rayon scope
2019-08-28 14:37:38 +02:00
8030a822ab test: Add a way to setup the fetch timeout of the query-database example 2019-08-28 13:42:20 +02:00
9c5ec110e5 feat: Introduce a way to enable or disable query timeouts 2019-08-28 13:24:34 +02:00
67302d09f3 feat: Multiword rewrite while there is time 2019-08-19 11:12:23 +02:00
7dc9ea78fa feat: Make the automaton DFA construction lazy 2019-08-19 11:12:23 +02:00
0ee56314fb feat: Try to simplify Store trait bound with a rayon scope 2019-08-19 11:10:54 +02:00
b7b60b5fe5 feat: Introduce a new thread to avoid waiting on doc indexes fetchs 2019-08-16 16:35:19 +02:00
d9c9fafd78 feat: Fetch doc indexes while there is time 2019-08-16 15:01:25 +02:00
bb0a79c577 feat: Process automatons in the order they were sort 2019-08-16 12:25:35 +02:00
81d44a0854 feat: Order automatons by importance 2019-08-16 12:19:34 +02:00
ebc95cb8f2 feat: Display the documents fields in the order they were declared 2019-08-16 11:25:42 +02:00
a488c00a2e feat: Use RustyLine in the query-database example 2019-08-16 11:25:42 +02:00
bf3c2c3725 feat: Move the multi-word rewriting algorithm into its own function 2019-08-16 11:25:42 +02:00
89df496f0c feat: Separate highlights from matches to make the code easier to follow 2019-08-16 11:25:42 +02:00
9959f2e952 feat: Move the RawDocument type to its own module 2019-08-16 11:25:42 +02:00
795557c046 feat: Remove query splitting from the automaton generation 2019-08-16 11:25:42 +02:00
225a3bf184 test: Produce tests that work with the new cumulative word index system 2019-08-16 11:25:42 +02:00
e65d7418b7 feat: Remove the query index from the Automaton type 2019-08-16 11:25:42 +02:00
f478bbf826 feat: Introduce the QueryEnhancer in the query synonym system 2019-08-16 11:25:42 +02:00
5e691c2140 feat: Introduce the QueryEnhancer type 2019-08-16 11:25:42 +02:00
e0cadaa68d Merge pull request #165 from meilisearch/reorder-schema-attributes
Reorder schema attributes
2019-07-01 16:12:33 +02:00
9175e4686b feat: Collect TmpMatches only on tests, producing data useful for tests 2019-07-01 14:55:47 +02:00
e8afca614c chore: Little clean ups of meilidb-core 2019-07-01 14:34:06 +02:00
4f4b630ae9 fix: Make the examples compile with the new Highlight type 2019-07-01 12:06:17 +02:00
6b6db2f8e6 feat: Introduce the Highlight type to simplify the data oriented design 2019-07-01 12:06:16 +02:00
b7ed22bc59 feat: Introduce on the fly attributes reordering with meilidb-core 2019-07-01 12:03:31 +02:00
97cc3c7cce Merge pull request #166 from meilisearch/split-query-words
Split query words
2019-06-28 18:30:13 +02:00
f5d52396f5 feat: Support query words splits 2019-06-28 18:04:35 +02:00
9cc154da05 chore: Rewrite tests to use iterators and be easily testable 2019-06-28 18:04:35 +02:00
5aa49d232c feat: Rewrite Automaton generation related code 2019-06-28 18:04:35 +02:00
1cb42cbb30 Merge pull request #164 from meilisearch/concat-query-words
Support query words concatenation
2019-06-28 18:03:49 +02:00
9f320590d3 feat: Support query words concatenation 2019-06-27 10:14:17 +02:00
1b0fd2e0ba Merge pull request #160 from meilisearch/synonyms
Support all types of synonyms
2019-06-26 14:59:45 +02:00
b249b2a81b feat: Support removing specific synonym alternatives 2019-06-26 10:45:51 +02:00
0a5d4eb7ed feat: Normalize synonym strings and query strings to search for synonyms 2019-06-26 10:45:51 +02:00
3dcbc737f3 feat: Make synonyms be not considered like exact matches 2019-06-26 10:45:51 +02:00
43f11e929d fix: Do not trigger a synonym when its not the last word and is a prefix 2019-06-26 10:45:51 +02:00
8f2a551cca feat: Trigger synonym replacement only when the last word is tipped 2019-06-26 10:45:50 +02:00
8f044c6853 fix: Only create non-prefix DFA when generating synonyms alternatives 2019-06-26 10:45:50 +02:00
a76c00a787 feat: Create types to edit synonyms and keep them in the database 2019-06-26 10:45:50 +02:00
0633f16b4d feat: Make multi-word support multi-word synonyms 2019-06-26 10:45:50 +02:00
59fafb8b30 feat: Support one word has multi-word alternatives 2019-06-26 10:45:50 +02:00
d2bd99cc2a fix: Append DocIndexes when building InMemorySetStore from an Iterator 2019-06-26 10:45:50 +02:00
62930ecc4e feat: Deduplicate automatons when synonyms produce duplicated ones 2019-06-26 10:45:49 +02:00
6cb57aa8a4 feat: Unique word has multi-word synonyms basically work 2019-06-26 10:45:49 +02:00
9861c3878e tests: Add more tests about synonyms 2019-06-26 10:45:49 +02:00
707d7b062b feat: Made query handle synonyms via the Store 2019-06-26 10:45:49 +02:00
18736bdcd0 feat: Introduce the synonyms concept to the Store trait 2019-06-26 10:45:49 +02:00
e8b2e86007 feat: Introduce a basic way to handle synonyms 2019-06-26 10:45:48 +02:00
ae8b4f56f2 Merge pull request #163 from meilisearch/export-compute-docid
Expose a function to compute the DocumentId from an Hashable value
2019-06-25 12:25:38 +02:00
28a0074497 feat: Expose a function to compute the DocumentId from an Hashable value 2019-06-25 11:21:12 +02:00
71c039db09 Merge pull request #162 from meilisearch/trustful-hash
Prefer using a reliable SipHash to compute document ids
2019-06-22 11:51:52 +02:00
15646c258b fix: Prefer using a reliable SipHash to compute document ids 2019-06-22 11:22:21 +02:00
25a5605b35 Merge pull request #161 from meilisearch/remove-tide
Remove tide as it break compilation on the latest nightly
2019-06-18 14:04:47 +02:00
b630e32c6a fix: Remove tide as it break compilation on the latest nightly 2019-06-18 13:40:46 +02:00
c39254bf98 Merge pull request #159 from meilisearch/create-specific-schema-crate
Move the Schema to its own workspace crate
2019-06-03 09:17:14 +02:00
994a0e78f1 feat: Move the Schema to its own workspace crate 2019-05-29 15:37:28 +02:00
ab2ca15c5c Merge pull request #158 from meilisearch/moving-back-to-rocksdb
Moving back to RocksDB
2019-05-29 14:56:55 +02:00
07f447c457 feat: Force RocksDB compaction 2019-05-28 17:38:59 +02:00
62c8f1ba04 feat: Fix the index opening when index already exists 2019-05-26 11:36:47 +02:00
e08edc2d6b feat: Introduce some stats to ease debugging 2019-05-25 12:12:24 +02:00
a147c09b06 feat: Make more functions accessible on the custom settings 2019-05-24 14:37:04 +02:00
9fca74443e feat: Wrap the database index access to improve usability 2019-05-24 14:26:05 +02:00
6f258f71d5 feat: Implement some convenient accessors for custom settings 2019-05-23 15:43:41 +02:00
ce61c16dbe feat: Disable all the default RocksDB compression features 2019-05-23 15:35:53 +02:00
4c973238a1 feat: Introduce a basic RocksDB based version 2019-05-23 14:57:29 +02:00
3a8da82792 Merge pull request #157 from meilisearch/update-readme
Fix some badly spelled sentences
2019-05-22 14:01:33 +02:00
f10da122ff doc: Fix some badly spelled sentences 2019-05-22 11:41:03 +02:00
ec20a8cacb Merge pull request #156 from meilisearch/clippy-pass
Do a little clippy pass
2019-05-22 11:33:55 +02:00
102fb506db chore: Do a little clippy pass 2019-05-22 11:00:58 +02:00
34ba520f44 Merge pull request #155 from meilisearch/update-sdset
Use safest SetBuf constructor instead of new_unchecked
2019-05-21 18:23:39 +02:00
fa099555c0 feat: Use safest SetBuf constructor instead of new_unchecked 2019-05-21 18:15:48 +02:00
8387c5b14e Merge pull request #153 from meilisearch/example-expose-system-stats
Output more informations from the examples on document injection
2019-05-21 16:50:25 +02:00
5040095228 feat: Output more informations from the examples on document injection 2019-05-21 16:37:17 +02:00
788fae59a1 Merge pull request #154 from meilisearch/reintroduce-sort-by-attr
Reintroduce the `SortByAttr` custom criterion
2019-05-21 16:32:12 +02:00
e042f44e0d feat: Reintroduce the SortByAttr custom criterion 2019-05-21 16:22:23 +02:00
b1fc3e5cec Merge pull request #152 from meilisearch/documents-deletion-updates-ranked-map
Remove the documents from the ranked map on documents deletion
2019-05-21 13:59:21 +02:00
d7b1b7a2a9 feat: Remove the documents from the ranked map on documents deletion 2019-05-21 13:33:42 +02:00
97744ad24f Merge pull request #151 from meilisearch/expose-sled-compression-factor
Expose the sled compression setting
2019-05-20 15:03:43 +02:00
2e79b2a871 feat: Expose the sled compression setting 2019-05-20 14:41:15 +02:00
349f0f7068 Merge pull request #148 from meilisearch/split-fst-docindexes
Split fst doc-indexes
2019-05-20 14:24:48 +02:00
94f9587db1 feat: Implement Debug on RawDocument for more convenience 2019-05-20 11:21:41 +02:00
6df8f62022 test: Add more test to some criteria 2019-05-20 11:21:40 +02:00
8c71473498 feat: Introduce the Criterion::name to allow better debugging 2019-05-20 11:21:40 +02:00
08d89053da feat: Introduce a little simple http server for demo 2019-05-16 17:09:41 +02:00
4b36fa0739 test: Add tests about additions and deletions of documents 2019-05-16 13:44:21 +02:00
921b063a71 feat: Make the DocumentsDeletion public interface to take serde types 2019-05-16 12:04:08 +02:00
3de633c869 feat: Reexport sled to reduce user level library incompatibilities 2019-05-16 12:04:08 +02:00
021f0545eb doc: Update the deep-dive explanation text 2019-05-16 12:04:08 +02:00
b701eb85b8 doc: Update the README features links 2019-05-16 12:04:08 +02:00
4e80378a77 chore: Rename the ebay example into kaggle 2019-05-16 12:04:07 +02:00
830d2f28b9 feat: Introduce a custom tree for user custom settings 2019-05-16 12:04:07 +02:00
c5ba34d0b0 chore: Replace crate only public interface to be completely public 2019-05-16 12:04:07 +02:00
2e31bb519a chore: Split the database structure internal types 2019-05-16 12:04:07 +02:00
169bd4cb39 feat: Store all documents words by document rather than by attribute 2019-05-15 15:42:13 +02:00
aa90f22865 feat: Remove the Index dependency of the Serializer 2019-05-15 15:42:12 +02:00
9bba90c47e fix: Fix a bug in the Database open-index method 2019-05-15 15:42:12 +02:00
2844cb5bca fix: Make the examples compile 2019-05-15 15:42:12 +02:00
dff81bb161 feat: Prefer set/del methods instead of set with an Option type 2019-05-15 15:42:12 +02:00
1f2abce7c3 feat: Introduce the DocumentsDeletion type 2019-05-15 15:42:11 +02:00
e67ada8823 feat: Introduce the DocumentsAddition type 2019-05-15 15:42:11 +02:00
42e39f6eb5 feat: Introduce a simplified version of the Store trait 2019-05-15 15:42:11 +02:00
f317a7a322 feat: implement open/create_index on the Database type 2019-05-15 15:42:11 +02:00
8434ecbb43 feat: Introduce the RankedMap real type 2019-05-15 15:42:10 +02:00
0c18026240 feat: Introduce Tree wrappers for each index component 2019-05-15 15:42:10 +02:00
6eb25687f8 feat: Handle word doc-indexes sled tree errors 2019-05-15 15:42:10 +02:00
737db5668b chore: Remove the WriteToBytes trait 2019-05-15 15:42:10 +02:00
f16e0333e4 chore: Remove the SharedData/Cursor types 2019-05-15 15:42:09 +02:00
27ffcaabe9 chore: Remove the DocIndexes type 2019-05-15 15:42:09 +02:00
db031a5b95 chore: Remove the DocIds type 2019-05-15 15:42:09 +02:00
2e9fbd07cd chore: Remove most of the warnings 2019-05-15 15:42:09 +02:00
74acf83464 chore: Remove the NewIndexEvent type 2019-05-15 15:42:08 +02:00
3dc057ca9c feat: Introduce the new Index system 2019-05-15 15:42:08 +02:00
e142339106 Merge pull request #150 from felixonmars/patch-1
chore: Fix some typos
2019-05-06 15:00:53 +02:00
39038750a8 chore: Fix some typos 2019-05-06 20:12:33 +08:00
f68733bf11 Merge pull request #149 from meilisearch/ci-only-nightly
Update ci with rust nightly only
2019-05-02 15:43:53 +02:00
85edb3e90c Update ci with rust nightly only 2019-05-02 11:43:45 +02:00
d7ce6d016b Merge pull request #147 from meilisearch/moving-to-sled
Make the repository a workspace and move to sled
2019-04-29 15:21:02 +02:00
9023a12ad4 feat: Introduce the unrankable error variant 2019-04-29 14:32:04 +02:00
0547671246 feat: Take ranked attributes into account 2019-04-29 14:32:04 +02:00
068f1bc202 feat: Index unidecoded words 2019-04-29 14:32:04 +02:00
7035f76077 squash-me: Make better measurements of the retrieving spent time 2019-04-29 14:32:04 +02:00
f0268d49fe fix: Always lowercase indexed tokens 2019-04-29 14:32:04 +02:00
7dbf5d6319 fix: Make the examples build 2019-04-29 14:32:03 +02:00
ed6b6038ee feat: Finalize index merging on document insertion 2019-04-29 14:32:03 +02:00
ad24ef8a25 feat: Index words of structs, maps and tuples 2019-04-29 14:32:03 +02:00
645bab7748 feat: Index documents using the Serializer struct 2019-04-29 14:32:03 +02:00
abd7d1de48 feat: Introduce the extract_document_id function 2019-04-29 14:32:03 +02:00
ea0ee070ef feat: Introduce the Serializer
Which will serialize documents fields as message pack in the kv-store
2019-04-29 14:32:03 +02:00
2a69170f14 feat: Introduce the DocumentsDeletion type 2019-04-29 14:32:02 +02:00
725e7b4229 chore: Move the Deserializer into the the serde module 2019-04-29 14:32:02 +02:00
187e6740bd feat: Allow users to construct query builders from database indexes 2019-04-29 14:32:02 +02:00
4b40d5b0d4 feat: Introduce the Index struct 2019-04-29 14:32:02 +02:00
ee2bad20c7 feat: Store the RankedMap into the inner sled tree 2019-04-29 14:32:02 +02:00
b7805fee93 feat: Store already opened indexes and word indexes 2019-04-29 14:32:02 +02:00
0104e93ba9 feat: Introduce index events to update the WordIndex 2019-04-29 14:32:02 +02:00
25a4961453 feat: Introduce the Indexer struct 2019-04-29 14:32:01 +02:00
7338e522bd squash-me: Add set/get/del_document_attribute to Index methods 2019-04-29 14:32:01 +02:00
58c020a2e1 feat: Store the word index into the database index 2019-04-29 14:32:01 +02:00
f7eced03fd chore: Using a fork of the fst library that support Arc<[u8]> 2019-04-29 14:32:01 +02:00
9be7c02461 chore: Update sled to 0.22.1 2019-04-29 14:32:01 +02:00
9483f2df60 feat: Introduce a custom Error type 2019-04-29 14:32:01 +02:00
f17a05c342 feat: Introduce the RankedMap type 2019-04-29 14:32:00 +02:00
e41c551757 feat: Introduce the Number type 2019-04-29 14:32:00 +02:00
95dfbd1fe0 feat: Introduce the meilidb-data schema module 2019-04-29 14:32:00 +02:00
287d5dee4d feat: Introduce the meilidb-data workspace member 2019-04-29 14:32:00 +02:00
77405cc103 chore: Remove the database module from meilidb 2019-04-29 14:32:00 +02:00
abf7191eec feat: Make the Tokenizer able to support tokenizing sequences 2019-04-29 14:32:00 +02:00
c6bb2b6f9c chore: Make the debug symbols available for release binaries 2019-04-29 14:31:59 +02:00
acede0f3e8 fix: Correctly assert the DocIndex memory size 2019-04-29 14:31:59 +02:00
e56106cbdc chore: Update the toml dependency 2019-04-29 14:31:59 +02:00
87f9528791 feat: Use the new Tokenizer 2019-04-29 14:31:59 +02:00
397522f277 fet: Move meilidb example into the meilidb workspace 2019-04-29 14:31:59 +02:00
a745819ddf feat: Simplify the Tokenizer to use the LinearStrGroupBy type 2019-04-29 14:31:37 +02:00
5d5bcf7011 feat: Remove the FilterFunc alias type 2019-04-29 14:31:37 +02:00
19e67dcf0b feat: Move query splitting into the tokenizer workspace 2019-04-29 14:31:37 +02:00
1897da5348 feat: Move tokenizer things into the meilidb-tokenizer workspace 2019-04-29 14:31:37 +02:00
d8cbb03c42 chore: Update the .gitignore file 2019-04-29 14:31:36 +02:00
bc227bef21 chore: Add a nightly feature to meilidb-core 2019-04-29 14:31:36 +02:00
3bcb1dc802 chore: Allow the activation of the meilidb-core i128 feature 2019-04-29 14:31:36 +02:00
d0786b4156 chore: Move the SortByAttr into meilidb 2019-04-29 14:31:36 +02:00
14790eeae3 chore: Move index related things to the meilidb-core workspace member 2019-04-29 14:31:35 +02:00
3056b351fa Merge pull request #143 from ndudnicz/examples-movies
doc: add a new +19k movies example dataset
2019-04-15 10:11:38 +02:00
52fca57114 doc: add a new +19k movies example dataset 2019-04-13 21:11:28 +02:00
ee7a570b2f doc: Fix a little typo 2019-03-24 16:45:33 +01:00
61dcf72e04 Merge pull request #131 from meilisearch/update-readme
Add a Features section to the readme
2019-03-24 16:44:00 +01:00
bace8ad510 doc: Add a features section to the readme 2019-03-24 16:28:19 +01:00
e0b759839d Merge pull request #129 from meilisearch/ci-badge
Add CI badge
2019-03-10 22:46:57 +01:00
05b0a3e7d2 Add CI badge 2019-03-10 21:38:04 +01:00
2518037b91 Merge pull request #128 from meilisearch/azure-pipeline
Azure pipeline
2019-03-10 17:38:47 +01:00
3e452f362c Replace TravisCI by Azure CI 2019-03-10 15:46:59 +01:00
4900544574 Merge pull request #126 from Kerollmops/searchable-attributes
Searchable attributes
2019-03-05 17:11:15 +01:00
858589dc6b feat: Limit the QueryBuilder to search only into some attributes 2019-03-05 16:34:29 +01:00
915f2e70a3 Merge pull request #125 from Kerollmops/limit-memory-usage
Limit memory usage
2019-03-05 16:17:56 +01:00
aae301878c fix: Flush the database after each WriteBatch injected 2019-03-05 14:55:57 +01:00
383a49b44f fix: Compact the whole database for each WriteBatch injected 2019-03-05 14:55:57 +01:00
a45cc4b618 fix: Reduce the size of the DocIndex type 2019-03-05 14:55:57 +01:00
aef7d7825f Merge pull request #124 from Kerollmops/version-bump
Bump version to 0.3.2
2019-02-25 14:22:02 +01:00
f28ce661af chore: Bump version to 0.3.2 2019-02-25 13:56:23 +01:00
74eb9c8d0f Merge pull request #122 from Kerollmops/query-builder-no-view-dep
Remove the DatabaseView dependencies from the QueryBuilder
2019-02-24 16:56:12 +01:00
d664221c64 feat: Remove the DatabaseView dependencies from the QueryBuilder 2019-02-24 16:25:28 +01:00
58bff3d4ac Merge pull request #123 from Kerollmops/update-deps
Update all the dependencies
2019-02-24 16:24:47 +01:00
2c206eb98c chore: Update all the dependencies 2019-02-24 16:00:03 +01:00
19724e5af9 Merge pull request #121 from Kerollmops/no-cjk-unidecode
Do not save unidecoded cjk kanjis
2019-02-23 22:34:47 +01:00
c9e0ad132c feat: Do not save unidecoded cjk kanjis 2019-02-23 19:11:54 +01:00
24f265a963 Merge pull request #120 from Kerollmops/custom-log10-function
Optimize the SumOfTypos criterion
2019-02-23 19:01:12 +01:00
f8a743ee00 feat: Optimize the SumOfTypos criterion 2019-02-23 18:36:45 +01:00
64971de7ed Merge pull request #119 from Kerollmops/dont-be-hurry
Fix the tokenizer (next time don't be so hurry to merge)
2019-02-23 17:07:42 +01:00
a960c325f3 feat: Make query strings support cjk kanjis 2019-02-23 14:57:13 +01:00
a799470997 fix: Change the tokenizer to mesure cjk chars positions 2019-02-22 23:06:42 +01:00
10414791a2 fix: Remove debug println from the tokenizer 2019-02-22 22:34:37 +01:00
743974e60d Merge pull request #118 from Kerollmops/tokenizer-support-kanjis
Make the Tokenizer support Kanjis
2019-02-22 20:16:55 +01:00
0e267cae4b feat: Make the Tokenizer support Kanjis 2019-02-22 19:37:19 +01:00
12a352ae2f Merge pull request #117 from Kerollmops/tokenizer-support-parentheses
Make the tokenizer support parentheses
2019-02-22 19:36:15 +01:00
5070b27728 feat: Make the tokenizer support parentheses
Interpreting them as hard ponctuation (like a dot).
2019-02-22 18:18:17 +01:00
7a6b734078 Merge pull request #116 from Kerollmops/raw-field-value-getter
Allow users to retrieve the raw field value of a document
2019-02-22 18:02:46 +01:00
24823da6f7 feat: Allow users to retrieve the raw field value of a document 2019-02-22 15:30:20 +01:00
8701cb3a8f Merge pull request #115 from qdequele/database-path
Add accessor for database path and index path
2019-02-22 15:11:40 +01:00
315fc1fbe3 feat: Add accessor for database and index path 2019-02-22 13:49:04 +01:00
23833bac10 Merge pull request #114 from Kerollmops/hot-fix-ranked-attribute
Do not error when an attribute is registered for ranking
2019-02-21 23:17:10 +01:00
8235b6efc9 fix: Do not error when an attribute is registered for ranking 2019-02-21 20:14:08 +01:00
7f937eea5a Merge pull request #113 from Kerollmops/hot-fix-query-builder
Remove the QueryBuilder boxed criteria default static restriction
2019-02-21 20:11:10 +01:00
a1cf634ac1 feat: Remove the QueryBuilder boxed criteria default static restriction 2019-02-21 19:26:22 +01:00
c86472e997 Merge pull request #112 from Kerollmops/bump-version
Bump version to 0.3.1
2019-02-21 15:18:37 +01:00
26cb398a6f chore: Bump version to 0.3.1 2019-02-21 14:52:40 +01:00
f6e664d298 Merge pull request #111 from qdequele/config
Add a config per index
2019-02-21 14:39:37 +01:00
9437cecf87 chore: Use Default derive on Config struct 2019-02-21 14:01:55 +01:00
13309511b3 chore: Use serde derive lowercase on RankingOrdering 2019-02-21 14:01:55 +01:00
1941cb16c0 feat: Add Config.update_with(_) method to merge 2 config 2019-02-21 14:01:55 +01:00
55823c5d5d feat: add admin key on config 2019-02-21 14:01:55 +01:00
4721da1679 feat: Add access key on config 2019-02-21 14:01:55 +01:00
482f750231 chore: Set config field pub 2019-02-21 14:01:55 +01:00
d5119db165 feat: Allow to retrieve config from Database and DatabaseView 2019-02-21 14:01:55 +01:00
37578ed74f feat: store config into database 2019-02-20 14:07:19 +01:00
f5992ce822 Merge pull request #109 from Kerollmops/implement-text-cropping
Introduce text cropping that shows the first matches
2019-02-18 19:40:30 +01:00
badb0035c5 feat: Introduce text cropping that shows the first match 2019-02-18 18:59:50 +01:00
4bc14aa261 Merge pull request #108 from Kerollmops/refactor-index
Refactor the Index and Updates
2019-02-18 18:59:20 +01:00
a0c4ec0be0 feat: Introduce the updated_documents methods 2019-02-18 18:01:40 +01:00
264fffa826 feat: Replace the elapsed dependency by std::time::Instant 2019-02-17 16:37:45 +01:00
bddb37e44f feat: Move SharedData to its own module 2019-02-17 16:37:45 +01:00
6393b0cbc0 feat: Prefer binary to exponential search 2019-02-17 16:37:45 +01:00
a8df438814 feat: Implement WriteToBytes/FromSharedDataCursor 2019-02-17 16:37:44 +01:00
8014857ebf feat: Introduce the WriteToBytes trait 2019-02-17 16:37:44 +01:00
9e7261a48f feat: Introduce the FromSharedDataCursor trait 2019-02-17 16:37:44 +01:00
c4e70d0475 feat: Introduce the SharedDataCursor type 2019-02-17 16:37:44 +01:00
cbb0aaa217 feat: Introduce the Index structure along with the Events types 2019-02-17 16:36:47 +01:00
ce50e74491 Merge pull request #107 from Kerollmops/update-dependencies
Update dependencies
2019-02-13 16:05:51 +01:00
e103e1c277 chore: Replace the crossbeam::ArcCell by arc-swap::ArcSwap 2019-02-13 15:19:02 +01:00
64929fe5dc chore: Update slice-group-by to 0.2 2019-02-13 15:06:34 +01:00
b108f1e6c9 Merge pull request #106 from Kerollmops/fix-criterion
Fix the SumOfTypos and WordsProximity criteria
2019-02-12 22:06:32 +01:00
58b417e045 feat: Replace the linear_group_by by the new linear_group method 2019-02-12 21:23:36 +01:00
2e5a616d8e fix: Compute the proximity on the words with the min distance 2019-02-12 21:22:45 +01:00
092d446a7e chore: Update the slice-group-by dependency 2019-02-12 21:22:45 +01:00
85a1f126bf fix: Make the SumOfTypos criterion use a more clever algorithm 2019-02-12 21:22:42 +01:00
cf58cf86da Merge pull request #105 from Kerollmops/custom-ranking-field-into-hashmap
Save the custom ranking field into an HashMap
2019-02-11 17:36:26 +01:00
db6210c7ee feat: Introduce the Number type 2019-02-11 16:58:44 +01:00
83cd071827 feat: Introduce the SortByAttr custom ranking helper 2019-02-11 16:55:31 +01:00
084c3a95b6 feat: Add a new ranked attribute to the schema 2019-02-11 16:55:30 +01:00
78908aa34e Merge pull request #103 from Kerollmops/ranking-typo-rules
Add a reading on the default typos and ranking rules
2019-02-11 15:05:04 +01:00
cf27706f91 doc: Add a reading on the default typos and ranking rules 2019-02-11 11:58:17 +01:00
d3f53a7fd6 Merge pull request #104 from Kerollmops/update-readme
Update the Redame wrk stats
2019-02-10 14:53:15 +01:00
508af5613f doc: Update the Redame wrk stats 2019-02-10 14:05:21 +01:00
c615c31016 Merge pull request #101 from Kerollmops/version-bump
Bump version to 0.3.0
2019-02-07 15:26:38 +01:00
908b28790b chore: Bump version to 0.3.0 2019-02-07 14:51:39 +01:00
4c0279729b Merge pull request #100 from qdequele/master
Allow users to manage multiple database indexes
2019-02-07 14:49:52 +01:00
96dfac5b33 feat: Allow users to manage multiple database indexes 2019-02-07 13:05:55 +01:00
8576218b51 Merge pull request #99 from Kerollmops/simplify-transactional-update
Remove the lifetime restriction for Database Updates
2019-02-06 18:19:45 +01:00
1c1f9201b8 feat: Remove the lifetime restriction for Database Updates 2019-02-06 18:03:41 +01:00
4398b88a3a Merge pull request #98 from Kerollmops/updates-with-transactions
Change updates to be handled using the RocksDB WriteBatch feature
2019-02-06 16:13:47 +01:00
73e79f5ca4 chore: Make travis build with Rust 1.32 2019-02-06 15:58:48 +01:00
1bfd51d6e9 feat: Change updates to be handled using the RocksDB WriteBatch feature 2019-02-06 15:58:47 +01:00
0d2daf27f2 Merge pull request #97 from Kerollmops/remove-hashbrown-stop-words
Remove the hashbrown dependency for library users
2019-02-03 17:31:08 +01:00
87f0d8cf3c feat: Remove the hashbrown dependency for library users 2019-02-03 12:22:50 +01:00
06d5a10902 Merge pull request #96 from Kerollmops/chore
Make some little changes
2019-02-03 11:55:06 +01:00
94b89c5439 chore: Make the Document from_raw method private 2019-02-03 11:24:44 +01:00
c5e951be09 chore: Move the deseserializer into the serde module 2019-02-03 11:24:44 +01:00
66ae5c8161 chore: Clarify some QueryBuilder comments 2019-02-03 11:24:44 +01:00
8438e2202f Merge pull request #95 from Kerollmops/fix-querybuilder-with-criteria
Make the QueryBuilder with_criteria use FilterFunc
2019-02-03 11:24:17 +01:00
7a6166d229 feat: Make the QueryBuilder with_criteria use FilterFunc 2019-02-03 10:55:16 +01:00
d46fa4b215 Merge pull request #94 from Kerollmops/data-oriented
Introduce Data Oriented design into the search algorithm
2019-02-02 15:40:10 +01:00
2bd5b4ab86 feat: Remove useless WordsProximity criterion benchmark 2019-02-02 15:12:54 +01:00
5efbc5ceb3 feat: Introduce the revisited SortBy criterion 2019-02-02 14:42:12 +01:00
2e905bac08 chore: Remove Attribute and WordArea structures 2019-02-02 14:40:15 +01:00
4c0ad5f964 feat: Simplify the Criterion Trait by removing the DatabaseView param 2019-02-02 14:40:15 +01:00
455cbf3bf4 feat: Make the search algorithm become fully data oriented 2019-02-02 14:40:14 +01:00
a3a28c56fa feat: Replace compressed Match fields by uncompressed ones 2019-02-02 14:40:14 +01:00
b0b3175641 Merge pull request #93 from Kerollmops/slice-group-by
Use the GroupBy/Mut Traits of the slice-group-by library
2019-01-30 17:52:27 +01:00
c2f0df3f73 feat: Use the GroupBy/Mut Traits of the slice-group-by library 2019-01-30 16:54:52 +01:00
820f1f9ac6 Merge pull request #91 from Kerollmops/warn-reused-document-id
Emit warnings when a document id is reused
2019-01-28 21:05:42 +01:00
337aee5b65 chore: Emit warnings when a document id is reused 2019-01-28 16:11:55 +01:00
810dfdf656 Merge pull request #90 from Kerollmops/version-bump
Bump version to 0.2.1
2019-01-25 17:08:53 +01:00
f016652fca chore: Bump version to 0.2.1 2019-01-25 16:41:08 +01:00
6c99ebe3fa Merge pull request #89 from Kerollmops/no-more-compaction
Remove the manual compaction triggering
2019-01-25 16:40:08 +01:00
94d357985f feat: Remove the manual compaction triggering 2019-01-25 16:05:56 +01:00
fbc698567a Merge pull request #87 from Kerollmops/measure-index-loading
Display index loading times
2019-01-24 14:07:11 +01:00
aa9db14c09 chore: Display index loading times 2019-01-23 11:19:44 +01:00
61e83a1c21 Merge pull request #86 from Kerollmops/measure-indexation
Display timings of indexation operations
2019-01-16 13:32:44 +01:00
1316be5b09 chore: Display timings of indexation operations 2019-01-16 11:45:33 +01:00
4e8b0383dd Merge pull request #85 from Kerollmops/debug-more-stats
Display more stats infos
2019-01-15 14:20:28 +01:00
4fa10753c1 chore: Display more stats infos 2019-01-14 21:18:46 +01:00
2473e289e8 Merge pull request #84 from qdequele/create-server-example
Example HTTP server example can use stopwords
2019-01-14 18:55:58 +01:00
e0e5e87ed3 feat: HTTP server example can use stopwords 2019-01-14 18:21:58 +01:00
b13e61f40a Merge pull request #83 from qdequele/create-server-example
Create an example of HTTP server managing multiple databases
2019-01-14 14:35:33 +01:00
c023cb3065 feat: Create an example for HTTP server managing multiple databases 2019-01-14 13:39:54 +01:00
0a3d069fbc Merge pull request #79 from qdequele/master
Schema can be de/serialized from a json format
2019-01-12 21:50:02 +01:00
fa062ce2cf feat: Schema can be de/serialized from a json format 2019-01-12 21:05:48 +01:00
cdc6e47bf5 Merge pull request #81 from Kerollmops/update-readme
Simplify the examples command lines
2019-01-12 13:43:42 +01:00
d5f44838be doc: Simplify the examples command lines 2019-01-12 12:56:11 +01:00
102 changed files with 27336 additions and 4904 deletions

.gitignore

@@ -1,6 +1,7 @@
/rocksdb
/target
/Cargo.lock
meilidb/Cargo.lock
meilidb-core/Cargo.lock
**/*.rs.bk
**/*.csv
**/*.json_lines

.travis.yml (deleted)

@@ -1,22 +0,0 @@
language: rust
cache: cargo

branches:
  only:
    - master

matrix:
  fast_finish: true
  include:
    # Test crates on their minimum Rust versions.
    - rust: 1.31.0
      name: "meilidb on 1.31.0"
      script: ./ci/meilidb.sh
    # Test crates on nightly Rust.
    - rust: nightly
      name: "meilidb on nightly"
      script: ./ci/meilidb.sh

Cargo.toml

@@ -1,55 +1,11 @@
[package]
edition = "2018"
name = "meilidb"
version = "0.2.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
[dependencies]
bincode = "1.0"
byteorder = "1.2"
crossbeam = "0.6"
fst = "0.3"
hashbrown = { version = "0.1", features = ["serde"] }
lazy_static = "1.1"
levenshtein_automata = { version = "0.1", features = ["fst_automaton"] }
linked-hash-map = { version = "0.5", features = ["serde_impl"] }
log = "0.4"
sdset = "0.3"
serde = "1.0"
serde_derive = "1.0"
unidecode = "0.3"
[dependencies.toml]
git = "https://github.com/Kerollmops/toml-rs.git"
features = ["preserve_order"]
rev = "0372ba6"
[dependencies.rocksdb]
git = "https://github.com/pingcap/rust-rocksdb.git"
rev = "306e201"
[dependencies.group-by]
git = "https://github.com/Kerollmops/group-by.git"
rev = "5a113fe"
[features]
default = ["simd"]
i128 = ["bincode/i128", "byteorder/i128"]
portable = ["rocksdb/portable"]
simd = ["rocksdb/sse"]
nightly = ["hashbrown/nightly", "group-by/nightly"]
[dev-dependencies]
csv = "1.0"
elapsed = "0.1"
env_logger = "0.6"
jemallocator = "0.1"
quickcheck = "0.8"
rand = "0.6"
rand_xorshift = "0.1"
structopt = "0.2"
tempfile = "3.0"
termcolor = "1.0"
[workspace]
members = [
"meilidb",
"meilidb-core",
"meilidb-data",
"meilidb-schema",
"meilidb-tokenizer",
]
[profile.release]
debug = true

README.md

@@ -1,6 +1,6 @@
# MeiliDB
[![Build Status](https://travis-ci.org/Kerollmops/MeiliDB.svg?branch=master)](https://travis-ci.org/Kerollmops/MeiliDB)
[![Build Status](https://dev.azure.com/thomas0884/thomas/_apis/build/status/meilisearch.MeiliDB?branchName=master)](https://dev.azure.com/thomas0884/thomas/_build/latest?definitionId=1&branchName=master)
[![dependency status](https://deps.rs/repo/github/Kerollmops/MeiliDB/status.svg)](https://deps.rs/repo/github/Kerollmops/MeiliDB)
[![License](https://img.shields.io/github/license/Kerollmops/MeiliDB.svg)](https://github.com/Kerollmops/MeiliDB)
[![Rust 1.31+](https://img.shields.io/badge/rust-1.31+-lightgray.svg)](
@@ -8,34 +8,48 @@ https://www.rust-lang.org)
A _full-text search database_ using a key-value store internally.
It uses [RocksDB](https://github.com/facebook/rocksdb) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads.
## Features
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries.
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L95-L101) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L22-L29) and can apply them in any custom order
- Supports [ranged queries](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L146), useful for paginating results
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L68) and [filter](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L57) returned documents based on context defined rules
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/examples/movies/schema-movies.toml)
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-tokenizer/src/lib.rs#L99) can index latin and kanji based languages
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/lib.rs#L117-L120), useful to highlight matched words in results
- Accepts query time search config like the [searchable fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L79)
- Supports run time indexing (incremental indexing)
We will be proud if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/Kerollmops/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
It uses [RocksDB](https://github.com/facebook/rocksdb) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performance.
You can [read the deep dive](deep-dive.md) if you want more information on the engine; it describes the whole process of generating updates and handling queries. You can also take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
We will be proud if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
The project is only a library for now; there is no binary provided yet. To get started, you can check the examples, which are made to work with the data located in the `misc/` folder.
MeiliDB will be a binary in a near future so you will be able to use it as a database out-of-the-box. We should be able to query it using a [to-be-defined](https://github.com/Kerollmops/MeiliDB/issues/38) protocol. This is our current goal, [see the milestones](https://github.com/Kerollmops/MeiliDB/milestones). In the end, the binary will be a bunch of network protocols and wrappers around the library - which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
MeiliDB will be a binary in a near future so you will be able to use it as a database out-of-the-box. We should be able to query it using a [to-be-defined](https://github.com/meilisearch/MeiliDB/issues/38) protocol. This is our current goal, [see the milestones](https://github.com/meilisearch/MeiliDB/milestones). In the end, the binary will be a bunch of network protocols and wrappers around the library - which will also be published on [crates.io](https://crates.io). Both the binary and the library will follow the same update cycle.
## Performances
With a database composed of _100 353_ documents with _352_ attributes each and _90_ of them indexed.
So nearly _9 million_ fields indexed for _35 million_ stored we can handle more than _1.2k req/sec_ on an Intel i7-7700 (8) @ 4.2GHz.
With a database composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
So more than _300 000_ fields indexed for _35 million_ stored we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to generate real users queries.
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real users queries.
```
Running 10s test @ http://localhost:2230
2 threads and 12 connections
2 threads and 25 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 18.86ms 49.39ms 614.89ms 95.23%
Req/Sec 620.41 59.53 790.00 65.00%
12359 requests in 10.00s, 3.26MB read
Requests/sec: 1235.54
Transfer/sec: 334.22KB
Latency 9.52ms 7.61ms 99.25ms 84.58%
Req/Sec 1.41k 119.11 1.78k 64.50%
28080 requests in 10.01s, 7.42MB read
Requests/sec: 2806.46
Transfer/sec: 759.17KB
```
### Notes
@@ -45,16 +59,35 @@ We have seen much better performances when [using jemalloc as the global allocat
## Usage and examples
MeiliDB runs with an index like most search engines.
So to test the library you can create one by indexing a simple csv file.
You can try a little part of MeiliDB with the following commands.
It creates an index named _movies_ and inserts two great Tarantino movies in it.
```bash
cargo run --release --example create-database -- test.mdb misc/kaggle.csv --schema schema-example.toml --stop-words misc/fr.stopwords.txt
cargo run --release
curl -XPOST 'http://127.0.0.1:8000/movies' \
-d '
identifier = "id"
[attributes.id]
stored = true
[attributes.title]
stored = true
indexed = true
'
curl -H 'Content-Type: application/json' \
-XPUT 'http://127.0.0.1:8000/movies' \
-d '{ "id": 123, "title": "Inglorious Bastards" }'
curl -H 'Content-Type: application/json' \
-XPUT 'http://127.0.0.1:8000/movies' \
-d '{ "id": 456, "title": "Django Unchained" }'
```
Once the command is executed, the index should be in the `test.mdb` folder. You are now able to run the `query-database` example and play with MeiliDB.
Once the database is initialized you can query it by using the following command:
```bash
cargo run --release --example query-database -- test.mdb -n 10 id title
curl -XGET 'http://127.0.0.1:8000/movies/search?q=inglo'
```

azure-pipelines.yml (new file)

@@ -0,0 +1,47 @@
---
trigger:
  branches:
    include: [ master ]
pr: [ master ]

jobs:
  - job: test
    pool:
      vmImage: 'Ubuntu 16.04'
    container: tpayet/chiquitita:latest
    steps:
      - script: |
          curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
        displayName: 'Install rustc'
      - script: |
          $HOME/.cargo/bin/cargo check
        displayName: 'Check MeiliDB'
      - script: |
          $HOME/.cargo/bin/cargo test
        displayName: 'Test MeiliDB'
  - job: build
    dependsOn:
      - test
    condition: succeeded()
    pool:
      vmImage: 'Ubuntu 16.04'
    container: tpayet/chiquitita:latest
    steps:
      - script: |
          curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
        displayName: 'Install rustc'
      - script: |
          $HOME/.cargo/bin/cargo build --release
        displayName: 'Build MeiliDB'
      - task: CopyFiles@2
        inputs:
          contents: '$(System.DefaultWorkingDirectory)/target/release/libmeilidb.rlib'
          targetFolder: $(Build.ArtifactStagingDirectory)
        displayName: 'Copy build'
      - task: PublishBuildArtifacts@1
        inputs:
          artifactName: libmeilidb.rlib
        displayName: 'Upload artifacts'

deep-dive.md

@@ -1,28 +1,22 @@
# A deep dive in MeiliDB
On the 9 of december 2018.
MeiliDB is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [RocksDB](https://github.com/facebook/rocksdb). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the data as an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
On the 15 of May 2019.
MeiliDB is a full text search engine based on a final state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurate and fast as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
<!-- MarkdownTOC autolink="true" -->
- [Where is the data stored?](#where-is-the-data-stored)
- [What does the key-value store contains?](#what-does-the-key-value-store-contains)
- [The blob type](#the-blob-type)
- [The inverted word index](#the-inverted-word-index)
- [A final state transducer](#a-final-state-transducer)
- [Document indexes](#document-indexes)
- [Document ids](#document-ids)
- [The schema](#the-schema)
- [Document attributes](#document-attributes)
- [How is an update handled?](#how-is-an-update-handled)
- [The merge operation is CPU consuming](#the-merge-operation-is-cpu-consuming)
- [How is a request processed?](#how-is-a-request-processed)
- [Query lexemes](#query-lexemes)
- [Automatons and query index](#automatons-and-query-index)
- [Sort by criteria](#sort-by-criteria)
- [Retrieve original documents](#retrieve-original-documents)
<!-- /MarkdownTOC -->
@@ -30,21 +24,17 @@ MeiliDB is a full text search engine based on a final state transducer named [fs
MeiliDB is entirely backed by a key-value store like any good database (e.g. Postgres, MySQL). This brings great flexibility in the way documents can be stored and updates handled over time.
[RocksDB brings some](https://rocksdb.org/blog/2015/02/27/write-batch-with-index.html) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent, for example we use SST files and the key-value store ability to load them in one time to manage updates.
Note that the SST file have the same restriction as the fst, it needs its keys to be added in order at creation.
[sled will bring some](https://github.com/spacejam/sled/tree/434533332a3f485e6d2e467023be0a0b55d3a1af#plans) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent.
## What does the key-value store contains?
It contains the blob, the schema and the documents' stored attributes.
It contains the inverted word index, the schema and the document fields.
### The blob type
### The inverted word index
[The Blob type](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/blob/mod.rs#L16-L19) is a data structure that indicate if an update is a positive or a negative one. In the case where the update is considered positive, the blob will contain [an fst map and the document indexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/blob/positive/blob.rs#L15-L18) associated. In the other case it will only contain [all the document ids](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/blob/negative/blob.rs#L12-L14) that must be considered removed.
The Blob type [is stored under the "*data-index*" entry](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/update/positive/update.rs#L497-L499) and marked as [a merge operation](https://github.com/facebook/rocksdb/wiki/Merge-Operator-Implementation) in the key-value store.
[The inverted word index](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs) is a sled Tree dedicated to storing and giving access to all documents that contain a specific word. The information stored under the word is simply a big ordered array of where in the document the word has been found. In other words, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L35-L51).
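
To make this more concrete, here is a minimal, hypothetical sketch of such an entry and of a word lookup. The field names only approximate the real `DocIndex` type linked above, and the `BTreeMap` is a toy stand-in for the sled-backed Tree.

```rust
// Illustrative sketch only: field names approximate the real `DocIndex`
// type linked above, they are not copied from the source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DocumentId(u64);

#[derive(Debug, Clone, Copy)]
struct DocIndex {
    document_id: DocumentId, // which document contains the word
    attribute: u16,          // which schema attribute of that document
    word_index: u16,         // position of the word inside the attribute
    char_index: u16,         // offset of the match in the attribute text
    char_length: u16,        // length of the matched word
}

use std::collections::BTreeMap;

// The inverted index conceptually maps a word to a sorted list of DocIndex.
fn postings_for(word: &str, index: &BTreeMap<String, Vec<DocIndex>>) -> Vec<DocIndex> {
    index.get(word).cloned().unwrap_or_default()
}
```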
#### A final state transducer
@@ -52,89 +42,54 @@ _...also abbreviated fst_
This is the first entry point of the engine; you can read more about how it works in @BurntSushi's beautiful blog post, [Index 1,600,000,000 Keys with Automata and Rust](https://blog.burntsushi.net/transducers/).
To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index associated with a value that, for the moment, can only be an `u64`. When you want to search in it you can provide any automaton you want, in MeiliDB [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
Note that the number under each word is auto-incremental: each new word has a number greater than the previous one.
Another powerful feature of `fst` is that it can nearly avoid using RAM and be streamed to disk for example, the problem is that the keys must be always added in lexicographic order, so you must sort them before, for the moment MeiliDB uses a [BTreeMap](https://github.com/Kerollmops/raptor-rs/blob/8abdb0a228e2808fe1814a6a0641a4b72d158579/src/metadata/doc_indexes.rs#L107-L112).
To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want, in MeiliDB [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
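
As a rough illustration of that idea, the sketch below builds a small `fst::Set` and queries it with a Levenshtein automaton. It assumes the `fst` crate with its levenshtein feature enabled and is not MeiliDB's own automaton code.

```rust
use fst::{IntoStreamer, Set};
use fst::automaton::Levenshtein;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Keys must be inserted in lexicographic order.
    let set = Set::from_iter(vec!["django", "inglorious", "tarantino"])?;

    // Accept every word within an edit distance of 1 from the typo "tarentino".
    let lev = Levenshtein::new("tarentino", 1)?;

    let matches = set.search(&lev).into_stream().into_strs()?;
    assert_eq!(matches, vec!["tarantino".to_string()]);
    Ok(())
}
```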
#### Document indexes
As specified earlier, the `fst` can only store a number corresponding to a word, a `u64`, but the goal of the search engine is to retrieve a match in a document when a query is made. You want it to return some sort of position in an attribute in a document, information about where the given word matches.
The `fst` will only return the words that match the search automaton, but the goal of the search engine is to retrieve all matches in all the documents when a query is made. You want it to return some sort of position in an attribute in a document, information about where the given word matched.
To make it possible, a custom data structure has been developed: the document indexes are composed of two arrays, the ranges array and all the docindexes corresponding to a given range, where each range identifies the word number. The [DocIndexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/data/doc_indexes.rs#L23) type is designed to be streamed when constructed, consuming a minimum amount of RAM like the fst. Another advantage is that the slices are accessible in `O(1)` when you know the number associated with the word.
#### Document ids
This is a simple ordered list of all documents ids which must be considered deleted. It is used with [the sdset library](https://docs.rs/sdset/0.3.0/sdset/duo/struct.DifferenceByKey.html), the docindexes and the `DifferenceByKey` operation builder when merging blobs.
When a blob represents a negative update it only contains this simple slice of deleted document ids.
To make it possible we retrieve all of the `DocIndex` entries corresponding to all the matching words in the fst; we use the [`WordsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding to the words.
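
Continuing the toy types from the sketch above (it reuses `DocIndex`, `postings_for` and the `BTreeMap`-backed index, not the real sled-backed `WordsIndex`), a simplified retrieval step could look like this:

```rust
// Toy retrieval step: for every word matched by the automatons, collect its
// postings and keep the result sorted so later set operations stay cheap.
fn doc_indexes_for_matches(
    matching_words: &[String],
    index: &BTreeMap<String, Vec<DocIndex>>,
) -> Vec<DocIndex> {
    let mut result: Vec<DocIndex> = matching_words
        .iter()
        .flat_map(|w| postings_for(w, index))
        .collect();
    result.sort_by_key(|di| (di.document_id.0, di.attribute, di.word_index));
    result
}
```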
### The schema
The schema is a data structure that represents which document attributes should be stored and which should be indexed. It is stored under the "_data-schema_" entry and given to MeiliDB only at creation.
The schema is a data structure that represents which document attributes should be stored and which should be indexed. It is stored under the [`MainIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/main_index.rs#L12) Tree and given to MeiliDB only at the creation of an index.
Each document attribute is associated to a unique 32 bit number named `SchemaAttr`.
Each document attribute is associated to a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/schema.rs#L186).
In the future this schema type could be given along with updates and probably differ from the original; the database could then handle this document structure and reindex it.
In the future, this schema type could be given along with updates; the database could then handle a new schema and reindex the database according to the new one.
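
For intuition, here is a minimal, hypothetical model of that name-to-attribute mapping; the real `Schema` and `SchemaAttr` types linked above carry more information (indexed/stored/ranked flags, serialization, and so on).

```rust
use std::collections::HashMap;

/// Illustrative stand-in for the 16-bit attribute identifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct SchemaAttr(u16);

/// Hypothetical per-attribute properties, loosely mirroring STORED/INDEXED.
#[derive(Debug, Clone, Copy, Default)]
struct AttrProps {
    stored: bool,
    indexed: bool,
}

#[derive(Debug, Default)]
struct Schema {
    attrs: HashMap<String, (SchemaAttr, AttrProps)>,
}

impl Schema {
    /// Register an attribute and give it the next available number.
    fn insert(&mut self, name: &str, props: AttrProps) -> SchemaAttr {
        let attr = SchemaAttr(self.attrs.len() as u16);
        self.attrs.insert(name.to_owned(), (attr, props));
        attr
    }

    /// Resolve an attribute name into its numeric identifier.
    fn attribute(&self, name: &str) -> Option<SchemaAttr> {
        self.attrs.get(name).map(|(attr, _)| *attr)
    }
}
```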
### Document attributes
When the engine handles a query, the result that the requester wants is a document, not only the [match](https://github.com/Kerollmops/MeiliDB/blob/fc2cdf92596fc002ce278e3aa8718640ac44724d/src/lib.rs#L51-L79) associated with it; fields of the original document must be returned too.
When the engine handles a query, the result that the requester wants is a document, not only the [`Matches`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L62-L88) associated with it; fields of the original document must be returned too.
So MeiliDB again uses the power of the underlying key-value store and saves the document attributes marked as _STORE_. The key is prefixed by "_doc_" followed by the 64-bit document id in bytes and the schema attribute number in bytes corresponding to the stored document attribute.
So MeiliDB again uses the power of the underlying key-value store and saves the document attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/documents_index.rs#L11).
When a document field is saved in the key-value store its value is binary encoded using the [bincode](https://docs.rs/bincode/) library, so a document must be serializable using serde.
## How is an update handled?
First of all an update in MeiliDB is nothing more than [a RocksDB SST file](https://github.com/facebook/rocksdb/wiki/Creating-and-Ingesting-SST-files). It contains the blob and all the documents attributes binary encoded like described above. Note that the blob is stored under the "_data-index_" key marked as [a merge operation](https://github.com/facebook/rocksdb/wiki/Merge-Operator-Implementation).
### The merge operation is CPU consuming
When [the database ingest an update](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/mod.rs#L108-L145) it gives the SST file to the underlying RocksDB, once it has ingested it there is a "_data-index_" entry available, we can request it but the key-value store will call a function before, a merge operation is performed.
This merge operation is done on multiple blobs as you have understood and will compute a [PositiveBlob](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/blob/positive/blob.rs#L15), this type contains the fst and document indexes structures allowing us to search for documents. This two data structures can be considered as the inverted index.
The computation time of this merge is important, RocksDB doesn't keep the previous merged result, it will call our merge operation each time until it decided to do a compaction. So [we must force this compaction earlier](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/mod.rs#L129-L131) when we receive an update to reduce this cost.
This way when we request the "_data-index_" value it will gives us the previously merged positive blob without any other merge overhead.
When a document field is saved in the key-value store, its value is binary encoded using [message pack](https://github.com/3Hren/msgpack-rust), so a document must be serializable using serde.
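
As an illustration of that encoding step, the sketch below round-trips a document with the `rmp-serde` crate (one MessagePack implementation for serde); the actual key layout used in the store is not shown, and the `Movie` type is made up for the example.

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Movie {
    id: u64,
    title: String,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let movie = Movie { id: 123, title: "Inglorious Bastards".to_owned() };

    // Encode the document as MessagePack bytes before writing it to the store.
    let bytes = rmp_serde::to_vec(&movie)?;

    // Decode it back when the document must be returned to the requester.
    let decoded: Movie = rmp_serde::from_slice(&bytes)?;
    assert_eq!(movie, decoded);
    Ok(())
}
```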
## How is a request processed?
Now that we have our "_data-index_" we are able to return results based on a query. In the MeiliDB universe a query is a string.
Now that we have our inverted index we are able to return results based on a query. In the MeiliDB universe a query is a simple string containing words.
### Query lexemes
The first step, in order to be able to call the underlying structures, is to split the query into words; for that we use a [custom tokenizer](https://github.com/Kerollmops/MeiliDB/blob/fc2cdf92596fc002ce278e3aa8718640ac44724d/src/tokenizer/mod.rs) that is not finished for the moment, [there is an open issue](https://github.com/Kerollmops/MeiliDB/issues/3). Note that a tokenizer is specialized for a human language, and this is the hard part.
The first step, in order to be able to call the underlying structures, is to split the query into words; for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language, and this is the hard part.
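As a rough sketch of the splitting idea only (not the meilidb-tokenizer itself): transliterate to ASCII with the deunicode crate, one of the project's dependencies, then split on non-alphanumeric characters. The real tokenizer additionally reports word indexes and character positions, which end up in the `DocIndex` entries.

```rust
use deunicode::deunicode;

// Naive word splitter: deunicode first, then split on non-alphanumeric characters.
fn simple_tokenize(text: &str) -> Vec<String> {
    deunicode(text)
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_lowercase)
        .collect()
}

fn main() {
    assert_eq!(simple_tokenize("L'été à New-York!"), ["l", "ete", "a", "new", "york"]);
}
```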
### Automatons and query index
To query the fst we need an automaton; in MeiliDB we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), which is constructed from a string and a maximum distance. Following [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/Kerollmops/MeiliDB/blob/fc2cdf92596fc002ce278e3aa8718640ac44724d/src/automaton.rs#L62-L75) with different settings.
To query the fst we need an automaton; in MeiliDB we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), which is constructed from a string and a maximum distance. Following [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/automaton.rs#L59-L78) with different settings.
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst map, which allows us to know which [automaton returned a word according to its index](https://github.com/Kerollmops/MeiliDB/blob/fc2cdf92596fc002ce278e3aa8718640ac44724d/src/metadata/ops.rs#L111). The `Stream` is able to return all the numbers associated with the words. We use these numbers to find the whole list of associated `DocIndexes` and do the union set operation.
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst set. The `Stream` is able to return all the matching words. We use these words to find the whole list of associated `DocIndexes`.
With all this information it is possible [to reconstruct a list of all the DocIndexes associated](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L62-L99) with the queried words.
With all this information it is possible [to reconstruct a list of all the `DocIndexes` associated](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L103-L130) with the queried words.
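As a sketch of that whole flow, and assuming the forked fst and levenshtein_automata crates pinned in the Cargo.toml below (with the `fst_automaton` feature so a DFA can drive the fst search): build one DFA per query word, union the search streams over the words set, and collect every matching word; those words are then used to fetch the associated `DocIndexes`. The helper name is hypothetical and error handling is omitted.

```rust
use fst::{Set, Streamer};
use fst::set::OpBuilder;
use levenshtein_automata::LevenshteinAutomatonBuilder;

// Find every word of the set reachable from the query words with at most one typo.
fn fuzzy_matching_words(words: &Set, query_words: &[&str]) -> Vec<String> {
    let lev = LevenshteinAutomatonBuilder::new(1, false);
    let dfas: Vec<_> = query_words.iter().map(|word| lev.build_dfa(word)).collect();

    let mut op = OpBuilder::new();
    for dfa in dfas {
        op.push(words.search(dfa));
    }

    let mut union = op.union();
    let mut matches = Vec::new();
    while let Some(word) = union.next() {
        matches.push(String::from_utf8_lossy(word).into_owned());
    }
    matches
}
```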
### Sort by criteria
Now that we are able to get a big list of [DocIndexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L21-L36), it is not enough to sort them by criteria; we need more information, like the levenshtein distance or the fact that a query word matches exactly the word stored in the fst. So [we stuff it a little bit](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L86-L93) and aggregate all these [Matches](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L47-L74) for each document. This way it is easy to sort a simple vector of documents using a bunch of functions.
With this big list of documents and associated matches, [we are able to sort only the part of the slice that we want](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L108-L119) using bucket sorting. [Each criterion](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/criterion/mod.rs#L75-L87) is evaluated on each subslice without copying, thanks to [GroupByMut](https://github.com/Kerollmops/group-by/blob/cab857bae01463dbd0edb99b0e0d7f3624e6c6f5/src/lib.rs#L180-L185) which, I hope, [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
Note that it is possible to customize the criteria by using the `QueryBuilder::with_criteria` constructor; this way you can implement some custom ranking based on the document attributes, using the appropriate structure and the `retrieve_document` method.
### Retrieve original documents
The [DatabaseView](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/database_view.rs#L18-L24) structure that you must have created to be able to query the database has [two functions](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/database/database_view.rs#L60-L76) that allow you to retrieve a full (or partial) document according to the schema you specified at creation time (i.e. the _STORED_ attributes).
As you can see, these functions force the created type `T` to implement [the serde Deserialize trait](https://docs.rs/serde/1.0.81/serde/trait.Deserialize.html); MeiliDB will use the `bincode::deserialize` function for each attribute to construct your type and return it to you.
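For illustration, a minimal sketch of that retrieval flow, modeled on the old example binary shown in this diff; the `Movie` type, the path and the query string are made up.

```rust
use std::error::Error;
use std::path::Path;
use serde::Deserialize;
use meilidb::database::Database;

// Hypothetical STORED attributes of the indexed documents.
#[derive(Deserialize)]
struct Movie {
    id: String,
    title: String,
    description: String,
}

fn main() -> Result<(), Box<dyn Error>> {
    let database = Database::open(Path::new("movies.mdb"))?;
    let view = database.view();

    let builder = view.query_builder().unwrap();
    let documents = builder.query("harry potter", 0..10);

    for doc in documents {
        // deserialize the STORED attributes of this document into a Movie
        match view.document_by_id::<Movie>(doc.id) {
            Ok(movie) => println!("{} ({}): {}", movie.title, movie.id, movie.description),
            Err(e) => eprintln!("{}", e),
        }
    }

    Ok(())
}
```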
With this big list of documents and associated matches, [we are able to sort only the part of the slice that we want](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L160-L188) using bucket sorting. [Each criterion](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/criterion/mod.rs#L95-L101) is evaluated on each subslice without copying, thanks to [GroupByMut](https://docs.rs/slice-group-by/0.2.4/slice_group_by/) which, I hope, [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
Note that it is possible to customize the criteria by using the `QueryBuilder::with_criteria` constructor; this way you can implement some custom ranking based on the document attributes, using the appropriate structure and the [`document` method](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/index.rs#L86).
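To make that concrete, here is a sketch of a hypothetical extra criterion written against the `Criterion` trait and `CriteriaBuilder` defined later in this diff; the hand-off to `QueryBuilder::with_criteria` is only hinted at in a comment, since its exact signature is not shown here.

```rust
use std::cmp::Ordering;
use meilidb_core::RawDocument;
use meilidb_core::criterion::{CriteriaBuilder, Criterion, Exact, SumOfTypos};

// Hypothetical tie breaker: prefer documents with the highest internal id,
// i.e. the most recently inserted ones.
#[derive(Debug, Clone, Copy)]
struct NewestFirst;

impl Criterion for NewestFirst {
    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
        lhs.id.cmp(&rhs.id).reverse()
    }

    fn name(&self) -> &'static str {
        "NewestFirst"
    }
}

fn main() {
    // Build a custom criteria list, to be handed to QueryBuilder::with_criteria.
    let criteria = CriteriaBuilder::with_capacity(3)
        .add(SumOfTypos)
        .add(Exact)
        .add(NewestFirst)
        .build();
    let _ = criteria;
}
```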
At this point, MeiliDB's work is over 🎉

View File

@ -1,138 +0,0 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::io::{self, BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::error::Error;
use std::borrow::Cow;
use std::fs::File;
use hashbrown::{HashMap, HashSet};
use serde_derive::{Serialize, Deserialize};
use structopt::StructOpt;
use meilidb::database::{Database, Schema, UpdateBuilder};
use meilidb::tokenizer::DefaultBuilder;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
/// The csv file to index.
#[structopt(parse(from_os_str))]
pub csv_data_path: PathBuf,
/// The path to the schema.
#[structopt(long = "schema", parse(from_os_str))]
pub schema_path: PathBuf,
/// The path to the list of stop words (one by line).
#[structopt(long = "stop-words", parse(from_os_str))]
pub stop_words_path: Option<PathBuf>,
#[structopt(long = "update-group-size")]
pub update_group_size: Option<usize>,
}
#[derive(Serialize, Deserialize)]
struct Document<'a> (
#[serde(borrow)]
HashMap<Cow<'a, str>, Cow<'a, str>>
);
fn index(
schema: Schema,
database_path: &Path,
csv_data_path: &Path,
update_group_size: Option<usize>,
stop_words: &HashSet<String>,
) -> Result<Database, Box<Error>>
{
let database = Database::create(database_path, &schema)?;
let mut rdr = csv::Reader::from_path(csv_data_path)?;
let mut raw_record = csv::StringRecord::new();
let headers = rdr.headers()?.clone();
let mut i = 0;
let mut end_of_file = false;
while !end_of_file {
let tokenizer_builder = DefaultBuilder::new();
let update_path = tempfile::NamedTempFile::new()?;
let mut update = UpdateBuilder::new(update_path.path().to_path_buf(), schema.clone());
loop {
end_of_file = !rdr.read_record(&mut raw_record)?;
if end_of_file { break }
let document: Document = match raw_record.deserialize(Some(&headers)) {
Ok(document) => document,
Err(e) => {
eprintln!("{:?}", e);
continue;
}
};
update.update_document(&document, &tokenizer_builder, &stop_words)?;
print!("\rindexing document {}", i);
i += 1;
if let Some(group_size) = update_group_size {
if i % group_size == 0 { break }
}
}
println!();
println!("building update...");
let update = update.build()?;
println!("ingesting update...");
database.ingest_update_file(update)?;
}
Ok(database)
}
fn retrieve_stop_words(path: &Path) -> io::Result<HashSet<String>> {
let f = File::open(path)?;
let reader = BufReader::new(f);
let mut words = HashSet::new();
for line in reader.lines() {
let line = line?;
let word = line.trim().to_string();
words.insert(word);
}
Ok(words)
}
fn main() -> Result<(), Box<Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let schema = {
let file = File::open(&opt.schema_path)?;
Schema::from_toml(file)?
};
let stop_words = match opt.stop_words_path {
Some(ref path) => retrieve_stop_words(path)?,
None => HashSet::new(),
};
let (elapsed, result) = elapsed::measure_time(|| {
index(schema, &opt.database_path, &opt.csv_data_path, opt.update_group_size, &stop_words)
});
if let Err(e) = result {
return Err(e.into())
}
println!("database created in {} at: {:?}", elapsed, opt.database_path);
Ok(())
}

View File

@ -5,15 +5,15 @@
identifier = "id"
[attributes.id]
stored = true
displayed = true
[attributes.title]
stored = true
displayed = true
indexed = true
[attributes.description]
stored = true
displayed = true
indexed = true
[attributes.image]
stored = true
displayed = true

View File

@ -0,0 +1 @@
_data in movies.csv are from https://www.themoviedb.org/_

19700
examples/movies/movies.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
# This schema has been generated ...
# The order in which the attributes are declared is important,
# it specifies the attribute xxx...
identifier = "id"
[attributes.id]
displayed = true
[attributes.title]
displayed = true
indexed = true
[attributes.overview]
displayed = true
indexed = true
[attributes.release_date]
displayed = true
[attributes.poster]
displayed = true

View File

@ -1,170 +0,0 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::btree_map::{BTreeMap, Entry};
use std::iter::FromIterator;
use std::io::{self, Write};
use std::path::PathBuf;
use std::error::Error;
use hashbrown::{HashMap, HashSet};
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use structopt::StructOpt;
use meilidb::database::schema::SchemaAttr;
use meilidb::database::Database;
use meilidb::Match;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
/// Fields that must be displayed.
pub displayed_fields: Vec<String>,
/// The number of returned results
#[structopt(short = "n", long = "number-results", default_value = "10")]
pub number_results: usize,
}
type Document = HashMap<String, String>;
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
let mut stdout = StandardStream::stdout(ColorChoice::Always);
let mut highlighted = false;
for range in ranges.windows(2) {
let [start, end] = match range { [start, end] => [*start, *end], _ => unreachable!() };
if highlighted {
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?;
}
write!(&mut stdout, "{}", &text[start..end])?;
stdout.reset()?;
highlighted = !highlighted;
}
Ok(())
}
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
let mut byte_index = 0;
let mut byte_length = 0;
for (n, (i, c)) in text.char_indices().enumerate() {
if n == index {
byte_index = i;
}
if n + 1 == index + length {
byte_length = i - byte_index + c.len_utf8();
break;
}
}
(byte_index, byte_length)
}
fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr) -> Vec<usize> {
let mut byte_indexes = BTreeMap::new();
for match_ in matches {
let match_attribute = match_.attribute.attribute();
if SchemaAttr::new(match_attribute) == attribute {
let word_area = match_.word_area;
let char_index = word_area.char_index() as usize;
let char_length = word_area.length() as usize;
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
match byte_indexes.entry(byte_index) {
Entry::Vacant(entry) => { entry.insert(byte_length); },
Entry::Occupied(mut entry) => {
if *entry.get() < byte_length {
entry.insert(byte_length);
}
},
}
}
}
let mut title_areas = Vec::new();
title_areas.push(0);
for (byte_index, length) in byte_indexes {
title_areas.push(byte_index);
title_areas.push(byte_index + length);
}
title_areas.push(text.len());
title_areas.sort_unstable();
title_areas
}
fn main() -> Result<(), Box<Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let (elapsed, result) = elapsed::measure_time(|| Database::open(&opt.database_path));
let database = result?;
println!("database prepared for you in {}", elapsed);
let mut buffer = String::new();
let input = io::stdin();
loop {
print!("Searching for: ");
io::stdout().flush()?;
if input.read_line(&mut buffer)? == 0 { break }
let query = buffer.trim_end_matches('\n');
let view = database.view();
let schema = view.schema();
let (elapsed, documents) = elapsed::measure_time(|| {
let builder = view.query_builder().unwrap();
builder.query(query, 0..opt.number_results)
});
let number_of_documents = documents.len();
for doc in documents {
match view.document_by_id::<Document>(doc.id) {
Ok(document) => {
for name in &opt.displayed_fields {
let attr = match schema.attribute(name) {
Some(attr) => attr,
None => continue,
};
let text = match document.get(name) {
Some(text) => text,
None => continue,
};
print!("{}: ", name);
let areas = create_highlight_areas(&text, &doc.matches, attr);
display_highlights(&text, &areas)?;
println!();
}
},
Err(e) => eprintln!("{}", e),
}
let mut matching_attributes = HashSet::new();
for _match in doc.matches {
let attr = SchemaAttr::new(_match.attribute.attribute());
let name = schema.attribute_name(attr);
matching_attributes.insert(name);
}
let matching_attributes = Vec::from_iter(matching_attributes);
println!("matching in: {:?}", matching_attributes);
println!();
}
eprintln!("===== Found {} results in {} =====", number_of_documents, elapsed);
buffer.clear();
}
Ok(())
}

34
meilidb-core/Cargo.toml Normal file
View File

@ -0,0 +1,34 @@
[package]
name = "meilidb-core"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
byteorder = "1.3.1"
deunicode = "1.0.0"
hashbrown = "0.2.2"
lazy_static = "1.2.0"
log = "0.4.6"
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
rayon = "1.2.0"
sdset = "0.3.2"
serde = { version = "1.0.88", features = ["derive"] }
slice-group-by = "0.2.6"
zerocopy = "0.2.2"
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dependencies.levenshtein_automata]
git = "https://github.com/Kerollmops/levenshtein-automata.git"
branch = "arc-byte-slice"
features = ["fst_automaton"]
[dev-dependencies]
assert_matches = "1.3"
[features]
i128 = ["byteorder/i128"]
nightly = ["hashbrown/nightly", "slice-group-by/nightly"]

View File

@ -0,0 +1,44 @@
use lazy_static::lazy_static;
use levenshtein_automata::{
LevenshteinAutomatonBuilder as LevBuilder,
DFA,
};
lazy_static! {
static ref LEVDIST0: LevBuilder = LevBuilder::new(0, false);
static ref LEVDIST1: LevBuilder = LevBuilder::new(1, false);
static ref LEVDIST2: LevBuilder = LevBuilder::new(2, false);
}
#[derive(Copy, Clone)]
enum PrefixSetting {
Prefix,
NoPrefix,
}
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
use self::PrefixSetting::{Prefix, NoPrefix};
match query.len() {
0 ..= 4 => match setting {
Prefix => LEVDIST0.build_prefix_dfa(query),
NoPrefix => LEVDIST0.build_dfa(query),
},
5 ..= 8 => match setting {
Prefix => LEVDIST1.build_prefix_dfa(query),
NoPrefix => LEVDIST1.build_dfa(query),
},
_ => match setting {
Prefix => LEVDIST2.build_prefix_dfa(query),
NoPrefix => LEVDIST2.build_dfa(query),
},
}
}
pub fn build_prefix_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::Prefix)
}
pub fn build_dfa(query: &str) -> DFA {
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}

View File

@ -0,0 +1,16 @@
use std::cmp::Ordering;
use crate::criterion::Criterion;
use crate::RawDocument;
#[derive(Debug, Clone, Copy)]
pub struct DocumentId;
impl Criterion for DocumentId {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
lhs.id.cmp(&rhs.id)
}
fn name(&self) -> &'static str {
"DocumentId"
}
}

View File

@ -0,0 +1,65 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
let mut count = 0;
let mut index = 0;
for group in query_index.linear_group() {
let len = group.len();
count += is_exact[index..index + len].contains(&true) as usize;
index += len;
}
count
}
#[derive(Debug, Clone, Copy)]
pub struct Exact;
impl Criterion for Exact {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let is_exact = lhs.is_exact();
number_exact_matches(query_index, is_exact)
};
let rhs = {
let query_index = rhs.query_index();
let is_exact = rhs.is_exact();
number_exact_matches(query_index, is_exact)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"Exact"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: "Soulier bleu"
// doc1: "souliereres rouge"
#[test]
fn easy_case() {
let query_index0 = &[0];
let is_exact0 = &[true];
let query_index1 = &[0];
let is_exact1 = &[false];
let doc0 = number_exact_matches(query_index0, is_exact0);
let doc1 = number_exact_matches(query_index1, is_exact1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}

View File

@ -0,0 +1,120 @@
mod sum_of_typos;
mod number_of_words;
mod words_proximity;
mod sum_of_words_attribute;
mod sum_of_words_position;
mod exact;
mod document_id;
use std::cmp::Ordering;
use crate::RawDocument;
pub use self::{
sum_of_typos::SumOfTypos,
number_of_words::NumberOfWords,
words_proximity::WordsProximity,
sum_of_words_attribute::SumOfWordsAttribute,
sum_of_words_position::SumOfWordsPosition,
exact::Exact,
document_id::DocumentId,
};
pub trait Criterion: Send + Sync {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
fn name(&self) -> &'static str;
#[inline]
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
self.evaluate(lhs, rhs) == Ordering::Equal
}
}
impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
(**self).evaluate(lhs, rhs)
}
fn name(&self) -> &'static str {
(**self).name()
}
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
(**self).eq(lhs, rhs)
}
}
impl<T: Criterion + ?Sized> Criterion for Box<T> {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
(**self).evaluate(lhs, rhs)
}
fn name(&self) -> &'static str {
(**self).name()
}
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
(**self).eq(lhs, rhs)
}
}
#[derive(Default)]
pub struct CriteriaBuilder<'a> {
inner: Vec<Box<dyn Criterion + 'a>>
}
impl<'a> CriteriaBuilder<'a>
{
pub fn new() -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::new() }
}
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::with_capacity(capacity) }
}
pub fn reserve(&mut self, additional: usize) {
self.inner.reserve(additional)
}
pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
where C: Criterion,
{
self.push(criterion);
self
}
pub fn push<C: 'a>(&mut self, criterion: C)
where C: Criterion,
{
self.inner.push(Box::new(criterion));
}
pub fn build(self) -> Criteria<'a> {
Criteria { inner: self.inner }
}
}
pub struct Criteria<'a> {
inner: Vec<Box<dyn Criterion + 'a>>,
}
impl<'a> Default for Criteria<'a> {
fn default() -> Self {
CriteriaBuilder::with_capacity(7)
.add(SumOfTypos)
.add(NumberOfWords)
.add(WordsProximity)
.add(SumOfWordsAttribute)
.add(SumOfWordsPosition)
.add(Exact)
.add(DocumentId)
.build()
}
}
impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
fn as_ref(&self) -> &[Box<dyn Criterion + 'a>] {
&self.inner
}
}

View File

@ -0,0 +1,31 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn number_of_query_words(query_index: &[u32]) -> usize {
query_index.linear_group().count()
}
#[derive(Debug, Clone, Copy)]
pub struct NumberOfWords;
impl Criterion for NumberOfWords {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
number_of_query_words(query_index)
};
let rhs = {
let query_index = rhs.query_index();
number_of_query_words(query_index)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"NumberOfWords"
}
}

View File

@ -0,0 +1,116 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
// This function is a wrong logarithmic 10 function.
// It is safe to panic on input number higher than 3,
// the number of typos is never bigger than that.
#[inline]
fn custom_log10(n: u8) -> f32 {
match n {
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
_ => panic!("invalid number"),
}
}
#[inline]
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
let mut number_words: usize = 0;
let mut sum_typos = 0.0;
let mut index = 0;
for group in query_index.linear_group() {
sum_typos += custom_log10(distance[index]);
number_words += 1;
index += group.len();
}
(number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfTypos;
impl Criterion for SumOfTypos {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let distance = lhs.distance();
sum_matches_typos(query_index, distance)
};
let rhs = {
let query_index = rhs.query_index();
let distance = rhs.distance();
sum_matches_typos(query_index, distance)
};
lhs.cmp(&rhs).reverse()
}
fn name(&self) -> &'static str {
"SumOfTypos"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "Geox CEO"
//
// doc0: "Geox SpA: CEO and Executive"
// doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
#[test]
fn one_typo_reference() {
let query_index0 = &[0, 1];
let distance0 = &[0, 0];
let query_index1 = &[0, 1];
let distance1 = &[1, 0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchette"
//
// doc0: "bouton manchette"
// doc1: "bouton"
#[test]
fn no_typo() {
let query_index0 = &[0, 1];
let distance0 = &[0, 0];
let query_index1 = &[0];
let distance1 = &[0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchztte"
//
// doc0: "bouton manchette"
// doc1: "bouton"
#[test]
fn one_typo() {
let query_index0 = &[0, 1];
let distance0 = &[0, 1];
let query_index1 = &[0];
let distance1 = &[0];
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}

View File

@ -0,0 +1,64 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
let mut sum_attributes = 0;
let mut index = 0;
for group in query_index.linear_group() {
sum_attributes += attribute[index] as usize;
index += group.len();
}
sum_attributes
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsAttribute;
impl Criterion for SumOfWordsAttribute {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let attribute = lhs.attribute();
sum_matches_attributes(query_index, attribute)
};
let rhs = {
let query_index = rhs.query_index();
let attribute = rhs.attribute();
sum_matches_attributes(query_index, attribute)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"SumOfWordsAttribute"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: { 0. "Soulier bleu", 1. "bla bla bla" }
// doc1: { 0. "Botte rouge", 1. "Soulier en cuir" }
#[test]
fn title_vs_description() {
let query_index0 = &[0];
let attribute0 = &[0];
let query_index1 = &[0];
let attribute1 = &[1];
let doc0 = sum_matches_attributes(query_index0, attribute0);
let doc1 = sum_matches_attributes(query_index1, attribute1);
assert_eq!(doc0.cmp(&doc1), Ordering::Less);
}
}

View File

@ -0,0 +1,64 @@
use std::cmp::Ordering;
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
#[inline]
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
let mut sum_word_index = 0;
let mut index = 0;
for group in query_index.linear_group() {
sum_word_index += word_index[index] as usize;
index += group.len();
}
sum_word_index
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsPosition;
impl Criterion for SumOfWordsPosition {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let word_index = lhs.word_index();
sum_matches_attribute_index(query_index, word_index)
};
let rhs = {
let query_index = rhs.query_index();
let word_index = rhs.word_index();
sum_matches_attribute_index(query_index, word_index)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"SumOfWordsPosition"
}
}
#[cfg(test)]
mod tests {
use super::*;
// typing: "soulier"
//
// doc0: "Soulier bleu"
// doc1: "Botte rouge et soulier noir"
#[test]
fn easy_case() {
let query_index0 = &[0];
let word_index0 = &[0];
let query_index1 = &[0];
let word_index1 = &[3];
let doc0 = sum_matches_attribute_index(query_index0, word_index0);
let doc1 = sum_matches_attribute_index(query_index1, word_index1);
assert_eq!(doc0.cmp(&doc1), Ordering::Less);
}
}

View File

@ -0,0 +1,155 @@
use std::cmp::{self, Ordering};
use slice_group_by::GroupBy;
use crate::criterion::Criterion;
use crate::RawDocument;
const MAX_DISTANCE: u16 = 8;
#[inline]
fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) {
(a.clone(), b.clone())
}
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)
} else {
cmp::min(lhs - rhs, MAX_DISTANCE) + 1
}
}
fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
if lattr != rattr { return MAX_DISTANCE }
index_proximity(lwi, rwi)
}
fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
let mut min_prox = u16::max_value();
for a in lattr.iter().zip(lwi) {
for b in rattr.iter().zip(rwi) {
let a = clone_tuple(a);
let b = clone_tuple(b);
min_prox = cmp::min(min_prox, attribute_proximity(a, b));
}
}
min_prox
}
fn matches_proximity(
query_index: &[u32],
distance: &[u8],
attribute: &[u16],
word_index: &[u16],
) -> u16
{
let mut query_index_groups = query_index.linear_group();
let mut proximity = 0;
let mut index = 0;
let get_attr_wi = |index: usize, group_len: usize| {
// retrieve the first distance group (with the lowest values)
let len = distance[index..index + group_len].linear_group().next().unwrap().len();
let rattr = &attribute[index..index + len];
let rwi = &word_index[index..index + len];
(rattr, rwi)
};
let mut last = query_index_groups.next().map(|group| {
let attr_wi = get_attr_wi(index, group.len());
index += group.len();
attr_wi
});
// iter by windows of size 2
while let (Some(lhs), Some(rhs)) = (last, query_index_groups.next()) {
let attr_wi = get_attr_wi(index, rhs.len());
proximity += min_proximity(lhs, attr_wi);
last = Some(attr_wi);
index += rhs.len();
}
proximity
}
#[derive(Debug, Clone, Copy)]
pub struct WordsProximity;
impl Criterion for WordsProximity {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let distance = lhs.distance();
let attribute = lhs.attribute();
let word_index = lhs.word_index();
matches_proximity(query_index, distance, attribute, word_index)
};
let rhs = {
let query_index = rhs.query_index();
let distance = rhs.distance();
let attribute = rhs.attribute();
let word_index = rhs.word_index();
matches_proximity(query_index, distance, attribute, word_index)
};
lhs.cmp(&rhs)
}
fn name(&self) -> &'static str {
"WordsProximity"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn three_different_attributes() {
// "soup" "of the" "the day"
//
// { id: 0, attr: 0, attr_index: 0 }
// { id: 1, attr: 1, attr_index: 0 }
// { id: 2, attr: 1, attr_index: 1 }
// { id: 2, attr: 2, attr_index: 0 }
// { id: 3, attr: 3, attr_index: 1 }
let query_index = &[0, 1, 2, 2, 3];
let distance = &[0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 2, 3];
let word_index = &[0, 0, 1, 0, 1];
// soup -> of = 8
// + of -> the = 1
// + the -> day = 8 (not 1)
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 17);
}
#[test]
fn two_different_attributes() {
// "soup day" "soup of the day"
//
// { id: 0, attr: 0, attr_index: 0 }
// { id: 0, attr: 1, attr_index: 0 }
// { id: 1, attr: 1, attr_index: 1 }
// { id: 2, attr: 1, attr_index: 2 }
// { id: 3, attr: 0, attr_index: 1 }
// { id: 3, attr: 1, attr_index: 3 }
let query_index = &[0, 0, 1, 2, 3, 3];
let distance = &[0, 0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 1, 0, 1];
let word_index = &[0, 0, 1, 2, 1, 3];
// soup -> of = 1
// + of -> the = 1
// + the -> day = 1
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 3);
}
}

View File

@ -1,5 +1,4 @@
use std::hash::Hash;
use hashbrown::HashMap;
pub struct DistinctMap<K> {
@ -12,7 +11,7 @@ impl<K: Hash + Eq> DistinctMap<K> {
pub fn new(limit: usize) -> Self {
DistinctMap {
inner: HashMap::new(),
limit: limit,
limit,
len: 0,
}
}
@ -31,7 +30,7 @@ pub struct BufferedDistinctMap<'a, K> {
impl<'a, K: Hash + Eq> BufferedDistinctMap<'a, K> {
pub fn new(internal: &'a mut DistinctMap<K>) -> BufferedDistinctMap<'a, K> {
BufferedDistinctMap {
internal: internal,
internal,
inner: HashMap::new(),
len: 0,
}

144
meilidb-core/src/lib.rs Normal file
View File

@ -0,0 +1,144 @@
#![feature(checked_duration_since)]
#[cfg(test)]
#[macro_use] extern crate assert_matches;
mod automaton;
mod distinct_map;
mod query_builder;
mod query_enhancer;
mod raw_document;
mod reordered_attrs;
mod store;
pub mod criterion;
use serde::{Serialize, Deserialize};
use zerocopy::{AsBytes, FromBytes};
use self::raw_document::raw_documents_from;
pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str};
pub use self::raw_document::RawDocument;
pub use self::store::Store;
/// Represent an internally generated document unique identifier.
///
/// It is used to inform the database the document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
#[derive(Serialize, Deserialize)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentId(pub u64);
/// This structure represent the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(AsBytes, FromBytes)]
#[repr(C)]
pub struct DocIndex {
/// The document identifier where the word was found.
pub document_id: DocumentId,
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
pub word_index: u16,
/// The position in bytes where the word was found
/// along with the length of it.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
pub char_length: u16,
}
/// This structure represent a matching word with informations
/// on the location of the word in the document.
///
/// The order of the field is important because it defines
/// the way these structures are ordered between themselves.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Highlight {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: u16,
/// The position in bytes where the word was found.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_index: u16,
/// The length in bytes of the found word.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub char_length: u16,
}
#[doc(hidden)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TmpMatch {
pub query_index: u32,
pub distance: u8,
pub attribute: u16,
pub word_index: u16,
pub is_exact: bool,
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Document {
pub id: DocumentId,
pub highlights: Vec<Highlight>,
#[cfg(test)]
pub matches: Vec<TmpMatch>,
}
impl Document {
#[cfg(not(test))]
fn from_raw(raw: RawDocument) -> Document {
Document { id: raw.id, highlights: raw.highlights }
}
#[cfg(test)]
fn from_raw(raw: RawDocument) -> Document {
let len = raw.query_index().len();
let mut matches = Vec::with_capacity(len);
let query_index = raw.query_index();
let distance = raw.distance();
let attribute = raw.attribute();
let word_index = raw.word_index();
let is_exact = raw.is_exact();
for i in 0..len {
let match_ = TmpMatch {
query_index: query_index[i],
distance: distance[i],
attribute: attribute[i],
word_index: word_index[i],
is_exact: is_exact[i],
};
matches.push(match_);
}
Document { id: raw.id, matches, highlights: raw.highlights }
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::mem;
#[test]
fn docindex_mem_size() {
assert_eq!(mem::size_of::<DocIndex>(), 16);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,398 @@
use std::ops::Range;
use std::cmp::Ordering::{Less, Greater, Equal};
/// Return `true` if the specified range can accept the given replacements words.
/// Returns `false` if the replacements words are already present in the original query
/// or if there is fewer replacement words than the range to replace.
//
//
// ## Ignored because already present in original
//
// new york city subway
// -------- ^^^^
// / \
// [new york city]
//
//
// ## Ignored because smaller than the original
//
// new york city subway
// -------------
// \ /
// [new york]
//
//
// ## Accepted because bigger than the original
//
// NYC subway
// ---
// / \
// / \
// / \
// / \
// / \
// [new york city]
//
fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
where S: AsRef<str>,
T: AsRef<str>,
{
if words.len() <= range.len() {
// there is fewer or equal replacement words
// than there is already in the replaced range
return false
}
// retrieve the part to rewrite but with the length
// of the replacement part
let original = query.iter().skip(range.start).take(words.len());
// check if the original query doesn't already contain
// the replacement words
!original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref))
}
type Origin = usize;
type RealLength = usize;
struct FakeIntervalTree {
intervals: Vec<(Range<usize>, (Origin, RealLength))>,
}
impl FakeIntervalTree {
fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end));
FakeIntervalTree { intervals }
}
fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> {
let element = self.intervals.binary_search_by(|(r, _)| {
if point >= r.start {
if point < r.end { Equal } else { Less }
} else { Greater }
});
let n = match element { Ok(n) => n, Err(n) => n };
match self.intervals.get(n) {
Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)),
_otherwise => None,
}
}
}
pub struct QueryEnhancerBuilder<'a, S> {
query: &'a [S],
origins: Vec<usize>,
real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>,
}
impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
// we initialize origins query indices based on their positions
let origins: Vec<_> = (0..query.len() + 1).collect();
let real_to_origin = origins.iter().map(|&o| (o..o+1, (o, 1))).collect();
QueryEnhancerBuilder { query, origins, real_to_origin }
}
/// Update the final real to origin query indices mapping.
///
/// `range` is the original words range that this `replacement` words replace
/// and `real` is the first real query index of these replacement words.
pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
where T: AsRef<str>,
{
// check if the range of original words
// can be rewritten with the replacement words
if rewrite_range_with(self.query, range.clone(), replacement) {
// this range can be replaced so we need to
// modify the origins accordingly
let offset = replacement.len() - range.len();
let previous_padding = self.origins[range.end - 1];
let current_offset = (self.origins[range.end] - 1) - previous_padding;
let diff = offset.saturating_sub(current_offset);
self.origins[range.end] += diff;
for r in &mut self.origins[range.end + 1..] {
*r += diff;
}
}
// we need to store the real number and origins relations
// this way it will be possible to know by how many
// we need to pad real query indices
let real_range = real..real + replacement.len().max(range.len());
let real_length = replacement.len();
self.real_to_origin.push((real_range, (range.start, real_length)));
}
pub fn build(self) -> QueryEnhancer {
QueryEnhancer {
origins: self.origins,
real_to_origin: FakeIntervalTree::new(self.real_to_origin),
}
}
}
pub struct QueryEnhancer {
origins: Vec<usize>,
real_to_origin: FakeIntervalTree,
}
impl QueryEnhancer {
/// Returns the query indices to use to replace this real query index.
pub fn replacement(&self, real: u32) -> Range<u32> {
let real = real as usize;
// query the fake interval tree with the real query index
let (range, (origin, real_length)) =
self.real_to_origin
.query(real)
.expect("real has never been declared");
// if `real` is the end bound of the range
if (range.start + real_length - 1) == real {
let mut count = range.len();
let mut new_origin = origin;
for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
let len = slice[1] - slice[0];
count = count.saturating_sub(len);
if count == 0 { new_origin = origin + i; break }
}
let n = real - range.start;
let start = self.origins[origin];
let end = self.origins[new_origin + 1];
let remaining = (end - start) - n;
Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
} else {
// just return the origin along with
// the real position of the word
let n = real as usize - range.start;
let origin = self.origins[origin];
Range { start: (origin + n) as u32, end: (origin + n + 1) as u32 }
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn original_unmodified() {
let query = ["new", "york", "city", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// new york = new york city
builder.declare(0..2, 4, &["new", "york", "city"]);
// ^ 4 5 6
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // new
assert_eq!(enhancer.replacement(1), 1..2); // york
assert_eq!(enhancer.replacement(2), 2..3); // city
assert_eq!(enhancer.replacement(3), 3..4); // subway
assert_eq!(enhancer.replacement(4), 0..1); // new
assert_eq!(enhancer.replacement(5), 1..2); // york
assert_eq!(enhancer.replacement(6), 2..3); // city
}
#[test]
fn simple_growing() {
let query = ["new", "york", "subway"];
// 0 1 2
let mut builder = QueryEnhancerBuilder::new(&query);
// new york = new york city
builder.declare(0..2, 3, &["new", "york", "city"]);
// ^ 3 4 5
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // new
assert_eq!(enhancer.replacement(1), 1..3); // york
assert_eq!(enhancer.replacement(2), 3..4); // subway
assert_eq!(enhancer.replacement(3), 0..1); // new
assert_eq!(enhancer.replacement(4), 1..2); // york
assert_eq!(enhancer.replacement(5), 2..3); // city
}
#[test]
fn same_place_growings() {
let query = ["NY", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NY = new york
builder.declare(0..1, 2, &["new", "york"]);
// ^ 2 3
// NY = new york city
builder.declare(0..1, 4, &["new", "york", "city"]);
// ^ 4 5 6
// NY = NYC
builder.declare(0..1, 7, &["NYC"]);
// ^ 7
// NY = new york city
builder.declare(0..1, 8, &["new", "york", "city"]);
// ^ 8 9 10
// subway = underground train
builder.declare(1..2, 11, &["underground", "train"]);
// ^ 11 12
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..3); // NY
assert_eq!(enhancer.replacement(1), 3..5); // subway
assert_eq!(enhancer.replacement(2), 0..1); // new
assert_eq!(enhancer.replacement(3), 1..3); // york
assert_eq!(enhancer.replacement(4), 0..1); // new
assert_eq!(enhancer.replacement(5), 1..2); // york
assert_eq!(enhancer.replacement(6), 2..3); // city
assert_eq!(enhancer.replacement(7), 0..3); // NYC
assert_eq!(enhancer.replacement(8), 0..1); // new
assert_eq!(enhancer.replacement(9), 1..2); // york
assert_eq!(enhancer.replacement(10), 2..3); // city
assert_eq!(enhancer.replacement(11), 3..4); // underground
assert_eq!(enhancer.replacement(12), 4..5); // train
}
#[test]
fn bigger_growing() {
let query = ["NYC", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(0..1, 2, &["new", "york", "city"]);
// ^ 2 3 4
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..3); // NYC
assert_eq!(enhancer.replacement(1), 3..4); // subway
assert_eq!(enhancer.replacement(2), 0..1); // new
assert_eq!(enhancer.replacement(3), 1..2); // york
assert_eq!(enhancer.replacement(4), 2..3); // city
}
#[test]
fn middle_query_growing() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..6); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
}
#[test]
fn end_query_growing() {
let query = ["NYC", "subway"];
// 0 1
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(1..2, 2, &["underground", "train"]);
// ^ 2 3
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // NYC
assert_eq!(enhancer.replacement(1), 1..3); // subway
assert_eq!(enhancer.replacement(2), 1..2); // underground
assert_eq!(enhancer.replacement(3), 2..3); // train
}
#[test]
fn multiple_growings() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
// subway = underground train
builder.declare(3..4, 7, &["underground", "train"]);
// ^ 7 8
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..7); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
assert_eq!(enhancer.replacement(7), 5..6); // underground
assert_eq!(enhancer.replacement(8), 6..7); // train
}
#[test]
fn multiple_probable_growings() {
let query = ["great", "awesome", "NYC", "subway"];
// 0 1 2 3
let mut builder = QueryEnhancerBuilder::new(&query);
// NYC = new york city
builder.declare(2..3, 4, &["new", "york", "city"]);
// ^ 4 5 6
// subway = underground train
builder.declare(3..4, 7, &["underground", "train"]);
// ^ 7 8
// great awesome = good
builder.declare(0..2, 9, &["good"]);
// ^ 9
// awesome NYC = NY
builder.declare(1..3, 10, &["NY"]);
// ^^ 10
// NYC subway = metro
builder.declare(2..4, 11, &["metro"]);
// ^^ 11
let enhancer = builder.build();
assert_eq!(enhancer.replacement(0), 0..1); // great
assert_eq!(enhancer.replacement(1), 1..2); // awesome
assert_eq!(enhancer.replacement(2), 2..5); // NYC
assert_eq!(enhancer.replacement(3), 5..7); // subway
assert_eq!(enhancer.replacement(4), 2..3); // new
assert_eq!(enhancer.replacement(5), 3..4); // york
assert_eq!(enhancer.replacement(6), 4..5); // city
assert_eq!(enhancer.replacement(7), 5..6); // underground
assert_eq!(enhancer.replacement(8), 6..7); // train
assert_eq!(enhancer.replacement(9), 0..2); // good
assert_eq!(enhancer.replacement(10), 1..5); // NY
assert_eq!(enhancer.replacement(11), 2..5); // metro
}
}

View File

@ -0,0 +1,141 @@
use std::sync::Arc;
use std::fmt;
use sdset::SetBuf;
use slice_group_by::GroupBy;
use crate::{TmpMatch, DocumentId, Highlight};
#[derive(Clone)]
pub struct RawDocument {
pub id: DocumentId,
pub matches: SharedMatches,
pub highlights: Vec<Highlight>,
}
impl RawDocument {
fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
RawDocument { id, matches, highlights }
}
pub fn query_index(&self) -> &[u32] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
}
pub fn distance(&self) -> &[u8] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
}
pub fn attribute(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
}
pub fn word_index(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
}
pub fn is_exact(&self) -> &[bool] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
}
}
impl fmt::Debug for RawDocument {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("RawDocument {\r\n")?;
f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "query_index", self.query_index()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "distance", self.distance()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "attribute", self.attribute()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "word_index", self.word_index()))?;
f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "is_exact", self.is_exact()))?;
f.write_str("}")?;
Ok(())
}
}
pub fn raw_documents_from(
matches: SetBuf<(DocumentId, TmpMatch)>,
highlights: SetBuf<(DocumentId, Highlight)>,
) -> Vec<RawDocument>
{
let mut docs_ranges: Vec<(_, Range, _)> = Vec::new();
let mut matches2 = Matches::with_capacity(matches.len());
let matches = matches.linear_group_by_key(|(id, _)| *id);
let highlights = highlights.linear_group_by_key(|(id, _)| *id);
for (mgroup, hgroup) in matches.zip(highlights) {
debug_assert_eq!(mgroup[0].0, hgroup[0].0);
let document_id = mgroup[0].0;
let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0);
let end = start + mgroup.len();
let highlights = hgroup.iter().map(|(_, h)| *h).collect();
docs_ranges.push((document_id, Range { start, end }, highlights));
matches2.extend_from_slice(mgroup);
}
let matches = Arc::new(matches2);
docs_ranges.into_iter().map(|(id, range, highlights)| {
let matches = SharedMatches { range, matches: matches.clone() };
RawDocument::new(id, matches, highlights)
}).collect()
}
#[derive(Debug, Copy, Clone)]
struct Range {
start: usize,
end: usize,
}
#[derive(Clone)]
pub struct SharedMatches {
range: Range,
matches: Arc<Matches>,
}
#[derive(Clone)]
struct Matches {
query_index: Vec<u32>,
distance: Vec<u8>,
attribute: Vec<u16>,
word_index: Vec<u16>,
is_exact: Vec<bool>,
}
impl Matches {
fn with_capacity(cap: usize) -> Matches {
Matches {
query_index: Vec::with_capacity(cap),
distance: Vec::with_capacity(cap),
attribute: Vec::with_capacity(cap),
word_index: Vec::with_capacity(cap),
is_exact: Vec::with_capacity(cap),
}
}
fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch)]) {
for (_, match_) in matches {
self.query_index.push(match_.query_index);
self.distance.push(match_.distance);
self.attribute.push(match_.attribute);
self.word_index.push(match_.word_index);
self.is_exact.push(match_.is_exact);
}
}
}

View File

@ -0,0 +1,24 @@
#[derive(Default, Clone)]
pub struct ReorderedAttrs {
count: usize,
reorders: Vec<Option<u16>>,
}
impl ReorderedAttrs {
pub fn new() -> ReorderedAttrs {
ReorderedAttrs { count: 0, reorders: Vec::new() }
}
pub fn insert_attribute(&mut self, attribute: u16) {
self.reorders.resize(attribute as usize + 1, None);
self.reorders[attribute as usize] = Some(self.count as u16);
self.count += 1;
}
pub fn get(&self, attribute: u16) -> Option<u16> {
match self.reorders.get(attribute as usize) {
Some(Some(attribute)) => Some(*attribute),
_ => None,
}
}
}

34
meilidb-core/src/store.rs Normal file
View File

@ -0,0 +1,34 @@
use std::error::Error;
use fst::Set;
use sdset::SetBuf;
use crate::DocIndex;
pub trait Store {
type Error: Error;
fn words(&self) -> Result<&Set, Self::Error>;
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;
fn synonyms(&self) -> Result<&Set, Self::Error>;
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>;
}
impl<T> Store for &'_ T where T: Store {
type Error = T::Error;
fn words(&self) -> Result<&Set, Self::Error> {
(*self).words()
}
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
(*self).word_indexes(word)
}
fn synonyms(&self) -> Result<&Set, Self::Error> {
(*self).synonyms()
}
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
(*self).alternatives_to(word)
}
}

39
meilidb-data/Cargo.toml Normal file
View File

@ -0,0 +1,39 @@
[package]
name = "meilidb-data"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
arc-swap = "0.4.2"
bincode = "1.1.4"
crossbeam-channel = "0.3.9"
deunicode = "1.0.0"
hashbrown = { version = "0.6.0", features = ["serde"] }
log = "0.4.6"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] }
rocksdb = "0.12.3"
sdset = "0.3.2"
serde = { version = "1.0.99", features = ["derive"] }
serde_json = "1.0.40"
siphasher = "0.3.0"
zerocopy = "0.2.8"
[dependencies.rmp-serde]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
[dependencies.rmpv]
git = "https://github.com/3Hren/msgpack-rust.git"
rev = "40b3d48"
features = ["with-serde"]
[dependencies.fst]
git = "https://github.com/Kerollmops/fst.git"
branch = "arc-byte-slice"
[dev-dependencies]
tempfile = "3.1.0"

113
meilidb-data/src/cf_tree.rs Normal file
View File

@ -0,0 +1,113 @@
use std::sync::Arc;
use crossbeam_channel::{unbounded, Sender, Receiver};
use rocksdb::{DBVector, IteratorMode, Direction};
use crate::RocksDbResult;
#[derive(Clone)]
pub struct CfTree {
index: Arc<CfTreeInner>,
sender: Option<Sender<()>>,
}
struct CfTreeInner {
db: Arc<rocksdb::DB>,
name: String,
}
impl CfTree {
pub fn create(db: Arc<rocksdb::DB>, name: String) -> RocksDbResult<CfTree> {
let mut options = rocksdb::Options::default();
options.create_missing_column_families(true);
let _cf = db.create_cf(&name, &options)?;
let index = Arc::new(CfTreeInner { db, name });
Ok(CfTree { index, sender: None })
}
pub fn create_with_subcription(
db: Arc<rocksdb::DB>,
name: String,
) -> RocksDbResult<(CfTree, Receiver<()>)>
{
let mut options = rocksdb::Options::default();
options.create_missing_column_families(true);
let _cf = db.create_cf(&name, &options)?;
let index = Arc::new(CfTreeInner { db, name });
let (sender, receiver) = unbounded();
Ok((CfTree { index, sender: Some(sender) }, receiver))
}
pub fn insert<K, V>(&self, key: K, value: V) -> RocksDbResult<()>
where K: AsRef<[u8]>,
V: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let result = self.index.db.put_cf(cf, key, value);
if let Some(sender) = &self.sender {
let _err = sender.send(());
}
result
}
pub fn get<K>(&self, key: K) -> RocksDbResult<Option<DBVector>>
where K: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.get_cf(cf, key)
}
pub fn remove<K>(&self, key: K) -> RocksDbResult<()>
where K: AsRef<[u8]>
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
self.index.db.delete_cf(cf, key)
}
/// Start and end key range is inclusive on both bounds.
pub fn range<KS, KE>(&self, start: KS, end: KE) -> RocksDbResult<CfIter>
where KS: AsRef<[u8]>,
KE: AsRef<[u8]>,
{
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let mut iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
iter.set_mode(IteratorMode::From(start.as_ref(), Direction::Forward));
let end_bound = Box::from(end.as_ref());
Ok(CfIter { iter, end_bound: Some(end_bound) })
}
pub fn iter(&self) -> RocksDbResult<CfIter> {
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let iter = self.index.db.iterator_cf(cf, IteratorMode::Start)?;
Ok(CfIter { iter, end_bound: None })
}
pub fn last_key(&self) -> RocksDbResult<Option<Box<[u8]>>> {
let cf = self.index.db.cf_handle(&self.index.name).unwrap();
let mut iter = self.index.db.iterator_cf(cf, IteratorMode::End)?;
Ok(iter.next().map(|(key, _)| key))
}
}
pub struct CfIter<'a> {
iter: rocksdb::DBIterator<'a>,
end_bound: Option<Box<[u8]>>,
}
impl Iterator for CfIter<'_> {
type Item = (Box<[u8]>, Box<[u8]>);
fn next(&mut self) -> Option<Self::Item> {
match (self.iter.next(), &self.end_bound) {
(Some((ref key, _)), Some(end_bound)) if key > end_bound => None,
(Some(entry), _) => Some(entry),
(None, _) => None,
}
}
}

View File

@ -0,0 +1,73 @@
use std::{error, fmt};
use crate::serde::SerializerError;
#[derive(Debug)]
pub enum Error {
SchemaDiffer,
SchemaMissing,
WordIndexMissing,
MissingDocumentId,
RocksDbError(rocksdb::Error),
FstError(fst::Error),
RmpDecodeError(rmp_serde::decode::Error),
RmpEncodeError(rmp_serde::encode::Error),
BincodeError(bincode::Error),
SerializerError(SerializerError),
}
impl From<rocksdb::Error> for Error {
fn from(error: rocksdb::Error) -> Error {
Error::RocksDbError(error)
}
}
impl From<fst::Error> for Error {
fn from(error: fst::Error) -> Error {
Error::FstError(error)
}
}
impl From<rmp_serde::decode::Error> for Error {
fn from(error: rmp_serde::decode::Error) -> Error {
Error::RmpDecodeError(error)
}
}
impl From<rmp_serde::encode::Error> for Error {
fn from(error: rmp_serde::encode::Error) -> Error {
Error::RmpEncodeError(error)
}
}
impl From<bincode::Error> for Error {
fn from(error: bincode::Error) -> Error {
Error::BincodeError(error)
}
}
impl From<SerializerError> for Error {
fn from(error: SerializerError) -> Error {
Error::SerializerError(error)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
SchemaDiffer => write!(f, "schemas differ"),
SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"),
RocksDbError(e) => write!(f, "RocksDB error; {}", e),
FstError(e) => write!(f, "fst error; {}", e),
RmpDecodeError(e) => write!(f, "rmp decode error; {}", e),
RmpEncodeError(e) => write!(f, "rmp encode error; {}", e),
BincodeError(e) => write!(f, "bincode error; {}", e),
SerializerError(e) => write!(f, "serializer error; {}", e),
}
}
}
impl error::Error for Error { }

View File

@ -0,0 +1,12 @@
use std::ops::Deref;
#[derive(Clone)]
pub struct CustomSettingsIndex(pub(crate) crate::CfTree);
impl Deref for CustomSettingsIndex {
type Target = crate::CfTree;
fn deref(&self) -> &Self::Target {
&self.0
}
}

View File

@ -0,0 +1,33 @@
use std::sync::Arc;
use meilidb_core::DocumentId;
use crate::database::Error;
#[derive(Clone)]
pub struct DocsWordsIndex(pub crate::CfTree);
impl DocsWordsIndex {
pub fn doc_words(&self, id: DocumentId) -> Result<Option<fst::Set>, Error> {
let key = id.0.to_be_bytes();
match self.0.get(key)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None)
}
}
pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> {
let key = id.0.to_be_bytes();
self.0.insert(key, words.as_fst().as_bytes())?;
Ok(())
}
pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> {
let key = id.0.to_be_bytes();
self.0.remove(key)?;
Ok(())
}
}
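
A small in-crate round-trip sketch (not in the changeset) for the store above; the key is the big-endian `u64` of the document id and the value is the raw bytes of the words `fst::Set`:

    use meilidb_core::DocumentId;

    fn roundtrip(index: &DocsWordsIndex) -> Result<(), Error> {
        let id = DocumentId(42);
        // from_iter requires lexicographically sorted input
        let words = fst::Set::from_iter(vec!["chat", "chien"]).unwrap();
        index.set_doc_words(id, &words)?;
        let read = index.doc_words(id)?.expect("the words were just written");
        assert_eq!(read.len(), 2);
        Ok(())
    }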

View File

@ -0,0 +1,90 @@
use std::convert::TryInto;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rocksdb::DBVector;
use crate::document_attr_key::DocumentAttrKey;
use crate::RocksDbResult;
fn document_fields_range(id: DocumentId) -> ([u8; 10], [u8; 10]) {
let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();
(start, end)
}
#[derive(Clone)]
pub struct DocumentsIndex(pub(crate) crate::CfTree);
impl DocumentsIndex {
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<Option<DBVector>> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.get(key)
}
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> RocksDbResult<()> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.insert(key, value)?;
Ok(())
}
pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> RocksDbResult<()> {
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
self.0.remove(key)?;
Ok(())
}
pub fn del_all_document_fields(&self, id: DocumentId) -> RocksDbResult<usize> {
let (start, end) = document_fields_range(id);
let mut count = 0;
for (key, _) in self.0.range(start, end)? {
self.0.remove(key)?;
count += 1;
}
Ok(count)
}
pub fn document_fields(&self, id: DocumentId) -> RocksDbResult<DocumentFieldsIter> {
let (start, end) = document_fields_range(id);
let iter = self.0.range(start, end)?;
Ok(DocumentFieldsIter(iter))
}
pub fn len(&self) -> RocksDbResult<u64> {
let mut last_document_id = None;
let mut count = 0;
for (key, _) in self.0.iter()? {
let array = key.as_ref().try_into().unwrap();
let document_id = DocumentAttrKey::from_be_bytes(array).document_id;
if Some(document_id) != last_document_id {
last_document_id = Some(document_id);
count += 1;
}
}
Ok(count)
}
}
pub struct DocumentFieldsIter<'a>(crate::CfIter<'a>);
impl Iterator for DocumentFieldsIter<'_> {
type Item = (SchemaAttr, Box<[u8]>);
fn next(&mut self) -> Option<Self::Item> {
match self.0.next() {
Some((key, value)) => {
let array = key.as_ref().try_into().unwrap();
let key = DocumentAttrKey::from_be_bytes(array);
Some((key.attribute, value))
},
None => None,
}
}
}

View File

@ -0,0 +1,102 @@
use std::sync::Arc;
use std::convert::TryInto;
use meilidb_schema::Schema;
use crate::ranked_map::RankedMap;
use crate::database::Error;
const SCHEMA_KEY: &str = "schema";
const WORDS_KEY: &str = "words";
const SYNONYMS_KEY: &str = "synonyms";
const RANKED_MAP_KEY: &str = "ranked-map";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
#[derive(Clone)]
pub struct MainIndex(pub(crate) crate::CfTree);
impl MainIndex {
pub fn schema(&self) -> Result<Option<Schema>, Error> {
match self.0.get(SCHEMA_KEY)? {
Some(bytes) => {
let schema = Schema::read_from_bin(bytes.as_ref())?;
Ok(Some(schema))
},
None => Ok(None),
}
}
pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> {
let mut bytes = Vec::new();
schema.write_to_bin(&mut bytes)?;
self.0.insert(SCHEMA_KEY, bytes)?;
Ok(())
}
pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
match self.0.get(WORDS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None),
}
}
pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
self.0.insert(WORDS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
}
pub fn synonyms_set(&self) -> Result<Option<fst::Set>, Error> {
match self.0.get(SYNONYMS_KEY)? {
Some(bytes) => {
let len = bytes.len();
let value = Arc::from(bytes.as_ref());
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
None => Ok(None),
}
}
pub fn set_synonyms_set(&self, value: &fst::Set) -> Result<(), Error> {
self.0.insert(SYNONYMS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
}
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
match self.0.get(RANKED_MAP_KEY)? {
Some(bytes) => {
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
Ok(Some(ranked_map))
},
None => Ok(None),
}
}
pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> {
let mut bytes = Vec::new();
value.write_to_bin(&mut bytes)?;
self.0.insert(RANKED_MAP_KEY, bytes)?;
Ok(())
}
pub fn number_of_documents(&self) -> Result<u64, Error> {
match self.0.get(NUMBER_OF_DOCUMENTS_KEY)? {
Some(bytes) => {
let array = (*bytes).try_into().unwrap();
Ok(u64::from_be_bytes(array))
},
None => Ok(0),
}
}
pub fn set_number_of_documents<F>(&self, f: F) -> Result<u64, Error>
where F: FnOnce(u64) -> u64,
{
let new = self.number_of_documents().map(f)?;
self.0.insert(NUMBER_OF_DOCUMENTS_KEY, new.to_be_bytes())?;
Ok(new)
}
}
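
A small in-crate sketch (not part of the changeset) of the read-modify-write counter above, mirroring how `apply_documents_addition` and `apply_documents_deletion` use it further down:

    fn record_insertions(main: &MainIndex, inserted: u64) -> Result<(), Error> {
        let before = main.number_of_documents()?; // 0 when the key has never been written
        let after = main.set_number_of_documents(|old| old + inserted)?;
        assert_eq!(after, before + inserted);
        Ok(())
    }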

View File

@ -0,0 +1,487 @@
use std::collections::{HashSet, BTreeMap};
use std::convert::TryInto;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::thread;
use std::time::{Duration, Instant};
use arc_swap::{ArcSwap, ArcSwapOption, Guard};
use crossbeam_channel::Receiver;
use meilidb_core::criterion::Criteria;
use meilidb_core::{DocIndex, Store, DocumentId, QueryBuilder};
use meilidb_schema::Schema;
use sdset::SetBuf;
use serde::{de, Serialize, Deserialize};
use crate::CfTree;
use crate::ranked_map::RankedMap;
use crate::serde::{Deserializer, DeserializerError};
pub use self::custom_settings_index::CustomSettingsIndex;
use self::docs_words_index::DocsWordsIndex;
use self::documents_index::DocumentsIndex;
use self::main_index::MainIndex;
use self::synonyms_index::SynonymsIndex;
use self::words_index::WordsIndex;
use crate::RocksDbResult;
use crate::database::{
Error,
DocumentsAddition, DocumentsDeletion,
SynonymsAddition, SynonymsDeletion,
apply_documents_addition, apply_documents_deletion,
apply_synonyms_addition, apply_synonyms_deletion,
};
mod custom_settings_index;
mod docs_words_index;
mod documents_index;
mod main_index;
mod synonyms_index;
mod words_index;
#[derive(Deserialize)]
enum UpdateOwned {
DocumentsAddition(Vec<rmpv::Value>),
DocumentsDeletion(Vec<DocumentId>),
SynonymsAddition(BTreeMap<String, Vec<String>>),
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}
#[derive(Serialize)]
enum Update {
DocumentsAddition(Vec<rmpv::Value>),
DocumentsDeletion(Vec<DocumentId>),
SynonymsAddition(BTreeMap<String, Vec<String>>),
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
}
#[derive(Clone, Serialize, Deserialize)]
pub enum UpdateType {
DocumentsAddition { number: usize },
DocumentsDeletion { number: usize },
SynonymsAddition { number: usize },
SynonymsDeletion { number: usize },
}
#[derive(Clone, Serialize, Deserialize)]
pub struct DetailedDuration {
main: Duration,
}
#[derive(Clone, Serialize, Deserialize)]
pub struct UpdateStatus {
pub update_id: u64,
pub update_type: UpdateType,
pub result: Result<(), String>,
pub detailed_duration: DetailedDuration,
}
fn spawn_update_system(index: Index, subscription: Receiver<()>) -> thread::JoinHandle<()> {
thread::spawn(move || {
let mut subscription = subscription.into_iter();
loop {
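// process pending updates in ascending id order; a fresh iterator is created on
// every pass, so updates queued while the previous one was applied are also seen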
while let Some((key, _)) = index.updates_index.iter().unwrap().next() {
let update_id = key.as_ref().try_into().map(u64::from_be_bytes).unwrap();
let updates = &index.updates_index;
let results = &index.updates_results_index;
let update = updates.get(&key).unwrap().unwrap();
let (update_type, result, duration) = match rmp_serde::from_read_ref(&update).unwrap() {
UpdateOwned::DocumentsAddition(documents) => {
let update_type = UpdateType::DocumentsAddition { number: documents.len() };
let ranked_map = index.cache.load().ranked_map.clone();
let start = Instant::now();
let result = apply_documents_addition(&index, ranked_map, documents);
(update_type, result, start.elapsed())
},
UpdateOwned::DocumentsDeletion(documents) => {
let update_type = UpdateType::DocumentsDeletion { number: documents.len() };
let ranked_map = index.cache.load().ranked_map.clone();
let start = Instant::now();
let result = apply_documents_deletion(&index, ranked_map, documents);
(update_type, result, start.elapsed())
},
UpdateOwned::SynonymsAddition(synonyms) => {
let update_type = UpdateType::SynonymsAddition { number: synonyms.len() };
let start = Instant::now();
let result = apply_synonyms_addition(&index, synonyms);
(update_type, result, start.elapsed())
},
UpdateOwned::SynonymsDeletion(synonyms) => {
let update_type = UpdateType::SynonymsDeletion { number: synonyms.len() };
let start = Instant::now();
let result = apply_synonyms_deletion(&index, synonyms);
(update_type, result, start.elapsed())
},
};
let detailed_duration = DetailedDuration { main: duration };
let status = UpdateStatus {
update_id,
update_type,
result: result.map_err(|e| e.to_string()),
detailed_duration,
};
if let Some(callback) = &*index.update_callback.load() {
(callback)(status.clone());
}
let value = bincode::serialize(&status).unwrap();
results.insert(&key, value).unwrap();
updates.remove(&key).unwrap();
}
// this subscription is just used to block
// the loop until a new update is inserted
subscription.next();
}
})
}
fn last_update_id(
update_index: &crate::CfTree,
update_results_index: &crate::CfTree,
) -> RocksDbResult<u64>
{
let uikey = match update_index.last_key()? {
Some(key) => Some(key.as_ref().try_into().map(u64::from_be_bytes).unwrap()),
None => None,
};
let urikey = match update_results_index.last_key()? {
Some(key) => Some(key.as_ref().try_into().map(u64::from_be_bytes).unwrap()),
None => None,
};
Ok(uikey.max(urikey).unwrap_or(0))
}
#[derive(Copy, Clone)]
pub struct IndexStats {
pub number_of_words: usize,
pub number_of_documents: u64,
pub number_attrs_in_ranked_map: usize,
}
#[derive(Clone)]
pub struct Index {
pub(crate) cache: Arc<ArcSwap<Cache>>,
// TODO this will be a snapshot in the future
main_index: MainIndex,
synonyms_index: SynonymsIndex,
words_index: WordsIndex,
docs_words_index: DocsWordsIndex,
documents_index: DocumentsIndex,
custom_settings_index: CustomSettingsIndex,
// used by the update system
updates_id: Arc<AtomicU64>,
updates_index: crate::CfTree,
updates_results_index: crate::CfTree,
update_callback: Arc<ArcSwapOption<Box<dyn Fn(UpdateStatus) + Send + Sync + 'static>>>,
}
pub(crate) struct Cache {
pub words: Arc<fst::Set>,
pub synonyms: Arc<fst::Set>,
pub schema: Schema,
pub ranked_map: RankedMap,
pub number_of_documents: u64,
}
impl Index {
pub fn new(db: Arc<rocksdb::DB>, name: &str) -> Result<Index, Error> {
Index::new_raw(db, name, None)
}
pub fn with_schema(db: Arc<rocksdb::DB>, name: &str, schema: Schema) -> Result<Index, Error> {
Index::new_raw(db, name, Some(schema))
}
fn new_raw(db: Arc<rocksdb::DB>, name: &str, schema: Option<Schema>) -> Result<Index, Error> {
let main_index = CfTree::create(db.clone(), name.to_string()).map(MainIndex)?;
let synonyms_index = CfTree::create(db.clone(), format!("{}-synonyms", name)).map(SynonymsIndex)?;
let words_index = CfTree::create(db.clone(), format!("{}-words", name)).map(WordsIndex)?;
let docs_words_index = CfTree::create(db.clone(), format!("{}-docs-words", name)).map(DocsWordsIndex)?;
let documents_index = CfTree::create(db.clone(), format!("{}-documents", name)).map(DocumentsIndex)?;
let custom_settings_index = CfTree::create(db.clone(), format!("{}-custom", name)).map(CustomSettingsIndex)?;
let (updates_index, subscription) = CfTree::create_with_subcription(db.clone(), format!("{}-updates", name))?;
let updates_results_index = CfTree::create(db.clone(), format!("{}-updates-results", name))?;
let words = match main_index.words_set()? {
Some(words) => Arc::new(words),
None => Arc::new(fst::Set::default()),
};
let synonyms = match main_index.synonyms_set()? {
Some(synonyms) => Arc::new(synonyms),
None => Arc::new(fst::Set::default()),
};
let schema = match (schema, main_index.schema()?) {
(Some(ref expected), Some(ref current)) if current != expected => {
return Err(Error::SchemaDiffer)
},
(Some(expected), Some(_)) => expected,
(Some(expected), None) => {
main_index.set_schema(&expected)?;
expected
},
(None, Some(current)) => current,
(None, None) => return Err(Error::SchemaMissing),
};
let ranked_map = match main_index.ranked_map()? {
Some(map) => map,
None => RankedMap::default(),
};
let number_of_documents = documents_index.len()?;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
let cache = Arc::new(ArcSwap::from_pointee(cache));
let last_update_id = last_update_id(&updates_index, &updates_results_index)?;
let updates_id = Arc::new(AtomicU64::new(last_update_id + 1));
let index = Index {
cache,
main_index,
synonyms_index,
words_index,
docs_words_index,
documents_index,
custom_settings_index,
updates_id,
updates_index,
updates_results_index,
update_callback: Arc::new(ArcSwapOption::empty()),
};
let _handle = spawn_update_system(index.clone(), subscription);
Ok(index)
}
pub fn set_update_callback<F>(&self, callback: F)
where F: Fn(UpdateStatus) + Send + Sync + 'static
{
self.update_callback.store(Some(Arc::new(Box::new(callback))));
}
pub fn unset_update_callback(&self) {
self.update_callback.store(None);
}
pub fn stats(&self) -> RocksDbResult<IndexStats> {
let cache = self.cache.load();
Ok(IndexStats {
number_of_words: cache.words.len(),
number_of_documents: cache.number_of_documents,
number_attrs_in_ranked_map: cache.ranked_map.len(),
})
}
pub fn query_builder(&self) -> QueryBuilder<RefIndex> {
let ref_index = self.as_ref();
QueryBuilder::new(ref_index)
}
pub fn query_builder_with_criteria<'c>(
&self,
criteria: Criteria<'c>,
) -> QueryBuilder<'c, RefIndex>
{
let ref_index = self.as_ref();
QueryBuilder::with_criteria(ref_index, criteria)
}
pub fn as_ref(&self) -> RefIndex {
RefIndex {
cache: self.cache.load(),
main_index: &self.main_index,
synonyms_index: &self.synonyms_index,
words_index: &self.words_index,
docs_words_index: &self.docs_words_index,
documents_index: &self.documents_index,
custom_settings_index: &self.custom_settings_index,
}
}
pub fn schema(&self) -> Schema {
self.cache.load().schema.clone()
}
pub fn custom_settings(&self) -> CustomSettingsIndex {
self.custom_settings_index.clone()
}
pub fn number_of_documents(&self) -> u64 {
self.cache.load().number_of_documents
}
pub fn documents_addition<D>(&self) -> DocumentsAddition<D> {
DocumentsAddition::new(self)
}
pub fn documents_deletion(&self) -> DocumentsDeletion {
DocumentsDeletion::new(self)
}
pub fn synonyms_addition(&self) -> SynonymsAddition {
SynonymsAddition::new(self)
}
pub fn synonyms_deletion(&self) -> SynonymsDeletion {
SynonymsDeletion::new(self)
}
pub fn update_status(
&self,
update_id: u64,
) -> Result<Option<UpdateStatus>, Error>
{
let update_id = update_id.to_be_bytes();
match self.updates_results_index.get(update_id)? {
Some(value) => {
let value = bincode::deserialize(&value)?;
Ok(Some(value))
},
None => Ok(None),
}
}
pub fn update_status_blocking(
&self,
update_id: u64,
) -> Result<UpdateStatus, Error>
{
// if we find the update result return it now
if let Some(result) = self.update_status(update_id)? {
return Ok(result)
}
loop {
if self.updates_results_index.get(&update_id.to_be_bytes())?.is_some() { break }
std::thread::sleep(Duration::from_millis(300));
}
// the polling loop above has exited, which means the update result
// has been inserted in the tree; retrieve it
Ok(self.update_status(update_id)?.unwrap())
}
pub fn document<T>(
&self,
fields: Option<&HashSet<&str>>,
id: DocumentId,
) -> Result<Option<T>, DeserializerError>
where T: de::DeserializeOwned,
{
let schema = self.schema();
let fields = match fields {
Some(fields) => fields.into_iter().map(|name| schema.attribute(name)).collect(),
None => None,
};
let mut deserializer = Deserializer {
document_id: id,
index: &self,
fields: fields.as_ref(),
};
// TODO: currently we return an error if all document fields are missing,
// returning None would have been better
T::deserialize(&mut deserializer).map(Some)
}
}
impl Index {
pub(crate) fn push_documents_addition<D>(&self, addition: Vec<D>) -> Result<u64, Error>
where D: serde::Serialize
{
let mut values = Vec::with_capacity(addition.len());
for add in addition {
let vec = rmp_serde::to_vec_named(&add)?;
let add = rmp_serde::from_read(&vec[..])?;
values.push(add);
}
let addition = Update::DocumentsAddition(values);
let update = rmp_serde::to_vec_named(&addition)?;
self.raw_push_update(update)
}
pub(crate) fn push_documents_deletion(
&self,
deletion: Vec<DocumentId>,
) -> Result<u64, Error>
{
let deletion = Update::DocumentsDeletion(deletion);
let update = rmp_serde::to_vec_named(&deletion)?;
self.raw_push_update(update)
}
pub(crate) fn push_synonyms_addition(
&self,
addition: BTreeMap<String, Vec<String>>,
) -> Result<u64, Error>
{
let addition = Update::SynonymsAddition(addition);
let update = rmp_serde::to_vec_named(&addition)?;
self.raw_push_update(update)
}
pub(crate) fn push_synonyms_deletion(
&self,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> Result<u64, Error>
{
let deletion = Update::SynonymsDeletion(deletion);
let update = rmp_serde::to_vec_named(&deletion)?;
self.raw_push_update(update)
}
fn raw_push_update(&self, raw_update: Vec<u8>) -> Result<u64, Error> {
let update_id = self.updates_id.fetch_add(1, Ordering::SeqCst);
let update_id_array = update_id.to_be_bytes();
self.updates_index.insert(update_id_array, raw_update)?;
Ok(update_id)
}
}
pub struct RefIndex<'a> {
pub(crate) cache: Guard<'static, Arc<Cache>>,
pub main_index: &'a MainIndex,
pub synonyms_index: &'a SynonymsIndex,
pub words_index: &'a WordsIndex,
pub docs_words_index: &'a DocsWordsIndex,
pub documents_index: &'a DocumentsIndex,
pub custom_settings_index: &'a CustomSettingsIndex,
}
impl Store for RefIndex<'_> {
type Error = Error;
fn words(&self) -> Result<&fst::Set, Self::Error> {
Ok(&self.cache.words)
}
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
Ok(self.words_index.doc_indexes(word)?)
}
fn synonyms(&self) -> Result<&fst::Set, Self::Error> {
Ok(&self.cache.synonyms)
}
fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, Self::Error> {
Ok(self.synonyms_index.alternatives_to(word)?)
}
}
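
A hedged caller-side sketch (not part of the changeset) of the update pipeline defined above, written as in-crate code since `Error` and `UpdateStatus` are not re-exported at the crate root; the `Movie` struct and its `id` identifier attribute are illustrative assumptions about the schema:

    #[derive(serde::Serialize)]
    struct Movie {
        id: u64, // assumed to be the schema identifier attribute
        title: String,
    }

    fn add_and_wait(index: &Index) -> Result<(), Error> {
        // invoked by the update thread once an update has been applied
        index.set_update_callback(|status: UpdateStatus| {
            println!("update {} done: {:?}", status.update_id, status.result);
        });

        let mut addition = index.documents_addition();
        addition.update_document(Movie { id: 1, title: "Hello".into() });
        let update_id = addition.finalize()?; // enqueues the update and returns its id

        // polls the updates-results column family every 300ms until the status is written
        let status = index.update_status_blocking(update_id)?;
        assert_eq!(status.update_id, update_id);
        Ok(())
    }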

View File

@ -0,0 +1,21 @@
use crate::RocksDbResult;
#[derive(Clone)]
pub struct SynonymsIndex(pub(crate) crate::CfTree);
impl SynonymsIndex {
pub fn alternatives_to(&self, word: &[u8]) -> RocksDbResult<Option<fst::Set>> {
match self.0.get(word)? {
Some(vector) => Ok(Some(fst::Set::from_bytes(vector.to_vec()).unwrap())),
None => Ok(None),
}
}
pub fn set_alternatives_to(&self, word: &[u8], value: Vec<u8>) -> RocksDbResult<()> {
self.0.insert(word, value).map(drop)
}
pub fn del_alternatives_of(&self, word: &[u8]) -> RocksDbResult<()> {
self.0.remove(word).map(drop)
}
}

View File

@ -0,0 +1,45 @@
use meilidb_core::DocIndex;
use sdset::{Set, SetBuf};
use zerocopy::{LayoutVerified, AsBytes};
use crate::RocksDbResult;
#[derive(Clone)]
pub struct WordsIndex(pub(crate) crate::CfTree);
impl WordsIndex {
pub fn doc_indexes(&self, word: &[u8]) -> RocksDbResult<Option<SetBuf<DocIndex>>> {
// we must force an allocation to make the memory aligned
match self.0.get(word)? {
Some(bytes) => {
let vec = match LayoutVerified::new_slice(bytes.as_ref()) {
Some(layout) => layout.into_slice().to_vec(),
None => {
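// the bytes returned by RocksDB are not guaranteed to be aligned for DocIndex:
// copy them into a freshly allocated Vec<DocIndex> (the length is always a
// multiple of size_of::<DocIndex>() since the value was written by set_doc_indexes)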
let len = bytes.as_ref().len();
let count = len / std::mem::size_of::<DocIndex>();
let mut buf: Vec<DocIndex> = Vec::with_capacity(count);
unsafe {
let src = bytes.as_ref().as_ptr();
let dst = buf.as_mut_ptr() as *mut u8;
std::ptr::copy_nonoverlapping(src, dst, len);
buf.set_len(count);
}
buf
}
};
let setbuf = SetBuf::new_unchecked(vec);
Ok(Some(setbuf))
},
None => Ok(None),
}
}
pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> RocksDbResult<()> {
self.0.insert(word, set.as_bytes()).map(drop)
}
pub fn del_doc_indexes(&self, word: &[u8]) -> RocksDbResult<()> {
self.0.remove(word).map(drop)
}
}

View File

@ -0,0 +1,115 @@
use std::collections::hash_map::Entry;
use std::collections::{HashSet, HashMap};
use std::path::Path;
use std::sync::Arc;
use std::sync::RwLock;
use meilidb_schema::Schema;
mod error;
mod index;
mod update;
pub use self::error::Error;
pub use self::index::{Index, CustomSettingsIndex};
pub use self::update::DocumentsAddition;
pub use self::update::DocumentsDeletion;
pub use self::update::SynonymsAddition;
pub use self::update::SynonymsDeletion;
use self::update::apply_documents_addition;
use self::update::apply_documents_deletion;
use self::update::apply_synonyms_addition;
use self::update::apply_synonyms_deletion;
const INDEXES_KEY: &str = "indexes";
fn load_indexes(tree: &rocksdb::DB) -> Result<HashSet<String>, Error> {
match tree.get(INDEXES_KEY)? {
Some(bytes) => Ok(bincode::deserialize(&bytes)?),
None => Ok(HashSet::new())
}
}
pub struct Database {
cache: RwLock<HashMap<String, Index>>,
inner: Arc<rocksdb::DB>,
}
impl Database {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
let cache = RwLock::new(HashMap::new());
let mut options = rocksdb::Options::default();
options.create_if_missing(true);
let cfs = rocksdb::DB::list_cf(&options, &path).unwrap_or_default();
let inner = Arc::new(rocksdb::DB::open_cf(&options, path, cfs)?);
let indexes = load_indexes(&inner)?;
let database = Database { cache, inner };
for index in indexes {
database.open_index(&index)?;
}
Ok(database)
}
pub fn indexes(&self) -> Result<HashSet<String>, Error> {
load_indexes(&self.inner)
}
fn set_indexes(&self, value: &HashSet<String>) -> Result<(), Error> {
let bytes = bincode::serialize(value)?;
self.inner.put(INDEXES_KEY, bytes)?;
Ok(())
}
pub fn open_index(&self, name: &str) -> Result<Option<Index>, Error> {
{
let cache = self.cache.read().unwrap();
if let Some(index) = cache.get(name).cloned() {
return Ok(Some(index))
}
}
let mut cache = self.cache.write().unwrap();
let index = match cache.entry(name.to_string()) {
Entry::Occupied(occupied) => {
occupied.get().clone()
},
Entry::Vacant(vacant) => {
if !self.indexes()?.contains(name) {
return Ok(None)
}
let index = Index::new(self.inner.clone(), name)?;
vacant.insert(index).clone()
},
};
Ok(Some(index))
}
pub fn create_index(&self, name: &str, schema: Schema) -> Result<Index, Error> {
let mut cache = self.cache.write().unwrap();
let index = match cache.entry(name.to_string()) {
Entry::Occupied(occupied) => {
occupied.get().clone()
},
Entry::Vacant(vacant) => {
let index = Index::with_schema(self.inner.clone(), name, schema)?;
let mut indexes = self.indexes()?;
indexes.insert(name.to_string());
self.set_indexes(&indexes)?;
vacant.insert(index).clone()
},
};
Ok(index)
}
}
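
A short usage sketch (not in the changeset): callers typically combine `open_index` and `create_index`, since `open_index` returns `Ok(None)` for a name that was never registered under the "indexes" key. The `Schema` value is assumed to be built with meilidb-schema, which is outside this diff:

    fn get_or_create(database: &Database, name: &str, schema: Schema) -> Result<Index, Error> {
        match database.open_index(name)? {
            // the name is registered: the Index (and its update thread) is now cached
            Some(index) => Ok(index),
            // unknown name: create the column families, register the name and cache the Index
            None => database.create_index(name, schema),
        }
    }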

View File

@ -0,0 +1,139 @@
use std::collections::HashSet;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use sdset::{SetOperation, duo::Union};
use serde::Serialize;
use crate::RankedMap;
use crate::database::{Error, Index, index::Cache, apply_documents_deletion};
use crate::indexer::Indexer;
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
pub struct DocumentsAddition<'a, D> {
index: &'a Index,
documents: Vec<D>,
}
impl<'a, D> DocumentsAddition<'a, D> {
pub fn new(index: &'a Index) -> DocumentsAddition<'a, D> {
DocumentsAddition { index, documents: Vec::new() }
}
pub fn update_document(&mut self, document: D) {
self.documents.push(document);
}
pub fn finalize(self) -> Result<u64, Error>
where D: serde::Serialize
{
self.index.push_documents_addition(self.documents)
}
}
pub fn apply_documents_addition(
index: &Index,
mut ranked_map: RankedMap,
addition: Vec<rmpv::Value>,
) -> Result<(), Error>
{
let mut document_ids = HashSet::new();
let mut document_store = RamDocumentStore::new();
let mut indexer = Indexer::new();
let schema = &index.schema();
let identifier = schema.identifier_name();
for document in addition {
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
// 1. store the document id for future deletion
document_ids.insert(document_id);
// 2. index the document fields in ram stores
let serializer = Serializer {
schema,
document_store: &mut document_store,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
document_id,
};
document.serialize(serializer)?;
}
let ref_index = index.as_ref();
let docs_words = ref_index.docs_words_index;
let documents = ref_index.documents_index;
let main = ref_index.main_index;
let words = ref_index.words_index;
// 1. remove the match indexes of the previous versions of these documents
let documents_to_delete = document_ids.iter().cloned().collect();
apply_documents_deletion(index, ranked_map.clone(), documents_to_delete)?;
// 2. insert new document attributes in the database
for ((id, attr), value) in document_store.into_inner() {
documents.set_document_field(id, attr, value)?;
}
let indexed = indexer.build();
let mut delta_words_builder = SetBuilder::memory();
for (word, delta_set) in indexed.words_doc_indexes {
delta_words_builder.insert(&word).unwrap();
let set = match words.doc_indexes(&word)? {
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
None => delta_set,
};
words.set_doc_indexes(&word, &set)?;
}
for (id, words) in indexed.docs_words {
docs_words.set_doc_words(id, &words)?;
}
let delta_words = delta_words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let words = match main.words_set()? {
Some(words) => {
let op = OpBuilder::new()
.add(words.stream())
.add(delta_words.stream())
.r#union();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => delta_words,
};
main.set_words_set(&words)?;
main.set_ranked_map(&ranked_map)?;
let inserted_documents_len = document_ids.len() as u64;
let number_of_documents = main.set_number_of_documents(|old| old + inserted_documents_len)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(words);
let synonyms = cache.synonyms.clone();
let schema = cache.schema.clone();
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}

View File

@ -0,0 +1,150 @@
use std::collections::{HashMap, HashSet, BTreeSet};
use std::sync::Arc;
use fst::{SetBuilder, Streamer};
use meilidb_core::DocumentId;
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};
use crate::RankedMap;
use crate::serde::extract_document_id;
use crate::database::{Index, Error, index::Cache};
pub struct DocumentsDeletion<'a> {
index: &'a Index,
documents: Vec<DocumentId>,
}
impl<'a> DocumentsDeletion<'a> {
pub fn new(index: &'a Index) -> DocumentsDeletion<'a> {
DocumentsDeletion { index, documents: Vec::new() }
}
pub fn delete_document_by_id(&mut self, document_id: DocumentId) {
self.documents.push(document_id);
}
pub fn delete_document<D>(&mut self, document: D) -> Result<(), Error>
where D: serde::Serialize,
{
let schema = self.index.schema();
let identifier = schema.identifier_name();
let document_id = match extract_document_id(identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
self.delete_document_by_id(document_id);
Ok(())
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_documents_deletion(self.documents)
}
}
impl Extend<DocumentId> for DocumentsDeletion<'_> {
fn extend<T: IntoIterator<Item=DocumentId>>(&mut self, iter: T) {
self.documents.extend(iter)
}
}
pub fn apply_documents_deletion(
index: &Index,
mut ranked_map: RankedMap,
deletion: Vec<DocumentId>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let schema = index.schema();
let docs_words = ref_index.docs_words_index;
let documents = ref_index.documents_index;
let main = ref_index.main_index;
let words = ref_index.words_index;
let idset = SetBuf::from_dirty(deletion);
// collect the ranked attributes according to the schema
let ranked_attrs: Vec<_> = schema.iter()
.filter_map(|(_, attr, prop)| {
if prop.is_ranked() { Some(attr) } else { None }
})
.collect();
let mut words_document_ids = HashMap::new();
for id in idset {
// remove all the ranked attributes from the ranked_map
for ranked_attr in &ranked_attrs {
ranked_map.remove(id, *ranked_attr);
}
if let Some(words) = docs_words.doc_words(id)? {
let mut stream = words.stream();
while let Some(word) = stream.next() {
let word = word.to_vec();
words_document_ids.entry(word).or_insert_with(Vec::new).push(id);
}
}
}
let mut deleted_documents = HashSet::new();
let mut removed_words = BTreeSet::new();
for (word, document_ids) in words_document_ids {
let document_ids = SetBuf::from_dirty(document_ids);
if let Some(doc_indexes) = words.doc_indexes(&word)? {
let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
let doc_indexes = op.into_set_buf();
if !doc_indexes.is_empty() {
words.set_doc_indexes(&word, &doc_indexes)?;
} else {
words.del_doc_indexes(&word)?;
removed_words.insert(word);
}
}
for id in document_ids {
if documents.del_all_document_fields(id)? != 0 {
deleted_documents.insert(id);
}
docs_words.del_doc_words(id)?;
}
}
let removed_words = fst::Set::from_iter(removed_words).unwrap();
let words = match main.words_set()? {
Some(words_set) => {
let op = fst::set::OpBuilder::new()
.add(words_set.stream())
.add(removed_words.stream())
.difference();
let mut words_builder = SetBuilder::memory();
words_builder.extend_stream(op).unwrap();
words_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => fst::Set::default(),
};
main.set_words_set(&words)?;
main.set_ranked_map(&ranked_map)?;
let deleted_documents_len = deleted_documents.len() as u64;
let number_of_documents = main.set_number_of_documents(|old| old - deleted_documents_len)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(words);
let synonyms = cache.synonyms.clone();
let schema = cache.schema.clone();
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}

View File

@ -0,0 +1,9 @@
mod documents_addition;
mod documents_deletion;
mod synonyms_addition;
mod synonyms_deletion;
pub use self::documents_addition::{DocumentsAddition, apply_documents_addition};
pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion};
pub use self::synonyms_addition::{SynonymsAddition, apply_synonyms_addition};
pub use self::synonyms_deletion::{SynonymsDeletion, apply_synonyms_deletion};

View File

@ -0,0 +1,94 @@
use std::collections::BTreeMap;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use sdset::SetBuf;
use crate::database::{Error, Index, index::Cache};
pub struct SynonymsAddition<'a> {
index: &'a Index,
synonyms: BTreeMap<String, Vec<String>>,
}
impl<'a> SynonymsAddition<'a> {
pub fn new(index: &'a Index) -> SynonymsAddition<'a> {
SynonymsAddition { index, synonyms: BTreeMap::new() }
}
pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
where S: AsRef<str>,
T: AsRef<str>,
I: IntoIterator<Item=T>,
{
let synonym = normalize_str(synonym.as_ref());
let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_synonyms_addition(self.synonyms)
}
}
pub fn apply_synonyms_addition(
index: &Index,
addition: BTreeMap<String, Vec<String>>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let synonyms = ref_index.synonyms_index;
let main = ref_index.main_index;
let mut synonyms_builder = SetBuilder::memory();
for (synonym, alternatives) in addition {
synonyms_builder.insert(&synonym).unwrap();
let alternatives = {
let alternatives = SetBuf::from_dirty(alternatives);
let mut alternatives_builder = SetBuilder::memory();
alternatives_builder.extend_iter(alternatives).unwrap();
alternatives_builder.into_inner().unwrap()
};
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
}
let delta_synonyms = synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let synonyms = match main.synonyms_set()? {
Some(synonyms) => {
let op = OpBuilder::new()
.add(synonyms.stream())
.add(delta_synonyms.stream())
.r#union();
let mut synonyms_builder = SetBuilder::memory();
synonyms_builder.extend_stream(op).unwrap();
synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => delta_synonyms,
};
main.set_synonyms_set(&synonyms)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(main.words_set()?.unwrap_or_default());
let ranked_map = cache.ranked_map.clone();
let synonyms = Arc::new(synonyms);
let schema = cache.schema.clone();
let number_of_documents = cache.number_of_documents;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}
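
A hypothetical caller-side sketch (not part of the changeset) queuing a synonyms update through the types above, written as in-crate code:

    fn register_synonyms(index: &Index) -> Result<u64, Error> {
        let mut addition = index.synonyms_addition();
        // the synonym is normalized, the alternatives are lowercased
        addition.add_synonym("street", vec!["avenue", "boulevard"]);
        addition.add_synonym("NYC", vec!["new york", "big apple"]);
        // returns the update id; the update thread applies it asynchronously
        addition.finalize()
    }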

View File

@ -0,0 +1,137 @@
use std::collections::BTreeMap;
use std::iter::FromIterator;
use std::sync::Arc;
use fst::{SetBuilder, set::OpBuilder};
use meilidb_core::normalize_str;
use sdset::SetBuf;
use crate::database::{Error, Index, index::Cache};
pub struct SynonymsDeletion<'a> {
index: &'a Index,
synonyms: BTreeMap<String, Option<Vec<String>>>,
}
impl<'a> SynonymsDeletion<'a> {
pub fn new(index: &'a Index) -> SynonymsDeletion<'a> {
SynonymsDeletion { index, synonyms: BTreeMap::new() }
}
pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
let synonym = normalize_str(synonym.as_ref());
self.synonyms.insert(synonym, None);
}
pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
where S: AsRef<str>,
T: AsRef<str>,
I: Iterator<Item=T>,
{
let synonym = normalize_str(synonym.as_ref());
let value = self.synonyms.entry(synonym).or_insert(None);
let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
match value {
Some(v) => v.extend(alternatives),
None => *value = Some(Vec::from_iter(alternatives)),
}
}
pub fn finalize(self) -> Result<u64, Error> {
self.index.push_synonyms_deletion(self.synonyms)
}
}
pub fn apply_synonyms_deletion(
index: &Index,
deletion: BTreeMap<String, Option<Vec<String>>>,
) -> Result<(), Error>
{
let ref_index = index.as_ref();
let synonyms = ref_index.synonyms_index;
let main = ref_index.main_index;
let mut delete_whole_synonym_builder = SetBuilder::memory();
for (synonym, alternatives) in deletion {
match alternatives {
Some(alternatives) => {
let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?;
let prev_alternatives = match prev_alternatives {
Some(alternatives) => alternatives,
None => continue,
};
let delta_alternatives = {
let alternatives = SetBuf::from_dirty(alternatives);
let mut builder = SetBuilder::memory();
builder.extend_iter(alternatives).unwrap();
builder.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
};
let op = OpBuilder::new()
.add(prev_alternatives.stream())
.add(delta_alternatives.stream())
.difference();
let (alternatives, empty_alternatives) = {
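// the builder's byte length is compared before and after streaming the difference
// to decide whether this synonym still has any alternatives left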
let mut builder = SetBuilder::memory();
let len = builder.get_ref().len();
builder.extend_stream(op).unwrap();
let is_empty = len == builder.get_ref().len();
let alternatives = builder.into_inner().unwrap();
(alternatives, is_empty)
};
if empty_alternatives {
delete_whole_synonym_builder.insert(synonym.as_bytes())?;
} else {
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
}
},
None => {
delete_whole_synonym_builder.insert(&synonym).unwrap();
synonyms.del_alternatives_of(synonym.as_bytes())?;
}
}
}
let delta_synonyms = delete_whole_synonym_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap();
let synonyms = match main.synonyms_set()? {
Some(synonyms) => {
let op = OpBuilder::new()
.add(synonyms.stream())
.add(delta_synonyms.stream())
.difference();
let mut synonyms_builder = SetBuilder::memory();
synonyms_builder.extend_stream(op).unwrap();
synonyms_builder
.into_inner()
.and_then(fst::Set::from_bytes)
.unwrap()
},
None => fst::Set::default(),
};
main.set_synonyms_set(&synonyms)?;
// update the "consistent" view of the Index
let cache = ref_index.cache;
let words = Arc::new(main.words_set()?.unwrap_or_default());
let ranked_map = cache.ranked_map.clone();
let synonyms = Arc::new(synonyms);
let schema = cache.schema.clone();
let number_of_documents = cache.number_of_documents;
let cache = Cache { words, synonyms, schema, ranked_map, number_of_documents };
index.cache.store(Arc::new(cache));
Ok(())
}

View File

@ -0,0 +1,69 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocumentAttrKey {
pub document_id: DocumentId,
pub attribute: SchemaAttr,
}
impl DocumentAttrKey {
pub fn new(document_id: DocumentId, attribute: SchemaAttr) -> DocumentAttrKey {
DocumentAttrKey { document_id, attribute }
}
pub fn to_be_bytes(self) -> [u8; 10] {
let mut output = [0u8; 10];
let document_id = self.document_id.0.to_be_bytes();
let attribute = self.attribute.0.to_be_bytes();
unsafe {
use std::{mem::size_of, ptr::copy_nonoverlapping};
let output = output.as_mut_ptr();
copy_nonoverlapping(document_id.as_ptr(), output, size_of::<u64>());
let output = output.add(size_of::<u64>());
copy_nonoverlapping(attribute.as_ptr(), output, size_of::<u16>());
}
output
}
pub fn from_be_bytes(bytes: [u8; 10]) -> DocumentAttrKey {
let document_id;
let attribute;
unsafe {
use std::ptr::read_unaligned;
let pointer = bytes.as_ptr() as *const _;
let document_id_bytes = read_unaligned(pointer);
document_id = u64::from_be_bytes(document_id_bytes);
let pointer = pointer.add(1) as *const _;
let attribute_bytes = read_unaligned(pointer);
attribute = u16::from_be_bytes(attribute_bytes);
}
DocumentAttrKey {
document_id: DocumentId(document_id),
attribute: SchemaAttr(attribute),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn to_from_be_bytes() {
let document_id = DocumentId(67578308);
let schema_attr = SchemaAttr(3456);
let x = DocumentAttrKey::new(document_id, schema_attr);
assert_eq!(x, DocumentAttrKey::from_be_bytes(x.to_be_bytes()));
}
}
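
The two conversions above rely on raw pointer copies; this safe in-crate equivalent (a sketch, not part of the changeset) makes the 10-byte layout explicit, an 8-byte big-endian document id followed by a 2-byte big-endian attribute:

    use std::convert::TryInto;

    fn to_be_bytes_safe(key: DocumentAttrKey) -> [u8; 10] {
        let mut output = [0u8; 10];
        output[..8].copy_from_slice(&key.document_id.0.to_be_bytes());
        output[8..].copy_from_slice(&key.attribute.0.to_be_bytes());
        output
    }

    fn from_be_bytes_safe(bytes: [u8; 10]) -> DocumentAttrKey {
        let document_id = u64::from_be_bytes(bytes[..8].try_into().unwrap());
        let attribute = u16::from_be_bytes(bytes[8..].try_into().unwrap());
        DocumentAttrKey::new(DocumentId(document_id), SchemaAttr(attribute))
    }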

208
meilidb-data/src/indexer.rs Normal file
View File

@ -0,0 +1,208 @@
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;
use deunicode::deunicode_with_tofu;
use meilidb_core::{DocumentId, DocIndex};
use meilidb_schema::SchemaAttr;
use meilidb_tokenizer::{is_cjk, Tokenizer, SeqTokenizer, Token};
use sdset::SetBuf;
type Word = Vec<u8>; // TODO make it a SmallVec
pub struct Indexer {
word_limit: usize, // the maximum number of indexed words
words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
docs_words: HashMap<DocumentId, Vec<Word>>,
}
pub struct Indexed {
pub words_doc_indexes: BTreeMap<Word, SetBuf<DocIndex>>,
pub docs_words: HashMap<DocumentId, fst::Set>,
}
impl Indexer {
pub fn new() -> Indexer {
Indexer::with_word_limit(1000)
}
pub fn with_word_limit(limit: usize) -> Indexer {
Indexer {
word_limit: limit,
words_doc_indexes: BTreeMap::new(),
docs_words: HashMap::new(),
}
}
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
let lowercase_text = text.to_lowercase();
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
// TODO compute the deunicoded version after the cjk check
let next = if !lowercase_text.contains(is_cjk) && lowercase_text != deunicoded {
Some(deunicoded)
} else {
None
};
let iter = Some(lowercase_text).into_iter().chain(next);
for text in iter {
for token in Tokenizer::new(&text) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
}
}
pub fn index_text_seq<'a, I, IT>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
where I: IntoIterator<Item=&'a str, IntoIter=IT>,
IT: Iterator<Item = &'a str> + Clone,
{
// TODO serialize this to one call to the SeqTokenizer loop
let lowercased: Vec<_> = iter.into_iter().map(str::to_lowercase).collect();
let iter = lowercased.iter().map(|t| t.as_str());
for token in SeqTokenizer::new(iter) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
let deunicoded: Vec<_> = lowercased.into_iter().map(|lowercase_text| {
if lowercase_text.contains(is_cjk) { return lowercase_text }
let deunicoded = deunicode_with_tofu(&lowercase_text, "");
if lowercase_text != deunicoded { deunicoded } else { lowercase_text }
}).collect();
let iter = deunicoded.iter().map(|t| t.as_str());
for token in SeqTokenizer::new(iter) {
let must_continue = index_token(
token,
id,
attr,
self.word_limit,
&mut self.words_doc_indexes,
&mut self.docs_words,
);
if !must_continue { break }
}
}
pub fn build(self) -> Indexed {
let words_doc_indexes = self.words_doc_indexes
.into_iter()
.map(|(word, indexes)| (word, SetBuf::from_dirty(indexes)))
.collect();
let docs_words = self.docs_words
.into_iter()
.map(|(id, mut words)| {
words.sort_unstable();
words.dedup();
(id, fst::Set::from_iter(words).unwrap())
})
.collect();
Indexed { words_doc_indexes, docs_words }
}
}
fn index_token(
token: Token,
id: DocumentId,
attr: SchemaAttr,
word_limit: usize,
words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
docs_words: &mut HashMap<DocumentId, Vec<Word>>,
) -> bool
{
if token.word_index >= word_limit { return false }
match token_to_docindex(id, attr, token) {
Some(docindex) => {
let word = Vec::from(token.word);
words_doc_indexes.entry(word.clone()).or_insert_with(Vec::new).push(docindex);
docs_words.entry(id).or_insert_with(Vec::new).push(word);
},
None => return false,
}
true
}
fn token_to_docindex(id: DocumentId, attr: SchemaAttr, token: Token) -> Option<DocIndex> {
let word_index = u16::try_from(token.word_index).ok()?;
let char_index = u16::try_from(token.char_index).ok()?;
let char_length = u16::try_from(token.word.chars().count()).ok()?;
let docindex = DocIndex {
document_id: id,
attribute: attr.0,
word_index,
char_index,
char_length,
};
Some(docindex)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn strange_apostrophe() {
let mut indexer = Indexer::new();
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text);
let Indexed { words_doc_indexes, .. } = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
// with the ugly apostrophe...
assert!(words_doc_indexes.get(&"léteindre".to_owned().into_bytes()).is_some());
}
#[test]
fn strange_apostrophe_in_sequence() {
let mut indexer = Indexer::new();
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let text = vec!["Zut, laspirateur, jai oublié de léteindre !"];
indexer.index_text_seq(docid, attr, text);
let Indexed { words_doc_indexes, .. } = indexer.build();
assert!(words_doc_indexes.get(&b"l"[..]).is_some());
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
// with the ugly apostrophe...
assert!(words_doc_indexes.get(&"léteindre".to_owned().into_bytes()).is_some());
}
}

15
meilidb-data/src/lib.rs Normal file
View File

@ -0,0 +1,15 @@
mod cf_tree;
mod database;
mod document_attr_key;
mod indexer;
mod number;
mod ranked_map;
mod serde;
pub use self::cf_tree::{CfTree, CfIter};
pub use self::database::{Database, Index, CustomSettingsIndex};
pub use self::number::Number;
pub use self::ranked_map::RankedMap;
pub use self::serde::{compute_document_id, extract_document_id, value_to_string};
pub type RocksDbResult<T> = Result<T, rocksdb::Error>;

View File

@ -0,0 +1,55 @@
use std::num::{ParseIntError, ParseFloatError};
use std::str::FromStr;
use std::fmt;
use ordered_float::OrderedFloat;
use serde::{Serialize, Deserialize};
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Number {
Unsigned(u64),
Signed(i64),
Float(OrderedFloat<f64>),
}
impl FromStr for Number {
type Err = ParseNumberError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let uint_error = match u64::from_str(s) {
Ok(unsigned) => return Ok(Number::Unsigned(unsigned)),
Err(error) => error,
};
let int_error = match i64::from_str(s) {
Ok(signed) => return Ok(Number::Signed(signed)),
Err(error) => error,
};
let float_error = match f64::from_str(s) {
Ok(float) => return Ok(Number::Float(OrderedFloat(float))),
Err(error) => error,
};
Err(ParseNumberError { uint_error, int_error, float_error })
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseNumberError {
uint_error: ParseIntError,
int_error: ParseIntError,
float_error: ParseFloatError,
}
impl fmt::Display for ParseNumberError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.uint_error == self.int_error {
write!(f, "can not parse number: {}, {}", self.uint_error, self.float_error)
} else {
write!(f, "can not parse number: {}, {}, {}",
self.uint_error, self.int_error, self.float_error)
}
}
}
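
A quick sketch of the fallback order implemented above (u64 first, then i64, then f64); it could live in a test module next to this code:

    #[cfg(test)]
    mod parse_tests {
        use super::*;
        use std::str::FromStr;
        use ordered_float::OrderedFloat;

        #[test]
        fn parse_order() {
            assert_eq!(Number::from_str("42"), Ok(Number::Unsigned(42)));
            assert_eq!(Number::from_str("-7"), Ok(Number::Signed(-7)));
            assert_eq!(Number::from_str("3.5"), Ok(Number::Float(OrderedFloat(3.5))));
            assert!(Number::from_str("hello").is_err());
        }
    }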

View File

@ -0,0 +1,36 @@
use std::io::{Read, Write};
use hashbrown::HashMap;
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use crate::Number;
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>);
impl RankedMap {
pub fn len(&self) -> usize {
self.0.len()
}
pub fn insert(&mut self, document: DocumentId, attribute: SchemaAttr, number: Number) {
self.0.insert((document, attribute), number);
}
pub fn remove(&mut self, document: DocumentId, attribute: SchemaAttr) {
self.0.remove(&(document, attribute));
}
pub fn get(&self, document: DocumentId, attribute: SchemaAttr) -> Option<Number> {
self.0.get(&(document, attribute)).cloned()
}
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<RankedMap> {
bincode::deserialize_from(reader).map(RankedMap)
}
pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
bincode::serialize_into(writer, &self.0)
}
}

View File

@ -0,0 +1,180 @@
use std::str::FromStr;
use ordered_float::OrderedFloat;
use serde::ser;
use serde::Serialize;
use super::SerializerError;
use crate::Number;
pub struct ConvertToNumber;
impl ser::Serializer for ConvertToNumber {
type Ok = Number;
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, value: bool) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(u64::from(value)))
}
fn serialize_char(self, _value: char) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "char" })
}
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(i64::from(value)))
}
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(i64::from(value)))
}
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(i64::from(value)))
}
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(value))
}
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(u64::from(value)))
}
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(u64::from(value)))
}
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(u64::from(value)))
}
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(value))
}
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Float(OrderedFloat(f64::from(value))))
}
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Float(OrderedFloat(value)))
}
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
Ok(Number::from_str(value)?)
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnrankableType { type_name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnrankableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnrankableType { type_name: "map" })
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnrankableType { type_name: "struct variant" })
}
}

View File

@ -1,11 +1,11 @@
use serde::Serialize;
use serde::ser;
use crate::database::serde::SerializerError;
use super::SerializerError;
pub struct KeyToStringSerializer;
pub struct ConvertToString;
impl ser::Serializer for KeyToStringSerializer {
impl ser::Serializer for ConvertToString {
type Ok = String;
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
@ -16,22 +16,52 @@ impl ser::Serializer for KeyToStringSerializer {
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "boolean" })
}
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
f32 => serialize_f32,
f64 => serialize_f64,
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
@ -39,25 +69,25 @@ impl ser::Serializer for KeyToStringSerializer {
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
Err(SerializerError::UnserializableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
Err(SerializerError::UnserializableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
Err(SerializerError::UnserializableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
@ -67,7 +97,7 @@ impl ser::Serializer for KeyToStringSerializer {
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
Err(SerializerError::UnserializableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
@ -89,15 +119,15 @@ impl ser::Serializer for KeyToStringSerializer {
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "sequence" })
Err(SerializerError::UnserializableType { type_name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
Err(SerializerError::UnserializableType { type_name: "tuple" })
}
fn serialize_tuple_struct(
@ -106,7 +136,7 @@ impl ser::Serializer for KeyToStringSerializer {
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
@ -117,11 +147,11 @@ impl ser::Serializer for KeyToStringSerializer {
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnserializableType { name: "map" })
Err(SerializerError::UnserializableType { type_name: "map" })
}
fn serialize_struct(
@ -130,7 +160,7 @@ impl ser::Serializer for KeyToStringSerializer {
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct" })
Err(SerializerError::UnserializableType { type_name: "struct" })
}
fn serialize_struct_variant(
@ -141,6 +171,6 @@ impl ser::Serializer for KeyToStringSerializer {
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
Err(SerializerError::UnserializableType { type_name: "struct variant" })
}
}
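
Taken together, the methods above mean this serializer only turns primitive values into owned strings and refuses every compound type. A rough sketch of that contract, using the `ConvertToString` name the rest of the new code relies on (the helper function itself is hypothetical):

```rust
use serde::Serialize;

// Hypothetical helper: ConvertToString is assumed to be the unit-struct serializer above.
fn to_key_string<T: Serialize>(value: &T) -> Result<String, SerializerError> {
    value.serialize(ConvertToString)
}

fn demo() {
    // Numbers become their decimal representation...
    assert_eq!(to_key_string(&42u32).unwrap(), "42");
    // ...while sequences, maps and other compound types are rejected.
    assert!(to_key_string(&vec![1, 2, 3]).is_err());
}
```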


@ -0,0 +1,132 @@
use std::collections::HashSet;
use std::io::Cursor;
use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::decode::{Deserializer as RmpDeserializer, ReadReader};
use rmp_serde::decode::{Error as RmpError};
use serde::{de, forward_to_deserialize_any};
use crate::database::Index;
#[derive(Debug)]
pub enum DeserializerError {
RmpError(RmpError),
RocksDbError(rocksdb::Error),
Custom(String),
}
impl de::Error for DeserializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
DeserializerError::Custom(msg.to_string())
}
}
impl fmt::Display for DeserializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DeserializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
DeserializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
DeserializerError::Custom(s) => f.write_str(s),
}
}
}
impl Error for DeserializerError {}
impl From<RmpError> for DeserializerError {
fn from(error: RmpError) -> DeserializerError {
DeserializerError::RmpError(error)
}
}
impl From<rocksdb::Error> for DeserializerError {
fn from(error: rocksdb::Error) -> DeserializerError {
DeserializerError::RocksDbError(error)
}
}
pub struct Deserializer<'a> {
pub document_id: DocumentId,
pub index: &'a Index,
pub fields: Option<&'a HashSet<SchemaAttr>>,
}
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
{
type Error = DeserializerError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
self.deserialize_map(visitor)
}
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct struct enum identifier ignored_any
}
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
let schema = self.index.schema();
let documents = self.index.as_ref().documents_index;
let iter = documents
.document_fields(self.document_id)?
.filter_map(|(attr, value)| {
let is_displayed = schema.props(attr).is_displayed();
if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
let attribute_name = schema.attribute_name(attr);
Some((attribute_name, Value::new(value)))
} else {
None
}
});
let map_deserializer = de::value::MapDeserializer::new(iter);
let result = visitor.visit_map(map_deserializer).map_err(DeserializerError::from);
result
}
}
struct Value<A>(RmpDeserializer<ReadReader<Cursor<A>>>) where A: AsRef<[u8]>;
impl<A> Value<A> where A: AsRef<[u8]>
{
fn new(value: A) -> Value<A> {
Value(RmpDeserializer::new(Cursor::new(value)))
}
}
impl<'de, A> de::IntoDeserializer<'de, RmpError> for Value<A>
where A: AsRef<[u8]>,
{
type Deserializer = Self;
fn into_deserializer(self) -> Self::Deserializer {
self
}
}
impl<'de, 'a, A> de::Deserializer<'de> for Value<A>
where A: AsRef<[u8]>,
{
type Error = RmpError;
fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
self.0.deserialize_any(visitor)
}
forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct map struct enum identifier ignored_any
}
}
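
Put together, this Deserializer walks `documents_index` for a single document, keeps only the displayed fields (optionally restricted through `fields`), and exposes them to serde as a map whose values are decoded from MessagePack on the fly. A rough sketch of how it could be driven; obtaining the `Index` handle and the document id is assumed here, not shown:

```rust
use serde::Deserialize;
use serde_json::Value;

// Sketch only: `index` would come from the database (an opened index) and
// `id` from a previous search or documents addition.
fn fetch_document(index: &Index, id: DocumentId) -> Result<Value, DeserializerError> {
    let mut deserializer = Deserializer {
        document_id: id,
        index,
        fields: None, // None means "all displayed fields"
    };
    Value::deserialize(&mut deserializer)
}
```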


@ -1,23 +1,53 @@
use serde::Serialize;
use serde::ser;
use std::hash::{Hash, Hasher};
use crate::database::serde::key_to_string::KeyToStringSerializer;
use crate::database::serde::{SerializerError, calculate_hash};
use crate::DocumentId;
use meilidb_core::DocumentId;
use serde::{ser, Serialize};
use serde_json::Value;
use siphasher::sip::SipHasher;
pub struct FindDocumentIdSerializer<'a> {
pub id_attribute_name: &'a str,
use super::{SerializerError, ConvertToString};
pub fn extract_document_id<D>(
identifier: &str,
document: &D,
) -> Result<Option<DocumentId>, SerializerError>
where D: serde::Serialize,
{
let serializer = ExtractDocumentId { identifier };
document.serialize(serializer)
}
impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
type Ok = DocumentId;
pub fn value_to_string(value: &Value) -> Option<String> {
match value {
Value::Null => None,
Value::Bool(_) => None,
Value::Number(value) => Some(value.to_string()),
Value::String(value) => Some(value.to_string()),
Value::Array(_) => None,
Value::Object(_) => None,
}
}
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
let mut s = SipHasher::new();
t.hash(&mut s);
let hash = s.finish();
DocumentId(hash)
}
struct ExtractDocumentId<'a> {
identifier: &'a str,
}
impl<'a> ser::Serializer for ExtractDocumentId<'a> {
type Ok = Option<DocumentId>;
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = FindDocumentIdMapSerializer<'a>;
type SerializeStruct = FindDocumentIdStructSerializer<'a>;
type SerializeMap = ExtractDocumentIdMapSerializer<'a>;
type SerializeStruct = ExtractDocumentIdStructSerializer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
@ -38,30 +68,30 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
f64 => serialize_f64,
}
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "str" })
fn serialize_str(self, _value: &str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "str" })
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
fn serialize_bytes(self, _value: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
Err(SerializerError::UnserializableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
Err(SerializerError::UnserializableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
@ -71,7 +101,7 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
Err(SerializerError::UnserializableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
@ -93,15 +123,15 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "sequence" })
Err(SerializerError::UnserializableType { type_name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
Err(SerializerError::UnserializableType { type_name: "tuple" })
}
fn serialize_tuple_struct(
@ -110,7 +140,7 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
@ -121,15 +151,17 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Ok(FindDocumentIdMapSerializer {
id_attribute_name: self.id_attribute_name,
let serializer = ExtractDocumentIdMapSerializer {
identifier: self.identifier,
document_id: None,
current_key_name: None,
})
};
Ok(serializer)
}
fn serialize_struct(
@ -138,10 +170,12 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Ok(FindDocumentIdStructSerializer {
id_attribute_name: self.id_attribute_name,
let serializer = ExtractDocumentIdStructSerializer {
identifier: self.identifier,
document_id: None,
})
};
Ok(serializer)
}
fn serialize_struct_variant(
@ -152,24 +186,24 @@ impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
Err(SerializerError::UnserializableType { type_name: "struct variant" })
}
}
pub struct FindDocumentIdMapSerializer<'a> {
id_attribute_name: &'a str,
pub struct ExtractDocumentIdMapSerializer<'a> {
identifier: &'a str,
document_id: Option<DocumentId>,
current_key_name: Option<String>,
}
impl<'a> ser::SerializeMap for FindDocumentIdMapSerializer<'a> {
type Ok = DocumentId;
impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
type Ok = Option<DocumentId>;
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: Serialize,
{
let key = key.serialize(KeyToStringSerializer)?;
let key = key.serialize(ConvertToString)?;
self.current_key_name = Some(key);
Ok(())
}
@ -188,33 +222,31 @@ impl<'a> ser::SerializeMap for FindDocumentIdMapSerializer<'a> {
) -> Result<(), Self::Error>
where K: Serialize, V: Serialize,
{
let key = key.serialize(KeyToStringSerializer)?;
let key = key.serialize(ConvertToString)?;
if self.id_attribute_name == key {
// TODO is it possible to have multiple ids?
let id = bincode::serialize(value).unwrap();
let hash = calculate_hash(&id);
self.document_id = Some(DocumentId(hash));
if self.identifier == key {
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
match value_to_string(&value).map(|s| compute_document_id(&s)) {
Some(document_id) => self.document_id = Some(document_id),
None => return Err(SerializerError::InvalidDocumentIdType),
}
}
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
match self.document_id {
Some(document_id) => Ok(document_id),
None => Err(SerializerError::DocumentIdNotFound)
}
Ok(self.document_id)
}
}
pub struct FindDocumentIdStructSerializer<'a> {
id_attribute_name: &'a str,
pub struct ExtractDocumentIdStructSerializer<'a> {
identifier: &'a str,
document_id: Option<DocumentId>,
}
impl<'a> ser::SerializeStruct for FindDocumentIdStructSerializer<'a> {
type Ok = DocumentId;
impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
type Ok = Option<DocumentId>;
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
@ -224,20 +256,18 @@ impl<'a> ser::SerializeStruct for FindDocumentIdStructSerializer<'a> {
) -> Result<(), Self::Error>
where T: Serialize,
{
if self.id_attribute_name == key {
// TODO can it be possible to have multiple ids?
let id = bincode::serialize(value).unwrap();
let hash = calculate_hash(&id);
self.document_id = Some(DocumentId(hash));
if self.identifier == key {
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
match value_to_string(&value).map(compute_document_id) {
Some(document_id) => self.document_id = Some(document_id),
None => return Err(SerializerError::InvalidDocumentIdType),
}
}
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
match self.document_id {
Some(document_id) => Ok(document_id),
None => Err(SerializerError::DocumentIdNotFound)
}
Ok(self.document_id)
}
}
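
In short, the document is serialized once, the identifier field's value is stringified, and the SipHash of that string becomes the DocumentId; booleans, arrays and objects are rejected as identifiers. A small sketch, reusing the "objectId" identifier from the tests further below:

```rust
use serde_json::json;

fn demo() {
    let doc = json!({ "objectId": 123, "title": "hello" });

    // The extracted id is the hash of the stringified identifier value.
    let id = extract_document_id("objectId", &doc).unwrap();
    assert_eq!(id, Some(compute_document_id("123")));

    // A boolean (or array/object) identifier is an InvalidDocumentIdType error.
    let bad = json!({ "objectId": true });
    assert!(extract_document_id("objectId", &bad).is_err());
}
```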


@ -0,0 +1,336 @@
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use serde::ser;
use serde::Serialize;
use crate::indexer::Indexer as RawIndexer;
use super::{SerializerError, ConvertToString};
pub struct Indexer<'a> {
pub attribute: SchemaAttr,
pub indexer: &'a mut RawIndexer,
pub document_id: DocumentId,
}
impl<'a> ser::Serializer for Indexer<'a> {
type Ok = ();
type Error = SerializerError;
type SerializeSeq = SeqIndexer<'a>;
type SerializeTuple = TupleIndexer<'a>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapIndexer<'a>;
type SerializeStruct = StructSerializer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "boolean" })
}
fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
let text = value.serialize(ConvertToString)?;
self.serialize_str(&text)
}
fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
self.indexer.index_text(self.document_id, self.attribute, text);
Ok(())
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.indexer.index_text(self.document_id, self.attribute, &text);
Ok(())
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnindexableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
Err(SerializerError::UnindexableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
let indexer = SeqIndexer {
attribute: self.attribute,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
};
Ok(indexer)
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
let indexer = TupleIndexer {
attribute: self.attribute,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
};
Ok(indexer)
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
let indexer = MapIndexer {
attribute: self.attribute,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
};
Ok(indexer)
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnindexableType { type_name: "struct variant" })
}
}
pub struct SeqIndexer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
}
}
pub struct MapIndexer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeMap for MapIndexer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let text = key.serialize(ConvertToString)?;
self.texts.push(text);
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
}
}
pub struct StructSerializer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let key_text = key.to_owned();
let value_text = value.serialize(ConvertToString)?;
self.texts.push(key_text);
self.texts.push(value_text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
}
}
pub struct TupleIndexer<'a> {
attribute: SchemaAttr,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
}
impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: Serialize
{
let text = value.serialize(ConvertToString)?;
self.texts.push(text);
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer.index_text_seq(self.document_id, self.attribute, texts);
Ok(())
}
}


@ -0,0 +1,131 @@
macro_rules! forward_to_unserializable_type {
($($ty:ident => $se_method:ident,)*) => {
$(
fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "$ty" })
}
)*
}
}
mod convert_to_number;
mod convert_to_string;
mod deserializer;
mod extract_document_id;
mod indexer;
mod serializer;
pub use self::deserializer::{Deserializer, DeserializerError};
pub use self::extract_document_id::{extract_document_id, compute_document_id, value_to_string};
pub use self::convert_to_string::ConvertToString;
pub use self::convert_to_number::ConvertToNumber;
pub use self::indexer::Indexer;
pub use self::serializer::Serializer;
use std::collections::BTreeMap;
use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::encode::Error as RmpError;
use serde_json::Error as SerdeJsonError;
use serde::ser;
use crate::number::ParseNumberError;
#[derive(Debug)]
pub enum SerializerError {
DocumentIdNotFound,
InvalidDocumentIdType,
RmpError(RmpError),
RocksDbError(rocksdb::Error),
SerdeJsonError(SerdeJsonError),
ParseNumberError(ParseNumberError),
UnserializableType { type_name: &'static str },
UnindexableType { type_name: &'static str },
UnrankableType { type_name: &'static str },
Custom(String),
}
impl ser::Error for SerializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
SerializerError::Custom(msg.to_string())
}
}
impl fmt::Display for SerializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
SerializerError::DocumentIdNotFound => {
write!(f, "serialized document does not have an id according to the schema")
},
SerializerError::InvalidDocumentIdType => {
write!(f, "document identifier can only be of type string or number")
},
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
SerializerError::RocksDbError(e) => write!(f, "RocksDB related error: {}", e),
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
SerializerError::ParseNumberError(e) => {
write!(f, "error while trying to parse a number: {}", e)
},
SerializerError::UnserializableType { type_name } => {
write!(f, "{} are not a serializable type", type_name)
},
SerializerError::UnindexableType { type_name } => {
write!(f, "{} are not an indexable type", type_name)
},
SerializerError::UnrankableType { type_name } => {
write!(f, "{} types can not be used for ranking", type_name)
},
SerializerError::Custom(s) => f.write_str(s),
}
}
}
impl Error for SerializerError {}
impl From<String> for SerializerError {
fn from(value: String) -> SerializerError {
SerializerError::Custom(value)
}
}
impl From<RmpError> for SerializerError {
fn from(error: RmpError) -> SerializerError {
SerializerError::RmpError(error)
}
}
impl From<SerdeJsonError> for SerializerError {
fn from(error: SerdeJsonError) -> SerializerError {
SerializerError::SerdeJsonError(error)
}
}
impl From<rocksdb::Error> for SerializerError {
fn from(error: rocksdb::Error) -> SerializerError {
SerializerError::RocksDbError(error)
}
}
impl From<ParseNumberError> for SerializerError {
fn from(error: ParseNumberError) -> SerializerError {
SerializerError::ParseNumberError(error)
}
}
pub struct RamDocumentStore(BTreeMap<(DocumentId, SchemaAttr), Vec<u8>>);
impl RamDocumentStore {
pub fn new() -> RamDocumentStore {
RamDocumentStore(BTreeMap::new())
}
pub fn set_document_field(&mut self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) {
self.0.insert((id, attr), value);
}
pub fn into_inner(self) -> BTreeMap<(DocumentId, SchemaAttr), Vec<u8>> {
self.0
}
}
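
RamDocumentStore is just an ordered, in-memory buffer keyed by (document id, attribute): during an update every serialized field is pushed into it, and the whole map is drained afterwards with into_inner. A minimal sketch:

```rust
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;

fn demo() {
    let mut store = RamDocumentStore::new();

    // Buffer one MessagePack-encoded field for document 1, attribute 0.
    let encoded = rmp_serde::to_vec_named(&"hello").unwrap();
    store.set_document_field(DocumentId(1), SchemaAttr::new(0), encoded);

    // The accumulated fields are then taken out in one batch.
    let fields = store.into_inner();
    assert_eq!(fields.len(), 1);
}
```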


@ -0,0 +1,287 @@
use meilidb_core::DocumentId;
use meilidb_schema::Schema;
use serde::ser;
use crate::indexer::Indexer as RawIndexer;
use crate::ranked_map::RankedMap;
use super::{RamDocumentStore, SerializerError, ConvertToString, ConvertToNumber, Indexer};
pub struct Serializer<'a> {
pub schema: &'a Schema,
pub document_store: &'a mut RamDocumentStore,
pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap,
pub document_id: DocumentId,
}
impl<'a> ser::Serializer for Serializer<'a> {
type Ok = ();
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapSerializer<'a>;
type SerializeStruct = StructSerializer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "str" })
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
Err(SerializerError::UnserializableType { type_name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
Err(SerializerError::UnserializableType { type_name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { type_name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Ok(MapSerializer {
schema: self.schema,
document_id: self.document_id,
document_store: self.document_store,
indexer: self.indexer,
ranked_map: self.ranked_map,
current_key_name: None,
})
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Ok(StructSerializer {
schema: self.schema,
document_id: self.document_id,
document_store: self.document_store,
indexer: self.indexer,
ranked_map: self.ranked_map,
})
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { type_name: "struct variant" })
}
}
pub struct MapSerializer<'a> {
schema: &'a Schema,
document_id: DocumentId,
document_store: &'a mut RamDocumentStore,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
current_key_name: Option<String>,
}
impl<'a> ser::SerializeMap for MapSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let key = key.serialize(ConvertToString)?;
self.current_key_name = Some(key);
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let key = self.current_key_name.take().unwrap();
self.serialize_entry(&key, value)
}
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V,
) -> Result<(), Self::Error>
where K: ser::Serialize, V: ser::Serialize,
{
let key = key.serialize(ConvertToString)?;
serialize_value(
self.schema,
self.document_id,
self.document_store,
self.indexer,
self.ranked_map,
&key,
value,
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
pub struct StructSerializer<'a> {
schema: &'a Schema,
document_id: DocumentId,
document_store: &'a mut RamDocumentStore,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where T: ser::Serialize,
{
serialize_value(
self.schema,
self.document_id,
self.document_store,
self.indexer,
self.ranked_map,
key,
value,
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
fn serialize_value<T: ?Sized>(
schema: &Schema,
document_id: DocumentId,
document_store: &mut RamDocumentStore,
indexer: &mut RawIndexer,
ranked_map: &mut RankedMap,
key: &str,
value: &T,
) -> Result<(), SerializerError>
where T: ser::Serialize,
{
if let Some(attribute) = schema.attribute(key) {
let props = schema.props(attribute);
let serialized = rmp_serde::to_vec_named(value)?;
document_store.set_document_field(document_id, attribute, serialized);
if props.is_indexed() {
let indexer = Indexer { attribute, indexer, document_id };
value.serialize(indexer)?;
}
if props.is_ranked() {
let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, attribute, number);
}
}
Ok(())
}
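
serialize_value is where the schema decides a field's fate: any attribute known to the schema is stored as MessagePack, indexed attributes additionally go through the Indexer, ranked attributes are converted to a Number for the RankedMap, and unknown fields are silently skipped. A small sketch of the property checks driving that routing, with a schema similar to the tests further below:

```rust
use meilidb_schema::{SchemaBuilder, DISPLAYED, INDEXED, RANKED};

fn demo() {
    let mut builder = SchemaBuilder::with_identifier("objectId");
    builder.new_attribute("title", DISPLAYED | INDEXED);
    builder.new_attribute("popularity", RANKED);
    let schema = builder.build();

    // Mirrors the checks made in serialize_value above.
    let attr = schema.attribute("title").unwrap();
    let props = schema.props(attr);
    assert!(props.is_indexed() && !props.is_ranked());

    // A field that is not declared in the schema is ignored entirely.
    assert!(schema.attribute("unknown").is_none());
}
```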


@ -0,0 +1,96 @@
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;
use serde_json::json;
use meilidb_data::Database;
use meilidb_schema::{Schema, SchemaBuilder, DISPLAYED, INDEXED};
fn simple_schema() -> Schema {
let mut builder = SchemaBuilder::with_identifier("objectId");
builder.new_attribute("objectId", DISPLAYED | INDEXED);
builder.new_attribute("title", DISPLAYED | INDEXED);
builder.build()
}
#[test]
fn insert_delete_document() {
let tmp_dir = tempfile::tempdir().unwrap();
let database = Database::open(&tmp_dir).unwrap();
let as_been_updated = Arc::new(AtomicBool::new(false));
let schema = simple_schema();
let index = database.create_index("hello", schema).unwrap();
let as_been_updated_clone = as_been_updated.clone();
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
let doc1 = json!({ "objectId": 123, "title": "hello" });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
let mut deletion = index.documents_deletion();
deletion.delete_document(&doc1).unwrap();
let update_id = deletion.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 0);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 0);
}
#[test]
fn replace_document() {
let tmp_dir = tempfile::tempdir().unwrap();
let database = Database::open(&tmp_dir).unwrap();
let as_been_updated = Arc::new(AtomicBool::new(false));
let schema = simple_schema();
let index = database.create_index("hello", schema).unwrap();
let as_been_updated_clone = as_been_updated.clone();
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
let doc1 = json!({ "objectId": 123, "title": "hello" });
let doc2 = json!({ "objectId": 123, "title": "coucou" });
let mut addition = index.documents_addition();
addition.update_document(&doc1);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
let mut addition = index.documents_addition();
addition.update_document(&doc2);
let update_id = addition.finalize().unwrap();
let status = index.update_status_blocking(update_id).unwrap();
assert!(as_been_updated.swap(false, Relaxed));
assert!(status.result.is_ok());
assert_eq!(index.number_of_documents(), 1);
let docs = index.query_builder().query("hello", 0..10).unwrap();
assert_eq!(docs.len(), 0);
let docs = index.query_builder().query("coucou", 0..10).unwrap();
assert_eq!(docs.len(), 1);
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc2));
}

meilidb-schema/Cargo.toml (new file, 12 lines)

@ -0,0 +1,12 @@
[package]
name = "meilidb-schema"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
bincode = "1.1.2"
linked-hash-map = { version = "0.5.2", features = ["serde_impl"] }
serde = { version = "1.0.91", features = ["derive"] }
serde_json = { version = "1.0.39", features = ["preserve_order"] }
toml = { version = "0.5.0", features = ["preserve_order"] }


@ -5,34 +5,37 @@ use std::{fmt, u16};
use std::ops::BitOr;
use std::sync::Arc;
use serde_derive::{Serialize, Deserialize};
use serde::{Serialize, Deserialize};
use linked_hash_map::LinkedHashMap;
use serde::Serialize;
use crate::database::serde::find_id::FindDocumentIdSerializer;
use crate::database::serde::SerializerError;
use crate::DocumentId;
pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false };
pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true };
pub const DISPLAYED: SchemaProps = SchemaProps { displayed: true, indexed: false, ranked: false };
pub const INDEXED: SchemaProps = SchemaProps { displayed: false, indexed: true, ranked: false };
pub const RANKED: SchemaProps = SchemaProps { displayed: false, indexed: false, ranked: true };
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaProps {
#[serde(default)]
stored: bool,
displayed: bool,
#[serde(default)]
indexed: bool,
#[serde(default)]
ranked: bool,
}
impl SchemaProps {
pub fn is_stored(self) -> bool {
self.stored
pub fn is_displayed(self) -> bool {
self.displayed
}
pub fn is_indexed(self) -> bool {
self.indexed
}
pub fn is_ranked(self) -> bool {
self.ranked
}
}
impl BitOr for SchemaProps {
@ -40,8 +43,9 @@ impl BitOr for SchemaProps {
fn bitor(self, other: Self) -> Self::Output {
SchemaProps {
stored: self.stored | other.stored,
displayed: self.displayed | other.displayed,
indexed: self.indexed | other.indexed,
ranked: self.ranked | other.ranked,
}
}
}
@ -95,14 +99,14 @@ struct InnerSchema {
}
impl Schema {
pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<dyn Error>> {
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer)?;
let builder: SchemaBuilder = toml::from_slice(&buffer)?;
Ok(builder.build())
}
pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<dyn Error>> {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
let builder = SchemaBuilder { identifier, attributes };
@ -113,12 +117,29 @@ impl Schema {
Ok(())
}
pub(crate) fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> {
pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<dyn Error>> {
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer)?;
let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
Ok(builder.build())
}
pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<dyn Error>> {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
let builder = SchemaBuilder { identifier, attributes };
let string = serde_json::to_string_pretty(&builder)?;
writer.write_all(string.as_bytes())?;
Ok(())
}
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> {
let builder: SchemaBuilder = bincode::deserialize_from(reader)?;
Ok(builder.build())
}
pub(crate) fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
let builder = SchemaBuilder { identifier, attributes };
@ -141,14 +162,6 @@ impl Schema {
attributes
}
pub fn document_id<T>(&self, document: T) -> Result<DocumentId, SerializerError>
where T: Serialize,
{
let id_attribute_name = &self.inner.identifier;
let serializer = FindDocumentIdSerializer { id_attribute_name };
document.serialize(serializer)
}
pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
let (_, props) = self.inner.props[attr.0 as usize];
props
@ -166,18 +179,31 @@ impl Schema {
let (name, _) = &self.inner.props[attr.0 as usize];
name
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item=(&str, SchemaAttr, SchemaProps)> + 'a {
self.inner.props.iter()
.map(move |(name, prop)| {
let attr = self.inner.attrs.get(name).unwrap();
(name.as_str(), *attr, *prop)
})
}
}
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
pub struct SchemaAttr(pub(crate) u16);
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct SchemaAttr(pub u16);
impl SchemaAttr {
pub fn new(value: u16) -> SchemaAttr {
pub const fn new(value: u16) -> SchemaAttr {
SchemaAttr(value)
}
pub fn min() -> SchemaAttr {
SchemaAttr(0)
pub const fn min() -> SchemaAttr {
SchemaAttr(u16::min_value())
}
pub const fn max() -> SchemaAttr {
SchemaAttr(u16::max_value())
}
pub fn next(self) -> Option<SchemaAttr> {
@ -187,10 +213,6 @@ impl SchemaAttr {
pub fn prev(self) -> Option<SchemaAttr> {
self.0.checked_sub(1).map(SchemaAttr)
}
pub fn max() -> SchemaAttr {
SchemaAttr(u16::MAX)
}
}
impl fmt::Display for SchemaAttr {
@ -207,8 +229,8 @@ mod tests {
#[test]
fn serialize_deserialize() -> bincode::Result<()> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", STORED);
builder.new_attribute("beta", STORED | INDEXED);
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
@ -223,10 +245,10 @@ mod tests {
}
#[test]
fn serialize_deserialize_toml() -> Result<(), Box<Error>> {
fn serialize_deserialize_toml() -> Result<(), Box<dyn Error>> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", STORED);
builder.new_attribute("beta", STORED | INDEXED);
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
@ -240,10 +262,10 @@ mod tests {
identifier = "id"
[attributes."alpha"]
stored = true
displayed = true
[attributes."beta"]
stored = true
displayed = true
indexed = true
[attributes."gamma"]
@ -254,4 +276,40 @@ mod tests {
Ok(())
}
#[test]
fn serialize_deserialize_json() -> Result<(), Box<dyn Error>> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", DISPLAYED);
builder.new_attribute("beta", DISPLAYED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let mut buffer = Vec::new();
schema.to_json(&mut buffer)?;
let schema2 = Schema::from_json(buffer.as_slice())?;
assert_eq!(schema, schema2);
let data = r#"
{
"identifier": "id",
"attributes": {
"alpha": {
"displayed": true
},
"beta": {
"displayed": true,
"indexed": true
},
"gamma": {
"indexed": true
}
}
}"#;
let schema2 = Schema::from_json(data.as_bytes())?;
assert_eq!(schema, schema2);
Ok(())
}
}
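
Two details worth noting from the new API, as a sketch under the assumption (implied by the props indexing above) that attributes are numbered in declaration order: the now-public SchemaAttr(u16) is that declaration index, and iter() walks the attributes back in the same order with their properties.

```rust
use meilidb_schema::{SchemaAttr, SchemaBuilder, DISPLAYED, INDEXED, RANKED};

fn demo() {
    let mut builder = SchemaBuilder::with_identifier("id");
    builder.new_attribute("alpha", DISPLAYED);
    builder.new_attribute("beta", DISPLAYED | INDEXED);
    builder.new_attribute("gamma", RANKED);
    let schema = builder.build();

    // Attribute numbers follow the declaration order, starting at 0.
    assert_eq!(schema.attribute("alpha"), Some(SchemaAttr::new(0)));
    assert_eq!(schema.attribute("gamma"), Some(SchemaAttr::new(2)));

    // iter() yields (name, attr, props) in that same order.
    let names: Vec<_> = schema.iter().map(|(name, _, _)| name).collect();
    assert_eq!(names, ["alpha", "beta", "gamma"]);
}
```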


@ -0,0 +1,8 @@
[package]
name = "meilidb-tokenizer"
version = "0.1.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
edition = "2018"
[dependencies]
slice-group-by = "0.2.4"


@ -0,0 +1,295 @@
use std::iter::Peekable;
use slice_group_by::StrGroupBy;
use self::SeparatorCategory::*;
pub fn is_cjk(c: char) -> bool {
(c >= '\u{2e80}' && c <= '\u{2eff}') ||
(c >= '\u{2f00}' && c <= '\u{2fdf}') ||
(c >= '\u{3040}' && c <= '\u{309f}') ||
(c >= '\u{30a0}' && c <= '\u{30ff}') ||
(c >= '\u{3100}' && c <= '\u{312f}') ||
(c >= '\u{3200}' && c <= '\u{32ff}') ||
(c >= '\u{3400}' && c <= '\u{4dbf}') ||
(c >= '\u{4e00}' && c <= '\u{9fff}') ||
(c >= '\u{f900}' && c <= '\u{faff}')
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum SeparatorCategory {
Soft,
Hard,
}
impl SeparatorCategory {
fn merge(self, other: SeparatorCategory) -> SeparatorCategory {
if let (Soft, Soft) = (self, other) { Soft } else { Hard }
}
fn to_usize(self) -> usize {
match self {
Soft => 1,
Hard => 8,
}
}
}
fn is_separator(c: char) -> bool {
classify_separator(c).is_some()
}
fn classify_separator(c: char) -> Option<SeparatorCategory> {
match c {
' ' | '\'' | '"' => Some(Soft),
'.' | ';' | ',' | '!' | '?' | '-' | '(' | ')' => Some(Hard),
_ => None,
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum CharCategory {
Separator(SeparatorCategory),
Cjk,
Other,
}
fn classify_char(c: char) -> CharCategory {
if let Some(category) = classify_separator(c) {
CharCategory::Separator(category)
} else if is_cjk(c) {
CharCategory::Cjk
} else {
CharCategory::Other
}
}
fn is_str_word(s: &str) -> bool {
!s.chars().any(is_separator)
}
fn same_group_category(a: char, b: char) -> bool {
match (classify_char(a), classify_char(b)) {
(CharCategory::Cjk, _) | (_, CharCategory::Cjk) => false,
(CharCategory::Separator(_), CharCategory::Separator(_)) => true,
(a, b) => a == b,
}
}
// fold the number of chars along with the index position
fn chars_count_index((n, _): (usize, usize), (i, c): (usize, char)) -> (usize, usize) {
(n + 1, i + c.len_utf8())
}
pub fn split_query_string(query: &str) -> impl Iterator<Item=&str> {
Tokenizer::new(query).map(|t| t.word)
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Token<'a> {
pub word: &'a str,
pub word_index: usize,
pub char_index: usize,
}
pub struct Tokenizer<'a> {
inner: &'a str,
word_index: usize,
char_index: usize,
}
impl<'a> Tokenizer<'a> {
pub fn new(string: &str) -> Tokenizer {
// skip every separator and set `char_index`
// to the number of char trimmed
let (count, index) = string.char_indices()
.take_while(|(_, c)| is_separator(*c))
.fold((0, 0), chars_count_index);
Tokenizer {
inner: &string[index..],
word_index: 0,
char_index: count,
}
}
}
impl<'a> Iterator for Tokenizer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
let mut iter = self.inner.linear_group_by(same_group_category).peekable();
while let (Some(string), next_string) = (iter.next(), iter.peek()) {
let (count, index) = string.char_indices().fold((0, 0), chars_count_index);
if !is_str_word(string) {
self.word_index += string.chars()
.filter_map(classify_separator)
.fold(Soft, |a, x| a.merge(x))
.to_usize();
self.char_index += count;
self.inner = &self.inner[index..];
continue;
}
let token = Token {
word: string,
word_index: self.word_index,
char_index: self.char_index,
};
if next_string.filter(|s| is_str_word(s)).is_some() {
self.word_index += 1;
}
self.char_index += count;
self.inner = &self.inner[index..];
return Some(token);
}
self.inner = "";
None
}
}
pub struct SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
inner: I,
current: Option<Peekable<Tokenizer<'a>>>,
word_offset: usize,
char_offset: usize,
}
impl<'a, I> SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
pub fn new(mut iter: I) -> SeqTokenizer<'a, I> {
let current = iter.next().map(|s| Tokenizer::new(s).peekable());
SeqTokenizer {
inner: iter,
current: current,
word_offset: 0,
char_offset: 0,
}
}
}
impl<'a, I> Iterator for SeqTokenizer<'a, I>
where I: Iterator<Item=&'a str>,
{
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
match &mut self.current {
Some(current) => {
match current.next() {
Some(token) => {
// we must apply the word and char offsets
// to the token before returning it
let token = Token {
word: token.word,
word_index: token.word_index + self.word_offset,
char_index: token.char_index + self.char_offset,
};
// if this is the last iteration on this text
// we must save the offsets for next texts
if current.peek().is_none() {
let hard_space = SeparatorCategory::Hard.to_usize();
self.word_offset = token.word_index + hard_space;
self.char_offset = token.char_index + hard_space;
}
Some(token)
},
None => {
// no more words in this text we must
// start tokenizing the next text
self.current = self.inner.next().map(|s| Tokenizer::new(s).peekable());
self.next()
},
}
},
// no more texts available
None => None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn easy() {
let mut tokenizer = Tokenizer::new("salut");
assert_eq!(tokenizer.next(), Some(Token { word: "salut", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard() {
let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe (ouch)");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 }));
assert_eq!(tokenizer.next(), Some(Token { word: "ouch", word_index: 17, char_index: 18 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), Some(Token { word: "wtf", word_index: 16, char_index: 12 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 18 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 32, char_index: 24 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard_long_chars() {
let mut tokenizer = Tokenizer::new(" .? yo 😂. aïe");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😂", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 10 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? 😱 - lol . 😣 ,");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😱", word_index: 16, char_index: 12 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 16 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 22 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard_kanjis() {
let mut tokenizer = Tokenizer::new("\u{2ec4}lolilol\u{2ec7}");
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 1, char_index: 1 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 2, char_index: 8 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("\u{2ec4}\u{2ed3}\u{2ef2} lolilol - hello \u{2ec7}");
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ed3}", word_index: 1, char_index: 1 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ef2}", word_index: 2, char_index: 2 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 3, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "hello", word_index: 11, char_index: 14 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 12, char_index: 23 }));
assert_eq!(tokenizer.next(), None);
}
}
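
The tests above only exercise Tokenizer; SeqTokenizer is the multi-text variant: it carries the word and char offsets across texts and separates consecutive texts by a hard-separator gap of 8 word positions. A small sketch:

```rust
fn demo() {
    let texts = vec!["hello world", "bonjour"];
    let tokens: Vec<_> = SeqTokenizer::new(texts.into_iter()).collect();

    assert_eq!(tokens[1].word, "world");
    assert_eq!(tokens[1].word_index, 1);

    // "bonjour" starts the next text: its word index jumps by a hard separator (8).
    assert_eq!(tokens[2].word, "bonjour");
    assert_eq!(tokens[2].word_index, 1 + 8);
}
```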

meilidb/Cargo.toml (new file, 28 lines)

@ -0,0 +1,28 @@
[package]
edition = "2018"
name = "meilidb"
version = "0.3.1"
authors = ["Kerollmops <renault.cle@gmail.com>"]
[dependencies]
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-data = { path = "../meilidb-data", version = "0.1.0" }
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
[dev-dependencies]
csv = "1.0.7"
diskus = "0.5.0"
env_logger = "0.6.1"
jemallocator = "0.1.9"
linked-hash-map = "0.5.2"
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
quickcheck = "0.8.2"
rand = "0.6.5"
rand_xorshift = "0.1.1"
rustyline = { version = "5.0.0", default-features = false }
serde = { version = "1.0.91" , features = ["derive"] }
serde_json = "1.0.39"
structopt = "0.2.15"
sysinfo = "0.8.4"
tempfile = "3.0.7"
termcolor = "1.0.4"


@ -0,0 +1,214 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::{HashMap, HashSet};
use std::io::{self, BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::error::Error;
use std::fs::File;
use diskus::Walk;
use sysinfo::{SystemExt, ProcessExt};
use serde::{Serialize, Deserialize};
use structopt::StructOpt;
use meilidb_data::Database;
use meilidb_schema::Schema;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
/// The csv file to index.
#[structopt(parse(from_os_str))]
pub csv_data_path: PathBuf,
/// The path to the schema.
#[structopt(long = "schema", parse(from_os_str))]
pub schema_path: PathBuf,
/// The file with the synonyms.
#[structopt(long = "synonyms", parse(from_os_str))]
pub synonyms: Option<PathBuf>,
/// The path to the list of stop words (one by line).
#[structopt(long = "stop-words", parse(from_os_str))]
pub stop_words: Option<PathBuf>,
#[structopt(long = "update-group-size")]
pub update_group_size: Option<usize>,
}
#[derive(Serialize, Deserialize)]
struct Document (
HashMap<String, String>
);
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Synonym {
OneWay(SynonymOneWay),
MultiWay { synonyms: Vec<String> },
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SynonymOneWay {
pub search_terms: String,
pub synonyms: Synonyms,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Synonyms {
Multiple(Vec<String>),
Single(String),
}
fn read_synomys(path: &Path) -> Result<Vec<Synonym>, Box<dyn Error>> {
let file = File::open(path)?;
let synonyms = serde_json::from_reader(file)?;
Ok(synonyms)
}
fn index(
schema: Schema,
database_path: &Path,
csv_data_path: &Path,
update_group_size: Option<usize>,
stop_words: &HashSet<String>,
synonyms: Vec<Synonym>,
) -> Result<Database, Box<dyn Error>>
{
let database = Database::open(database_path)?;
let mut wtr = csv::Writer::from_path("./stats.csv").unwrap();
wtr.write_record(&["NumberOfDocuments", "DiskUsed", "MemoryUsed"])?;
let mut system = sysinfo::System::new();
let index = database.create_index("test", schema.clone())?;
let mut synonyms_adder = index.synonyms_addition();
for synonym in synonyms {
match synonym {
Synonym::OneWay(SynonymOneWay { search_terms, synonyms }) => {
let alternatives = match synonyms {
Synonyms::Multiple(alternatives) => alternatives,
Synonyms::Single(alternative) => vec![alternative],
};
synonyms_adder.add_synonym(search_terms, alternatives);
},
Synonym::MultiWay { mut synonyms } => {
for _ in 0..synonyms.len() {
if let Some((synonym, alternatives)) = synonyms.split_first() {
synonyms_adder.add_synonym(synonym, alternatives);
}
synonyms.rotate_left(1);
}
},
}
}
synonyms_adder.finalize()?;
let mut rdr = csv::Reader::from_path(csv_data_path)?;
let mut raw_record = csv::StringRecord::new();
let headers = rdr.headers()?.clone();
let mut i = 0;
let mut end_of_file = false;
while !end_of_file {
let mut update = index.documents_addition();
loop {
end_of_file = !rdr.read_record(&mut raw_record)?;
if end_of_file { break }
let document: Document = match raw_record.deserialize(Some(&headers)) {
Ok(document) => document,
Err(e) => {
eprintln!("{:?}", e);
continue;
}
};
update.update_document(document);
print!("\rindexing document {}", i);
i += 1;
if let Some(group_size) = update_group_size {
if i % group_size == 0 { break }
}
}
println!();
println!("committing update...");
update.finalize()?;
// write stats
let directory_size = Walk::new(&[database_path.to_owned()], 4).run();
system.refresh_all();
let memory = system.get_process(sysinfo::get_current_pid()).unwrap().memory(); // in kb
wtr.write_record(&[i.to_string(), directory_size.to_string(), memory.to_string()])?;
wtr.flush()?;
}
Ok(database)
}
fn retrieve_stop_words(path: &Path) -> io::Result<HashSet<String>> {
let f = File::open(path)?;
let reader = BufReader::new(f);
let mut words = HashSet::new();
for line in reader.lines() {
let line = line?;
let word = line.trim().to_string();
words.insert(word);
}
Ok(words)
}
fn main() -> Result<(), Box<dyn Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let schema = {
let file = File::open(&opt.schema_path)?;
Schema::from_toml(file)?
};
let stop_words = match opt.stop_words {
Some(ref path) => retrieve_stop_words(path)?,
None => HashSet::new(),
};
let synonyms = match opt.synonyms {
Some(ref path) => read_synomys(path)?,
None => Vec::new(),
};
let start = Instant::now();
let result = index(
schema,
&opt.database_path,
&opt.csv_data_path,
opt.update_group_size,
&stop_words,
synonyms,
);
if let Err(e) = result {
return Err(e.into())
}
println!("database created in {:.2?} at: {:?}", start.elapsed(), opt.database_path);
Ok(())
}
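
For reference, the file given to --synonyms is parsed into the untagged Synonym enum above, so a one-way entry (searchTerms plus synonyms) and a plain multi-way list can be mixed in the same JSON array. A hedged sketch of such a file, checked against the same derives (the example terms are made up):

```rust
fn demo() {
    // Hypothetical content for the file passed through --synonyms.
    let data = r#"[
        { "searchTerms": "nyc", "synonyms": ["new york", "new york city"] },
        { "synonyms": ["street", "st"] }
    ]"#;

    let synonyms: Vec<Synonym> = serde_json::from_str(data).unwrap();
    assert_eq!(synonyms.len(), 2);
    match &synonyms[0] {
        Synonym::OneWay(one_way) => assert_eq!(one_way.search_terms, "nyc"),
        _ => panic!("expected a one-way synonym"),
    }
}
```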


@ -0,0 +1,229 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::btree_map::{BTreeMap, Entry};
use std::collections::HashSet;
use std::error::Error;
use std::io::{self, Write};
use std::iter::FromIterator;
use std::path::PathBuf;
use std::time::{Instant, Duration};
use linked_hash_map::LinkedHashMap;
use rustyline::{Editor, Config};
use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilidb_core::Highlight;
use meilidb_data::Database;
use meilidb_schema::SchemaAttr;
#[derive(Debug, StructOpt)]
pub struct Opt {
/// The destination where the database must be created
#[structopt(parse(from_os_str))]
pub database_path: PathBuf,
#[structopt(long = "fetch-timeout-ms")]
pub fetch_timeout_ms: Option<u64>,
/// Fields that must be displayed.
pub displayed_fields: Vec<String>,
/// The number of returned results
#[structopt(short = "n", long = "number-results", default_value = "10")]
pub number_results: usize,
/// The number of characters before and after the first match
#[structopt(short = "C", long = "context", default_value = "35")]
pub char_context: usize,
}
type Document = LinkedHashMap<String, String>;
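/// Write `text` to stdout, coloring in yellow every other area delimited by
/// the byte offsets in `ranges`.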
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
let mut stdout = StandardStream::stdout(ColorChoice::Always);
let mut highlighted = false;
for range in ranges.windows(2) {
let [start, end] = match range { [start, end] => [*start, *end], _ => unreachable!() };
if highlighted {
stdout.set_color(ColorSpec::new().set_fg(Some(Color::Yellow)))?;
}
write!(&mut stdout, "{}", &text[start..end])?;
stdout.reset()?;
highlighted = !highlighted;
}
Ok(())
}
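/// Convert a character-based `(index, length)` range into the corresponding
/// byte-based `(index, length)` range inside `text`.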
fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize) {
let mut byte_index = 0;
let mut byte_length = 0;
for (n, (i, c)) in text.char_indices().enumerate() {
if n == index {
byte_index = i;
}
if n + 1 == index + length {
byte_length = i - byte_index + c.len_utf8();
break;
}
}
(byte_index, byte_length)
}
fn create_highlight_areas(text: &str, highlights: &[Highlight]) -> Vec<usize> {
let mut byte_indexes = BTreeMap::new();
for highlight in highlights {
let char_index = highlight.char_index as usize;
let char_length = highlight.char_length as usize;
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
match byte_indexes.entry(byte_index) {
Entry::Vacant(entry) => { entry.insert(byte_length); },
Entry::Occupied(mut entry) => {
if *entry.get() < byte_length {
entry.insert(byte_length);
}
},
}
}
let mut title_areas = Vec::new();
title_areas.push(0);
for (byte_index, length) in byte_indexes {
title_areas.push(byte_index);
title_areas.push(byte_index + length);
}
title_areas.push(text.len());
title_areas.sort_unstable();
title_areas
}
/// note: the highlights must have been sorted by `char_index` and `char_length` before being passed.
///
/// ```no_run
/// doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));
///
/// let highlights = doc.highlights.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
///
/// let (text, highlights) = crop_text(&text, highlights, 35);
/// ```
fn crop_text(
text: &str,
highlights: impl IntoIterator<Item=Highlight>,
context: usize,
) -> (String, Vec<Highlight>)
{
let mut highlights = highlights.into_iter().peekable();
let char_index = highlights.peek().map(|m| m.char_index as usize).unwrap_or(0);
let start = char_index.saturating_sub(context);
let text = text.chars().skip(start).take(context * 2).collect();
let highlights = highlights
.take_while(|m| {
(m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
})
.map(|highlight| {
Highlight { char_index: highlight.char_index - start as u16, ..highlight }
})
.collect();
(text, highlights)
}
fn main() -> Result<(), Box<dyn Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let start = Instant::now();
let database = Database::open(&opt.database_path)?;
let index = database.open_index("test")?.unwrap();
let schema = index.schema();
println!("database prepared for you in {:.2?}", start.elapsed());
let fields = opt.displayed_fields.iter().map(String::as_str);
let fields = HashSet::from_iter(fields);
let config = Config::builder().auto_add_history(true).build();
let mut readline = Editor::<()>::with_config(config);
let _ = readline.load_history("query-history.txt");
for result in readline.iter("Searching for: ") {
match result {
Ok(query) => {
let start_total = Instant::now();
let builder = match opt.fetch_timeout_ms {
Some(timeout_ms) => {
let timeout = Duration::from_millis(timeout_ms);
index.query_builder().with_fetch_timeout(timeout)
},
None => index.query_builder(),
};
let documents = builder.query(&query, 0..opt.number_results)?;
let mut retrieve_duration = Duration::default();
let number_of_documents = documents.len();
for mut doc in documents {
doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));
let start_retrieve = Instant::now();
let result = index.document::<Document>(Some(&fields), doc.id);
retrieve_duration += start_retrieve.elapsed();
match result {
Ok(Some(document)) => {
for (name, text) in document {
print!("{}: ", name);
let attr = schema.attribute(&name).unwrap();
let highlights = doc.highlights.iter()
.filter(|m| SchemaAttr::new(m.attribute) == attr)
.cloned();
let (text, highlights) = crop_text(&text, highlights, opt.char_context);
let areas = create_highlight_areas(&text, &highlights);
display_highlights(&text, &areas)?;
println!();
}
},
Ok(None) => eprintln!("missing document"),
Err(e) => eprintln!("{}", e),
}
let mut matching_attributes = HashSet::new();
for highlight in doc.highlights {
let attr = SchemaAttr::new(highlight.attribute);
let name = schema.attribute_name(attr);
matching_attributes.insert(name);
}
let matching_attributes = Vec::from_iter(matching_attributes);
println!("matching in: {:?}", matching_attributes);
println!();
}
eprintln!("document field retrieve took {:.2?}", retrieve_duration);
eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
},
Err(err) => {
println!("Error: {:?}", err);
break
}
}
}
readline.save_history("query-history.txt").unwrap();
Ok(())
}

meilidb/src/lib.rs Normal file

@ -0,0 +1,3 @@
mod sort_by_attr;
pub use self::sort_by_attr::SortByAttr;

meilidb/src/sort_by_attr.rs Normal file

@ -0,0 +1,125 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use meilidb_core::{criterion::Criterion, RawDocument};
use meilidb_data::RankedMap;
use meilidb_schema::{Schema, SchemaAttr};
/// A helper struct that permits sorting documents by
/// one of their stored attributes.
///
/// # Note
///
/// If a document cannot be deserialized it will be considered [`None`][].
///
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
/// so you should check how [`Ord`] is implemented for `Option`.
///
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
///
/// # Example
///
/// ```ignore
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
///
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
///
/// let builder = CriteriaBuilder::with_capacity(8)
/// .add(SumOfTypos)
/// .add(NumberOfWords)
/// .add(WordsProximity)
/// .add(SumOfWordsAttribute)
/// .add(SumOfWordsPosition)
/// .add(Exact)
/// .add(custom_ranking)
/// .add(DocumentId);
///
/// let criterion = builder.build();
///
/// ```
pub struct SortByAttr<'a> {
ranked_map: &'a RankedMap,
attr: SchemaAttr,
reversed: bool,
}
impl<'a> SortByAttr<'a> {
pub fn lower_is_better(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
SortByAttr::new(ranked_map, schema, attr_name, false)
}
pub fn higher_is_better(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
SortByAttr::new(ranked_map, schema, attr_name, true)
}
fn new(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
reversed: bool,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
let attr = match schema.attribute(attr_name) {
Some(attr) => attr,
None => return Err(SortByAttrError::AttributeNotFound),
};
if !schema.props(attr).is_ranked() {
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
}
Ok(SortByAttr { ranked_map, attr, reversed })
}
}
impl<'a> Criterion for SortByAttr<'a> {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = self.ranked_map.get(lhs.id, self.attr);
let rhs = self.ranked_map.get(rhs.id, self.attr);
match (lhs, rhs) {
(Some(lhs), Some(rhs)) => {
let order = lhs.cmp(&rhs);
if self.reversed { order.reverse() } else { order }
},
(None, Some(_)) => Ordering::Greater,
(Some(_), None) => Ordering::Less,
(None, None) => Ordering::Equal,
}
}
fn name(&self) -> &'static str {
"SortByAttr"
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortByAttrError {
AttributeNotFound,
AttributeNotRegisteredForRanking,
}
impl fmt::Display for SortByAttrError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use SortByAttrError::*;
match self {
AttributeNotFound => f.write_str("attribute not found in the schema"),
AttributeNotRegisteredForRanking => f.write_str("attribute not registered for ranking"),
}
}
}
impl Error for SortByAttrError { }


@ -1,105 +0,0 @@
use std::fmt;
/// Represents an attribute number along with the word index
/// according to the tokenizer used.
///
/// It can accept up to 1024 attributes and word positions
/// can be at most 2^22.
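///
/// A rough illustration of the packing (the values are arbitrary):
///
/// ```ignore
/// // the attribute number occupies the 10 highest bits,
/// // the word index the 22 lowest ones
/// if let Ok(attr) = Attribute::new(3, 11) {
///     assert_eq!(attr.attribute(), 3);
///     assert_eq!(attr.word_index(), 11);
/// }
/// ```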
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Attribute(u32);
impl Attribute {
/// Construct an `Attribute` from an attribute number and
/// the word position of a match according to the tokenizer used.
pub(crate) fn new(attribute: u16, index: u32) -> Result<Attribute, AttributeError> {
if attribute & 0b1111_1100_0000_0000 != 0 {
return Err(AttributeError::AttributeTooBig)
}
if index & 0b1111_1111_1100_0000_0000_0000_0000_0000 != 0 {
return Err(AttributeError::IndexTooBig)
}
let attribute = u32::from(attribute) << 22;
Ok(Attribute(attribute | index))
}
/// Construct an `Attribute` from an attribute number and
/// the word position of a match according to the tokenizer used.
///
/// # Panics
///
/// The attribute must be lower than 1024
/// and the word index lower than 2^22.
pub(crate) fn new_faillible(attribute: u16, index: u32) -> Attribute {
match Attribute::new(attribute, index) {
Ok(attribute) => attribute,
Err(AttributeError::AttributeTooBig) => {
panic!("attribute must not be greater than 1024")
},
Err(AttributeError::IndexTooBig) => {
panic!("attribute word index must not be greater than 2^22")
},
}
}
pub(crate) fn max_value() -> Attribute {
Attribute(u32::max_value())
}
#[inline]
pub fn attribute(self) -> u16 {
(self.0 >> 22) as u16
}
#[inline]
pub fn word_index(self) -> u32 {
self.0 & 0b0000_0000_0011_1111_1111_1111_1111_1111
}
}
impl fmt::Debug for Attribute {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("Attribute")
.field("attribute", &self.attribute())
.field("word_index", &self.word_index())
.finish()
}
}
pub enum AttributeError {
AttributeTooBig,
IndexTooBig,
}
#[cfg(test)]
mod tests {
use super::*;
use quickcheck::{quickcheck, TestResult};
quickcheck! {
fn qc_attribute(gen_attr: u16, gen_index: u32) -> TestResult {
if gen_attr >= 2_u16.pow(10) || gen_index >= 2_u32.pow(22) {
return TestResult::discard()
}
let attribute = Attribute::new_faillible(gen_attr, gen_index);
let valid_attribute = attribute.attribute() == gen_attr;
let valid_index = attribute.word_index() == gen_index;
TestResult::from_bool(valid_attribute && valid_index)
}
fn qc_attribute_ord(gen_attr: u16, gen_index: u32) -> TestResult {
if gen_attr >= 2_u16.pow(10) || gen_index >= 2_u32.pow(22) {
return TestResult::discard()
}
let a = Attribute::new_faillible(gen_attr, gen_index);
let b = Attribute::new_faillible(gen_attr + 1, gen_index + 1);
TestResult::from_bool(a < b)
}
}
}


@ -1,91 +0,0 @@
use fst::Automaton;
use lazy_static::lazy_static;
use levenshtein_automata::{
LevenshteinAutomatonBuilder as LevBuilder,
DFA, Distance,
};
lazy_static! {
static ref LEVDIST0: LevBuilder = LevBuilder::new(0, false);
static ref LEVDIST1: LevBuilder = LevBuilder::new(1, false);
static ref LEVDIST2: LevBuilder = LevBuilder::new(2, false);
}
pub struct DfaExt {
query_len: usize,
automaton: DFA,
}
impl Automaton for DfaExt {
type State = <DFA as Automaton>::State;
fn start(&self) -> Self::State {
self.automaton.start()
}
fn is_match(&self, state: &Self::State) -> bool {
self.automaton.is_match(state)
}
fn can_match(&self, state: &Self::State) -> bool {
self.automaton.can_match(state)
}
fn will_always_match(&self, state: &Self::State) -> bool {
self.automaton.will_always_match(state)
}
fn accept(&self, state: &Self::State, byte: u8) -> Self::State {
self.automaton.accept(state, byte)
}
}
impl AutomatonExt for DfaExt {
fn eval<B: AsRef<[u8]>>(&self, s: B) -> Distance {
self.automaton.eval(s)
}
fn query_len(&self) -> usize {
self.query_len
}
}
#[derive(Copy, Clone)]
enum PrefixSetting {
Prefix,
NoPrefix,
}
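// Pick the allowed edit distance from the query length (in bytes):
// 0 typos up to 4 bytes, 1 typo for 5 to 8 bytes, 2 typos beyond.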
fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DfaExt {
use self::PrefixSetting::{Prefix, NoPrefix};
let dfa = match query.len() {
0 ..= 4 => match setting {
Prefix => LEVDIST0.build_prefix_dfa(query),
NoPrefix => LEVDIST0.build_dfa(query),
},
5 ..= 8 => match setting {
Prefix => LEVDIST1.build_prefix_dfa(query),
NoPrefix => LEVDIST1.build_dfa(query),
},
_ => match setting {
Prefix => LEVDIST2.build_prefix_dfa(query),
NoPrefix => LEVDIST2.build_dfa(query),
},
};
DfaExt { query_len: query.len(), automaton: dfa }
}
pub fn build_prefix_dfa(query: &str) -> DfaExt {
build_dfa_with_setting(query, PrefixSetting::Prefix)
}
pub fn build_dfa(query: &str) -> DfaExt {
build_dfa_with_setting(query, PrefixSetting::NoPrefix)
}
pub trait AutomatonExt: Automaton {
fn eval<B: AsRef<[u8]>>(&self, s: B) -> Distance;
fn query_len(&self) -> usize;
}


@ -1,26 +0,0 @@
use std::io::{self, BufReader, BufRead};
use std::collections::HashSet;
use std::path::Path;
use std::fs::File;
#[derive(Debug)]
pub struct CommonWords(HashSet<String>);
impl CommonWords {
pub fn from_file<P>(path: P) -> io::Result<Self>
where P: AsRef<Path>
{
let file = File::open(path)?;
let file = BufReader::new(file);
let mut set = HashSet::new();
for line in file.lines().filter_map(|l| l.ok()) {
let word = line.trim().to_owned();
set.insert(word);
}
Ok(CommonWords(set))
}
pub fn contains(&self, word: &str) -> bool {
self.0.contains(word)
}
}


@ -1,54 +0,0 @@
use std::io::{self, Cursor, BufRead};
use std::slice::from_raw_parts;
use std::mem::size_of;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use sdset::Set;
use crate::DocumentId;
use crate::data::SharedData;
use super::into_u8_slice;
#[derive(Default, Clone)]
pub struct DocIds(SharedData);
impl DocIds {
pub fn new(ids: &Set<DocumentId>) -> DocIds {
let bytes = unsafe { into_u8_slice(ids.as_slice()) };
let data = SharedData::from_bytes(bytes.to_vec());
DocIds(data)
}
pub fn from_cursor(cursor: &mut Cursor<SharedData>) -> io::Result<DocIds> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let offset = cursor.position() as usize;
let doc_ids = cursor.get_ref().range(offset, len);
cursor.consume(len);
Ok(DocIds(doc_ids))
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let len = self.0.len() as u64;
bytes.write_u64::<LittleEndian>(len).unwrap();
bytes.extend_from_slice(&self.0);
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn as_bytes(&self) -> &[u8] {
&self.0
}
}
impl AsRef<Set<DocumentId>> for DocIds {
fn as_ref(&self) -> &Set<DocumentId> {
let slice = &self.0;
let ptr = slice.as_ptr() as *const DocumentId;
let len = slice.len() / size_of::<DocumentId>();
let slice = unsafe { from_raw_parts(ptr, len) };
Set::new_unchecked(slice)
}
}


@ -1,226 +0,0 @@
use std::io::{self, Write, Cursor, BufRead};
use std::slice::from_raw_parts;
use std::mem::size_of;
use std::ops::Index;
use std::sync::Arc;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use sdset::Set;
use crate::DocIndex;
use crate::data::SharedData;
use super::into_u8_slice;
#[derive(Debug)]
#[repr(C)]
struct Range {
start: u64,
end: u64,
}
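// `ranges` is reinterpreted as a packed `[Range]` slice: `ranges[i]` delimits
// the slice of the packed `[DocIndex]` data in `indexes` that belongs to the
// i-th inserted set.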
#[derive(Clone, Default)]
pub struct DocIndexes {
ranges: SharedData,
indexes: SharedData,
}
impl DocIndexes {
pub fn from_bytes(bytes: Vec<u8>) -> io::Result<DocIndexes> {
let bytes = Arc::new(bytes);
let len = bytes.len();
let data = SharedData::new(bytes, 0, len);
let mut cursor = Cursor::new(data);
DocIndexes::from_cursor(&mut cursor)
}
pub fn from_cursor(cursor: &mut Cursor<SharedData>) -> io::Result<DocIndexes> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let offset = cursor.position() as usize;
let ranges = cursor.get_ref().range(offset, len);
cursor.consume(len);
let len = cursor.read_u64::<LittleEndian>()? as usize;
let offset = cursor.position() as usize;
let indexes = cursor.get_ref().range(offset, len);
cursor.consume(len);
Ok(DocIndexes { ranges, indexes })
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let ranges_len = self.ranges.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(ranges_len);
bytes.extend_from_slice(&self.ranges);
let indexes_len = self.indexes.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(indexes_len);
bytes.extend_from_slice(&self.indexes);
}
pub fn get(&self, index: usize) -> Option<&Set<DocIndex>> {
self.ranges().get(index).map(|Range { start, end }| {
let start = *start as usize;
let end = *end as usize;
let slice = &self.indexes()[start..end];
Set::new_unchecked(slice)
})
}
fn ranges(&self) -> &[Range] {
let slice = &self.ranges;
let ptr = slice.as_ptr() as *const Range;
let len = slice.len() / size_of::<Range>();
unsafe { from_raw_parts(ptr, len) }
}
fn indexes(&self) -> &[DocIndex] {
let slice = &self.indexes;
let ptr = slice.as_ptr() as *const DocIndex;
let len = slice.len() / size_of::<DocIndex>();
unsafe { from_raw_parts(ptr, len) }
}
}
impl Index<usize> for DocIndexes {
type Output = [DocIndex];
fn index(&self, index: usize) -> &Self::Output {
match self.get(index) {
Some(indexes) => indexes,
None => panic!("index {} out of range for a maximum of {} ranges", index, self.ranges().len()),
}
}
}
pub struct DocIndexesBuilder<W> {
ranges: Vec<Range>,
indexes: Vec<DocIndex>,
wtr: W,
}
impl DocIndexesBuilder<Vec<u8>> {
pub fn memory() -> Self {
DocIndexesBuilder {
ranges: Vec::new(),
indexes: Vec::new(),
wtr: Vec::new(),
}
}
}
impl<W: Write> DocIndexesBuilder<W> {
pub fn new(wtr: W) -> Self {
DocIndexesBuilder {
ranges: Vec::new(),
indexes: Vec::new(),
wtr: wtr,
}
}
pub fn insert(&mut self, indexes: &Set<DocIndex>) {
let len = indexes.len() as u64;
let start = self.ranges.last().map(|r| r.end).unwrap_or(0);
let range = Range { start, end: start + len };
self.ranges.push(range);
self.indexes.extend_from_slice(indexes);
}
pub fn finish(self) -> io::Result<()> {
self.into_inner().map(drop)
}
pub fn into_inner(mut self) -> io::Result<W> {
let ranges = unsafe { into_u8_slice(&self.ranges) };
let len = ranges.len() as u64;
self.wtr.write_u64::<LittleEndian>(len)?;
self.wtr.write_all(ranges)?;
let indexes = unsafe { into_u8_slice(&self.indexes) };
let len = indexes.len() as u64;
self.wtr.write_u64::<LittleEndian>(len)?;
self.wtr.write_all(indexes)?;
Ok(self.wtr)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
use crate::{Attribute, WordArea};
use crate::DocumentId;
#[test]
fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex {
document_id: DocumentId(0),
attribute: Attribute::new_faillible(3, 11),
word_area: WordArea::new_faillible(30, 4)
};
let b = DocIndex {
document_id: DocumentId(1),
attribute: Attribute::new_faillible(4, 21),
word_area: WordArea::new_faillible(35, 6)
};
let c = DocIndex {
document_id: DocumentId(2),
attribute: Attribute::new_faillible(8, 2),
word_area: WordArea::new_faillible(89, 6)
};
let mut builder = DocIndexesBuilder::memory();
builder.insert(Set::new(&[a])?);
builder.insert(Set::new(&[a, b, c])?);
builder.insert(Set::new(&[a, c])?);
let bytes = builder.into_inner()?;
let docs = DocIndexes::from_bytes(bytes)?;
assert_eq!(docs.get(0), Some(Set::new(&[a])?));
assert_eq!(docs.get(1), Some(Set::new(&[a, b, c])?));
assert_eq!(docs.get(2), Some(Set::new(&[a, c])?));
assert_eq!(docs.get(3), None);
Ok(())
}
#[test]
fn serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex {
document_id: DocumentId(0),
attribute: Attribute::new_faillible(3, 11),
word_area: WordArea::new_faillible(30, 4)
};
let b = DocIndex {
document_id: DocumentId(1),
attribute: Attribute::new_faillible(4, 21),
word_area: WordArea::new_faillible(35, 6)
};
let c = DocIndex {
document_id: DocumentId(2),
attribute: Attribute::new_faillible(8, 2),
word_area: WordArea::new_faillible(89, 6)
};
let mut builder = DocIndexesBuilder::memory();
builder.insert(Set::new(&[a])?);
builder.insert(Set::new(&[a, b, c])?);
builder.insert(Set::new(&[a, c])?);
let builder_bytes = builder.into_inner()?;
let docs = DocIndexes::from_bytes(builder_bytes.clone())?;
let mut bytes = Vec::new();
docs.write_to_bytes(&mut bytes);
assert_eq!(builder_bytes, bytes);
Ok(())
}
}


@ -1,58 +0,0 @@
mod doc_ids;
mod doc_indexes;
use std::slice::from_raw_parts;
use std::mem::size_of;
use std::ops::Deref;
use std::sync::Arc;
pub use self::doc_ids::DocIds;
pub use self::doc_indexes::{DocIndexes, DocIndexesBuilder};
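/// A cheaply cloneable window (`offset` + `len`) over a reference-counted
/// byte buffer.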
#[derive(Default, Clone)]
pub struct SharedData {
pub bytes: Arc<Vec<u8>>,
pub offset: usize,
pub len: usize,
}
impl SharedData {
pub fn from_bytes(vec: Vec<u8>) -> SharedData {
let len = vec.len();
let bytes = Arc::new(vec);
SharedData::new(bytes, 0, len)
}
pub fn new(bytes: Arc<Vec<u8>>, offset: usize, len: usize) -> SharedData {
SharedData { bytes, offset, len }
}
pub fn range(&self, offset: usize, len: usize) -> SharedData {
assert!(offset + len <= self.len);
SharedData {
bytes: self.bytes.clone(),
offset: self.offset + offset,
len: len,
}
}
}
impl Deref for SharedData {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.as_ref()
}
}
impl AsRef<[u8]> for SharedData {
fn as_ref(&self) -> &[u8] {
&self.bytes[self.offset..self.offset + self.len]
}
}
unsafe fn into_u8_slice<T: Sized>(slice: &[T]) -> &[u8] {
let ptr = slice.as_ptr() as *const u8;
let len = slice.len() * size_of::<T>();
from_raw_parts(ptr, len)
}


@ -1,186 +0,0 @@
use std::error::Error;
use std::ops::Deref;
use std::fmt;
use rocksdb::rocksdb::{DB, Snapshot, SeekKey};
use rocksdb::rocksdb_options::ReadOptions;
use serde::forward_to_deserialize_any;
use serde::de::value::MapDeserializer;
use serde::de::{self, Visitor, IntoDeserializer};
use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
use crate::database::schema::Schema;
use crate::DocumentId;
pub struct Deserializer<'a, D>
where D: Deref<Target=DB>
{
snapshot: &'a Snapshot<D>,
schema: &'a Schema,
document_id: DocumentId,
}
impl<'a, D> Deserializer<'a, D>
where D: Deref<Target=DB>
{
pub fn new(snapshot: &'a Snapshot<D>, schema: &'a Schema, doc: DocumentId) -> Self {
Deserializer { snapshot, schema, document_id: doc }
}
}
impl<'de, 'a, 'b, D> de::Deserializer<'de> for &'b mut Deserializer<'a, D>
where D: Deref<Target=DB>
{
type Error = DeserializerError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
self.deserialize_map(visitor)
}
forward_to_deserialize_any! {
bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
bytes byte_buf unit_struct tuple_struct
identifier tuple ignored_any option newtype_struct enum struct
}
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
let mut options = ReadOptions::new();
let lower = DocumentKey::new(self.document_id);
let upper = lower.with_attribute_max();
options.set_iterate_lower_bound(lower.as_ref());
options.set_iterate_upper_bound(upper.as_ref());
let mut iter = self.snapshot.iter_opt(options);
iter.seek(SeekKey::Start);
if iter.kv().is_none() {
// FIXME return an error
}
let iter = iter.map(|(key, value)| {
// retrieve the schema attribute name
// from the schema attribute number
let document_key_attr = DocumentKeyAttr::from_bytes(&key);
let schema_attr = document_key_attr.attribute();
let attribute_name = self.schema.attribute_name(schema_attr);
(attribute_name, Value(value))
});
let map_deserializer = MapDeserializer::new(iter);
visitor.visit_map(map_deserializer)
}
}
struct Value(Vec<u8>);
impl<'de> IntoDeserializer<'de, DeserializerError> for Value {
type Deserializer = Self;
fn into_deserializer(self) -> Self::Deserializer {
self
}
}
macro_rules! forward_to_bincode_values {
($($ty:ident => $de_method:ident,)*) => {
$(
fn $de_method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: de::Visitor<'de>
{
match bincode::deserialize::<$ty>(&self.0) {
Ok(val) => val.into_deserializer().$de_method(visitor),
Err(e) => Err(de::Error::custom(e)),
}
}
)*
}
}
impl<'de, 'a> de::Deserializer<'de> for Value {
type Error = DeserializerError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
self.0.into_deserializer().deserialize_any(visitor)
}
fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
self.deserialize_string(visitor)
}
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
match bincode::deserialize::<String>(&self.0) {
Ok(val) => val.into_deserializer().deserialize_string(visitor),
Err(e) => Err(de::Error::custom(e)),
}
}
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
self.deserialize_byte_buf(visitor)
}
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where V: Visitor<'de>
{
match bincode::deserialize::<Vec<u8>>(&self.0) {
Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor),
Err(e) => Err(de::Error::custom(e)),
}
}
forward_to_bincode_values! {
char => deserialize_char,
bool => deserialize_bool,
u8 => deserialize_u8,
u16 => deserialize_u16,
u32 => deserialize_u32,
u64 => deserialize_u64,
i8 => deserialize_i8,
i16 => deserialize_i16,
i32 => deserialize_i32,
i64 => deserialize_i64,
f32 => deserialize_f32,
f64 => deserialize_f64,
}
forward_to_deserialize_any! {
unit seq map
unit_struct tuple_struct
identifier tuple ignored_any option newtype_struct enum struct
}
}
#[derive(Debug)]
pub enum DeserializerError {
Custom(String),
}
impl de::Error for DeserializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
DeserializerError::Custom(msg.to_string())
}
}
impl fmt::Display for DeserializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DeserializerError::Custom(s) => f.write_str(&s),
}
}
}
impl Error for DeserializerError {}


@ -1,145 +0,0 @@
use std::io::{Cursor, Read, Write};
use std::mem::size_of;
use std::fmt;
use byteorder::{BigEndian, WriteBytesExt, ReadBytesExt};
use crate::database::schema::SchemaAttr;
use crate::DocumentId;
const DOC_KEY_LEN: usize = 4 + size_of::<u64>();
const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + size_of::<u16>();
#[derive(Copy, Clone)]
pub struct DocumentKey([u8; DOC_KEY_LEN]);
impl DocumentKey {
pub fn new(id: DocumentId) -> DocumentKey {
let mut buffer = [0; DOC_KEY_LEN];
let mut wtr = Cursor::new(&mut buffer[..]);
wtr.write_all(b"doc-").unwrap();
wtr.write_u64::<BigEndian>(id.0).unwrap();
DocumentKey(buffer)
}
pub fn from_bytes(mut bytes: &[u8]) -> DocumentKey {
assert!(bytes.len() >= DOC_KEY_LEN);
assert_eq!(&bytes[..4], b"doc-");
let mut buffer = [0; DOC_KEY_LEN];
bytes.read_exact(&mut buffer).unwrap();
DocumentKey(buffer)
}
pub fn with_attribute(&self, attr: SchemaAttr) -> DocumentKeyAttr {
DocumentKeyAttr::new(self.document_id(), attr)
}
pub fn with_attribute_max(&self) -> DocumentKeyAttr {
DocumentKeyAttr::new(self.document_id(), SchemaAttr::max())
}
pub fn document_id(&self) -> DocumentId {
let id = (&self.0[4..]).read_u64::<BigEndian>().unwrap();
DocumentId(id)
}
}
impl AsRef<[u8]> for DocumentKey {
fn as_ref(&self) -> &[u8] {
&self.0
}
}
impl fmt::Debug for DocumentKey {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("DocumentKey")
.field("document_id", &self.document_id())
.finish()
}
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]);
impl DocumentKeyAttr {
pub fn new(id: DocumentId, attr: SchemaAttr) -> DocumentKeyAttr {
let mut buffer = [0; DOC_KEY_ATTR_LEN];
let DocumentKey(raw_key) = DocumentKey::new(id);
let mut wtr = Cursor::new(&mut buffer[..]);
wtr.write_all(&raw_key).unwrap();
wtr.write_all(b"-").unwrap();
wtr.write_u16::<BigEndian>(attr.0).unwrap();
DocumentKeyAttr(buffer)
}
pub fn with_attribute_min(id: DocumentId) -> DocumentKeyAttr {
DocumentKeyAttr::new(id, SchemaAttr::min())
}
pub fn with_attribute_max(id: DocumentId) -> DocumentKeyAttr {
DocumentKeyAttr::new(id, SchemaAttr::max())
}
pub fn from_bytes(mut bytes: &[u8]) -> DocumentKeyAttr {
assert!(bytes.len() >= DOC_KEY_ATTR_LEN);
assert_eq!(&bytes[..4], b"doc-");
let mut buffer = [0; DOC_KEY_ATTR_LEN];
bytes.read_exact(&mut buffer).unwrap();
DocumentKeyAttr(buffer)
}
pub fn document_id(&self) -> DocumentId {
let id = (&self.0[4..]).read_u64::<BigEndian>().unwrap();
DocumentId(id)
}
pub fn attribute(&self) -> SchemaAttr {
let offset = 4 + size_of::<u64>() + 1;
let value = (&self.0[offset..]).read_u16::<BigEndian>().unwrap();
SchemaAttr::new(value)
}
pub fn into_document_key(self) -> DocumentKey {
DocumentKey::new(self.document_id())
}
}
impl AsRef<[u8]> for DocumentKeyAttr {
fn as_ref(&self) -> &[u8] {
&self.0
}
}
impl fmt::Debug for DocumentKeyAttr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("DocumentKeyAttr")
.field("document_id", &self.document_id())
.field("attribute", &self.attribute().0)
.finish()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn keep_as_ref_order() {
for (a, b) in (0..).zip(1..).take(u16::max_value() as usize - 1) {
let id = DocumentId(0);
let a = DocumentKeyAttr::new(id, SchemaAttr(a));
let b = DocumentKeyAttr::new(id, SchemaAttr(b));
assert!(a < b);
assert!(a.as_ref() < b.as_ref());
}
}
}


@ -1,82 +0,0 @@
mod negative;
mod positive;
pub(crate) use self::negative::Negative;
pub(crate) use self::positive::{Positive, PositiveBuilder};
use std::error::Error;
use std::io::Cursor;
use std::sync::Arc;
use fst::{IntoStreamer, Streamer};
use sdset::duo::DifferenceByKey;
use sdset::{Set, SetOperation};
use fst::Map;
use crate::data::{SharedData, DocIndexes};
#[derive(Default)]
pub struct Index {
pub(crate) negative: Negative,
pub(crate) positive: Positive,
}
impl Index {
pub fn from_bytes(bytes: Vec<u8>) -> Result<Index, Box<Error>> {
let len = bytes.len();
Index::from_shared_bytes(Arc::new(bytes), 0, len)
}
pub fn from_shared_bytes(
bytes: Arc<Vec<u8>>,
offset: usize,
len: usize,
) -> Result<Index, Box<Error>>
{
let data = SharedData::new(bytes, offset, len);
let mut cursor = Cursor::new(data);
let negative = Negative::from_cursor(&mut cursor)?;
let positive = Positive::from_cursor(&mut cursor)?;
Ok(Index { negative, positive })
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
self.negative.write_to_bytes(bytes);
self.positive.write_to_bytes(bytes);
}
pub fn merge(&self, other: &Index) -> Result<Index, Box<Error>> {
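// Apply `other`'s negative part (the deleted documents) to our positive index,
// then union the result with `other`'s positive part.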
if other.negative.is_empty() {
let negative = Negative::default();
let positive = self.positive.union(&other.positive)?;
return Ok(Index { negative, positive })
}
let mut buffer = Vec::new();
let mut builder = PositiveBuilder::memory();
let mut stream = self.positive.into_stream();
while let Some((key, indexes)) = stream.next() {
let op = DifferenceByKey::new(indexes, &other.negative, |x| x.document_id, |x| *x);
buffer.clear();
op.extend_vec(&mut buffer);
if !buffer.is_empty() {
let indexes = Set::new_unchecked(&buffer);
builder.insert(key, indexes)?;
}
}
let positive = {
let (map, indexes) = builder.into_inner()?;
let map = Map::from_bytes(map)?;
let indexes = DocIndexes::from_bytes(indexes)?;
Positive::new(map, indexes)
};
let negative = Negative::default();
let positive = positive.union(&other.positive)?;
Ok(Index { negative, positive })
}
}


@ -1,43 +0,0 @@
use std::error::Error;
use std::io::Cursor;
use std::ops::Deref;
use sdset::Set;
use byteorder::{LittleEndian, WriteBytesExt};
use crate::data::SharedData;
use crate::data::DocIds;
use crate::DocumentId;
#[derive(Default)]
pub struct Negative(DocIds);
impl Negative {
pub fn new(doc_ids: DocIds) -> Negative {
Negative(doc_ids)
}
pub fn from_cursor(cursor: &mut Cursor<SharedData>) -> Result<Negative, Box<Error>> {
let doc_ids = DocIds::from_cursor(cursor)?;
Ok(Negative(doc_ids))
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let slice = self.0.as_bytes();
let len = slice.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(len);
bytes.extend_from_slice(slice);
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
}
impl Deref for Negative {
type Target = Set<DocumentId>;
fn deref(&self) -> &Self::Target {
self.0.as_ref()
}
}


@ -1,166 +0,0 @@
use std::io::{Write, BufRead, Cursor};
use std::error::Error;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use fst::{map, Map, Streamer, IntoStreamer};
use sdset::{Set, SetOperation};
use sdset::duo::Union;
use fst::raw::Fst;
use crate::data::{DocIndexes, DocIndexesBuilder};
use crate::data::SharedData;
use crate::DocIndex;
#[derive(Default)]
pub struct Positive {
map: Map,
indexes: DocIndexes,
}
impl Positive {
pub fn new(map: Map, indexes: DocIndexes) -> Positive {
Positive { map, indexes }
}
pub fn from_cursor(cursor: &mut Cursor<SharedData>) -> Result<Positive, Box<Error>> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let offset = cursor.position() as usize;
let data = cursor.get_ref().range(offset, len);
let fst = Fst::from_shared_bytes(data.bytes, data.offset, data.len)?;
let map = Map::from(fst);
cursor.consume(len);
let indexes = DocIndexes::from_cursor(cursor)?;
Ok(Positive { map, indexes})
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let slice = self.map.as_fst().as_bytes();
let len = slice.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(len);
bytes.extend_from_slice(slice);
self.indexes.write_to_bytes(bytes);
}
pub fn map(&self) -> &Map {
&self.map
}
pub fn indexes(&self) -> &DocIndexes {
&self.indexes
}
pub fn union(&self, other: &Positive) -> Result<Positive, Box<Error>> {
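// Stream the union of both word maps; when a word appears in both indexes
// its two DocIndex sets are merged, otherwise the single set is copied as-is.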
let mut builder = PositiveBuilder::memory();
let mut stream = map::OpBuilder::new().add(&self.map).add(&other.map).union();
let mut buffer = Vec::new();
while let Some((key, ivalues)) = stream.next() {
buffer.clear();
match ivalues {
[a, b] => {
let indexes = if a.index == 0 { &self.indexes } else { &other.indexes };
let indexes = indexes.get(a.value as usize).ok_or(format!("index not found"))?;
let a = Set::new_unchecked(indexes);
let indexes = if b.index == 0 { &self.indexes } else { &other.indexes };
let indexes = indexes.get(b.value as usize).ok_or(format!("index not found"))?;
let b = Set::new_unchecked(indexes);
let op = Union::new(a, b);
op.extend_vec(&mut buffer);
},
[a] => {
let indexes = if a.index == 0 { &self.indexes } else { &other.indexes };
let indexes = indexes.get(a.value as usize).ok_or(format!("index not found"))?;
buffer.extend_from_slice(indexes)
},
_ => continue,
}
if !buffer.is_empty() {
let indexes = Set::new_unchecked(&buffer);
builder.insert(key, indexes)?;
}
}
let (map, indexes) = builder.into_inner()?;
let map = Map::from_bytes(map)?;
let indexes = DocIndexes::from_bytes(indexes)?;
Ok(Positive { map, indexes })
}
}
impl<'m, 'a> IntoStreamer<'a> for &'m Positive {
type Item = (&'a [u8], &'a Set<DocIndex>);
/// The type of the stream to be constructed.
type Into = Stream<'m>;
/// Construct a stream from `Self`.
fn into_stream(self) -> Self::Into {
Stream {
map_stream: self.map.into_stream(),
indexes: &self.indexes,
}
}
}
pub struct Stream<'m> {
map_stream: map::Stream<'m>,
indexes: &'m DocIndexes,
}
impl<'m, 'a> Streamer<'a> for Stream<'m> {
type Item = (&'a [u8], &'a Set<DocIndex>);
fn next(&'a mut self) -> Option<Self::Item> {
match self.map_stream.next() {
Some((input, index)) => {
let indexes = &self.indexes[index as usize];
let indexes = Set::new_unchecked(indexes);
Some((input, indexes))
},
None => None,
}
}
}
pub struct PositiveBuilder<W, X> {
map: fst::MapBuilder<W>,
indexes: DocIndexesBuilder<X>,
value: u64,
}
impl PositiveBuilder<Vec<u8>, Vec<u8>> {
pub fn memory() -> Self {
PositiveBuilder {
map: fst::MapBuilder::memory(),
indexes: DocIndexesBuilder::memory(),
value: 0,
}
}
}
impl<W: Write, X: Write> PositiveBuilder<W, X> {
/// If a key is inserted that is less than or equal to any previous key added,
/// then an error is returned. Similarly, if there was a problem writing
/// to the underlying writer, an error is returned.
// FIXME what if one write doesn't work but the other do ?
pub fn insert<K>(&mut self, key: K, indexes: &Set<DocIndex>) -> Result<(), Box<Error>>
where K: AsRef<[u8]>,
{
self.map.insert(key, self.value)?;
self.indexes.insert(indexes);
self.value += 1;
Ok(())
}
pub fn into_inner(self) -> Result<(W, X), Box<Error>> {
let map = self.map.into_inner()?;
let indexes = self.indexes.into_inner()?;
Ok((map, indexes))
}
}


@ -1,684 +0,0 @@
use std::sync::{Arc, Mutex};
use std::error::Error;
use std::ops::Deref;
use std::path::Path;
use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions};
use rocksdb::rocksdb::{Writable, Snapshot};
use rocksdb::{DB, DBVector, MergeOperands};
use crossbeam::atomic::ArcCell;
use log::debug;
pub use self::document_key::{DocumentKey, DocumentKeyAttr};
pub use self::view::{DatabaseView, DocumentIter};
pub use self::update::{Update, UpdateBuilder};
pub use self::serde::SerializerError;
pub use self::schema::Schema;
pub use self::index::Index;
const DATA_INDEX: &[u8] = b"data-index";
const DATA_SCHEMA: &[u8] = b"data-schema";
pub mod schema;
pub(crate) mod index;
mod deserializer;
mod document_key;
mod serde;
mod update;
mod view;
fn retrieve_data_schema<D>(snapshot: &Snapshot<D>) -> Result<Schema, Box<Error>>
where D: Deref<Target=DB>
{
match snapshot.get(DATA_SCHEMA)? {
Some(vector) => Ok(Schema::read_from_bin(&*vector)?),
None => Err(String::from("BUG: no schema found in the database").into()),
}
}
fn retrieve_data_index<D>(snapshot: &Snapshot<D>) -> Result<Index, Box<Error>>
where D: Deref<Target=DB>
{
let index = match snapshot.get(DATA_INDEX)? {
Some(vector) => {
let bytes = vector.as_ref().to_vec();
Index::from_bytes(bytes)?
},
None => Index::default(),
};
Ok(index)
}
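// RocksDB merge operator for the "data-index" key: deserializes the existing
// value and every pending operand and folds them into a single serialized Index.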
fn merge_indexes(key: &[u8], existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
assert_eq!(key, DATA_INDEX, "The merge operator only supports \"data-index\" merging");
let mut index: Option<Index> = None;
for bytes in existing.into_iter().chain(operands) {
let operand = Index::from_bytes(bytes.to_vec()).unwrap();
let merged = match index {
Some(ref index) => index.merge(&operand).unwrap(),
None => operand,
};
index.replace(merged);
}
let index = index.unwrap_or_default();
let mut bytes = Vec::new();
index.write_to_bytes(&mut bytes);
bytes
}
pub struct Database {
// The DB is kept under a Mutex to synchronize update ingestions and to keep
// DB update locking separate from DatabaseView acquisition, in other words:
// "block readers for the minimum possible amount of time"
db: Mutex<Arc<DB>>,
// This view is updated each time the DB ingests an update
view: ArcCell<DatabaseView<Arc<DB>>>,
}
impl Database {
pub fn create<P: AsRef<Path>>(path: P, schema: &Schema) -> Result<Database, Box<Error>> {
let path = path.as_ref();
if path.exists() {
return Err(format!("File already exists at path: {}, cannot create database.",
path.display()).into())
}
let path = path.to_string_lossy();
let mut opts = DBOptions::new();
opts.create_if_missing(true);
// opts.error_if_exists(true); // FIXME pull request that
let mut cf_opts = ColumnFamilyOptions::new();
cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
let db = DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
let mut schema_bytes = Vec::new();
schema.write_to_bin(&mut schema_bytes)?;
db.put(DATA_SCHEMA, &schema_bytes)?;
let db = Arc::new(db);
let snapshot = Snapshot::new(db.clone());
let view = ArcCell::new(Arc::new(DatabaseView::new(snapshot)?));
Ok(Database { db: Mutex::new(db), view })
}
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
let path = path.as_ref().to_string_lossy();
let mut opts = DBOptions::new();
opts.create_if_missing(false);
let mut cf_opts = ColumnFamilyOptions::new();
cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
let db = DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
// FIXME create a generic function to do that !
let _schema = match db.get(DATA_SCHEMA)? {
Some(value) => Schema::read_from_bin(&*value)?,
None => return Err(String::from("Database does not contain a schema").into()),
};
let db = Arc::new(db);
let snapshot = Snapshot::new(db.clone());
let view = ArcCell::new(Arc::new(DatabaseView::new(snapshot)?));
Ok(Database { db: Mutex::new(db), view })
}
pub fn ingest_update_file(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
let snapshot = {
// We must have a mutex here to ensure that update ingestions and compactions
// are done atomically and in the right order.
// This way update ingestions will block other update ingestions without blocking view
// creations while doing the "data-index" compaction
let db = match self.db.lock() {
Ok(db) => db,
Err(e) => return Err(e.to_string().into()),
};
let path = update.path().to_string_lossy();
let options = IngestExternalFileOptions::new();
// options.move_files(move_update);
debug!("ingest update file");
let cf_handle = db.cf_handle("default").expect("\"default\" column family not found");
db.ingest_external_file_optimized(&cf_handle, &options, &[&path])?;
debug!("compacting index range");
// Compact now so the merge operator is triggered only once, while ingesting
// the update, instead of on every search
db.compact_range(Some(DATA_INDEX), Some(DATA_INDEX));
Snapshot::new(db.clone())
};
let view = Arc::new(DatabaseView::new(snapshot)?);
self.view.set(view.clone());
Ok(view)
}
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
self.view().get(key)
}
pub fn flush(&self) -> Result<(), Box<Error>> {
match self.db.lock() {
Ok(db) => Ok(db.flush(true)?),
Err(e) => Err(e.to_string().into()),
}
}
pub fn view(&self) -> Arc<DatabaseView<Arc<DB>>> {
self.view.get()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
use serde_derive::{Serialize, Deserialize};
use hashbrown::HashSet;
use tempfile::tempdir;
use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
use crate::database::update::UpdateBuilder;
use crate::tokenizer::DefaultBuilder;
#[test]
fn ingest_one_update_file() -> Result<(), Box<Error>> {
let dir = tempdir()?;
let stop_words = HashSet::new();
let rocksdb_path = dir.path().join("rocksdb.rdb");
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
struct SimpleDoc {
id: u64,
title: String,
description: String,
timestamp: u64,
}
let schema = {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("id", STORED);
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
builder.new_attribute("timestamp", STORED);
builder.build()
};
let database = Database::create(&rocksdb_path, &schema)?;
let update_path = dir.path().join("update.sst");
let doc0 = SimpleDoc {
id: 0,
title: String::from("I am a title"),
description: String::from("I am a description"),
timestamp: 1234567,
};
let doc1 = SimpleDoc {
id: 1,
title: String::from("I am the second title"),
description: String::from("I am the second description"),
timestamp: 7654321,
};
let docid0;
let docid1;
let update = {
let tokenizer_builder = DefaultBuilder::new();
let mut builder = UpdateBuilder::new(update_path, schema);
docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
builder.build()?
};
database.ingest_update_file(update)?;
let view = database.view();
let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
assert_eq!(doc0, de_doc0);
assert_eq!(doc1, de_doc1);
Ok(dir.close()?)
}
#[test]
fn ingest_two_update_files() -> Result<(), Box<Error>> {
let dir = tempdir()?;
let stop_words = HashSet::new();
let rocksdb_path = dir.path().join("rocksdb.rdb");
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
struct SimpleDoc {
id: u64,
title: String,
description: String,
timestamp: u64,
}
let schema = {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("id", STORED);
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
builder.new_attribute("timestamp", STORED);
builder.build()
};
let database = Database::create(&rocksdb_path, &schema)?;
let doc0 = SimpleDoc {
id: 0,
title: String::from("I am a title"),
description: String::from("I am a description"),
timestamp: 1234567,
};
let doc1 = SimpleDoc {
id: 1,
title: String::from("I am the second title"),
description: String::from("I am the second description"),
timestamp: 7654321,
};
let doc2 = SimpleDoc {
id: 2,
title: String::from("I am the third title"),
description: String::from("I am the third description"),
timestamp: 7654321,
};
let doc3 = SimpleDoc {
id: 3,
title: String::from("I am the fourth title"),
description: String::from("I am the fourth description"),
timestamp: 7654321,
};
let docid0;
let docid1;
let update1 = {
let tokenizer_builder = DefaultBuilder::new();
let update_path = dir.path().join("update-000.sst");
let mut builder = UpdateBuilder::new(update_path, schema.clone());
docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
builder.build()?
};
let docid2;
let docid3;
let update2 = {
let tokenizer_builder = DefaultBuilder::new();
let update_path = dir.path().join("update-001.sst");
let mut builder = UpdateBuilder::new(update_path, schema);
docid2 = builder.update_document(&doc2, &tokenizer_builder, &stop_words)?;
docid3 = builder.update_document(&doc3, &tokenizer_builder, &stop_words)?;
builder.build()?
};
database.ingest_update_file(update1)?;
database.ingest_update_file(update2)?;
let view = database.view();
let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
assert_eq!(doc0, de_doc0);
assert_eq!(doc1, de_doc1);
let de_doc2: SimpleDoc = view.document_by_id(docid2)?;
let de_doc3: SimpleDoc = view.document_by_id(docid3)?;
assert_eq!(doc2, de_doc2);
assert_eq!(doc3, de_doc3);
Ok(dir.close()?)
}
}
#[cfg(all(feature = "nightly", test))]
mod bench {
extern crate test;
use super::*;
use std::error::Error;
use std::iter::repeat_with;
use self::test::Bencher;
use rand::distributions::Alphanumeric;
use rand_xorshift::XorShiftRng;
use rand::{Rng, SeedableRng};
use serde_derive::Serialize;
use rand::seq::SliceRandom;
use hashbrown::HashSet;
use crate::tokenizer::DefaultBuilder;
use crate::database::update::UpdateBuilder;
use crate::database::schema::*;
fn random_sentences<R: Rng>(number: usize, rng: &mut R) -> String {
let mut words = String::new();
for i in 0..number {
let word_len = rng.gen_range(1, 12);
let iter = repeat_with(|| rng.sample(Alphanumeric)).take(word_len);
words.extend(iter);
if i == number - 1 { // last word
let final_ = [".", "?", "!", "..."].choose(rng).cloned();
words.extend(final_);
} else {
let middle = [",", ", "].choose(rng).cloned();
words.extend(middle);
}
}
words
}
#[bench]
fn open_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
let dir = tempfile::tempdir()?;
let stop_words = HashSet::new();
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
#[derive(Serialize)]
struct Document {
id: u64,
title: String,
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..300 {
let document = Document {
id: i,
title: random_sentences(rng.gen_range(1, 8), &mut rng),
description: random_sentences(rng.gen_range(20, 200), &mut rng),
};
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
database.ingest_update_file(update)?;
drop(database);
bench.iter(|| {
let database = Database::open(db_path.clone()).unwrap();
test::black_box(|| database);
});
Ok(())
}
#[bench]
fn open_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
let dir = tempfile::tempdir()?;
let stop_words = HashSet::new();
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
#[derive(Serialize)]
struct Document {
id: u64,
title: String,
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..3000 {
let document = Document {
id: i,
title: random_sentences(rng.gen_range(1, 8), &mut rng),
description: random_sentences(rng.gen_range(20, 200), &mut rng),
};
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
database.ingest_update_file(update)?;
drop(database);
bench.iter(|| {
let database = Database::open(db_path.clone()).unwrap();
test::black_box(|| database);
});
Ok(())
}
#[bench]
#[ignore]
fn open_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
let dir = tempfile::tempdir()?;
let stop_words = HashSet::new();
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
#[derive(Serialize)]
struct Document {
id: u64,
title: String,
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..30_000 {
let document = Document {
id: i,
title: random_sentences(rng.gen_range(1, 8), &mut rng),
description: random_sentences(rng.gen_range(20, 200), &mut rng),
};
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
database.ingest_update_file(update)?;
drop(database);
bench.iter(|| {
let database = Database::open(db_path.clone()).unwrap();
test::black_box(|| database);
});
Ok(())
}
#[bench]
fn search_oneletter_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
let dir = tempfile::tempdir()?;
let stop_words = HashSet::new();
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
#[derive(Serialize)]
struct Document {
id: u64,
title: String,
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..300 {
let document = Document {
id: i,
title: random_sentences(rng.gen_range(1, 8), &mut rng),
description: random_sentences(rng.gen_range(20, 200), &mut rng),
};
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
let view = database.ingest_update_file(update)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {
let documents = view.query_builder().unwrap().query(q, 0..20);
test::black_box(|| documents);
}
});
Ok(())
}
#[bench]
fn search_oneletter_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
let dir = tempfile::tempdir()?;
let stop_words = HashSet::new();
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
#[derive(Serialize)]
struct Document {
id: u64,
title: String,
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..3000 {
let document = Document {
id: i,
title: random_sentences(rng.gen_range(1, 8), &mut rng),
description: random_sentences(rng.gen_range(20, 200), &mut rng),
};
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
let view = database.ingest_update_file(update)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {
let documents = view.query_builder().unwrap().query(q, 0..20);
test::black_box(|| documents);
}
});
Ok(())
}
#[bench]
#[ignore]
fn search_oneletter_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
let dir = tempfile::tempdir()?;
let stop_words = HashSet::new();
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("title", STORED | INDEXED);
builder.new_attribute("description", STORED | INDEXED);
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
#[derive(Serialize)]
struct Document {
id: u64,
title: String,
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..30_000 {
let document = Document {
id: i,
title: random_sentences(rng.gen_range(1, 8), &mut rng),
description: random_sentences(rng.gen_range(20, 200), &mut rng),
};
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
let view = database.ingest_update_file(update)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {
let documents = view.query_builder().unwrap().query(q, 0..20);
test::black_box(|| documents);
}
});
Ok(())
}
}


@ -1,196 +0,0 @@
use crate::database::update::DocumentUpdate;
use crate::database::serde::SerializerError;
use crate::database::schema::SchemaAttr;
use crate::tokenizer::TokenizerBuilder;
use crate::tokenizer::Token;
use crate::{DocumentId, DocIndex, Attribute, WordArea};
use hashbrown::HashSet;
use serde::Serialize;
use serde::ser;
pub struct IndexerSerializer<'a, B> {
pub tokenizer_builder: &'a B,
pub update: &'a mut DocumentUpdate,
pub document_id: DocumentId,
pub attribute: SchemaAttr,
pub stop_words: &'a HashSet<String>,
}
impl<'a, B> ser::Serializer for IndexerSerializer<'a, B>
where B: TokenizerBuilder
{
type Ok = ();
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
for token in self.tokenizer_builder.build(v) {
let Token { word, word_index, char_index } = token;
let document_id = self.document_id;
// FIXME must u32::try_from instead
let attribute = match Attribute::new(self.attribute.0, word_index as u32) {
Ok(attribute) => attribute,
Err(_) => return Ok(()),
};
// insert the exact representation
let word_lower = word.to_lowercase();
let length = word.chars().count() as u16;
if self.stop_words.contains(&word_lower) { continue }
// and the unidecoded lowercased version
let word_unidecoded = unidecode::unidecode(word).to_lowercase();
if word_lower != word_unidecoded {
let word_area = match WordArea::new(char_index as u32, length) {
Ok(word_area) => word_area,
Err(_) => return Ok(()),
};
let doc_index = DocIndex { document_id, attribute, word_area };
self.update.insert_doc_index(word_unidecoded.into_bytes(), doc_index);
}
let word_area = match WordArea::new(char_index as u32, length) {
Ok(word_area) => word_area,
Err(_) => return Ok(()),
};
let doc_index = DocIndex { document_id, attribute, word_area };
self.update.insert_doc_index(word_lower.into_bytes(), doc_index);
}
Ok(())
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "seq" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnserializableType { name: "map" })
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
}
}
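The serializer above indexes a token under two forms whenever its deunicoded form differs from its lowercased form, so a query for "etre" can still match a document containing "être". A standalone sketch of that normalization step (the `unidecode` crate is the same one used above; the helper name is illustrative):

// Illustrative only: returns the forms under which a token would be indexed,
// mirroring the logic of `serialize_str` above.
fn indexed_forms(word: &str) -> Vec<String> {
    let lower = word.to_lowercase();
    let unidecoded = unidecode::unidecode(word).to_lowercase();
    if lower != unidecoded {
        vec![unidecoded, lower]
    } else {
        vec![lower]
    }
}

// indexed_forms("Être") → ["etre", "être"]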

View File

@ -1,57 +0,0 @@
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::error::Error;
use std::fmt;
use serde::ser;
macro_rules! forward_to_unserializable_type {
($($ty:ident => $se_method:ident,)*) => {
$(
fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "$ty" })
}
)*
}
}
pub mod find_id;
pub mod key_to_string;
pub mod serializer;
pub mod indexer_serializer;
pub fn calculate_hash<T: Hash>(t: &T) -> u64 {
let mut s = DefaultHasher::new();
t.hash(&mut s);
s.finish()
}
#[derive(Debug)]
pub enum SerializerError {
DocumentIdNotFound,
UnserializableType { name: &'static str },
Custom(String),
}
impl ser::Error for SerializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
SerializerError::Custom(msg.to_string())
}
}
impl fmt::Display for SerializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
SerializerError::DocumentIdNotFound => {
write!(f, "serialized document does not have an id according to the schema")
}
SerializerError::UnserializableType { name } => {
write!(f, "Only struct and map types are considered valid documents and
can be serialized, not {} types directly.", name)
},
SerializerError::Custom(s) => f.write_str(&s),
}
}
}
impl Error for SerializerError {}
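One subtlety in the `forward_to_unserializable_type!` macro above: a metavariable such as `$ty` is not expanded inside a string literal by `macro_rules!`, so every generated error reports the literal text "$ty" rather than the concrete type name. A hedged variant that reports the actual type name would use `stringify!`; this is a suggestion, not what the original code does:

macro_rules! forward_to_unserializable_type {
    ($($ty:ident => $se_method:ident,)*) => {
        $(
            fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
                // stringify!($ty) expands to the type name as a &'static str
                Err(SerializerError::UnserializableType { name: stringify!($ty) })
            }
        )*
    }
}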

View File

@ -1,286 +0,0 @@
use hashbrown::HashSet;
use serde::Serialize;
use serde::ser;
use crate::database::serde::indexer_serializer::IndexerSerializer;
use crate::database::serde::key_to_string::KeyToStringSerializer;
use crate::database::update::DocumentUpdate;
use crate::database::serde::SerializerError;
use crate::tokenizer::TokenizerBuilder;
use crate::database::schema::Schema;
use crate::DocumentId;
pub struct Serializer<'a, B> {
pub schema: &'a Schema,
pub update: &'a mut DocumentUpdate,
pub document_id: DocumentId,
pub tokenizer_builder: &'a B,
pub stop_words: &'a HashSet<String>,
}
impl<'a, B> ser::Serializer for Serializer<'a, B>
where B: TokenizerBuilder
{
type Ok = ();
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapSerializer<'a, B>;
type SerializeStruct = StructSerializer<'a, B>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "str" })
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Ok(MapSerializer {
schema: self.schema,
document_id: self.document_id,
update: self.update,
tokenizer_builder: self.tokenizer_builder,
stop_words: self.stop_words,
current_key_name: None,
})
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Ok(StructSerializer {
schema: self.schema,
update: self.update,
document_id: self.document_id,
tokenizer_builder: self.tokenizer_builder,
stop_words: self.stop_words,
})
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
}
}
pub struct MapSerializer<'a, B> {
pub schema: &'a Schema,
pub document_id: DocumentId,
pub update: &'a mut DocumentUpdate,
pub tokenizer_builder: &'a B,
pub stop_words: &'a HashSet<String>,
pub current_key_name: Option<String>,
}
impl<'a, B> ser::SerializeMap for MapSerializer<'a, B>
where B: TokenizerBuilder
{
type Ok = ();
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: Serialize,
{
let key = key.serialize(KeyToStringSerializer)?;
self.current_key_name = Some(key);
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: Serialize,
{
let key = self.current_key_name.take().unwrap();
self.serialize_entry(&key, value)
}
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V
) -> Result<(), Self::Error>
where K: Serialize, V: Serialize,
{
let key = key.serialize(KeyToStringSerializer)?;
if let Some(attr) = self.schema.attribute(key) {
let props = self.schema.props(attr);
if props.is_stored() {
let value = bincode::serialize(value).unwrap();
self.update.insert_attribute_value(attr, value);
}
if props.is_indexed() {
let serializer = IndexerSerializer {
update: self.update,
tokenizer_builder: self.tokenizer_builder,
document_id: self.document_id,
attribute: attr,
stop_words: self.stop_words,
};
value.serialize(serializer)?;
}
}
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
pub struct StructSerializer<'a, B> {
pub schema: &'a Schema,
pub document_id: DocumentId,
pub update: &'a mut DocumentUpdate,
pub tokenizer_builder: &'a B,
pub stop_words: &'a HashSet<String>,
}
impl<'a, B> ser::SerializeStruct for StructSerializer<'a, B>
where B: TokenizerBuilder
{
type Ok = ();
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T
) -> Result<(), Self::Error>
where T: Serialize,
{
if let Some(attr) = self.schema.attribute(key) {
let props = self.schema.props(attr);
if props.is_stored() {
let value = bincode::serialize(value).unwrap();
self.update.insert_attribute_value(attr, value);
}
if props.is_indexed() {
let serializer = IndexerSerializer {
update: self.update,
tokenizer_builder: self.tokenizer_builder,
document_id: self.document_id,
attribute: attr,
stop_words: self.stop_words,
};
value.serialize(serializer)?;
}
}
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
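Both `MapSerializer` and `StructSerializer` above dispatch each document field the same way: the field name is looked up in the schema, the raw value is stored as a bincode blob when the attribute is stored, and the value is tokenized through `IndexerSerializer` when it is indexed. As the error messages state, only struct and map types are accepted as documents. A minimal document type this serializer accepts could look like the following (illustrative; it assumes the schema declares `id`, `title`, and `description`, as in the benchmark at the top of this diff):

use serde_derive::Serialize;

// Hypothetical document type; field names must match attributes declared
// in the Schema for them to be stored or indexed.
#[derive(Serialize)]
struct Document {
    id: u64,
    title: String,
    description: String,
}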

View File

@ -1,64 +0,0 @@
use std::path::PathBuf;
use std::error::Error;
use hashbrown::HashSet;
use serde::Serialize;
use crate::database::serde::serializer::Serializer;
use crate::database::serde::SerializerError;
use crate::tokenizer::TokenizerBuilder;
use crate::database::Schema;
use crate::DocumentId;
use super::{Update, RawUpdateBuilder};
pub struct UpdateBuilder {
schema: Schema,
raw_builder: RawUpdateBuilder,
}
impl UpdateBuilder {
pub fn new(path: PathBuf, schema: Schema) -> UpdateBuilder {
UpdateBuilder {
schema: schema,
raw_builder: RawUpdateBuilder::new(path),
}
}
pub fn update_document<T, B>(
&mut self,
document: T,
tokenizer_builder: &B,
stop_words: &HashSet<String>,
) -> Result<DocumentId, SerializerError>
where T: Serialize,
B: TokenizerBuilder,
{
let document_id = self.schema.document_id(&document)?;
let update = self.raw_builder.document_update(document_id);
let serializer = Serializer {
schema: &self.schema,
document_id: document_id,
tokenizer_builder: tokenizer_builder,
update: update,
stop_words: stop_words,
};
document.serialize(serializer)?;
Ok(document_id)
}
pub fn remove_document<T>(&mut self, document: T) -> Result<DocumentId, SerializerError>
where T: Serialize,
{
let document_id = self.schema.document_id(&document)?;
self.raw_builder.document_update(document_id).remove();
Ok(document_id)
}
pub fn build(self) -> Result<Update, Box<Error>> {
self.raw_builder.build()
}
}
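Putting the pieces together: each document passed to `UpdateBuilder` is serialized with the `Serializer` above, the raw builder turns the accumulated changes into an SST file, and the resulting `Update` is handed to the database. A hedged usage sketch, reusing names from the benchmark at the top of this diff (`dir`, `schema`, `document`, `stop_words`, and `database` are assumed to exist there):

// Sketch only; error handling shortened with `?`.
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(dir.path().join("update-000.sst"), schema);

builder.update_document(&document, &tokenizer_builder, &stop_words)?;
// builder.remove_document(&obsolete_document)?;

let update = builder.build()?;
let view = database.ingest_update_file(update)?;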

View File

@ -1,17 +0,0 @@
use std::path::{Path, PathBuf};
mod builder;
mod raw_builder;
pub use self::builder::UpdateBuilder;
pub use self::raw_builder::{RawUpdateBuilder, DocumentUpdate};
pub struct Update {
sst_file: PathBuf,
}
impl Update {
pub fn path(&self) -> &Path {
&self.sst_file
}
}

View File

@ -1,168 +0,0 @@
use std::collections::btree_map::{BTreeMap, Entry};
use std::path::PathBuf;
use std::error::Error;
use rocksdb::rocksdb_options;
use hashbrown::HashMap;
use fst::map::Map;
use sdset::Set;
use crate::database::index::{Index, Positive, PositiveBuilder, Negative};
use crate::database::{DATA_INDEX, DocumentKeyAttr};
use crate::database::schema::SchemaAttr;
use crate::data::{DocIds, DocIndexes};
use crate::{DocumentId, DocIndex};
use super::Update;
type Token = Vec<u8>; // TODO could be replaced by a SmallVec
type Value = Vec<u8>;
pub struct RawUpdateBuilder {
sst_file: PathBuf,
document_updates: BTreeMap<DocumentId, DocumentUpdate>,
}
pub struct DocumentUpdate {
cleared: bool,
words_indexes: HashMap<Token, Vec<DocIndex>>,
attributes: BTreeMap<SchemaAttr, Value>,
}
impl DocumentUpdate {
pub fn new() -> DocumentUpdate {
DocumentUpdate {
cleared: false,
words_indexes: HashMap::new(),
attributes: BTreeMap::new(),
}
}
pub fn remove(&mut self) {
self.cleared = true;
self.clear();
}
pub fn clear(&mut self) {
self.words_indexes.clear();
self.attributes.clear();
}
pub fn insert_attribute_value(&mut self, attr: SchemaAttr, value: Vec<u8>) {
self.attributes.insert(attr, value);
}
pub fn insert_doc_index(&mut self, token: Vec<u8>, doc_index: DocIndex) {
self.words_indexes.entry(token).or_insert_with(Vec::new).push(doc_index)
}
}
impl RawUpdateBuilder {
pub fn new(path: PathBuf) -> RawUpdateBuilder {
RawUpdateBuilder {
sst_file: path,
document_updates: BTreeMap::new(),
}
}
pub fn document_update(&mut self, document_id: DocumentId) -> &mut DocumentUpdate {
match self.document_updates.entry(document_id) {
Entry::Occupied(mut occupied) => {
occupied.get_mut().clear();
occupied.into_mut()
},
Entry::Vacant(vacant) => vacant.insert(DocumentUpdate::new()),
}
}
pub fn build(mut self) -> Result<Update, Box<Error>> {
let mut removed_document_ids = Vec::new();
let mut words_indexes = BTreeMap::new();
for (&id, update) in self.document_updates.iter_mut() {
if update.cleared { removed_document_ids.push(id) }
for (token, indexes) in &update.words_indexes {
words_indexes.entry(token).or_insert_with(Vec::new).extend_from_slice(indexes)
}
}
let negative = {
let removed_document_ids = Set::new_unchecked(&removed_document_ids);
let doc_ids = DocIds::new(removed_document_ids);
Negative::new(doc_ids)
};
let positive = {
let mut positive_builder = PositiveBuilder::memory();
for (key, mut indexes) in words_indexes {
indexes.sort_unstable();
let indexes = Set::new_unchecked(&indexes);
positive_builder.insert(key, indexes)?;
}
let (map, indexes) = positive_builder.into_inner()?;
let map = Map::from_bytes(map)?;
let indexes = DocIndexes::from_bytes(indexes)?;
Positive::new(map, indexes)
};
let index = Index { negative, positive };
let env_options = rocksdb_options::EnvOptions::new();
let column_family_options = rocksdb_options::ColumnFamilyOptions::new();
let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options);
file_writer.open(&self.sst_file.to_string_lossy())?;
// write the data-index
let mut bytes = Vec::new();
index.write_to_bytes(&mut bytes);
file_writer.merge(DATA_INDEX, &bytes)?;
// write all the document attribute updates
for (id, update) in self.document_updates {
let mut last_attr: Option<SchemaAttr> = None;
for (attr, value) in update.attributes {
if update.cleared {
// if there is no last attribute, remove from the first attribute
let start_attr = match last_attr {
Some(attr) => attr.next(),
None => Some(SchemaAttr::min())
};
let start = start_attr.map(|a| DocumentKeyAttr::new(id, a));
let end = attr.prev().map(|a| DocumentKeyAttr::new(id, a));
// delete_range between (last_attr + 1) and (attr - 1)
if let (Some(start), Some(end)) = (start, end) {
file_writer.delete_range(start.as_ref(), end.as_ref())?;
}
}
let key = DocumentKeyAttr::new(id, attr);
file_writer.put(key.as_ref(), &value)?;
last_attr = Some(attr);
}
if update.cleared {
// if there is no last attribute, remove from the first attribute
let start_attr = match last_attr {
Some(attr) => attr.next(),
None => Some(SchemaAttr::min())
};
let start = start_attr.map(|a| DocumentKeyAttr::new(id, a));
let end = DocumentKeyAttr::with_attribute_max(id);
// delete_range between (last_attr + 1) and attr_max
if let Some(start) = start {
file_writer.delete_range(start.as_ref(), end.as_ref())?;
}
}
}
file_writer.finish()?;
Ok(Update { sst_file: self.sst_file })
}
}
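For a cleared document, the builder above interleaves `delete_range` calls with the `put` calls so that every attribute gap between two freshly written attributes, plus the tail after the last one, is wiped from the store. The range arithmetic is small enough to illustrate in isolation; this is a simplified sketch over plain `u16` attribute indexes, whereas the real code works on `SchemaAttr` and `DocumentKeyAttr` keys:

// Given the sorted attribute indexes being rewritten for a cleared document,
// return the inclusive (start, end) ranges that must be deleted.
fn ranges_to_delete(written: &[u16], max_attr: u16) -> Vec<(u16, u16)> {
    let mut ranges = Vec::new();
    let mut last: Option<u16> = None;
    for &attr in written {
        let start = last.map_or(0, |a| a + 1);
        if attr > 0 && start <= attr - 1 {
            ranges.push((start, attr - 1));
        }
        last = Some(attr);
    }
    // everything after the last rewritten attribute is also cleared
    let start = last.map_or(0, |a| a + 1);
    ranges.push((start, max_attr));
    ranges
}

// ranges_to_delete(&[2, 5], 10) == [(0, 1), (3, 4), (6, 10)]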

View File

@ -1,174 +0,0 @@
use std::error::Error;
use std::path::Path;
use std::ops::Deref;
use std::{fmt, marker};
use rocksdb::rocksdb_options::{ReadOptions, EnvOptions, ColumnFamilyOptions};
use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey, SstFileWriter};
use serde::de::DeserializeOwned;
use crate::database::{DocumentKey, DocumentKeyAttr};
use crate::database::{retrieve_data_schema, retrieve_data_index};
use crate::database::deserializer::Deserializer;
use crate::database::schema::Schema;
use crate::database::index::Index;
use crate::rank::{QueryBuilder, FilterFunc};
use crate::DocumentId;
pub struct DatabaseView<D>
where D: Deref<Target=DB>
{
snapshot: Snapshot<D>,
index: Index,
schema: Schema,
}
impl<D> DatabaseView<D>
where D: Deref<Target=DB>
{
pub fn new(snapshot: Snapshot<D>) -> Result<DatabaseView<D>, Box<Error>> {
let schema = retrieve_data_schema(&snapshot)?;
let index = retrieve_data_index(&snapshot)?;
Ok(DatabaseView { snapshot, index, schema })
}
pub fn schema(&self) -> &Schema {
&self.schema
}
pub fn index(&self) -> &Index {
&self.index
}
pub fn into_snapshot(self) -> Snapshot<D> {
self.snapshot
}
pub fn snapshot(&self) -> &Snapshot<D> {
&self.snapshot
}
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
Ok(self.snapshot.get(key)?)
}
pub fn dump_all<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<Error>> {
let path = path.as_ref().to_string_lossy();
let env_options = EnvOptions::new();
let column_family_options = ColumnFamilyOptions::new();
let mut file_writer = SstFileWriter::new(env_options, column_family_options);
file_writer.open(&path)?;
let mut iter = self.snapshot.iter();
iter.seek(SeekKey::Start);
for (key, value) in &mut iter {
file_writer.put(&key, &value)?;
}
file_writer.finish()?;
Ok(())
}
pub fn query_builder(&self) -> Result<QueryBuilder<D, FilterFunc<D>>, Box<Error>> {
QueryBuilder::new(self)
}
pub fn document_by_id<T>(&self, id: DocumentId) -> Result<T, Box<Error>>
where T: DeserializeOwned
{
let mut deserializer = Deserializer::new(&self.snapshot, &self.schema, id);
Ok(T::deserialize(&mut deserializer)?)
}
pub fn documents_by_id<T, I>(&self, ids: I) -> DocumentIter<D, T, I::IntoIter>
where T: DeserializeOwned,
I: IntoIterator<Item=DocumentId>,
{
DocumentIter {
database_view: self,
document_ids: ids.into_iter(),
_phantom: marker::PhantomData,
}
}
}
impl<D> fmt::Debug for DatabaseView<D>
where D: Deref<Target=DB>
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut options = ReadOptions::new();
let lower = DocumentKey::new(DocumentId(0));
options.set_iterate_lower_bound(lower.as_ref());
let mut iter = self.snapshot.iter_opt(options);
iter.seek(SeekKey::Start);
let iter = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key));
if f.alternate() {
writeln!(f, "DatabaseView(")?;
} else {
write!(f, "DatabaseView(")?;
}
self.schema.fmt(f)?;
if f.alternate() {
writeln!(f, ",")?;
} else {
write!(f, ", ")?;
}
f.debug_list().entries(iter).finish()?;
write!(f, ")")
}
}
// TODO this is just an iter::Map !!!
pub struct DocumentIter<'a, D, T, I>
where D: Deref<Target=DB>
{
database_view: &'a DatabaseView<D>,
document_ids: I,
_phantom: marker::PhantomData<T>,
}
impl<'a, D, T, I> Iterator for DocumentIter<'a, D, T, I>
where D: Deref<Target=DB>,
T: DeserializeOwned,
I: Iterator<Item=DocumentId>,
{
type Item = Result<T, Box<Error>>;
fn size_hint(&self) -> (usize, Option<usize>) {
self.document_ids.size_hint()
}
fn next(&mut self) -> Option<Self::Item> {
match self.document_ids.next() {
Some(id) => Some(self.database_view.document_by_id(id)),
None => None
}
}
}
impl<'a, D, T, I> ExactSizeIterator for DocumentIter<'a, D, T, I>
where D: Deref<Target=DB>,
T: DeserializeOwned,
I: ExactSizeIterator + Iterator<Item=DocumentId>,
{ }
impl<'a, D, T, I> DoubleEndedIterator for DocumentIter<'a, D, T, I>
where D: Deref<Target=DB>,
T: DeserializeOwned,
I: DoubleEndedIterator + Iterator<Item=DocumentId>,
{
fn next_back(&mut self) -> Option<Self::Item> {
match self.document_ids.next_back() {
Some(id) => Some(self.database_view.document_by_id(id)),
None => None
}
}
}
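`DatabaseView` is the read-side handle: it wraps a RocksDB snapshot together with the schema and index extracted from it, and queries, document deserialization, and dumps all go through it. A hedged usage sketch with the API shown above (`view` is a `DatabaseView`; `MyDoc` is a hypothetical `DeserializeOwned` type):

// Illustrative only.
let builder = view.query_builder()?;
let documents = builder.query("hello world", 0..20);

for result in view.documents_by_id::<MyDoc, _>(documents.iter().map(|d| d.id)) {
    let doc: MyDoc = result?;
    // use the deserialized document...
}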

View File

@ -1,115 +0,0 @@
#![cfg_attr(feature = "nightly", feature(test))]
pub mod automaton;
pub mod database;
pub mod data;
pub mod rank;
pub mod tokenizer;
mod attribute;
mod word_area;
mod common_words;
pub use rocksdb;
pub use self::tokenizer::Tokenizer;
pub use self::common_words::CommonWords;
pub use self::attribute::{Attribute, AttributeError};
pub use self::word_area::{WordArea, WordAreaError};
/// Represents an internally generated, unique document identifier.
///
/// It is used to tell the database which document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct DocumentId(u64);
/// This structure represents the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(C)]
pub struct DocIndex {
/// The document identifier where the word was found.
pub document_id: DocumentId,
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: Attribute,
/// The position in bytes where the word was found,
/// along with its length.
///
/// It records the original word area in the indexed text
/// without needing to run the tokenizer again.
pub word_area: WordArea,
}
/// This structure represents a matching word, with information
/// about the location of the word in the document.
///
/// The order of the fields is important because it defines
/// the way these structures are ordered relative to one another.
///
/// The word in itself is not important.
// TODO do data oriented programming ? very arrays ?
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Match {
/// The word index in the query sentence.
/// Same as the `attribute_index` but for the query words.
///
/// Used to retrieve the automaton that match this word.
pub query_index: u32,
/// The distance between this word and the query word
/// (i.e. the Levenshtein distance).
pub distance: u8,
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: Attribute,
/// Whether the matched word is an exact match or a prefix.
pub is_exact: bool,
/// The position in bytes where the word was found,
/// along with its length.
///
/// It records the original word area in the indexed text
/// without needing to run the tokenizer again.
pub word_area: WordArea,
}
impl Match {
pub fn zero() -> Self {
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 0),
}
}
pub fn max() -> Self {
Match {
query_index: u32::max_value(),
distance: u8::max_value(),
attribute: Attribute::max_value(),
is_exact: true,
word_area: WordArea::max_value(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::mem;
#[test]
fn docindex_mem_size() {
assert_eq!(mem::size_of::<DocIndex>(), 16);
}
}

View File

@ -1,19 +0,0 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::rank::Document;
#[derive(Debug, Clone, Copy)]
pub struct DocumentId;
impl<D> Criterion<D> for DocumentId
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
lhs.id.cmp(&rhs.id)
}
}

View File

@ -1,34 +0,0 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::Match;
#[inline]
fn contains_exact(matches: &&[Match]) -> bool {
matches.iter().any(|m| m.is_exact)
}
#[inline]
fn number_exact_matches(matches: &[Match]) -> usize {
GroupBy::new(matches, match_query_index).filter(contains_exact).count()
}
#[derive(Debug, Clone, Copy)]
pub struct Exact;
impl<D> Criterion<D> for Exact
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = number_exact_matches(&lhs.matches);
let rhs = number_exact_matches(&rhs.matches);
lhs.cmp(&rhs).reverse()
}
}

View File

@ -1,132 +0,0 @@
mod sum_of_typos;
mod number_of_words;
mod words_proximity;
mod sum_of_words_attribute;
mod sum_of_words_position;
mod exact;
mod sort_by;
mod document_id;
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use crate::database::DatabaseView;
use crate::rank::Document;
pub use self::{
sum_of_typos::SumOfTypos,
number_of_words::NumberOfWords,
words_proximity::WordsProximity,
sum_of_words_attribute::SumOfWordsAttribute,
sum_of_words_position::SumOfWordsPosition,
exact::Exact,
sort_by::SortBy,
document_id::DocumentId,
};
pub trait Criterion<D>
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering;
#[inline]
fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> bool {
self.evaluate(lhs, rhs, view) == Ordering::Equal
}
}
impl<'a, D, T: Criterion<D> + ?Sized> Criterion<D> for &'a T
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering {
(**self).evaluate(lhs, rhs, view)
}
fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> bool {
(**self).eq(lhs, rhs, view)
}
}
impl<D, T: Criterion<D> + ?Sized> Criterion<D> for Box<T>
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering {
(**self).evaluate(lhs, rhs, view)
}
fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> bool {
(**self).eq(lhs, rhs, view)
}
}
#[derive(Default)]
pub struct CriteriaBuilder<D>
where D: Deref<Target=DB>
{
inner: Vec<Box<dyn Criterion<D>>>
}
impl<D> CriteriaBuilder<D>
where D: Deref<Target=DB>
{
pub fn new() -> CriteriaBuilder<D> {
CriteriaBuilder { inner: Vec::new() }
}
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<D> {
CriteriaBuilder { inner: Vec::with_capacity(capacity) }
}
pub fn reserve(&mut self, additional: usize) {
self.inner.reserve(additional)
}
pub fn add<C>(mut self, criterion: C) -> CriteriaBuilder<D>
where C: 'static + Criterion<D>,
{
self.push(criterion);
self
}
pub fn push<C>(&mut self, criterion: C)
where C: 'static + Criterion<D>,
{
self.inner.push(Box::new(criterion));
}
pub fn build(self) -> Criteria<D> {
Criteria { inner: self.inner }
}
}
pub struct Criteria<D>
where D: Deref<Target=DB>
{
inner: Vec<Box<dyn Criterion<D>>>,
}
impl<D> Default for Criteria<D>
where D: Deref<Target=DB>
{
fn default() -> Self {
CriteriaBuilder::with_capacity(7)
.add(SumOfTypos)
.add(NumberOfWords)
.add(WordsProximity)
.add(SumOfWordsAttribute)
.add(SumOfWordsPosition)
.add(Exact)
.add(DocumentId)
.build()
}
}
impl<D> AsRef<[Box<dyn Criterion<D>>]> for Criteria<D>
where D: Deref<Target=DB>
{
fn as_ref(&self) -> &[Box<dyn Criterion<D>>] {
&self.inner
}
}
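`Criteria::default()` above wires the seven built-in criteria in a fixed order; `CriteriaBuilder` lets a caller build a custom ordering, for example to slot in the `SortBy` criterion shown further down this diff. A hedged sketch (`MyDoc` is a hypothetical `DeserializeOwned + Ord` type, `view` a `DatabaseView`):

// Sketch only; mirrors the Default impl above with one extra criterion.
let criteria = CriteriaBuilder::with_capacity(8)
    .add(SumOfTypos)
    .add(NumberOfWords)
    .add(WordsProximity)
    .add(SumOfWordsAttribute)
    .add(SumOfWordsPosition)
    .add(Exact)
    .add(SortBy::<MyDoc>::new())
    .add(DocumentId)
    .build();

let builder = QueryBuilder::with_criteria(&view, criteria)?;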

View File

@ -1,29 +0,0 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::Match;
#[inline]
fn number_of_query_words(matches: &[Match]) -> usize {
GroupBy::new(matches, match_query_index).count()
}
#[derive(Debug, Clone, Copy)]
pub struct NumberOfWords;
impl<D> Criterion<D> for NumberOfWords
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = number_of_query_words(&lhs.matches);
let rhs = number_of_query_words(&rhs.matches);
lhs.cmp(&rhs).reverse()
}
}

View File

@ -1,82 +0,0 @@
use std::cmp::Ordering;
use std::ops::Deref;
use std::marker;
use rocksdb::DB;
use serde::de::DeserializeOwned;
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::rank::Document;
/// A helper struct that permits sorting documents by
/// some of their stored attributes.
///
/// # Note
///
/// If a document cannot be deserialized it will be considered [`None`][].
///
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
/// so you must check the [`Ord`] implementation of `Option`.
///
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
///
/// # Example
///
/// ```no-test
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
///
/// #[derive(Deserialize, PartialOrd, Ord, PartialEq, Eq)]
/// struct TimeOnly {
/// time: String,
/// }
///
/// let builder = CriteriaBuilder::with_capacity(8)
/// .add(SumOfTypos)
/// .add(NumberOfWords)
/// .add(WordsProximity)
/// .add(SumOfWordsAttribute)
/// .add(SumOfWordsPosition)
/// .add(Exact)
/// .add(SortBy::<TimeOnly>::new())
/// .add(DocumentId);
///
/// let criterion = builder.build();
///
/// ```
pub struct SortBy<T> {
_phantom: marker::PhantomData<T>,
}
impl<T> SortBy<T> {
pub fn new() -> Self {
SortBy::default()
}
}
impl<T> Default for SortBy<T> {
fn default() -> SortBy<T> {
SortBy { _phantom: marker::PhantomData }
}
}
impl<T, D> Criterion<D> for SortBy<T>
where D: Deref<Target=DB>,
T: DeserializeOwned + Ord,
{
fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering {
let lhs = match view.document_by_id::<T>(lhs.id) {
Ok(doc) => Some(doc),
Err(e) => { eprintln!("{}", e); None },
};
let rhs = match view.document_by_id::<T>(rhs.id) {
Ok(doc) => Some(doc),
Err(e) => { eprintln!("{}", e); None },
};
lhs.cmp(&rhs)
}
}

View File

@ -1,205 +0,0 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::Match;
#[inline]
fn sum_matches_typos(matches: &[Match]) -> isize {
let mut sum_typos = 0;
let mut number_words = 0;
// note that GroupBy will never return an empty group
// so we can safely make this assumption
for group in GroupBy::new(matches, match_query_index) {
sum_typos += unsafe { group.get_unchecked(0).distance as isize };
number_words += 1;
}
sum_typos - number_words
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfTypos;
impl<D> Criterion<D> for SumOfTypos
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = sum_matches_typos(&lhs.matches);
let rhs = sum_matches_typos(&rhs.matches);
lhs.cmp(&rhs)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{DocumentId, Attribute, WordArea};
// typing: "Geox CEO"
//
// doc0: "Geox SpA: CEO and Executive"
// doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
#[test]
fn one_typo_reference() {
let doc0 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 2),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(0),
matches: matches,
}
};
let doc1 = {
let matches = vec![
Match {
query_index: 0,
distance: 1,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 2),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(1),
matches: matches,
}
};
let lhs = sum_matches_typos(&doc0.matches);
let rhs = sum_matches_typos(&doc1.matches);
assert_eq!(lhs.cmp(&rhs), Ordering::Less);
}
// typing: "bouton manchette"
//
// doc0: "bouton manchette"
// doc1: "bouton"
#[test]
fn no_typo() {
let doc0 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 1),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(0),
matches: matches,
}
};
let doc1 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(1),
matches: matches,
}
};
let lhs = sum_matches_typos(&doc0.matches);
let rhs = sum_matches_typos(&doc1.matches);
assert_eq!(lhs.cmp(&rhs), Ordering::Less);
}
// typing: "bouton manchztte"
//
// doc0: "bouton manchette"
// doc1: "bouton"
#[test]
fn one_typo() {
let doc0 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 1,
attribute: Attribute::new_faillible(0, 1),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(0),
matches: matches,
}
};
let doc1 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(1),
matches: matches,
}
};
let lhs = sum_matches_typos(&doc0.matches);
let rhs = sum_matches_typos(&doc1.matches);
assert_eq!(lhs.cmp(&rhs), Ordering::Equal);
}
}
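The score above is the sum of the best (smallest) distance per query word minus the number of matched query words, so matching more words is rewarded even when it costs a typo. Worked on the tests: in `one_typo_reference`, doc0 scores 0 + 0 − 2 = −2 and doc1 scores 1 + 0 − 2 = −1, so doc0 ranks first; in `one_typo`, doc0 scores 0 + 1 − 2 = −1 and doc1 scores 0 − 1 = −1, hence the `Ordering::Equal`. A simplified re-computation of that score from (query_index, distance) pairs, keeping only the first match of each query word as the code does:

// Illustrative only; assumes the pairs are sorted by query_index,
// as Document::from_matches guarantees for real matches.
fn score(matches: &[(u32, isize)]) -> isize {
    let mut sum_typos = 0;
    let mut number_words = 0;
    let mut last_query_index = None;
    for &(query_index, distance) in matches {
        if last_query_index != Some(query_index) {
            sum_typos += distance;
            number_words += 1;
            last_query_index = Some(query_index);
        }
    }
    sum_typos - number_words
}

// score(&[(0, 0), (1, 0)]) == -2   (doc0 in `one_typo_reference`)
// score(&[(0, 1), (1, 0)]) == -1   (doc1 in `one_typo_reference`)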

View File

@ -1,33 +0,0 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use crate::database::DatabaseView;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::Match;
#[inline]
fn sum_matches_attributes(matches: &[Match]) -> usize {
// note that GroupBy will never return an empty group
// so we can safely make this assumption
GroupBy::new(matches, match_query_index).map(|group| {
unsafe { group.get_unchecked(0).attribute.attribute() as usize }
}).sum()
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsAttribute;
impl<D> Criterion<D> for SumOfWordsAttribute
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = sum_matches_attributes(&lhs.matches);
let rhs = sum_matches_attributes(&rhs.matches);
lhs.cmp(&rhs)
}
}

View File

@ -1,33 +0,0 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use crate::database::DatabaseView;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::Match;
#[inline]
fn sum_matches_attribute_index(matches: &[Match]) -> usize {
// note that GroupBy will never return an empty group
// so we can safely make this assumption
GroupBy::new(matches, match_query_index).map(|group| {
unsafe { group.get_unchecked(0).attribute.word_index() as usize }
}).sum()
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsPosition;
impl<D> Criterion<D> for SumOfWordsPosition
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = sum_matches_attribute_index(&lhs.matches);
let rhs = sum_matches_attribute_index(&rhs.matches);
lhs.cmp(&rhs)
}
}

View File

@ -1,162 +0,0 @@
use std::cmp::{self, Ordering};
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::Match;
const MAX_DISTANCE: u32 = 8;
fn index_proximity(lhs: u32, rhs: u32) -> u32 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)
} else {
cmp::min(lhs - rhs, MAX_DISTANCE) + 1
}
}
fn attribute_proximity(lhs: &Match, rhs: &Match) -> u32 {
if lhs.attribute.attribute() != rhs.attribute.attribute() { return MAX_DISTANCE }
index_proximity(lhs.attribute.word_index(), rhs.attribute.word_index())
}
fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
let mut min_prox = u32::max_value();
for a in lhs {
for b in rhs {
min_prox = cmp::min(min_prox, attribute_proximity(a, b));
}
}
min_prox
}
fn matches_proximity(matches: &[Match]) -> u32 {
let mut proximity = 0;
let mut iter = GroupBy::new(matches, match_query_index);
// iterate over groups by windows of size 2
let mut last = iter.next();
while let (Some(lhs), Some(rhs)) = (last, iter.next()) {
proximity += min_proximity(lhs, rhs);
last = Some(rhs);
}
proximity
}
#[derive(Debug, Clone, Copy)]
pub struct WordsProximity;
impl<D> Criterion<D> for WordsProximity
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = matches_proximity(&lhs.matches);
let rhs = matches_proximity(&rhs.matches);
lhs.cmp(&rhs)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Attribute;
#[test]
fn three_different_attributes() {
// "soup" "of the" "the day"
//
// { id: 0, attr: 0, attr_index: 0 }
// { id: 1, attr: 1, attr_index: 0 }
// { id: 2, attr: 1, attr_index: 1 }
// { id: 2, attr: 2, attr_index: 0 }
// { id: 3, attr: 3, attr_index: 1 }
let matches = &[
Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() },
Match { query_index: 1, attribute: Attribute::new_faillible(1, 0), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new_faillible(1, 1), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new_faillible(2, 0), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new_faillible(3, 1), ..Match::zero() },
];
// soup -> of = 8
// + of -> the = 1
// + the -> day = 8 (not 1)
assert_eq!(matches_proximity(matches), 17);
}
#[test]
fn two_different_attributes() {
// "soup day" "soup of the day"
//
// { id: 0, attr: 0, attr_index: 0 }
// { id: 0, attr: 1, attr_index: 0 }
// { id: 1, attr: 1, attr_index: 1 }
// { id: 2, attr: 1, attr_index: 2 }
// { id: 3, attr: 0, attr_index: 1 }
// { id: 3, attr: 1, attr_index: 3 }
let matches = &[
Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() },
Match { query_index: 0, attribute: Attribute::new_faillible(1, 0), ..Match::zero() },
Match { query_index: 1, attribute: Attribute::new_faillible(1, 1), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new_faillible(1, 2), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new_faillible(0, 1), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new_faillible(1, 3), ..Match::zero() },
];
// soup -> of = 1
// + of -> the = 1
// + the -> day = 1
assert_eq!(matches_proximity(matches), 3);
}
}
#[cfg(all(feature = "nightly", test))]
mod bench {
extern crate test;
use super::*;
use std::error::Error;
use self::test::Bencher;
use rand_xorshift::XorShiftRng;
use rand::{Rng, SeedableRng};
use crate::Attribute;
#[bench]
fn evaluate_proximity(bench: &mut Bencher) -> Result<(), Box<Error>> {
let number_matches = 30_000;
let mut matches = Vec::with_capacity(number_matches);
let mut rng = XorShiftRng::seed_from_u64(42);
for _ in 0..number_matches {
let query_index = rng.gen_range(0, 4);
let attribute = rng.gen_range(0, 5);
let word_index = rng.gen_range(0, 15);
let attribute = Attribute::new_faillible(attribute, word_index);
let match_ = Match { query_index, attribute, ..Match::zero() };
matches.push(match_);
}
bench.iter(|| {
let proximity = matches_proximity(&matches);
test::black_box(move || proximity)
});
Ok(())
}
}
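The proximity cost above is asymmetric on purpose: when the second query word appears after the first at distance d the cost is min(d, 8), and when it appears before it the cost is min(d, 8) + 1, so in-order occurrences are cheaper; matches in different attributes are charged the full MAX_DISTANCE. That is why, in `three_different_attributes` above, `the -> day` costs 8 rather than 1: the two words sit in different attributes. A standalone copy of the per-pair rule, for trying out values:

// Same rule as index_proximity above, isolated for experimentation.
const MAX_DISTANCE: u32 = 8;

fn index_proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs < rhs {
        (rhs - lhs).min(MAX_DISTANCE)
    } else {
        (lhs - rhs).min(MAX_DISTANCE) + 1
    }
}

// index_proximity(0, 1) == 1   (next word, in order)
// index_proximity(1, 0) == 2   (previous word, out of order)
// index_proximity(0, 42) == 8  (capped at MAX_DISTANCE)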

View File

@ -1,33 +0,0 @@
pub mod criterion;
mod query_builder;
mod distinct_map;
use crate::{Match, DocumentId};
pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};
#[inline]
fn match_query_index(a: &Match, b: &Match) -> bool {
a.query_index == b.query_index
}
#[derive(Debug, Clone)]
pub struct Document {
pub id: DocumentId,
pub matches: Vec<Match>,
}
impl Document {
pub fn new(doc: DocumentId, match_: Match) -> Self {
unsafe { Self::from_sorted_matches(doc, vec![match_]) }
}
pub fn from_matches(doc: DocumentId, mut matches: Vec<Match>) -> Self {
matches.sort_unstable();
unsafe { Self::from_sorted_matches(doc, matches) }
}
pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec<Match>) -> Self {
Self { id, matches }
}
}

View File

@ -1,301 +0,0 @@
use std::{cmp, mem, vec, str, char};
use std::ops::{Deref, Range};
use std::error::Error;
use std::hash::Hash;
use std::rc::Rc;
use group_by::BinaryGroupByMut;
use hashbrown::HashMap;
use fst::Streamer;
use rocksdb::DB;
use log::info;
use crate::automaton::{self, DfaExt, AutomatonExt};
use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap};
use crate::rank::criterion::Criteria;
use crate::database::DatabaseView;
use crate::{Match, DocumentId};
use crate::rank::Document;
fn split_whitespace_automatons(query: &str) -> Vec<DfaExt> {
let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
let mut automatons = Vec::new();
let mut words = query.split_whitespace().map(str::to_lowercase).peekable();
while let Some(word) = words.next() {
let has_following_word = words.peek().is_some();
let lev = if has_following_word || has_end_whitespace {
automaton::build_dfa(&word)
} else {
automaton::build_prefix_dfa(&word)
};
automatons.push(lev);
}
automatons
}
pub type FilterFunc<D> = fn(DocumentId, &DatabaseView<D>) -> bool;
pub struct QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>
{
view: &'a DatabaseView<D>,
criteria: Criteria<D>,
filter: Option<FI>,
}
impl<'a, D> QueryBuilder<'a, D, FilterFunc<D>>
where D: Deref<Target=DB>
{
pub fn new(view: &'a DatabaseView<D>) -> Result<Self, Box<Error>> {
QueryBuilder::with_criteria(view, Criteria::default())
}
}
impl<'a, D, FI> QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>,
{
pub fn with_criteria(view: &'a DatabaseView<D>, criteria: Criteria<D>) -> Result<Self, Box<Error>> {
Ok(QueryBuilder { view, criteria, filter: None })
}
pub fn with_filter<F>(self, function: F) -> QueryBuilder<'a, D, F>
where F: Fn(DocumentId, &DatabaseView<D>) -> bool,
{
QueryBuilder {
view: self.view,
criteria: self.criteria,
filter: Some(function)
}
}
pub fn with_distinct<F, K>(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, FI, F>
where F: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,
K: Hash + Eq,
{
DistinctQueryBuilder {
inner: self,
function: function,
size: size
}
}
fn query_all(&self, query: &str) -> Vec<Document> {
let automatons = split_whitespace_automatons(query);
let mut stream = {
let mut op_builder = fst::map::OpBuilder::new();
for automaton in &automatons {
let stream = self.view.index().positive.map().search(automaton);
op_builder.push(stream);
}
op_builder.union()
};
let mut matches = HashMap::new();
while let Some((input, indexed_values)) = stream.next() {
for iv in indexed_values {
let automaton = &automatons[iv.index];
let distance = automaton.eval(input).to_u8();
let is_exact = distance == 0 && input.len() == automaton.query_len();
let doc_indexes = &self.view.index().positive.indexes();
let doc_indexes = &doc_indexes[iv.value as usize];
for doc_index in doc_indexes {
let match_ = Match {
query_index: iv.index as u32,
distance: distance,
attribute: doc_index.attribute,
is_exact: is_exact,
word_area: doc_index.word_area,
};
matches.entry(doc_index.document_id).or_insert_with(Vec::new).push(match_);
}
}
}
info!("{} documents to classify", matches.len());
matches.into_iter().map(|(i, m)| Document::from_matches(i, m)).collect()
}
}
impl<'a, D, FI> QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>,
FI: Fn(DocumentId, &DatabaseView<D>) -> bool,
{
pub fn query(self, query: &str, range: Range<usize>) -> Vec<Document> {
// We give the filtering work to the query distinct builder,
// specifying a distinct rule that has no effect.
if self.filter.is_some() {
let builder = self.with_distinct(|_, _| None as Option<()>, 1);
return builder.query(query, range);
}
let mut documents = self.query_all(query);
let mut groups = vec![documents.as_mut_slice()];
let view = &self.view;
'criteria: for criterion in self.criteria.as_ref() {
let tmp_groups = mem::replace(&mut groups, Vec::new());
let mut documents_seen = 0;
for group in tmp_groups {
// if this group does not overlap with the requested range,
// push it without sorting and splitting it
if documents_seen + group.len() < range.start {
documents_seen += group.len();
groups.push(group);
continue;
}
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));
for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
documents_seen += group.len();
groups.push(group);
// we have sorted enough documents if the last sorted document is past
// the end of the requested range, so we can continue to the next criterion
if documents_seen >= range.end { continue 'criteria }
}
}
}
// `drain` removes the documents efficiently using `ptr::copy`
// TODO it could be more efficient to have a custom iterator
let offset = cmp::min(documents.len(), range.start);
documents.drain(0..offset);
documents.truncate(range.len());
documents
}
}
pub struct DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>
{
inner: QueryBuilder<'a, D, FI>,
function: FD,
size: usize,
}
impl<'a, D, FI, FD> DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>,
{
pub fn with_filter<F>(self, function: F) -> DistinctQueryBuilder<'a, D, F, FD>
where F: Fn(DocumentId, &DatabaseView<D>) -> bool,
{
DistinctQueryBuilder {
inner: self.inner.with_filter(function),
function: self.function,
size: self.size
}
}
}
impl<'a, D, FI, FD, K> DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>,
FI: Fn(DocumentId, &DatabaseView<D>) -> bool,
FD: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,
K: Hash + Eq,
{
pub fn query(self, query: &str, range: Range<usize>) -> Vec<Document> {
let mut documents = self.inner.query_all(query);
let mut groups = vec![documents.as_mut_slice()];
let mut key_cache = HashMap::new();
let view = &self.inner.view;
let mut filter_map = HashMap::new();
// these two variables keep track of the current distinct map and
// of the raw offset of the start of the group in which the
// range.start bound is located, according to the distinct function
let mut distinct_map = DistinctMap::new(self.size);
let mut distinct_raw_offset = 0;
'criteria: for criterion in self.inner.criteria.as_ref() {
let tmp_groups = mem::replace(&mut groups, Vec::new());
let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
let mut documents_seen = 0;
for group in tmp_groups {
// if this group does not overlap with the requested range,
// push it without sorting and splitting it
if documents_seen + group.len() < distinct_raw_offset {
documents_seen += group.len();
groups.push(group);
continue;
}
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));
for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
// we must compute the real distinct length of this sub-group
for document in group.iter() {
let filter_accepted = match &self.inner.filter {
Some(filter) => {
let entry = filter_map.entry(document.id);
*entry.or_insert_with(|| (filter)(document.id, view))
},
None => true,
};
if filter_accepted {
let entry = key_cache.entry(document.id);
let key = entry.or_insert_with(|| (self.function)(document.id, view).map(Rc::new));
match key.clone() {
Some(key) => buf_distinct.register(key),
None => buf_distinct.register_without_key(),
};
}
// the requested range end is reached: stop computing distinct
if buf_distinct.len() >= range.end { break }
}
documents_seen += group.len();
groups.push(group);
// if this sub-group does not overlap with the requested range
// we must update the distinct map and its start index
if buf_distinct.len() < range.start {
buf_distinct.transfert_to_internal();
distinct_raw_offset = documents_seen;
}
// we have sorted enough documents if the last sorted document is past
// the end of the requested range, so we can continue to the next criterion
if buf_distinct.len() >= range.end { continue 'criteria }
}
}
}
let mut out_documents = Vec::with_capacity(range.len());
let mut seen = BufferedDistinctMap::new(&mut distinct_map);
for document in documents.into_iter().skip(distinct_raw_offset) {
let filter_accepted = match &self.inner.filter {
Some(_) => filter_map.remove(&document.id).expect("BUG: filtered not found"),
None => true,
};
if filter_accepted {
let key = key_cache.remove(&document.id).expect("BUG: cached key not found");
let distinct_accepted = match key {
Some(key) => seen.register(key),
None => seen.register_without_key(),
};
if distinct_accepted && seen.len() > range.start {
out_documents.push(document);
if out_documents.len() == range.len() { break }
}
}
}
out_documents
}
}
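Both query paths above share the same bucket-sort strategy: documents stay in coarse groups, each criterion only sorts the groups that overlap the requested range, and the distinct/filter variant additionally deduplicates by a user-provided key while counting toward that range. A hedged usage sketch of the builder API shown above (`view` is a `DatabaseView`; `MyDoc` and its `group` field are hypothetical):

// Sketch only; both closures receive the DocumentId and the view.
let documents = view
    .query_builder()?
    .with_filter(|id, view| {
        // hypothetical predicate: only keep documents that deserialize to MyDoc
        view.document_by_id::<MyDoc>(id).is_ok()
    })
    .with_distinct(|id, view| {
        // hypothetical key: keep at most one result per `group` value
        view.document_by_id::<MyDoc>(id).ok().map(|d| d.group)
    }, 1)
    .query("hello world", 0..20);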

View File

@ -1,188 +0,0 @@
use std::mem;
use self::Separator::*;
pub trait TokenizerBuilder {
fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=Token<'a>> + 'a>;
}
pub struct DefaultBuilder;
impl DefaultBuilder {
pub fn new() -> DefaultBuilder {
DefaultBuilder
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct Token<'a> {
pub word: &'a str,
pub word_index: usize,
pub char_index: usize,
}
impl TokenizerBuilder for DefaultBuilder {
fn build<'a>(&self, text: &'a str) -> Box<Iterator<Item=Token<'a>> + 'a> {
Box::new(Tokenizer::new(text))
}
}
pub struct Tokenizer<'a> {
word_index: usize,
char_index: usize,
inner: &'a str,
}
impl<'a> Tokenizer<'a> {
pub fn new(string: &str) -> Tokenizer {
let mut char_advance = 0;
let mut index_advance = 0;
for (n, (i, c)) in string.char_indices().enumerate() {
char_advance = n;
index_advance = i;
if detect_separator(c).is_none() { break }
}
Tokenizer {
word_index: 0,
char_index: char_advance,
inner: &string[index_advance..],
}
}
}
#[derive(Debug, Clone, Copy)]
enum Separator {
Short,
Long,
}
impl Separator {
fn add(self, add: Separator) -> Separator {
match (self, add) {
(_, Long) => Long,
(Short, Short) => Short,
(Long, Short) => Long,
}
}
fn to_usize(self) -> usize {
match self {
Short => 1,
Long => 8,
}
}
}
fn detect_separator(c: char) -> Option<Separator> {
match c {
'.' | ';' | ',' | '!' | '?' | '-' => Some(Long),
' ' | '\'' | '"' => Some(Short),
_ => None,
}
}
impl<'a> Iterator for Tokenizer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
let mut start_word = None;
let mut distance = None;
for (i, c) in self.inner.char_indices() {
match detect_separator(c) {
Some(sep) => {
if let Some(start_word) = start_word {
let (prefix, tail) = self.inner.split_at(i);
let (spaces, word) = prefix.split_at(start_word);
self.inner = tail;
self.char_index += spaces.chars().count();
self.word_index += distance.map(Separator::to_usize).unwrap_or(0);
let token = Token {
word: word,
word_index: self.word_index,
char_index: self.char_index,
};
self.char_index += word.chars().count();
return Some(token)
}
distance.replace(distance.map_or(sep, |s| s.add(sep)));
},
None => { start_word.get_or_insert(i); },
}
}
if let Some(start_word) = start_word {
let prefix = mem::replace(&mut self.inner, "");
let (spaces, word) = prefix.split_at(start_word);
let token = Token {
word: word,
word_index: self.word_index + distance.map(Separator::to_usize).unwrap_or(0),
char_index: self.char_index + spaces.chars().count(),
};
return Some(token)
}
None
}
}
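Note that `word_index` is not a plain counter: a short separator (space, quote) advances it by 1, a long separator (punctuation) advances it by 8, and consecutive separators are merged keeping the strongest one. This is what makes words on opposite sides of a sentence boundary look far apart to the `WordsProximity` criterion. The tests below exercise this; a minimal check reusing `Tokenizer` as defined above:

// Illustrative: "!" is a long separator, so the second word jumps to index 8.
let mut tokenizer = Tokenizer::new("yo ! lolo");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), None);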
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn easy() {
let mut tokenizer = Tokenizer::new("salut");
assert_eq!(tokenizer.next(), Some(Token { word: "salut", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard() {
let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), Some(Token { word: "wtf", word_index: 16, char_index: 12 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 18 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 32, char_index: 24 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard_long_chars() {
let mut tokenizer = Tokenizer::new(" .? yo 😂. aïe");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😂", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 10 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? 😱 - lol . 😣 ,");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 8, char_index: 5 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😱", word_index: 16, char_index: 12 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lol", word_index: 24, char_index: 16 }));
assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 22 }));
assert_eq!(tokenizer.next(), None);
}
}

Some files were not shown because too many files have changed in this diff.