Compare commits


111 Commits
v0.2 ... v0.3.2

Author SHA1 Message Date
aef7d7825f Merge pull request #124 from Kerollmops/version-bump
Bump version to 0.3.2
2019-02-25 14:22:02 +01:00
f28ce661af chore: Bump version to 0.3.2 2019-02-25 13:56:23 +01:00
74eb9c8d0f Merge pull request #122 from Kerollmops/query-builder-no-view-dep
Remove the DatabaseView dependencies from the QueryBuilder
2019-02-24 16:56:12 +01:00
d664221c64 feat: Remove the DatabaseView dependencies from the QueryBuilder 2019-02-24 16:25:28 +01:00
58bff3d4ac Merge pull request #123 from Kerollmops/update-deps
Update all the dependencies
2019-02-24 16:24:47 +01:00
2c206eb98c chore: Update all the dependencies 2019-02-24 16:00:03 +01:00
19724e5af9 Merge pull request #121 from Kerollmops/no-cjk-unidecode
Do not save unidecoded cjk kanjis
2019-02-23 22:34:47 +01:00
c9e0ad132c feat: Do not save unidecoded cjk kanjis 2019-02-23 19:11:54 +01:00
24f265a963 Merge pull request #120 from Kerollmops/custom-log10-function
Optimize the SumOfTypos criterion
2019-02-23 19:01:12 +01:00
f8a743ee00 feat: Optimize the SumOfTypos criterion 2019-02-23 18:36:45 +01:00
64971de7ed Merge pull request #119 from Kerollmops/dont-be-hurry
Fix the tokenizer (next time don't be so hurry to merge)
2019-02-23 17:07:42 +01:00
a960c325f3 feat: Make query strings support cjk kanjis 2019-02-23 14:57:13 +01:00
a799470997 fix: Change the tokenizer to mesure cjk chars positions 2019-02-22 23:06:42 +01:00
10414791a2 fix: Remove debug println from the tokenizer 2019-02-22 22:34:37 +01:00
743974e60d Merge pull request #118 from Kerollmops/tokenizer-support-kanjis
Make the Tokenizer support Kanjis
2019-02-22 20:16:55 +01:00
0e267cae4b feat: Make the Tokenizer support Kanjis 2019-02-22 19:37:19 +01:00
12a352ae2f Merge pull request #117 from Kerollmops/tokenizer-support-parentheses
Make the tokenizer support parentheses
2019-02-22 19:36:15 +01:00
5070b27728 feat: Make the tokenizer support parentheses
Interpreting them as hard ponctuation (like a dot).
2019-02-22 18:18:17 +01:00
7a6b734078 Merge pull request #116 from Kerollmops/raw-field-value-getter
Allow users to retrieve the raw field value of a document
2019-02-22 18:02:46 +01:00
24823da6f7 feat: Allow users to retrieve the raw field value of a document 2019-02-22 15:30:20 +01:00
8701cb3a8f Merge pull request #115 from qdequele/database-path
Add accessor for database path and index path
2019-02-22 15:11:40 +01:00
315fc1fbe3 feat: Add accessor for database and index path 2019-02-22 13:49:04 +01:00
23833bac10 Merge pull request #114 from Kerollmops/hot-fix-ranked-attribute
Do not error when an attribute is registered for ranking
2019-02-21 23:17:10 +01:00
8235b6efc9 fix: Do not error when an attribute is registered for ranking 2019-02-21 20:14:08 +01:00
7f937eea5a Merge pull request #113 from Kerollmops/hot-fix-query-builder
Remove the QueryBuilder boxed criteria default static restriction
2019-02-21 20:11:10 +01:00
a1cf634ac1 feat: Remove the QueryBuilder boxed criteria default static restriction 2019-02-21 19:26:22 +01:00
c86472e997 Merge pull request #112 from Kerollmops/bump-version
Bump version to 0.3.1
2019-02-21 15:18:37 +01:00
26cb398a6f chore: Bump version to 0.3.1 2019-02-21 14:52:40 +01:00
f6e664d298 Merge pull request #111 from qdequele/config
Add a config per index
2019-02-21 14:39:37 +01:00
9437cecf87 chore: Use Default derive on Config struct 2019-02-21 14:01:55 +01:00
13309511b3 chore: Use serde derive lowercase on RankingOrdering 2019-02-21 14:01:55 +01:00
1941cb16c0 feat: Add Config.update_with(_) method to merge 2 config 2019-02-21 14:01:55 +01:00
55823c5d5d feat: add admin key on config 2019-02-21 14:01:55 +01:00
4721da1679 feat: Add access key on config 2019-02-21 14:01:55 +01:00
482f750231 chore: Set config field pub 2019-02-21 14:01:55 +01:00
d5119db165 feat: Allow to retrieve config from Database and DatabaseView 2019-02-21 14:01:55 +01:00
37578ed74f feat: store config into database 2019-02-20 14:07:19 +01:00
f5992ce822 Merge pull request #109 from Kerollmops/implement-text-cropping
Introduce text cropping that shows the first matches
2019-02-18 19:40:30 +01:00
badb0035c5 feat: Introduce text cropping that shows the first match 2019-02-18 18:59:50 +01:00
4bc14aa261 Merge pull request #108 from Kerollmops/refactor-index
Refactor the Index and Updates
2019-02-18 18:59:20 +01:00
a0c4ec0be0 feat: Introduce the updated_documents methods 2019-02-18 18:01:40 +01:00
264fffa826 feat: Replace the elapsed dependency by std::time::Instant 2019-02-17 16:37:45 +01:00
bddb37e44f feat: Move SharedData to its own module 2019-02-17 16:37:45 +01:00
6393b0cbc0 feat: Prefer binary to exponential search 2019-02-17 16:37:45 +01:00
a8df438814 feat: Implement WriteToBytes/FromSharedDataCursor 2019-02-17 16:37:44 +01:00
8014857ebf feat: Introduce the WriteToBytes trait 2019-02-17 16:37:44 +01:00
9e7261a48f feat: Introduce the FromSharedDataCursor trait 2019-02-17 16:37:44 +01:00
c4e70d0475 feat: Introduce the SharedDataCursor type 2019-02-17 16:37:44 +01:00
cbb0aaa217 feat: Introduce the Index structure along with the Events types 2019-02-17 16:36:47 +01:00
ce50e74491 Merge pull request #107 from Kerollmops/update-dependencies
Update dependencies
2019-02-13 16:05:51 +01:00
e103e1c277 chore: Replace the crossbeam::ArcCell by arc-swap::ArcSwap 2019-02-13 15:19:02 +01:00
64929fe5dc chore: Update slice-group-by to 0.2 2019-02-13 15:06:34 +01:00
b108f1e6c9 Merge pull request #106 from Kerollmops/fix-criterion
Fix the SumOfTypos and WordsProximity criteria
2019-02-12 22:06:32 +01:00
58b417e045 feat: Replace the linear_group_by by the new linear_group method 2019-02-12 21:23:36 +01:00
2e5a616d8e fix: Compute the proximity on the words with the min distance 2019-02-12 21:22:45 +01:00
092d446a7e chore: Update the slice-group-by dependency 2019-02-12 21:22:45 +01:00
85a1f126bf fix: Make the SumOfTypos criterion use a more clever algorithm 2019-02-12 21:22:42 +01:00
cf58cf86da Merge pull request #105 from Kerollmops/custom-ranking-field-into-hashmap
Save the custom ranking field into an HashMap
2019-02-11 17:36:26 +01:00
db6210c7ee feat: Introduce the Number type 2019-02-11 16:58:44 +01:00
83cd071827 feat: Introduce the SortByAttr custom ranking helper 2019-02-11 16:55:31 +01:00
084c3a95b6 feat: Add a new ranked attribute to the schema 2019-02-11 16:55:30 +01:00
78908aa34e Merge pull request #103 from Kerollmops/ranking-typo-rules
Add a reading on the default typos and ranking rules
2019-02-11 15:05:04 +01:00
cf27706f91 doc: Add a reading on the default typos and ranking rules 2019-02-11 11:58:17 +01:00
d3f53a7fd6 Merge pull request #104 from Kerollmops/update-readme
Update the Redame wrk stats
2019-02-10 14:53:15 +01:00
508af5613f doc: Update the Redame wrk stats 2019-02-10 14:05:21 +01:00
c615c31016 Merge pull request #101 from Kerollmops/version-bump
Bump version to 0.3.0
2019-02-07 15:26:38 +01:00
908b28790b chore: Bump version to 0.3.0 2019-02-07 14:51:39 +01:00
4c0279729b Merge pull request #100 from qdequele/master
Allow users to manage multiple database indexes
2019-02-07 14:49:52 +01:00
96dfac5b33 feat: Allow users to manage multiple database indexes 2019-02-07 13:05:55 +01:00
8576218b51 Merge pull request #99 from Kerollmops/simplify-transactional-update
Remove the lifetime restriction for Database Updates
2019-02-06 18:19:45 +01:00
1c1f9201b8 feat: Remove the lifetime restriction for Database Updates 2019-02-06 18:03:41 +01:00
4398b88a3a Merge pull request #98 from Kerollmops/updates-with-transactions
Change updates to be handled using the RocksDB WriteBatch feature
2019-02-06 16:13:47 +01:00
73e79f5ca4 chore: Make travis build with Rust 1.32 2019-02-06 15:58:48 +01:00
1bfd51d6e9 feat: Change updates to be handled using the RocksDB WriteBatch feature 2019-02-06 15:58:47 +01:00
0d2daf27f2 Merge pull request #97 from Kerollmops/remove-hashbrown-stop-words
Remove the hashbrown dependency for library users
2019-02-03 17:31:08 +01:00
87f0d8cf3c feat: Remove the hashbrown dependency for library users 2019-02-03 12:22:50 +01:00
06d5a10902 Merge pull request #96 from Kerollmops/chore
Make some little changes
2019-02-03 11:55:06 +01:00
94b89c5439 chore: Make the Document from_raw method private 2019-02-03 11:24:44 +01:00
c5e951be09 chore: Move the deseserializer into the serde module 2019-02-03 11:24:44 +01:00
66ae5c8161 chore: Clarify some QueryBuilder comments 2019-02-03 11:24:44 +01:00
8438e2202f Merge pull request #95 from Kerollmops/fix-querybuilder-with-criteria
Make the QueryBuilder with_criteria use FilterFunc
2019-02-03 11:24:17 +01:00
7a6166d229 feat: Make the QueryBuilder with_criteria use FilterFunc 2019-02-03 10:55:16 +01:00
d46fa4b215 Merge pull request #94 from Kerollmops/data-oriented
Introduce Data Oriented design into the search algorithm
2019-02-02 15:40:10 +01:00
2bd5b4ab86 feat: Remove useless WordsProximity criterion benchmark 2019-02-02 15:12:54 +01:00
5efbc5ceb3 feat: Introduce the revisited SortBy criterion 2019-02-02 14:42:12 +01:00
2e905bac08 chore: Remove Attribute and WordArea structures 2019-02-02 14:40:15 +01:00
4c0ad5f964 feat: Simplify the Criterion Trait by removing the DatabaseView param 2019-02-02 14:40:15 +01:00
455cbf3bf4 feat: Make the search algorithm become fully data oriented 2019-02-02 14:40:14 +01:00
a3a28c56fa feat: Replace compressed Match fields by uncompressed ones 2019-02-02 14:40:14 +01:00
b0b3175641 Merge pull request #93 from Kerollmops/slice-group-by
Use the GroupBy/Mut Traits of the slice-group-by library
2019-01-30 17:52:27 +01:00
c2f0df3f73 feat: Use the GroupBy/Mut Traits of the slice-group-by library 2019-01-30 16:54:52 +01:00
820f1f9ac6 Merge pull request #91 from Kerollmops/warn-reused-document-id
Emit warnings when a document id is reused
2019-01-28 21:05:42 +01:00
337aee5b65 chore: Emit warnings when a document id is reused 2019-01-28 16:11:55 +01:00
810dfdf656 Merge pull request #90 from Kerollmops/version-bump
Bump version to 0.2.1
2019-01-25 17:08:53 +01:00
f016652fca chore: Bump version to 0.2.1 2019-01-25 16:41:08 +01:00
6c99ebe3fa Merge pull request #89 from Kerollmops/no-more-compaction
Remove the manual compaction triggering
2019-01-25 16:40:08 +01:00
94d357985f feat: Remove the manual compaction triggering 2019-01-25 16:05:56 +01:00
fbc698567a Merge pull request #87 from Kerollmops/measure-index-loading
Display index loading times
2019-01-24 14:07:11 +01:00
aa9db14c09 chore: Display index loading times 2019-01-23 11:19:44 +01:00
61e83a1c21 Merge pull request #86 from Kerollmops/measure-indexation
Display timings of indexation operations
2019-01-16 13:32:44 +01:00
1316be5b09 chore: Display timings of indexation operations 2019-01-16 11:45:33 +01:00
4e8b0383dd Merge pull request #85 from Kerollmops/debug-more-stats
Display more stats infos
2019-01-15 14:20:28 +01:00
4fa10753c1 chore: Display more stats infos 2019-01-14 21:18:46 +01:00
2473e289e8 Merge pull request #84 from qdequele/create-server-example
Example HTTP server example can use stopwords
2019-01-14 18:55:58 +01:00
e0e5e87ed3 feat: HTTP server example can use stopwords 2019-01-14 18:21:58 +01:00
b13e61f40a Merge pull request #83 from qdequele/create-server-example
Create an example of HTTP server managing multiple databases
2019-01-14 14:35:33 +01:00
c023cb3065 feat: Create an example for HTTP server managing multiple databases 2019-01-14 13:39:54 +01:00
0a3d069fbc Merge pull request #79 from qdequele/master
Schema can be de/serialized from a json format
2019-01-12 21:50:02 +01:00
fa062ce2cf feat: Schema can be de/serialized from a json format 2019-01-12 21:05:48 +01:00
cdc6e47bf5 Merge pull request #81 from Kerollmops/update-readme
Simplify the examples command lines
2019-01-12 13:43:42 +01:00
d5f44838be doc: Simplify the examples command lines 2019-01-12 12:56:11 +01:00
47 changed files with 2445 additions and 1624 deletions


@ -11,8 +11,8 @@ matrix:
include:
# Test crates on their minimum Rust versions.
- rust: 1.31.0
name: "meilidb on 1.31.0"
- rust: 1.32.0
name: "meilidb on 1.32.0"
script: ./ci/meilidb.sh
# Test crates on nightly Rust.


@ -1,23 +1,28 @@
[package]
edition = "2018"
name = "meilidb"
version = "0.2.0"
version = "0.3.2"
authors = ["Kerollmops <renault.cle@gmail.com>"]
[dependencies]
bincode = "1.0"
byteorder = "1.2"
crossbeam = "0.6"
fst = "0.3"
hashbrown = { version = "0.1", features = ["serde"] }
lazy_static = "1.1"
levenshtein_automata = { version = "0.1", features = ["fst_automaton"] }
linked-hash-map = { version = "0.5", features = ["serde_impl"] }
log = "0.4"
sdset = "0.3"
serde = "1.0"
serde_derive = "1.0"
unidecode = "0.3"
arc-swap = "0.3.7"
bincode = "1.1.2"
byteorder = "1.3.1"
fst = "0.3.3"
hashbrown = { version = "0.1.8", features = ["serde"] }
lazy_static = "1.2.0"
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
linked-hash-map = { version = "0.5.1", features = ["serde_impl"] }
lockfree = "0.5.1"
log = "0.4.6"
rayon = "1.0.3"
sdset = "0.3.1"
serde = "1.0.88"
serde_derive = "1.0.88"
serde_json = { version = "1.0.38", features = ["preserve_order"] }
size_format = "1.0.2"
slice-group-by = "0.2.4"
unidecode = "0.3.0"
[dependencies.toml]
git = "https://github.com/Kerollmops/toml-rs.git"
@ -28,28 +33,23 @@ rev = "0372ba6"
git = "https://github.com/pingcap/rust-rocksdb.git"
rev = "306e201"
[dependencies.group-by]
git = "https://github.com/Kerollmops/group-by.git"
rev = "5a113fe"
[features]
default = ["simd"]
i128 = ["bincode/i128", "byteorder/i128"]
portable = ["rocksdb/portable"]
simd = ["rocksdb/sse"]
nightly = ["hashbrown/nightly", "group-by/nightly"]
nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
[dev-dependencies]
csv = "1.0"
elapsed = "0.1"
env_logger = "0.6"
jemallocator = "0.1"
quickcheck = "0.8"
rand = "0.6"
rand_xorshift = "0.1"
structopt = "0.2"
tempfile = "3.0"
termcolor = "1.0"
csv = "1.0.5"
env_logger = "0.6.0"
jemallocator = "0.1.9"
quickcheck = "0.8.2"
rand = "0.6.5"
rand_xorshift = "0.1.1"
structopt = "0.2.14"
tempfile = "3.0.7"
termcolor = "1.0.4"
[profile.release]
debug = true


@ -10,7 +10,7 @@ A _full-text search database_ using a key-value store internally.
It uses [RocksDB](https://github.com/facebook/rocksdb) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads.
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries.
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries or you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
We will be proud if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/Kerollmops/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
@ -22,20 +22,20 @@ MeiliDB will be a binary in a near future so you will be able to use it as a dat
## Performances
With a database composed of _100 353_ documents with _352_ attributes each and _90_ of them indexed.
So nearly _9 million_ fields indexed for _35 million_ stored we can handle more than _1.2k req/sec_ on an Intel i7-7700 (8) @ 4.2GHz.
With a database composed of _100 353_ documents with _352_ attributes each and _3_ of them indexed.
So more than _300 000_ fields indexed for _35 million_ stored we can handle more than _2.8k req/sec_ with an average response time of _9 ms_ on an Intel i7-7700 (8) @ 4.2GHz.
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to generate real users queries.
Requests are made using [wrk](https://github.com/wg/wrk) and scripted to simulate real users queries.
```
Running 10s test @ http://localhost:2230
2 threads and 12 connections
2 threads and 25 connections
Thread Stats Avg Stdev Max +/- Stdev
Latency 18.86ms 49.39ms 614.89ms 95.23%
Req/Sec 620.41 59.53 790.00 65.00%
12359 requests in 10.00s, 3.26MB read
Requests/sec: 1235.54
Transfer/sec: 334.22KB
Latency 9.52ms 7.61ms 99.25ms 84.58%
Req/Sec 1.41k 119.11 1.78k 64.50%
28080 requests in 10.01s, 7.42MB read
Requests/sec: 2806.46
Transfer/sec: 759.17KB
```
### Notes
@ -49,7 +49,7 @@ MeiliDB runs with an index like most search engines.
So to test the library you can create one by indexing a simple csv file.
```bash
cargo run --release --example create-database -- test.mdb misc/kaggle.csv --schema schema-example.toml --stop-words misc/fr.stopwords.txt
cargo run --release --example create-database -- test.mdb misc/kaggle.csv --schema schema-example.toml
```
Once the command is executed, the index should be in the `test.mdb` folder. You are now able to run the `query-database` example and play with MeiliDB.


@ -1,17 +1,18 @@
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::{HashMap, HashSet};
use std::io::{self, BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::time::Instant;
use std::error::Error;
use std::borrow::Cow;
use std::fs::File;
use hashbrown::{HashMap, HashSet};
use serde_derive::{Serialize, Deserialize};
use structopt::StructOpt;
use meilidb::database::{Database, Schema, UpdateBuilder};
use meilidb::database::{Database, Schema};
use meilidb::tokenizer::DefaultBuilder;
#[derive(Debug, StructOpt)]
@ -50,7 +51,9 @@ fn index(
stop_words: &HashSet<String>,
) -> Result<Database, Box<Error>>
{
let database = Database::create(database_path, &schema)?;
let database = Database::create(database_path)?;
database.create_index("default", &schema)?;
let mut rdr = csv::Reader::from_path(csv_data_path)?;
let mut raw_record = csv::StringRecord::new();
@ -61,8 +64,7 @@ fn index(
while !end_of_file {
let tokenizer_builder = DefaultBuilder::new();
let update_path = tempfile::NamedTempFile::new()?;
let mut update = UpdateBuilder::new(update_path.path().to_path_buf(), schema.clone());
let mut update = database.start_update("default")?;
loop {
end_of_file = !rdr.read_record(&mut raw_record)?;
@ -88,10 +90,8 @@ fn index(
println!();
println!("building update...");
let update = update.build()?;
println!("ingesting update...");
database.ingest_update_file(update)?;
println!("committing update...");
database.commit_update(update)?;
}
Ok(database)
@ -125,14 +125,13 @@ fn main() -> Result<(), Box<Error>> {
None => HashSet::new(),
};
let (elapsed, result) = elapsed::measure_time(|| {
index(schema, &opt.database_path, &opt.csv_data_path, opt.update_group_size, &stop_words)
});
let start = Instant::now();
let result = index(schema, &opt.database_path, &opt.csv_data_path, opt.update_group_size, &stop_words);
if let Err(e) = result {
return Err(e.into())
}
println!("database created in {} at: {:?}", elapsed, opt.database_path);
println!("database created in {:.2?} at: {:?}", start.elapsed(), opt.database_path);
Ok(())
}
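
Taken together, the example changes above reflect the new multi-index update flow (`Database::create`, `create_index`, `start_update`, `commit_update`). A minimal sketch of that flow, assuming a `Schema` built elsewhere and eliding document insertion and error details:

```rust
use std::error::Error;
use meilidb::database::{Database, Schema};

fn create(schema: &Schema) -> Result<(), Box<Error>> {
    // Create the database folder, then a named index inside it.
    let database = Database::create("test.mdb")?;
    database.create_index("default", schema)?;

    // Updates are now built against a named index and committed as a whole.
    let mut update = database.start_update("default")?;
    // ... push documents into `update` here (omitted) ...
    database.commit_update(update)?;

    // Queries go through a view of the same named index.
    let _view = database.view("default")?;
    Ok(())
}
```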


@ -4,6 +4,7 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use std::collections::btree_map::{BTreeMap, Entry};
use std::iter::FromIterator;
use std::io::{self, Write};
use std::time::Instant;
use std::path::PathBuf;
use std::error::Error;
@ -27,6 +28,10 @@ pub struct Opt {
/// The number of returned results
#[structopt(short = "n", long = "number-results", default_value = "10")]
pub number_results: usize,
/// The number of characters before and after the first match
#[structopt(short = "C", long = "context", default_value = "35")]
pub char_context: usize,
}
type Document = HashMap<String, String>;
@ -66,26 +71,21 @@ fn char_to_byte_range(index: usize, length: usize, text: &str) -> (usize, usize)
(byte_index, byte_length)
}
fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr) -> Vec<usize> {
fn create_highlight_areas(text: &str, matches: &[Match]) -> Vec<usize> {
let mut byte_indexes = BTreeMap::new();
for match_ in matches {
let match_attribute = match_.attribute.attribute();
if SchemaAttr::new(match_attribute) == attribute {
let word_area = match_.word_area;
let char_index = match_.char_index as usize;
let char_length = match_.char_length as usize;
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
let char_index = word_area.char_index() as usize;
let char_length = word_area.length() as usize;
let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text);
match byte_indexes.entry(byte_index) {
Entry::Vacant(entry) => { entry.insert(byte_length); },
Entry::Occupied(mut entry) => {
if *entry.get() < byte_length {
entry.insert(byte_length);
}
},
}
match byte_indexes.entry(byte_index) {
Entry::Vacant(entry) => { entry.insert(byte_length); },
Entry::Occupied(mut entry) => {
if *entry.get() < byte_length {
entry.insert(byte_length);
}
},
}
}
@ -100,13 +100,46 @@ fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr)
title_areas
}
/// note: matches must have been sorted by `char_index` and `char_length` before being passed.
///
/// ```no_run
/// matches.sort_unstable_by_key(|m| (m.char_index, m.char_length));
///
/// let matches = matches.matches.iter().filter(|m| SchemaAttr::new(m.attribute) == attr).cloned();
///
/// let (text, matches) = crop_text(&text, matches, 35);
/// ```
fn crop_text(
text: &str,
matches: impl IntoIterator<Item=Match>,
context: usize,
) -> (String, Vec<Match>)
{
let mut matches = matches.into_iter().peekable();
let char_index = matches.peek().map(|m| m.char_index as usize).unwrap_or(0);
let start = char_index.saturating_sub(context);
let text = text.chars().skip(start).take(context * 2).collect();
let matches = matches
.take_while(|m| {
(m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
})
.map(|match_| {
Match { char_index: match_.char_index - start as u32, ..match_ }
})
.collect();
(text, matches)
}
fn main() -> Result<(), Box<Error>> {
let _ = env_logger::init();
let opt = Opt::from_args();
let (elapsed, result) = elapsed::measure_time(|| Database::open(&opt.database_path));
let database = result?;
println!("database prepared for you in {}", elapsed);
let start = Instant::now();
let database = Database::open(&opt.database_path)?;
println!("database prepared for you in {:.2?}", start.elapsed());
let mut buffer = String::new();
let input = io::stdin();
@ -118,16 +151,19 @@ fn main() -> Result<(), Box<Error>> {
if input.read_line(&mut buffer)? == 0 { break }
let query = buffer.trim_end_matches('\n');
let view = database.view();
let view = database.view("default")?;
let schema = view.schema();
let (elapsed, documents) = elapsed::measure_time(|| {
let builder = view.query_builder().unwrap();
builder.query(query, 0..opt.number_results)
});
let start = Instant::now();
let builder = view.query_builder();
let documents = builder.query(query, 0..opt.number_results);
let number_of_documents = documents.len();
for doc in documents {
for mut doc in documents {
doc.matches.sort_unstable_by_key(|m| (m.char_index, m.char_index));
match view.document_by_id::<Document>(doc.id) {
Ok(document) => {
for name in &opt.displayed_fields {
@ -141,7 +177,11 @@ fn main() -> Result<(), Box<Error>> {
};
print!("{}: ", name);
let areas = create_highlight_areas(&text, &doc.matches, attr);
let matches = doc.matches.iter()
.filter(|m| SchemaAttr::new(m.attribute) == attr)
.cloned();
let (text, matches) = crop_text(&text, matches, opt.char_context);
let areas = create_highlight_areas(&text, &matches);
display_highlights(&text, &areas)?;
println!();
}
@ -151,7 +191,7 @@ fn main() -> Result<(), Box<Error>> {
let mut matching_attributes = HashSet::new();
for _match in doc.matches {
let attr = SchemaAttr::new(_match.attribute.attribute());
let attr = SchemaAttr::new(_match.attribute);
let name = schema.attribute_name(attr);
matching_attributes.insert(name);
}
@ -162,7 +202,7 @@ fn main() -> Result<(), Box<Error>> {
println!();
}
eprintln!("===== Found {} results in {} =====", number_of_documents, elapsed);
eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start.elapsed());
buffer.clear();
}


@ -1,105 +0,0 @@
use std::fmt;
/// Represent an attribute number along with the word index
/// according to the tokenizer used.
///
/// It can accept up to 1024 attributes and word positions
/// can be maximum 2^22.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Attribute(u32);
impl Attribute {
/// Construct an `Attribute` from an attribute number and
/// the word position of a match according to the tokenizer used.
pub(crate) fn new(attribute: u16, index: u32) -> Result<Attribute, AttributeError> {
if attribute & 0b1111_1100_0000_0000 != 0 {
return Err(AttributeError::AttributeTooBig)
}
if index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 {
return Err(AttributeError::IndexTooBig)
}
let attribute = u32::from(attribute) << 22;
Ok(Attribute(attribute | index))
}
/// Construct an `Attribute` from an attribute number and
/// the word position of a match according to the tokenizer used.
///
/// # Panics
///
/// The attribute must not be greater than 1024
/// and the word index not greater than 2^22.
pub(crate) fn new_faillible(attribute: u16, index: u32) -> Attribute {
match Attribute::new(attribute, index) {
Ok(attribute) => attribute,
Err(AttributeError::AttributeTooBig) => {
panic!("attribute must not be greater than 1024")
},
Err(AttributeError::IndexTooBig) => {
panic!("attribute word index must not be greater than 2^22")
},
}
}
pub(crate) fn max_value() -> Attribute {
Attribute(u32::max_value())
}
#[inline]
pub fn attribute(self) -> u16 {
(self.0 >> 22) as u16
}
#[inline]
pub fn word_index(self) -> u32 {
self.0 & 0b0000_0000_0011_1111_1111_1111_1111
}
}
impl fmt::Debug for Attribute {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("Attribute")
.field("attribute", &self.attribute())
.field("word_index", &self.word_index())
.finish()
}
}
pub enum AttributeError {
AttributeTooBig,
IndexTooBig,
}
#[cfg(test)]
mod tests {
use super::*;
use quickcheck::{quickcheck, TestResult};
quickcheck! {
fn qc_attribute(gen_attr: u16, gen_index: u32) -> TestResult {
if gen_attr > 2_u16.pow(10) || gen_index > 2_u32.pow(22) {
return TestResult::discard()
}
let attribute = Attribute::new_faillible(gen_attr, gen_index);
let valid_attribute = attribute.attribute() == gen_attr;
let valid_index = attribute.word_index() == gen_index;
TestResult::from_bool(valid_attribute && valid_index)
}
fn qc_attribute_ord(gen_attr: u16, gen_index: u32) -> TestResult {
if gen_attr >= 2_u16.pow(10) || gen_index >= 2_u32.pow(22) {
return TestResult::discard()
}
let a = Attribute::new_faillible(gen_attr, gen_index);
let b = Attribute::new_faillible(gen_attr + 1, gen_index + 1);
TestResult::from_bool(a < b)
}
}
}
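
For context on the file removed above: `Attribute` packed the attribute number and the word index into a single `u32`, with 10 high bits for the attribute and 22 low bits for the index. A standalone illustration of that layout (not part of the new code):

```rust
// Pack an attribute number (< 1024) and a word index (< 2^22) the way the
// removed Attribute type did: attribute in the 10 high bits, index below.
fn pack(attribute: u16, word_index: u32) -> u32 {
    assert!(attribute < 1024, "attribute must fit in 10 bits");
    assert!(word_index < (1 << 22), "word index must fit in 22 bits");
    (u32::from(attribute) << 22) | word_index
}

fn main() {
    let packed = pack(3, 11);
    assert_eq!(packed >> 22, 3);              // equivalent of attribute()
    assert_eq!(packed & ((1 << 22) - 1), 11); // equivalent of word_index()
}
```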


@ -1,12 +1,15 @@
use std::io::{self, Cursor, BufRead};
use std::slice::from_raw_parts;
use std::mem::size_of;
use std::error::Error;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use sdset::Set;
use crate::DocumentId;
use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
use crate::write_to_bytes::WriteToBytes;
use crate::data::SharedData;
use crate::DocumentId;
use super::into_u8_slice;
#[derive(Default, Clone)]
@ -19,21 +22,6 @@ impl DocIds {
DocIds(data)
}
pub fn from_cursor(cursor: &mut Cursor<SharedData>) -> io::Result<DocIds> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let offset = cursor.position() as usize;
let doc_ids = cursor.get_ref().range(offset, len);
cursor.consume(len);
Ok(DocIds(doc_ids))
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let len = self.0.len() as u64;
bytes.write_u64::<LittleEndian>(len).unwrap();
bytes.extend_from_slice(&self.0);
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
@ -52,3 +40,22 @@ impl AsRef<Set<DocumentId>> for DocIds {
Set::new_unchecked(slice)
}
}
impl FromSharedDataCursor for DocIds {
type Error = Box<Error>;
fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<DocIds, Self::Error> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let data = cursor.extract(len);
Ok(DocIds(data))
}
}
impl WriteToBytes for DocIds {
fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let len = self.0.len() as u64;
bytes.write_u64::<LittleEndian>(len).unwrap();
bytes.extend_from_slice(&self.0);
}
}


@ -1,14 +1,16 @@
use std::io::{self, Write, Cursor, BufRead};
use std::io::{self, Write};
use std::slice::from_raw_parts;
use std::mem::size_of;
use std::ops::Index;
use std::sync::Arc;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use sdset::Set;
use crate::DocIndex;
use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
use crate::write_to_bytes::WriteToBytes;
use crate::data::SharedData;
use crate::DocIndex;
use super::into_u8_slice;
#[derive(Debug)]
@ -25,38 +27,6 @@ pub struct DocIndexes {
}
impl DocIndexes {
pub fn from_bytes(bytes: Vec<u8>) -> io::Result<DocIndexes> {
let bytes = Arc::new(bytes);
let len = bytes.len();
let data = SharedData::new(bytes, 0, len);
let mut cursor = Cursor::new(data);
DocIndexes::from_cursor(&mut cursor)
}
pub fn from_cursor(cursor: &mut Cursor<SharedData>) -> io::Result<DocIndexes> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let offset = cursor.position() as usize;
let ranges = cursor.get_ref().range(offset, len);
cursor.consume(len);
let len = cursor.read_u64::<LittleEndian>()? as usize;
let offset = cursor.position() as usize;
let indexes = cursor.get_ref().range(offset, len);
cursor.consume(len);
Ok(DocIndexes { ranges, indexes })
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let ranges_len = self.ranges.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(ranges_len);
bytes.extend_from_slice(&self.ranges);
let indexes_len = self.indexes.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(indexes_len);
bytes.extend_from_slice(&self.indexes);
}
pub fn get(&self, index: usize) -> Option<&Set<DocIndex>> {
self.ranges().get(index).map(|Range { start, end }| {
let start = *start as usize;
@ -92,6 +62,32 @@ impl Index<usize> for DocIndexes {
}
}
impl FromSharedDataCursor for DocIndexes {
type Error = io::Error;
fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<DocIndexes, Self::Error> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let ranges = cursor.extract(len);
let len = cursor.read_u64::<LittleEndian>()? as usize;
let indexes = cursor.extract(len);
Ok(DocIndexes { ranges, indexes })
}
}
impl WriteToBytes for DocIndexes {
fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let ranges_len = self.ranges.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(ranges_len);
bytes.extend_from_slice(&self.ranges);
let indexes_len = self.indexes.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(indexes_len);
bytes.extend_from_slice(&self.indexes);
}
}
pub struct DocIndexesBuilder<W> {
ranges: Vec<Range>,
indexes: Vec<DocIndex>,
@ -147,29 +143,32 @@ impl<W: Write> DocIndexesBuilder<W> {
#[cfg(test)]
mod tests {
use super::*;
use std::error::Error;
use crate::{Attribute, WordArea};
use crate::DocumentId;
use super::*;
#[test]
fn builder_serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex {
document_id: DocumentId(0),
attribute: Attribute::new_faillible(3, 11),
word_area: WordArea::new_faillible(30, 4)
attribute: 3,
word_index: 11,
char_index: 30,
char_length: 4,
};
let b = DocIndex {
document_id: DocumentId(1),
attribute: Attribute::new_faillible(4, 21),
word_area: WordArea::new_faillible(35, 6)
attribute: 4,
word_index: 21,
char_index: 35,
char_length: 6,
};
let c = DocIndex {
document_id: DocumentId(2),
attribute: Attribute::new_faillible(8, 2),
word_area: WordArea::new_faillible(89, 6)
attribute: 8,
word_index: 2,
char_index: 89,
char_length: 6,
};
let mut builder = DocIndexesBuilder::memory();
@ -193,18 +192,24 @@ mod tests {
fn serialize_deserialize() -> Result<(), Box<Error>> {
let a = DocIndex {
document_id: DocumentId(0),
attribute: Attribute::new_faillible(3, 11),
word_area: WordArea::new_faillible(30, 4)
attribute: 3,
word_index: 11,
char_index: 30,
char_length: 4,
};
let b = DocIndex {
document_id: DocumentId(1),
attribute: Attribute::new_faillible(4, 21),
word_area: WordArea::new_faillible(35, 6)
attribute: 4,
word_index: 21,
char_index: 35,
char_length: 6,
};
let c = DocIndex {
document_id: DocumentId(2),
attribute: Attribute::new_faillible(8, 2),
word_area: WordArea::new_faillible(89, 6)
attribute: 8,
word_index: 2,
char_index: 89,
char_length: 6,
};
let mut builder = DocIndexesBuilder::memory();


@ -1,55 +1,13 @@
mod doc_ids;
mod doc_indexes;
mod shared_data;
use std::slice::from_raw_parts;
use std::mem::size_of;
use std::ops::Deref;
use std::sync::Arc;
pub use self::doc_ids::DocIds;
pub use self::doc_indexes::{DocIndexes, DocIndexesBuilder};
#[derive(Default, Clone)]
pub struct SharedData {
pub bytes: Arc<Vec<u8>>,
pub offset: usize,
pub len: usize,
}
impl SharedData {
pub fn from_bytes(vec: Vec<u8>) -> SharedData {
let len = vec.len();
let bytes = Arc::new(vec);
SharedData::new(bytes, 0, len)
}
pub fn new(bytes: Arc<Vec<u8>>, offset: usize, len: usize) -> SharedData {
SharedData { bytes, offset, len }
}
pub fn range(&self, offset: usize, len: usize) -> SharedData {
assert!(offset + len <= self.len);
SharedData {
bytes: self.bytes.clone(),
offset: self.offset + offset,
len: len,
}
}
}
impl Deref for SharedData {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.as_ref()
}
}
impl AsRef<[u8]> for SharedData {
fn as_ref(&self) -> &[u8] {
&self.bytes[self.offset..self.offset + self.len]
}
}
pub use self::shared_data::SharedData;
unsafe fn into_u8_slice<T: Sized>(slice: &[T]) -> &[u8] {
let ptr = slice.as_ptr() as *const u8;

src/data/shared_data.rs (new file, 48 lines)

@ -0,0 +1,48 @@
use std::sync::Arc;
use std::ops::Deref;
#[derive(Default, Clone)]
pub struct SharedData {
pub bytes: Arc<Vec<u8>>,
pub offset: usize,
pub len: usize,
}
impl SharedData {
pub fn from_bytes(vec: Vec<u8>) -> SharedData {
let len = vec.len();
let bytes = Arc::from(vec);
SharedData::new(bytes, 0, len)
}
pub fn new(bytes: Arc<Vec<u8>>, offset: usize, len: usize) -> SharedData {
SharedData { bytes, offset, len }
}
pub fn as_slice(&self) -> &[u8] {
&self.bytes[self.offset..self.offset + self.len]
}
pub fn range(&self, offset: usize, len: usize) -> SharedData {
assert!(offset + len <= self.len);
SharedData {
bytes: self.bytes.clone(),
offset: self.offset + offset,
len: len,
}
}
}
impl Deref for SharedData {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.as_slice()
}
}
impl AsRef<[u8]> for SharedData {
fn as_ref(&self) -> &[u8] {
self.as_slice()
}
}
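
A small sketch of how the new `SharedData` windows into a shared buffer without copying; the values are arbitrary and it is written as a crate-internal test:

```rust
use crate::data::SharedData;

#[test]
fn shared_data_range_is_a_zero_copy_window() {
    let data = SharedData::from_bytes(vec![1, 2, 3, 4, 5]);

    // `range` re-uses the same Arc'd buffer; only the offset/len window changes.
    let tail = data.range(2, 3);

    assert_eq!(tail.as_slice(), &[3, 4, 5][..]);
    assert_eq!(&*data, &[1, 2, 3, 4, 5][..]); // Deref<Target = [u8]>
}
```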

src/database/config.rs (new file, 46 lines)

@ -0,0 +1,46 @@
use std::collections::{HashSet, HashMap};
use serde_derive::{Serialize, Deserialize};
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum RankingOrdering {
Asc,
Dsc
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AccessToken {
pub read_key: String,
pub write_key: String,
pub admin_key: String,
}
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Config {
pub stop_words: Option<HashSet<String>>,
pub ranking_order: Option<Vec<String>>,
pub distinct_field: Option<String>,
pub ranking_rules: Option<HashMap<String, RankingOrdering>>,
pub access_token: Option<AccessToken>,
}
impl Config {
pub fn update_with(&mut self, new: Config) {
if let Some(stop_words) = new.stop_words {
self.stop_words = Some(stop_words);
};
if let Some(ranking_order) = new.ranking_order {
self.ranking_order = Some(ranking_order);
};
if let Some(distinct_field) = new.distinct_field {
self.distinct_field = Some(distinct_field);
};
if let Some(ranking_rules) = new.ranking_rules {
self.ranking_rules = Some(ranking_rules);
};
if let Some(access_token) = new.access_token {
self.access_token = Some(access_token);
};
}
}
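
A short sketch of how the per-index `Config` introduced here is merged with `update_with`; the field values are purely illustrative:

```rust
use crate::database::Config;

#[test]
fn update_with_only_overrides_set_fields() {
    let mut base = Config::default();
    base.distinct_field = Some("uid".to_string());

    // Only the fields that are `Some` in the new config replace the old ones.
    let overlay = Config {
        ranking_order: Some(vec!["_rank".to_string()]),
        ..Config::default()
    };
    base.update_with(overlay);

    assert!(base.ranking_order.is_some());                    // overridden
    assert_eq!(base.distinct_field, Some("uid".to_string())); // kept as-is
}
```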


@ -38,6 +38,10 @@ impl DocumentKey {
DocumentKeyAttr::new(self.document_id(), attr)
}
pub fn with_attribute_min(&self) -> DocumentKeyAttr {
DocumentKeyAttr::new(self.document_id(), SchemaAttr::min())
}
pub fn with_attribute_max(&self) -> DocumentKeyAttr {
DocumentKeyAttr::new(self.document_id(), SchemaAttr::max())
}


@ -1,60 +1,45 @@
use std::io::{Write, BufRead, Cursor};
use std::error::Error;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use fst::{map, Map, Streamer, IntoStreamer};
use sdset::{Set, SetOperation};
use sdset::duo::Union;
use fst::{map, Map, IntoStreamer, Streamer};
use fst::raw::Fst;
use sdset::duo::{Union, DifferenceByKey};
use sdset::{Set, SetOperation};
use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
use crate::write_to_bytes::WriteToBytes;
use crate::data::{DocIndexes, DocIndexesBuilder};
use crate::data::SharedData;
use crate::DocIndex;
use crate::{DocumentId, DocIndex};
#[derive(Default)]
pub struct Positive {
map: Map,
indexes: DocIndexes,
pub struct Index {
pub map: Map,
pub indexes: DocIndexes,
}
impl Positive {
pub fn new(map: Map, indexes: DocIndexes) -> Positive {
Positive { map, indexes }
impl Index {
pub fn remove_documents(&self, documents: &Set<DocumentId>) -> Index {
let mut buffer = Vec::new();
let mut builder = IndexBuilder::new();
let mut stream = self.into_stream();
while let Some((key, indexes)) = stream.next() {
buffer.clear();
let op = DifferenceByKey::new(indexes, documents, |x| x.document_id, |x| *x);
op.extend_vec(&mut buffer);
if !buffer.is_empty() {
let indexes = Set::new_unchecked(&buffer);
builder.insert(key, indexes).unwrap();
}
}
builder.build()
}
pub fn from_cursor(cursor: &mut Cursor<SharedData>) -> Result<Positive, Box<Error>> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let offset = cursor.position() as usize;
let data = cursor.get_ref().range(offset, len);
let fst = Fst::from_shared_bytes(data.bytes, data.offset, data.len)?;
let map = Map::from(fst);
cursor.consume(len);
let indexes = DocIndexes::from_cursor(cursor)?;
Ok(Positive { map, indexes})
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let slice = self.map.as_fst().as_bytes();
let len = slice.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(len);
bytes.extend_from_slice(slice);
self.indexes.write_to_bytes(bytes);
}
pub fn map(&self) -> &Map {
&self.map
}
pub fn indexes(&self) -> &DocIndexes {
&self.indexes
}
pub fn union(&self, other: &Positive) -> Result<Positive, Box<Error>> {
let mut builder = PositiveBuilder::memory();
pub fn union(&self, other: &Index) -> Index {
let mut builder = IndexBuilder::new();
let mut stream = map::OpBuilder::new().add(&self.map).add(&other.map).union();
let mut buffer = Vec::new();
@ -63,19 +48,19 @@ impl Positive {
match ivalues {
[a, b] => {
let indexes = if a.index == 0 { &self.indexes } else { &other.indexes };
let indexes = indexes.get(a.value as usize).ok_or(format!("index not found"))?;
let indexes = &indexes[a.value as usize];
let a = Set::new_unchecked(indexes);
let indexes = if b.index == 0 { &self.indexes } else { &other.indexes };
let indexes = indexes.get(b.value as usize).ok_or(format!("index not found"))?;
let indexes = &indexes[b.value as usize];
let b = Set::new_unchecked(indexes);
let op = Union::new(a, b);
op.extend_vec(&mut buffer);
},
[a] => {
let indexes = if a.index == 0 { &self.indexes } else { &other.indexes };
let indexes = indexes.get(a.value as usize).ok_or(format!("index not found"))?;
[x] => {
let indexes = if x.index == 0 { &self.indexes } else { &other.indexes };
let indexes = &indexes[x.value as usize];
buffer.extend_from_slice(indexes)
},
_ => continue,
@ -83,23 +68,45 @@ impl Positive {
if !buffer.is_empty() {
let indexes = Set::new_unchecked(&buffer);
builder.insert(key, indexes)?;
builder.insert(key, indexes).unwrap();
}
}
let (map, indexes) = builder.into_inner()?;
let map = Map::from_bytes(map)?;
let indexes = DocIndexes::from_bytes(indexes)?;
Ok(Positive { map, indexes })
builder.build()
}
}
impl<'m, 'a> IntoStreamer<'a> for &'m Positive {
impl FromSharedDataCursor for Index {
type Error = Box<Error>;
fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Index, Self::Error> {
let len = cursor.read_u64::<LittleEndian>()? as usize;
let data = cursor.extract(len);
let fst = Fst::from_shared_bytes(data.bytes, data.offset, data.len)?;
let map = Map::from(fst);
let indexes = DocIndexes::from_shared_data_cursor(cursor)?;
Ok(Index { map, indexes})
}
}
impl WriteToBytes for Index {
fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let slice = self.map.as_fst().as_bytes();
let len = slice.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(len);
bytes.extend_from_slice(slice);
self.indexes.write_to_bytes(bytes);
}
}
impl<'m, 'a> IntoStreamer<'a> for &'m Index {
type Item = (&'a [u8], &'a Set<DocIndex>);
/// The type of the stream to be constructed.
type Into = Stream<'m>;
/// Construct a stream from `Self`.
fn into_stream(self) -> Self::Into {
Stream {
map_stream: self.map.into_stream(),
@ -128,28 +135,26 @@ impl<'m, 'a> Streamer<'a> for Stream<'m> {
}
}
pub struct PositiveBuilder<W, X> {
map: fst::MapBuilder<W>,
indexes: DocIndexesBuilder<X>,
pub struct IndexBuilder {
map: fst::MapBuilder<Vec<u8>>,
indexes: DocIndexesBuilder<Vec<u8>>,
value: u64,
}
impl PositiveBuilder<Vec<u8>, Vec<u8>> {
pub fn memory() -> Self {
PositiveBuilder {
impl IndexBuilder {
pub fn new() -> Self {
IndexBuilder {
map: fst::MapBuilder::memory(),
indexes: DocIndexesBuilder::memory(),
value: 0,
}
}
}
impl<W: Write, X: Write> PositiveBuilder<W, X> {
/// If a key is inserted that is less than or equal to any previous key added,
/// then an error is returned. Similarly, if there was a problem writing
/// to the underlying writer, an error is returned.
// FIXME what if one write doesn't work but the other do ?
pub fn insert<K>(&mut self, key: K, indexes: &Set<DocIndex>) -> Result<(), Box<Error>>
pub fn insert<K>(&mut self, key: K, indexes: &Set<DocIndex>) -> fst::Result<()>
where K: AsRef<[u8]>,
{
self.map.insert(key, self.value)?;
@ -158,9 +163,13 @@ impl<W: Write, X: Write> PositiveBuilder<W, X> {
Ok(())
}
pub fn into_inner(self) -> Result<(W, X), Box<Error>> {
let map = self.map.into_inner()?;
let indexes = self.indexes.into_inner()?;
Ok((map, indexes))
pub fn build(self) -> Index {
let map = self.map.into_inner().unwrap();
let indexes = self.indexes.into_inner().unwrap();
let map = Map::from_bytes(map).unwrap();
let indexes = DocIndexes::from_bytes(indexes).unwrap();
Index { map, indexes }
}
}
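
A condensed sketch of building an `Index` with the new `IndexBuilder`; keys must be inserted in ascending order (inserting a key less than or equal to a previous one is an error), and the `DocIndex` values and module paths here are assumptions taken from the diff:

```rust
use sdset::Set;
use crate::database::index::{Index, IndexBuilder};
use crate::{DocumentId, DocIndex};

fn tiny_index() -> Index {
    let hello = [DocIndex { document_id: DocumentId(0), attribute: 0, word_index: 0, char_index: 0, char_length: 5 }];
    let world = [DocIndex { document_id: DocumentId(0), attribute: 0, word_index: 1, char_index: 6, char_length: 5 }];

    let mut builder = IndexBuilder::new();
    // Keys must be inserted in ascending lexicographic order.
    builder.insert("hello", Set::new_unchecked(&hello)).unwrap();
    builder.insert("world", Set::new_unchecked(&world)).unwrap();
    builder.build()
}
```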


@ -1,82 +0,0 @@
mod negative;
mod positive;
pub(crate) use self::negative::Negative;
pub(crate) use self::positive::{Positive, PositiveBuilder};
use std::error::Error;
use std::io::Cursor;
use std::sync::Arc;
use fst::{IntoStreamer, Streamer};
use sdset::duo::DifferenceByKey;
use sdset::{Set, SetOperation};
use fst::Map;
use crate::data::{SharedData, DocIndexes};
#[derive(Default)]
pub struct Index {
pub(crate) negative: Negative,
pub(crate) positive: Positive,
}
impl Index {
pub fn from_bytes(bytes: Vec<u8>) -> Result<Index, Box<Error>> {
let len = bytes.len();
Index::from_shared_bytes(Arc::new(bytes), 0, len)
}
pub fn from_shared_bytes(
bytes: Arc<Vec<u8>>,
offset: usize,
len: usize,
) -> Result<Index, Box<Error>>
{
let data = SharedData::new(bytes, offset, len);
let mut cursor = Cursor::new(data);
let negative = Negative::from_cursor(&mut cursor)?;
let positive = Positive::from_cursor(&mut cursor)?;
Ok(Index { negative, positive })
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
self.negative.write_to_bytes(bytes);
self.positive.write_to_bytes(bytes);
}
pub fn merge(&self, other: &Index) -> Result<Index, Box<Error>> {
if other.negative.is_empty() {
let negative = Negative::default();
let positive = self.positive.union(&other.positive)?;
return Ok(Index { negative, positive })
}
let mut buffer = Vec::new();
let mut builder = PositiveBuilder::memory();
let mut stream = self.positive.into_stream();
while let Some((key, indexes)) = stream.next() {
let op = DifferenceByKey::new(indexes, &other.negative, |x| x.document_id, |x| *x);
buffer.clear();
op.extend_vec(&mut buffer);
if !buffer.is_empty() {
let indexes = Set::new_unchecked(&buffer);
builder.insert(key, indexes)?;
}
}
let positive = {
let (map, indexes) = builder.into_inner()?;
let map = Map::from_bytes(map)?;
let indexes = DocIndexes::from_bytes(indexes)?;
Positive::new(map, indexes)
};
let negative = Negative::default();
let positive = positive.union(&other.positive)?;
Ok(Index { negative, positive })
}
}


@ -1,43 +0,0 @@
use std::error::Error;
use std::io::Cursor;
use std::ops::Deref;
use sdset::Set;
use byteorder::{LittleEndian, WriteBytesExt};
use crate::data::SharedData;
use crate::data::DocIds;
use crate::DocumentId;
#[derive(Default)]
pub struct Negative(DocIds);
impl Negative {
pub fn new(doc_ids: DocIds) -> Negative {
Negative(doc_ids)
}
pub fn from_cursor(cursor: &mut Cursor<SharedData>) -> Result<Negative, Box<Error>> {
let doc_ids = DocIds::from_cursor(cursor)?;
Ok(Negative(doc_ids))
}
pub fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
let slice = self.0.as_bytes();
let len = slice.len() as u64;
let _ = bytes.write_u64::<LittleEndian>(len);
bytes.extend_from_slice(slice);
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
}
impl Deref for Negative {
type Target = Set<DocumentId>;
fn deref(&self) -> &Self::Target {
self.0.as_ref()
}
}


@ -1,27 +1,48 @@
use std::sync::{Arc, Mutex};
use std::time::Instant;
use std::error::Error;
use std::ops::Deref;
use std::path::Path;
use std::ffi::OsStr;
use std::sync::Arc;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};
use std::ops::{Deref, DerefMut};
use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions};
use rocksdb::rocksdb_options::{DBOptions, ColumnFamilyOptions};
use rocksdb::rocksdb::{Writable, Snapshot};
use rocksdb::{DB, DBVector, MergeOperands};
use crossbeam::atomic::ArcCell;
use log::debug;
use rocksdb::{DB, MergeOperands};
use size_format::SizeFormatterBinary;
use arc_swap::ArcSwap;
use lockfree::map::Map;
use hashbrown::HashMap;
use log::{info, error, warn};
use crate::database::schema::SchemaAttr;
use crate::shared_data_cursor::FromSharedDataCursor;
use crate::write_to_bytes::WriteToBytes;
use crate::DocumentId;
use self::update::{ReadIndexEvent, ReadRankedMapEvent};
pub use self::config::Config;
pub use self::document_key::{DocumentKey, DocumentKeyAttr};
pub use self::view::{DatabaseView, DocumentIter};
pub use self::update::{Update, UpdateBuilder};
pub use self::update::Update;
pub use self::serde::SerializerError;
pub use self::schema::Schema;
pub use self::index::Index;
pub use self::number::{Number, ParseNumberError};
const DATA_INDEX: &[u8] = b"data-index";
const DATA_SCHEMA: &[u8] = b"data-schema";
pub type RankedMap = HashMap<(DocumentId, SchemaAttr), Number>;
const DATA_INDEX: &[u8] = b"data-index";
const DATA_RANKED_MAP: &[u8] = b"data-ranked-map";
const DATA_SCHEMA: &[u8] = b"data-schema";
const CONFIG: &[u8] = b"config";
pub mod config;
pub mod schema;
pub(crate) mod index;
mod deserializer;
mod number;
mod document_key;
mod serde;
mod update;
@ -39,64 +60,150 @@ where D: Deref<Target=DB>
fn retrieve_data_index<D>(snapshot: &Snapshot<D>) -> Result<Index, Box<Error>>
where D: Deref<Target=DB>
{
let index = match snapshot.get(DATA_INDEX)? {
Some(vector) => {
let bytes = vector.as_ref().to_vec();
Index::from_bytes(bytes)?
},
None => Index::default(),
};
let start = Instant::now();
let vector = snapshot.get(DATA_INDEX)?;
info!("loading index from kv-store took {:.2?}", start.elapsed());
Ok(index)
match vector {
Some(vector) => {
let start = Instant::now();
let bytes = vector.as_ref().to_vec();
info!("index size is {}B", SizeFormatterBinary::new(bytes.len() as u64));
let event = ReadIndexEvent::from_bytes(bytes)?;
let index = event.updated_documents().expect("BUG: invalid event deserialized");
info!("loading index from bytes took {:.2?}", start.elapsed());
Ok(index)
},
None => Ok(Index::default()),
}
}
fn merge_indexes(key: &[u8], existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
assert_eq!(key, DATA_INDEX, "The merge operator only supports \"data-index\" merging");
fn retrieve_data_ranked_map<D>(snapshot: &Snapshot<D>) -> Result<RankedMap, Box<Error>>
where D: Deref<Target=DB>,
{
let start = Instant::now();
let vector = snapshot.get(DATA_RANKED_MAP)?;
info!("loading ranked map from kv-store took {:.2?}", start.elapsed());
let mut index: Option<Index> = None;
match vector {
Some(vector) => {
let start = Instant::now();
let bytes = vector.as_ref().to_vec();
info!("ranked map size is {}B", SizeFormatterBinary::new(bytes.len() as u64));
let event = ReadRankedMapEvent::from_bytes(bytes)?;
let ranked_map = event.updated_documents().expect("BUG: invalid event deserialized");
info!("loading ranked map from bytes took {:.2?}", start.elapsed());
Ok(ranked_map)
},
None => Ok(RankedMap::new()),
}
}
fn retrieve_config<D>(snapshot: &Snapshot<D>) -> Result<Config, Box<Error>>
where D: Deref<Target=DB>,
{
match snapshot.get(CONFIG)? {
Some(vector) => Ok(bincode::deserialize(&*vector)?),
None => Ok(Config::default()),
}
}
fn merge_indexes(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
use self::update::ReadIndexEvent::{self, *};
use self::update::WriteIndexEvent;
let mut index = Index::default();
for bytes in existing.into_iter().chain(operands) {
let operand = Index::from_bytes(bytes.to_vec()).unwrap();
let merged = match index {
Some(ref index) => index.merge(&operand).unwrap(),
None => operand,
};
index.replace(merged);
match ReadIndexEvent::from_bytes(bytes.to_vec()).unwrap() {
RemovedDocuments(d) => index = index.remove_documents(d.as_ref()),
UpdatedDocuments(i) => index = index.union(&i),
}
}
let index = index.unwrap_or_default();
let mut bytes = Vec::new();
index.write_to_bytes(&mut bytes);
bytes
WriteIndexEvent::UpdatedDocuments(&index).into_bytes()
}
pub struct Database {
// DB is under a Mutex to sync update ingestions and separate DB update locking
// and DatabaseView acquiring locking in other words:
// "Block readers the minimum possible amount of time"
db: Mutex<Arc<DB>>,
fn merge_ranked_maps(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
use self::update::ReadRankedMapEvent::{self, *};
use self::update::WriteRankedMapEvent;
// This view is updated each time the DB ingests an update
view: ArcCell<DatabaseView<Arc<DB>>>,
let mut ranked_map = RankedMap::default();
for bytes in existing.into_iter().chain(operands) {
match ReadRankedMapEvent::from_bytes(bytes.to_vec()).unwrap() {
RemovedDocuments(d) => ranked_map.retain(|(k, _), _| !d.as_ref().binary_search(k).is_ok()),
UpdatedDocuments(i) => ranked_map.extend(i),
}
}
WriteRankedMapEvent::UpdatedDocuments(&ranked_map).into_bytes()
}
impl Database {
pub fn create<P: AsRef<Path>>(path: P, schema: &Schema) -> Result<Database, Box<Error>> {
fn merge_operator(key: &[u8], existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
match key {
DATA_INDEX => merge_indexes(existing, operands),
DATA_RANKED_MAP => merge_ranked_maps(existing, operands),
key => panic!("The merge operator does not support merging {:?}", key),
}
}
pub struct IndexUpdate {
index: String,
update: Update,
}
impl Deref for IndexUpdate {
type Target = Update;
fn deref(&self) -> &Update {
&self.update
}
}
impl DerefMut for IndexUpdate {
fn deref_mut(&mut self) -> &mut Update {
&mut self.update
}
}
struct DatabaseIndex {
db: Arc<DB>,
// This view is updated each time the DB ingests an update.
view: ArcSwap<DatabaseView<Arc<DB>>>,
// The path of the mdb folder stored on disk.
path: PathBuf,
// must_die false by default, must be set as true when the Index is dropped.
// It is used to erase the folder saved on disk when the user request to delete an index.
must_die: AtomicBool,
}
impl DatabaseIndex {
fn create<P: AsRef<Path>>(path: P, schema: &Schema) -> Result<DatabaseIndex, Box<Error>> {
let path = path.as_ref();
if path.exists() {
return Err(format!("File already exists at path: {}, cannot create database.",
path.display()).into())
}
let path = path.to_string_lossy();
let path_lossy = path.to_string_lossy();
let mut opts = DBOptions::new();
opts.create_if_missing(true);
// opts.error_if_exists(true); // FIXME pull request that
let mut cf_opts = ColumnFamilyOptions::new();
cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
cf_opts.add_merge_operator("data merge operator", merge_operator);
let db = DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
let db = DB::open_cf(opts, &path_lossy, vec![("default", cf_opts)])?;
let mut schema_bytes = Vec::new();
schema.write_to_bin(&mut schema_bytes)?;
@ -104,21 +211,26 @@ impl Database {
let db = Arc::new(db);
let snapshot = Snapshot::new(db.clone());
let view = ArcCell::new(Arc::new(DatabaseView::new(snapshot)?));
let view = ArcSwap::new(Arc::new(DatabaseView::new(snapshot)?));
Ok(Database { db: Mutex::new(db), view })
Ok(DatabaseIndex {
db: db,
view: view,
path: path.to_path_buf(),
must_die: AtomicBool::new(false)
})
}
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
let path = path.as_ref().to_string_lossy();
fn open<P: AsRef<Path>>(path: P) -> Result<DatabaseIndex, Box<Error>> {
let path_lossy = path.as_ref().to_string_lossy();
let mut opts = DBOptions::new();
opts.create_if_missing(false);
let mut cf_opts = ColumnFamilyOptions::new();
cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
cf_opts.add_merge_operator("data merge operator", merge_operator);
let db = DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
let db = DB::open_cf(opts, &path_lossy, vec![("default", cf_opts)])?;
// FIXME create a generic function to do that !
let _schema = match db.get(DATA_SCHEMA)? {
@ -128,79 +240,209 @@ impl Database {
let db = Arc::new(db);
let snapshot = Snapshot::new(db.clone());
let view = ArcCell::new(Arc::new(DatabaseView::new(snapshot)?));
let view = ArcSwap::new(Arc::new(DatabaseView::new(snapshot)?));
Ok(Database { db: Mutex::new(db), view })
Ok(DatabaseIndex {
db: db,
view: view,
path: path.as_ref().to_path_buf(),
must_die: AtomicBool::new(false)
})
}
pub fn ingest_update_file(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
let snapshot = {
// We must have a mutex here to ensure that update ingestions and compactions
// are done atomatically and in the right order.
// This way update ingestions will block other update ingestions without blocking view
// creations while doing the "data-index" compaction
let db = match self.db.lock() {
Ok(db) => db,
Err(e) => return Err(e.to_string().into()),
};
fn must_die(&self) {
self.must_die.store(true, Ordering::Relaxed)
}
let path = update.path().to_string_lossy();
let options = IngestExternalFileOptions::new();
// options.move_files(move_update);
debug!("ingest update file");
let cf_handle = db.cf_handle("default").expect("\"default\" column family not found");
db.ingest_external_file_optimized(&cf_handle, &options, &[&path])?;
debug!("compacting index range");
// Compacting to trigger the merge operator only one time
// while ingesting the update and not each time searching
db.compact_range(Some(DATA_INDEX), Some(DATA_INDEX));
Snapshot::new(db.clone())
fn start_update(&self) -> Result<Update, Box<Error>> {
let schema = match self.db.get(DATA_SCHEMA)? {
Some(value) => Schema::read_from_bin(&*value)?,
None => panic!("Database does not contain a schema"),
};
Ok(Update::new(schema))
}
fn commit_update(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
let batch = update.build()?;
self.db.write(batch)?;
let snapshot = Snapshot::new(self.db.clone());
let view = Arc::new(DatabaseView::new(snapshot)?);
self.view.set(view.clone());
self.view.store(view.clone());
Ok(view)
}
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
self.view().get(key)
fn view(&self) -> Arc<DatabaseView<Arc<DB>>> {
self.view.load()
}
pub fn flush(&self) -> Result<(), Box<Error>> {
match self.db.lock() {
Ok(db) => Ok(db.flush(true)?),
Err(e) => Err(e.to_string().into()),
fn get_config(&self) -> Config {
self.view().config().clone()
}
fn update_config(&self, config: Config) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>>{
let data = bincode::serialize(&config)?;
self.db.put(CONFIG, &data)?;
let snapshot = Snapshot::new(self.db.clone());
let view = Arc::new(DatabaseView::new(snapshot)?);
self.view.store(view.clone());
Ok(view)
}
fn path(&self) -> &Path {
self.path.as_path()
}
}
impl Drop for DatabaseIndex {
fn drop(&mut self) {
if self.must_die.load(Ordering::Relaxed) {
if let Err(err) = fs::remove_dir_all(&self.path) {
error!("Impossible to remove mdb when Database is dropped; {}", err);
}
}
}
}
pub fn view(&self) -> Arc<DatabaseView<Arc<DB>>> {
self.view.get()
pub struct Database {
indexes: Map<String, Arc<DatabaseIndex>>,
path: PathBuf,
}
impl Database {
pub fn create<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
Ok(Database {
indexes: Map::new(),
path: path.as_ref().to_path_buf(),
})
}
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
let entries = fs::read_dir(&path)?;
let indexes = Map::new();
for entry in entries {
let path = match entry {
Ok(p) => p.path(),
Err(err) => {
warn!("Impossible to retrieve the path from an entry; {}", err);
continue
}
};
let name = match path.file_stem().and_then(OsStr::to_str) {
Some(name) => name.to_owned(),
None => continue
};
let db = match DatabaseIndex::open(path.clone()) {
Ok(db) => db,
Err(err) => {
warn!("Impossible to open the database; {}", err);
continue
}
};
info!("Load database {}", name);
indexes.insert(name, Arc::new(db));
}
Ok(Database {
indexes: indexes,
path: path.as_ref().to_path_buf(),
})
}
pub fn create_index(&self, name: &str, schema: &Schema) -> Result<(), Box<Error>> {
let index_path = self.path.join(name);
if index_path.exists() {
return Err("Index already exists".into());
}
let index = DatabaseIndex::create(index_path, schema)?;
self.indexes.insert(name.to_owned(), Arc::new(index));
Ok(())
}
pub fn delete_index(&self, name: &str) -> Result<(), Box<Error>> {
let index_guard = self.indexes.remove(name).ok_or("Index not found")?;
index_guard.val().must_die();
Ok(())
}
pub fn list_indexes(&self) -> Vec<String> {
self.indexes.iter().map(|g| g.key().clone()).collect()
}
pub fn start_update(&self, index: &str) -> Result<IndexUpdate, Box<Error>> {
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
let update = index_guard.val().start_update()?;
Ok(IndexUpdate { index: index.to_owned(), update })
}
pub fn commit_update(&self, update: IndexUpdate)-> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
let index_guard = self.indexes.get(&update.index).ok_or("Index not found")?;
index_guard.val().commit_update(update.update)
}
pub fn view(&self, index: &str) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
Ok(index_guard.val().view())
}
pub fn get_config(&self, index: &str) -> Result<Config, Box<Error>> {
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
Ok(index_guard.val().get_config())
}
pub fn update_config(&self, index: &str, config: Config) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>>{
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
Ok(index_guard.val().update_config(config)?)
}
pub fn path(&self) -> &Path {
self.path.as_path()
}
pub fn index_path(&self, index: &str) -> Result<PathBuf, Box<Error>> {
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
let path = index_guard.val().path();
Ok(path.to_path_buf())
}
}
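As an illustration, the new multi-index API above composes roughly as follows (a minimal sketch mirroring the tests below; schema, doc, tokenizer_builder, stop_words and SimpleDoc are placeholders and error handling is elided):

    // hypothetical end-to-end flow with the per-index Database
    let database = Database::create("/tmp/example.mdb")?;
    database.create_index("default", &schema)?;

    let mut builder = database.start_update("default")?;
    let docid = builder.update_document(&doc, &tokenizer_builder, &stop_words)?;
    let view = database.commit_update(builder)?;

    let retrieved: SimpleDoc = view.document_by_id(docid)?;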
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashSet;
use std::error::Error;
use serde_derive::{Serialize, Deserialize};
use hashbrown::HashSet;
use tempfile::tempdir;
use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
use crate::database::update::UpdateBuilder;
use crate::tokenizer::DefaultBuilder;
use super::*;
#[test]
fn ingest_one_update_file() -> Result<(), Box<Error>> {
let dir = tempdir()?;
fn ingest_one_easy_update() -> Result<(), Box<Error>> {
let dir = tempfile::tempdir()?;
let stop_words = HashSet::new();
let rocksdb_path = dir.path().join("rocksdb.rdb");
let meilidb_path = dir.path().join("meilidb.mdb");
let meilidb_index_name = "default";
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
struct SimpleDoc {
@ -219,9 +461,9 @@ mod tests {
builder.build()
};
let database = Database::create(&rocksdb_path, &schema)?;
let database = Database::create(&meilidb_path)?;
let update_path = dir.path().join("update.sst");
database.create_index(meilidb_index_name, &schema)?;
let doc0 = SimpleDoc {
id: 0,
@ -236,20 +478,13 @@ mod tests {
timestamp: 7654321,
};
let docid0;
let docid1;
let update = {
let tokenizer_builder = DefaultBuilder::new();
let mut builder = UpdateBuilder::new(update_path, schema);
let tokenizer_builder = DefaultBuilder::new();
let mut builder = database.start_update(meilidb_index_name)?;
docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
let docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
let docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
builder.build()?
};
database.ingest_update_file(update)?;
let view = database.view();
let view = database.commit_update(builder)?;
let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
@ -261,11 +496,12 @@ mod tests {
}
#[test]
fn ingest_two_update_files() -> Result<(), Box<Error>> {
let dir = tempdir()?;
fn ingest_two_easy_updates() -> Result<(), Box<Error>> {
let dir = tempfile::tempdir()?;
let stop_words = HashSet::new();
let rocksdb_path = dir.path().join("rocksdb.rdb");
let meilidb_path = dir.path().join("meilidb.mdb");
let meilidb_index_name = "default";
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
struct SimpleDoc {
@ -284,7 +520,9 @@ mod tests {
builder.build()
};
let database = Database::create(&rocksdb_path, &schema)?;
let database = Database::create(&meilidb_path)?;
database.create_index(meilidb_index_name, &schema)?;
let doc0 = SimpleDoc {
id: 0,
@ -311,36 +549,17 @@ mod tests {
timestamp: 7654321,
};
let docid0;
let docid1;
let update1 = {
let tokenizer_builder = DefaultBuilder::new();
let update_path = dir.path().join("update-000.sst");
let mut builder = UpdateBuilder::new(update_path, schema.clone());
let tokenizer_builder = DefaultBuilder::new();
docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
let mut builder = database.start_update(meilidb_index_name)?;
let docid0 = builder.update_document(&doc0, &tokenizer_builder, &stop_words)?;
let docid1 = builder.update_document(&doc1, &tokenizer_builder, &stop_words)?;
database.commit_update(builder)?;
builder.build()?
};
let docid2;
let docid3;
let update2 = {
let tokenizer_builder = DefaultBuilder::new();
let update_path = dir.path().join("update-001.sst");
let mut builder = UpdateBuilder::new(update_path, schema);
docid2 = builder.update_document(&doc2, &tokenizer_builder, &stop_words)?;
docid3 = builder.update_document(&doc3, &tokenizer_builder, &stop_words)?;
builder.build()?
};
database.ingest_update_file(update1)?;
database.ingest_update_file(update2)?;
let view = database.view();
let mut builder = database.start_update(meilidb_index_name)?;
let docid2 = builder.update_document(&doc2, &tokenizer_builder, &stop_words)?;
let docid3 = builder.update_document(&doc3, &tokenizer_builder, &stop_words)?;
let view = database.commit_update(builder)?;
let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
@ -362,7 +581,7 @@ mod tests {
mod bench {
extern crate test;
use super::*;
use std::collections::HashSet;
use std::error::Error;
use std::iter::repeat_with;
use self::test::Bencher;
@ -372,12 +591,12 @@ mod bench {
use rand::{Rng, SeedableRng};
use serde_derive::Serialize;
use rand::seq::SliceRandom;
use hashbrown::HashSet;
use crate::tokenizer::DefaultBuilder;
use crate::database::update::UpdateBuilder;
use crate::database::schema::*;
use super::*;
fn random_sentences<R: Rng>(number: usize, rng: &mut R) -> String {
let mut words = String::new();
@ -409,7 +628,10 @@ mod bench {
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
let index_name = "default";
let database = Database::create(&db_path)?;
database.create_index(index_name, &schema)?;
#[derive(Serialize)]
struct Document {
@ -418,9 +640,8 @@ mod bench {
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut builder = database.start_update(index_name)?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..300 {
@ -432,8 +653,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
database.ingest_update_file(update)?;
database.commit_update(builder)?;
drop(database);
@ -456,7 +676,10 @@ mod bench {
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
let index_name = "default";
let database = Database::create(&db_path)?;
database.create_index(index_name, &schema)?;
#[derive(Serialize)]
struct Document {
@ -465,9 +688,8 @@ mod bench {
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut builder = database.start_update(index_name)?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..3000 {
@ -479,8 +701,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
database.ingest_update_file(update)?;
database.commit_update(builder)?;
drop(database);
@ -504,7 +725,10 @@ mod bench {
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
let index_name = "default";
let database = Database::create(&db_path)?;
database.create_index(index_name, &schema)?;
#[derive(Serialize)]
struct Document {
@ -513,9 +737,8 @@ mod bench {
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut builder = database.start_update(index_name)?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..30_000 {
@ -527,8 +750,7 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
database.ingest_update_file(update)?;
database.commit_update(builder)?;
drop(database);
@ -551,7 +773,10 @@ mod bench {
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
let index_name = "default";
let database = Database::create(&db_path)?;
database.create_index(index_name, &schema)?;
#[derive(Serialize)]
struct Document {
@ -560,9 +785,8 @@ mod bench {
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut builder = database.start_update(index_name)?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..300 {
@ -574,12 +798,11 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
let view = database.ingest_update_file(update)?;
let view = database.commit_update(builder)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {
let documents = view.query_builder().unwrap().query(q, 0..20);
let documents = view.query_builder().query(q, 0..20);
test::black_box(|| documents);
}
});
@ -598,7 +821,10 @@ mod bench {
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
let index_name = "default";
let database = Database::create(&db_path)?;
database.create_index(index_name, &schema)?;
#[derive(Serialize)]
struct Document {
@ -607,9 +833,8 @@ mod bench {
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut builder = database.start_update(index_name)?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..3000 {
@ -621,12 +846,11 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
let view = database.ingest_update_file(update)?;
let view = database.commit_update(builder)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {
let documents = view.query_builder().unwrap().query(q, 0..20);
let documents = view.query_builder().query(q, 0..20);
test::black_box(|| documents);
}
});
@ -646,7 +870,10 @@ mod bench {
let schema = builder.build();
let db_path = dir.path().join("bench.mdb");
let database = Database::create(db_path.clone(), &schema)?;
let index_name = "default";
let database = Database::create(&db_path)?;
database.create_index(index_name, &schema)?;
#[derive(Serialize)]
struct Document {
@ -655,9 +882,8 @@ mod bench {
description: String,
}
let path = dir.path().join("update-000.sst");
let tokenizer_builder = DefaultBuilder;
let mut builder = UpdateBuilder::new(path, schema);
let mut builder = database.start_update(index_name)?;
let mut rng = XorShiftRng::seed_from_u64(42);
for i in 0..30_000 {
@ -669,12 +895,11 @@ mod bench {
builder.update_document(&document, &tokenizer_builder, &stop_words)?;
}
let update = builder.build()?;
let view = database.ingest_update_file(update)?;
let view = database.commit_update(builder)?;
bench.iter(|| {
for q in &["a", "b", "c", "d", "e"] {
let documents = view.query_builder().unwrap().query(q, 0..20);
let documents = view.query_builder().query(q, 0..20);
test::black_box(|| documents);
}
});

src/database/number.rs (new file, 98 lines)

@ -0,0 +1,98 @@
use std::cmp::Ordering;
use std::str::FromStr;
use std::fmt;
use serde_derive::{Serialize, Deserialize};
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone)]
pub enum Number {
Unsigned(u64),
Signed(i64),
Float(f64),
}
impl FromStr for Number {
type Err = ParseNumberError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if let Ok(unsigned) = u64::from_str(s) {
return Ok(Number::Unsigned(unsigned))
}
if let Ok(signed) = i64::from_str(s) {
return Ok(Number::Signed(signed))
}
if let Ok(float) = f64::from_str(s) {
if float == 0.0 || float.is_normal() {
return Ok(Number::Float(float))
}
}
Err(ParseNumberError)
}
}
impl PartialOrd for Number {
fn partial_cmp(&self, other: &Number) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for Number {
fn cmp(&self, other: &Number) -> Ordering {
use Number::*;
match (self, other) {
(Unsigned(s), Unsigned(o)) => s.cmp(o),
(Unsigned(s), Signed(o)) => {
let s = i128::from(*s);
let o = i128::from(*o);
s.cmp(&o)
},
(Unsigned(s), Float(o)) => {
let s = *s as f64;
s.partial_cmp(&o).unwrap_or(Ordering::Equal)
},
(Signed(s), Unsigned(o)) => {
let s = i128::from(*s);
let o = i128::from(*o);
s.cmp(&o)
},
(Signed(s), Signed(o)) => s.cmp(o),
(Signed(s), Float(o)) => {
let s = *s as f64;
s.partial_cmp(o).unwrap_or(Ordering::Equal)
},
(Float(s), Unsigned(o)) => {
let o = *o as f64;
s.partial_cmp(&o).unwrap_or(Ordering::Equal)
},
(Float(s), Signed(o)) => {
let o = *o as f64;
s.partial_cmp(&o).unwrap_or(Ordering::Equal)
},
(Float(s), Float(o)) => {
s.partial_cmp(o).unwrap_or(Ordering::Equal)
},
}
}
}
impl PartialEq for Number {
fn eq(&self, other: &Number) -> bool {
self.cmp(other) == Ordering::Equal
}
}
impl Eq for Number { }
pub struct ParseNumberError;
impl fmt::Display for ParseNumberError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("can not parse number")
}
}
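A few concrete comparisons illustrating the mixed-type ordering defined above (an illustrative sketch, values chosen arbitrarily):

    use std::str::FromStr;

    // cross-type comparisons are lifted to i128 or f64, as in the Ord impl above
    assert_eq!(Number::Unsigned(3), Number::Signed(3));
    assert!(Number::Signed(-1) < Number::Unsigned(0));
    assert!(Number::Float(2.5) > Number::Signed(2));
    assert!(Number::from_str("42").is_ok());     // parsed as Unsigned(42)
    assert!(Number::from_str("-7").is_ok());     // parsed as Signed(-7)
    assert!(Number::from_str("1e999").is_err()); // infinite, not a normal float, rejected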


@ -7,14 +7,14 @@ use std::sync::Arc;
use serde_derive::{Serialize, Deserialize};
use linked_hash_map::LinkedHashMap;
use serde::Serialize;
use crate::database::serde::find_id::FindDocumentIdSerializer;
use crate::database::serde::SerializerError;
use crate::DocumentId;
pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false };
pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true };
pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false, ranked: false };
pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true, ranked: false };
pub const RANKED: SchemaProps = SchemaProps { stored: false, indexed: false, ranked: true };
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SchemaProps {
@ -23,6 +23,9 @@ pub struct SchemaProps {
#[serde(default)]
indexed: bool,
#[serde(default)]
ranked: bool,
}
impl SchemaProps {
@ -33,6 +36,10 @@ impl SchemaProps {
pub fn is_indexed(self) -> bool {
self.indexed
}
pub fn is_ranked(self) -> bool {
self.ranked
}
}
impl BitOr for SchemaProps {
@ -42,6 +49,7 @@ impl BitOr for SchemaProps {
SchemaProps {
stored: self.stored | other.stored,
indexed: self.indexed | other.indexed,
ranked: self.ranked | other.ranked,
}
}
}
@ -113,6 +121,23 @@ impl Schema {
Ok(())
}
pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer)?;
let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
Ok(builder.build())
}
pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
let identifier = self.inner.identifier.clone();
let attributes = self.attributes_ordered();
let builder = SchemaBuilder { identifier, attributes };
let string = serde_json::to_string_pretty(&builder)?;
writer.write_all(string.as_bytes())?;
Ok(())
}
pub(crate) fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> {
let builder: SchemaBuilder = bincode::deserialize_from(reader)?;
Ok(builder.build())
@ -142,7 +167,7 @@ impl Schema {
}
pub fn document_id<T>(&self, document: T) -> Result<DocumentId, SerializerError>
where T: Serialize,
where T: serde::Serialize,
{
let id_attribute_name = &self.inner.identifier;
let serializer = FindDocumentIdSerializer { id_attribute_name };
@ -168,7 +193,8 @@ impl Schema {
}
}
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct SchemaAttr(pub(crate) u16);
impl SchemaAttr {
@ -254,4 +280,40 @@ mod tests {
Ok(())
}
#[test]
fn serialize_deserialize_json() -> Result<(), Box<Error>> {
let mut builder = SchemaBuilder::with_identifier("id");
builder.new_attribute("alpha", STORED);
builder.new_attribute("beta", STORED | INDEXED);
builder.new_attribute("gamma", INDEXED);
let schema = builder.build();
let mut buffer = Vec::new();
schema.to_json(&mut buffer)?;
let schema2 = Schema::from_json(buffer.as_slice())?;
assert_eq!(schema, schema2);
let data = r#"
{
"identifier": "id",
"attributes": {
"alpha": {
"stored": true
},
"beta": {
"stored": true,
"indexed": true
},
"gamma": {
"indexed": true
}
}
}"#;
let schema2 = Schema::from_json(data.as_bytes())?;
assert_eq!(schema, schema2);
Ok(())
}
}
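For completeness, a short sketch of how the new RANKED property composes with the existing flags (attribute names are illustrative):

    let mut builder = SchemaBuilder::with_identifier("id");
    builder.new_attribute("title", STORED | INDEXED);
    // neither stored nor indexed, but available to ranking criteria such as SortByAttr
    builder.new_attribute("timestamp", RANKED);
    let schema = builder.build();

    let attr = schema.attribute("timestamp").unwrap();
    assert!(schema.props(attr).is_ranked());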


@ -1,23 +1,24 @@
use std::collections::HashSet;
use serde::Serialize;
use serde::ser;
use crate::database::update::DocumentUpdate;
use crate::database::serde::SerializerError;
use crate::database::schema::SchemaAttr;
use crate::tokenizer::TokenizerBuilder;
use crate::tokenizer::Token;
use crate::{DocumentId, DocIndex, Attribute, WordArea};
use crate::{is_cjk, DocumentId, DocIndex};
use hashbrown::HashSet;
use serde::Serialize;
use serde::ser;
pub struct IndexerSerializer<'a, B> {
pub struct IndexerSerializer<'a, 'b, B> {
pub tokenizer_builder: &'a B,
pub update: &'a mut DocumentUpdate,
pub update: &'a mut DocumentUpdate<'b>,
pub document_id: DocumentId,
pub attribute: SchemaAttr,
pub stop_words: &'a HashSet<String>,
}
impl<'a, B> ser::Serializer for IndexerSerializer<'a, B>
impl<'a, 'b, B> ser::Serializer for IndexerSerializer<'a, 'b, B>
where B: TokenizerBuilder
{
type Ok = ();
@ -54,10 +55,8 @@ where B: TokenizerBuilder
let document_id = self.document_id;
// FIXME must u32::try_from instead
let attribute = match Attribute::new(self.attribute.0, word_index as u32) {
Ok(attribute) => attribute,
Err(_) => return Ok(()),
};
let attribute = self.attribute.0;
let word_index = word_index as u32;
// insert the exact representation
let word_lower = word.to_lowercase();
@ -66,24 +65,23 @@ where B: TokenizerBuilder
if self.stop_words.contains(&word_lower) { continue }
// and the unidecoded lowercased version
let word_unidecoded = unidecode::unidecode(word).to_lowercase();
if word_lower != word_unidecoded {
let word_area = match WordArea::new(char_index as u32, length) {
Ok(word_area) => word_area,
Err(_) => return Ok(()),
};
if !word_lower.chars().any(is_cjk) {
let word_unidecoded = unidecode::unidecode(word).to_lowercase();
let word_unidecoded = word_unidecoded.trim();
if word_lower != word_unidecoded {
let char_index = char_index as u32;
let char_length = length;
let doc_index = DocIndex { document_id, attribute, word_area };
self.update.insert_doc_index(word_unidecoded.into_bytes(), doc_index);
let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
self.update.insert_doc_index(word_unidecoded.as_bytes().to_vec(), doc_index)?;
}
}
let word_area = match WordArea::new(char_index as u32, length) {
Ok(word_area) => word_area,
Err(_) => return Ok(()),
};
let char_index = char_index as u32;
let char_length = length;
let doc_index = DocIndex { document_id, attribute, word_area };
self.update.insert_doc_index(word_lower.into_bytes(), doc_index);
let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
self.update.insert_doc_index(word_lower.into_bytes(), doc_index)?;
}
Ok(())
}


@ -17,8 +17,10 @@ macro_rules! forward_to_unserializable_type {
pub mod find_id;
pub mod key_to_string;
pub mod value_to_number;
pub mod serializer;
pub mod indexer_serializer;
pub mod deserializer;
pub fn calculate_hash<T: Hash>(t: &T) -> u64 {
let mut s = DefaultHasher::new();
@ -55,3 +57,9 @@ impl fmt::Display for SerializerError {
}
impl Error for SerializerError {}
impl From<String> for SerializerError {
fn from(value: String) -> SerializerError {
SerializerError::Custom(value)
}
}


@ -1,24 +1,26 @@
use hashbrown::HashSet;
use std::collections::HashSet;
use serde::Serialize;
use serde::ser;
use crate::database::serde::indexer_serializer::IndexerSerializer;
use crate::database::serde::key_to_string::KeyToStringSerializer;
use crate::database::serde::value_to_number::ValueToNumberSerializer;
use crate::database::update::DocumentUpdate;
use crate::database::serde::SerializerError;
use crate::tokenizer::TokenizerBuilder;
use crate::database::schema::Schema;
use crate::DocumentId;
pub struct Serializer<'a, B> {
pub struct Serializer<'a, 'b, B> {
pub schema: &'a Schema,
pub update: &'a mut DocumentUpdate,
pub update: &'a mut DocumentUpdate<'b>,
pub document_id: DocumentId,
pub tokenizer_builder: &'a B,
pub stop_words: &'a HashSet<String>,
}
impl<'a, B> ser::Serializer for Serializer<'a, B>
impl<'a, 'b, B> ser::Serializer for Serializer<'a, 'b, B>
where B: TokenizerBuilder
{
type Ok = ();
@ -27,8 +29,8 @@ where B: TokenizerBuilder
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapSerializer<'a, B>;
type SerializeStruct = StructSerializer<'a, B>;
type SerializeMap = MapSerializer<'a, 'b, B>;
type SerializeStruct = StructSerializer<'a, 'b, B>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
@ -154,8 +156,8 @@ where B: TokenizerBuilder
{
Ok(StructSerializer {
schema: self.schema,
update: self.update,
document_id: self.document_id,
update: self.update,
tokenizer_builder: self.tokenizer_builder,
stop_words: self.stop_words,
})
@ -173,16 +175,16 @@ where B: TokenizerBuilder
}
}
pub struct MapSerializer<'a, B> {
pub struct MapSerializer<'a, 'b, B> {
pub schema: &'a Schema,
pub document_id: DocumentId,
pub update: &'a mut DocumentUpdate,
pub update: &'a mut DocumentUpdate<'b>,
pub tokenizer_builder: &'a B,
pub stop_words: &'a HashSet<String>,
pub current_key_name: Option<String>,
}
impl<'a, B> ser::SerializeMap for MapSerializer<'a, B>
impl<'a, 'b, B> ser::SerializeMap for MapSerializer<'a, 'b, B>
where B: TokenizerBuilder
{
type Ok = ();
@ -206,7 +208,7 @@ where B: TokenizerBuilder
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V
value: &V,
) -> Result<(), Self::Error>
where K: Serialize, V: Serialize,
{
@ -216,7 +218,7 @@ where B: TokenizerBuilder
let props = self.schema.props(attr);
if props.is_stored() {
let value = bincode::serialize(value).unwrap();
self.update.insert_attribute_value(attr, value);
self.update.insert_attribute_value(attr, &value)?;
}
if props.is_indexed() {
let serializer = IndexerSerializer {
@ -228,6 +230,10 @@ where B: TokenizerBuilder
};
value.serialize(serializer)?;
}
if props.is_ranked() {
let number = value.serialize(ValueToNumberSerializer)?;
self.update.register_ranked_attribute(attr, number)?;
}
}
Ok(())
@ -238,15 +244,15 @@ where B: TokenizerBuilder
}
}
pub struct StructSerializer<'a, B> {
pub struct StructSerializer<'a, 'b, B> {
pub schema: &'a Schema,
pub document_id: DocumentId,
pub update: &'a mut DocumentUpdate,
pub update: &'a mut DocumentUpdate<'b>,
pub tokenizer_builder: &'a B,
pub stop_words: &'a HashSet<String>,
}
impl<'a, B> ser::SerializeStruct for StructSerializer<'a, B>
impl<'a, 'b, B> ser::SerializeStruct for StructSerializer<'a, 'b, B>
where B: TokenizerBuilder
{
type Ok = ();
@ -263,7 +269,7 @@ where B: TokenizerBuilder
let props = self.schema.props(attr);
if props.is_stored() {
let value = bincode::serialize(value).unwrap();
self.update.insert_attribute_value(attr, value);
self.update.insert_attribute_value(attr, &value)?;
}
if props.is_indexed() {
let serializer = IndexerSerializer {
@ -275,6 +281,10 @@ where B: TokenizerBuilder
};
value.serialize(serializer)?;
}
if props.is_ranked() {
let integer = value.serialize(ValueToNumberSerializer)?;
self.update.register_ranked_attribute(attr, integer)?;
}
}
Ok(())


@ -0,0 +1,176 @@
use std::str::FromStr;
use serde::Serialize;
use serde::{ser, ser::Error};
use crate::database::serde::SerializerError;
use crate::database::Number;
pub struct ValueToNumberSerializer;
impl ser::Serializer for ValueToNumberSerializer {
type Ok = Number;
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
}
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(value as i64))
}
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(value as i64))
}
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(value as i64))
}
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Signed(value as i64))
}
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(value as u64))
}
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(value as u64))
}
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(value as u64))
}
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Unsigned(value as u64))
}
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
Ok(Number::Float(value as f64))
}
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
Ok(Number::Float(value))
}
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
Number::from_str(value).map_err(SerializerError::custom)
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnserializableType { name: "map" })
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
}
}
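To make the mapping concrete, here is how a few values would pass through ValueToNumberSerializer, written as an illustrative test:

    #[test]
    fn value_to_number_examples() -> Result<(), SerializerError> {
        use serde::Serialize;

        // integers map to the matching Number variant, strings go through Number::from_str,
        // unsupported types (bool, sequences, maps, ...) are rejected
        assert_eq!(7u32.serialize(ValueToNumberSerializer)?, Number::Unsigned(7));
        assert_eq!((-3i16).serialize(ValueToNumberSerializer)?, Number::Signed(-3));
        assert_eq!("12.5".serialize(ValueToNumberSerializer)?, Number::Float(12.5));
        assert!(true.serialize(ValueToNumberSerializer).is_err());

        Ok(())
    }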


@ -1,64 +0,0 @@
use std::path::PathBuf;
use std::error::Error;
use hashbrown::HashSet;
use serde::Serialize;
use crate::database::serde::serializer::Serializer;
use crate::database::serde::SerializerError;
use crate::tokenizer::TokenizerBuilder;
use crate::database::Schema;
use crate::DocumentId;
use super::{Update, RawUpdateBuilder};
pub struct UpdateBuilder {
schema: Schema,
raw_builder: RawUpdateBuilder,
}
impl UpdateBuilder {
pub fn new(path: PathBuf, schema: Schema) -> UpdateBuilder {
UpdateBuilder {
schema: schema,
raw_builder: RawUpdateBuilder::new(path),
}
}
pub fn update_document<T, B>(
&mut self,
document: T,
tokenizer_builder: &B,
stop_words: &HashSet<String>,
) -> Result<DocumentId, SerializerError>
where T: Serialize,
B: TokenizerBuilder,
{
let document_id = self.schema.document_id(&document)?;
let update = self.raw_builder.document_update(document_id);
let serializer = Serializer {
schema: &self.schema,
document_id: document_id,
tokenizer_builder: tokenizer_builder,
update: update,
stop_words: stop_words,
};
document.serialize(serializer)?;
Ok(document_id)
}
pub fn remove_document<T>(&mut self, document: T) -> Result<DocumentId, SerializerError>
where T: Serialize,
{
let document_id = self.schema.document_id(&document)?;
self.raw_builder.document_update(document_id).remove();
Ok(document_id)
}
pub fn build(self) -> Result<Update, Box<Error>> {
self.raw_builder.build()
}
}


@ -0,0 +1,55 @@
use std::error::Error;
use byteorder::{ReadBytesExt, WriteBytesExt};
use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
use crate::write_to_bytes::WriteToBytes;
use crate::database::Index;
use crate::data::DocIds;
pub enum WriteIndexEvent<'a> {
RemovedDocuments(&'a DocIds),
UpdatedDocuments(&'a Index),
}
impl<'a> WriteToBytes for WriteIndexEvent<'a> {
fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
match self {
WriteIndexEvent::RemovedDocuments(doc_ids) => {
let _ = bytes.write_u8(0);
doc_ids.write_to_bytes(bytes);
},
WriteIndexEvent::UpdatedDocuments(index) => {
let _ = bytes.write_u8(1);
index.write_to_bytes(bytes);
}
}
}
}
pub enum ReadIndexEvent {
RemovedDocuments(DocIds),
UpdatedDocuments(Index),
}
impl ReadIndexEvent {
pub fn updated_documents(self) -> Option<Index> {
use ReadIndexEvent::*;
match self {
RemovedDocuments(_) => None,
UpdatedDocuments(index) => Some(index),
}
}
}
impl FromSharedDataCursor for ReadIndexEvent {
type Error = Box<Error>;
fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> {
match cursor.read_u8()? {
0 => DocIds::from_shared_data_cursor(cursor).map(ReadIndexEvent::RemovedDocuments),
1 => Index::from_shared_data_cursor(cursor).map(ReadIndexEvent::UpdatedDocuments),
_ => unreachable!(),
}
}
}
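The on-disk format is a single tag byte followed by the payload, so writing an event looks roughly like this (a sketch; doc_ids is assumed to be an existing DocIds value):

    use crate::write_to_bytes::WriteToBytes;

    let mut bytes = Vec::new();
    WriteIndexEvent::RemovedDocuments(&doc_ids).write_to_bytes(&mut bytes);
    assert_eq!(bytes[0], 0); // tag 0 = RemovedDocuments, tag 1 = UpdatedDocuments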


@ -1,17 +1,239 @@
use std::path::{Path, PathBuf};
use std::collections::{HashSet, BTreeMap};
use std::error::Error;
mod builder;
mod raw_builder;
use rocksdb::rocksdb::{Writable, WriteBatch};
use hashbrown::hash_map::HashMap;
use sdset::{Set, SetBuf};
use serde::Serialize;
pub use self::builder::UpdateBuilder;
pub use self::raw_builder::{RawUpdateBuilder, DocumentUpdate};
use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
use crate::database::serde::serializer::Serializer;
use crate::database::serde::SerializerError;
use crate::database::schema::SchemaAttr;
use crate::database::schema::Schema;
use crate::database::index::IndexBuilder;
use crate::database::{DATA_INDEX, DATA_RANKED_MAP};
use crate::database::{RankedMap, Number};
use crate::tokenizer::TokenizerBuilder;
use crate::write_to_bytes::WriteToBytes;
use crate::data::DocIds;
use crate::{DocumentId, DocIndex};
pub use self::index_event::{ReadIndexEvent, WriteIndexEvent};
pub use self::ranked_map_event::{ReadRankedMapEvent, WriteRankedMapEvent};
mod index_event;
mod ranked_map_event;
pub type Token = Vec<u8>; // TODO could be replaced by a SmallVec
pub struct Update {
sst_file: PathBuf,
schema: Schema,
raw_builder: RawUpdateBuilder,
}
impl Update {
pub fn path(&self) -> &Path {
&self.sst_file
pub(crate) fn new(schema: Schema) -> Update {
Update { schema, raw_builder: RawUpdateBuilder::new() }
}
pub fn update_document<T, B>(
&mut self,
document: T,
tokenizer_builder: &B,
stop_words: &HashSet<String>,
) -> Result<DocumentId, SerializerError>
where T: Serialize,
B: TokenizerBuilder,
{
let document_id = self.schema.document_id(&document)?;
let serializer = Serializer {
schema: &self.schema,
document_id: document_id,
tokenizer_builder: tokenizer_builder,
update: &mut self.raw_builder.document_update(document_id)?,
stop_words: stop_words,
};
document.serialize(serializer)?;
Ok(document_id)
}
pub fn remove_document<T>(&mut self, document: T) -> Result<DocumentId, SerializerError>
where T: Serialize,
{
let document_id = self.schema.document_id(&document)?;
self.raw_builder.document_update(document_id)?.remove()?;
Ok(document_id)
}
pub(crate) fn build(self) -> Result<WriteBatch, Box<Error>> {
self.raw_builder.build()
}
}
#[derive(Copy, Clone, PartialEq, Eq)]
enum UpdateType {
Updated,
Deleted,
}
use UpdateType::{Updated, Deleted};
pub struct RawUpdateBuilder {
documents_update: HashMap<DocumentId, UpdateType>,
documents_ranked_fields: RankedMap,
indexed_words: BTreeMap<Token, Vec<DocIndex>>,
batch: WriteBatch,
}
impl RawUpdateBuilder {
pub fn new() -> RawUpdateBuilder {
RawUpdateBuilder {
documents_update: HashMap::new(),
documents_ranked_fields: HashMap::new(),
indexed_words: BTreeMap::new(),
batch: WriteBatch::new(),
}
}
pub fn document_update(&mut self, document_id: DocumentId) -> Result<DocumentUpdate, SerializerError> {
use serde::ser::Error;
match self.documents_update.get(&document_id) {
Some(Deleted) | None => Ok(DocumentUpdate { document_id, inner: self }),
Some(Updated) => Err(SerializerError::custom(
"This document has already been removed and cannot be updated in the same update"
)),
}
}
pub fn build(self) -> Result<WriteBatch, Box<Error>> {
// create the list of all the removed documents
let removed_documents = {
let mut document_ids = Vec::new();
for (id, update_type) in self.documents_update {
if update_type == Deleted {
document_ids.push(id);
}
}
document_ids.sort_unstable();
let setbuf = SetBuf::new_unchecked(document_ids);
DocIds::new(&setbuf)
};
// create the Index of all the document updates
let index = {
let mut builder = IndexBuilder::new();
for (key, mut indexes) in self.indexed_words {
indexes.sort_unstable();
let indexes = Set::new_unchecked(&indexes);
builder.insert(key, indexes).unwrap();
}
builder.build()
};
// WARN: removed documents must absolutely
// be merged *before* document updates
// === index ===
if !removed_documents.is_empty() {
// remove the documents using the appropriate IndexEvent
let event_bytes = WriteIndexEvent::RemovedDocuments(&removed_documents).into_bytes();
self.batch.merge(DATA_INDEX, &event_bytes)?;
}
// update the documents using the appropriate IndexEvent
let event_bytes = WriteIndexEvent::UpdatedDocuments(&index).into_bytes();
self.batch.merge(DATA_INDEX, &event_bytes)?;
// === ranked map ===
if !removed_documents.is_empty() {
// remove the documents from the ranked map using the appropriate RankedMapEvent
let event_bytes = WriteRankedMapEvent::RemovedDocuments(&removed_documents).into_bytes();
self.batch.merge(DATA_RANKED_MAP, &event_bytes)?;
}
// update the ranked map using the appropriate RankedMapEvent
let event_bytes = WriteRankedMapEvent::UpdatedDocuments(&self.documents_ranked_fields).into_bytes();
self.batch.merge(DATA_RANKED_MAP, &event_bytes)?;
Ok(self.batch)
}
}
pub struct DocumentUpdate<'a> {
document_id: DocumentId,
inner: &'a mut RawUpdateBuilder,
}
impl<'a> DocumentUpdate<'a> {
pub fn remove(&mut self) -> Result<(), SerializerError> {
use serde::ser::Error;
if let Updated = self.inner.documents_update.entry(self.document_id).or_insert(Deleted) {
return Err(SerializerError::custom(
"This document has already been updated and cannot be removed in the same update"
));
}
let start = DocumentKey::new(self.document_id).with_attribute_min();
let end = DocumentKey::new(self.document_id).with_attribute_max(); // FIXME max + 1
self.inner.batch.delete_range(start.as_ref(), end.as_ref())?;
Ok(())
}
pub fn insert_attribute_value(&mut self, attr: SchemaAttr, value: &[u8]) -> Result<(), SerializerError> {
use serde::ser::Error;
if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
return Err(SerializerError::custom(
"This document has already been deleted and cannot be updated in the same update"
));
}
let key = DocumentKeyAttr::new(self.document_id, attr);
self.inner.batch.put(key.as_ref(), &value)?;
Ok(())
}
pub fn insert_doc_index(&mut self, token: Token, doc_index: DocIndex) -> Result<(), SerializerError> {
use serde::ser::Error;
if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
return Err(SerializerError::custom(
"This document has already been deleted and cannot be updated in the same update"
));
}
self.inner.indexed_words.entry(token).or_insert_with(Vec::new).push(doc_index);
Ok(())
}
pub fn register_ranked_attribute(
&mut self,
attr: SchemaAttr,
number: Number,
) -> Result<(), SerializerError>
{
use serde::ser::Error;
if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
return Err(SerializerError::custom(
"This document has already been deleted, ranked attributes cannot be added in the same update"
));
}
self.inner.documents_ranked_fields.insert((self.document_id, attr), number);
Ok(())
}
}
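The update-type bookkeeping above lets one Update touch many documents but refuses to both update and remove the same document. A hypothetical sketch, going through Database::start_update as the tests do and assuming the IndexUpdate handle forwards both methods:

    let mut update = database.start_update("default")?;
    update.update_document(&doc, &tokenizer_builder, &stop_words)?;
    // the same document cannot also be removed within this update
    assert!(update.remove_document(&doc).is_err());
    database.commit_update(update)?;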


@ -0,0 +1,58 @@
use std::error::Error;
use byteorder::{ReadBytesExt, WriteBytesExt};
use crate::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
use crate::write_to_bytes::WriteToBytes;
use crate::database::RankedMap;
use crate::data::DocIds;
pub enum WriteRankedMapEvent<'a> {
RemovedDocuments(&'a DocIds),
UpdatedDocuments(&'a RankedMap),
}
impl<'a> WriteToBytes for WriteRankedMapEvent<'a> {
fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
match self {
WriteRankedMapEvent::RemovedDocuments(doc_ids) => {
let _ = bytes.write_u8(0);
doc_ids.write_to_bytes(bytes);
},
WriteRankedMapEvent::UpdatedDocuments(ranked_map) => {
let _ = bytes.write_u8(1);
bincode::serialize_into(bytes, ranked_map).unwrap()
}
}
}
}
pub enum ReadRankedMapEvent {
RemovedDocuments(DocIds),
UpdatedDocuments(RankedMap),
}
impl ReadRankedMapEvent {
pub fn updated_documents(self) -> Option<RankedMap> {
use ReadRankedMapEvent::*;
match self {
RemovedDocuments(_) => None,
UpdatedDocuments(ranked_map) => Some(ranked_map),
}
}
}
impl FromSharedDataCursor for ReadRankedMapEvent {
type Error = Box<Error>;
fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> {
match cursor.read_u8()? {
0 => DocIds::from_shared_data_cursor(cursor).map(ReadRankedMapEvent::RemovedDocuments),
1 => {
let ranked_map = bincode::deserialize_from(cursor)?;
Ok(ReadRankedMapEvent::UpdatedDocuments(ranked_map))
},
_ => unreachable!(),
}
}
}


@ -1,168 +0,0 @@
use std::collections::btree_map::{BTreeMap, Entry};
use std::path::PathBuf;
use std::error::Error;
use rocksdb::rocksdb_options;
use hashbrown::HashMap;
use fst::map::Map;
use sdset::Set;
use crate::database::index::{Index, Positive, PositiveBuilder, Negative};
use crate::database::{DATA_INDEX, DocumentKeyAttr};
use crate::database::schema::SchemaAttr;
use crate::data::{DocIds, DocIndexes};
use crate::{DocumentId, DocIndex};
use super::Update;
type Token = Vec<u8>; // TODO could be replaced by a SmallVec
type Value = Vec<u8>;
pub struct RawUpdateBuilder {
sst_file: PathBuf,
document_updates: BTreeMap<DocumentId, DocumentUpdate>,
}
pub struct DocumentUpdate {
cleared: bool,
words_indexes: HashMap<Token, Vec<DocIndex>>,
attributes: BTreeMap<SchemaAttr, Value>,
}
impl DocumentUpdate {
pub fn new() -> DocumentUpdate {
DocumentUpdate {
cleared: false,
words_indexes: HashMap::new(),
attributes: BTreeMap::new(),
}
}
pub fn remove(&mut self) {
self.cleared = true;
self.clear();
}
pub fn clear(&mut self) {
self.words_indexes.clear();
self.attributes.clear();
}
pub fn insert_attribute_value(&mut self, attr: SchemaAttr, value: Vec<u8>) {
self.attributes.insert(attr, value);
}
pub fn insert_doc_index(&mut self, token: Vec<u8>, doc_index: DocIndex) {
self.words_indexes.entry(token).or_insert_with(Vec::new).push(doc_index)
}
}
impl RawUpdateBuilder {
pub fn new(path: PathBuf) -> RawUpdateBuilder {
RawUpdateBuilder {
sst_file: path,
document_updates: BTreeMap::new(),
}
}
pub fn document_update(&mut self, document_id: DocumentId) -> &mut DocumentUpdate {
match self.document_updates.entry(document_id) {
Entry::Occupied(mut occupied) => {
occupied.get_mut().clear();
occupied.into_mut()
},
Entry::Vacant(vacant) => vacant.insert(DocumentUpdate::new()),
}
}
pub fn build(mut self) -> Result<Update, Box<Error>> {
let mut removed_document_ids = Vec::new();
let mut words_indexes = BTreeMap::new();
for (&id, update) in self.document_updates.iter_mut() {
if update.cleared { removed_document_ids.push(id) }
for (token, indexes) in &update.words_indexes {
words_indexes.entry(token).or_insert_with(Vec::new).extend_from_slice(indexes)
}
}
let negative = {
let removed_document_ids = Set::new_unchecked(&removed_document_ids);
let doc_ids = DocIds::new(removed_document_ids);
Negative::new(doc_ids)
};
let positive = {
let mut positive_builder = PositiveBuilder::memory();
for (key, mut indexes) in words_indexes {
indexes.sort_unstable();
let indexes = Set::new_unchecked(&indexes);
positive_builder.insert(key, indexes)?;
}
let (map, indexes) = positive_builder.into_inner()?;
let map = Map::from_bytes(map)?;
let indexes = DocIndexes::from_bytes(indexes)?;
Positive::new(map, indexes)
};
let index = Index { negative, positive };
let env_options = rocksdb_options::EnvOptions::new();
let column_family_options = rocksdb_options::ColumnFamilyOptions::new();
let mut file_writer = rocksdb::SstFileWriter::new(env_options, column_family_options);
file_writer.open(&self.sst_file.to_string_lossy())?;
// write the data-index
let mut bytes = Vec::new();
index.write_to_bytes(&mut bytes);
file_writer.merge(DATA_INDEX, &bytes)?;
// write all the documents attributes updates
for (id, update) in self.document_updates {
let mut last_attr: Option<SchemaAttr> = None;
for (attr, value) in update.attributes {
if update.cleared {
// if there is no last attribute, remove from the first attribute
let start_attr = match last_attr {
Some(attr) => attr.next(),
None => Some(SchemaAttr::min())
};
let start = start_attr.map(|a| DocumentKeyAttr::new(id, a));
let end = attr.prev().map(|a| DocumentKeyAttr::new(id, a));
// delete_range between (last_attr + 1) and (attr - 1)
if let (Some(start), Some(end)) = (start, end) {
file_writer.delete_range(start.as_ref(), end.as_ref())?;
}
}
let key = DocumentKeyAttr::new(id, attr);
file_writer.put(key.as_ref(), &value)?;
last_attr = Some(attr);
}
if update.cleared {
// if there is no last attribute, remove from the first attribute
let start_attr = match last_attr {
Some(attr) => attr.next(),
None => Some(SchemaAttr::min())
};
let start = start_attr.map(|a| DocumentKeyAttr::new(id, a));
let end = DocumentKeyAttr::with_attribute_max(id);
// delete_range between (last_attr + 1) and attr_max
if let Some(start) = start {
file_writer.delete_range(start.as_ref(), end.as_ref())?;
}
}
}
file_writer.finish()?;
Ok(Update { sst_file: self.sst_file })
}
}


@ -7,12 +7,14 @@ use rocksdb::rocksdb_options::{ReadOptions, EnvOptions, ColumnFamilyOptions};
use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey, SstFileWriter};
use serde::de::DeserializeOwned;
use crate::database::{retrieve_data_schema, retrieve_data_index, retrieve_data_ranked_map, retrieve_config};
use crate::database::serde::deserializer::Deserializer;
use crate::database::{DocumentKey, DocumentKeyAttr};
use crate::database::{retrieve_data_schema, retrieve_data_index};
use crate::database::deserializer::Deserializer;
use crate::rank::{QueryBuilder, FilterFunc};
use crate::database::schema::Schema;
use crate::database::index::Index;
use crate::rank::{QueryBuilder, FilterFunc};
use crate::database::RankedMap;
use crate::database::Config;
use crate::DocumentId;
pub struct DatabaseView<D>
@ -20,7 +22,9 @@ where D: Deref<Target=DB>
{
snapshot: Snapshot<D>,
index: Index,
ranked_map: RankedMap,
schema: Schema,
config: Config,
}
impl<D> DatabaseView<D>
@ -29,7 +33,9 @@ where D: Deref<Target=DB>
pub fn new(snapshot: Snapshot<D>) -> Result<DatabaseView<D>, Box<Error>> {
let schema = retrieve_data_schema(&snapshot)?;
let index = retrieve_data_index(&snapshot)?;
Ok(DatabaseView { snapshot, index, schema })
let ranked_map = retrieve_data_ranked_map(&snapshot)?;
let config = retrieve_config(&snapshot)?;
Ok(DatabaseView { snapshot, index, ranked_map, schema, config })
}
pub fn schema(&self) -> &Schema {
@ -40,6 +46,10 @@ where D: Deref<Target=DB>
&self.index
}
pub fn ranked_map(&self) -> &RankedMap {
&self.ranked_map
}
pub fn into_snapshot(self) -> Snapshot<D> {
self.snapshot
}
@ -48,6 +58,10 @@ where D: Deref<Target=DB>
&self.snapshot
}
pub fn config(&self) -> &Config {
&self.config
}
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
Ok(self.snapshot.get(key)?)
}
@ -71,12 +85,25 @@ where D: Deref<Target=DB>
Ok(())
}
pub fn query_builder(&self) -> Result<QueryBuilder<D, FilterFunc<D>>, Box<Error>> {
QueryBuilder::new(self)
pub fn query_builder(&self) -> QueryBuilder<FilterFunc> {
QueryBuilder::new(self.index())
}
pub fn raw_field_by_document_id(
&self,
name: &str,
id: DocumentId
) -> Result<Option<Vec<u8>>, Box<Error>>
{
let attr = self.schema.attribute(name).ok_or("field not found")?;
let key = DocumentKeyAttr::new(id, attr);
let vector = self.snapshot.get(key.as_ref())?;
Ok(vector.map(|v| v.to_vec()))
}
pub fn document_by_id<T>(&self, id: DocumentId) -> Result<T, Box<Error>>
where T: DeserializeOwned
where T: DeserializeOwned,
{
let mut deserializer = Deserializer::new(&self.snapshot, &self.schema, id);
Ok(T::deserialize(&mut deserializer)?)


@ -5,21 +5,34 @@ pub mod database;
pub mod data;
pub mod rank;
pub mod tokenizer;
mod attribute;
mod word_area;
mod common_words;
mod shared_data_cursor;
mod write_to_bytes;
use serde_derive::{Serialize, Deserialize};
pub use rocksdb;
pub use self::tokenizer::Tokenizer;
pub use self::common_words::CommonWords;
pub use self::attribute::{Attribute, AttributeError};
pub use self::word_area::{WordArea, WordAreaError};
pub fn is_cjk(c: char) -> bool {
(c >= '\u{2e80}' && c <= '\u{2eff}') ||
(c >= '\u{2f00}' && c <= '\u{2fdf}') ||
(c >= '\u{3040}' && c <= '\u{309f}') ||
(c >= '\u{30a0}' && c <= '\u{30ff}') ||
(c >= '\u{3100}' && c <= '\u{312f}') ||
(c >= '\u{3200}' && c <= '\u{32ff}') ||
(c >= '\u{3400}' && c <= '\u{4dbf}') ||
(c >= '\u{4e00}' && c <= '\u{9fff}') ||
(c >= '\u{f900}' && c <= '\u{faff}')
}
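For example (the ranges above cover the Chinese and Japanese blocks; Hangul syllables fall outside them):

    assert!(is_cjk('漢'));  // U+6F22, CJK Unified Ideographs
    assert!(is_cjk('カ'));  // U+30AB, Katakana
    assert!(!is_cjk('a'));
    assert!(!is_cjk('é'));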
/// Represent an internally generated document unique identifier.
///
/// It is used to inform the database the document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct DocumentId(u64);
@ -36,14 +49,16 @@ pub struct DocIndex {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: Attribute,
pub attribute: u16,
pub word_index: u32,
/// The position in bytes where the word was found
/// along with the length of it.
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub word_area: WordArea,
pub char_index: u32,
pub char_length: u16,
}
/// This structure represent a matching word with informations
@ -68,7 +83,8 @@ pub struct Match {
/// The attribute in the document where the word was found
/// along with the index in it.
pub attribute: Attribute,
pub attribute: u16,
pub word_index: u32,
/// Whether the word that match is an exact match or a prefix.
pub is_exact: bool,
@ -78,7 +94,8 @@ pub struct Match {
///
/// It informs on the original word area in the text indexed
/// without needing to run the tokenizer again.
pub word_area: WordArea,
pub char_index: u32,
pub char_length: u16,
}
impl Match {
@ -86,9 +103,11 @@ impl Match {
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
attribute: 0,
word_index: 0,
is_exact: false,
word_area: WordArea::new_faillible(0, 0),
char_index: 0,
char_length: 0,
}
}
@ -96,9 +115,11 @@ impl Match {
Match {
query_index: u32::max_value(),
distance: u8::max_value(),
attribute: Attribute::max_value(),
attribute: u16::max_value(),
word_index: u32::max_value(),
is_exact: true,
word_area: WordArea::max_value(),
char_index: u32::max_value(),
char_length: u16::max_value(),
}
}
}
@ -110,6 +131,6 @@ mod tests {
#[test]
fn docindex_mem_size() {
assert_eq!(mem::size_of::<DocIndex>(), 16);
assert_eq!(mem::size_of::<DocIndex>(), 24);
}
}
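The new size is consistent with the widened fields: 8 bytes for DocumentId(u64), 2 for attribute, 4 for word_index, 4 for char_index and 2 for char_length give 20 bytes of payload, which the compiler pads up to 24 to satisfy the 8-byte alignment required by the u64.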


@ -1,19 +1,13 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::rank::Document;
use crate::rank::RawDocument;
#[derive(Debug, Clone, Copy)]
pub struct DocumentId;
impl<D> Criterion<D> for DocumentId
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
impl Criterion for DocumentId {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
lhs.id.cmp(&rhs.id)
}
}


@ -1,33 +1,40 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use slice_group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::Match;
use crate::rank::RawDocument;
#[inline]
fn contains_exact(matches: &&[Match]) -> bool {
matches.iter().any(|m| m.is_exact)
}
fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
let mut count = 0;
let mut index = 0;
#[inline]
fn number_exact_matches(matches: &[Match]) -> usize {
GroupBy::new(matches, match_query_index).filter(contains_exact).count()
for group in query_index.linear_group() {
let len = group.len();
count += is_exact[index..index + len].contains(&true) as usize;
index += len;
}
count
}
#[derive(Debug, Clone, Copy)]
pub struct Exact;
impl<D> Criterion<D> for Exact
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = number_exact_matches(&lhs.matches);
let rhs = number_exact_matches(&rhs.matches);
impl Criterion for Exact {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let is_exact = lhs.is_exact();
number_exact_matches(query_index, is_exact)
};
let rhs = {
let query_index = rhs.query_index();
let is_exact = rhs.is_exact();
number_exact_matches(query_index, is_exact)
};
lhs.cmp(&rhs).reverse()
}
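As a worked example with hypothetical values: for query_index = [0, 0, 1, 2, 2] and is_exact = [false, true, false, false, false], linear_group yields the runs [0, 0], [1] and [2, 2]; only the first run contains an exact match, so the count is 1.

    assert_eq!(number_exact_matches(&[0, 0, 1, 2, 2], &[false, true, false, false, false]), 1);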


@ -4,16 +4,11 @@ mod words_proximity;
mod sum_of_words_attribute;
mod sum_of_words_position;
mod exact;
mod sort_by;
mod sort_by_attr;
mod document_id;
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use crate::database::DatabaseView;
use crate::rank::Document;
use crate::rank::RawDocument;
pub use self::{
sum_of_typos::SumOfTypos,
@ -22,60 +17,51 @@ pub use self::{
sum_of_words_attribute::SumOfWordsAttribute,
sum_of_words_position::SumOfWordsPosition,
exact::Exact,
sort_by::SortBy,
sort_by_attr::SortByAttr,
document_id::DocumentId,
};
pub trait Criterion<D>
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering;
pub trait Criterion: Send + Sync {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
#[inline]
fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> bool {
self.evaluate(lhs, rhs, view) == Ordering::Equal
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
self.evaluate(lhs, rhs) == Ordering::Equal
}
}
impl<'a, D, T: Criterion<D> + ?Sized> Criterion<D> for &'a T
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering {
(**self).evaluate(lhs, rhs, view)
impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
(**self).evaluate(lhs, rhs)
}
fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> bool {
(**self).eq(lhs, rhs, view)
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
(**self).eq(lhs, rhs)
}
}
impl<D, T: Criterion<D> + ?Sized> Criterion<D> for Box<T>
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering {
(**self).evaluate(lhs, rhs, view)
impl<T: Criterion + ?Sized> Criterion for Box<T> {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
(**self).evaluate(lhs, rhs)
}
fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> bool {
(**self).eq(lhs, rhs, view)
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
(**self).eq(lhs, rhs)
}
}
#[derive(Default)]
pub struct CriteriaBuilder<D>
where D: Deref<Target=DB>
{
inner: Vec<Box<dyn Criterion<D>>>
pub struct CriteriaBuilder<'a> {
inner: Vec<Box<dyn Criterion + 'a>>
}
impl<D> CriteriaBuilder<D>
where D: Deref<Target=DB>
impl<'a> CriteriaBuilder<'a>
{
pub fn new() -> CriteriaBuilder<D> {
pub fn new() -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::new() }
}
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<D> {
pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
CriteriaBuilder { inner: Vec::with_capacity(capacity) }
}
@ -83,33 +69,29 @@ where D: Deref<Target=DB>
self.inner.reserve(additional)
}
pub fn add<C>(mut self, criterion: C) -> CriteriaBuilder<D>
where C: 'static + Criterion<D>,
pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
where C: Criterion,
{
self.push(criterion);
self
}
pub fn push<C>(&mut self, criterion: C)
where C: 'static + Criterion<D>,
pub fn push<C: 'a>(&mut self, criterion: C)
where C: Criterion,
{
self.inner.push(Box::new(criterion));
}
pub fn build(self) -> Criteria<D> {
pub fn build(self) -> Criteria<'a> {
Criteria { inner: self.inner }
}
}
pub struct Criteria<D>
where D: Deref<Target=DB>
{
inner: Vec<Box<dyn Criterion<D>>>,
pub struct Criteria<'a> {
inner: Vec<Box<dyn Criterion + 'a>>,
}
impl<D> Default for Criteria<D>
where D: Deref<Target=DB>
{
impl<'a> Default for Criteria<'a> {
fn default() -> Self {
CriteriaBuilder::with_capacity(7)
.add(SumOfTypos)
@ -123,10 +105,8 @@ where D: Deref<Target=DB>
}
}
impl<D> AsRef<[Box<dyn Criterion<D>>]> for Criteria<D>
where D: Deref<Target=DB>
{
fn as_ref(&self) -> &[Box<dyn Criterion<D>>] {
impl<'a> AsRef<[Box<Criterion + 'a>]> for Criteria<'a> {
fn as_ref(&self) -> &[Box<dyn Criterion + 'a>] {
&self.inner
}
}
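
The central change in this file is that `Criterion` drops both the `D: Deref<Target=DB>` parameter and the `DatabaseView` argument: it is now `Send + Sync` and compares two `RawDocument`s directly. Below is a minimal, self-contained sketch of the new shape; the `RawDocument` in it is a simplified stand-in for the crate's type, and `NumberOfMatches` is a made-up toy criterion.

```rust
use std::cmp::Ordering;

// Simplified stand-in for the crate's RawDocument (the real one also
// exposes distance(), attribute(), word_index(), and so on).
struct RawDocument {
    query_index: Vec<u32>,
}

impl RawDocument {
    fn query_index(&self) -> &[u32] {
        &self.query_index
    }
}

// The new trait shape: no DatabaseView, no rocksdb type parameter.
trait Criterion: Send + Sync {
    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
}

// Toy criterion: documents matching more query words rank first.
struct NumberOfMatches;

impl Criterion for NumberOfMatches {
    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
        lhs.query_index().len().cmp(&rhs.query_index().len()).reverse()
    }
}

fn main() {
    let a = RawDocument { query_index: vec![0, 1, 2] };
    let b = RawDocument { query_index: vec![0, 1] };
    // `a` matches more query words, so it sorts before `b`.
    assert_eq!(NumberOfMatches.evaluate(&a, &b), Ordering::Less);
}
```

Because a criterion now only needs data owned by the documents themselves, the query builder can sort groups in parallel with rayon's `par_sort_unstable_by` (visible further down in the query builder diff), which is presumably why the trait gained the `Send + Sync` bound.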


@ -1,28 +1,28 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use slice_group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::Match;
use crate::rank::RawDocument;
#[inline]
fn number_of_query_words(matches: &[Match]) -> usize {
GroupBy::new(matches, match_query_index).count()
fn number_of_query_words(query_index: &[u32]) -> usize {
query_index.linear_group().count()
}
#[derive(Debug, Clone, Copy)]
pub struct NumberOfWords;
impl<D> Criterion<D> for NumberOfWords
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = number_of_query_words(&lhs.matches);
let rhs = number_of_query_words(&rhs.matches);
impl Criterion for NumberOfWords {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
number_of_query_words(query_index)
};
let rhs = {
let query_index = rhs.query_index();
number_of_query_words(query_index)
};
lhs.cmp(&rhs).reverse()
}


@ -1,82 +0,0 @@
use std::cmp::Ordering;
use std::ops::Deref;
use std::marker;
use rocksdb::DB;
use serde::de::DeserializeOwned;
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::rank::Document;
/// An helper struct that permit to sort documents by
/// some of their stored attributes.
///
/// # Note
///
/// If a document cannot be deserialized it will be considered [`None`][].
///
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
/// so you must check the [`Ord`] of `Option` implementation.
///
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
///
/// # Example
///
/// ```no-test
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
///
/// #[derive(Deserialize, PartialOrd, Ord, PartialEq, Eq)]
/// struct TimeOnly {
/// time: String,
/// }
///
/// let builder = CriteriaBuilder::with_capacity(8)
/// .add(SumOfTypos)
/// .add(NumberOfWords)
/// .add(WordsProximity)
/// .add(SumOfWordsAttribute)
/// .add(SumOfWordsPosition)
/// .add(Exact)
/// .add(SortBy::<TimeOnly>::new())
/// .add(DocumentId);
///
/// let criterion = builder.build();
///
/// ```
pub struct SortBy<T> {
_phantom: marker::PhantomData<T>,
}
impl<T> SortBy<T> {
pub fn new() -> Self {
SortBy::default()
}
}
impl<T> Default for SortBy<T> {
fn default() -> SortBy<T> {
SortBy { _phantom: marker::PhantomData }
}
}
impl<T, D> Criterion<D> for SortBy<T>
where D: Deref<Target=DB>,
T: DeserializeOwned + Ord,
{
fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering {
let lhs = match view.document_by_id::<T>(lhs.id) {
Ok(doc) => Some(doc),
Err(e) => { eprintln!("{}", e); None },
};
let rhs = match view.document_by_id::<T>(rhs.id) {
Ok(doc) => Some(doc),
Err(e) => { eprintln!("{}", e); None },
};
lhs.cmp(&rhs)
}
}


@ -0,0 +1,122 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use crate::database::schema::{Schema, SchemaAttr};
use crate::rank::criterion::Criterion;
use crate::database::RankedMap;
use crate::rank::RawDocument;
/// A helper struct that permits sorting documents by
/// some of their stored attributes.
///
/// # Note
///
/// If a document cannot be deserialized it will be considered [`None`][].
///
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
/// so be sure to check the [`Ord`] implementation of `Option`.
///
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
///
/// # Example
///
/// ```ignore
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
///
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
///
/// let builder = CriteriaBuilder::with_capacity(8)
/// .add(SumOfTypos)
/// .add(NumberOfWords)
/// .add(WordsProximity)
/// .add(SumOfWordsAttribute)
/// .add(SumOfWordsPosition)
/// .add(Exact)
/// .add(custom_ranking)
/// .add(DocumentId);
///
/// let criterion = builder.build();
///
/// ```
pub struct SortByAttr<'a> {
ranked_map: &'a RankedMap,
attr: SchemaAttr,
reversed: bool,
}
impl<'a> SortByAttr<'a> {
pub fn lower_is_better(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
SortByAttr::new(ranked_map, schema, attr_name, false)
}
pub fn higher_is_better(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
SortByAttr::new(ranked_map, schema, attr_name, true)
}
fn new(
ranked_map: &'a RankedMap,
schema: &Schema,
attr_name: &str,
reversed: bool,
) -> Result<SortByAttr<'a>, SortByAttrError>
{
let attr = match schema.attribute(attr_name) {
Some(attr) => attr,
None => return Err(SortByAttrError::AttributeNotFound),
};
if !schema.props(attr).is_ranked() {
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
}
Ok(SortByAttr { ranked_map, attr, reversed })
}
}
impl<'a> Criterion for SortByAttr<'a> {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = self.ranked_map.get(&(lhs.id, self.attr));
let rhs = self.ranked_map.get(&(rhs.id, self.attr));
match (lhs, rhs) {
(Some(lhs), Some(rhs)) => {
let order = lhs.cmp(&rhs);
if self.reversed { order.reverse() } else { order }
},
(None, Some(_)) => Ordering::Greater,
(Some(_), None) => Ordering::Less,
(None, None) => Ordering::Equal,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortByAttrError {
AttributeNotFound,
AttributeNotRegisteredForRanking,
}
impl fmt::Display for SortByAttrError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use SortByAttrError::*;
match self {
AttributeNotFound => f.write_str("attribute not found in the schema"),
AttributeNotRegisteredForRanking => f.write_str("attribute not registered for ranking"),
}
}
}
impl Error for SortByAttrError { }


@ -1,106 +1,79 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use slice_group_by::GroupBy;
use group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::Match;
use crate::rank::RawDocument;
// This function is not a true log10: it returns log10(n + 1)
// (see the hard-coded values below). It is safe to panic on any
// input greater than 3, as the number of typos is never bigger than that.
#[inline]
fn custom_log10(n: u8) -> f32 {
match n {
0 => 0.0, // log(1)
1 => 0.30102, // log(2)
2 => 0.47712, // log(3)
3 => 0.60205, // log(4)
_ => panic!("invalid number"),
}
}
#[inline]
fn sum_matches_typos(matches: &[Match]) -> isize {
let mut sum_typos = 0;
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
let mut number_words = 0;
let mut sum_typos = 0.0;
let mut index = 0;
// note that GroupBy will never return an empty group
// so we can do this assumption safely
for group in GroupBy::new(matches, match_query_index) {
sum_typos += unsafe { group.get_unchecked(0).distance as isize };
for group in query_index.linear_group() {
sum_typos += custom_log10(distance[index]);
number_words += 1;
index += group.len();
}
sum_typos - number_words
(number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfTypos;
impl<D> Criterion<D> for SumOfTypos
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = sum_matches_typos(&lhs.matches);
let rhs = sum_matches_typos(&rhs.matches);
impl Criterion for SumOfTypos {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let distance = lhs.distance();
sum_matches_typos(query_index, distance)
};
lhs.cmp(&rhs)
let rhs = {
let query_index = rhs.query_index();
let distance = rhs.distance();
sum_matches_typos(query_index, distance)
};
lhs.cmp(&rhs).reverse()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{DocumentId, Attribute, WordArea};
// typing: "Geox CEO"
//
// doc0: "Geox SpA: CEO and Executive"
// doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
#[test]
fn one_typo_reference() {
let doc0 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 2),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(0),
matches: matches,
}
};
let query_index0 = &[0, 1];
let distance0 = &[0, 0];
let doc1 = {
let matches = vec![
Match {
query_index: 0,
distance: 1,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 2),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(1),
matches: matches,
}
};
let query_index1 = &[0, 1];
let distance1 = &[1, 0];
let lhs = sum_matches_typos(&doc0.matches);
let rhs = sum_matches_typos(&doc1.matches);
assert_eq!(lhs.cmp(&rhs), Ordering::Less);
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchette"
@ -109,48 +82,15 @@ mod tests {
// doc1: "bouton"
#[test]
fn no_typo() {
let doc0 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 0,
attribute: Attribute::new_faillible(0, 1),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(0),
matches: matches,
}
};
let query_index0 = &[0, 1];
let distance0 = &[0, 0];
let doc1 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(1),
matches: matches,
}
};
let query_index1 = &[0];
let distance1 = &[0];
let lhs = sum_matches_typos(&doc0.matches);
let rhs = sum_matches_typos(&doc1.matches);
assert_eq!(lhs.cmp(&rhs), Ordering::Less);
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchztte"
@ -159,47 +99,14 @@ mod tests {
// doc1: "bouton"
#[test]
fn one_typo() {
let doc0 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
Match {
query_index: 1,
distance: 1,
attribute: Attribute::new_faillible(0, 1),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(0),
matches: matches,
}
};
let query_index0 = &[0, 1];
let distance0 = &[0, 1];
let doc1 = {
let matches = vec![
Match {
query_index: 0,
distance: 0,
attribute: Attribute::new_faillible(0, 0),
is_exact: false,
word_area: WordArea::new_faillible(0, 6)
},
];
Document {
id: DocumentId(1),
matches: matches,
}
};
let query_index1 = &[0];
let distance1 = &[0];
let lhs = sum_matches_typos(&doc0.matches);
let rhs = sum_matches_typos(&doc1.matches);
assert_eq!(lhs.cmp(&rhs), Ordering::Equal);
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}
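
To make the `SumOfTypos` optimization concrete, here is a standalone sketch of the new scoring. The grouping is hand-rolled here instead of using `slice_group_by`, and it assumes a document's matches are already sorted by query index; the numbers reproduce the "Geox CEO" test above.

```rust
use std::cmp::Ordering;

// Hard-coded log10(n + 1), mirroring custom_log10 in the diff above.
fn custom_log10(n: u8) -> f32 {
    match n {
        0 => 0.0,     // log(1)
        1 => 0.30102, // log(2)
        2 => 0.47712, // log(3)
        3 => 0.60205, // log(4)
        _ => panic!("invalid number"),
    }
}

// One group per query word; only the first distance of each group counts.
// Matching more words and making fewer typos both raise the score.
fn score(query_index: &[u32], distance: &[u8]) -> usize {
    let mut number_words = 0;
    let mut sum_typos = 0.0;
    let mut i = 0;
    while i < query_index.len() {
        sum_typos += custom_log10(distance[i]);
        number_words += 1;
        let current = query_index[i];
        while i < query_index.len() && query_index[i] == current {
            i += 1;
        }
    }
    (number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
}

fn main() {
    // "Geox CEO": doc0 has no typo, doc1 has one typo on the first word.
    let doc0 = score(&[0, 1], &[0, 0]);
    let doc1 = score(&[0, 1], &[1, 0]);
    assert_eq!(doc0, 2000); // 2 words / (0.0 + 1.0) * 1000
    assert_eq!(doc1, 1537); // 2 words / (0.30102 + 1.0) * 1000
    assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less); // doc0 ranks first
}
```

Note the direction change: the previous `sum_typos - number_words` value was compared ascending, while the new score is higher-is-better, hence the added `.reverse()` in `evaluate`.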


@ -1,32 +1,39 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use slice_group_by::GroupBy;
use crate::database::DatabaseView;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::Match;
use crate::rank::RawDocument;
#[inline]
fn sum_matches_attributes(matches: &[Match]) -> usize {
// note that GroupBy will never return an empty group
// so we can do this assumption safely
GroupBy::new(matches, match_query_index).map(|group| {
unsafe { group.get_unchecked(0).attribute.attribute() as usize }
}).sum()
fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
let mut sum_attributes = 0;
let mut index = 0;
for group in query_index.linear_group() {
sum_attributes += attribute[index] as usize;
index += group.len();
}
sum_attributes
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsAttribute;
impl<D> Criterion<D> for SumOfWordsAttribute
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = sum_matches_attributes(&lhs.matches);
let rhs = sum_matches_attributes(&rhs.matches);
impl Criterion for SumOfWordsAttribute {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let attribute = lhs.attribute();
sum_matches_attributes(query_index, attribute)
};
let rhs = {
let query_index = rhs.query_index();
let attribute = rhs.attribute();
sum_matches_attributes(query_index, attribute)
};
lhs.cmp(&rhs)
}


@ -1,32 +1,39 @@
use std::cmp::Ordering;
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use slice_group_by::GroupBy;
use crate::database::DatabaseView;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::Match;
use crate::rank::RawDocument;
#[inline]
fn sum_matches_attribute_index(matches: &[Match]) -> usize {
// note that GroupBy will never return an empty group
// so we can do this assumption safely
GroupBy::new(matches, match_query_index).map(|group| {
unsafe { group.get_unchecked(0).attribute.word_index() as usize }
}).sum()
fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u32]) -> usize {
let mut sum_word_index = 0;
let mut index = 0;
for group in query_index.linear_group() {
sum_word_index += word_index[index] as usize;
index += group.len();
}
sum_word_index
}
#[derive(Debug, Clone, Copy)]
pub struct SumOfWordsPosition;
impl<D> Criterion<D> for SumOfWordsPosition
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = sum_matches_attribute_index(&lhs.matches);
let rhs = sum_matches_attribute_index(&rhs.matches);
impl Criterion for SumOfWordsPosition {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let word_index = lhs.word_index();
sum_matches_attribute_index(query_index, word_index)
};
let rhs = {
let query_index = rhs.query_index();
let word_index = rhs.word_index();
sum_matches_attribute_index(query_index, word_index)
};
lhs.cmp(&rhs)
}


@ -1,16 +1,17 @@
use std::cmp::{self, Ordering};
use std::ops::Deref;
use rocksdb::DB;
use group_by::GroupBy;
use slice_group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView;
use crate::Match;
use crate::rank::RawDocument;
const MAX_DISTANCE: u32 = 8;
#[inline]
fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) {
(a.clone(), b.clone())
}
fn index_proximity(lhs: u32, rhs: u32) -> u32 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)
@ -19,30 +20,58 @@ fn index_proximity(lhs: u32, rhs: u32) -> u32 {
}
}
fn attribute_proximity(lhs: &Match, rhs: &Match) -> u32 {
if lhs.attribute.attribute() != rhs.attribute.attribute() { return MAX_DISTANCE }
index_proximity(lhs.attribute.word_index(), rhs.attribute.word_index())
fn attribute_proximity((lattr, lwi): (u16, u32), (rattr, rwi): (u16, u32)) -> u32 {
if lattr != rattr { return MAX_DISTANCE }
index_proximity(lwi, rwi)
}
fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
fn min_proximity((lattr, lwi): (&[u16], &[u32]), (rattr, rwi): (&[u16], &[u32])) -> u32 {
let mut min_prox = u32::max_value();
for a in lhs {
for b in rhs {
for a in lattr.iter().zip(lwi) {
for b in rattr.iter().zip(rwi) {
let a = clone_tuple(a);
let b = clone_tuple(b);
min_prox = cmp::min(min_prox, attribute_proximity(a, b));
}
}
min_prox
}
fn matches_proximity(matches: &[Match]) -> u32 {
fn matches_proximity(
query_index: &[u32],
distance: &[u8],
attribute: &[u16],
word_index: &[u32],
) -> u32
{
let mut query_index_groups = query_index.linear_group();
let mut proximity = 0;
let mut iter = GroupBy::new(matches, match_query_index);
let mut index = 0;
// iterate over groups by windows of size 2
let mut last = iter.next();
while let (Some(lhs), Some(rhs)) = (last, iter.next()) {
proximity += min_proximity(lhs, rhs);
last = Some(rhs);
let get_attr_wi = |index: usize, group_len: usize| {
// retrieve the first distance group (with the lowest values)
let len = distance[index..index + group_len].linear_group().next().unwrap().len();
let rattr = &attribute[index..index + len];
let rwi = &word_index[index..index + len];
(rattr, rwi)
};
let mut last = query_index_groups.next().map(|group| {
let attr_wi = get_attr_wi(index, group.len());
index += group.len();
attr_wi
});
// iter by windows of size 2
while let (Some(lhs), Some(rhs)) = (last, query_index_groups.next()) {
let attr_wi = get_attr_wi(index, rhs.len());
proximity += min_proximity(lhs, attr_wi);
last = Some(attr_wi);
index += rhs.len();
}
proximity
@ -51,24 +80,32 @@ fn matches_proximity(matches: &[Match]) -> u32 {
#[derive(Debug, Clone, Copy)]
pub struct WordsProximity;
impl<D> Criterion<D> for WordsProximity
where D: Deref<Target=DB>
{
fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView<D>) -> Ordering {
let lhs = matches_proximity(&lhs.matches);
let rhs = matches_proximity(&rhs.matches);
impl Criterion for WordsProximity {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = {
let query_index = lhs.query_index();
let distance = lhs.distance();
let attribute = lhs.attribute();
let word_index = lhs.word_index();
matches_proximity(query_index, distance, attribute, word_index)
};
let rhs = {
let query_index = rhs.query_index();
let distance = rhs.distance();
let attribute = rhs.attribute();
let word_index = rhs.word_index();
matches_proximity(query_index, distance, attribute, word_index)
};
lhs.cmp(&rhs)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Attribute;
#[test]
fn three_different_attributes() {
@ -80,18 +117,15 @@ mod tests {
// { id: 2, attr: 2, attr_index: 0 }
// { id: 3, attr: 3, attr_index: 1 }
let matches = &[
Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() },
Match { query_index: 1, attribute: Attribute::new_faillible(1, 0), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new_faillible(1, 1), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new_faillible(2, 0), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new_faillible(3, 1), ..Match::zero() },
];
let query_index = &[0, 1, 2, 2, 3];
let distance = &[0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 2, 3];
let word_index = &[0, 0, 1, 0, 1];
// soup -> of = 8
// + of -> the = 1
// + the -> day = 8 (not 1)
assert_eq!(matches_proximity(matches), 17);
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 17);
}
#[test]
@ -106,57 +140,14 @@ mod tests {
// { id: 3, attr: 0, attr_index: 1 }
// { id: 3, attr: 1, attr_index: 3 }
let matches = &[
Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() },
Match { query_index: 0, attribute: Attribute::new_faillible(1, 0), ..Match::zero() },
Match { query_index: 1, attribute: Attribute::new_faillible(1, 1), ..Match::zero() },
Match { query_index: 2, attribute: Attribute::new_faillible(1, 2), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new_faillible(0, 1), ..Match::zero() },
Match { query_index: 3, attribute: Attribute::new_faillible(1, 3), ..Match::zero() },
];
let query_index = &[0, 0, 1, 2, 3, 3];
let distance = &[0, 0, 0, 0, 0, 0];
let attribute = &[0, 1, 1, 1, 0, 1];
let word_index = &[0, 0, 1, 2, 1, 3];
// soup -> of = 1
// + of -> the = 1
// + the -> day = 1
assert_eq!(matches_proximity(matches), 3);
}
}
#[cfg(all(feature = "nightly", test))]
mod bench {
extern crate test;
use super::*;
use std::error::Error;
use self::test::Bencher;
use rand_xorshift::XorShiftRng;
use rand::{Rng, SeedableRng};
use crate::Attribute;
#[bench]
fn evaluate_proximity(bench: &mut Bencher) -> Result<(), Box<Error>> {
let number_matches = 30_000;
let mut matches = Vec::with_capacity(number_matches);
let mut rng = XorShiftRng::seed_from_u64(42);
for _ in 0..number_matches {
let query_index = rng.gen_range(0, 4);
let attribute = rng.gen_range(0, 5);
let word_index = rng.gen_range(0, 15);
let attribute = Attribute::new_faillible(attribute, word_index);
let match_ = Match { query_index, attribute, ..Match::zero() };
matches.push(match_);
}
bench.iter(|| {
let proximity = matches_proximity(&matches);
test::black_box(move || proximity)
});
Ok(())
assert_eq!(matches_proximity(query_index, distance, attribute, word_index), 3);
}
}


@ -2,32 +2,182 @@ pub mod criterion;
mod query_builder;
mod distinct_map;
use std::sync::Arc;
use slice_group_by::GroupBy;
use rayon::slice::ParallelSliceMut;
use crate::{Match, DocumentId};
pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};
#[inline]
fn match_query_index(a: &Match, b: &Match) -> bool {
a.query_index == b.query_index
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Document {
pub id: DocumentId,
pub matches: Vec<Match>,
}
impl Document {
pub fn new(doc: DocumentId, match_: Match) -> Self {
unsafe { Self::from_sorted_matches(doc, vec![match_]) }
}
fn from_raw(raw: &RawDocument) -> Document {
let len = raw.matches.range.len();
let mut matches = Vec::with_capacity(len);
pub fn from_matches(doc: DocumentId, mut matches: Vec<Match>) -> Self {
matches.sort_unstable();
unsafe { Self::from_sorted_matches(doc, matches) }
}
let query_index = raw.query_index();
let distance = raw.distance();
let attribute = raw.attribute();
let word_index = raw.word_index();
let is_exact = raw.is_exact();
let char_index = raw.char_index();
let char_length = raw.char_length();
pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec<Match>) -> Self {
Self { id, matches }
for i in 0..len {
let match_ = Match {
query_index: query_index[i],
distance: distance[i],
attribute: attribute[i],
word_index: word_index[i],
is_exact: is_exact[i],
char_index: char_index[i],
char_length: char_length[i],
};
matches.push(match_);
}
Document { id: raw.id, matches }
}
}
#[derive(Clone)]
pub struct RawDocument {
pub id: DocumentId,
pub matches: SharedMatches,
}
impl RawDocument {
fn new(id: DocumentId, range: Range, matches: Arc<Matches>) -> RawDocument {
RawDocument { id, matches: SharedMatches { range, matches } }
}
pub fn query_index(&self) -> &[u32] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
}
pub fn distance(&self) -> &[u8] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
}
pub fn attribute(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
}
pub fn word_index(&self) -> &[u32] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
}
pub fn is_exact(&self) -> &[bool] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
}
pub fn char_index(&self) -> &[u32] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.char_index.get_unchecked(r.start..r.end) }
}
pub fn char_length(&self) -> &[u16] {
let r = self.matches.range;
// it is safe because construction/modifications
// can only be done in this module
unsafe { &self.matches.matches.char_length.get_unchecked(r.start..r.end) }
}
}
pub fn raw_documents_from_matches(mut matches: Vec<(DocumentId, Match)>) -> Vec<RawDocument> {
let mut docs_ranges = Vec::<(DocumentId, Range)>::new();
let mut matches2 = Matches::with_capacity(matches.len());
matches.par_sort_unstable();
for group in matches.linear_group_by(|(a, _), (b, _)| a == b) {
let id = group[0].0;
let start = docs_ranges.last().map(|(_, r)| r.end).unwrap_or(0);
let end = start + group.len();
docs_ranges.push((id, Range { start, end }));
matches2.extend_from_slice(group);
}
let matches = Arc::new(matches2);
docs_ranges.into_iter().map(|(i, r)| RawDocument::new(i, r, matches.clone())).collect()
}
#[derive(Debug, Copy, Clone)]
struct Range {
start: usize,
end: usize,
}
impl Range {
fn len(self) -> usize {
self.end - self.start
}
}
#[derive(Clone)]
pub struct SharedMatches {
range: Range,
matches: Arc<Matches>,
}
#[derive(Clone)]
struct Matches {
query_index: Vec<u32>,
distance: Vec<u8>,
attribute: Vec<u16>,
word_index: Vec<u32>,
is_exact: Vec<bool>,
char_index: Vec<u32>,
char_length: Vec<u16>,
}
impl Matches {
fn with_capacity(cap: usize) -> Matches {
Matches {
query_index: Vec::with_capacity(cap),
distance: Vec::with_capacity(cap),
attribute: Vec::with_capacity(cap),
word_index: Vec::with_capacity(cap),
is_exact: Vec::with_capacity(cap),
char_index: Vec::with_capacity(cap),
char_length: Vec::with_capacity(cap),
}
}
fn extend_from_slice(&mut self, matches: &[(DocumentId, Match)]) {
for (_, match_) in matches {
self.query_index.push(match_.query_index);
self.distance.push(match_.distance);
self.attribute.push(match_.attribute);
self.word_index.push(match_.word_index);
self.is_exact.push(match_.is_exact);
self.char_index.push(match_.char_index);
self.char_length.push(match_.char_length);
}
}
}
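
`RawDocument` stores its matches in a structure-of-arrays layout: one column-oriented `Matches` block shared behind an `Arc`, with each document holding only a `Range` into it. Here is a reduced sketch of the grouping done by `raw_documents_from_matches`, with a single column instead of seven; `RawDoc` and `raw_docs` are illustrative names, not the crate's identifiers.

```rust
use std::ops::Range;
use std::sync::Arc;

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct DocumentId(u64);

// One shared column plus a per-document range into it.
struct RawDoc {
    id: DocumentId,
    range: Range<usize>,
    query_index: Arc<Vec<u32>>,
}

fn raw_docs(mut matches: Vec<(DocumentId, u32)>) -> Vec<RawDoc> {
    matches.sort_unstable();
    let mut column = Vec::with_capacity(matches.len());
    let mut ranges: Vec<(DocumentId, Range<usize>)> = Vec::new();
    for (id, query_index) in matches {
        // extend the current document's range or start a new one
        if ranges.last().map(|(last, _)| *last) == Some(id) {
            ranges.last_mut().unwrap().1.end += 1;
        } else {
            let start = column.len();
            ranges.push((id, start..start + 1));
        }
        column.push(query_index);
    }
    let column = Arc::new(column);
    ranges.into_iter()
        .map(|(id, range)| RawDoc { id, range, query_index: column.clone() })
        .collect()
}

fn main() {
    let matches = vec![(DocumentId(2), 0), (DocumentId(1), 1), (DocumentId(1), 0)];
    let docs = raw_docs(matches);
    assert_eq!(docs.len(), 2);
    assert_eq!(docs[0].id, DocumentId(1));
    assert_eq!(&docs[0].query_index[docs[0].range.clone()], &[0, 1]);
    assert_eq!(&docs[1].query_index[docs[1].range.clone()], &[0]);
}
```

Keeping each field in its own contiguous `Vec` is what lets the criteria slice out only the columns they need (`query_index()`, `distance()`, and so on) without materializing full `Match` values until `Document::from_raw`.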


@ -1,30 +1,56 @@
use std::{cmp, mem, vec, str, char};
use std::ops::{Deref, Range};
use std::error::Error;
use std::{cmp, mem};
use std::ops::Range;
use std::time::Instant;
use std::hash::Hash;
use std::rc::Rc;
use group_by::BinaryGroupByMut;
use rayon::slice::ParallelSliceMut;
use slice_group_by::{GroupByMut, LinearStrGroupBy};
use hashbrown::HashMap;
use fst::Streamer;
use rocksdb::DB;
use log::info;
use crate::automaton::{self, DfaExt, AutomatonExt};
use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap};
use crate::rank::criterion::Criteria;
use crate::database::DatabaseView;
use crate::{Match, DocumentId};
use crate::rank::Document;
use crate::database::Index;
use crate::rank::{raw_documents_from_matches, RawDocument, Document};
use crate::{is_cjk, Match, DocumentId};
#[derive(Debug, PartialEq, Eq)]
enum CharCategory {
Space,
Cjk,
Other,
}
fn classify_char(c: char) -> CharCategory {
if c.is_whitespace() { CharCategory::Space }
else if is_cjk(c) { CharCategory::Cjk }
else { CharCategory::Other }
}
fn is_word(s: &&str) -> bool {
!s.chars().any(char::is_whitespace)
}
fn same_group_category(a: char, b: char) -> bool {
let ca = classify_char(a);
let cb = classify_char(b);
if ca == CharCategory::Cjk || cb == CharCategory::Cjk { false } else { ca == cb }
}
fn split_whitespace_automatons(query: &str) -> Vec<DfaExt> {
let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
let mut automatons = Vec::new();
let mut words = query.split_whitespace().map(str::to_lowercase).peekable();
let mut groups = LinearStrGroupBy::new(query, same_group_category)
.filter(is_word)
.map(str::to_lowercase)
.peekable();
while let Some(word) = words.next() {
let has_following_word = words.peek().is_some();
let lev = if has_following_word || has_end_whitespace {
let mut automatons = Vec::new();
while let Some(word) = groups.next() {
let has_following_word = groups.peek().is_some();
let lev = if has_following_word || has_end_whitespace || word.chars().all(is_cjk) {
automaton::build_dfa(&word)
} else {
automaton::build_prefix_dfa(&word)
@ -35,43 +61,38 @@ fn split_whitespace_automatons(query: &str) -> Vec<DfaExt> {
automatons
}
pub type FilterFunc<D> = fn(DocumentId, &DatabaseView<D>) -> bool;
pub type FilterFunc = fn(DocumentId) -> bool;
pub struct QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>
{
view: &'a DatabaseView<D>,
criteria: Criteria<D>,
pub struct QueryBuilder<'i, 'c, FI> {
index: &'i Index,
criteria: Criteria<'c>,
filter: Option<FI>,
}
impl<'a, D> QueryBuilder<'a, D, FilterFunc<D>>
where D: Deref<Target=DB>
{
pub fn new(view: &'a DatabaseView<D>) -> Result<Self, Box<Error>> {
QueryBuilder::with_criteria(view, Criteria::default())
impl<'i, 'c> QueryBuilder<'i, 'c, FilterFunc> {
pub fn new(index: &'i Index) -> Self {
QueryBuilder::with_criteria(index, Criteria::default())
}
pub fn with_criteria(index: &'i Index, criteria: Criteria<'c>) -> Self {
QueryBuilder { index, criteria, filter: None }
}
}
impl<'a, D, FI> QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>,
impl<'i, 'c, FI> QueryBuilder<'i, 'c, FI>
{
pub fn with_criteria(view: &'a DatabaseView<D>, criteria: Criteria<D>) -> Result<Self, Box<Error>> {
Ok(QueryBuilder { view, criteria, filter: None })
}
pub fn with_filter<F>(self, function: F) -> QueryBuilder<'a, D, F>
where F: Fn(DocumentId, &DatabaseView<D>) -> bool,
pub fn with_filter<F>(self, function: F) -> QueryBuilder<'i, 'c, F>
where F: Fn(DocumentId) -> bool,
{
QueryBuilder {
view: self.view,
index: self.index,
criteria: self.criteria,
filter: Some(function)
}
}
pub fn with_distinct<F, K>(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, FI, F>
where F: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,
pub fn with_distinct<F, K>(self, function: F, size: usize) -> DistinctQueryBuilder<'i, 'c, FI, F>
where F: Fn(DocumentId) -> Option<K>,
K: Hash + Eq,
{
DistinctQueryBuilder {
@ -81,19 +102,19 @@ where D: Deref<Target=DB>,
}
}
fn query_all(&self, query: &str) -> Vec<Document> {
fn query_all(&self, query: &str) -> Vec<RawDocument> {
let automatons = split_whitespace_automatons(query);
let mut stream = {
let mut op_builder = fst::map::OpBuilder::new();
for automaton in &automatons {
let stream = self.view.index().positive.map().search(automaton);
let stream = self.index.map.search(automaton);
op_builder.push(stream);
}
op_builder.union()
};
let mut matches = HashMap::new();
let mut matches = Vec::new();
while let Some((input, indexed_values)) = stream.next() {
for iv in indexed_values {
@ -101,7 +122,7 @@ where D: Deref<Target=DB>,
let distance = automaton.eval(input).to_u8();
let is_exact = distance == 0 && input.len() == automaton.query_len();
let doc_indexes = &self.view.index().positive.indexes();
let doc_indexes = &self.index.indexes;
let doc_indexes = &doc_indexes[iv.value as usize];
for doc_index in doc_indexes {
@ -109,41 +130,50 @@ where D: Deref<Target=DB>,
query_index: iv.index as u32,
distance: distance,
attribute: doc_index.attribute,
word_index: doc_index.word_index,
is_exact: is_exact,
word_area: doc_index.word_area,
char_index: doc_index.char_index,
char_length: doc_index.char_length,
};
matches.entry(doc_index.document_id).or_insert_with(Vec::new).push(match_);
matches.push((doc_index.document_id, match_));
}
}
}
info!("{} documents to classify", matches.len());
let total_matches = matches.len();
let raw_documents = raw_documents_from_matches(matches);
matches.into_iter().map(|(i, m)| Document::from_matches(i, m)).collect()
info!("{} total documents to classify", raw_documents.len());
info!("{} total matches to classify", total_matches);
raw_documents
}
}
impl<'a, D, FI> QueryBuilder<'a, D, FI>
where D: Deref<Target=DB>,
FI: Fn(DocumentId, &DatabaseView<D>) -> bool,
impl<'i, 'c, FI> QueryBuilder<'i, 'c, FI>
where FI: Fn(DocumentId) -> bool,
{
pub fn query(self, query: &str, range: Range<usize>) -> Vec<Document> {
// We give the filtering work to the query distinct builder,
// We delegate the filter work to the distinct query builder,
// specifying a distinct rule that has no effect.
if self.filter.is_some() {
let builder = self.with_distinct(|_, _| None as Option<()>, 1);
let builder = self.with_distinct(|_| None as Option<()>, 1);
return builder.query(query, range);
}
let start = Instant::now();
let mut documents = self.query_all(query);
let mut groups = vec![documents.as_mut_slice()];
let view = &self.view;
info!("query_all took {:.2?}", start.elapsed());
'criteria: for criterion in self.criteria.as_ref() {
let mut groups = vec![documents.as_mut_slice()];
'criteria: for (ci, criterion) in self.criteria.as_ref().iter().enumerate() {
let tmp_groups = mem::replace(&mut groups, Vec::new());
let mut documents_seen = 0;
for group in tmp_groups {
info!("criterion {}, documents group of size {}", ci, group.len());
// if this group does not overlap with the requested range,
// push it without sorting and splitting it
if documents_seen + group.len() < range.start {
@ -152,9 +182,11 @@ where D: Deref<Target=DB>,
continue;
}
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));
let start = Instant::now();
group.par_sort_unstable_by(|a, b| criterion.evaluate(a, b));
info!("criterion {} sort took {:.2?}", ci, start.elapsed());
for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
documents_seen += group.len();
groups.push(group);
@ -165,28 +197,22 @@ where D: Deref<Target=DB>,
}
}
// `drain` removes the documents efficiently using `ptr::copy`
// TODO it could be more efficient to have a custom iterator
let offset = cmp::min(documents.len(), range.start);
documents.drain(0..offset);
documents.truncate(range.len());
documents
let iter = documents.into_iter().skip(offset).take(range.len());
iter.map(|d| Document::from_raw(&d)).collect()
}
}
pub struct DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>
{
inner: QueryBuilder<'a, D, FI>,
pub struct DistinctQueryBuilder<'i, 'c, FI, FD> {
inner: QueryBuilder<'i, 'c, FI>,
function: FD,
size: usize,
}
impl<'a, D, FI, FD> DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>,
impl<'i, 'c, FI, FD> DistinctQueryBuilder<'i, 'c, FI, FD>
{
pub fn with_filter<F>(self, function: F) -> DistinctQueryBuilder<'a, D, F, FD>
where F: Fn(DocumentId, &DatabaseView<D>) -> bool,
pub fn with_filter<F>(self, function: F) -> DistinctQueryBuilder<'i, 'c, F, FD>
where F: Fn(DocumentId) -> bool,
{
DistinctQueryBuilder {
inner: self.inner.with_filter(function),
@ -196,17 +222,18 @@ where D: Deref<Target=DB>,
}
}
impl<'a, D, FI, FD, K> DistinctQueryBuilder<'a, D, FI, FD>
where D: Deref<Target=DB>,
FI: Fn(DocumentId, &DatabaseView<D>) -> bool,
FD: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,
impl<'i, 'c, FI, FD, K> DistinctQueryBuilder<'i, 'c, FI, FD>
where FI: Fn(DocumentId) -> bool,
FD: Fn(DocumentId) -> Option<K>,
K: Hash + Eq,
{
pub fn query(self, query: &str, range: Range<usize>) -> Vec<Document> {
let start = Instant::now();
let mut documents = self.inner.query_all(query);
info!("query_all took {:.2?}", start.elapsed());
let mut groups = vec![documents.as_mut_slice()];
let mut key_cache = HashMap::new();
let view = &self.inner.view;
let mut filter_map = HashMap::new();
// these two variables informs on the current distinct map and
@ -215,12 +242,14 @@ where D: Deref<Target=DB>,
let mut distinct_map = DistinctMap::new(self.size);
let mut distinct_raw_offset = 0;
'criteria: for criterion in self.inner.criteria.as_ref() {
'criteria: for (ci, criterion) in self.inner.criteria.as_ref().iter().enumerate() {
let tmp_groups = mem::replace(&mut groups, Vec::new());
let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
let mut documents_seen = 0;
for group in tmp_groups {
info!("criterion {}, documents group of size {}", ci, group.len());
// if this group does not overlap with the requested range,
// push it without sorting and splitting it
if documents_seen + group.len() < distinct_raw_offset {
@ -229,22 +258,24 @@ where D: Deref<Target=DB>,
continue;
}
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));
let start = Instant::now();
group.par_sort_unstable_by(|a, b| criterion.evaluate(a, b));
info!("criterion {} sort took {:.2?}", ci, start.elapsed());
for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
// we must compute the real distinguished len of this sub-group
for document in group.iter() {
let filter_accepted = match &self.inner.filter {
Some(filter) => {
let entry = filter_map.entry(document.id);
*entry.or_insert_with(|| (filter)(document.id, view))
*entry.or_insert_with(|| (filter)(document.id))
},
None => true,
};
if filter_accepted {
let entry = key_cache.entry(document.id);
let key = entry.or_insert_with(|| (self.function)(document.id, view).map(Rc::new));
let key = entry.or_insert_with(|| (self.function)(document.id).map(Rc::new));
match key.clone() {
Some(key) => buf_distinct.register(key),
@ -290,7 +321,7 @@ where D: Deref<Target=DB>,
};
if distinct_accepted && seen.len() > range.start {
out_documents.push(document);
out_documents.push(Document::from_raw(&document));
if out_documents.len() == range.len() { break }
}
}

src/shared_data_cursor.rs (new file, 56 lines)

@ -0,0 +1,56 @@
use std::io::{self, Read, Cursor, BufRead};
use std::sync::Arc;
use crate::data::SharedData;
pub struct SharedDataCursor(Cursor<SharedData>);
impl SharedDataCursor {
pub fn from_bytes(bytes: Vec<u8>) -> SharedDataCursor {
let len = bytes.len();
let bytes = Arc::new(bytes);
SharedDataCursor::from_shared_bytes(bytes, 0, len)
}
pub fn from_shared_bytes(bytes: Arc<Vec<u8>>, offset: usize, len: usize) -> SharedDataCursor {
let data = SharedData::new(bytes, offset, len);
let cursor = Cursor::new(data);
SharedDataCursor(cursor)
}
pub fn extract(&mut self, amt: usize) -> SharedData {
let offset = self.0.position() as usize;
let extracted = self.0.get_ref().range(offset, amt);
self.0.consume(amt);
extracted
}
}
impl Read for SharedDataCursor {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.0.read(buf)
}
}
impl BufRead for SharedDataCursor {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
self.0.fill_buf()
}
fn consume(&mut self, amt: usize) {
self.0.consume(amt)
}
}
pub trait FromSharedDataCursor: Sized {
type Error;
fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error>;
fn from_bytes(bytes: Vec<u8>) -> Result<Self, Self::Error> {
let mut cursor = SharedDataCursor::from_bytes(bytes);
Self::from_shared_data_cursor(&mut cursor)
}
}


@ -1,4 +1,5 @@
use std::mem;
use crate::is_cjk;
use self::Separator::*;
pub trait TokenizerBuilder {
@ -75,9 +76,9 @@ impl Separator {
fn detect_separator(c: char) -> Option<Separator> {
match c {
'.' | ';' | ',' | '!' | '?' | '-' => Some(Long),
' ' | '\'' | '"' => Some(Short),
_ => None,
'.' | ';' | ',' | '!' | '?' | '-' | '(' | ')' => Some(Long),
' ' | '\'' | '"' => Some(Short),
_ => None,
}
}
@ -109,9 +110,58 @@ impl<'a> Iterator for Tokenizer<'a> {
return Some(token)
}
distance.replace(distance.map_or(sep, |s| s.add(sep)));
distance = Some(distance.map_or(sep, |s| s.add(sep)));
},
None => {
// if this is a Chinese, a Japanese or a Korean character
// See <http://unicode-table.com>
if is_cjk(c) {
match start_word {
Some(start_word) => {
let (prefix, tail) = self.inner.split_at(i);
let (spaces, word) = prefix.split_at(start_word);
self.inner = tail;
self.char_index += spaces.chars().count();
self.word_index += distance.map(Separator::to_usize).unwrap_or(0);
let token = Token {
word: word,
word_index: self.word_index,
char_index: self.char_index,
};
self.word_index += 1;
self.char_index += word.chars().count();
return Some(token)
},
None => {
let (prefix, tail) = self.inner.split_at(i + c.len_utf8());
let (spaces, word) = prefix.split_at(i);
self.inner = tail;
self.char_index += spaces.chars().count();
self.word_index += distance.map(Separator::to_usize).unwrap_or(0);
let token = Token {
word: word,
word_index: self.word_index,
char_index: self.char_index,
};
if tail.chars().next().and_then(detect_separator).is_none() {
self.word_index += 1;
}
self.char_index += 1;
return Some(token)
}
}
}
if start_word.is_none() { start_word = Some(i) }
},
None => { start_word.get_or_insert(i); },
}
}
@ -150,11 +200,12 @@ mod tests {
#[test]
fn hard() {
let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe");
let mut tokenizer = Tokenizer::new(" .? yo lolo. aïe (ouch)");
assert_eq!(tokenizer.next(), Some(Token { word: "yo", word_index: 0, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolo", word_index: 1, char_index: 7 }));
assert_eq!(tokenizer.next(), Some(Token { word: "aïe", word_index: 9, char_index: 13 }));
assert_eq!(tokenizer.next(), Some(Token { word: "ouch", word_index: 17, char_index: 18 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("yo ! lolo ? wtf - lol . aïe ,");
@ -185,4 +236,24 @@ mod tests {
assert_eq!(tokenizer.next(), Some(Token { word: "😣", word_index: 32, char_index: 22 }));
assert_eq!(tokenizer.next(), None);
}
#[test]
fn hard_kanjis() {
let mut tokenizer = Tokenizer::new("\u{2ec4}lolilol\u{2ec7}");
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 1, char_index: 1 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 2, char_index: 8 }));
assert_eq!(tokenizer.next(), None);
let mut tokenizer = Tokenizer::new("\u{2ec4}\u{2ed3}\u{2ef2} lolilol - hello \u{2ec7}");
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec4}", word_index: 0, char_index: 0 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ed3}", word_index: 1, char_index: 1 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ef2}", word_index: 2, char_index: 2 }));
assert_eq!(tokenizer.next(), Some(Token { word: "lolilol", word_index: 3, char_index: 4 }));
assert_eq!(tokenizer.next(), Some(Token { word: "hello", word_index: 11, char_index: 14 }));
assert_eq!(tokenizer.next(), Some(Token { word: "\u{2ec7}", word_index: 12, char_index: 23 }));
assert_eq!(tokenizer.next(), None);
}
}


@ -1,102 +0,0 @@
use std::fmt;
/// Represent a word position in bytes along with the length of it.
///
/// It can represent words byte index to maximum 2^22 and
/// up to words of length 1024.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct WordArea(u32);
impl WordArea {
/// Construct a `WordArea` from a word position in expresed as
/// a number of characters and the length of it.
///
/// # Panics
///
/// The char index must not be greater than 2^22
/// and the length not greater than 1024.
pub(crate) fn new(char_index: u32, length: u16) -> Result<WordArea, WordAreaError> {
if char_index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 {
return Err(WordAreaError::ByteIndexTooBig)
}
if length & 0b1111_1100_0000_0000 != 0 {
return Err(WordAreaError::LengthTooBig)
}
let char_index = char_index << 10;
Ok(WordArea(char_index | u32::from(length)))
}
pub(crate) fn new_faillible(char_index: u32, length: u16) -> WordArea {
match WordArea::new(char_index, length) {
Ok(word_area) => word_area,
Err(WordAreaError::ByteIndexTooBig) => {
panic!("word area byte index must not be greater than 2^22")
},
Err(WordAreaError::LengthTooBig) => {
panic!("word area length must not be greater than 1024")
},
}
}
pub(crate) fn max_value() -> WordArea {
WordArea(u32::max_value())
}
#[inline]
pub fn char_index(self) -> u32 {
self.0 >> 10
}
#[inline]
pub fn length(self) -> u16 {
(self.0 & 0b0000_0000_0000_0000_0011_1111_1111) as u16
}
}
impl fmt::Debug for WordArea {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("WordArea")
.field("char_index", &self.char_index())
.field("length", &self.length())
.finish()
}
}
pub enum WordAreaError {
ByteIndexTooBig,
LengthTooBig,
}
#[cfg(test)]
mod tests {
use super::*;
use quickcheck::{quickcheck, TestResult};
quickcheck! {
fn qc_word_area(gen_char_index: u32, gen_length: u16) -> TestResult {
if gen_char_index > 2_u32.pow(22) || gen_length > 2_u16.pow(10) {
return TestResult::discard()
}
let word_area = WordArea::new_faillible(gen_char_index, gen_length);
let valid_char_index = word_area.char_index() == gen_char_index;
let valid_length = word_area.length() == gen_length;
TestResult::from_bool(valid_char_index && valid_length)
}
fn qc_word_area_ord(gen_char_index: u32, gen_length: u16) -> TestResult {
if gen_char_index >= 2_u32.pow(22) || gen_length >= 2_u16.pow(10) {
return TestResult::discard()
}
let a = WordArea::new_faillible(gen_char_index, gen_length);
let b = WordArea::new_faillible(gen_char_index + 1, gen_length + 1);
TestResult::from_bool(a < b)
}
}
}

src/write_to_bytes.rs (new file, 9 lines)

@ -0,0 +1,9 @@
pub trait WriteToBytes {
fn write_to_bytes(&self, bytes: &mut Vec<u8>);
fn into_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
self.write_to_bytes(&mut bytes);
bytes
}
}
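
`WriteToBytes` looks like the writing-side counterpart of `FromSharedDataCursor` above. Here is a small, self-contained sketch of how the two sides could pair up, using `std::io::Cursor` in place of `SharedDataCursor` and a made-up `Entry` type; the trait body itself is copied from the new file above.

```rust
use std::io::{self, Cursor, Read};

// Copied from the new src/write_to_bytes.rs file above.
trait WriteToBytes {
    fn write_to_bytes(&self, bytes: &mut Vec<u8>);
    fn into_bytes(&self) -> Vec<u8> {
        let mut bytes = Vec::new();
        self.write_to_bytes(&mut bytes);
        bytes
    }
}

// Illustrative type and layout, not part of the crate.
#[derive(Debug, PartialEq)]
struct Entry { id: u64 }

impl WriteToBytes for Entry {
    fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
        bytes.extend_from_slice(&self.id.to_le_bytes());
    }
}

impl Entry {
    // Reading side, using a plain std cursor instead of SharedDataCursor.
    fn from_cursor(cursor: &mut Cursor<Vec<u8>>) -> io::Result<Entry> {
        let mut buf = [0u8; 8];
        cursor.read_exact(&mut buf)?;
        Ok(Entry { id: u64::from_le_bytes(buf) })
    }
}

fn main() -> io::Result<()> {
    let entry = Entry { id: 42 };
    let bytes = entry.into_bytes();
    let mut cursor = Cursor::new(bytes);
    assert_eq!(Entry::from_cursor(&mut cursor)?, Entry { id: 42 });
    Ok(())
}
```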

typos-ranking-rules.md (new file, 59 lines)

@ -0,0 +1,59 @@
# Typo and Ranking rules
This is an explanation of the default rules used in MeiliDB.
First, we have to define some terms that are used in this explanation.
- A query string is the full list of all the words that the end user is searching results for.
- A query word is one of the words that compose the query string.
## Typo rules
The typo rules are applied before the documents are sorted. They are used to aggregate documents, i.e. to choose which documents contain words similar to the queried words.
We use a prefix _Levenshtein_ algorithm to check whether the words match. The only difference from the classic Levenshtein algorithm is that it also accepts every word that **starts with a query word**. In other words, a word is accepted if it starts with the query word or has the same length as it.
The Levenshtein distance between two words _M_ and _P_ is defined as "the minimum cost of transforming _M_ into _P_" by performing the following elementary operations:
- substitution of a character of _M_ by a character of _P_ (e.g. **k**itten → **s**itten)
- insertion in _M_ of a character of _P_ (e.g. sittin → sittin**g**)
- deletion of a character from _M_ (e.g. satu**r**day → satuday)
There are rules about what can be considered "similar". These rules apply **per word**, not to the whole query string.
- If the query word is between 1 and 4 characters long, **no** typo is allowed: only documents that contain words starting with, or exactly equal to, this query word are considered valid for this request.
- If the query word is between 5 and 8 characters long, **one** typo is allowed. Documents that contain words matching with one typo are retained for the next steps.
- If the query word contains more than 8 characters, we accept a maximum of **two** typos.
This means that "satuday", which is 7 characters long, uses the second rule, so every document containing a word at most **one** typo away from it will match (a small sketch of this per-word typo budget follows the examples below). For example:
- "satuday" is accepted because it is exactly the same word.
- "sat" is not accepted because the query word is not a prefix of it but the opposite.
- "satu**r**day" is accepted because it contains **one** typo.
- "s**u**tu**r**day" is not accepted because it contains **two** typos.
## Ranking rules
All documents that have been aggregated using the typo rules above can now be sorted. MeiliDB uses a bucket sort.
What is a bucket sort? We sort all the documents with the first rule; for the documents that it cannot separate, we create a group and sort that group with the second rule, and so on.
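
Here is a minimal sketch of this bucket sort, using `(typos, words)` tuples in place of documents and two closures in place of the criteria list; it illustrates the idea only and is not the crate's implementation.

```rust
use std::cmp::Ordering;

fn bucket_sort(docs: &mut Vec<(u32, u32)>) {
    // Two toy criteria: fewer typos first, then more matched words first.
    let criteria: Vec<Box<dyn Fn(&(u32, u32), &(u32, u32)) -> Ordering>> = vec![
        Box::new(|a, b| a.0.cmp(&b.0)),
        Box::new(|a, b| a.1.cmp(&b.1).reverse()),
    ];

    let mut groups: Vec<Vec<(u32, u32)>> = vec![std::mem::take(docs)];
    for criterion in &criteria {
        let mut next_groups = Vec::new();
        for mut group in groups {
            group.sort_by(|a, b| criterion(a, b));
            // split the sorted group into runs the criterion considers equal;
            // each run is handed to the next criterion
            while !group.is_empty() {
                let run_len = group.iter()
                    .take_while(|d| criterion(*d, &group[0]) == Ordering::Equal)
                    .count();
                let rest = group.split_off(run_len);
                next_groups.push(group);
                group = rest;
            }
        }
        groups = next_groups;
    }
    *docs = groups.into_iter().flatten().collect();
}

fn main() {
    let mut docs = vec![(1, 2), (0, 1), (0, 3), (1, 1)];
    bucket_sort(&mut docs);
    // Zero-typo documents come first; among them, the one matching more words.
    assert_eq!(docs, vec![(0, 3), (0, 1), (1, 2), (1, 1)]);
}
```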
Here is the list of all the default rules that are executed in this specific order by default:
- _Number of Typos_ - The fewer typos there are between the query words and the document words, the better the document.
- _Number of Words_ - A document containing more of the query words will be more important than one that contains fewer.
- _Words Proximity_ - The closer the query words are to each other in the document, the better the document.
- _Attribute_ - A document containing the query words in a more important attribute than another document is considered better.
- _Position_ - A document containing the query words at the start of an attribute is considered better than a document that contains them at the end.
- _Exact_ - A document containing the query words in their exact form, not only a prefix of them, is considered better.