Merge pull request #237 from meilisearch/fix-exactness-criterion

Fix the exactness criterion algorithm
2025-07-20 21:30:58 +00:00 · 2019-10-26 18:43:10 +02:00 · 2019-10-26 18:34:40 +02:00 · 2019-10-26 18:23:19 +02:00 · 2019-10-26 15:56:34 +02:00 · 2019-10-24 15:29:16 +02:00
21 changed files with 805 additions and 210 deletions
--- a/README.md
+++ b/README.md
@ -12,6 +12,7 @@ A _full-text search database_ based on the fast [LMDB key-value store](https://e
 - Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
 - Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L283), useful for paginating results
 - Can [distinct](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L265-L270) and [filter](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L246-L259) returned documents based on context defined rules
+- Searches for [concatenated](https://github.com/meilisearch/MeiliDB/pull/164) and [split query words](https://github.com/meilisearch/MeiliDB/pull/232) to improve the search quality.
 - Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-schema/src/lib.rs#L265-L279)
 - The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-tokenizer/src/lib.rs) can index latin and kanji based languages
 - Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/lib.rs#L66-L88), useful to highlight matched words in results
--- a/meilidb-core/src/automaton/mod.rs
+++ b/meilidb-core/src/automaton/mod.rs
@ -2,7 +2,7 @@ mod dfa;
 mod query_enhancer;

 use std::cmp::Reverse;
-use std::vec;
+use std::{cmp, vec};

 use fst::{IntoStreamer, Streamer};
 use levenshtein_automata::DFA;
@ -18,7 +18,7 @@ use self::query_enhancer::QueryEnhancerBuilder;
 const NGRAMS: usize = 3;

 pub struct AutomatonProducer {
-    automatons: Vec<Vec<Automaton>>,
+    automatons: Vec<AutomatonGroup>,
 }

 impl AutomatonProducer {
@ -26,19 +26,47 @@ impl AutomatonProducer {
        reader: &heed::RoTxn,
        query: &str,
        main_store: store::Main,
+        postings_list_store: store::PostingsLists,
        synonyms_store: store::Synonyms,
    ) -> MResult<(AutomatonProducer, QueryEnhancer)> {
-        let (automatons, query_enhancer) =
-            generate_automatons(reader, query, main_store, synonyms_store)?;
+        let (automatons, query_enhancer) = generate_automatons(
+            reader,
+            query,
+            main_store,
+            postings_list_store,
+            synonyms_store,
+        )?;

        Ok((AutomatonProducer { automatons }, query_enhancer))
    }

-    pub fn into_iter(self) -> vec::IntoIter<Vec<Automaton>> {
+    pub fn into_iter(self) -> vec::IntoIter<AutomatonGroup> {
        self.automatons.into_iter()
    }
 }

+#[derive(Debug)]
+pub struct AutomatonGroup {
+    pub is_phrase_query: bool,
+    pub automatons: Vec<Automaton>,
+}
+
+impl AutomatonGroup {
+    fn normal(automatons: Vec<Automaton>) -> AutomatonGroup {
+        AutomatonGroup {
+            is_phrase_query: false,
+            automatons,
+        }
+    }
+
+    fn phrase_query(automatons: Vec<Automaton>) -> AutomatonGroup {
+        AutomatonGroup {
+            is_phrase_query: true,
+            automatons,
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct Automaton {
    pub index: usize,
@ -102,12 +130,41 @@ pub fn normalize_str(string: &str) -> String {
    string
 }

+fn split_best_frequency<'a>(
+    reader: &heed::RoTxn,
+    word: &'a str,
+    postings_lists_store: store::PostingsLists,
+) -> MResult<Option<(&'a str, &'a str)>> {
+    let chars = word.char_indices().skip(1);
+    let mut best = None;
+
+    for (i, _) in chars {
+        let (left, right) = word.split_at(i);
+
+        let left_freq = postings_lists_store
+            .postings_list(reader, left.as_ref())?
+            .map_or(0, |i| i.len());
+
+        let right_freq = postings_lists_store
+            .postings_list(reader, right.as_ref())?
+            .map_or(0, |i| i.len());
+
+        let min_freq = cmp::min(left_freq, right_freq);
+        if min_freq != 0 && best.map_or(true, |(old, _, _)| min_freq > old) {
+            best = Some((min_freq, left, right));
+        }
+    }
+
+    Ok(best.map(|(_, l, r)| (l, r)))
+}
+
 fn generate_automatons(
    reader: &heed::RoTxn,
    query: &str,
    main_store: store::Main,
+    postings_lists_store: store::PostingsLists,
    synonym_store: store::Synonyms,
-) -> MResult<(Vec<Vec<Automaton>>, QueryEnhancer)> {
+) -> MResult<(Vec<AutomatonGroup>, QueryEnhancer)> {
    let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
    let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
    let synonyms = match main_store.synonyms_fst(reader)? {
@ -136,7 +193,7 @@ fn generate_automatons(
        original_automatons.push(automaton);
    }

-    automatons.push(original_automatons);
+    automatons.push(AutomatonGroup::normal(original_automatons));

    for n in 1..=NGRAMS {
        let mut ngrams = query_words.windows(n).enumerate().peekable();
@ -188,13 +245,27 @@ fn generate_automatons(
                                Automaton::non_exact(automaton_index, n, synonym)
                            };
                            automaton_index += 1;
-                            automatons.push(vec![automaton]);
+                            automatons.push(AutomatonGroup::normal(vec![automaton]));
                        }
                    }
                }
            }

-            if n != 1 {
+            if n == 1 {
+                if let Some((left, right)) =
+                    split_best_frequency(reader, &normalized, postings_lists_store)?
+                {
+                    let a = Automaton::exact(automaton_index, 1, left);
+                    enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
+                    automaton_index += 1;
+
+                    let b = Automaton::exact(automaton_index, 1, right);
+                    enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
+                    automaton_index += 1;
+
+                    automatons.push(AutomatonGroup::phrase_query(vec![a, b]));
+                }
+            } else {
                // automaton of concatenation of query words
                let concat = ngram_slice.concat();
                let normalized = normalize_str(&concat);
@ -204,16 +275,20 @@ fn generate_automatons(

                let automaton = Automaton::exact(automaton_index, n, &normalized);
                automaton_index += 1;
-                automatons.push(vec![automaton]);
+                automatons.push(AutomatonGroup::normal(vec![automaton]));
            }
        }
    }

    // order automatons, the most important first,
    // we keep the original automatons at the front.
-    automatons[1..].sort_by_key(|a| {
-        let a = a.first().unwrap();
-        (Reverse(a.is_exact), a.ngram)
+    automatons[1..].sort_by_key(|group| {
+        let a = group.automatons.first().unwrap();
+        (
+            Reverse(a.is_exact),
+            a.ngram,
+            Reverse(group.automatons.len()),
+        )
    });

    Ok((automatons, enhancer_builder.build()))
--- a/meilidb-core/src/criterion/exact.rs
+++ b/meilidb-core/src/criterion/exact.rs
@ -21,16 +21,15 @@ fn number_exact_matches(
        let len = group.len();

        let mut found_exact = false;
-        for (pos, _) in is_exact[index..index + len]
-            .iter()
-            .filter(|x| **x)
-            .enumerate()
-        {
-            found_exact = true;
-            if let Ok(pos) = fields_counts.binary_search_by_key(&attribute[pos], |(a, _)| a.0) {
-                let (_, count) = fields_counts[pos];
-                if count == 1 {
-                    return usize::max_value();
+        for (pos, is_exact) in is_exact[index..index + len].iter().enumerate() {
+            if *is_exact {
+                found_exact = true;
+                let attr = &attribute[index + pos];
+                if let Ok(pos) = fields_counts.binary_search_by_key(attr, |(a, _)| a.0) {
+                    let (_, count) = fields_counts[pos];
+                    if count == 1 {
+                        return usize::max_value();
+                    }
                }
            }
        }
--- a/meilidb-core/src/database.rs
+++ b/meilidb-core/src/database.rs
@ -35,8 +35,10 @@ fn update_awaiter(receiver: Receiver<()>, env: heed::Env, update_fn: Arc<ArcSwap

            match update::update_task(&mut writer, index.clone()) {
                Ok(Some(status)) => {
-                    if let Err(e) = writer.commit() {
-                        error!("update transaction failed: {}", e)
+                    if status.result.is_ok() {
+                        if let Err(e) = writer.commit() {
+                            error!("update transaction failed: {}", e)
+                        }
                    }

                    if let Some(ref callback) = *update_fn.load() {
--- a/meilidb-core/src/error.rs
+++ b/meilidb-core/src/error.rs
@ -12,6 +12,7 @@ pub enum Error {
    SchemaMissing,
    WordIndexMissing,
    MissingDocumentId,
+    DuplicateDocument,
    Zlmdb(heed::Error),
    Fst(fst::Error),
    SerdeJson(SerdeJsonError),
@ -79,6 +80,7 @@ impl fmt::Display for Error {
            SchemaMissing => write!(f, "this index does not have a schema"),
            WordIndexMissing => write!(f, "this index does not have a word index"),
            MissingDocumentId => write!(f, "document id is missing"),
+            DuplicateDocument => write!(f, "update contains documents with the same id"),
            Zlmdb(e) => write!(f, "heed error; {}", e),
            Fst(e) => write!(f, "fst error; {}", e),
            SerdeJson(e) => write!(f, "serde json error; {}", e),
@ -95,6 +97,10 @@ impl error::Error for Error {}
 #[derive(Debug)]
 pub enum UnsupportedOperation {
    SchemaAlreadyExists,
+    CannotUpdateSchemaIdentifier,
+    CannotReorderSchemaAttribute,
+    CannotIntroduceNewSchemaAttribute,
+    CannotRemoveSchemaAttribute,
 }

 impl fmt::Display for UnsupportedOperation {
@ -102,6 +108,12 @@ impl fmt::Display for UnsupportedOperation {
        use self::UnsupportedOperation::*;
        match self {
            SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"),
+            CannotUpdateSchemaIdentifier => write!(f, "Cannot update the identifier of a schema"),
+            CannotReorderSchemaAttribute => write!(f, "Cannot reorder the attributes of a schema"),
+            CannotIntroduceNewSchemaAttribute => {
+                write!(f, "Cannot introduce new attributes in a schema")
+            }
+            CannotRemoveSchemaAttribute => write!(f, "Cannot remove attributes from a schema"),
        }
    }
 }
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@ -1,4 +1,5 @@
 use hashbrown::HashMap;
+use std::convert::TryFrom;
 use std::mem;
 use std::ops::Range;
 use std::rc::Rc;
@ -8,7 +9,7 @@ use fst::{IntoStreamer, Streamer};
 use sdset::SetBuf;
 use slice_group_by::{GroupBy, GroupByMut};

-use crate::automaton::{Automaton, AutomatonProducer, QueryEnhancer};
+use crate::automaton::{Automaton, AutomatonGroup, AutomatonProducer, QueryEnhancer};
 use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
 use crate::raw_document::{raw_documents_from, RawDocument};
 use crate::{criterion::Criteria, Document, DocumentId, Highlight, TmpMatch};
@ -138,7 +139,7 @@ fn multiword_rewrite_matches(

 fn fetch_raw_documents(
    reader: &heed::RoTxn,
-    automatons: &[Automaton],
+    automatons_groups: &[AutomatonGroup],
    query_enhancer: &QueryEnhancer,
    searchables: Option<&ReorderedAttrs>,
    main_store: store::Main,
@ -148,55 +149,94 @@ fn fetch_raw_documents(
    let mut matches = Vec::new();
    let mut highlights = Vec::new();

-    for automaton in automatons {
-        let Automaton {
-            index,
-            is_exact,
-            query_len,
-            ..
-        } = automaton;
-        let dfa = automaton.dfa();
+    for group in automatons_groups {
+        let AutomatonGroup {
+            is_phrase_query,
+            automatons,
+        } = group;
+        let phrase_query_len = automatons.len();

-        let words = match main_store.words_fst(reader)? {
-            Some(words) => words,
-            None => return Ok(Vec::new()),
-        };
+        let mut tmp_matches = Vec::new();
+        for (id, automaton) in automatons.into_iter().enumerate() {
+            let Automaton {
+                index,
+                is_exact,
+                query_len,
+                ..
+            } = automaton;
+            let dfa = automaton.dfa();

-        let mut stream = words.search(&dfa).into_stream();
-        while let Some(input) = stream.next() {
-            let distance = dfa.eval(input).to_u8();
-            let is_exact = *is_exact && distance == 0 && input.len() == *query_len;
-
-            let doc_indexes = match postings_lists_store.postings_list(reader, input)? {
-                Some(doc_indexes) => doc_indexes,
-                None => continue,
+            let words = match main_store.words_fst(reader)? {
+                Some(words) => words,
+                None => return Ok(Vec::new()),
            };

-            matches.reserve(doc_indexes.len());
-            highlights.reserve(doc_indexes.len());
+            let mut stream = words.search(&dfa).into_stream();
+            while let Some(input) = stream.next() {
+                let distance = dfa.eval(input).to_u8();
+                let is_exact = *is_exact && distance == 0 && input.len() == *query_len;

-            for di in doc_indexes.as_ref() {
-                let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
-                if let Some(attribute) = attribute {
-                    let match_ = TmpMatch {
-                        query_index: *index as u32,
-                        distance,
-                        attribute,
-                        word_index: di.word_index,
-                        is_exact,
-                    };
+                let doc_indexes = match postings_lists_store.postings_list(reader, input)? {
+                    Some(doc_indexes) => doc_indexes,
+                    None => continue,
+                };

-                    let highlight = Highlight {
-                        attribute: di.attribute,
-                        char_index: di.char_index,
-                        char_length: di.char_length,
-                    };
+                tmp_matches.reserve(doc_indexes.len());

-                    matches.push((di.document_id, match_));
-                    highlights.push((di.document_id, highlight));
+                for di in doc_indexes.as_ref() {
+                    let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
+                    if let Some(attribute) = attribute {
+                        let match_ = TmpMatch {
+                            query_index: *index as u32,
+                            distance,
+                            attribute,
+                            word_index: di.word_index,
+                            is_exact,
+                        };
+
+                        let highlight = Highlight {
+                            attribute: di.attribute,
+                            char_index: di.char_index,
+                            char_length: u16::try_from(*query_len).unwrap_or(u16::max_value()),
+                        };
+
+                        tmp_matches.push((di.document_id, id, match_, highlight));
+                    }
                }
            }
        }
+
+        if *is_phrase_query {
+            tmp_matches.sort_unstable_by_key(|(id, _, m, _)| (*id, m.attribute, m.word_index));
+            for group in tmp_matches.linear_group_by_key(|(id, _, m, _)| (*id, m.attribute)) {
+                for window in group.windows(2) {
+                    let (ida, ia, ma, ha) = window[0];
+                    let (idb, ib, mb, hb) = window[1];
+
+                    debug_assert_eq!(ida, idb);
+
+                    // if the two matches must follow each other and actually do
+                    if ia + 1 == ib && ma.word_index + 1 == mb.word_index {
+                        // TODO we must make it work for phrase query longer than 2
+                        // if the second match is the last phrase query word
+                        if ib + 1 == phrase_query_len {
+                            // insert first match
+                            matches.push((ida, ma));
+                            highlights.push((ida, ha));
+
+                            // insert second match
+                            matches.push((idb, mb));
+                            highlights.push((idb, hb));
+                        }
+                    }
+                }
+            }
+        } else {
+            for (id, _, match_, highlight) in tmp_matches {
+                matches.push((id, match_));
+                highlights.push((id, highlight));
+            }
+        }
    }

    let matches = multiword_rewrite_matches(matches, &query_enhancer);
@ -367,15 +407,20 @@ where
    let start_processing = Instant::now();
    let mut raw_documents_processed = Vec::with_capacity(range.len());

-    let (automaton_producer, query_enhancer) =
-        AutomatonProducer::new(reader, query, main_store, synonyms_store)?;
+    let (automaton_producer, query_enhancer) = AutomatonProducer::new(
+        reader,
+        query,
+        main_store,
+        postings_lists_store,
+        synonyms_store,
+    )?;

    let automaton_producer = automaton_producer.into_iter();
    let mut automatons = Vec::new();

    // aggregate automatons groups by groups after time
    for auts in automaton_producer {
-        automatons.extend(auts);
+        automatons.push(auts);

        // we must retrieve the documents associated
        // with the current automatons
@ -480,15 +525,20 @@ where
    let start_processing = Instant::now();
    let mut raw_documents_processed = Vec::new();

-    let (automaton_producer, query_enhancer) =
-        AutomatonProducer::new(reader, query, main_store, synonyms_store)?;
+    let (automaton_producer, query_enhancer) = AutomatonProducer::new(
+        reader,
+        query,
+        main_store,
+        postings_lists_store,
+        synonyms_store,
+    )?;

    let automaton_producer = automaton_producer.into_iter();
    let mut automatons = Vec::new();

    // aggregate automatons groups by groups after time
    for auts in automaton_producer {
-        automatons.extend(auts);
+        automatons.push(auts);

        // we must retrieve the documents associated
        // with the current automatons
@ -1697,4 +1747,68 @@ mod tests {
        });
        assert_matches!(iter.next(), None);
    }
+
+    #[test]
+    fn simple_phrase_query_splitting() {
+        let store = TempDatabase::from_iter(vec![
+            ("search", &[doc_index(0, 0)][..]),
+            ("engine", &[doc_index(0, 1)][..]),
+            ("search", &[doc_index(1, 0)][..]),
+            ("slow", &[doc_index(1, 1)][..]),
+            ("engine", &[doc_index(1, 2)][..]),
+        ]);
+
+        let env = &store.database.env;
+        let reader = env.read_txn().unwrap();
+
+        let builder = store.query_builder();
+        let results = builder.query(&reader, "searchengine", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut iter = matches.into_iter();
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, distance: 0, .. })); // search
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 1, distance: 0, .. })); // engine
+            assert_matches!(iter.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+    }
+
+    #[test]
+    fn harder_phrase_query_splitting() {
+        let store = TempDatabase::from_iter(vec![
+            ("search", &[doc_index(0, 0)][..]),
+            ("search", &[doc_index(0, 1)][..]),
+            ("engine", &[doc_index(0, 2)][..]),
+            ("search", &[doc_index(1, 0)][..]),
+            ("slow", &[doc_index(1, 1)][..]),
+            ("search", &[doc_index(1, 2)][..]),
+            ("engine", &[doc_index(1, 3)][..]),
+            ("search", &[doc_index(1, 0)][..]),
+            ("search", &[doc_index(1, 1)][..]),
+            ("slow", &[doc_index(1, 2)][..]),
+            ("engine", &[doc_index(1, 3)][..]),
+        ]);
+
+        let env = &store.database.env;
+        let reader = env.read_txn().unwrap();
+
+        let builder = store.query_builder();
+        let results = builder.query(&reader, "searchengine", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut iter = matches.into_iter();
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 1, distance: 0, .. })); // search
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 2, distance: 0, .. })); // engine
+            assert_matches!(iter.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
+            let mut iter = matches.into_iter();
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 2, distance: 0, .. })); // search
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 3, distance: 0, .. })); // engine
+            assert_matches!(iter.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+    }
 }
--- a/meilidb-core/src/serde/convert_to_string.rs
+++ b/meilidb-core/src/serde/convert_to_string.rs
@ -12,8 +12,8 @@ impl ser::Serializer for ConvertToString {
    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
-    type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeMap = MapConvertToString;
+    type SerializeStruct = StructConvertToString;
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

    fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
@ -169,7 +169,9 @@ impl ser::Serializer for ConvertToString {
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
-        Err(SerializerError::UnserializableType { type_name: "map" })
+        Ok(MapConvertToString {
+            text: String::new(),
+        })
    }

    fn serialize_struct(
@ -177,8 +179,8 @@ impl ser::Serializer for ConvertToString {
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
-        Err(SerializerError::UnserializableType {
-            type_name: "struct",
+        Ok(StructConvertToString {
+            text: String::new(),
        })
    }

@ -194,3 +196,63 @@ impl ser::Serializer for ConvertToString {
        })
    }
 }
+
+pub struct MapConvertToString {
+    text: String,
+}
+
+impl ser::SerializeMap for MapConvertToString {
+    type Ok = String;
+    type Error = SerializerError;
+
+    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
+    where
+        T: ser::Serialize,
+    {
+        let text = key.serialize(ConvertToString)?;
+        self.text.push_str(&text);
+        self.text.push_str(" ");
+        Ok(())
+    }
+
+    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    where
+        T: ser::Serialize,
+    {
+        let text = value.serialize(ConvertToString)?;
+        self.text.push_str(&text);
+        Ok(())
+    }
+
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        Ok(self.text)
+    }
+}
+
+pub struct StructConvertToString {
+    text: String,
+}
+
+impl ser::SerializeStruct for StructConvertToString {
+    type Ok = String;
+    type Error = SerializerError;
+
+    fn serialize_field<T: ?Sized>(
+        &mut self,
+        key: &'static str,
+        value: &T,
+    ) -> Result<(), Self::Error>
+    where
+        T: ser::Serialize,
+    {
+        let value = value.serialize(ConvertToString)?;
+        self.text.push_str(key);
+        self.text.push_str(" ");
+        self.text.push_str(&value);
+        Ok(())
+    }
+
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        Ok(self.text)
+    }
+}
--- a/meilidb-core/src/serde/indexer.rs
+++ b/meilidb-core/src/serde/indexer.rs
@ -20,7 +20,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeMap = MapIndexer<'a>;
-    type SerializeStruct = StructSerializer<'a>;
+    type SerializeStruct = StructIndexer<'a>;
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

    fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
@ -302,14 +302,14 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {
    }
 }

-pub struct StructSerializer<'a> {
+pub struct StructIndexer<'a> {
    attribute: SchemaAttr,
    document_id: DocumentId,
    indexer: &'a mut RawIndexer,
    texts: Vec<String>,
 }

-impl<'a> ser::SerializeStruct for StructSerializer<'a> {
+impl<'a> ser::SerializeStruct for StructIndexer<'a> {
    type Ok = Option<usize>;
    type Error = SerializerError;

--- a/meilidb-core/src/serde/mod.rs
+++ b/meilidb-core/src/serde/mod.rs
@ -20,16 +20,14 @@ pub use self::convert_to_string::ConvertToString;
 pub use self::deserializer::{Deserializer, DeserializerError};
 pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
 pub use self::indexer::Indexer;
-pub use self::serializer::Serializer;
+pub use self::serializer::{serialize_value, Serializer};

-use std::collections::BTreeMap;
 use std::{error::Error, fmt};

-use meilidb_schema::SchemaAttr;
 use serde::ser;
 use serde_json::Error as SerdeJsonError;

-use crate::{DocumentId, ParseNumberError};
+use crate::ParseNumberError;

 #[derive(Debug)]
 pub enum SerializerError {
@ -103,25 +101,3 @@ impl From<ParseNumberError> for SerializerError {
        SerializerError::ParseNumber(error)
    }
 }
-
-pub struct RamDocumentStore(BTreeMap<(DocumentId, SchemaAttr), Vec<u8>>);
-
-impl RamDocumentStore {
-    pub fn new() -> RamDocumentStore {
-        RamDocumentStore(BTreeMap::new())
-    }
-
-    pub fn set_document_field(&mut self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) {
-        self.0.insert((id, attr), value);
-    }
-
-    pub fn into_inner(self) -> BTreeMap<(DocumentId, SchemaAttr), Vec<u8>> {
-        self.0
-    }
-}
-
-impl Default for RamDocumentStore {
-    fn default() -> Self {
-        Self::new()
-    }
-}
--- a/meilidb-core/src/serde/serializer.rs
+++ b/meilidb-core/src/serde/serializer.rs
@ -1,17 +1,17 @@
-use meilidb_schema::{Schema, SchemaAttr};
+use meilidb_schema::{Schema, SchemaAttr, SchemaProps};
 use serde::ser;
-use std::collections::HashMap;

 use crate::raw_indexer::RawIndexer;
-use crate::serde::RamDocumentStore;
+use crate::store::{DocumentsFields, DocumentsFieldsCounts};
 use crate::{DocumentId, RankedMap};

 use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};

 pub struct Serializer<'a> {
+    pub txn: &'a mut heed::RwTxn,
    pub schema: &'a Schema,
-    pub document_store: &'a mut RamDocumentStore,
-    pub document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
+    pub document_store: DocumentsFields,
+    pub document_fields_counts: DocumentsFieldsCounts,
    pub indexer: &'a mut RawIndexer,
    pub ranked_map: &'a mut RankedMap,
    pub document_id: DocumentId,
@ -150,6 +150,7 @@ impl<'a> ser::Serializer for Serializer<'a> {

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        Ok(MapSerializer {
+            txn: self.txn,
            schema: self.schema,
            document_id: self.document_id,
            document_store: self.document_store,
@ -166,6 +167,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        Ok(StructSerializer {
+            txn: self.txn,
            schema: self.schema,
            document_id: self.document_id,
            document_store: self.document_store,
@ -189,10 +191,11 @@ impl<'a> ser::Serializer for Serializer<'a> {
 }

 pub struct MapSerializer<'a> {
+    txn: &'a mut heed::RwTxn,
    schema: &'a Schema,
    document_id: DocumentId,
-    document_store: &'a mut RamDocumentStore,
-    document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
+    document_store: DocumentsFields,
+    document_fields_counts: DocumentsFieldsCounts,
    indexer: &'a mut RawIndexer,
    ranked_map: &'a mut RankedMap,
    current_key_name: Option<String>,
@ -229,17 +232,20 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
        V: ser::Serialize,
    {
        let key = key.serialize(ConvertToString)?;
-
-        serialize_value(
-            self.schema,
-            self.document_id,
-            self.document_store,
-            self.document_fields_counts,
-            self.indexer,
-            self.ranked_map,
-            &key,
-            value,
-        )
+        match self.schema.attribute(&key) {
+            Some(attribute) => serialize_value(
+                self.txn,
+                attribute,
+                self.schema.props(attribute),
+                self.document_id,
+                self.document_store,
+                self.document_fields_counts,
+                self.indexer,
+                self.ranked_map,
+                value,
+            ),
+            None => Ok(()),
+        }
    }

    fn end(self) -> Result<Self::Ok, Self::Error> {
@ -248,10 +254,11 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
 }

 pub struct StructSerializer<'a> {
+    txn: &'a mut heed::RwTxn,
    schema: &'a Schema,
    document_id: DocumentId,
-    document_store: &'a mut RamDocumentStore,
-    document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
+    document_store: DocumentsFields,
+    document_fields_counts: DocumentsFieldsCounts,
    indexer: &'a mut RawIndexer,
    ranked_map: &'a mut RankedMap,
 }
@ -268,16 +275,20 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
    where
        T: ser::Serialize,
    {
-        serialize_value(
-            self.schema,
-            self.document_id,
-            self.document_store,
-            self.document_fields_counts,
-            self.indexer,
-            self.ranked_map,
-            key,
-            value,
-        )
+        match self.schema.attribute(key) {
+            Some(attribute) => serialize_value(
+                self.txn,
+                attribute,
+                self.schema.props(attribute),
+                self.document_id,
+                self.document_store,
+                self.document_fields_counts,
+                self.indexer,
+                self.ranked_map,
+                value,
+            ),
+            None => Ok(()),
+        }
    }

    fn end(self) -> Result<Self::Ok, Self::Error> {
@ -285,40 +296,42 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
    }
 }

-fn serialize_value<T: ?Sized>(
-    schema: &Schema,
+pub fn serialize_value<T: ?Sized>(
+    txn: &mut heed::RwTxn,
+    attribute: SchemaAttr,
+    props: SchemaProps,
    document_id: DocumentId,
-    document_store: &mut RamDocumentStore,
-    documents_fields_counts: &mut HashMap<(DocumentId, SchemaAttr), u64>,
+    document_store: DocumentsFields,
+    documents_fields_counts: DocumentsFieldsCounts,
    indexer: &mut RawIndexer,
    ranked_map: &mut RankedMap,
-    key: &str,
    value: &T,
 ) -> Result<(), SerializerError>
 where
    T: ser::Serialize,
 {
-    if let Some(attribute) = schema.attribute(key) {
-        let props = schema.props(attribute);
+    let serialized = serde_json::to_vec(value)?;
+    document_store.put_document_field(txn, document_id, attribute, &serialized)?;

-        let serialized = serde_json::to_vec(value)?;
-        document_store.set_document_field(document_id, attribute, serialized);
-
-        if props.is_indexed() {
-            let indexer = Indexer {
-                attribute,
-                indexer,
+    if props.is_indexed() {
+        let indexer = Indexer {
+            attribute,
+            indexer,
+            document_id,
+        };
+        if let Some(number_of_words) = value.serialize(indexer)? {
+            documents_fields_counts.put_document_field_count(
+                txn,
                document_id,
-            };
-            if let Some(number_of_words) = value.serialize(indexer)? {
-                documents_fields_counts.insert((document_id, attribute), number_of_words as u64);
-            }
+                attribute,
+                number_of_words as u64,
+            )?;
        }
+    }

-        if props.is_ranked() {
-            let number = value.serialize(ConvertToNumber)?;
-            ranked_map.insert(document_id, attribute, number);
-        }
+    if props.is_ranked() {
+        let number = value.serialize(ConvertToNumber)?;
+        ranked_map.insert(document_id, attribute, number);
    }

    Ok(())
--- a/meilidb-core/src/store/docs_words.rs
+++ b/meilidb-core/src/store/docs_words.rs
@ -26,6 +26,10 @@ impl DocsWords {
        self.docs_words.delete(writer, &document_id)
    }

+    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
+        self.docs_words.clear(writer)
+    }
+
    pub fn doc_words(
        self,
        reader: &heed::RoTxn,
--- a/meilidb-core/src/store/documents_fields.rs
+++ b/meilidb-core/src/store/documents_fields.rs
@ -32,6 +32,10 @@ impl DocumentsFields {
        self.documents_fields.delete_range(writer, start..=end)
    }

+    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
+        self.documents_fields.clear(writer)
+    }
+
    pub fn document_attribute<'txn>(
        self,
        reader: &'txn heed::RoTxn,
--- a/meilidb-core/src/store/documents_fields_counts.rs
+++ b/meilidb-core/src/store/documents_fields_counts.rs
@ -32,6 +32,10 @@ impl DocumentsFieldsCounts {
            .delete_range(writer, start..=end)
    }

+    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
+        self.documents_fields_counts.clear(writer)
+    }
+
    pub fn document_field_count(
        self,
        reader: &heed::RoTxn,
@ -121,7 +125,7 @@ pub struct AllDocumentsFieldsCountsIter<'txn> {
    iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
 }

-impl<'r> Iterator for AllDocumentsFieldsCountsIter<'r> {
+impl Iterator for AllDocumentsFieldsCountsIter<'_> {
    type Item = ZResult<(DocumentId, SchemaAttr, u64)>;

    fn next(&mut self) -> Option<Self::Item> {
--- a/meilidb-core/src/store/mod.rs
+++ b/meilidb-core/src/store/mod.rs
@ -166,6 +166,10 @@ impl Index {
        )
    }

+    pub fn clear_all(&self, writer: &mut heed::RwTxn) -> MResult<u64> {
+        update::push_clear_all(writer, self.updates, self.updates_results)
+    }
+
    pub fn synonyms_addition(&self) -> update::SynonymsAddition {
        update::SynonymsAddition::new(
            self.updates,
--- a/meilidb-core/src/store/postings_lists.rs
+++ b/meilidb-core/src/store/postings_lists.rs
@ -23,6 +23,10 @@ impl PostingsLists {
        self.postings_lists.delete(writer, word)
    }

+    pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
+        self.postings_lists.clear(writer)
+    }
+
    pub fn postings_list<'txn>(
        self,
        reader: &'txn heed::RoTxn,
--- a/meilidb-core/src/update/clear_all.rs
+++ b/meilidb-core/src/update/clear_all.rs
@ -0,0 +1,33 @@
+use crate::update::{next_update_id, Update};
+use crate::{store, MResult, RankedMap};
+
+pub fn apply_clear_all(
+    writer: &mut heed::RwTxn,
+    main_store: store::Main,
+    documents_fields_store: store::DocumentsFields,
+    documents_fields_counts_store: store::DocumentsFieldsCounts,
+    postings_lists_store: store::PostingsLists,
+    docs_words_store: store::DocsWords,
+) -> MResult<()> {
+    main_store.put_words_fst(writer, &fst::Set::default())?;
+    main_store.put_ranked_map(writer, &RankedMap::default())?;
+    main_store.put_number_of_documents(writer, |_| 0)?;
+    documents_fields_store.clear(writer)?;
+    documents_fields_counts_store.clear(writer)?;
+    postings_lists_store.clear(writer)?;
+    docs_words_store.clear(writer)?;
+
+    Ok(())
+}
+
+pub fn push_clear_all(
+    writer: &mut heed::RwTxn,
+    updates_store: store::Updates,
+    updates_results_store: store::UpdatesResults,
+) -> MResult<u64> {
+    let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
+    let update = Update::ClearAll;
+    updates_store.put_update(writer, last_update_id, &update)?;
+
+    Ok(last_update_id)
+}
--- a/meilidb-core/src/update/documents_addition.rs
+++ b/meilidb-core/src/update/documents_addition.rs
@ -5,7 +5,7 @@ use sdset::{duo::Union, SetOperation};
 use serde::Serialize;

 use crate::raw_indexer::RawIndexer;
-use crate::serde::{extract_document_id, RamDocumentStore, Serializer};
+use crate::serde::{extract_document_id, serialize_value, Serializer};
 use crate::store;
 use crate::update::{apply_documents_deletion, next_update_id, Update};
 use crate::{Error, MResult, RankedMap};
@ -84,12 +84,9 @@ pub fn apply_documents_addition(
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
-    mut ranked_map: RankedMap,
    addition: Vec<serde_json::Value>,
 ) -> MResult<()> {
-    let mut document_ids = HashSet::new();
-    let mut document_store = RamDocumentStore::new();
-    let mut document_fields_counts = HashMap::new();
+    let mut documents_ids = HashSet::new();
    let mut indexer = RawIndexer::new();

    let schema = match main_store.schema(writer)? {
@ -99,20 +96,47 @@ pub fn apply_documents_addition(

    let identifier = schema.identifier_name();

+    // 1. store documents ids for future deletion
+    for document in addition.iter() {
+        let document_id = match extract_document_id(identifier, &document)? {
+            Some(id) => id,
+            None => return Err(Error::MissingDocumentId),
+        };
+
+        if !documents_ids.insert(document_id) {
+            return Err(Error::DuplicateDocument);
+        }
+    }
+
+    // 2. remove the documents posting lists
+    let number_of_inserted_documents = documents_ids.len();
+    apply_documents_deletion(
+        writer,
+        main_store,
+        documents_fields_store,
+        documents_fields_counts_store,
+        postings_lists_store,
+        docs_words_store,
+        documents_ids.into_iter().collect(),
+    )?;
+
+    let mut ranked_map = match main_store.ranked_map(writer)? {
+        Some(ranked_map) => ranked_map,
+        None => RankedMap::default(),
+    };
+
+    // 3. index the documents fields in the stores
    for document in addition {
        let document_id = match extract_document_id(identifier, &document)? {
            Some(id) => id,
            None => return Err(Error::MissingDocumentId),
        };

-        // 1. store the document id for future deletion
-        document_ids.insert(document_id);
-
-        // 2. index the document fields in ram stores
        let serializer = Serializer {
+            txn: writer,
            schema: &schema,
-            document_store: &mut document_store,
-            document_fields_counts: &mut document_fields_counts,
+            document_store: documents_fields_store,
+            document_fields_counts: documents_fields_counts_store,
            indexer: &mut indexer,
            ranked_map: &mut ranked_map,
            document_id,
@ -121,29 +145,94 @@ pub fn apply_documents_addition(
        document.serialize(serializer)?;
    }

-    // 1. remove the previous documents match indexes
-    let documents_to_insert = document_ids.iter().cloned().collect();
-    apply_documents_deletion(
+    write_documents_addition_index(
        writer,
        main_store,
-        documents_fields_store,
-        documents_fields_counts_store,
        postings_lists_store,
        docs_words_store,
-        ranked_map.clone(),
-        documents_to_insert,
-    )?;
+        ranked_map,
+        number_of_inserted_documents,
+        indexer,
+    )
+}

-    // 2. insert new document attributes in the database
-    for ((id, attr), value) in document_store.into_inner() {
-        documents_fields_store.put_document_field(writer, id, attr, &value)?;
+pub fn reindex_all_documents(
+    writer: &mut heed::RwTxn,
+    main_store: store::Main,
+    documents_fields_store: store::DocumentsFields,
+    documents_fields_counts_store: store::DocumentsFieldsCounts,
+    postings_lists_store: store::PostingsLists,
+    docs_words_store: store::DocsWords,
+) -> MResult<()> {
+    let schema = match main_store.schema(writer)? {
+        Some(schema) => schema,
+        None => return Err(Error::SchemaMissing),
+    };
+
+    let mut ranked_map = RankedMap::default();
+
+    // 1. retrieve all documents ids
+    let mut documents_ids_to_reindex = Vec::new();
+    for result in documents_fields_counts_store.documents_ids(writer)? {
+        let document_id = result?;
+        documents_ids_to_reindex.push(document_id);
    }

-    // 3. insert new document attributes counts
-    for ((id, attr), count) in document_fields_counts {
-        documents_fields_counts_store.put_document_field_count(writer, id, attr, count)?;
+    // 2. remove the documents posting lists
+    let number_of_inserted_documents = documents_ids_to_reindex.len();
+    main_store.put_words_fst(writer, &fst::Set::default())?;
+    main_store.put_ranked_map(writer, &ranked_map)?;
+    main_store.put_number_of_documents(writer, |_| 0)?;
+    postings_lists_store.clear(writer)?;
+    docs_words_store.clear(writer)?;
+
+    // 3. re-index the documents one at a time (otherwise we make the borrow checker unhappy)
+    let mut indexer = RawIndexer::new();
+    let mut ram_store = HashMap::new();
+
+    for document_id in documents_ids_to_reindex {
+        for result in documents_fields_store.document_fields(writer, document_id)? {
+            let (attr, bytes) = result?;
+            let value: serde_json::Value = serde_json::from_slice(bytes)?;
+            ram_store.insert((document_id, attr), value);
+        }
+
+        for ((docid, attr), value) in ram_store.drain() {
+            serialize_value(
+                writer,
+                attr,
+                schema.props(attr),
+                docid,
+                documents_fields_store,
+                documents_fields_counts_store,
+                &mut indexer,
+                &mut ranked_map,
+                &value,
+            )?;
+        }
    }

+    // 4. write the new index in the main store
+    write_documents_addition_index(
+        writer,
+        main_store,
+        postings_lists_store,
+        docs_words_store,
+        ranked_map,
+        number_of_inserted_documents,
+        indexer,
+    )
+}
+
+pub fn write_documents_addition_index(
+    writer: &mut heed::RwTxn,
+    main_store: store::Main,
+    postings_lists_store: store::PostingsLists,
+    docs_words_store: store::DocsWords,
+    ranked_map: RankedMap,
+    number_of_inserted_documents: usize,
+    indexer: RawIndexer,
+) -> MResult<()> {
    let indexed = indexer.build();
    let mut delta_words_builder = SetBuilder::memory();

@ -186,9 +275,7 @@ pub fn apply_documents_addition(

    main_store.put_words_fst(writer, &words)?;
    main_store.put_ranked_map(writer, &ranked_map)?;
-
-    let inserted_documents_len = document_ids.len() as u64;
-    main_store.put_number_of_documents(writer, |old| old + inserted_documents_len)?;
+    main_store.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;

    Ok(())
 }
--- a/meilidb-core/src/update/documents_deletion.rs
+++ b/meilidb-core/src/update/documents_deletion.rs
@ -88,7 +88,6 @@ pub fn apply_documents_deletion(
    documents_fields_counts_store: store::DocumentsFieldsCounts,
    postings_lists_store: store::PostingsLists,
    docs_words_store: store::DocsWords,
-    mut ranked_map: RankedMap,
    deletion: Vec<DocumentId>,
 ) -> MResult<()> {
    let idset = SetBuf::from_dirty(deletion);
@ -98,6 +97,11 @@ pub fn apply_documents_deletion(
        None => return Err(Error::SchemaMissing),
    };

+    let mut ranked_map = match main_store.ranked_map(writer)? {
+        Some(ranked_map) => ranked_map,
+        None => RankedMap::default(),
+    };
+
    // collect the ranked attributes according to the schema
    let ranked_attrs: Vec<_> = schema
        .iter()
@ -181,7 +185,6 @@ pub fn apply_documents_deletion(

    main_store.put_words_fst(writer, &words)?;
    main_store.put_ranked_map(writer, &ranked_map)?;
-
    main_store.put_number_of_documents(writer, |old| old - deleted_documents_len)?;

    Ok(())
--- a/meilidb-core/src/update/mod.rs
+++ b/meilidb-core/src/update/mod.rs
@ -1,3 +1,4 @@
+mod clear_all;
 mod customs_update;
 mod documents_addition;
 mod documents_deletion;
@ -5,6 +6,7 @@ mod schema_update;
 mod synonyms_addition;
 mod synonyms_deletion;

+pub use self::clear_all::{apply_clear_all, push_clear_all};
 pub use self::customs_update::{apply_customs_update, push_customs_update};
 pub use self::documents_addition::{apply_documents_addition, DocumentsAddition};
 pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
@ -20,11 +22,12 @@ use heed::Result as ZResult;
 use log::debug;
 use serde::{Deserialize, Serialize};

-use crate::{store, DocumentId, MResult, RankedMap};
+use crate::{store, DocumentId, MResult};
 use meilidb_schema::Schema;

 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum Update {
+    ClearAll,
    Schema(Schema),
    Customs(Vec<u8>),
    DocumentsAddition(Vec<serde_json::Value>),
@ -35,6 +38,7 @@ pub enum Update {

 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum UpdateType {
+    ClearAll,
    Schema { schema: Schema },
    Customs,
    DocumentsAddition { number: usize },
@ -107,13 +111,36 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
    debug!("Processing update number {}", update_id);

    let (update_type, result, duration) = match update {
+        Update::ClearAll => {
+            let start = Instant::now();
+
+            let update_type = UpdateType::ClearAll;
+            let result = apply_clear_all(
+                writer,
+                index.main,
+                index.documents_fields,
+                index.documents_fields_counts,
+                index.postings_lists,
+                index.docs_words,
+            );
+
+            (update_type, result, start.elapsed())
+        }
        Update::Schema(schema) => {
            let start = Instant::now();

            let update_type = UpdateType::Schema {
                schema: schema.clone(),
            };
-            let result = apply_schema_update(writer, index.main, &schema);
+            let result = apply_schema_update(
+                writer,
+                &schema,
+                index.main,
+                index.documents_fields,
+                index.documents_fields_counts,
+                index.postings_lists,
+                index.docs_words,
+            );

            (update_type, result, start.elapsed())
        }
@ -128,11 +155,6 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
        Update::DocumentsAddition(documents) => {
            let start = Instant::now();

-            let ranked_map = match index.main.ranked_map(writer)? {
-                Some(ranked_map) => ranked_map,
-                None => RankedMap::default(),
-            };
-
            let update_type = UpdateType::DocumentsAddition {
                number: documents.len(),
            };
@ -144,7 +166,6 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
-                ranked_map,
                documents,
            );

@ -153,11 +174,6 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
        Update::DocumentsDeletion(documents) => {
            let start = Instant::now();

-            let ranked_map = match index.main.ranked_map(writer)? {
-                Some(ranked_map) => ranked_map,
-                None => RankedMap::default(),
-            };
-
            let update_type = UpdateType::DocumentsDeletion {
                number: documents.len(),
            };
@ -169,7 +185,6 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
-                ranked_map,
                documents,
            );

--- a/meilidb-core/src/update/schema_update.rs
+++ b/meilidb-core/src/update/schema_update.rs
@ -1,19 +1,58 @@
+use meilidb_schema::{Diff, Schema};
+
+use crate::update::documents_addition::reindex_all_documents;
 use crate::update::{next_update_id, Update};
 use crate::{error::UnsupportedOperation, store, MResult};
-use meilidb_schema::Schema;

 pub fn apply_schema_update(
    writer: &mut heed::RwTxn,
-    main_store: store::Main,
    new_schema: &Schema,
+    main_store: store::Main,
+    documents_fields_store: store::DocumentsFields,
+    documents_fields_counts_store: store::DocumentsFieldsCounts,
+    postings_lists_store: store::PostingsLists,
+    docs_words_store: store::DocsWords,
 ) -> MResult<()> {
-    if main_store.schema(writer)?.is_some() {
-        return Err(UnsupportedOperation::SchemaAlreadyExists.into());
+    use UnsupportedOperation::{
+        CannotIntroduceNewSchemaAttribute, CannotRemoveSchemaAttribute,
+        CannotReorderSchemaAttribute, CannotUpdateSchemaIdentifier,
+    };
+
+    let mut need_full_reindexing = false;
+
+    if let Some(old_schema) = main_store.schema(writer)? {
+        for diff in meilidb_schema::diff(&old_schema, new_schema) {
+            match diff {
+                Diff::IdentChange { .. } => return Err(CannotUpdateSchemaIdentifier.into()),
+                Diff::AttrMove { .. } => return Err(CannotReorderSchemaAttribute.into()),
+                Diff::AttrPropsChange { old, new, .. } => {
+                    if new.indexed != old.indexed {
+                        need_full_reindexing = true;
+                    }
+                    if new.ranked != old.ranked {
+                        need_full_reindexing = true;
+                    }
+                }
+                Diff::NewAttr { .. } => return Err(CannotIntroduceNewSchemaAttribute.into()),
+                Diff::RemovedAttr { .. } => return Err(CannotRemoveSchemaAttribute.into()),
+            }
+        }
    }

-    main_store
-        .put_schema(writer, new_schema)
-        .map_err(Into::into)
+    main_store.put_schema(writer, new_schema)?;
+
+    if need_full_reindexing {
+        reindex_all_documents(
+            writer,
+            main_store,
+            documents_fields_store,
+            documents_fields_counts_store,
+            postings_lists_store,
+            docs_words_store,
+        )?
+    }
+
+    Ok(())
 }

 pub fn push_schema_update(
--- a/meilidb-schema/src/lib.rs
+++ b/meilidb-schema/src/lib.rs
@ -215,11 +215,155 @@ impl fmt::Display for SchemaAttr {
    }
 }

+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Diff {
+    IdentChange {
+        old: String,
+        new: String,
+    },
+    AttrMove {
+        name: String,
+        old: usize,
+        new: usize,
+    },
+    AttrPropsChange {
+        name: String,
+        old: SchemaProps,
+        new: SchemaProps,
+    },
+    NewAttr {
+        name: String,
+        pos: usize,
+        props: SchemaProps,
+    },
+    RemovedAttr {
+        name: String,
+    },
+}
+
+pub fn diff(old: &Schema, new: &Schema) -> Vec<Diff> {
+    use Diff::{AttrMove, AttrPropsChange, IdentChange, NewAttr, RemovedAttr};
+
+    let mut differences = Vec::new();
+    let old = old.to_builder();
+    let new = new.to_builder();
+
+    // check if the old identifier differs from the new one
+    if old.identifier != new.identifier {
+        let old = old.identifier;
+        let new = new.identifier;
+        differences.push(IdentChange { old, new });
+    }
+
+    // compare all old attributes positions
+    // and properties with the new ones
+    for (pos, (name, props)) in old.attributes.iter().enumerate() {
+        match new.attributes.get_full(name) {
+            Some((npos, _, nprops)) => {
+                if pos != npos {
+                    let name = name.clone();
+                    differences.push(AttrMove {
+                        name,
+                        old: pos,
+                        new: npos,
+                    });
+                }
+                if props != nprops {
+                    let name = name.clone();
+                    differences.push(AttrPropsChange {
+                        name,
+                        old: *props,
+                        new: *nprops,
+                    });
+                }
+            }
+            None => differences.push(RemovedAttr { name: name.clone() }),
+        }
+    }
+
+    // retrieve all attributes that
+    // were not present in the old schema
+    for (pos, (name, props)) in new.attributes.iter().enumerate() {
+        if !old.attributes.contains_key(name) {
+            let name = name.clone();
+            differences.push(NewAttr {
+                name,
+                pos,
+                props: *props,
+            });
+        }
+    }
+
+    differences
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::error::Error;

+    #[test]
+    fn difference() {
+        use Diff::{AttrMove, AttrPropsChange, IdentChange, NewAttr, RemovedAttr};
+
+        let mut builder = SchemaBuilder::with_identifier("id");
+        builder.new_attribute("alpha", DISPLAYED);
+        builder.new_attribute("beta", DISPLAYED | INDEXED);
+        builder.new_attribute("gamma", INDEXED);
+        builder.new_attribute("omega", INDEXED);
+        let old = builder.build();
+
+        let mut builder = SchemaBuilder::with_identifier("kiki");
+        builder.new_attribute("beta", DISPLAYED | INDEXED);
+        builder.new_attribute("alpha", DISPLAYED | INDEXED);
+        builder.new_attribute("delta", RANKED);
+        builder.new_attribute("gamma", DISPLAYED);
+        let new = builder.build();
+
+        let differences = diff(&old, &new);
+        let expected = &[
+            IdentChange {
+                old: format!("id"),
+                new: format!("kiki"),
+            },
+            AttrMove {
+                name: format!("alpha"),
+                old: 0,
+                new: 1,
+            },
+            AttrPropsChange {
+                name: format!("alpha"),
+                old: DISPLAYED,
+                new: DISPLAYED | INDEXED,
+            },
+            AttrMove {
+                name: format!("beta"),
+                old: 1,
+                new: 0,
+            },
+            AttrMove {
+                name: format!("gamma"),
+                old: 2,
+                new: 3,
+            },
+            AttrPropsChange {
+                name: format!("gamma"),
+                old: INDEXED,
+                new: DISPLAYED,
+            },
+            RemovedAttr {
+                name: format!("omega"),
+            },
+            NewAttr {
+                name: format!("delta"),
+                pos: 2,
+                props: RANKED,
+            },
+        ];
+
+        assert_eq!(&differences, expected)
+    }
+
    #[test]
    fn serialize_deserialize() -> bincode::Result<()> {
        let mut builder = SchemaBuilder::with_identifier("id");
Author	SHA1	Message	Date
Clément Renault	a17dccd84e	Merge pull request #237 from meilisearch/fix-exactness-criterion Fix the exactness criterion algorithm	2019-10-26 18:43:10 +02:00
Clément Renault	9a57cab3ee	Fix the exactness criterion algorithm	2019-10-26 18:34:40 +02:00
Clément Renault	751b060320	Merge pull request #238 from meilisearch/improve-highlighting Only highlight query words areas not the whole words	2019-10-26 18:23:19 +02:00
Clément Renault	4111b99a6d	Only highlight query words areas not the whole words	2019-10-26 15:56:34 +02:00
Clément Renault	d6fb2b56d1	Merge pull request #236 from meilisearch/reorder-automatons Make sure that automatons group with more automatons are better	2019-10-24 15:29:16 +02:00
Clément Renault	cb5c77e536	Make sure that automatons group with more automatons are better	2019-10-24 15:18:53 +02:00
Clément Renault	44c89b1ea2	Merge pull request #235 from meilisearch/readme-concat-split-query-words Add information about search concat and split query words support	2019-10-23 18:20:59 +02:00
Clément Renault	26a285053b	Add information about search concat and split query words support	2019-10-23 18:19:15 +02:00
Clément Renault	1446a6a2d2	Merge pull request #234 from meilisearch/clear-all-update-variant Introduce a clear all documents update	2019-10-23 16:45:37 +02:00
Clément Renault	047eba3ff3	Introduce a clear all documents update	2019-10-23 16:39:10 +02:00
Clément Renault	8d9d183ce6	Merge pull request #233 from meilisearch/commit-when-update-ok Commit an update only when it is Ok	2019-10-23 16:07:48 +02:00
Clément Renault	eb67195840	Commit an update only when it is Ok	2019-10-23 15:52:40 +02:00
Clément Renault	93306c2326	Merge pull request #232 from meilisearch/support-splitted-words Support splitted words	2019-10-23 13:38:16 +02:00
Clément Renault	7d9cf8d713	Clean up the fetch algorithm	2019-10-23 12:06:21 +02:00
Clément Renault	03eb7898e7	Introduce a basic working version of phrase query for splitting words	2019-10-23 11:40:13 +02:00
Clément Renault	0fbd4cd632	Merge pull request #231 from meilisearch/recursive-object-indexing Make possible to convert recursive object into strings	2019-10-22 16:20:10 +02:00
Clément Renault	858bf359b8	Make possible to convert recursive object into strings	2019-10-22 16:02:02 +02:00
Clément Renault	5dc8465ebd	Merge pull request #181 from meilisearch/diff-schema Make possible to update an index schema	2019-10-22 14:23:43 +02:00
Clément Renault	0f30a221fa	Introduce the reindex_all_documents indexing function	2019-10-22 14:07:27 +02:00
Clément Renault	e86a547e93	Introduce a basic schema diff function	2019-10-21 17:57:32 +02:00