Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-26 05:26:27 +00:00)

	chore: Make the project a workspace
@@ -1,35 +0,0 @@
use levenshtein_automata::{LevenshteinAutomatonBuilder, DFA};

pub struct LevBuilder {
    automatons: [LevenshteinAutomatonBuilder; 3],
}

impl LevBuilder {
    pub fn new() -> Self {
        Self {
            automatons: [
                LevenshteinAutomatonBuilder::new(0, false),
                LevenshteinAutomatonBuilder::new(1, false),
                LevenshteinAutomatonBuilder::new(2, false),
            ],
        }
    }

    pub fn get_automaton(&self, query: &str) -> Levenshtein {
        let dfa = if query.len() <= 4 {
            self.automatons[0].build_prefix_dfa(query)
        } else if query.len() <= 8 {
            self.automatons[1].build_prefix_dfa(query)
        } else {
            self.automatons[2].build_prefix_dfa(query)
        };

        Levenshtein { dfa, query_len: query.len() }
    }
}

#[derive(Clone)]
pub struct Levenshtein {
    pub dfa: DFA,
    pub query_len: usize,
}
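
For context, here is a minimal usage sketch of the removed LevBuilder API (illustrative only, not part of this commit; it assumes LevBuilder is in scope through the crate-root re-export shown in src/lib.rs below):

    // Build the three cached automaton builders once, then derive a
    // prefix DFA per query word. Words of at most 4 bytes tolerate 0 typos,
    // at most 8 bytes tolerate 1 typo, anything longer tolerates 2.
    let builder = LevBuilder::new();
    let lev = builder.get_automaton("hello");
    assert_eq!(lev.query_len, 5);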
							
								
								
									
src/lib.rs (120 lines removed)
@@ -1,120 +0,0 @@
#[macro_use] extern crate serde_derive;
extern crate bincode;
extern crate fst;
extern crate group_by;
extern crate levenshtein_automata;
extern crate serde;

pub mod map;
pub mod rank;
mod levenshtein;

use std::path::Path;
use std::fs;

pub use self::map::{Map, MapBuilder, Values};
pub use self::map::{
    OpBuilder, IndexedValues,
    OpWithStateBuilder, IndexedValuesWithState,
};
pub use self::rank::{RankedStream};
pub use self::levenshtein::LevBuilder;

pub type DocIndexMap = Map<DocIndex>;
pub type DocIndexMapBuilder = MapBuilder<DocIndex>;

pub type DocumentId = u64;

/// This structure represents the position of a word
/// in a document and its attributes.
///
/// This is stored in the map, generated at index time,
/// extracted and interpreted at search time.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct DocIndex {

    /// The document identifier where the word was found.
    pub document: DocumentId,

    /// The attribute identifier in the document
    /// where the word was found.
    ///
    /// This is a `u8`, therefore a document
    /// cannot have more than `2^8` attributes.
    pub attribute: u8,

    /// The index where the word was found in the attribute.
    ///
    /// Only the first 1000 words are indexed.
    pub attribute_index: u32,
}

/// This structure represents a matching word with information
/// on the location of the word in the document.
///
/// The order of the fields is important because it defines
/// the way these structures are ordered between themselves.
///
/// The word itself is not important.
// TODO do data oriented programming ? very arrays ?
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct Match {

    /// The word index in the query sentence.
    /// Same as the `attribute_index` but for the query words.
    ///
    /// Used to retrieve the automaton that matches this word.
    pub query_index: u32,

    /// The distance between the word and the query word
    /// (i.e. the Levenshtein distance).
    pub distance: u8,

    /// The attribute in which the word is located
    /// (i.e. Title is 0, Description is 1).
    ///
    /// This is a `u8`, therefore a document
    /// cannot have more than `2^8` attributes.
    pub attribute: u8,

    /// Where this word is located in the attribute string
    /// (i.e. at the start or the end of the attribute).
    ///
    /// The index in the attribute is limited to a maximum of `2^32`
    /// because we only index the first 1000 words of an attribute.
    pub attribute_index: u32,

    /// Whether the matching word is an exact match or a prefix.
    pub is_exact: bool,
}

impl Match {
    pub fn zero() -> Self {
        Match {
            query_index: 0,
            distance: 0,
            attribute: 0,
            attribute_index: 0,
            is_exact: false,
        }
    }

    pub fn max() -> Self {
        Match {
            query_index: u32::max_value(),
            distance: u8::max_value(),
            attribute: u8::max_value(),
            attribute_index: u32::max_value(),
            is_exact: true,
        }
    }
}


pub fn load_map<P, Q>(map: P, values: Q) -> fst::Result<DocIndexMap>
where P: AsRef<Path>, Q: AsRef<Path>,
{
    let fst = fs::read(map)?;
    let values = fs::read(values)?;
    DocIndexMap::from_bytes(fst, &values)
}
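
For context, a short sketch of how the removed load_map helper would be called (illustrative only; the file names are hypothetical):

    // Hypothetical file names: the first holds the fst map bytes, the second
    // the bincode-serialized Values<DocIndex> written by MapBuilder::build.
    let index: DocIndexMap = load_map("map.fst", "values.vecs").unwrap();
    assert!(index.contains_key("search"));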
							
								
								
									
src/map.rs (404 lines removed)
@@ -1,404 +0,0 @@
use bincode;
use fst::{self, Automaton};
use serde::de::DeserializeOwned;
use serde::ser::Serialize;
use std::collections::BTreeMap;
use std::collections::btree_map::Entry;
use std::fs::File;
use std::io::{Write, BufReader};
use std::ops::Range;
use std::path::Path;

#[derive(Debug)]
pub struct Map<T> {
    inner: fst::Map,
    values: Values<T>,
}

impl<T> Map<T> {
    pub unsafe fn from_paths<P, Q>(map: P, values: Q) -> fst::Result<Self>
    where
        T: DeserializeOwned,
        P: AsRef<Path>,
        Q: AsRef<Path>
    {
        let inner = fst::Map::from_path(map)?;

        // TODO handle errors !!!
        let values = File::open(values).unwrap();
        let values = BufReader::new(values);
        let values = bincode::deserialize_from(values).unwrap();

        Ok(Self { inner, values })
    }

    pub fn from_bytes(map: Vec<u8>, values: &[u8]) -> fst::Result<Self>
    where
        T: DeserializeOwned
    {
        let inner = fst::Map::from_bytes(map)?;
        let values = bincode::deserialize(values).unwrap();

        Ok(Self { inner, values })
    }

    pub fn stream(&self) -> Stream<T> {
        Stream {
            inner: self.inner.stream(),
            values: &self.values,
        }
    }

    pub fn contains_key<K: AsRef<[u8]>>(&self, key: K) -> bool {
        self.inner.contains_key(key)
    }

    pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[T]> {
        self.inner.get(key).map(|i| unsafe { self.values.get_unchecked(i as usize) })
    }

    pub fn search<A: Automaton>(&self, aut: A) -> StreamBuilder<T, A> {
        StreamBuilder {
            inner: self.inner.search(aut),
            values: &self.values,
        }
    }

    pub fn as_map(&self) -> &fst::Map {
        &self.inner
    }

    pub fn values(&self) -> &Values<T> {
        &self.values
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Values<T> {
    ranges: Box<[Range<u64>]>,
    values: Box<[T]>,
}

impl<T> Values<T> {
    fn new(raw: Vec<Vec<T>>) -> Self {
        let cap = raw.len();
        let mut ranges = Vec::with_capacity(cap);
        let cap = raw.iter().map(Vec::len).sum();
        let mut values = Vec::with_capacity(cap);

        for mut v in &raw {
            let len = v.len() as u64;
            let start = ranges.last().map(|&Range { end, .. }| end).unwrap_or(0);

            let range = Range { start, end: start + len };
            ranges.push(range);
        }

        values.extend(raw.into_iter().flat_map(IntoIterator::into_iter));

        let ranges = ranges.into_boxed_slice();
        let values = values.into_boxed_slice();

        Self { ranges, values }
    }

    pub unsafe fn get_unchecked(&self, index: usize) -> &[T] {
        let range = self.ranges.get_unchecked(index);
        let range = Range { start: range.start as usize, end: range.end as usize };
        self.values.get_unchecked(range)
    }
}

#[derive(Debug)]
pub struct MapBuilder<T> {
    map: BTreeMap<String, u64>,
    // This makes many memory indirections but it is only used
    // at index time, not kept for query time.
    values: Vec<Vec<T>>,
}

impl<T> MapBuilder<T> {
    pub fn new() -> Self {
        Self {
            map: BTreeMap::new(),
            values: Vec::new(),
        }
    }

    pub fn insert<S: Into<String>>(&mut self, key: S, value: T) {
        let key = key.into();
        match self.map.entry(key) {
            Entry::Vacant(e) => {
                self.values.push(vec![value]);
                let index = (self.values.len() - 1) as u64;
                e.insert(index);
            },
            Entry::Occupied(e) => {
                let index = *e.get();
                let values = &mut self.values[index as usize];
                values.push(value);
            },
        }
    }

    pub fn build_in_memory(self) -> fst::Result<Map<T>> {
        Ok(Map {
            inner: fst::Map::from_iter(self.map)?,
            values: Values::new(self.values),
        })
    }

    pub fn build<W, X>(self, map_wrt: W, mut values_wrt: X) -> fst::Result<(W, X)>
    where
        T: Serialize,
        W: Write,
        X: Write
    {
        let mut builder = fst::MapBuilder::new(map_wrt)?;
        builder.extend_iter(self.map)?;
        let map = builder.into_inner()?;
        let values = Values::new(self.values);

        // TODO handle that error !!!
        bincode::serialize_into(&mut values_wrt, &values).unwrap();

        Ok((map, values_wrt))
    }
}

pub struct OpBuilder<'m, 'v, T: 'v> {
    inner: fst::map::OpBuilder<'m>,
    values: &'v Values<T>,
}

impl<'m, 'v, T: 'v> OpBuilder<'m, 'v, T> {
    pub fn new(values: &'v Values<T>) -> Self {
        OpBuilder {
            inner: fst::map::OpBuilder::new(),
            values: values,
        }
    }

    pub fn add<I, S>(mut self, streamable: I) -> Self
    where
        I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>,
        S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>,
    {
        self.push(streamable);
        self
    }

    pub fn push<I, S>(&mut self, streamable: I)
    where
        I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>,
        S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>,
    {
        self.inner.push(streamable);
    }

    pub fn union(self) -> Union<'m, 'v, T> {
        Union {
            inner: self.inner.union(),
            outs: Vec::new(),
            values: self.values,
        }
    }
}

pub struct Union<'m, 'v, T: 'v> {
    inner: fst::map::Union<'m>,
    outs: Vec<IndexedValues<'v, T>>,
    values: &'v Values<T>,
}

impl<'a, 'm, 'v, T: 'v + 'a> fst::Streamer<'a> for Union<'m, 'v, T> {
    type Item = (&'a [u8], &'a [IndexedValues<'a, T>]);

    fn next(&'a mut self) -> Option<Self::Item> {
        match self.inner.next() {
            Some((s, ivalues)) => {
                self.outs.clear();
                for ivalue in ivalues {
                    let index = ivalue.index;
                    let values = unsafe { self.values.get_unchecked(ivalue.value as usize) };
                    self.outs.push(IndexedValues { index, values })
                }
                Some((s, &self.outs))
            },
            None => None,
        }
    }
}

#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct IndexedValues<'a, T: 'a> {
    pub index: usize,
    pub values: &'a [T],
}

pub struct OpWithStateBuilder<'m, 'v, T: 'v, U> {
    inner: fst::map::OpWithStateBuilder<'m, U>,
    values: &'v Values<T>,
}

impl<'m, 'v, T: 'v, U: 'static> OpWithStateBuilder<'m, 'v, T, U> {
    pub fn new(values: &'v Values<T>) -> Self {
        Self {
            inner: fst::map::OpWithStateBuilder::new(),
            values: values,
        }
    }

    pub fn add<I, S>(mut self, streamable: I) -> Self
    where
        I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
        S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
    {
        self.push(streamable);
        self
    }

    pub fn push<I, S>(&mut self, streamable: I)
    where
        I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
        S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
    {
        self.inner.push(streamable);
    }

    pub fn union(self) -> UnionWithState<'m, 'v, T, U> {
        UnionWithState {
            inner: self.inner.union(),
            outs: Vec::new(),
            values: self.values,
        }
    }
}

pub struct UnionWithState<'m, 'v, T: 'v, U> {
    inner: fst::map::UnionWithState<'m, U>,
    outs: Vec<IndexedValuesWithState<'v, T, U>>,
    values: &'v Values<T>,
}

impl<'a, 'm, 'v, T: 'v + 'a, U: 'a> fst::Streamer<'a> for UnionWithState<'m, 'v, T, U>
where
    U: Clone,
{
    // TODO prefer returning (&[u8], index, value T, state) one by one
    type Item = (&'a [u8], &'a [IndexedValuesWithState<'a, T, U>]);

    fn next(&'a mut self) -> Option<Self::Item> {
        match self.inner.next() {
            Some((s, ivalues)) => {
                self.outs.clear();
                self.outs.reserve(ivalues.len());
                for ivalue in ivalues {
                    let index = ivalue.index;
                    let values = unsafe { self.values.get_unchecked(ivalue.value as usize) };
                    let state = ivalue.state.clone();
                    self.outs.push(IndexedValuesWithState { index, values, state })
                }
                Some((s, &self.outs))
            },
            None => None,
        }
    }
}

#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct IndexedValuesWithState<'a, T: 'a, U> {
    pub index: usize,
    pub values: &'a [T],
    pub state: U,
}

pub struct StreamBuilder<'m, 'v, T: 'v, A> {
    inner: fst::map::StreamBuilder<'m, A>,
    values: &'v Values<T>,
}

impl<'m, 'v, T: 'v, A> StreamBuilder<'m, 'v, T, A> {
    pub fn with_state(self) -> StreamWithStateBuilder<'m, 'v, T, A> {
        StreamWithStateBuilder {
            inner: self.inner.with_state(),
            values: self.values,
        }
    }
}

impl<'m, 'v, 'a, T: 'v + 'a, A: Automaton> fst::IntoStreamer<'a> for StreamBuilder<'m, 'v, T, A> {
    type Item = <Self::Into as fst::Streamer<'a>>::Item;
    type Into = Stream<'m, 'v, T, A>;

    fn into_stream(self) -> Self::Into {
        Stream {
            inner: self.inner.into_stream(),
            values: self.values,
        }
    }
}

pub struct Stream<'m, 'v, T: 'v, A: Automaton = fst::automaton::AlwaysMatch> {
    inner: fst::map::Stream<'m, A>,
    values: &'v Values<T>,
}

impl<'m, 'v, 'a, T: 'v + 'a, A: Automaton> fst::Streamer<'a> for Stream<'m, 'v, T, A> {
    type Item = (&'a [u8], &'a [T]);

    fn next(&'a mut self) -> Option<Self::Item> {
        // Here we can't just `map` because of some borrow rules
        match self.inner.next() {
            Some((key, i)) => {
                let values = unsafe { self.values.get_unchecked(i as usize) };
                Some((key, values))
            },
            None => None,
        }
    }
}

pub struct StreamWithStateBuilder<'m, 'v, T: 'v, A> {
    inner: fst::map::StreamWithStateBuilder<'m, A>,
    values: &'v Values<T>,
}

impl<'m, 'v, 'a, T: 'v + 'a, A: 'a> fst::IntoStreamer<'a> for StreamWithStateBuilder<'m, 'v, T, A>
where
    A: Automaton,
    A::State: Clone,
{
    type Item = <Self::Into as fst::Streamer<'a>>::Item;
    type Into = StreamWithState<'m, 'v, T, A>;

    fn into_stream(self) -> Self::Into {
        StreamWithState {
            inner: self.inner.into_stream(),
            values: self.values,
        }
    }
}

pub struct StreamWithState<'m, 'v, T: 'v, A: Automaton = fst::automaton::AlwaysMatch> {
    inner: fst::map::StreamWithState<'m, A>,
    values: &'v Values<T>,
}

impl<'m, 'v, 'a, T: 'v + 'a, A: 'a> fst::Streamer<'a> for StreamWithState<'m, 'v, T, A>
where
    A: Automaton,
    A::State: Clone,
{
    type Item = (&'a [u8], &'a [T], A::State);

    fn next(&'a mut self) -> Option<Self::Item> {
        match self.inner.next() {
            Some((key, i, state)) => {
                let values = unsafe { self.values.get_unchecked(i as usize) };
                Some((key, values, state))
            },
            None => None,
        }
    }
}
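
For context, a sketch of how the removed MapBuilder/Map pair fits together (illustrative only; the words and DocIndex values are made up):

    // Index a couple of words, build the map in memory, then query it back.
    let mut builder = DocIndexMapBuilder::new();
    builder.insert("search", DocIndex { document: 0, attribute: 0, attribute_index: 0 });
    builder.insert("engine", DocIndex { document: 0, attribute: 0, attribute_index: 1 });

    let map = builder.build_in_memory().unwrap();
    assert_eq!(map.get("search").map(|values| values.len()), Some(1));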
							
								
								
									
src/rank.rs (275 lines removed)
@@ -1,275 +0,0 @@
use std::cmp::{self, Ordering};
use std::collections::HashMap;
use std::{mem, vec, iter};
use DocIndexMap;
use fst;
use levenshtein::Levenshtein;
use map::{
    OpWithStateBuilder, UnionWithState,
    StreamWithStateBuilder,
    Values,
};
use {Match, DocIndex, DocumentId};
use group_by::{GroupBy, GroupByMut};

const MAX_DISTANCE: u32 = 8;

#[inline]
fn match_query_index(a: &Match, b: &Match) -> bool {
    a.query_index == b.query_index
}

#[derive(Debug, Clone)]
pub struct Document {
    document_id: DocumentId,
    matches: Vec<Match>,
}

impl Document {
    pub fn new(doc: DocumentId, match_: Match) -> Self {
        Self::from_sorted_matches(doc, vec![match_])
    }

    pub fn from_sorted_matches(doc: DocumentId, matches: Vec<Match>) -> Self {
        Self {
            document_id: doc,
            matches: matches,
        }
    }
}

fn sum_of_typos(lhs: &Document, rhs: &Document) -> Ordering {
    let key = |doc: &Document| -> u8 {
        GroupBy::new(&doc.matches, match_query_index).map(|m| m[0].distance).sum()
    };

    key(lhs).cmp(&key(rhs))
}

fn number_of_words(lhs: &Document, rhs: &Document) -> Ordering {
    let key = |doc: &Document| -> usize {
        GroupBy::new(&doc.matches, match_query_index).count()
    };

    key(lhs).cmp(&key(rhs)).reverse()
}

fn index_proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs < rhs {
        cmp::min(rhs - lhs, MAX_DISTANCE)
    } else {
        cmp::min(lhs - rhs, MAX_DISTANCE) + 1
    }
}

fn attribute_proximity(lhs: &Match, rhs: &Match) -> u32 {
    if lhs.attribute != rhs.attribute { return MAX_DISTANCE }
    index_proximity(lhs.attribute_index, rhs.attribute_index)
}

fn words_proximity(lhs: &Document, rhs: &Document) -> Ordering {
    let key = |doc: &Document| -> u32 {
        let mut proximity = 0;
        let mut next_group_index = 0;
        for group in GroupBy::new(&doc.matches, match_query_index) {
            next_group_index += group.len();
            // FIXME distance is wrong if 2 different attributes match
            // FIXME do this in a way that avoids memory cache misses
            if let Some(first_next_group) = doc.matches.get(next_group_index) {
                proximity += attribute_proximity(first_next_group, &group[0]);
            }
        }
        proximity
    };

    key(lhs).cmp(&key(rhs))
}

fn sum_of_words_attribute(lhs: &Document, rhs: &Document) -> Ordering {
    let key = |doc: &Document| -> u8 {
        GroupBy::new(&doc.matches, match_query_index).map(|m| m[0].attribute).sum()
    };

    key(lhs).cmp(&key(rhs))
}

fn sum_of_words_position(lhs: &Document, rhs: &Document) -> Ordering {
    let key = |doc: &Document| -> u32 {
        GroupBy::new(&doc.matches, match_query_index).map(|m| m[0].attribute_index).sum()
    };

    key(lhs).cmp(&key(rhs))
}

fn exact(lhs: &Document, rhs: &Document) -> Ordering {
    let contains_exact = |matches: &[Match]| matches.iter().any(|m| m.is_exact);
    let key = |doc: &Document| -> usize {
        GroupBy::new(&doc.matches, match_query_index).map(contains_exact).filter(|x| *x).count()
    };

    key(lhs).cmp(&key(rhs))
}

pub struct Pool {
    documents: Vec<Document>,
    limit: usize,
}

impl Pool {
    pub fn new(query_size: usize, limit: usize) -> Self {
        Self {
            documents: Vec::new(),
            limit: limit,
        }
    }

    // TODO remove the matches HashMap, not proud of it
    pub fn extend(&mut self, matches: &mut HashMap<DocumentId, Vec<Match>>) {
        for doc in self.documents.iter_mut() {
            if let Some(matches) = matches.remove(&doc.document_id) {
                doc.matches.extend(matches);
                doc.matches.sort_unstable();
            }
        }

        for (id, mut matches) in matches.drain() {
            // note that matches are already sorted, we only sort again as a safety measure
            // TODO remove this useless sort
            matches.sort_unstable();

            let document = Document::from_sorted_matches(id, matches);
            self.documents.push(document);
        }
    }
}

fn invert_sorts<F>(a: &Document, b: &Document, sorts: &[F]) -> bool
where F: Fn(&Document, &Document) -> Ordering,
{
    sorts.iter().rev().all(|sort| sort(a, b) == Ordering::Equal)
}

impl IntoIterator for Pool {
    type Item = Document;
    type IntoIter = vec::IntoIter<Self::Item>;

    fn into_iter(mut self) -> Self::IntoIter {
        let sorts = &[
            sum_of_typos,
            number_of_words,
            words_proximity,
            sum_of_words_attribute,
            sum_of_words_position,
            exact,
        ];

        for (i, sort) in sorts.iter().enumerate() {
            let mut computed = 0;
            for group in GroupByMut::new(&mut self.documents, |a, b| invert_sorts(a, b, &sorts[..i])) {
                // TODO prefer using `sort_unstable_by_key` to allow reusing the key computation
                //      `number of words` needs to be reversed, we can use the `cmp::Reverse` struct to do that
                group.sort_unstable_by(sort);
                computed += group.len();
                if computed >= self.limit { break }
            }
        }

        self.documents.truncate(self.limit);
        self.documents.into_iter()
    }
}

pub enum RankedStream<'m, 'v> {
    Fed {
        inner: UnionWithState<'m, 'v, DocIndex, u32>,
        automatons: Vec<Levenshtein>,
        pool: Pool,
    },
    Pours {
        inner: vec::IntoIter<Document>,
    },
}

impl<'m, 'v> RankedStream<'m, 'v> {
    pub fn new(map: &'m DocIndexMap, values: &'v Values<DocIndex>, automatons: Vec<Levenshtein>, limit: usize) -> Self {
        let mut op = OpWithStateBuilder::new(values);

        for automaton in automatons.iter().map(|l| l.dfa.clone()) {
            let stream = map.as_map().search(automaton).with_state();
            op.push(stream);
        }

        let pool = Pool::new(automatons.len(), limit);

        RankedStream::Fed {
            inner: op.union(),
            automatons: automatons,
            pool: pool,
        }
    }
}

impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
    type Item = DocumentId;

    fn next(&'a mut self) -> Option<Self::Item> {
        let mut matches = HashMap::new();

        loop {
            // TODO remove that when NLL are here !
            let mut transfert_pool = None;

            match self {
                RankedStream::Fed { inner, automatons, pool } => {
                    match inner.next() {
                        Some((string, indexed_values)) => {
                            for iv in indexed_values {

                                // TODO extend documents matches by batch of query_index
                                //      that way it will be possible to discard matches that
                                //      have an invalid distance *before* adding them
                                //      to the matches of the documents and, that way, avoid a sort

                                let automaton = &automatons[iv.index];
                                let distance = automaton.dfa.distance(iv.state).to_u8();

                                // TODO remove the Pool system !
                                //      this is an internal Pool rule but
                                //      it is more efficient to test that here
                                // if pool.limitation.is_reached() && distance != 0 { continue }

                                for di in iv.values {
                                    let match_ = Match {
                                        query_index: iv.index as u32,
                                        distance: distance,
                                        attribute: di.attribute,
                                        attribute_index: di.attribute_index,
                                        is_exact: string.len() == automaton.query_len,
                                    };
                                    matches.entry(di.document)
                                            .and_modify(|ms: &mut Vec<_>| ms.push(match_))
                                            .or_insert_with(|| vec![match_]);
                                }
                                pool.extend(&mut matches);
                            }
                        },
                        None => {
                            // TODO remove this when NLL are here !
                            transfert_pool = Some(mem::replace(pool, Pool::new(1, 1)));
                        },
                    }
                },
                RankedStream::Pours { inner } => {
                    return inner.next().map(|d| d.document_id)
                },
            }

            // transform the `RankedStream` into a `Pours`
            if let Some(pool) = transfert_pool {
                *self = RankedStream::Pours {
                    inner: pool.into_iter(),
                }
            }
        }
    }
}
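
Finally, a sketch of how the removed RankedStream was meant to be driven (illustrative only; the query string and the `map` value are hypothetical, and `map.values()` is the accessor shown in src/map.rs):

    use fst::Streamer;

    // One automaton per query word, then stream document ids in ranked order,
    // limited here to the 20 best documents.
    let query = "hello world";
    let lev_builder = LevBuilder::new();
    let automatons: Vec<_> = query.split_whitespace()
        .map(|word| lev_builder.get_automaton(word))
        .collect();

    let mut stream = RankedStream::new(&map, map.values(), automatons, 20);
    while let Some(document_id) = stream.next() {
        println!("matching document: {}", document_id);
    }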