mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 01:01:00 +00:00
feat: Improve the indexing time a little bit
...by a factor of 17.6x.
This commit is contained in:
@ -4,11 +4,8 @@ version = "0.1.0"
|
||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||
|
||||
[dependencies]
|
||||
env_logger = { version = "0.3", default-features = false }
|
||||
raptor = { path = "../raptor" }
|
||||
elapsed = "0.1"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
|
||||
[dependencies.fst]
|
||||
git = "https://github.com/Kerollmops/fst.git"
|
||||
|
@ -1,4 +1,3 @@
|
||||
extern crate env_logger;
|
||||
extern crate rocksdb;
|
||||
extern crate fst;
|
||||
extern crate raptor;
|
||||
@ -10,49 +9,55 @@ use std::io::{self, Write};
|
||||
use elapsed::measure_time;
|
||||
use fst::Streamer;
|
||||
use rocksdb::{DB, DBOptions};
|
||||
use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder};
|
||||
use raptor::{Metadata, RankedStream, LevBuilder};
|
||||
|
||||
fn search(map: &DocIndexMap, lev_builder: &LevBuilder, db: &DB, query: &str) {
|
||||
fn search(metadata: &Metadata, database: &DB, lev_builder: &LevBuilder, query: &str) {
|
||||
let mut automatons = Vec::new();
|
||||
for query in query.split_whitespace() {
|
||||
let lev = lev_builder.get_automaton(query);
|
||||
automatons.push(lev);
|
||||
}
|
||||
|
||||
let mut stream = RankedStream::new(&map, map.values(), automatons, 20);
|
||||
let map = metadata.as_map();
|
||||
let indexes = metadata.as_indexes();
|
||||
|
||||
let mut stream = RankedStream::new(&map, &indexes, automatons, 20);
|
||||
while let Some(document) = stream.next() {
|
||||
print!("{:?} ", document.document_id);
|
||||
print!("{:?}", document.document_id);
|
||||
|
||||
let title_key = format!("{}-title", document.document_id);
|
||||
let title = db.get(title_key.as_bytes()).unwrap().unwrap();
|
||||
let title = database.get(title_key.as_bytes()).unwrap().unwrap();
|
||||
let title = unsafe { from_utf8_unchecked(&title) };
|
||||
print!("{:?}", title);
|
||||
print!(" {:?}", title);
|
||||
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
drop(env_logger::init());
|
||||
let map_file = "map.meta";
|
||||
let indexes_file = "indexes.meta";
|
||||
let rocksdb_file = "rocksdb/storage";
|
||||
|
||||
let (elapsed, map) = measure_time(|| load_map("map.fst", "values.vecs").unwrap());
|
||||
println!("{} to load the map", elapsed);
|
||||
let (elapsed, meta) = measure_time(|| unsafe {
|
||||
Metadata::from_paths(map_file, indexes_file).unwrap()
|
||||
});
|
||||
println!("{} to load metadata", elapsed);
|
||||
|
||||
let (elapsed, db) = measure_time(|| {
|
||||
let options = DBOptions::new();
|
||||
DB::open_for_read_only(options, rocksdb_file, false).unwrap()
|
||||
});
|
||||
println!("{} to load the RocksDB database", elapsed);
|
||||
|
||||
let (elapsed, lev_builder) = measure_time(|| LevBuilder::new());
|
||||
println!("{} to load the levenshtein automaton", elapsed);
|
||||
|
||||
let (elapsed, db) = measure_time(|| {
|
||||
let opts = DBOptions::new();
|
||||
let error_if_log_file_exist = false;
|
||||
DB::open_for_read_only(opts, "rocksdb/storage", error_if_log_file_exist).unwrap()
|
||||
});
|
||||
println!("{} to load the rocksdb DB", elapsed);
|
||||
|
||||
match env::args().nth(1) {
|
||||
Some(query) => {
|
||||
println!("Searching for: {:?}", query);
|
||||
let query = query.to_lowercase();
|
||||
let (elapsed, _) = measure_time(|| search(&map, &lev_builder, &db, &query));
|
||||
let (elapsed, _) = measure_time(|| search(&meta, &db, &lev_builder, &query));
|
||||
println!("Finished in {}", elapsed);
|
||||
},
|
||||
None => loop {
|
||||
@ -65,7 +70,7 @@ fn main() {
|
||||
|
||||
if query.is_empty() { break }
|
||||
|
||||
let (elapsed, _) = measure_time(|| search(&map, &lev_builder, &db, &query));
|
||||
let (elapsed, _) = measure_time(|| search(&meta, &db, &lev_builder, &query));
|
||||
println!("Finished in {}", elapsed);
|
||||
},
|
||||
}
|
||||
|
Reference in New Issue
Block a user