feat: Introduce the index module

This commit is contained in:
Clément Renault
2018-11-20 11:37:19 +01:00
parent b3249d515d
commit 7c1a17520d
9 changed files with 188 additions and 92 deletions

41
examples/create-index.rs Normal file
View File

@ -0,0 +1,41 @@
use std::path::Path;
use std::error::Error;
use std::path::PathBuf;
use std::io::{self, Write};
use elapsed::measure_time;
use moby_name_gen::random_name;
use structopt::StructOpt;
use pentium::index::update::Update;
use pentium::index::Index;
// Command-line arguments of the `create-index` example, parsed through the
// `StructOpt` derive (see `Cmd::from_args()` in `main`).
//
// NOTE(review): plain `//` comments are used here on purpose. structopt turns
// `///` doc comments on derived items into help text, so adding or editing
// one would change what the program prints for `--help`.
#[derive(Debug, StructOpt)]
pub struct Cmd {
/// csv file to index
// Taken from the raw OS string, so the path is not required to be valid UTF-8.
#[structopt(parse(from_os_str))]
pub csv_file: PathBuf,
}
/// Intended to build the `Update` that `main` ingests, from the CSV file at `path`.
///
/// Not implemented yet: the body is a placeholder, so every call panics through
/// `unimplemented!()` and neither `Ok` nor `Err` is ever returned.
fn generate_update_from_csv(path: &Path) -> Result<Update, Box<Error>> {
// Placeholder until the CSV parsing is written; `path` is unused for now.
unimplemented!()
}
fn main() -> Result<(), Box<Error>> {
let command = Cmd::from_args();
let path = random_name();
println!("generating the update...");
let update = generate_update_from_csv(&command.csv_file)?;
println!("creating the index");
let index = Index::open(&path)?;
println!("ingesting the changes in the index");
index.ingest_update(update)?;
println!("the index {:?} has been created!", path);
Ok(())
}

40
examples/index-search.rs Normal file
View File

@ -0,0 +1,40 @@
use std::error::Error;
use std::path::PathBuf;
use std::io::{self, Write};
use elapsed::measure_time;
use structopt::StructOpt;
use pentium::index::Index;
// Command-line arguments of the `index-search` example, parsed through the
// `StructOpt` derive (see `Cmd::from_args()` in `main`).
//
// NOTE(review): plain `//` comments are used here on purpose. structopt turns
// `///` doc comments on derived items into help text, so adding or editing
// one would change what the program prints for `--help`.
#[derive(Debug, StructOpt)]
pub struct Cmd {
/// Index path (e.g. relaxed-colden).
// Taken from the raw OS string, so the path is not required to be valid UTF-8.
#[structopt(parse(from_os_str))]
pub index_path: PathBuf,
}
fn main() -> Result<(), Box<Error>> {
let command = Cmd::from_args();
let index = Index::open(command.index_path)?;
loop {
print!("Searching for: ");
io::stdout().flush()?;
let mut query = String::new();
io::stdin().read_line(&mut query)?;
if query.is_empty() { break }
let (elapsed, result) = measure_time(|| index.search(&query));
match result {
Ok(documents) => {
// display documents here !
println!("Finished in {}", elapsed)
},
Err(e) => panic!("{}", e),
}
}
Ok(())
}

View File

@ -1,3 +1,4 @@
use std::error::Error;
use std::str::from_utf8_unchecked;
use std::io::{self, Write};
use structopt::StructOpt;
@ -5,37 +6,25 @@ use std::path::PathBuf;
use elapsed::measure_time;
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
use pentium::index::Index;
use pentium::rank::{criterion, Config, RankedStream};
use pentium::{automaton, DocumentId, Metadata};
use pentium::{automaton, DocumentId};
#[derive(Debug, StructOpt)]
pub struct CommandConsole {
/// Meta file name (e.g. relaxed-colden).
#[structopt(parse(from_os_str))]
pub meta_name: PathBuf,
pub index_path: PathBuf,
}
pub struct ConsoleSearch {
metadata: Metadata,
db: DB,
index: Index,
}
impl ConsoleSearch {
pub fn from_command(command: CommandConsole) -> io::Result<ConsoleSearch> {
let map_file = command.meta_name.with_extension("map");
let idx_file = command.meta_name.with_extension("idx");
let sst_file = command.meta_name.with_extension("sst");
let metadata = unsafe { Metadata::from_paths(map_file, idx_file).unwrap() };
let rocksdb = "rocksdb/storage";
let db = DB::open_default(rocksdb).unwrap();
let sst_file = sst_file.to_str().unwrap();
db.ingest_external_file(&IngestExternalFileOptions::new(), &[sst_file]).unwrap();
drop(db);
let db = DB::open_for_read_only(DBOptions::default(), rocksdb, false).unwrap();
Ok(ConsoleSearch { metadata, db })
pub fn from_command(command: CommandConsole) -> Result<ConsoleSearch, Box<Error>> {
let index = Index::open(command.index_path)?;
Ok(ConsoleSearch { index })
}
pub fn serve(self) {
@ -48,13 +37,13 @@ impl ConsoleSearch {
if query.is_empty() { break }
let (elapsed, _) = measure_time(|| search(&self.metadata, &self.db, &query));
let (elapsed, _) = measure_time(|| search(&self.index, &query));
println!("Finished in {}", elapsed);
}
}
}
fn search(metadata: &Metadata, database: &DB, query: &str) {
fn search(index: &Index, query: &str) {
let mut automatons = Vec::new();
for query in query.split_whitespace().map(str::to_lowercase) {
let lev = automaton::build_prefix_dfa(&query);
@ -75,9 +64,11 @@ fn search(metadata: &Metadata, database: &DB, query: &str) {
}
};
let index: Index = unimplemented!();
// "Sony" "PlayStation 4 500GB"
let config = Config {
index: unimplemented!(),
blobs: &index.blobs().unwrap(),
automatons: automatons,
criteria: criterion::default(),
distinct: (distinct_by_title_first_four_chars, 1),