feat: Introduce a working key-value based database

This commit is contained in:
Clément Renault
2018-11-22 15:44:51 +01:00
parent 86f23d2695
commit 66dac923bf
10 changed files with 217 additions and 94 deletions

View File

@@ -1,13 +1,15 @@
use std::fs;
use std::path::Path;
use std::error::Error;
use std::path::PathBuf;
use std::io::{self, Write};
use elapsed::measure_time;
use moby_name_gen::random_name;
use structopt::StructOpt;
use pentium::index::update::Update;
use pentium::index::schema::{Schema, SchemaBuilder, STORED, INDEXED};
use pentium::index::update::{Update, PositiveUpdateBuilder};
use pentium::tokenizer::DefaultBuilder;
use pentium::index::Index;
#[derive(Debug, StructOpt)]
@@ -17,8 +19,47 @@ pub struct Cmd {
pub csv_file: PathBuf,
}
fn generate_update_from_csv(path: &Path) -> Result<Update, Box<Error>> {
unimplemented!()
/// Builds a schema and a positive update from the CSV file at `path`.
///
/// Every CSV header becomes a `STORED | INDEXED` attribute of the schema.
/// Exactly one header must be named "id"; for each record the value of that
/// column is parsed and used as the identifier given to `update_field`,
/// together with every value of the record.
///
/// The update builder is handed the fixed path `./positive-update-xxx.sst`.
///
/// # Errors
///
/// Returns an error when:
/// - the CSV file cannot be opened or its headers cannot be read,
/// - no "id" header is present, or more than one is present,
/// - a record has no value in the "id" column or that value fails to parse,
/// - building the update fails.
///
/// Records that the CSV reader fails to decode are printed on stderr and
/// skipped instead of aborting the whole import.
fn generate_update_from_csv(path: &Path) -> Result<(Schema, Update), Box<Error>> {
    let mut csv = csv::Reader::from_path(path)?;

    // Attributes are kept in header order so they can be zipped with the
    // values of each record further down.
    let mut attributes = Vec::new();
    let (schema, id_attr_index) = {
        let mut id_attr_index: Option<usize> = None;
        let mut builder = SchemaBuilder::new();

        for (i, header_name) in csv.headers()?.iter().enumerate() {
            if header_name == "id" {
                // A second "id" column would make the document identifier
                // ambiguous, so reject the file instead of letting the last
                // one silently win.
                if id_attr_index.is_some() {
                    return Err(String::from("Multiple \"id\" fields found, only one is allowed").into());
                }
                id_attr_index = Some(i);
            }

            let field = builder.new_attribute(header_name, STORED | INDEXED);
            attributes.push(field);
        }

        let id = match id_attr_index {
            Some(index) => index,
            None => return Err(String::from("No \"id\" field found which is mandatory").into()),
        };

        (builder.build(), id)
    };

    let update_path = PathBuf::from("./positive-update-xxx.sst");
    let tokenizer_builder = DefaultBuilder::new();
    let mut builder = PositiveUpdateBuilder::new(&update_path, schema.clone(), tokenizer_builder);

    for record in csv.records() {
        let record = match record {
            Ok(x) => x,
            Err(e) => { eprintln!("{:?}", e); continue }
        };

        // Report a missing "id" value as an error rather than panicking on
        // data that comes straight from the input file.
        let id = record
            .get(id_attr_index)
            .ok_or_else(|| String::from("Record is missing the mandatory \"id\" field"))?
            .parse()?;

        for (value, attr) in record.into_iter().zip(&attributes) {
            builder.update_field(id, *attr, value.to_string());
        }
    }

    builder.build().map(|update| (schema, update))
}
fn main() -> Result<(), Box<Error>> {
@@ -27,14 +68,19 @@ fn main() -> Result<(), Box<Error>> {
let path = random_name();
println!("generating the update...");
let update = generate_update_from_csv(&command.csv_file)?;
let (schema, update) = generate_update_from_csv(&command.csv_file)?;
println!("creating the index");
let index = Index::open(&path)?;
let index = Index::create(&path, schema)?;
println!("ingesting the changes in the index");
index.ingest_update(update)?;
// FIXME: this is really ugly!
// The index does not support moving update files,
// so we must remove the file by hand.
fs::remove_file("./positive-update-xxx.sst")?;
println!("the index {:?} has been created!", path);
Ok(())

View File

@@ -29,7 +29,7 @@ fn main() -> Result<(), Box<Error>> {
let (elapsed, result) = measure_time(|| index.search(&query));
match result {
Ok(documents) => {
// display documents here !
println!("{:?}", documents);
println!("Finished in {}", elapsed)
},
Err(e) => panic!("{}", e),