chore: Update the module hierarchy

Author: Clément Renault
Date: 2018-12-07 12:22:51 +01:00
Parent: 2c3d71dd8f
Commit: 8bee31078d

26 changed files with 33 additions and 296 deletions
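In short, every import that reached into the old top-level blob and index modules now goes through the database module. The mapping, as it appears across the hunks below (file names are not shown in this view):

// Old path                       New path
// crate::blob::...           ->  crate::database::blob::...
// crate::index::schema::...  ->  crate::database::schema::...
// crate::index::update::...  ->  crate::database::update::...
// crate::index::DATA_INDEX   ->  crate::database::DATA_INDEX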

View File

@@ -1,7 +1,7 @@
 use sdset::multi::OpBuilder as SdOpBuilder;
 use sdset::Set;
-use crate::blob::NegativeBlob;
+use crate::database::blob::NegativeBlob;
 use crate::data::DocIds;
 use crate::DocumentId;

View File

@@ -5,8 +5,8 @@ use group_by::GroupBy;
 use sdset::duo::DifferenceByKey;
 use sdset::{Set, SetOperation};
-use crate::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob};
-use crate::blob::{positive, negative};
+use crate::database::blob::{Blob, Sign, PositiveBlob, PositiveBlobBuilder, NegativeBlob};
+use crate::database::blob::{positive, negative};
 fn blob_same_sign(a: &Blob, b: &Blob) -> bool {
     a.sign() == b.sign()

View File

@@ -1,7 +1,7 @@
 use sdset::multi::OpBuilder as SdOpBuilder;
 use sdset::{SetOperation, Set};
-use crate::blob::PositiveBlob;
+use crate::database::blob::PositiveBlob;
 use crate::data::DocIndexes;
 use crate::DocIndex;

View File

@@ -5,11 +5,9 @@ use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey};
 use rocksdb::rocksdb_options::ReadOptions;
 use serde::de::DeserializeOwned;
-use crate::database::deserializer::{Deserializer, DeserializerError};
-use crate::database::{DATA_INDEX, DATA_SCHEMA};
-use crate::blob::positive::PositiveBlob;
-use crate::index::schema::Schema;
+use crate::database::{retrieve_data_schema, DocumentKey, DocumentKeyAttr};
+use crate::database::deserializer::Deserializer;
+use crate::database::schema::Schema;
 use crate::DocumentId;
 pub struct DatabaseView<'a> {

View File

@@ -8,7 +8,7 @@ use serde::de::value::MapDeserializer;
 use serde::de::{self, Visitor, IntoDeserializer};
 use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
-use crate::index::schema::Schema;
+use crate::database::schema::Schema;
 use crate::DocumentId;
 pub struct Deserializer<'a> {

View File

@@ -4,7 +4,7 @@ use std::fmt;
 use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt};
-use crate::index::schema::SchemaAttr;
+use crate::database::schema::SchemaAttr;
 use crate::DocumentId;
 const DOC_KEY_LEN: usize = 4 + size_of::<u64>();

View File

@@ -7,13 +7,16 @@ use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions};
 use rocksdb::{DB, DBVector, MergeOperands, SeekKey};
 use rocksdb::rocksdb::{Writable, Snapshot};
-pub use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
-pub use crate::database::database_view::DatabaseView;
-use crate::index::update::Update;
-use crate::index::schema::Schema;
-use crate::blob::positive::PositiveBlob;
-use crate::blob::{self, Blob};
+pub use self::document_key::{DocumentKey, DocumentKeyAttr};
+pub use self::database_view::DatabaseView;
+use self::blob::positive::PositiveBlob;
+use self::update::Update;
+use self::schema::Schema;
+use self::blob::Blob;
+pub mod blob;
+pub mod schema;
+pub mod update;
 mod document_key;
 mod database_view;
 mod deserializer;
@@ -163,14 +166,13 @@ fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
 mod tests {
     use super::*;
     use std::error::Error;
-    use std::path::PathBuf;
     use serde_derive::{Serialize, Deserialize};
     use tempfile::tempdir;
     use crate::tokenizer::DefaultBuilder;
-    use crate::index::update::PositiveUpdateBuilder;
-    use crate::index::schema::{Schema, SchemaBuilder, STORED, INDEXED};
+    use crate::database::update::PositiveUpdateBuilder;
+    use crate::database::schema::{SchemaBuilder, STORED, INDEXED};

     #[test]
     fn ingest_update_file() -> Result<(), Box<Error>> {

View File

@@ -1,12 +1,6 @@
-use std::io::{Cursor, Write};
 use std::path::PathBuf;
 use std::error::Error;
-use byteorder::{NetworkEndian, WriteBytesExt};
-use crate::index::schema::SchemaAttr;
-use crate::DocumentId;
 mod negative;
 mod positive;

View File

@@ -3,11 +3,11 @@ use std::error::Error;
 use ::rocksdb::rocksdb_options;
-use crate::index::update::negative::unordered_builder::UnorderedNegativeBlobBuilder;
-use crate::index::update::Update;
-use crate::database::{DocumentKey, DocumentKeyAttr};
-use crate::blob::{Blob, NegativeBlob};
-use crate::index::DATA_INDEX;
+use crate::database::update::negative::unordered_builder::UnorderedNegativeBlobBuilder;
+use crate::database::blob::{Blob, NegativeBlob};
+use crate::database::update::Update;
+use crate::database::DocumentKey;
+use crate::database::DATA_INDEX;
 use crate::DocumentId;
 pub struct NegativeUpdateBuilder {

View File

@@ -2,7 +2,7 @@ use std::collections::BTreeMap;
 use std::error::Error;
 use std::io::Write;
-use crate::blob::positive::PositiveBlobBuilder;
+use crate::database::blob::positive::PositiveBlobBuilder;
 use crate::DocIndex;
 pub struct UnorderedPositiveBlobBuilder<W, X> {

View File

@@ -6,15 +6,15 @@ use std::fmt;
 use ::rocksdb::rocksdb_options;
 use serde::ser::{self, Serialize};
-use crate::index::update::positive::unordered_builder::UnorderedPositiveBlobBuilder;
-use crate::index::schema::{SchemaProps, Schema, SchemaAttr};
-use crate::index::update::Update;
-use crate::database::{DocumentKey, DocumentKeyAttr};
-use crate::blob::positive::PositiveBlob;
+use crate::database::update::positive::unordered_builder::UnorderedPositiveBlobBuilder;
+use crate::database::blob::positive::PositiveBlob;
+use crate::database::schema::{Schema, SchemaAttr};
 use crate::tokenizer::TokenizerBuilder;
+use crate::database::DocumentKeyAttr;
+use crate::database::update::Update;
 use crate::{DocumentId, DocIndex};
-use crate::index::DATA_INDEX;
-use crate::blob::Blob;
+use crate::database::DATA_INDEX;
+use crate::database::blob::Blob;
 pub enum NewState {
     Updated { value: Vec<u8> },

View File

@@ -1,128 +0,0 @@
-pub mod schema;
-pub mod update;
-
-use std::error::Error;
-use std::path::Path;
-
-use ::rocksdb::rocksdb::Writable;
-use ::rocksdb::{rocksdb, rocksdb_options};
-use ::rocksdb::merge_operator::MergeOperands;
-
-use crate::rank::Document;
-use crate::index::schema::Schema;
-use crate::index::update::Update;
-use crate::rank::QueryBuilder;
-use crate::blob::{self, Blob};
-
-const DATA_INDEX: &[u8] = b"data-index";
-const DATA_SCHEMA: &[u8] = b"data-schema";
-
-fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
-    if key != DATA_INDEX { panic!("The merge operator only supports \"data-index\" merging") }
-
-    let capacity = {
-        let remaining = operands.size_hint().0;
-        let already_exist = usize::from(existing_value.is_some());
-        remaining + already_exist
-    };
-
-    let mut op = blob::OpBuilder::with_capacity(capacity);
-    if let Some(existing_value) = existing_value {
-        let blob = bincode::deserialize(existing_value).expect("BUG: could not deserialize data-index");
-        op.push(Blob::Positive(blob));
-    }
-
-    for bytes in operands {
-        let blob = bincode::deserialize(bytes).expect("BUG: could not deserialize blob");
-        op.push(blob);
-    }
-
-    let blob = op.merge().expect("BUG: could not merge blobs");
-    bincode::serialize(&blob).expect("BUG: could not serialize merged blob")
-}
-
-pub struct Index {
-    database: rocksdb::DB,
-}
-
-impl Index {
-    pub fn create<P: AsRef<Path>>(path: P, schema: Schema) -> Result<Index, Box<Error>> {
-        // Self::open must not take a parameter for create_if_missing
-        // or we must create an OpenOptions with many parameters
-        // https://doc.rust-lang.org/std/fs/struct.OpenOptions.html
-
-        let path = path.as_ref();
-        if path.exists() {
-            return Err(format!("File already exists at path: {}, cannot create database.",
-                               path.display()).into())
-        }
-
-        let path = path.to_string_lossy();
-        let mut opts = rocksdb_options::DBOptions::new();
-        opts.create_if_missing(true);
-
-        let mut cf_opts = rocksdb_options::ColumnFamilyOptions::new();
-        cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
-
-        let database = rocksdb::DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
-
-        let mut schema_bytes = Vec::new();
-        schema.write_to(&mut schema_bytes)?;
-        database.put(DATA_SCHEMA, &schema_bytes)?;
-
-        Ok(Self { database })
-    }
-
-    pub fn open<P: AsRef<Path>>(path: P) -> Result<Index, Box<Error>> {
-        let path = path.as_ref().to_string_lossy();
-
-        let mut opts = rocksdb_options::DBOptions::new();
-        opts.create_if_missing(false);
-
-        let mut cf_opts = rocksdb_options::ColumnFamilyOptions::new();
-        cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
-
-        let database = rocksdb::DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
-
-        // compacting to avoid calling the merge operator
-        database.compact_range(Some(DATA_INDEX), Some(DATA_INDEX));
-
-        let _schema = match database.get(DATA_SCHEMA)? {
-            Some(value) => Schema::read_from(&*value)?,
-            None => return Err(String::from("Database does not contain a schema").into()),
-        };
-
-        Ok(Self { database })
-    }
-
-    pub fn ingest_update(&self, update: Update) -> Result<(), Box<Error>> {
-        let path = update.into_path_buf();
-        let path = path.to_string_lossy();
-
-        let mut options = rocksdb_options::IngestExternalFileOptions::new();
-        // options.move_files(true);
-
-        let cf_handle = self.database.cf_handle("default").unwrap();
-        self.database.ingest_external_file_optimized(&cf_handle, &options, &[&path])?;
-
-        // compacting to avoid calling the merge operator
-        self.database.compact_range(Some(DATA_INDEX), Some(DATA_INDEX));
-
-        Ok(())
-    }
-
-    pub fn schema(&self) -> Result<Schema, Box<Error>> {
-        let bytes = self.database.get(DATA_SCHEMA)?.expect("data-schema entry not found");
-        Ok(Schema::read_from(&*bytes).expect("Invalid schema"))
-    }
-
-    pub fn search(&self, query: &str) -> Result<Vec<Document>, Box<Error>> {
-        // this snapshot will allow consistent reads for the whole search operation
-        let snapshot = self.database.snapshot();
-        let builder = QueryBuilder::new(snapshot)?;
-
-        let documents = builder.query(query, 20);
-
-        Ok(documents)
-    }
-}
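The file deleted above is the root of the old index module (presumably src/index/mod.rs, given its pub mod schema; and pub mod update; declarations). Its logic moves rather than disappears: the @@ -163,14 +166,13 @@ fn merge_indexes(...) hunk header earlier shows the same merge operator now sitting in the database module. For callers, only the paths change; a minimal sketch (illustrative, not a line from this commit):

// Hypothetical downstream imports after this commit; the types keep their names.
use crate::database::blob::positive::PositiveBlob;
use crate::database::schema::Schema;
use crate::database::update::Update;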

Binary file not shown.

View File

@@ -1,8 +1,6 @@
 pub mod automaton;
-pub mod blob;
 pub mod database;
 pub mod data;
-pub mod index;
 pub mod rank;
 pub mod tokenizer;
 pub mod vec_read_only;
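Applied, this hunk leaves the crate root (presumably src/lib.rs) with six public modules, blob and index having been folded into database:

pub mod automaton;
pub mod database;
pub mod data;
pub mod rank;
pub mod tokenizer;
pub mod vec_read_only;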

View File

@@ -12,7 +12,7 @@ use crate::automaton::{self, DfaExt, AutomatonExt};
 use crate::rank::criterion::{self, Criterion};
 use crate::rank::distinct_map::DistinctMap;
 use crate::database::retrieve_data_index;
-use crate::blob::PositiveBlob;
+use crate::database::blob::PositiveBlob;
 use crate::{Match, DocumentId};
 use crate::rank::Document;