Merge pull request #81 from meilisearch/smart-workspace

Change the project to become a workspace
This commit is contained in:
Clément Renault
2021-02-14 19:02:00 +01:00
committed by GitHub
58 changed files with 1250 additions and 2744 deletions

3
.gitignore vendored
View File

@@ -2,9 +2,6 @@
/target
/Cargo.lock
# the sub target folder
http-ui/target
# datasets
*.csv
*.mmdb

1210
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,70 +1,6 @@
[package]
name = "milli"
version = "0.1.0"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
bstr = "0.2.13"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
byteorder = "1.3.4"
crossbeam-channel = "0.5.0"
csv = "1.1.3"
either = "1.6.1"
flate2 = "1.0.17"
fst = "0.4.5"
fxhash = "0.2.1"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
human_format = "1.0.3"
jemallocator = "0.3.2"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
linked-hash-map = "0.5.3"
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
memmap = "0.7.0"
near-proximity = { git = "https://github.com/Kerollmops/plane-sweep-proximity", rev = "6608205" }
num-traits = "0.2.14"
obkv = "0.1.0"
once_cell = "1.4.0"
ordered-float = "2.0.0"
rayon = "1.3.1"
regex = "1.4.2"
ringtail = "0.3.0"
roaring = "0.6.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0.59", features = ["preserve_order"] }
slice-group-by = "0.2.6"
smallstr = { version = "0.2.0", features = ["serde"] }
smallvec = "1.4.0"
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
tempfile = "3.1.0"
uuid = { version = "0.8.1", features = ["v4"] }
# facet filter parser
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
pest_derive = "2.1.0"
# documents words self-join
itertools = "0.9.0"
# logging
log = "0.4.11"
stderrlog = "0.5.0"
[dev-dependencies]
criterion = "0.3.3"
maplit = "1.0.2"
[build-dependencies]
fst = "0.4.5"
[features]
default = []
[[bench]]
name = "search"
harness = false
[workspace]
members = ["milli", "http-ui", "infos", "search"]
default-members = ["milli"]
[profile.release]
debug = true

2530
http-ui/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -12,7 +12,7 @@ grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = "0.10.5"
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
memmap = "0.7.0"
milli = { path = ".." }
milli = { path = "../milli" }
once_cell = "1.4.1"
rayon = "1.5.0"
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
@@ -34,3 +34,6 @@ warp = "0.2.2"
log = "0.4.11"
stderrlog = "0.5.0"
fst = "0.4.5"
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
funty = "=1.1.0"

17
infos/Cargo.toml Normal file
View File

@@ -0,0 +1,17 @@
[package]
name = "infos"
version = "0.1.0"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
csv = "1.1.3"
heed = "0.10.5"
jemallocator = "0.3.2"
milli = { path = "../milli" }
roaring = "0.6.4"
serde_json = "1.0.59"
stderrlog = "0.5.0"
structopt = { version = "0.3.14", default-features = false }

View File

@@ -4,12 +4,16 @@ use std::{str, io, fmt};
use anyhow::Context;
use byte_unit::Byte;
use crate::Index;
use heed::EnvOpenOptions;
use milli::Index;
use structopt::StructOpt;
use Command::*;
// Register jemalloc as the global allocator. The `cfg` gate means this item
// only exists when compiling for Linux; on other targets it is compiled out.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
const MAIN_DB_NAME: &str = "main";
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
@@ -153,7 +157,18 @@ enum Command {
PatchToNewExternalIds,
}
pub fn run(opt: Opt) -> anyhow::Result<()> {
/// Binary entry point.
///
/// Parses the command line arguments into an `Opt`, hands them to `run` and,
/// when `run` fails, prints the error on stderr before returning `Err(())` so
/// that the process terminates with a failure status.
fn main() -> Result<(), ()> {
    let opt = Opt::from_args();
    // `{:#}` renders the full `anyhow` context chain ("outer: inner: ...") on a
    // single line. Plain `{}` would only show the outermost context message and
    // hide the underlying cause attached through `anyhow::Context`.
    run(opt).map_err(|e| eprintln!("{:#}", e))
}
fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)
@@ -204,7 +219,7 @@ fn patch_to_new_external_ids(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::R
let documents_ids = documents_ids.to_owned();
index.main.put::<_, ByteSlice, ByteSlice>(
wtxn,
crate::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
milli::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
&documents_ids,
)?;
index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?;
@@ -242,7 +257,7 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
rtxn: &'txn heed::RoTxn,
db: heed::Database<heed::types::ByteSlice, DC>,
field_id: u8,
facet_type: crate::facet::FacetType,
facet_type: milli::facet::FacetType,
string_fn: impl Fn(&str) -> T + 'txn,
float_fn: impl Fn(u8, f64, f64) -> T + 'txn,
integer_fn: impl Fn(u8, i64, i64) -> T + 'txn,
@@ -250,8 +265,8 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
where
DC: heed::BytesDecode<'txn>,
{
use crate::facet::FacetType;
use crate::heed_codec::facet::{
use milli::facet::FacetType;
use milli::heed_codec::facet::{
FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec,
};
@@ -504,7 +519,7 @@ fn export_words_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use std::io::{BufWriter, Write as _};
use crate::obkv_to_json;
use milli::obkv_to_json;
let stdout = io::stdout();
let mut out = BufWriter::new(stdout);
@@ -548,7 +563,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow:
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore;
use crate::{DocumentId, BEU32StrCodec};
use milli::{DocumentId, BEU32StrCodec};
let mut words_counts = Vec::new();
let mut count = 0;
@@ -587,7 +602,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore;
use crate::BoRoaringBitmapCodec;
use milli::BoRoaringBitmapCodec;
let mut values_length = Vec::new();
let mut count = 0;
@@ -639,7 +654,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
use heed::types::ByteSlice;
use heed::{Error, BytesDecode};
use roaring::RoaringBitmap;
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
db: heed::PolyDatabase,
@@ -720,7 +735,7 @@ fn word_pair_proximities_docids(
) -> anyhow::Result<()>
{
use heed::types::ByteSlice;
use crate::RoaringBitmapCodec;
use milli::RoaringBitmapCodec;
let stdout = io::stdout();
let mut wtr = csv::Writer::from_writer(stdout.lock());

60
milli/Cargo.toml Normal file
View File

@@ -0,0 +1,60 @@
[package]
name = "milli"
version = "0.1.0"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
bstr = "0.2.13"
byteorder = "1.3.4"
crossbeam-channel = "0.5.0"
csv = "1.1.3"
either = "1.6.1"
flate2 = "1.0.17"
fst = "0.4.5"
fxhash = "0.2.1"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
human_format = "1.0.3"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
linked-hash-map = "0.5.3"
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
memmap = "0.7.0"
num-traits = "0.2.14"
obkv = "0.1.0"
once_cell = "1.4.0"
ordered-float = "2.0.0"
rayon = "1.3.1"
regex = "1.4.2"
roaring = "0.6.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0.59", features = ["preserve_order"] }
smallstr = { version = "0.2.0", features = ["serde"] }
smallvec = "1.4.0"
tempfile = "3.1.0"
uuid = { version = "0.8.1", features = ["v4"] }
# facet filter parser
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
pest_derive = "2.1.0"
# documents words self-join
itertools = "0.9.0"
# logging
log = "0.4.11"
[dev-dependencies]
criterion = "0.3.3"
maplit = "1.0.2"
[build-dependencies]
fst = "0.4.5"
[features]
default = []
[[bench]]
name = "search"
harness = false

View File

@@ -3,15 +3,14 @@
mod criterion;
mod external_documents_ids;
mod fields_ids_map;
mod index;
mod mdfs;
mod query_tokens;
mod search;
mod update_store;
pub mod facet;
pub mod heed_codec;
pub mod index;
pub mod proximity;
pub mod subcommand;
pub mod update;
use std::borrow::Cow;

16
search/Cargo.toml Normal file
View File

@@ -0,0 +1,16 @@
[package]
name = "search"
version = "0.1.0"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
heed = "0.10.5"
jemallocator = "0.3.2"
log = "0.4.11"
milli = { path = "../milli" }
serde_json = "1.0.59"
stderrlog = "0.5.0"
structopt = { version = "0.3.14", default-features = false }

View File

@@ -8,7 +8,11 @@ use heed::EnvOpenOptions;
use log::debug;
use structopt::StructOpt;
use crate::{Index, obkv_to_json};
use milli::{Index, obkv_to_json};
// Register jemalloc as the global allocator. The `cfg` gate means this item
// only exists when compiling for Linux; on other targets it is compiled out.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
#[derive(Debug, StructOpt)]
/// A simple search helper binary for the milli project.
@@ -35,7 +39,18 @@ pub struct Opt {
print_facet_distribution: bool,
}
pub fn run(opt: Opt) -> anyhow::Result<()> {
/// Binary entry point.
///
/// Parses the command line arguments into an `Opt`, hands them to `run` and,
/// when `run` fails, prints the error on stderr before returning `Err(())` so
/// that the process terminates with a failure status.
fn main() -> Result<(), ()> {
    let opt = Opt::from_args();
    // `{:#}` renders the error together with its chain of causes on a single
    // line. Plain `{}` would only show the outermost message of the
    // `anyhow::Error` and drop whatever underlying error triggered it.
    run(opt).map_err(|e| eprintln!("{:#}", e))
}
fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)

View File

@@ -1,22 +0,0 @@
use structopt::StructOpt;
use milli::subcommand::infos::{self, Opt as InfosOpt};
use milli::subcommand::search::{self, Opt as SearchOpt};
// Register jemalloc as the global allocator. The `cfg` gate means this item
// only exists when compiling for Linux; on other targets it is compiled out.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
// Top-level command line interface: one subcommand per helper tool, each
// carrying the options type of the matching `milli::subcommand` module.
//
// Plain `//` comments are used on purpose: `///` doc comments on a
// `StructOpt`-derived item are picked up as help text.
#[derive(Debug, StructOpt)]
#[structopt(name = "milli", about = "The milli project.")]
enum Command {
// Options forwarded to `infos::run`.
Infos(InfosOpt),
// Options forwarded to `search::run`.
Search(SearchOpt),
}
/// Parses the command line into a `Command` and runs the matching subcommand,
/// returning whatever result that subcommand produces.
fn main() -> anyhow::Result<()> {
    let command = Command::from_args();
    match command {
        Command::Infos(infos_opt) => infos::run(infos_opt),
        Command::Search(search_opt) => search::run(search_opt),
    }
}

View File

@@ -1,2 +0,0 @@
pub mod infos;
pub mod search;