mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Move the binaries into one with subcommands
This commit is contained in:
		| @@ -5,6 +5,7 @@ mod search; | ||||
| mod update_store; | ||||
| pub mod heed_codec; | ||||
| pub mod proximity; | ||||
| pub mod subcommand; | ||||
| pub mod tokenizer; | ||||
|  | ||||
| use std::collections::HashMap; | ||||
|   | ||||
							
								
								
									
										28
									
								
								src/main.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								src/main.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| use structopt::StructOpt; | ||||
|  | ||||
| use milli::subcommand::indexer::{self, Opt as IndexerOpt}; | ||||
| use milli::subcommand::infos::{self, Opt as InfosOpt}; | ||||
| use milli::subcommand::serve::{self, Opt as ServeOpt}; | ||||
| use milli::subcommand::search::{self, Opt as SearchOpt}; | ||||
|  | ||||
| #[cfg(target_os = "linux")] | ||||
| #[global_allocator] | ||||
| static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; | ||||
|  | ||||
| #[derive(Debug, StructOpt)] | ||||
| #[structopt(name = "milli", about = "The milli project.")] | ||||
| enum Command { | ||||
|     Serve(ServeOpt), | ||||
|     Indexer(IndexerOpt), | ||||
|     Infos(InfosOpt), | ||||
|     Search(SearchOpt), | ||||
| } | ||||
|  | ||||
| fn main() -> anyhow::Result<()> { | ||||
|     match Command::from_args() { | ||||
|         Command::Serve(opt) => serve::run(opt), | ||||
|         Command::Indexer(opt) => indexer::run(opt), | ||||
|         Command::Infos(opt) => infos::run(opt), | ||||
|         Command::Search(opt) => search::run(opt), | ||||
|     } | ||||
| } | ||||
| @@ -22,9 +22,9 @@ use roaring::RoaringBitmap; | ||||
| use structopt::StructOpt; | ||||
| use tempfile::tempfile; | ||||
| 
 | ||||
| use milli::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; | ||||
| use milli::tokenizer::{simple_tokenizer, only_token}; | ||||
| use milli::{SmallVec32, Index, Position, DocumentId}; | ||||
| use crate::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; | ||||
| use crate::tokenizer::{simple_tokenizer, only_token}; | ||||
| use crate::{SmallVec32, Index, Position, DocumentId}; | ||||
| 
 | ||||
| const LMDB_MAX_KEY_LENGTH: usize = 511; | ||||
| const ONE_KILOBYTE: usize = 1024 * 1024; | ||||
| @@ -32,18 +32,14 @@ const ONE_KILOBYTE: usize = 1024 * 1024; | ||||
| const MAX_POSITION: usize = 1000; | ||||
| const MAX_ATTRIBUTES: usize = u32::max_value() as usize / MAX_POSITION; | ||||
| 
 | ||||
| const WORDS_FST_KEY: &[u8] = milli::WORDS_FST_KEY.as_bytes(); | ||||
| const HEADERS_KEY: &[u8] = milli::HEADERS_KEY.as_bytes(); | ||||
| const DOCUMENTS_IDS_KEY: &[u8] = milli::DOCUMENTS_IDS_KEY.as_bytes(); | ||||
| 
 | ||||
| #[cfg(target_os = "linux")] | ||||
| #[global_allocator] | ||||
| static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; | ||||
| const WORDS_FST_KEY: &[u8] = crate::WORDS_FST_KEY.as_bytes(); | ||||
| const HEADERS_KEY: &[u8] = crate::HEADERS_KEY.as_bytes(); | ||||
| const DOCUMENTS_IDS_KEY: &[u8] = crate::DOCUMENTS_IDS_KEY.as_bytes(); | ||||
| 
 | ||||
| #[derive(Debug, StructOpt)] | ||||
| #[structopt(name = "milli-indexer")] | ||||
| /// The indexer binary of the milli project.
 | ||||
| struct Opt { | ||||
| pub struct Opt { | ||||
|     /// The database path where the database is located.
 | ||||
|     /// It is created if it doesn't already exist.
 | ||||
|     #[structopt(long = "db", parse(from_os_str))] | ||||
| @@ -191,7 +187,7 @@ fn compute_words_pair_proximities( | ||||
|     for ((w1, ps1), (w2, ps2)) in word_positions.iter().cartesian_product(word_positions) { | ||||
|         let mut min_prox = None; | ||||
|         for (ps1, ps2) in ps1.iter().cartesian_product(ps2) { | ||||
|             let prox = milli::proximity::positions_proximity(*ps1, *ps2); | ||||
|             let prox = crate::proximity::positions_proximity(*ps1, *ps2); | ||||
|             let prox = u8::try_from(prox).unwrap(); | ||||
|             // We don't care about a word that appear at the
 | ||||
|             // same position or too far from the other.
 | ||||
| @@ -736,9 +732,7 @@ fn csv_readers( | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| fn main() -> anyhow::Result<()> { | ||||
|     let opt = Opt::from_args(); | ||||
| 
 | ||||
| pub fn run(opt: Opt) -> anyhow::Result<()> { | ||||
|     stderrlog::new() | ||||
|         .verbosity(opt.verbose) | ||||
|         .show_level(false) | ||||
| @@ -2,16 +2,12 @@ use std::path::PathBuf; | ||||
| use std::{str, io}; | ||||
| 
 | ||||
| use anyhow::Context; | ||||
| use crate::Index; | ||||
| use heed::EnvOpenOptions; | ||||
| use milli::Index; | ||||
| use structopt::StructOpt; | ||||
| 
 | ||||
| use Command::*; | ||||
| 
 | ||||
| #[cfg(target_os = "linux")] | ||||
| #[global_allocator] | ||||
| static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; | ||||
| 
 | ||||
| const MAIN_DB_NAME: &str = "main"; | ||||
| const WORD_DOCIDS_DB_NAME: &str = "word-docids"; | ||||
| const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions"; | ||||
| @@ -33,8 +29,8 @@ const POSTINGS_DATABASE_NAMES: &[&str] = &[ | ||||
| ]; | ||||
| 
 | ||||
| #[derive(Debug, StructOpt)] | ||||
| #[structopt(name = "milli-info", about = "A stats crawler for milli.")] | ||||
| struct Opt { | ||||
| /// A stats fetcher for milli.
 | ||||
| pub struct Opt { | ||||
|     /// The database path where the database is located.
 | ||||
|     /// It is created if it doesn't already exist.
 | ||||
|     #[structopt(long = "db", parse(from_os_str))] | ||||
| @@ -133,8 +129,11 @@ enum Command { | ||||
|     }, | ||||
| } | ||||
| 
 | ||||
| fn main() -> anyhow::Result<()> { | ||||
|     let opt = Opt::from_args(); | ||||
| pub fn run(opt: Opt) -> anyhow::Result<()> { | ||||
|     let env = EnvOpenOptions::new() | ||||
|         .map_size(opt.database_size) | ||||
|         .max_dbs(10) | ||||
|         .open(&opt.database)?; | ||||
| 
 | ||||
|     stderrlog::new() | ||||
|         .verbosity(opt.verbose) | ||||
| @@ -142,11 +141,6 @@ fn main() -> anyhow::Result<()> { | ||||
|         .timestamp(stderrlog::Timestamp::Off) | ||||
|         .init()?; | ||||
| 
 | ||||
|     let env = EnvOpenOptions::new() | ||||
|         .map_size(opt.database_size) | ||||
|         .max_dbs(10) | ||||
|         .open(&opt.database)?; | ||||
| 
 | ||||
|     // Open the LMDB database.
 | ||||
|     let index = Index::new(&env)?; | ||||
|     let rtxn = env.read_txn()?; | ||||
| @@ -196,7 +190,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho | ||||
|     use std::cmp::Reverse; | ||||
|     use std::collections::BinaryHeap; | ||||
|     use heed::types::{Str, ByteSlice}; | ||||
|     use milli::heed_codec::BEU32StrCodec; | ||||
|     use crate::heed_codec::BEU32StrCodec; | ||||
| 
 | ||||
|     let main_name = "main"; | ||||
|     let word_docids_name = "word_docids"; | ||||
| @@ -306,7 +300,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow: | ||||
| 
 | ||||
| fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { | ||||
|     use heed::types::DecodeIgnore; | ||||
|     use milli::{DocumentId, BEU32StrCodec}; | ||||
|     use crate::{DocumentId, BEU32StrCodec}; | ||||
| 
 | ||||
|     let mut words_counts = Vec::new(); | ||||
|     let mut count = 0; | ||||
| @@ -345,7 +339,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow:: | ||||
| 
 | ||||
| fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { | ||||
|     use heed::types::DecodeIgnore; | ||||
|     use milli::BoRoaringBitmapCodec; | ||||
|     use crate::BoRoaringBitmapCodec; | ||||
| 
 | ||||
|     let mut values_length = Vec::new(); | ||||
|     let mut count = 0; | ||||
| @@ -397,7 +391,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu | ||||
|     use heed::types::ByteSlice; | ||||
|     use heed::{Error, BytesDecode}; | ||||
|     use roaring::RoaringBitmap; | ||||
|     use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; | ||||
|     use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; | ||||
| 
 | ||||
|     fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>( | ||||
|         db: heed::PolyDatabase, | ||||
| @@ -478,7 +472,7 @@ fn word_pair_proximities_docids( | ||||
| ) -> anyhow::Result<()> | ||||
| { | ||||
|     use heed::types::ByteSlice; | ||||
|     use milli::RoaringBitmapCodec; | ||||
|     use crate::RoaringBitmapCodec; | ||||
| 
 | ||||
|     let stdout = io::stdout(); | ||||
|     let mut wtr = csv::Writer::from_writer(stdout.lock()); | ||||
							
								
								
									
										4
									
								
								src/subcommand/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								src/subcommand/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| pub mod indexer; | ||||
| pub mod infos; | ||||
| pub mod search; | ||||
| pub mod serve; | ||||
| @@ -5,16 +5,13 @@ use std::time::Instant; | ||||
| 
 | ||||
| use heed::EnvOpenOptions; | ||||
| use log::debug; | ||||
| use milli::Index; | ||||
| use structopt::StructOpt; | ||||
| 
 | ||||
| #[cfg(target_os = "linux")] | ||||
| #[global_allocator] | ||||
| static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; | ||||
| use crate::Index; | ||||
| 
 | ||||
| #[derive(Debug, StructOpt)] | ||||
| #[structopt(name = "milli-search", about = "A simple search binary for milli project.")] | ||||
| struct Opt { | ||||
| /// A simple search helper binary for the milli project.
 | ||||
| pub struct Opt { | ||||
|     /// The database path where the database is located.
 | ||||
|     /// It is created if it doesn't already exist.
 | ||||
|     #[structopt(long = "db", parse(from_os_str))] | ||||
| @@ -33,9 +30,7 @@ struct Opt { | ||||
|     query: Option<String>, | ||||
| } | ||||
| 
 | ||||
| fn main() -> anyhow::Result<()> { | ||||
|     let opt = Opt::from_args(); | ||||
| 
 | ||||
| pub fn run(opt: Opt) -> anyhow::Result<()> { | ||||
|     stderrlog::new() | ||||
|         .verbosity(opt.verbose) | ||||
|         .show_level(false) | ||||
| @@ -11,16 +11,12 @@ use serde::Deserialize; | ||||
| use structopt::StructOpt; | ||||
| use warp::{Filter, http::Response}; | ||||
| 
 | ||||
| use milli::tokenizer::{simple_tokenizer, TokenType}; | ||||
| use milli::{Index, SearchResult}; | ||||
| 
 | ||||
| #[cfg(target_os = "linux")] | ||||
| #[global_allocator] | ||||
| static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; | ||||
| use crate::tokenizer::{simple_tokenizer, TokenType}; | ||||
| use crate::{Index, SearchResult}; | ||||
| 
 | ||||
| #[derive(Debug, StructOpt)] | ||||
| #[structopt(name = "milli", about = "The server binary of the milli project.")] | ||||
| struct Opt { | ||||
| /// The HTTP main server of the milli project.
 | ||||
| pub struct Opt { | ||||
|     /// The database path where the LMDB database is located.
 | ||||
|     /// It is created if it doesn't already exist.
 | ||||
|     #[structopt(long = "db", parse(from_os_str))] | ||||
| @@ -73,10 +69,7 @@ struct IndexTemplate { | ||||
|     docs_count: usize, | ||||
| } | ||||
| 
 | ||||
| #[tokio::main] | ||||
| async fn main() -> anyhow::Result<()> { | ||||
|     let opt = Opt::from_args(); | ||||
| 
 | ||||
| pub fn run(opt: Opt) -> anyhow::Result<()> { | ||||
|     stderrlog::new() | ||||
|         .verbosity(opt.verbose) | ||||
|         .show_level(false) | ||||
| @@ -231,8 +224,13 @@ async fn main() -> anyhow::Result<()> { | ||||
|         .or(dash_logo_black_route) | ||||
|         .or(query_route); | ||||
| 
 | ||||
|     let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap(); | ||||
|     warp::serve(routes).run(addr).await; | ||||
|     let addr = SocketAddr::from_str(&opt.http_listen_addr)?; | ||||
|     tokio::runtime::Builder::new_multi_thread() | ||||
|         .enable_all() | ||||
|         .build()? | ||||
|         .block_on(async { | ||||
|             warp::serve(routes).run(addr).await | ||||
|         }); | ||||
| 
 | ||||
|     Ok(()) | ||||
| } | ||||
		Reference in New Issue
	
	Block a user