mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-22 19:46:26 +00:00 
			
		
		
		
	Merge pull request #1588 from meilisearch/test-new-indexer
Integrate the new indexer
This commit is contained in:
		
							
								
								
									
										95
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										95
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -458,6 +458,20 @@ name = "bytemuck" | ||||
| version = "1.7.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "9966d2ab714d0f785dbac0a0396251a35280aeb42413281617d0209ab4898435" | ||||
| dependencies = [ | ||||
|  "bytemuck_derive", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "bytemuck_derive" | ||||
| version = "1.0.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54" | ||||
| dependencies = [ | ||||
|  "proc-macro2 1.0.27", | ||||
|  "quote 1.0.9", | ||||
|  "syn 1.0.73", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "byteorder" | ||||
| @@ -630,6 +644,12 @@ dependencies = [ | ||||
|  "version_check", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "core-foundation-sys" | ||||
| version = "0.8.2" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" | ||||
|  | ||||
| [[package]] | ||||
| name = "cow-utils" | ||||
| version = "0.1.2" | ||||
| @@ -1097,13 +1117,14 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" | ||||
|  | ||||
| [[package]] | ||||
| name = "grenad" | ||||
| version = "0.1.0" | ||||
| source = "git+https://github.com/Kerollmops/grenad.git?rev=3adcb26#3adcb267dcbc590c7da10eb5f887a254865b3dbe" | ||||
| version = "0.3.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "7824d499230110f4e4a8d4fd3fd4dc15c1347fce5082e4bba82eef17f43e1ed8" | ||||
| dependencies = [ | ||||
|  "bytemuck", | ||||
|  "byteorder", | ||||
|  "flate2", | ||||
|  "log", | ||||
|  "nix", | ||||
|  "lz4_flex", | ||||
|  "snap", | ||||
|  "tempfile", | ||||
|  "zstd", | ||||
| @@ -1161,8 +1182,8 @@ dependencies = [ | ||||
|  | ||||
| [[package]] | ||||
| name = "heed" | ||||
| version = "0.12.0" | ||||
| source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" | ||||
| version = "0.12.1" | ||||
| source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" | ||||
| dependencies = [ | ||||
|  "byteorder", | ||||
|  "heed-traits", | ||||
| @@ -1180,12 +1201,12 @@ dependencies = [ | ||||
| [[package]] | ||||
| name = "heed-traits" | ||||
| version = "0.7.0" | ||||
| source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" | ||||
| source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" | ||||
|  | ||||
| [[package]] | ||||
| name = "heed-types" | ||||
| version = "0.7.2" | ||||
| source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551" | ||||
| source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" | ||||
| dependencies = [ | ||||
|  "bincode", | ||||
|  "heed-traits", | ||||
| @@ -1534,6 +1555,15 @@ dependencies = [ | ||||
|  "syn 0.15.44", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "lz4_flex" | ||||
| version = "0.8.2" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "5827b976d911b5d2e42b2ccfc7c0d2461a1414e8280436885218762fc529b3f8" | ||||
| dependencies = [ | ||||
|  "twox-hash", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "main_error" | ||||
| version = "0.1.1" | ||||
| @@ -1619,6 +1649,7 @@ dependencies = [ | ||||
|  "siphasher", | ||||
|  "slice-group-by", | ||||
|  "structopt", | ||||
|  "sysinfo", | ||||
|  "tar", | ||||
|  "tempdir", | ||||
|  "tempfile", | ||||
| @@ -1675,13 +1706,14 @@ dependencies = [ | ||||
|  | ||||
| [[package]] | ||||
| name = "milli" | ||||
| version = "0.11.0" | ||||
| source = "git+https://github.com/meilisearch/milli.git?tag=v0.11.0#c51bb6789cb3fbb6511138374b3443f9116a445c" | ||||
| version = "0.12.0" | ||||
| source = "git+https://github.com/meilisearch/milli.git?tag=v0.12.0#5cbe8793251bbf143434c8a4c4e7195ca6c5f2ac" | ||||
| dependencies = [ | ||||
|  "bstr", | ||||
|  "byteorder", | ||||
|  "chrono", | ||||
|  "concat-arrays", | ||||
|  "crossbeam-channel", | ||||
|  "csv", | ||||
|  "either", | ||||
|  "flate2", | ||||
| @@ -1788,18 +1820,6 @@ dependencies = [ | ||||
|  "syn 1.0.73", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "nix" | ||||
| version = "0.19.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2" | ||||
| dependencies = [ | ||||
|  "bitflags", | ||||
|  "cc", | ||||
|  "cfg-if 1.0.0", | ||||
|  "libc", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "normalize-line-endings" | ||||
| version = "0.3.0" | ||||
| @@ -2690,6 +2710,12 @@ dependencies = [ | ||||
|  "version_check", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "static_assertions" | ||||
| version = "1.1.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" | ||||
|  | ||||
| [[package]] | ||||
| name = "stdweb" | ||||
| version = "0.4.20" | ||||
| @@ -2812,6 +2838,21 @@ dependencies = [ | ||||
|  "unicode-xid 0.2.2", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "sysinfo" | ||||
| version = "0.20.0" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "0af066e6272f2175c1783cfc2ebf3e2d8dfe2c182b00677fdeccbf8291af83fb" | ||||
| dependencies = [ | ||||
|  "cfg-if 1.0.0", | ||||
|  "core-foundation-sys", | ||||
|  "libc", | ||||
|  "ntapi", | ||||
|  "once_cell", | ||||
|  "rayon", | ||||
|  "winapi", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "tar" | ||||
| version = "0.4.35" | ||||
| @@ -3052,6 +3093,16 @@ version = "0.2.3" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" | ||||
|  | ||||
| [[package]] | ||||
| name = "twox-hash" | ||||
| version = "1.6.1" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "1f559b464de2e2bdabcac6a210d12e9b5a5973c251e102c44c585c71d51bd78e" | ||||
| dependencies = [ | ||||
|  "cfg-if 1.0.0", | ||||
|  "static_assertions", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "typenum" | ||||
| version = "1.13.0" | ||||
|   | ||||
| @@ -50,7 +50,7 @@ main_error = "0.1.0" | ||||
| meilisearch-error = { path = "../meilisearch-error" } | ||||
| meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } | ||||
| memmap = "0.7.0" | ||||
| milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.11.0" } | ||||
| milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.12.0" } | ||||
| mime = "0.3.16" | ||||
| num_cpus = "1.13.0" | ||||
| once_cell = "1.5.2" | ||||
| @@ -76,6 +76,7 @@ pin-project = "1.0.7" | ||||
| whoami = { version = "1.1.2", optional = true } | ||||
| reqwest = { version = "0.11.3", features = ["json", "rustls-tls"], default-features = false, optional = true } | ||||
| serdeval = "0.1.0" | ||||
| sysinfo = "0.20.0" | ||||
|  | ||||
| [dev-dependencies] | ||||
| actix-rt = "2.1.0" | ||||
|   | ||||
| @@ -14,10 +14,8 @@ pub struct UpdateHandler { | ||||
|     chunk_compression_level: Option<u32>, | ||||
|     thread_pool: ThreadPool, | ||||
|     log_frequency: usize, | ||||
|     max_memory: usize, | ||||
|     linked_hash_map_size: usize, | ||||
|     max_memory: Option<usize>, | ||||
|     chunk_compression_type: CompressionType, | ||||
|     chunk_fusing_shrink_size: u64, | ||||
| } | ||||
|  | ||||
| impl UpdateHandler { | ||||
| @@ -25,15 +23,14 @@ impl UpdateHandler { | ||||
|         let thread_pool = rayon::ThreadPoolBuilder::new() | ||||
|             .num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2)) | ||||
|             .build()?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             max_nb_chunks: opt.max_nb_chunks, | ||||
|             chunk_compression_level: opt.chunk_compression_level, | ||||
|             thread_pool, | ||||
|             log_frequency: opt.log_every_n, | ||||
|             max_memory: opt.max_memory.get_bytes() as usize, | ||||
|             linked_hash_map_size: opt.linked_hash_map_size, | ||||
|             max_memory: opt.max_memory.map(|m| m.get_bytes() as usize), | ||||
|             chunk_compression_type: opt.chunk_compression_type, | ||||
|             chunk_fusing_shrink_size: opt.chunk_fusing_shrink_size.get_bytes(), | ||||
|         }) | ||||
|     } | ||||
|  | ||||
| @@ -48,10 +45,10 @@ impl UpdateHandler { | ||||
|         } | ||||
|         update_builder.thread_pool(&self.thread_pool); | ||||
|         update_builder.log_every_n(self.log_frequency); | ||||
|         update_builder.max_memory(self.max_memory); | ||||
|         update_builder.linked_hash_map_size(self.linked_hash_map_size); | ||||
|         if let Some(max_memory) = self.max_memory { | ||||
|             update_builder.max_memory(max_memory); | ||||
|         } | ||||
|         update_builder.chunk_compression_type(self.chunk_compression_type); | ||||
|         update_builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size); | ||||
|         update_builder | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,9 @@ | ||||
| use byte_unit::ByteError; | ||||
| use std::fmt; | ||||
| use std::io::{BufReader, Read}; | ||||
| use std::ops::Deref; | ||||
| use std::path::PathBuf; | ||||
| use std::str::FromStr; | ||||
| use std::sync::Arc; | ||||
| use std::{error, fs}; | ||||
|  | ||||
| @@ -11,6 +15,7 @@ use rustls::{ | ||||
|     RootCertStore, | ||||
| }; | ||||
| use structopt::StructOpt; | ||||
| use sysinfo::{RefreshKind, System, SystemExt}; | ||||
|  | ||||
| #[derive(Debug, Clone, StructOpt)] | ||||
| pub struct IndexerOpts { | ||||
| @@ -23,13 +28,15 @@ pub struct IndexerOpts { | ||||
|     #[structopt(long)] | ||||
|     pub max_nb_chunks: Option<usize>, | ||||
|  | ||||
|     /// The maximum amount of memory to use for the Grenad buffer. It is recommended | ||||
|     /// to use something like 80%-90% of the available memory. | ||||
|     /// The maximum amount of memory the indexer will use. It defaults to 2/3 | ||||
|     /// of the available memory. It is recommended to use something like 80%-90% | ||||
|     /// of the available memory, no more. | ||||
|     /// | ||||
|     /// It is automatically split by the number of jobs e.g. if you use 7 jobs | ||||
|     /// and 7 GB of max memory, each thread will use a maximum of 1 GB. | ||||
|     #[structopt(long, default_value = "7 GiB")] | ||||
|     pub max_memory: Byte, | ||||
|     /// In case the engine is unable to retrieve the available memory the engine will | ||||
|     /// try to use the memory it needs but without real limit, this can lead to | ||||
|     /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. | ||||
|     #[structopt(long, default_value)] | ||||
|     pub max_memory: MaxMemory, | ||||
|  | ||||
|     /// Size of the linked hash map cache when indexing. | ||||
|     /// The bigger it is, the faster the indexing is but the more memory it takes. | ||||
| @@ -69,7 +76,7 @@ impl Default for IndexerOpts { | ||||
|         Self { | ||||
|             log_every_n: 100_000, | ||||
|             max_nb_chunks: None, | ||||
|             max_memory: Byte::from_str("1GiB").unwrap(), | ||||
|             max_memory: MaxMemory::default(), | ||||
|             linked_hash_map_size: 500, | ||||
|             chunk_compression_type: CompressionType::None, | ||||
|             chunk_compression_level: None, | ||||
| @@ -240,6 +247,57 @@ impl Opt { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A type used to detect the max memory available and use 2/3 of it. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct MaxMemory(Option<Byte>); | ||||
|  | ||||
| impl FromStr for MaxMemory { | ||||
|     type Err = ByteError; | ||||
|  | ||||
|     fn from_str(s: &str) -> Result<MaxMemory, ByteError> { | ||||
|         Byte::from_str(s).map(Some).map(MaxMemory) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Default for MaxMemory { | ||||
|     fn default() -> MaxMemory { | ||||
|         MaxMemory( | ||||
|             total_memory_bytes() | ||||
|                 .map(|bytes| bytes * 2 / 3) | ||||
|                 .map(Byte::from_bytes), | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for MaxMemory { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         match self.0 { | ||||
|             Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), | ||||
|             None => f.write_str("unknown"), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Deref for MaxMemory { | ||||
|     type Target = Option<Byte>; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Returns the total amount of bytes available or `None` if this system isn't supported. | ||||
| fn total_memory_bytes() -> Option<u64> { | ||||
|     if System::IS_SUPPORTED { | ||||
|         let memory_kind = RefreshKind::new().with_memory(); | ||||
|         let mut system = System::new_with_specifics(memory_kind); | ||||
|         system.refresh_memory(); | ||||
|         Some(system.total_memory() * 1024) // KiB into bytes | ||||
|     } else { | ||||
|         None | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> { | ||||
|     let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?; | ||||
|     let mut reader = BufReader::new(certfile); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user