mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge pull request #5762 from meilisearch/new-document-indexer-for-dumps
Use the edition 2024 documents indexer in the dumps
This commit is contained in:
		
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -5,7 +5,7 @@ | |||||||
| **/*.json_lines | **/*.json_lines | ||||||
| **/*.rs.bk | **/*.rs.bk | ||||||
| /*.mdb | /*.mdb | ||||||
| /data.ms | /*.ms | ||||||
| /snapshots | /snapshots | ||||||
| /dumps | /dumps | ||||||
| /bench | /bench | ||||||
|   | |||||||
							
								
								
									
										5
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										5
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -3775,6 +3775,7 @@ dependencies = [ | |||||||
|  "meili-snap", |  "meili-snap", | ||||||
|  "meilisearch-auth", |  "meilisearch-auth", | ||||||
|  "meilisearch-types", |  "meilisearch-types", | ||||||
|  |  "memmap2", | ||||||
|  "mimalloc", |  "mimalloc", | ||||||
|  "mime", |  "mime", | ||||||
|  "mopa-maintained", |  "mopa-maintained", | ||||||
| @@ -3908,9 +3909,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "memmap2" | name = "memmap2" | ||||||
| version = "0.9.5" | version = "0.9.7" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" | checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "libc", |  "libc", | ||||||
|  "stable_deref_trait", |  "stable_deref_trait", | ||||||
|   | |||||||
| @@ -14,7 +14,7 @@ license.workspace = true | |||||||
| anyhow = "1.0.98" | anyhow = "1.0.98" | ||||||
| bumpalo = "3.18.1" | bumpalo = "3.18.1" | ||||||
| csv = "1.3.1" | csv = "1.3.1" | ||||||
| memmap2 = "0.9.5" | memmap2 = "0.9.7" | ||||||
| milli = { path = "../milli" } | milli = { path = "../milli" } | ||||||
| mimalloc = { version = "0.1.47", default-features = false } | mimalloc = { version = "0.1.47", default-features = false } | ||||||
| serde_json = { version = "1.0.140", features = ["preserve_order"] } | serde_json = { version = "1.0.140", features = ["preserve_order"] } | ||||||
| @@ -55,4 +55,3 @@ harness = false | |||||||
| [[bench]] | [[bench]] | ||||||
| name = "sort" | name = "sort" | ||||||
| harness = false | harness = false | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
|  | use std::fs::File; | ||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
|  |  | ||||||
| use super::v2_to_v3::CompatV2ToV3; | use super::v2_to_v3::CompatV2ToV3; | ||||||
| @@ -94,6 +95,10 @@ impl CompatIndexV1ToV2 { | |||||||
|         self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>) |         self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         self.from.documents_file() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> { |     pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> { | ||||||
|         Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check()) |         Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check()) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
|  | use std::fs::File; | ||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
|  |  | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
| @@ -122,6 +123,13 @@ impl CompatIndexV2ToV3 { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         match self { | ||||||
|  |             CompatIndexV2ToV3::V2(v2) => v2.documents_file(), | ||||||
|  |             CompatIndexV2ToV3::Compat(compat) => compat.documents_file(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> { |     pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> { | ||||||
|         let settings = match self { |         let settings = match self { | ||||||
|             CompatIndexV2ToV3::V2(from) => from.settings()?, |             CompatIndexV2ToV3::V2(from) => from.settings()?, | ||||||
|   | |||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | use std::fs::File; | ||||||
|  |  | ||||||
| use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3}; | use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3}; | ||||||
| use super::v4_to_v5::CompatV4ToV5; | use super::v4_to_v5::CompatV4ToV5; | ||||||
| use crate::reader::{v3, v4, UpdateFile}; | use crate::reader::{v3, v4, UpdateFile}; | ||||||
| @@ -252,6 +254,13 @@ impl CompatIndexV3ToV4 { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         match self { | ||||||
|  |             CompatIndexV3ToV4::V3(v3) => v3.documents_file(), | ||||||
|  |             CompatIndexV3ToV4::Compat(compat) => compat.documents_file(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> { |     pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> { | ||||||
|         Ok(match self { |         Ok(match self { | ||||||
|             CompatIndexV3ToV4::V3(v3) => { |             CompatIndexV3ToV4::V3(v3) => { | ||||||
|   | |||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | use std::fs::File; | ||||||
|  |  | ||||||
| use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4}; | use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4}; | ||||||
| use super::v5_to_v6::CompatV5ToV6; | use super::v5_to_v6::CompatV5ToV6; | ||||||
| use crate::reader::{v4, v5, Document}; | use crate::reader::{v4, v5, Document}; | ||||||
| @@ -241,6 +243,13 @@ impl CompatIndexV4ToV5 { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         match self { | ||||||
|  |             CompatIndexV4ToV5::V4(v4) => v4.documents_file(), | ||||||
|  |             CompatIndexV4ToV5::Compat(compat) => compat.documents_file(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> { |     pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> { | ||||||
|         match self { |         match self { | ||||||
|             CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()), |             CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()), | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
|  | use std::fs::File; | ||||||
| use std::num::NonZeroUsize; | use std::num::NonZeroUsize; | ||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
|  |  | ||||||
| @@ -243,6 +244,13 @@ impl CompatIndexV5ToV6 { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         match self { | ||||||
|  |             CompatIndexV5ToV6::V5(v5) => v5.documents_file(), | ||||||
|  |             CompatIndexV5ToV6::Compat(compat) => compat.documents_file(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> { |     pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> { | ||||||
|         match self { |         match self { | ||||||
|             CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()), |             CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()), | ||||||
|   | |||||||
| @@ -192,6 +192,14 @@ impl DumpIndexReader { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// A reference to a file in the NDJSON format containing all the documents of the index | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         match self { | ||||||
|  |             DumpIndexReader::Current(v6) => v6.documents_file(), | ||||||
|  |             DumpIndexReader::Compat(compat) => compat.documents_file(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> { |     pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> { | ||||||
|         match self { |         match self { | ||||||
|             DumpIndexReader::Current(v6) => v6.settings(), |             DumpIndexReader::Current(v6) => v6.settings(), | ||||||
|   | |||||||
| @@ -72,6 +72,10 @@ impl V1IndexReader { | |||||||
|             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) |             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         self.documents.get_ref() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<self::settings::Settings> { |     pub fn settings(&mut self) -> Result<self::settings::Settings> { | ||||||
|         Ok(serde_json::from_reader(&mut self.settings)?) |         Ok(serde_json::from_reader(&mut self.settings)?) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -203,6 +203,10 @@ impl V2IndexReader { | |||||||
|             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) |             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         self.documents.get_ref() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<Settings<Checked>> { |     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||||
|         Ok(self.settings.clone()) |         Ok(self.settings.clone()) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -215,6 +215,10 @@ impl V3IndexReader { | |||||||
|             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) |             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         self.documents.get_ref() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<Settings<Checked>> { |     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||||
|         Ok(self.settings.clone()) |         Ok(self.settings.clone()) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -210,6 +210,10 @@ impl V4IndexReader { | |||||||
|             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) |             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         self.documents.get_ref() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<Settings<Checked>> { |     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||||
|         Ok(self.settings.clone()) |         Ok(self.settings.clone()) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -247,6 +247,10 @@ impl V5IndexReader { | |||||||
|             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) |             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         self.documents.get_ref() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<Settings<Checked>> { |     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||||
|         Ok(self.settings.clone()) |         Ok(self.settings.clone()) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -284,6 +284,10 @@ impl V6IndexReader { | |||||||
|             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) |             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn documents_file(&self) -> &File { | ||||||
|  |         self.documents.get_ref() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn settings(&mut self) -> Result<Settings<Checked>> { |     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||||
|         let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; |         let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; | ||||||
|         patch_embedders(&mut settings); |         patch_embedders(&mut settings); | ||||||
|   | |||||||
| @@ -26,7 +26,7 @@ flate2 = "1.1.2" | |||||||
| indexmap = "2.9.0" | indexmap = "2.9.0" | ||||||
| meilisearch-auth = { path = "../meilisearch-auth" } | meilisearch-auth = { path = "../meilisearch-auth" } | ||||||
| meilisearch-types = { path = "../meilisearch-types" } | meilisearch-types = { path = "../meilisearch-types" } | ||||||
| memmap2 = "0.9.5" | memmap2 = "0.9.7" | ||||||
| page_size = "0.6.0" | page_size = "0.6.0" | ||||||
| rayon = "1.10.0" | rayon = "1.10.0" | ||||||
| roaring = { version = "0.10.12", features = ["serde"] } | roaring = { version = "0.10.12", features = ["serde"] } | ||||||
|   | |||||||
| @@ -20,6 +20,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { | |||||||
|  |  | ||||||
|     let IndexScheduler { |     let IndexScheduler { | ||||||
|         cleanup_enabled: _, |         cleanup_enabled: _, | ||||||
|  |         experimental_no_edition_2024_for_dumps: _, | ||||||
|         processing_tasks, |         processing_tasks, | ||||||
|         env, |         env, | ||||||
|         version, |         version, | ||||||
|   | |||||||
| @@ -168,6 +168,9 @@ pub struct IndexScheduler { | |||||||
|     /// Whether we should automatically cleanup the task queue or not. |     /// Whether we should automatically cleanup the task queue or not. | ||||||
|     pub(crate) cleanup_enabled: bool, |     pub(crate) cleanup_enabled: bool, | ||||||
|  |  | ||||||
|  |     /// Whether dump imports should use the old document indexer rather than the new one. | ||||||
|  |     pub(crate) experimental_no_edition_2024_for_dumps: bool, | ||||||
|  |  | ||||||
|     /// The webhook url we should send tasks to after processing every batches. |     /// The webhook url we should send tasks to after processing every batches. | ||||||
|     pub(crate) webhook_url: Option<String>, |     pub(crate) webhook_url: Option<String>, | ||||||
|     /// The Authorization header to send to the webhook URL. |     /// The Authorization header to send to the webhook URL. | ||||||
| @@ -210,6 +213,7 @@ impl IndexScheduler { | |||||||
|  |  | ||||||
|             index_mapper: self.index_mapper.clone(), |             index_mapper: self.index_mapper.clone(), | ||||||
|             cleanup_enabled: self.cleanup_enabled, |             cleanup_enabled: self.cleanup_enabled, | ||||||
|  |             experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps, | ||||||
|             webhook_url: self.webhook_url.clone(), |             webhook_url: self.webhook_url.clone(), | ||||||
|             webhook_authorization_header: self.webhook_authorization_header.clone(), |             webhook_authorization_header: self.webhook_authorization_header.clone(), | ||||||
|             embedders: self.embedders.clone(), |             embedders: self.embedders.clone(), | ||||||
| @@ -296,6 +300,9 @@ impl IndexScheduler { | |||||||
|             index_mapper, |             index_mapper, | ||||||
|             env, |             env, | ||||||
|             cleanup_enabled: options.cleanup_enabled, |             cleanup_enabled: options.cleanup_enabled, | ||||||
|  |             experimental_no_edition_2024_for_dumps: options | ||||||
|  |                 .indexer_config | ||||||
|  |                 .experimental_no_edition_2024_for_dumps, | ||||||
|             webhook_url: options.webhook_url, |             webhook_url: options.webhook_url, | ||||||
|             webhook_authorization_header: options.webhook_authorization_header, |             webhook_authorization_header: options.webhook_authorization_header, | ||||||
|             embedders: Default::default(), |             embedders: Default::default(), | ||||||
| @@ -594,6 +601,11 @@ impl IndexScheduler { | |||||||
|         Ok(nbr_index_processing_tasks > 0) |         Ok(nbr_index_processing_tasks > 0) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Whether dump imports should use the old document indexer. | ||||||
|  |     pub fn no_edition_2024_for_dumps(&self) -> bool { | ||||||
|  |         self.experimental_no_edition_2024_for_dumps | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Return the tasks matching the query from the user's point of view along |     /// Return the tasks matching the query from the user's point of view along | ||||||
|     /// with the total number of tasks matching the query, ignoring from and limit. |     /// with the total number of tasks matching the query, ignoring from and limit. | ||||||
|     /// |     /// | ||||||
|   | |||||||
| @@ -24,7 +24,7 @@ enum-iterator = "2.1.0" | |||||||
| file-store = { path = "../file-store" } | file-store = { path = "../file-store" } | ||||||
| flate2 = "1.1.2" | flate2 = "1.1.2" | ||||||
| fst = "0.4.7" | fst = "0.4.7" | ||||||
| memmap2 = "0.9.5" | memmap2 = "0.9.7" | ||||||
| milli = { path = "../milli" } | milli = { path = "../milli" } | ||||||
| roaring = { version = "0.10.12", features = ["serde"] } | roaring = { version = "0.10.12", features = ["serde"] } | ||||||
| rustc-hash = "2.1.1" | rustc-hash = "2.1.1" | ||||||
|   | |||||||
| @@ -50,6 +50,7 @@ jsonwebtoken = "9.3.1" | |||||||
| lazy_static = "1.5.0" | lazy_static = "1.5.0" | ||||||
| meilisearch-auth = { path = "../meilisearch-auth" } | meilisearch-auth = { path = "../meilisearch-auth" } | ||||||
| meilisearch-types = { path = "../meilisearch-types" } | meilisearch-types = { path = "../meilisearch-types" } | ||||||
|  | memmap2 = "0.9.7" | ||||||
| mimalloc = { version = "0.1.47", default-features = false } | mimalloc = { version = "0.1.47", default-features = false } | ||||||
| mime = "0.3.17" | mime = "0.3.17" | ||||||
| num_cpus = "1.17.0" | num_cpus = "1.17.0" | ||||||
|   | |||||||
| @@ -203,6 +203,7 @@ struct Infos { | |||||||
|     experimental_composite_embedders: bool, |     experimental_composite_embedders: bool, | ||||||
|     experimental_embedding_cache_entries: usize, |     experimental_embedding_cache_entries: usize, | ||||||
|     experimental_no_snapshot_compaction: bool, |     experimental_no_snapshot_compaction: bool, | ||||||
|  |     experimental_no_edition_2024_for_dumps: bool, | ||||||
|     experimental_no_edition_2024_for_settings: bool, |     experimental_no_edition_2024_for_settings: bool, | ||||||
|     gpu_enabled: bool, |     gpu_enabled: bool, | ||||||
|     db_path: bool, |     db_path: bool, | ||||||
| @@ -293,6 +294,7 @@ impl Infos { | |||||||
|             max_indexing_threads, |             max_indexing_threads, | ||||||
|             skip_index_budget: _, |             skip_index_budget: _, | ||||||
|             experimental_no_edition_2024_for_settings, |             experimental_no_edition_2024_for_settings, | ||||||
|  |             experimental_no_edition_2024_for_dumps, | ||||||
|         } = indexer_options; |         } = indexer_options; | ||||||
|  |  | ||||||
|         let RuntimeTogglableFeatures { |         let RuntimeTogglableFeatures { | ||||||
| @@ -329,6 +331,7 @@ impl Infos { | |||||||
|             experimental_composite_embedders: composite_embedders, |             experimental_composite_embedders: composite_embedders, | ||||||
|             experimental_embedding_cache_entries, |             experimental_embedding_cache_entries, | ||||||
|             experimental_no_snapshot_compaction, |             experimental_no_snapshot_compaction, | ||||||
|  |             experimental_no_edition_2024_for_dumps, | ||||||
|             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), |             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), | ||||||
|             db_path: db_path != PathBuf::from("./data.ms"), |             db_path: db_path != PathBuf::from("./data.ms"), | ||||||
|             import_dump: import_dump.is_some(), |             import_dump: import_dump.is_some(), | ||||||
|   | |||||||
| @@ -30,6 +30,7 @@ use actix_web::web::Data; | |||||||
| use actix_web::{web, HttpRequest}; | use actix_web::{web, HttpRequest}; | ||||||
| use analytics::Analytics; | use analytics::Analytics; | ||||||
| use anyhow::bail; | use anyhow::bail; | ||||||
|  | use bumpalo::Bump; | ||||||
| use error::PayloadError; | use error::PayloadError; | ||||||
| use extractors::payload::PayloadConfig; | use extractors::payload::PayloadConfig; | ||||||
| use index_scheduler::versioning::Versioning; | use index_scheduler::versioning::Versioning; | ||||||
| @@ -38,6 +39,7 @@ use meilisearch_auth::{open_auth_store_env, AuthController}; | |||||||
| use meilisearch_types::milli::constants::VERSION_MAJOR; | use meilisearch_types::milli::constants::VERSION_MAJOR; | ||||||
| use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; | use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; | ||||||
| use meilisearch_types::milli::progress::{EmbedderStats, Progress}; | use meilisearch_types::milli::progress::{EmbedderStats, Progress}; | ||||||
|  | use meilisearch_types::milli::update::new::indexer; | ||||||
| use meilisearch_types::milli::update::{ | use meilisearch_types::milli::update::{ | ||||||
|     default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, |     default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, | ||||||
| }; | }; | ||||||
| @@ -533,7 +535,7 @@ fn import_dump( | |||||||
|         let mut index_reader = index_reader?; |         let mut index_reader = index_reader?; | ||||||
|         let metadata = index_reader.metadata(); |         let metadata = index_reader.metadata(); | ||||||
|         let uid = metadata.uid.clone(); |         let uid = metadata.uid.clone(); | ||||||
|         tracing::info!("Importing index `{}`.", metadata.uid); |         tracing::info!("Importing index `{uid}`."); | ||||||
|  |  | ||||||
|         let date = Some((metadata.created_at, metadata.updated_at)); |         let date = Some((metadata.created_at, metadata.updated_at)); | ||||||
|         let index = index_scheduler.create_raw_index(&metadata.uid, date)?; |         let index = index_scheduler.create_raw_index(&metadata.uid, date)?; | ||||||
| @@ -552,48 +554,100 @@ fn import_dump( | |||||||
|         apply_settings_to_builder(&settings, &mut builder); |         apply_settings_to_builder(&settings, &mut builder); | ||||||
|         let embedder_stats: Arc<EmbedderStats> = Default::default(); |         let embedder_stats: Arc<EmbedderStats> = Default::default(); | ||||||
|         builder.execute(&|| false, &progress, embedder_stats.clone())?; |         builder.execute(&|| false, &progress, embedder_stats.clone())?; | ||||||
|  |         wtxn.commit()?; | ||||||
|  |  | ||||||
|         // 5.3 Import the documents. |         let mut wtxn = index.write_txn()?; | ||||||
|         // 5.3.1 We need to recreate the grenad+obkv format accepted by the index. |         let rtxn = index.read_txn()?; | ||||||
|         tracing::info!("Importing the documents."); |  | ||||||
|         let file = tempfile::tempfile()?; |         if index_scheduler.no_edition_2024_for_dumps() { | ||||||
|         let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); |             // 5.3 Import the documents. | ||||||
|         for document in index_reader.documents()? { |             // 5.3.1 We need to recreate the grenad+obkv format accepted by the index. | ||||||
|             builder.append_json_object(&document?)?; |             tracing::info!("Importing the documents."); | ||||||
|  |             let file = tempfile::tempfile()?; | ||||||
|  |             let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); | ||||||
|  |             for document in index_reader.documents()? { | ||||||
|  |                 builder.append_json_object(&document?)?; | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // This flushes the content of the batch builder. | ||||||
|  |             let file = builder.into_inner()?.into_inner()?; | ||||||
|  |  | ||||||
|  |             // 5.3.2 We feed it to the milli index. | ||||||
|  |             let reader = BufReader::new(file); | ||||||
|  |             let reader = DocumentsBatchReader::from_reader(reader)?; | ||||||
|  |  | ||||||
|  |             let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?; | ||||||
|  |             let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?; | ||||||
|  |  | ||||||
|  |             let builder = milli::update::IndexDocuments::new( | ||||||
|  |                 &mut wtxn, | ||||||
|  |                 &index, | ||||||
|  |                 indexer_config, | ||||||
|  |                 IndexDocumentsConfig { | ||||||
|  |                     update_method: IndexDocumentsMethod::ReplaceDocuments, | ||||||
|  |                     ..Default::default() | ||||||
|  |                 }, | ||||||
|  |                 |indexing_step| tracing::trace!("update: {:?}", indexing_step), | ||||||
|  |                 || false, | ||||||
|  |                 &embedder_stats, | ||||||
|  |             )?; | ||||||
|  |  | ||||||
|  |             let builder = builder.with_embedders(embedders); | ||||||
|  |  | ||||||
|  |             let (builder, user_result) = builder.add_documents(reader)?; | ||||||
|  |             let user_result = user_result?; | ||||||
|  |             tracing::info!(documents_found = user_result, "{} documents found.", user_result); | ||||||
|  |             builder.execute()?; | ||||||
|  |         } else { | ||||||
|  |             let db_fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|  |             let primary_key = index.primary_key(&rtxn)?; | ||||||
|  |             let mut new_fields_ids_map = db_fields_ids_map.clone(); | ||||||
|  |  | ||||||
|  |             let mut indexer = indexer::DocumentOperation::new(); | ||||||
|  |             let embedders = index.embedding_configs().embedding_configs(&rtxn)?; | ||||||
|  |             let embedders = index_scheduler.embedders(uid.clone(), embedders)?; | ||||||
|  |  | ||||||
|  |             let mmap = unsafe { memmap2::Mmap::map(index_reader.documents_file())? }; | ||||||
|  |  | ||||||
|  |             indexer.replace_documents(&mmap)?; | ||||||
|  |  | ||||||
|  |             let indexer_config = index_scheduler.indexer_config(); | ||||||
|  |             let pool = &indexer_config.thread_pool; | ||||||
|  |  | ||||||
|  |             let indexer_alloc = Bump::new(); | ||||||
|  |             let (document_changes, mut operation_stats, primary_key) = indexer.into_changes( | ||||||
|  |                 &indexer_alloc, | ||||||
|  |                 &index, | ||||||
|  |                 &rtxn, | ||||||
|  |                 primary_key, | ||||||
|  |                 &mut new_fields_ids_map, | ||||||
|  |                 &|| false, // never stop processing a dump | ||||||
|  |                 progress.clone(), | ||||||
|  |             )?; | ||||||
|  |  | ||||||
|  |             let operation_stats = operation_stats.pop().unwrap(); | ||||||
|  |             if let Some(error) = operation_stats.error { | ||||||
|  |                 return Err(error.into()); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             let _congestion = indexer::index( | ||||||
|  |                 &mut wtxn, | ||||||
|  |                 &index, | ||||||
|  |                 pool, | ||||||
|  |                 indexer_config.grenad_parameters(), | ||||||
|  |                 &db_fields_ids_map, | ||||||
|  |                 new_fields_ids_map, | ||||||
|  |                 primary_key, | ||||||
|  |                 &document_changes, | ||||||
|  |                 embedders, | ||||||
|  |                 &|| false, // never stop processing a dump | ||||||
|  |                 &progress, | ||||||
|  |                 &embedder_stats, | ||||||
|  |             )?; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // This flush the content of the batch builder. |  | ||||||
|         let file = builder.into_inner()?.into_inner()?; |  | ||||||
|  |  | ||||||
|         // 5.3.2 We feed it to the milli index. |  | ||||||
|         let reader = BufReader::new(file); |  | ||||||
|         let reader = DocumentsBatchReader::from_reader(reader)?; |  | ||||||
|  |  | ||||||
|         let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?; |  | ||||||
|         let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?; |  | ||||||
|  |  | ||||||
|         let builder = milli::update::IndexDocuments::new( |  | ||||||
|             &mut wtxn, |  | ||||||
|             &index, |  | ||||||
|             indexer_config, |  | ||||||
|             IndexDocumentsConfig { |  | ||||||
|                 update_method: IndexDocumentsMethod::ReplaceDocuments, |  | ||||||
|                 ..Default::default() |  | ||||||
|             }, |  | ||||||
|             |indexing_step| tracing::trace!("update: {:?}", indexing_step), |  | ||||||
|             || false, |  | ||||||
|             &embedder_stats, |  | ||||||
|         )?; |  | ||||||
|  |  | ||||||
|         let builder = builder.with_embedders(embedders); |  | ||||||
|  |  | ||||||
|         let (builder, user_result) = builder.add_documents(reader)?; |  | ||||||
|         let user_result = user_result?; |  | ||||||
|         tracing::info!(documents_found = user_result, "{} documents found.", user_result); |  | ||||||
|         builder.execute()?; |  | ||||||
|         wtxn.commit()?; |         wtxn.commit()?; | ||||||
|         tracing::info!("All documents successfully imported."); |         tracing::info!("All documents successfully imported."); | ||||||
|  |  | ||||||
|         index_scheduler.refresh_index_stats(&uid)?; |         index_scheduler.refresh_index_stats(&uid)?; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -68,6 +68,8 @@ const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str = | |||||||
| const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str = | const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str = | ||||||
|     "MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES"; |     "MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES"; | ||||||
| const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION"; | const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION"; | ||||||
|  | const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str = | ||||||
|  |     "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS"; | ||||||
| const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; | const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; | ||||||
| const DEFAULT_DB_PATH: &str = "./data.ms"; | const DEFAULT_DB_PATH: &str = "./data.ms"; | ||||||
| const DEFAULT_HTTP_ADDR: &str = "localhost:7700"; | const DEFAULT_HTTP_ADDR: &str = "localhost:7700"; | ||||||
| @@ -759,6 +761,15 @@ pub struct IndexerOpts { | |||||||
|     #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)] |     #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)] | ||||||
|     #[serde(default)] |     #[serde(default)] | ||||||
|     pub experimental_no_edition_2024_for_settings: bool, |     pub experimental_no_edition_2024_for_settings: bool, | ||||||
|  |  | ||||||
|  |     /// Experimental make dump imports use the old document indexer. | ||||||
|  |     /// | ||||||
|  |     /// When enabled, Meilisearch will use the old document indexer when importing dumps. | ||||||
|  |     /// | ||||||
|  |     /// For more information, see <https://github.com/orgs/meilisearch/discussions/851>. | ||||||
|  |     #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)] | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub experimental_no_edition_2024_for_dumps: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl IndexerOpts { | impl IndexerOpts { | ||||||
| @@ -769,6 +780,7 @@ impl IndexerOpts { | |||||||
|             max_indexing_threads, |             max_indexing_threads, | ||||||
|             skip_index_budget: _, |             skip_index_budget: _, | ||||||
|             experimental_no_edition_2024_for_settings, |             experimental_no_edition_2024_for_settings, | ||||||
|  |             experimental_no_edition_2024_for_dumps, | ||||||
|         } = self; |         } = self; | ||||||
|         if let Some(max_indexing_memory) = max_indexing_memory.0 { |         if let Some(max_indexing_memory) = max_indexing_memory.0 { | ||||||
|             export_to_env_if_not_present( |             export_to_env_if_not_present( | ||||||
| @@ -788,6 +800,12 @@ impl IndexerOpts { | |||||||
|                 experimental_no_edition_2024_for_settings.to_string(), |                 experimental_no_edition_2024_for_settings.to_string(), | ||||||
|             ); |             ); | ||||||
|         } |         } | ||||||
|  |         if experimental_no_edition_2024_for_dumps { | ||||||
|  |             export_to_env_if_not_present( | ||||||
|  |                 MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS, | ||||||
|  |                 experimental_no_edition_2024_for_dumps.to_string(), | ||||||
|  |             ); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -808,6 +826,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { | |||||||
|             skip_index_budget: other.skip_index_budget, |             skip_index_budget: other.skip_index_budget, | ||||||
|             experimental_no_edition_2024_for_settings: other |             experimental_no_edition_2024_for_settings: other | ||||||
|                 .experimental_no_edition_2024_for_settings, |                 .experimental_no_edition_2024_for_settings, | ||||||
|  |             experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps, | ||||||
|             chunk_compression_type: Default::default(), |             chunk_compression_type: Default::default(), | ||||||
|             chunk_compression_level: Default::default(), |             chunk_compression_level: Default::default(), | ||||||
|             documents_chunk_size: Default::default(), |             documents_chunk_size: Default::default(), | ||||||
|   | |||||||
| @@ -466,6 +466,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt { | |||||||
|             // Having 2 threads makes the tests way faster |             // Having 2 threads makes the tests way faster | ||||||
|             max_indexing_threads: MaxThreads::from_str("2").unwrap(), |             max_indexing_threads: MaxThreads::from_str("2").unwrap(), | ||||||
|             experimental_no_edition_2024_for_settings: false, |             experimental_no_edition_2024_for_settings: false, | ||||||
|  |             experimental_no_edition_2024_for_dumps: false, | ||||||
|         }, |         }, | ||||||
|         experimental_enable_metrics: false, |         experimental_enable_metrics: false, | ||||||
|         ..Parser::parse_from(None as Option<&str>) |         ..Parser::parse_from(None as Option<&str>) | ||||||
|   | |||||||
| @@ -40,7 +40,7 @@ indexmap = { version = "2.9.0", features = ["serde"] } | |||||||
| json-depth-checker = { path = "../json-depth-checker" } | json-depth-checker = { path = "../json-depth-checker" } | ||||||
| levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } | levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } | ||||||
| memchr = "2.7.5" | memchr = "2.7.5" | ||||||
| memmap2 = "0.9.5" | memmap2 = "0.9.7" | ||||||
| obkv = "0.3.0" | obkv = "0.3.0" | ||||||
| once_cell = "1.21.3" | once_cell = "1.21.3" | ||||||
| ordered-float = "5.0.0" | ordered-float = "5.0.0" | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ pub struct IndexerConfig { | |||||||
|     pub max_positions_per_attributes: Option<u32>, |     pub max_positions_per_attributes: Option<u32>, | ||||||
|     pub skip_index_budget: bool, |     pub skip_index_budget: bool, | ||||||
|     pub experimental_no_edition_2024_for_settings: bool, |     pub experimental_no_edition_2024_for_settings: bool, | ||||||
|  |     pub experimental_no_edition_2024_for_dumps: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl IndexerConfig { | impl IndexerConfig { | ||||||
| @@ -65,6 +66,7 @@ impl Default for IndexerConfig { | |||||||
|             max_positions_per_attributes: None, |             max_positions_per_attributes: None, | ||||||
|             skip_index_budget: false, |             skip_index_budget: false, | ||||||
|             experimental_no_edition_2024_for_settings: false, |             experimental_no_edition_2024_for_settings: false, | ||||||
|  |             experimental_no_edition_2024_for_dumps: false, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user