mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 04:56:28 +00:00 
			
		
		
		
	Merge #5262
5262: Bring back changes from v1.12.4, v1.12.5, and v1.12.6 into main r=dureuill a=Kerollmops This PR follows [this guideline to bring back changes after we worked on v1.12.4, v1.12.5, and v1.12.6](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md#after-the-release-bring-back-changes-to-main). Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
This commit is contained in:
		
							
								
								
									
										4
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										4
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -6231,9 +6231,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "utoipa-scalar" | name = "utoipa-scalar" | ||||||
| version = "0.2.1" | version = "0.3.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "088e93bf19f6bd06e0aacb02ca432b3c5a449c4aec2e4aa9fc333a667f2b2c55" | checksum = "59559e1509172f6b26c1cdbc7247c4ddd1ac6560fe94b584f81ee489b141f719" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "actix-web", |  "actix-web", | ||||||
|  "serde", |  "serde", | ||||||
|   | |||||||
| @@ -1,8 +1,9 @@ | |||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
|  | use std::io; | ||||||
|  |  | ||||||
| use dump::{KindDump, TaskDump, UpdateFile}; | use dump::{KindDump, TaskDump, UpdateFile}; | ||||||
| use meilisearch_types::heed::RwTxn; | use meilisearch_types::heed::RwTxn; | ||||||
| use meilisearch_types::milli::documents::DocumentsBatchBuilder; | use meilisearch_types::milli; | ||||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use uuid::Uuid; | use uuid::Uuid; | ||||||
| @@ -39,14 +40,19 @@ impl<'a> Dump<'a> { | |||||||
|         task: TaskDump, |         task: TaskDump, | ||||||
|         content_file: Option<Box<UpdateFile>>, |         content_file: Option<Box<UpdateFile>>, | ||||||
|     ) -> Result<Task> { |     ) -> Result<Task> { | ||||||
|  |         let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0); | ||||||
|  |  | ||||||
|         let content_uuid = match content_file { |         let content_uuid = match content_file { | ||||||
|             Some(content_file) if task.status == Status::Enqueued => { |             Some(content_file) if task.status == Status::Enqueued => { | ||||||
|                 let (uuid, mut file) = self.index_scheduler.queue.create_update_file(false)?; |                 let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?; | ||||||
|                 let mut builder = DocumentsBatchBuilder::new(&mut file); |                 let mut writer = io::BufWriter::new(file); | ||||||
|                 for doc in content_file { |                 for doc in content_file { | ||||||
|                     builder.append_json_object(&doc?)?; |                     let doc = doc?; | ||||||
|  |                     serde_json::to_writer(&mut writer, &doc).map_err(|e| { | ||||||
|  |                         Error::from_milli(milli::InternalError::SerdeJson(e).into(), None) | ||||||
|  |                     })?; | ||||||
|                 } |                 } | ||||||
|                 builder.into_inner()?; |                 let file = writer.into_inner().map_err(|e| e.into_error())?; | ||||||
|                 file.persist()?; |                 file.persist()?; | ||||||
|  |  | ||||||
|                 Some(uuid) |                 Some(uuid) | ||||||
| @@ -54,6 +60,12 @@ impl<'a> Dump<'a> { | |||||||
|             // If the task isn't `Enqueued` then just generate a recognisable `Uuid` |             // If the task isn't `Enqueued` then just generate a recognisable `Uuid` | ||||||
|             // in case we try to open it later. |             // in case we try to open it later. | ||||||
|             _ if task.status != Status::Enqueued => Some(Uuid::nil()), |             _ if task.status != Status::Enqueued => Some(Uuid::nil()), | ||||||
|  |             None if task.status == Status::Enqueued && task_has_no_docs => { | ||||||
|  |                 let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?; | ||||||
|  |                 file.persist()?; | ||||||
|  |  | ||||||
|  |                 Some(uuid) | ||||||
|  |             } | ||||||
|             _ => None, |             _ => None, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -4,7 +4,6 @@ use std::sync::atomic::Ordering; | |||||||
|  |  | ||||||
| use dump::IndexMetadata; | use dump::IndexMetadata; | ||||||
| use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; | use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; | ||||||
| use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; |  | ||||||
| use meilisearch_types::milli::progress::Progress; | use meilisearch_types::milli::progress::Progress; | ||||||
| use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | ||||||
| use meilisearch_types::milli::{self}; | use meilisearch_types::milli::{self}; | ||||||
| @@ -72,6 +71,13 @@ impl IndexScheduler { | |||||||
|                 t.started_at = Some(started_at); |                 t.started_at = Some(started_at); | ||||||
|                 t.finished_at = Some(finished_at); |                 t.finished_at = Some(finished_at); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|  |             // Patch the task to remove the batch uid, because as of v1.12.5 batches are not persisted. | ||||||
|  |             // This prevent from referencing *future* batches not actually associated with the task. | ||||||
|  |             // | ||||||
|  |             // See <https://github.com/meilisearch/meilisearch/issues/5247> for details. | ||||||
|  |             t.batch_uid = None; | ||||||
|  |  | ||||||
|             let mut dump_content_file = dump_tasks.push_task(&t.into())?; |             let mut dump_content_file = dump_tasks.push_task(&t.into())?; | ||||||
|  |  | ||||||
|             // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. |             // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. | ||||||
| @@ -82,19 +88,15 @@ impl IndexScheduler { | |||||||
|                 if status == Status::Enqueued { |                 if status == Status::Enqueued { | ||||||
|                     let content_file = self.queue.file_store.get_update(content_file)?; |                     let content_file = self.queue.file_store.get_update(content_file)?; | ||||||
|  |  | ||||||
|                     let reader = DocumentsBatchReader::from_reader(content_file) |                     for document in | ||||||
|                         .map_err(|e| Error::from_milli(e.into(), None))?; |                         serde_json::de::Deserializer::from_reader(content_file).into_iter() | ||||||
|  |  | ||||||
|                     let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); |  | ||||||
|  |  | ||||||
|                     while let Some(doc) = |  | ||||||
|                         cursor.next_document().map_err(|e| Error::from_milli(e.into(), None))? |  | ||||||
|                     { |                     { | ||||||
|                         dump_content_file.push_document( |                         let document = document.map_err(|e| { | ||||||
|                             &obkv_to_object(doc, &documents_batch_index) |                             Error::from_milli(milli::InternalError::SerdeJson(e).into(), None) | ||||||
|                                 .map_err(|e| Error::from_milli(e, None))?, |                         })?; | ||||||
|                         )?; |                         dump_content_file.push_document(&document)?; | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
|                     dump_content_file.flush()?; |                     dump_content_file.flush()?; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|   | |||||||
| @@ -105,8 +105,16 @@ tracing-actix-web = "0.7.15" | |||||||
| build-info = { version = "1.7.0", path = "../build-info" } | build-info = { version = "1.7.0", path = "../build-info" } | ||||||
| roaring = "0.10.10" | roaring = "0.10.10" | ||||||
| mopa-maintained = "0.2.3" | mopa-maintained = "0.2.3" | ||||||
| utoipa = { version = "5.3.1", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] } | utoipa = { version = "5.3.1", features = [ | ||||||
| utoipa-scalar = { version = "0.2.1", optional = true, features = ["actix-web"] } |     "actix_extras", | ||||||
|  |     "macros", | ||||||
|  |     "non_strict_integers", | ||||||
|  |     "preserve_order", | ||||||
|  |     "uuid", | ||||||
|  |     "time", | ||||||
|  |     "openapi_extensions", | ||||||
|  | ] } | ||||||
|  | utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] } | ||||||
|  |  | ||||||
| [dev-dependencies] | [dev-dependencies] | ||||||
| actix-rt = "2.10.0" | actix-rt = "2.10.0" | ||||||
|   | |||||||
| @@ -1524,3 +1524,57 @@ async fn change_attributes_settings() { | |||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Modifying facets with different casing should work correctly | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn change_facet_casing() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |             "filterableAttributes": ["dog"], | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     assert_eq!("202", code.as_str(), "{:?}", response); | ||||||
|  |     index.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let (response, _code) = index | ||||||
|  |         .add_documents( | ||||||
|  |             json!([ | ||||||
|  |                 { | ||||||
|  |                     "id": 1, | ||||||
|  |                     "dog": "Bouvier Bernois" | ||||||
|  |                 } | ||||||
|  |             ]), | ||||||
|  |             None, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |     index.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let (response, _code) = index | ||||||
|  |         .add_documents( | ||||||
|  |             json!([ | ||||||
|  |                 { | ||||||
|  |                     "id": 1, | ||||||
|  |                     "dog": "bouvier bernois" | ||||||
|  |                 } | ||||||
|  |             ]), | ||||||
|  |             None, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |     index.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search(json!({ "facets": ["dog"] }), |response, code| { | ||||||
|  |             meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |             meili_snap::snapshot!(meili_snap::json_string!(response["facetDistribution"]), @r###" | ||||||
|  |             { | ||||||
|  |               "dog": { | ||||||
|  |                 "bouvier bernois": 1 | ||||||
|  |               } | ||||||
|  |             } | ||||||
|  |             "###); | ||||||
|  |         }) | ||||||
|  |         .await; | ||||||
|  | } | ||||||
|   | |||||||
| @@ -88,7 +88,7 @@ fn main() -> anyhow::Result<()> { | |||||||
|     match command { |     match command { | ||||||
|         Command::ClearTaskQueue => clear_task_queue(db_path), |         Command::ClearTaskQueue => clear_task_queue(db_path), | ||||||
|         Command::ExportADump { dump_dir, skip_enqueued_tasks } => { |         Command::ExportADump { dump_dir, skip_enqueued_tasks } => { | ||||||
|             export_a_dump(db_path, dump_dir, skip_enqueued_tasks) |             export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version) | ||||||
|         } |         } | ||||||
|         Command::OfflineUpgrade { target_version } => { |         Command::OfflineUpgrade { target_version } => { | ||||||
|             let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; |             let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; | ||||||
| @@ -187,6 +187,7 @@ fn export_a_dump( | |||||||
|     db_path: PathBuf, |     db_path: PathBuf, | ||||||
|     dump_dir: PathBuf, |     dump_dir: PathBuf, | ||||||
|     skip_enqueued_tasks: bool, |     skip_enqueued_tasks: bool, | ||||||
|  |     detected_version: (String, String, String), | ||||||
| ) -> Result<(), anyhow::Error> { | ) -> Result<(), anyhow::Error> { | ||||||
|     let started_at = OffsetDateTime::now_utc(); |     let started_at = OffsetDateTime::now_utc(); | ||||||
|  |  | ||||||
| @@ -238,15 +239,13 @@ fn export_a_dump( | |||||||
|     if skip_enqueued_tasks { |     if skip_enqueued_tasks { | ||||||
|         eprintln!("Skip dumping the enqueued tasks..."); |         eprintln!("Skip dumping the enqueued tasks..."); | ||||||
|     } else { |     } else { | ||||||
|         eprintln!("Dumping the enqueued tasks..."); |  | ||||||
|  |  | ||||||
|         // 3. dump the tasks |  | ||||||
|         let mut dump_tasks = dump.create_tasks_queue()?; |         let mut dump_tasks = dump.create_tasks_queue()?; | ||||||
|         let mut count = 0; |         let mut count = 0; | ||||||
|         for ret in all_tasks.iter(&rtxn)? { |         for ret in all_tasks.iter(&rtxn)? { | ||||||
|             let (_, t) = ret?; |             let (_, t) = ret?; | ||||||
|             let status = t.status; |             let status = t.status; | ||||||
|             let content_file = t.content_uuid(); |             let content_file = t.content_uuid(); | ||||||
|  |  | ||||||
|             let mut dump_content_file = dump_tasks.push_task(&t.into())?; |             let mut dump_content_file = dump_tasks.push_task(&t.into())?; | ||||||
|  |  | ||||||
|             // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. |             // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. | ||||||
| @@ -254,18 +253,39 @@ fn export_a_dump( | |||||||
|                 if status == Status::Enqueued { |                 if status == Status::Enqueued { | ||||||
|                     let content_file = file_store.get_update(content_file_uuid)?; |                     let content_file = file_store.get_update(content_file_uuid)?; | ||||||
|  |  | ||||||
|                     let reader = |                     if ( | ||||||
|                         DocumentsBatchReader::from_reader(content_file).with_context(|| { |                         detected_version.0.as_str(), | ||||||
|                             format!("While reading content file {:?}", content_file_uuid) |                         detected_version.1.as_str(), | ||||||
|                         })?; |                         detected_version.2.as_str(), | ||||||
|  |                     ) < ("1", "12", "0") | ||||||
|                     let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); |                     { | ||||||
|                     while let Some(doc) = cursor.next_document().with_context(|| { |                         eprintln!("Dumping the enqueued tasks reading them in obkv format..."); | ||||||
|                         format!("While iterating on content file {:?}", content_file_uuid) |                         let reader = | ||||||
|                     })? { |                             DocumentsBatchReader::from_reader(content_file).with_context(|| { | ||||||
|                         dump_content_file |                                 format!("While reading content file {:?}", content_file_uuid) | ||||||
|                             .push_document(&obkv_to_object(doc, &documents_batch_index)?)?; |                             })?; | ||||||
|  |                         let (mut cursor, documents_batch_index) = | ||||||
|  |                             reader.into_cursor_and_fields_index(); | ||||||
|  |                         while let Some(doc) = cursor.next_document().with_context(|| { | ||||||
|  |                             format!("While iterating on content file {:?}", content_file_uuid) | ||||||
|  |                         })? { | ||||||
|  |                             dump_content_file | ||||||
|  |                                 .push_document(&obkv_to_object(doc, &documents_batch_index)?)?; | ||||||
|  |                         } | ||||||
|  |                     } else { | ||||||
|  |                         eprintln!( | ||||||
|  |                             "Dumping the enqueued tasks reading them in JSON stream format..." | ||||||
|  |                         ); | ||||||
|  |                         for document in | ||||||
|  |                             serde_json::de::Deserializer::from_reader(content_file).into_iter() | ||||||
|  |                         { | ||||||
|  |                             let document = document.with_context(|| { | ||||||
|  |                                 format!("While reading content file {:?}", content_file_uuid) | ||||||
|  |                             })?; | ||||||
|  |                             dump_content_file.push_document(&document)?; | ||||||
|  |                         } | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
|                     dump_content_file.flush()?; |                     dump_content_file.flush()?; | ||||||
|                     count += 1; |                     count += 1; | ||||||
|                 } |                 } | ||||||
|   | |||||||
| @@ -20,6 +20,34 @@ pub struct OfflineUpgrade { | |||||||
|  |  | ||||||
| impl OfflineUpgrade { | impl OfflineUpgrade { | ||||||
|     pub fn upgrade(self) -> anyhow::Result<()> { |     pub fn upgrade(self) -> anyhow::Result<()> { | ||||||
|  |         // Adding a version? | ||||||
|  |         // | ||||||
|  |         // 1. Update the LAST_SUPPORTED_UPGRADE_FROM_VERSION and LAST_SUPPORTED_UPGRADE_TO_VERSION. | ||||||
|  |         // 2. Add new version to the upgrade list if necessary | ||||||
|  |         // 3. Use `no_upgrade` as index for versions that are compatible. | ||||||
|  |  | ||||||
|  |         if self.current_version == self.target_version { | ||||||
|  |             println!("Database is already at the target version. Exiting."); | ||||||
|  |             return Ok(()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if self.current_version > self.target_version { | ||||||
|  |             bail!( | ||||||
|  |                 "Cannot downgrade from {}.{}.{} to {}.{}.{}. Downgrade not supported", | ||||||
|  |                 self.current_version.0, | ||||||
|  |                 self.current_version.1, | ||||||
|  |                 self.current_version.2, | ||||||
|  |                 self.target_version.0, | ||||||
|  |                 self.target_version.1, | ||||||
|  |                 self.target_version.2 | ||||||
|  |             ); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         const FIRST_SUPPORTED_UPGRADE_FROM_VERSION: &str = "1.9.0"; | ||||||
|  |         const LAST_SUPPORTED_UPGRADE_FROM_VERSION: &str = "1.12.5"; | ||||||
|  |         const FIRST_SUPPORTED_UPGRADE_TO_VERSION: &str = "1.10.0"; | ||||||
|  |         const LAST_SUPPORTED_UPGRADE_TO_VERSION: &str = "1.12.5"; | ||||||
|  |  | ||||||
|         let upgrade_list = [ |         let upgrade_list = [ | ||||||
|             ( |             ( | ||||||
|                 v1_9_to_v1_10 as fn(&Path, &str, &str, &str) -> Result<(), anyhow::Error>, |                 v1_9_to_v1_10 as fn(&Path, &str, &str, &str) -> Result<(), anyhow::Error>, | ||||||
| @@ -32,6 +60,8 @@ impl OfflineUpgrade { | |||||||
|             (v1_12_to_v1_12_3, "1", "12", "3"), |             (v1_12_to_v1_12_3, "1", "12", "3"), | ||||||
|         ]; |         ]; | ||||||
|  |  | ||||||
|  |         let no_upgrade: usize = upgrade_list.len(); | ||||||
|  |  | ||||||
|         let (current_major, current_minor, current_patch) = &self.current_version; |         let (current_major, current_minor, current_patch) = &self.current_version; | ||||||
|  |  | ||||||
|         let start_at = match ( |         let start_at = match ( | ||||||
| @@ -42,9 +72,12 @@ impl OfflineUpgrade { | |||||||
|             ("1", "9", _) => 0, |             ("1", "9", _) => 0, | ||||||
|             ("1", "10", _) => 1, |             ("1", "10", _) => 1, | ||||||
|             ("1", "11", _) => 2, |             ("1", "11", _) => 2, | ||||||
|             ("1", "12", x) if x == "0" || x == "1" || x == "2" => 3, |             ("1", "12", "0" | "1" | "2") => 3, | ||||||
|  |             ("1", "12", "3" | "4" | "5") => no_upgrade, | ||||||
|             _ => { |             _ => { | ||||||
|                 bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9 and v1.10") |                 bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from versions in range [{}-{}]", | ||||||
|  |                       FIRST_SUPPORTED_UPGRADE_FROM_VERSION, | ||||||
|  |                       LAST_SUPPORTED_UPGRADE_FROM_VERSION); | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
| @@ -53,18 +86,28 @@ impl OfflineUpgrade { | |||||||
|         let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { |         let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { | ||||||
|             ("1", "10", _) => 0, |             ("1", "10", _) => 0, | ||||||
|             ("1", "11", _) => 1, |             ("1", "11", _) => 1, | ||||||
|             ("1", "12", x) if x == "0" || x == "1" || x == "2" => 2, |             ("1", "12", "0" | "1" | "2") => 2, | ||||||
|             ("1", "12", "3") => 3, |             ("1", "12", "3" | "4" | "5") => 3, | ||||||
|             (major, _, _) if major.starts_with('v') => { |             (major, _, _) if major.starts_with('v') => { | ||||||
|                 bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") |                 bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") | ||||||
|             } |             } | ||||||
|             _ => { |             _ => { | ||||||
|                 bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10 and v1.11") |                 bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to versions in range [{}-{}]", | ||||||
|  |                       FIRST_SUPPORTED_UPGRADE_TO_VERSION, | ||||||
|  |                       LAST_SUPPORTED_UPGRADE_TO_VERSION); | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); |         println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); | ||||||
|  |  | ||||||
|  |         if start_at == no_upgrade { | ||||||
|  |             println!("No upgrade operation to perform, writing VERSION file"); | ||||||
|  |             create_version_file(&self.db_path, target_major, target_minor, target_patch) | ||||||
|  |                 .context("while writing VERSION file after the upgrade")?; | ||||||
|  |             println!("Success"); | ||||||
|  |             return Ok(()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         #[allow(clippy::needless_range_loop)] |         #[allow(clippy::needless_range_loop)] | ||||||
|         for index in start_at..=ends_at { |         for index in start_at..=ends_at { | ||||||
|             let (func, major, minor, patch) = upgrade_list[index]; |             let (func, major, minor, patch) = upgrade_list[index]; | ||||||
|   | |||||||
| @@ -219,12 +219,19 @@ impl<'a> FacetDistribution<'a> { | |||||||
|                 let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap(); |                 let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap(); | ||||||
|  |  | ||||||
|                 let key: (FieldId, _, &str) = (field_id, any_docid, facet_key); |                 let key: (FieldId, _, &str) = (field_id, any_docid, facet_key); | ||||||
|                 let original_string = self |                 let optional_original_string = | ||||||
|                     .index |                     self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?; | ||||||
|                     .field_id_docid_facet_strings |  | ||||||
|                     .get(self.rtxn, &key)? |                 let original_string = match optional_original_string { | ||||||
|                     .unwrap() |                     Some(original_string) => original_string.to_owned(), | ||||||
|                     .to_owned(); |                     None => { | ||||||
|  |                         tracing::error!( | ||||||
|  |                             "Missing original facet string. Using the normalized facet {} instead", | ||||||
|  |                             facet_key | ||||||
|  |                         ); | ||||||
|  |                         facet_key.to_string() | ||||||
|  |                     } | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|                 distribution.insert(original_string, nbr_docids); |                 distribution.insert(original_string, nbr_docids); | ||||||
|                 if distribution.len() == self.max_values_per_facet { |                 if distribution.len() == self.max_values_per_facet { | ||||||
|   | |||||||
| @@ -283,42 +283,60 @@ impl FacetedDocidsExtractor { | |||||||
| } | } | ||||||
|  |  | ||||||
| struct DelAddFacetValue<'doc> { | struct DelAddFacetValue<'doc> { | ||||||
|     strings: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>, |     strings: HashMap< | ||||||
|  |         (FieldId, &'doc str), | ||||||
|  |         Option<BVec<'doc, u8>>, | ||||||
|  |         hashbrown::DefaultHashBuilder, | ||||||
|  |         &'doc Bump, | ||||||
|  |     >, | ||||||
|     f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>, |     f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>, | ||||||
|  |     doc_alloc: &'doc Bump, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'doc> DelAddFacetValue<'doc> { | impl<'doc> DelAddFacetValue<'doc> { | ||||||
|     fn new(doc_alloc: &'doc Bump) -> Self { |     fn new(doc_alloc: &'doc Bump) -> Self { | ||||||
|         Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc) } |         Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc), doc_alloc } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn insert_add(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) { |     fn insert_add(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) { | ||||||
|         let cache = match kind { |         match kind { | ||||||
|             FacetKind::String => &mut self.strings, |             FacetKind::Number => { | ||||||
|             FacetKind::Number => &mut self.f64s, |                 let key = (fid, value); | ||||||
|             _ => return, |                 if let Some(DelAdd::Deletion) = self.f64s.get(&key) { | ||||||
|         }; |                     self.f64s.remove(&key); | ||||||
|  |                 } else { | ||||||
|         let key = (fid, value); |                     self.f64s.insert(key, DelAdd::Addition); | ||||||
|         if let Some(DelAdd::Deletion) = cache.get(&key) { |                 } | ||||||
|             cache.remove(&key); |             } | ||||||
|         } else { |             FacetKind::String => { | ||||||
|             cache.insert(key, DelAdd::Addition); |                 if let Ok(s) = std::str::from_utf8(&value) { | ||||||
|  |                     let normalized = crate::normalize_facet(s); | ||||||
|  |                     let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized)); | ||||||
|  |                     self.strings.insert((fid, truncated), Some(value)); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             _ => (), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn insert_del(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) { |     fn insert_del(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) { | ||||||
|         let cache = match kind { |         match kind { | ||||||
|             FacetKind::String => &mut self.strings, |             FacetKind::Number => { | ||||||
|             FacetKind::Number => &mut self.f64s, |                 let key = (fid, value); | ||||||
|             _ => return, |                 if let Some(DelAdd::Addition) = self.f64s.get(&key) { | ||||||
|         }; |                     self.f64s.remove(&key); | ||||||
|  |                 } else { | ||||||
|         let key = (fid, value); |                     self.f64s.insert(key, DelAdd::Deletion); | ||||||
|         if let Some(DelAdd::Addition) = cache.get(&key) { |                 } | ||||||
|             cache.remove(&key); |             } | ||||||
|         } else { |             FacetKind::String => { | ||||||
|             cache.insert(key, DelAdd::Deletion); |                 if let Ok(s) = std::str::from_utf8(&value) { | ||||||
|  |                     let normalized = crate::normalize_facet(s); | ||||||
|  |                     let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized)); | ||||||
|  |                     self.strings.insert((fid, truncated), None); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             _ => (), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -329,18 +347,14 @@ impl<'doc> DelAddFacetValue<'doc> { | |||||||
|         doc_alloc: &Bump, |         doc_alloc: &Bump, | ||||||
|     ) -> crate::Result<()> { |     ) -> crate::Result<()> { | ||||||
|         let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc); |         let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc); | ||||||
|         for ((fid, value), deladd) in self.strings { |         for ((fid, truncated), value) in self.strings { | ||||||
|             if let Ok(s) = std::str::from_utf8(&value) { |             buffer.clear(); | ||||||
|                 buffer.clear(); |             buffer.extend_from_slice(&fid.to_be_bytes()); | ||||||
|                 buffer.extend_from_slice(&fid.to_be_bytes()); |             buffer.extend_from_slice(&docid.to_be_bytes()); | ||||||
|                 buffer.extend_from_slice(&docid.to_be_bytes()); |             buffer.extend_from_slice(truncated.as_bytes()); | ||||||
|                 let normalized = crate::normalize_facet(s); |             match &value { | ||||||
|                 let truncated = truncate_str(&normalized); |                 Some(value) => sender.write_facet_string(&buffer, value)?, | ||||||
|                 buffer.extend_from_slice(truncated.as_bytes()); |                 None => sender.delete_facet_string(&buffer)?, | ||||||
|                 match deladd { |  | ||||||
|                     DelAdd::Deletion => sender.delete_facet_string(&buffer)?, |  | ||||||
|                     DelAdd::Addition => sender.write_facet_string(&buffer, &value)?, |  | ||||||
|                 } |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -153,10 +153,12 @@ pub fn write_from_bbqueue( | |||||||
|                     } |                     } | ||||||
|                     (key, None) => match database.delete(wtxn, key) { |                     (key, None) => match database.delete(wtxn, key) { | ||||||
|                         Ok(false) => { |                         Ok(false) => { | ||||||
|                             unreachable!( |                             tracing::error!( | ||||||
|                                 "We tried to delete an unknown key from {database_name}: {:?}", |                                 database_name, | ||||||
|                                 key.as_bstr() |                                 key_bytes = ?key, | ||||||
|                             ) |                                 formatted_key = ?key.as_bstr(), | ||||||
|  |                                 "Attempt to delete an unknown key" | ||||||
|  |                             ); | ||||||
|                         } |                         } | ||||||
|                         Ok(_) => (), |                         Ok(_) => (), | ||||||
|                         Err(error) => { |                         Err(error) => { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user