mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Merge #5351
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				Run the indexing fuzzer / Setup the action (push) Failing after 2m50s
				
					
					
				
			
		
			
				
	
				Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Look for flaky tests / flaky (push) Failing after 19s
				
					
					
				
			
		
			
				
	
				SDKs tests / define-docker-image (push) Failing after 5s
				
					
					
				
			
		
			
				
	
				SDKs tests / .NET SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Dart SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Go SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Java SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / JS SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / PHP SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Python SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Ruby SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Rust SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / Swift SDK tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-rails tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				SDKs tests / meilisearch-symfony tests (push) Has been skipped
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 2s
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 12s
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
				
					
					
				
			
		
			
				
	
				Test suite / Test with Ollama (push) Failing after 7s
				
					
					
				
			
		
			
				
	
				Test suite / Test disabled tokenization (push) Failing after 11s
				
					
					
				
			
		
			
				
	
				Test suite / Run tests in debug (push) Failing after 11s
				
					
					
				
			
		
			
				
	
				Test suite / Run Clippy (push) Failing after 17s
				
					
					
				
			
		
			
				
	
				Test suite / Run Rustfmt (push) Successful in 1m51s
				
					
					
				
			
		
			
				
	
				Test suite / Tests almost all features (push) Failing after 7m7s
				
					
					
				
			
		
			
				
	
				Test suite / Tests on macos-13 (push) Has been cancelled
				
					
					
				
			
		
			
				
	
				Test suite / Tests on windows-2022 (push) Has been cancelled
				
					
					
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	Run the indexing fuzzer / Setup the action (push) Failing after 2m50s
				Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
				Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
				Look for flaky tests / flaky (push) Failing after 19s
				SDKs tests / define-docker-image (push) Failing after 5s
				SDKs tests / .NET SDK tests (push) Has been skipped
				SDKs tests / Dart SDK tests (push) Has been skipped
				SDKs tests / Go SDK tests (push) Has been skipped
				SDKs tests / Java SDK tests (push) Has been skipped
				SDKs tests / JS SDK tests (push) Has been skipped
				SDKs tests / PHP SDK tests (push) Has been skipped
				SDKs tests / Python SDK tests (push) Has been skipped
				SDKs tests / Ruby SDK tests (push) Has been skipped
				SDKs tests / Rust SDK tests (push) Has been skipped
				SDKs tests / Swift SDK tests (push) Has been skipped
				SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
				SDKs tests / meilisearch-rails tests (push) Has been skipped
				SDKs tests / meilisearch-symfony tests (push) Has been skipped
				Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
				Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 2s
				Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 12s
				Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
				Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
				Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
				Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
				Test suite / Test with Ollama (push) Failing after 7s
				Test suite / Test disabled tokenization (push) Failing after 11s
				Test suite / Run tests in debug (push) Failing after 11s
				Test suite / Run Clippy (push) Failing after 17s
				Test suite / Run Rustfmt (push) Successful in 1m51s
				Test suite / Tests almost all features (push) Failing after 7m7s
				Test suite / Tests on macos-13 (push) Has been cancelled
				Test suite / Tests on windows-2022 (push) Has been cancelled
				5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops This PR brings back the changes made in v1.13 into the main branch. Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Clémentine <clementine@meilisearch.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
		
							
								
								
									
										2
									
								
								.github/workflows/sdks-tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/sdks-tests.yml
									
									
									
									
										vendored
									
									
								
							| @@ -52,7 +52,7 @@ jobs: | |||||||
|       - name: Setup .NET Core |       - name: Setup .NET Core | ||||||
|         uses: actions/setup-dotnet@v4 |         uses: actions/setup-dotnet@v4 | ||||||
|         with: |         with: | ||||||
|           dotnet-version: "6.0.x" |           dotnet-version: "8.0.x" | ||||||
|       - name: Install dependencies |       - name: Install dependencies | ||||||
|         run: dotnet restore |         run: dotnet restore | ||||||
|       - name: Build |       - name: Build | ||||||
|   | |||||||
| @@ -10,8 +10,10 @@ dump | |||||||
| ├── instance-uid.uuid | ├── instance-uid.uuid | ||||||
| ├── keys.jsonl | ├── keys.jsonl | ||||||
| ├── metadata.json | ├── metadata.json | ||||||
| └── tasks | ├── tasks | ||||||
|     ├── update_files | │   ├── update_files | ||||||
|     │   └── [task_id].jsonl | │   │   └── [task_id].jsonl | ||||||
|  | │   └── queue.jsonl | ||||||
|  | └── batches | ||||||
|     └── queue.jsonl |     └── queue.jsonl | ||||||
| ``` | ``` | ||||||
| @@ -228,14 +228,16 @@ pub(crate) mod test { | |||||||
|  |  | ||||||
|     use big_s::S; |     use big_s::S; | ||||||
|     use maplit::{btreemap, btreeset}; |     use maplit::{btreemap, btreeset}; | ||||||
|  |     use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats}; | ||||||
|     use meilisearch_types::facet_values_sort::FacetValuesSort; |     use meilisearch_types::facet_values_sort::FacetValuesSort; | ||||||
|     use meilisearch_types::features::RuntimeTogglableFeatures; |     use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures}; | ||||||
|     use meilisearch_types::index_uid_pattern::IndexUidPattern; |     use meilisearch_types::index_uid_pattern::IndexUidPattern; | ||||||
|     use meilisearch_types::keys::{Action, Key}; |     use meilisearch_types::keys::{Action, Key}; | ||||||
|     use meilisearch_types::milli; |     use meilisearch_types::milli; | ||||||
|     use meilisearch_types::milli::update::Setting; |     use meilisearch_types::milli::update::Setting; | ||||||
|     use meilisearch_types::settings::{Checked, FacetingSettings, Settings}; |     use meilisearch_types::settings::{Checked, FacetingSettings, Settings}; | ||||||
|     use meilisearch_types::tasks::{Details, Status}; |     use meilisearch_types::task_view::DetailsView; | ||||||
|  |     use meilisearch_types::tasks::{Details, Kind, Status}; | ||||||
|     use serde_json::{json, Map, Value}; |     use serde_json::{json, Map, Value}; | ||||||
|     use time::macros::datetime; |     use time::macros::datetime; | ||||||
|     use uuid::Uuid; |     use uuid::Uuid; | ||||||
| @@ -305,6 +307,30 @@ pub(crate) mod test { | |||||||
|         settings.check() |         settings.check() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn create_test_batches() -> Vec<Batch> { | ||||||
|  |         vec![Batch { | ||||||
|  |             uid: 0, | ||||||
|  |             details: DetailsView { | ||||||
|  |                 received_documents: Some(12), | ||||||
|  |                 indexed_documents: Some(Some(10)), | ||||||
|  |                 ..DetailsView::default() | ||||||
|  |             }, | ||||||
|  |             progress: None, | ||||||
|  |             stats: BatchStats { | ||||||
|  |                 total_nb_tasks: 1, | ||||||
|  |                 status: maplit::btreemap! { Status::Succeeded => 1 }, | ||||||
|  |                 types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 }, | ||||||
|  |                 index_uids: maplit::btreemap! { "doggo".to_string() => 1 }, | ||||||
|  |             }, | ||||||
|  |             enqueued_at: Some(BatchEnqueuedAt { | ||||||
|  |                 earliest: datetime!(2022-11-11 0:00 UTC), | ||||||
|  |                 oldest: datetime!(2022-11-11 0:00 UTC), | ||||||
|  |             }), | ||||||
|  |             started_at: datetime!(2022-11-20 0:00 UTC), | ||||||
|  |             finished_at: Some(datetime!(2022-11-21 0:00 UTC)), | ||||||
|  |         }] | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> { |     pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> { | ||||||
|         vec![ |         vec![ | ||||||
|             ( |             ( | ||||||
| @@ -427,6 +453,15 @@ pub(crate) mod test { | |||||||
|         index.flush().unwrap(); |         index.flush().unwrap(); | ||||||
|         index.settings(&settings).unwrap(); |         index.settings(&settings).unwrap(); | ||||||
|  |  | ||||||
|  |         // ========== pushing the batch queue | ||||||
|  |         let batches = create_test_batches(); | ||||||
|  |  | ||||||
|  |         let mut batch_queue = dump.create_batches_queue().unwrap(); | ||||||
|  |         for batch in &batches { | ||||||
|  |             batch_queue.push_batch(batch).unwrap(); | ||||||
|  |         } | ||||||
|  |         batch_queue.flush().unwrap(); | ||||||
|  |  | ||||||
|         // ========== pushing the task queue |         // ========== pushing the task queue | ||||||
|         let tasks = create_test_tasks(); |         let tasks = create_test_tasks(); | ||||||
|  |  | ||||||
| @@ -455,6 +490,10 @@ pub(crate) mod test { | |||||||
|  |  | ||||||
|         dump.create_experimental_features(features).unwrap(); |         dump.create_experimental_features(features).unwrap(); | ||||||
|  |  | ||||||
|  |         // ========== network | ||||||
|  |         let network = create_test_network(); | ||||||
|  |         dump.create_network(network).unwrap(); | ||||||
|  |  | ||||||
|         // create the dump |         // create the dump | ||||||
|         let mut file = tempfile::tempfile().unwrap(); |         let mut file = tempfile::tempfile().unwrap(); | ||||||
|         dump.persist_to(&mut file).unwrap(); |         dump.persist_to(&mut file).unwrap(); | ||||||
| @@ -467,6 +506,13 @@ pub(crate) mod test { | |||||||
|         RuntimeTogglableFeatures::default() |         RuntimeTogglableFeatures::default() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn create_test_network() -> Network { | ||||||
|  |         Network { | ||||||
|  |             local: Some("myself".to_string()), | ||||||
|  |             remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }}, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn test_creating_and_read_dump() { |     fn test_creating_and_read_dump() { | ||||||
|         let mut file = create_test_dump(); |         let mut file = create_test_dump(); | ||||||
| @@ -515,5 +561,9 @@ pub(crate) mod test { | |||||||
|         // ==== checking the features |         // ==== checking the features | ||||||
|         let expected = create_test_features(); |         let expected = create_test_features(); | ||||||
|         assert_eq!(dump.features().unwrap().unwrap(), expected); |         assert_eq!(dump.features().unwrap().unwrap(), expected); | ||||||
|  |  | ||||||
|  |         // ==== checking the network | ||||||
|  |         let expected = create_test_network(); | ||||||
|  |         assert_eq!(&expected, dump.network().unwrap().unwrap()); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -196,6 +196,10 @@ impl CompatV5ToV6 { | |||||||
|     pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> { |     pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> { | ||||||
|         Ok(None) |         Ok(None) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn network(&self) -> Result<Option<&v6::Network>> { | ||||||
|  |         Ok(None) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub enum CompatIndexV5ToV6 { | pub enum CompatIndexV5ToV6 { | ||||||
|   | |||||||
| @@ -23,6 +23,7 @@ mod v6; | |||||||
| pub type Document = serde_json::Map<String, serde_json::Value>; | pub type Document = serde_json::Map<String, serde_json::Value>; | ||||||
| pub type UpdateFile = dyn Iterator<Item = Result<Document>>; | pub type UpdateFile = dyn Iterator<Item = Result<Document>>; | ||||||
|  |  | ||||||
|  | #[allow(clippy::large_enum_variant)] | ||||||
| pub enum DumpReader { | pub enum DumpReader { | ||||||
|     Current(V6Reader), |     Current(V6Reader), | ||||||
|     Compat(CompatV5ToV6), |     Compat(CompatV5ToV6), | ||||||
| @@ -101,6 +102,13 @@ impl DumpReader { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> { | ||||||
|  |         match self { | ||||||
|  |             DumpReader::Current(current) => Ok(current.batches()), | ||||||
|  |             DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> { |     pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> { | ||||||
|         match self { |         match self { | ||||||
|             DumpReader::Current(current) => Ok(current.keys()), |             DumpReader::Current(current) => Ok(current.keys()), | ||||||
| @@ -114,6 +122,13 @@ impl DumpReader { | |||||||
|             DumpReader::Compat(compat) => compat.features(), |             DumpReader::Compat(compat) => compat.features(), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn network(&self) -> Result<Option<&v6::Network>> { | ||||||
|  |         match self { | ||||||
|  |             DumpReader::Current(current) => Ok(current.network()), | ||||||
|  |             DumpReader::Compat(compat) => compat.network(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl From<V6Reader> for DumpReader { | impl From<V6Reader> for DumpReader { | ||||||
| @@ -219,6 +234,10 @@ pub(crate) mod test { | |||||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00"); |         insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00"); | ||||||
|         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); |         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); | ||||||
|  |  | ||||||
|  |         // batches didn't exist at the time | ||||||
|  |         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|  |         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||||
|  |  | ||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
| @@ -328,6 +347,7 @@ pub(crate) mod test { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default()); |         assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default()); | ||||||
|  |         assert_eq!(dump.network().unwrap(), None); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
| @@ -339,6 +359,10 @@ pub(crate) mod test { | |||||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00"); |         insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00"); | ||||||
|         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); |         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); | ||||||
|  |  | ||||||
|  |         // batches didn't exist at the time | ||||||
|  |         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|  |         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||||
|  |  | ||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
| @@ -373,6 +397,27 @@ pub(crate) mod test { | |||||||
|         assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default()); |         assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn import_dump_v6_network() { | ||||||
|  |         let dump = File::open("tests/assets/v6-with-network.dump").unwrap(); | ||||||
|  |         let dump = DumpReader::open(dump).unwrap(); | ||||||
|  |  | ||||||
|  |         // top level infos | ||||||
|  |         insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00"); | ||||||
|  |         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); | ||||||
|  |  | ||||||
|  |         // network | ||||||
|  |  | ||||||
|  |         let network = dump.network().unwrap().unwrap(); | ||||||
|  |         insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0"); | ||||||
|  |         insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700"); | ||||||
|  |         insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true"); | ||||||
|  |         insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701"); | ||||||
|  |         insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true"); | ||||||
|  |         insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io"); | ||||||
|  |         insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo"); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn import_dump_v5() { |     fn import_dump_v5() { | ||||||
|         let dump = File::open("tests/assets/v5.dump").unwrap(); |         let dump = File::open("tests/assets/v5.dump").unwrap(); | ||||||
| @@ -382,6 +427,10 @@ pub(crate) mod test { | |||||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); |         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); | ||||||
|         insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); |         insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); | ||||||
|  |  | ||||||
|  |         // batches didn't exist at the time | ||||||
|  |         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|  |         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||||
|  |  | ||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
| @@ -462,6 +511,10 @@ pub(crate) mod test { | |||||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); |         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); | ||||||
|         insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); |         insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); | ||||||
|  |  | ||||||
|  |         // batches didn't exist at the time | ||||||
|  |         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|  |         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||||
|  |  | ||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
| @@ -539,6 +592,10 @@ pub(crate) mod test { | |||||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); |         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); | ||||||
|         assert_eq!(dump.instance_uid().unwrap(), None); |         assert_eq!(dump.instance_uid().unwrap(), None); | ||||||
|  |  | ||||||
|  |         // batches didn't exist at the time | ||||||
|  |         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|  |         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||||
|  |  | ||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
| @@ -632,6 +689,10 @@ pub(crate) mod test { | |||||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); |         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); | ||||||
|         assert_eq!(dump.instance_uid().unwrap(), None); |         assert_eq!(dump.instance_uid().unwrap(), None); | ||||||
|  |  | ||||||
|  |         // batches didn't exist at the time | ||||||
|  |         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|  |         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||||
|  |  | ||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
| @@ -725,6 +786,10 @@ pub(crate) mod test { | |||||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00"); |         insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00"); | ||||||
|         assert_eq!(dump.instance_uid().unwrap(), None); |         assert_eq!(dump.instance_uid().unwrap(), None); | ||||||
|  |  | ||||||
|  |         // batches didn't exist at the time | ||||||
|  |         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|  |         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||||
|  |  | ||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
| @@ -801,6 +866,10 @@ pub(crate) mod test { | |||||||
|         assert_eq!(dump.date(), None); |         assert_eq!(dump.date(), None); | ||||||
|         assert_eq!(dump.instance_uid().unwrap(), None); |         assert_eq!(dump.instance_uid().unwrap(), None); | ||||||
|  |  | ||||||
|  |         // batches didn't exist at the time | ||||||
|  |         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|  |         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||||
|  |  | ||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
|   | |||||||
| @@ -18,8 +18,10 @@ pub type Checked = meilisearch_types::settings::Checked; | |||||||
| pub type Unchecked = meilisearch_types::settings::Unchecked; | pub type Unchecked = meilisearch_types::settings::Unchecked; | ||||||
|  |  | ||||||
| pub type Task = crate::TaskDump; | pub type Task = crate::TaskDump; | ||||||
|  | pub type Batch = meilisearch_types::batches::Batch; | ||||||
| pub type Key = meilisearch_types::keys::Key; | pub type Key = meilisearch_types::keys::Key; | ||||||
| pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures; | pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures; | ||||||
|  | pub type Network = meilisearch_types::features::Network; | ||||||
|  |  | ||||||
| // ===== Other types to clarify the code of the compat module | // ===== Other types to clarify the code of the compat module | ||||||
| // everything related to the tasks | // everything related to the tasks | ||||||
| @@ -48,8 +50,10 @@ pub struct V6Reader { | |||||||
|     instance_uid: Option<Uuid>, |     instance_uid: Option<Uuid>, | ||||||
|     metadata: Metadata, |     metadata: Metadata, | ||||||
|     tasks: BufReader<File>, |     tasks: BufReader<File>, | ||||||
|  |     batches: Option<BufReader<File>>, | ||||||
|     keys: BufReader<File>, |     keys: BufReader<File>, | ||||||
|     features: Option<RuntimeTogglableFeatures>, |     features: Option<RuntimeTogglableFeatures>, | ||||||
|  |     network: Option<Network>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl V6Reader { | impl V6Reader { | ||||||
| @@ -77,13 +81,38 @@ impl V6Reader { | |||||||
|         } else { |         } else { | ||||||
|             None |             None | ||||||
|         }; |         }; | ||||||
|  |         let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) { | ||||||
|  |             Ok(file) => Some(BufReader::new(file)), | ||||||
|  |             // The batch file was only introduced during the v1.13, anything prior to that won't have batches | ||||||
|  |             Err(err) if err.kind() == ErrorKind::NotFound => None, | ||||||
|  |             Err(e) => return Err(e.into()), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let network_file = match fs::read(dump.path().join("network.json")) { | ||||||
|  |             Ok(network_file) => Some(network_file), | ||||||
|  |             Err(error) => match error.kind() { | ||||||
|  |                 // Allows the file to be missing; this only results in no network configuration being loaded. | ||||||
|  |                 ErrorKind::NotFound => { | ||||||
|  |                     debug!("`network.json` not found in dump"); | ||||||
|  |                     None | ||||||
|  |                 } | ||||||
|  |                 _ => return Err(error.into()), | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         let network = if let Some(network_file) = network_file { | ||||||
|  |             Some(serde_json::from_reader(&*network_file)?) | ||||||
|  |         } else { | ||||||
|  |             None | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         Ok(V6Reader { |         Ok(V6Reader { | ||||||
|             metadata: serde_json::from_reader(&*meta_file)?, |             metadata: serde_json::from_reader(&*meta_file)?, | ||||||
|             instance_uid, |             instance_uid, | ||||||
|             tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), |             tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), | ||||||
|  |             batches, | ||||||
|             keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), |             keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), | ||||||
|             features, |             features, | ||||||
|  |             network, | ||||||
|             dump, |             dump, | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
| @@ -124,7 +153,7 @@ impl V6Reader { | |||||||
|         &mut self, |         &mut self, | ||||||
|     ) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> { |     ) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> { | ||||||
|         Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { |         Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { | ||||||
|             let task: Task = serde_json::from_str(&line?).unwrap(); |             let task: Task = serde_json::from_str(&line?)?; | ||||||
|  |  | ||||||
|             let update_file_path = self |             let update_file_path = self | ||||||
|                 .dump |                 .dump | ||||||
| @@ -136,8 +165,7 @@ impl V6Reader { | |||||||
|             if update_file_path.exists() { |             if update_file_path.exists() { | ||||||
|                 Ok(( |                 Ok(( | ||||||
|                     task, |                     task, | ||||||
|                     Some(Box::new(UpdateFile::new(&update_file_path).unwrap()) |                     Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>), | ||||||
|                         as Box<super::UpdateFile>), |  | ||||||
|                 )) |                 )) | ||||||
|             } else { |             } else { | ||||||
|                 Ok((task, None)) |                 Ok((task, None)) | ||||||
| @@ -145,6 +173,16 @@ impl V6Reader { | |||||||
|         })) |         })) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> { | ||||||
|  |         match self.batches.as_mut() { | ||||||
|  |             Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> { | ||||||
|  |                 let batch = serde_json::from_str(&line?)?; | ||||||
|  |                 Ok(batch) | ||||||
|  |             })), | ||||||
|  |             None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> { |     pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> { | ||||||
|         Box::new( |         Box::new( | ||||||
|             (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), |             (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), | ||||||
| @@ -154,6 +192,10 @@ impl V6Reader { | |||||||
|     pub fn features(&self) -> Option<RuntimeTogglableFeatures> { |     pub fn features(&self) -> Option<RuntimeTogglableFeatures> { | ||||||
|         self.features |         self.features | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn network(&self) -> Option<&Network> { | ||||||
|  |         self.network.as_ref() | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct UpdateFile { | pub struct UpdateFile { | ||||||
|   | |||||||
| @@ -4,7 +4,8 @@ use std::path::PathBuf; | |||||||
|  |  | ||||||
| use flate2::write::GzEncoder; | use flate2::write::GzEncoder; | ||||||
| use flate2::Compression; | use flate2::Compression; | ||||||
| use meilisearch_types::features::RuntimeTogglableFeatures; | use meilisearch_types::batches::Batch; | ||||||
|  | use meilisearch_types::features::{Network, RuntimeTogglableFeatures}; | ||||||
| use meilisearch_types::keys::Key; | use meilisearch_types::keys::Key; | ||||||
| use meilisearch_types::settings::{Checked, Settings}; | use meilisearch_types::settings::{Checked, Settings}; | ||||||
| use serde_json::{Map, Value}; | use serde_json::{Map, Value}; | ||||||
| @@ -54,6 +55,10 @@ impl DumpWriter { | |||||||
|         TaskWriter::new(self.dir.path().join("tasks")) |         TaskWriter::new(self.dir.path().join("tasks")) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn create_batches_queue(&self) -> Result<BatchWriter> { | ||||||
|  |         BatchWriter::new(self.dir.path().join("batches")) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> { |     pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> { | ||||||
|         Ok(std::fs::write( |         Ok(std::fs::write( | ||||||
|             self.dir.path().join("experimental-features.json"), |             self.dir.path().join("experimental-features.json"), | ||||||
| @@ -61,6 +66,10 @@ impl DumpWriter { | |||||||
|         )?) |         )?) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn create_network(&self, network: Network) -> Result<()> { | ||||||
|  |         Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn persist_to(self, mut writer: impl Write) -> Result<()> { |     pub fn persist_to(self, mut writer: impl Write) -> Result<()> { | ||||||
|         let gz_encoder = GzEncoder::new(&mut writer, Compression::default()); |         let gz_encoder = GzEncoder::new(&mut writer, Compression::default()); | ||||||
|         let mut tar_encoder = tar::Builder::new(gz_encoder); |         let mut tar_encoder = tar::Builder::new(gz_encoder); | ||||||
| @@ -84,7 +93,7 @@ impl KeyWriter { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn push_key(&mut self, key: &Key) -> Result<()> { |     pub fn push_key(&mut self, key: &Key) -> Result<()> { | ||||||
|         self.keys.write_all(&serde_json::to_vec(key)?)?; |         serde_json::to_writer(&mut self.keys, &key)?; | ||||||
|         self.keys.write_all(b"\n")?; |         self.keys.write_all(b"\n")?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| @@ -114,7 +123,7 @@ impl TaskWriter { | |||||||
|     /// Pushes tasks in the dump. |     /// Pushes tasks in the dump. | ||||||
|     /// If the tasks has an associated `update_file` it'll use the `task_id` as its name. |     /// If the tasks has an associated `update_file` it'll use the `task_id` as its name. | ||||||
|     pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> { |     pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> { | ||||||
|         self.queue.write_all(&serde_json::to_vec(task)?)?; |         serde_json::to_writer(&mut self.queue, &task)?; | ||||||
|         self.queue.write_all(b"\n")?; |         self.queue.write_all(b"\n")?; | ||||||
|  |  | ||||||
|         Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid)))) |         Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid)))) | ||||||
| @@ -126,6 +135,30 @@ impl TaskWriter { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub struct BatchWriter { | ||||||
|  |     queue: BufWriter<File>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl BatchWriter { | ||||||
|  |     pub(crate) fn new(path: PathBuf) -> Result<Self> { | ||||||
|  |         std::fs::create_dir(&path)?; | ||||||
|  |         let queue = File::create(path.join("queue.jsonl"))?; | ||||||
|  |         Ok(BatchWriter { queue: BufWriter::new(queue) }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Pushes batches in the dump. | ||||||
|  |     pub fn push_batch(&mut self, batch: &Batch) -> Result<()> { | ||||||
|  |         serde_json::to_writer(&mut self.queue, &batch)?; | ||||||
|  |         self.queue.write_all(b"\n")?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn flush(mut self) -> Result<()> { | ||||||
|  |         self.queue.flush()?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| pub struct UpdateFile { | pub struct UpdateFile { | ||||||
|     path: PathBuf, |     path: PathBuf, | ||||||
|     writer: Option<BufWriter<File>>, |     writer: Option<BufWriter<File>>, | ||||||
| @@ -137,8 +170,8 @@ impl UpdateFile { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn push_document(&mut self, document: &Document) -> Result<()> { |     pub fn push_document(&mut self, document: &Document) -> Result<()> { | ||||||
|         if let Some(writer) = self.writer.as_mut() { |         if let Some(mut writer) = self.writer.as_mut() { | ||||||
|             writer.write_all(&serde_json::to_vec(document)?)?; |             serde_json::to_writer(&mut writer, &document)?; | ||||||
|             writer.write_all(b"\n")?; |             writer.write_all(b"\n")?; | ||||||
|         } else { |         } else { | ||||||
|             let file = File::create(&self.path).unwrap(); |             let file = File::create(&self.path).unwrap(); | ||||||
| @@ -205,8 +238,8 @@ pub(crate) mod test { | |||||||
|     use super::*; |     use super::*; | ||||||
|     use crate::reader::Document; |     use crate::reader::Document; | ||||||
|     use crate::test::{ |     use crate::test::{ | ||||||
|         create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid, |         create_test_api_keys, create_test_batches, create_test_documents, create_test_dump, | ||||||
|         create_test_settings, create_test_tasks, |         create_test_instance_uid, create_test_settings, create_test_tasks, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     fn create_directory_hierarchy(dir: &Path) -> String { |     fn create_directory_hierarchy(dir: &Path) -> String { | ||||||
| @@ -281,8 +314,10 @@ pub(crate) mod test { | |||||||
|         let dump_path = dump.path(); |         let dump_path = dump.path(); | ||||||
|  |  | ||||||
|         // ==== checking global file hierarchy (we want to be sure there isn't too many files or too few) |         // ==== checking global file hierarchy (we want to be sure there isn't too many files or too few) | ||||||
|         insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###" |         insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r" | ||||||
|         . |         . | ||||||
|  |         ├---- batches/ | ||||||
|  |         │    └---- queue.jsonl | ||||||
|         ├---- indexes/ |         ├---- indexes/ | ||||||
|         │    └---- doggos/ |         │    └---- doggos/ | ||||||
|         │    │    ├---- documents.jsonl |         │    │    ├---- documents.jsonl | ||||||
| @@ -295,8 +330,9 @@ pub(crate) mod test { | |||||||
|         ├---- experimental-features.json |         ├---- experimental-features.json | ||||||
|         ├---- instance_uid.uuid |         ├---- instance_uid.uuid | ||||||
|         ├---- keys.jsonl |         ├---- keys.jsonl | ||||||
|         └---- metadata.json |         ├---- metadata.json | ||||||
|         "###); |         └---- network.json | ||||||
|  |         "); | ||||||
|  |  | ||||||
|         // ==== checking the top level infos |         // ==== checking the top level infos | ||||||
|         let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap(); |         let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap(); | ||||||
| @@ -349,6 +385,16 @@ pub(crate) mod test { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         // ==== checking the batch queue | ||||||
|  |         let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap(); | ||||||
|  |         for (batch, expected) in batches_queue.lines().zip(create_test_batches()) { | ||||||
|  |             let mut batch = serde_json::from_str::<Batch>(batch).unwrap(); | ||||||
|  |             if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) { | ||||||
|  |                 batch.details.settings = None; | ||||||
|  |             } | ||||||
|  |             assert_eq!(batch, expected, "{batch:#?}{expected:#?}"); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         // ==== checking the keys |         // ==== checking the keys | ||||||
|         let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap(); |         let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap(); | ||||||
|         for (key, expected) in keys.lines().zip(create_test_api_keys()) { |         for (key, expected) in keys.lines().zip(create_test_api_keys()) { | ||||||
|   | |||||||
							
								
								
									
										
											BIN
										
									
								
								crates/dump/tests/assets/v6-with-network.dump
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								crates/dump/tests/assets/v6-with-network.dump
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @@ -2,6 +2,7 @@ use std::collections::HashMap; | |||||||
| use std::io; | use std::io; | ||||||
|  |  | ||||||
| use dump::{KindDump, TaskDump, UpdateFile}; | use dump::{KindDump, TaskDump, UpdateFile}; | ||||||
|  | use meilisearch_types::batches::{Batch, BatchId}; | ||||||
| use meilisearch_types::heed::RwTxn; | use meilisearch_types::heed::RwTxn; | ||||||
| use meilisearch_types::milli; | use meilisearch_types::milli; | ||||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||||
| @@ -14,9 +15,15 @@ pub struct Dump<'a> { | |||||||
|     index_scheduler: &'a IndexScheduler, |     index_scheduler: &'a IndexScheduler, | ||||||
|     wtxn: RwTxn<'a>, |     wtxn: RwTxn<'a>, | ||||||
|  |  | ||||||
|  |     batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>, | ||||||
|  |  | ||||||
|     indexes: HashMap<String, RoaringBitmap>, |     indexes: HashMap<String, RoaringBitmap>, | ||||||
|     statuses: HashMap<Status, RoaringBitmap>, |     statuses: HashMap<Status, RoaringBitmap>, | ||||||
|     kinds: HashMap<Kind, RoaringBitmap>, |     kinds: HashMap<Kind, RoaringBitmap>, | ||||||
|  |  | ||||||
|  |     batch_indexes: HashMap<String, RoaringBitmap>, | ||||||
|  |     batch_statuses: HashMap<Status, RoaringBitmap>, | ||||||
|  |     batch_kinds: HashMap<Kind, RoaringBitmap>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> Dump<'a> { | impl<'a> Dump<'a> { | ||||||
| @@ -27,12 +34,72 @@ impl<'a> Dump<'a> { | |||||||
|         Ok(Dump { |         Ok(Dump { | ||||||
|             index_scheduler, |             index_scheduler, | ||||||
|             wtxn, |             wtxn, | ||||||
|  |             batch_to_task_mapping: HashMap::new(), | ||||||
|             indexes: HashMap::new(), |             indexes: HashMap::new(), | ||||||
|             statuses: HashMap::new(), |             statuses: HashMap::new(), | ||||||
|             kinds: HashMap::new(), |             kinds: HashMap::new(), | ||||||
|  |             batch_indexes: HashMap::new(), | ||||||
|  |             batch_statuses: HashMap::new(), | ||||||
|  |             batch_kinds: HashMap::new(), | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Register a new batch coming from a dump in the scheduler. | ||||||
|  |     /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. | ||||||
|  |     pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> { | ||||||
|  |         self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?; | ||||||
|  |         if let Some(enqueued_at) = batch.enqueued_at { | ||||||
|  |             utils::insert_task_datetime( | ||||||
|  |                 &mut self.wtxn, | ||||||
|  |                 self.index_scheduler.queue.batches.enqueued_at, | ||||||
|  |                 enqueued_at.earliest, | ||||||
|  |                 batch.uid, | ||||||
|  |             )?; | ||||||
|  |             utils::insert_task_datetime( | ||||||
|  |                 &mut self.wtxn, | ||||||
|  |                 self.index_scheduler.queue.batches.enqueued_at, | ||||||
|  |                 enqueued_at.oldest, | ||||||
|  |                 batch.uid, | ||||||
|  |             )?; | ||||||
|  |         } | ||||||
|  |         utils::insert_task_datetime( | ||||||
|  |             &mut self.wtxn, | ||||||
|  |             self.index_scheduler.queue.batches.started_at, | ||||||
|  |             batch.started_at, | ||||||
|  |             batch.uid, | ||||||
|  |         )?; | ||||||
|  |         if let Some(finished_at) = batch.finished_at { | ||||||
|  |             utils::insert_task_datetime( | ||||||
|  |                 &mut self.wtxn, | ||||||
|  |                 self.index_scheduler.queue.batches.finished_at, | ||||||
|  |                 finished_at, | ||||||
|  |                 batch.uid, | ||||||
|  |             )?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         for index in batch.stats.index_uids.keys() { | ||||||
|  |             match self.batch_indexes.get_mut(index) { | ||||||
|  |                 Some(bitmap) => { | ||||||
|  |                     bitmap.insert(batch.uid); | ||||||
|  |                 } | ||||||
|  |                 None => { | ||||||
|  |                     let mut bitmap = RoaringBitmap::new(); | ||||||
|  |                     bitmap.insert(batch.uid); | ||||||
|  |                     self.batch_indexes.insert(index.to_string(), bitmap); | ||||||
|  |                 } | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         for status in batch.stats.status.keys() { | ||||||
|  |             self.batch_statuses.entry(*status).or_default().insert(batch.uid); | ||||||
|  |         } | ||||||
|  |         for kind in batch.stats.types.keys() { | ||||||
|  |             self.batch_kinds.entry(*kind).or_default().insert(batch.uid); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Register a new task coming from a dump in the scheduler. |     /// Register a new task coming from a dump in the scheduler. | ||||||
|     /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. |     /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. | ||||||
|     pub fn register_dumped_task( |     pub fn register_dumped_task( | ||||||
| @@ -149,6 +216,9 @@ impl<'a> Dump<'a> { | |||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; |         self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; | ||||||
|  |         if let Some(batch_id) = task.batch_uid { | ||||||
|  |             self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         for index in task.indexes() { |         for index in task.indexes() { | ||||||
|             match self.indexes.get_mut(index) { |             match self.indexes.get_mut(index) { | ||||||
| @@ -198,6 +268,14 @@ impl<'a> Dump<'a> { | |||||||
|  |  | ||||||
|     /// Commit all the changes and exit the importing dump state |     /// Commit all the changes and exit the importing dump state | ||||||
|     pub fn finish(mut self) -> Result<()> { |     pub fn finish(mut self) -> Result<()> { | ||||||
|  |         for (batch_id, task_ids) in self.batch_to_task_mapping { | ||||||
|  |             self.index_scheduler.queue.batch_to_tasks_mapping.put( | ||||||
|  |                 &mut self.wtxn, | ||||||
|  |                 &batch_id, | ||||||
|  |                 &task_ids, | ||||||
|  |             )?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         for (index, bitmap) in self.indexes { |         for (index, bitmap) in self.indexes { | ||||||
|             self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; |             self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; | ||||||
|         } |         } | ||||||
| @@ -208,6 +286,16 @@ impl<'a> Dump<'a> { | |||||||
|             self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?; |             self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         for (index, bitmap) in self.batch_indexes { | ||||||
|  |             self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; | ||||||
|  |         } | ||||||
|  |         for (status, bitmap) in self.batch_statuses { | ||||||
|  |             self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?; | ||||||
|  |         } | ||||||
|  |         for (kind, bitmap) in self.batch_kinds { | ||||||
|  |             self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         self.wtxn.commit()?; |         self.wtxn.commit()?; | ||||||
|         self.index_scheduler.scheduler.wake_up.signal(); |         self.index_scheduler.scheduler.wake_up.signal(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -109,6 +109,8 @@ pub enum Error { | |||||||
|     InvalidIndexUid { index_uid: String }, |     InvalidIndexUid { index_uid: String }, | ||||||
|     #[error("Task `{0}` not found.")] |     #[error("Task `{0}` not found.")] | ||||||
|     TaskNotFound(TaskId), |     TaskNotFound(TaskId), | ||||||
|  |     #[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")] | ||||||
|  |     TaskFileNotFound(TaskId), | ||||||
|     #[error("Batch `{0}` not found.")] |     #[error("Batch `{0}` not found.")] | ||||||
|     BatchNotFound(BatchId), |     BatchNotFound(BatchId), | ||||||
|     #[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")] |     #[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")] | ||||||
| @@ -127,8 +129,8 @@ pub enum Error { | |||||||
|         _ => format!("{error}") |         _ => format!("{error}") | ||||||
|     })] |     })] | ||||||
|     Milli { error: milli::Error, index_uid: Option<String> }, |     Milli { error: milli::Error, index_uid: Option<String> }, | ||||||
|     #[error("An unexpected crash occurred when processing the task.")] |     #[error("An unexpected crash occurred when processing the task: {0}")] | ||||||
|     ProcessBatchPanicked, |     ProcessBatchPanicked(String), | ||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
|     FileStore(#[from] file_store::Error), |     FileStore(#[from] file_store::Error), | ||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
| @@ -189,6 +191,7 @@ impl Error { | |||||||
|             | Error::InvalidTaskCanceledBy { .. } |             | Error::InvalidTaskCanceledBy { .. } | ||||||
|             | Error::InvalidIndexUid { .. } |             | Error::InvalidIndexUid { .. } | ||||||
|             | Error::TaskNotFound(_) |             | Error::TaskNotFound(_) | ||||||
|  |             | Error::TaskFileNotFound(_) | ||||||
|             | Error::BatchNotFound(_) |             | Error::BatchNotFound(_) | ||||||
|             | Error::TaskDeletionWithEmptyQuery |             | Error::TaskDeletionWithEmptyQuery | ||||||
|             | Error::TaskCancelationWithEmptyQuery |             | Error::TaskCancelationWithEmptyQuery | ||||||
| @@ -196,7 +199,7 @@ impl Error { | |||||||
|             | Error::Dump(_) |             | Error::Dump(_) | ||||||
|             | Error::Heed(_) |             | Error::Heed(_) | ||||||
|             | Error::Milli { .. } |             | Error::Milli { .. } | ||||||
|             | Error::ProcessBatchPanicked |             | Error::ProcessBatchPanicked(_) | ||||||
|             | Error::FileStore(_) |             | Error::FileStore(_) | ||||||
|             | Error::IoError(_) |             | Error::IoError(_) | ||||||
|             | Error::Persist(_) |             | Error::Persist(_) | ||||||
| @@ -250,6 +253,7 @@ impl ErrorCode for Error { | |||||||
|             Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy, |             Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy, | ||||||
|             Error::InvalidIndexUid { .. } => Code::InvalidIndexUid, |             Error::InvalidIndexUid { .. } => Code::InvalidIndexUid, | ||||||
|             Error::TaskNotFound(_) => Code::TaskNotFound, |             Error::TaskNotFound(_) => Code::TaskNotFound, | ||||||
|  |             Error::TaskFileNotFound(_) => Code::TaskFileNotFound, | ||||||
|             Error::BatchNotFound(_) => Code::BatchNotFound, |             Error::BatchNotFound(_) => Code::BatchNotFound, | ||||||
|             Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters, |             Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters, | ||||||
|             Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters, |             Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters, | ||||||
| @@ -257,7 +261,7 @@ impl ErrorCode for Error { | |||||||
|             Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice, |             Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice, | ||||||
|             Error::Dump(e) => e.error_code(), |             Error::Dump(e) => e.error_code(), | ||||||
|             Error::Milli { error, .. } => error.error_code(), |             Error::Milli { error, .. } => error.error_code(), | ||||||
|             Error::ProcessBatchPanicked => Code::Internal, |             Error::ProcessBatchPanicked(_) => Code::Internal, | ||||||
|             Error::Heed(e) => e.error_code(), |             Error::Heed(e) => e.error_code(), | ||||||
|             Error::HeedTransaction(e) => e.error_code(), |             Error::HeedTransaction(e) => e.error_code(), | ||||||
|             Error::FileStore(e) => e.error_code(), |             Error::FileStore(e) => e.error_code(), | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| use std::sync::{Arc, RwLock}; | use std::sync::{Arc, RwLock}; | ||||||
|  |  | ||||||
| use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; | use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; | ||||||
| use meilisearch_types::heed::types::{SerdeJson, Str}; | use meilisearch_types::heed::types::{SerdeJson, Str}; | ||||||
| use meilisearch_types::heed::{Database, Env, RwTxn}; | use meilisearch_types::heed::{Database, Env, RwTxn}; | ||||||
|  |  | ||||||
| @@ -14,10 +14,16 @@ mod db_name { | |||||||
|     pub const EXPERIMENTAL_FEATURES: &str = "experimental-features"; |     pub const EXPERIMENTAL_FEATURES: &str = "experimental-features"; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | mod db_keys { | ||||||
|  |     pub const EXPERIMENTAL_FEATURES: &str = "experimental-features"; | ||||||
|  |     pub const NETWORK: &str = "network"; | ||||||
|  | } | ||||||
|  |  | ||||||
| #[derive(Clone)] | #[derive(Clone)] | ||||||
| pub(crate) struct FeatureData { | pub(crate) struct FeatureData { | ||||||
|     persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>, |     persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>, | ||||||
|     runtime: Arc<RwLock<RuntimeTogglableFeatures>>, |     runtime: Arc<RwLock<RuntimeTogglableFeatures>>, | ||||||
|  |     network: Arc<RwLock<Network>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy)] | #[derive(Debug, Clone, Copy)] | ||||||
| @@ -86,6 +92,32 @@ impl RoFeatures { | |||||||
|             .into()) |             .into()) | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn check_network(&self, disabled_action: &'static str) -> Result<()> { | ||||||
|  |         if self.runtime.network { | ||||||
|  |             Ok(()) | ||||||
|  |         } else { | ||||||
|  |             Err(FeatureNotEnabledError { | ||||||
|  |                 disabled_action, | ||||||
|  |                 feature: "network", | ||||||
|  |                 issue_link: "https://github.com/orgs/meilisearch/discussions/805", | ||||||
|  |             } | ||||||
|  |             .into()) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn check_get_task_documents_route(&self) -> Result<()> { | ||||||
|  |         if self.runtime.get_task_documents_route { | ||||||
|  |             Ok(()) | ||||||
|  |         } else { | ||||||
|  |             Err(FeatureNotEnabledError { | ||||||
|  |                 disabled_action: "Getting the documents of an enqueued task", | ||||||
|  |                 feature: "get task documents route", | ||||||
|  |                 issue_link: "https://github.com/orgs/meilisearch/discussions/808", | ||||||
|  |             } | ||||||
|  |             .into()) | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl FeatureData { | impl FeatureData { | ||||||
| @@ -102,7 +134,7 @@ impl FeatureData { | |||||||
|             env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?; |             env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?; | ||||||
|  |  | ||||||
|         let persisted_features: RuntimeTogglableFeatures = |         let persisted_features: RuntimeTogglableFeatures = | ||||||
|             runtime_features_db.get(wtxn, db_name::EXPERIMENTAL_FEATURES)?.unwrap_or_default(); |             runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default(); | ||||||
|         let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features; |         let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features; | ||||||
|         let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { |         let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { | ||||||
|             metrics: metrics || persisted_features.metrics, |             metrics: metrics || persisted_features.metrics, | ||||||
| @@ -111,7 +143,14 @@ impl FeatureData { | |||||||
|             ..persisted_features |             ..persisted_features | ||||||
|         })); |         })); | ||||||
|  |  | ||||||
|         Ok(Self { persisted: runtime_features_db, runtime }) |         let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>(); | ||||||
|  |         let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default(); | ||||||
|  |  | ||||||
|  |         Ok(Self { | ||||||
|  |             persisted: runtime_features_db, | ||||||
|  |             runtime, | ||||||
|  |             network: Arc::new(RwLock::new(network)), | ||||||
|  |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn put_runtime_features( |     pub fn put_runtime_features( | ||||||
| @@ -119,7 +158,7 @@ impl FeatureData { | |||||||
|         mut wtxn: RwTxn, |         mut wtxn: RwTxn, | ||||||
|         features: RuntimeTogglableFeatures, |         features: RuntimeTogglableFeatures, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         self.persisted.put(&mut wtxn, db_name::EXPERIMENTAL_FEATURES, &features)?; |         self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?; | ||||||
|         wtxn.commit()?; |         wtxn.commit()?; | ||||||
|  |  | ||||||
|         // safe to unwrap, the lock will only fail if: |         // safe to unwrap, the lock will only fail if: | ||||||
| @@ -140,4 +179,21 @@ impl FeatureData { | |||||||
|     pub fn features(&self) -> RoFeatures { |     pub fn features(&self) -> RoFeatures { | ||||||
|         RoFeatures::new(self) |         RoFeatures::new(self) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> { | ||||||
|  |         self.persisted.remap_data_type::<SerdeJson<Network>>().put( | ||||||
|  |             &mut wtxn, | ||||||
|  |             db_keys::NETWORK, | ||||||
|  |             &new_network, | ||||||
|  |         )?; | ||||||
|  |         wtxn.commit()?; | ||||||
|  |  | ||||||
|  |         let mut network = self.network.write().unwrap(); | ||||||
|  |         *network = new_network; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn network(&self) -> Network { | ||||||
|  |         Network::clone(&*self.network.read().unwrap()) | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,5 +1,7 @@ | |||||||
| use std::collections::BTreeMap; | use std::collections::BTreeMap; | ||||||
|  | use std::env::VarError; | ||||||
| use std::path::Path; | use std::path::Path; | ||||||
|  | use std::str::FromStr; | ||||||
| use std::time::Duration; | use std::time::Duration; | ||||||
|  |  | ||||||
| use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions}; | use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions}; | ||||||
| @@ -304,7 +306,18 @@ fn create_or_open_index( | |||||||
| ) -> Result<Index> { | ) -> Result<Index> { | ||||||
|     let mut options = EnvOpenOptions::new(); |     let mut options = EnvOpenOptions::new(); | ||||||
|     options.map_size(clamp_to_page_size(map_size)); |     options.map_size(clamp_to_page_size(map_size)); | ||||||
|     options.max_readers(1024); |  | ||||||
|  |     // You can find more details about this experimental | ||||||
|  |     // environment variable on the following GitHub discussion: | ||||||
|  |     // <https://github.com/orgs/meilisearch/discussions/806> | ||||||
|  |     let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") { | ||||||
|  |         Ok(value) => u32::from_str(&value).unwrap(), | ||||||
|  |         Err(VarError::NotPresent) => 1024, | ||||||
|  |         Err(VarError::NotUnicode(value)) => panic!( | ||||||
|  |             "Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}" | ||||||
|  |         ), | ||||||
|  |     }; | ||||||
|  |     options.max_readers(max_readers); | ||||||
|     if enable_mdb_writemap { |     if enable_mdb_writemap { | ||||||
|         unsafe { options.flags(EnvFlags::WRITE_MAP) }; |         unsafe { options.flags(EnvFlags::WRITE_MAP) }; | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -106,6 +106,12 @@ pub struct IndexStats { | |||||||
|     /// are not returned to the disk after a deletion, this number is typically larger than |     /// are not returned to the disk after a deletion, this number is typically larger than | ||||||
|     /// `used_database_size` that only includes the size of the used pages. |     /// `used_database_size` that only includes the size of the used pages. | ||||||
|     pub database_size: u64, |     pub database_size: u64, | ||||||
|  |     /// Number of embeddings in the index. | ||||||
|  |     /// Optional, for backward compatibility with the stats of pre-v1.13.0 versions of Meilisearch. | ||||||
|  |     pub number_of_embeddings: Option<u64>, | ||||||
|  |     /// Number of embedded documents in the index. | ||||||
|  |     /// Optional, for backward compatibility with the stats of pre-v1.13.0 versions of Meilisearch. | ||||||
|  |     pub number_of_embedded_documents: Option<u64>, | ||||||
|     /// Size taken by the used pages of the index' DB, in bytes. |     /// Size taken by the used pages of the index' DB, in bytes. | ||||||
|     /// |     /// | ||||||
|     /// As the DB backend does not return to the disk the pages that are not currently used by the DB, |     /// As the DB backend does not return to the disk the pages that are not currently used by the DB, | ||||||
| @@ -130,8 +136,11 @@ impl IndexStats { | |||||||
|     /// |     /// | ||||||
|     /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. |     /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. | ||||||
|     pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> { |     pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> { | ||||||
|  |         let arroy_stats = index.arroy_stats(rtxn)?; | ||||||
|         Ok(IndexStats { |         Ok(IndexStats { | ||||||
|             number_of_documents: index.number_of_documents(rtxn)?, |             number_of_documents: index.number_of_documents(rtxn)?, | ||||||
|  |             number_of_embeddings: Some(arroy_stats.number_of_embeddings), | ||||||
|  |             number_of_embedded_documents: Some(arroy_stats.documents.len()), | ||||||
|             database_size: index.on_disk_size()?, |             database_size: index.on_disk_size()?, | ||||||
|             used_database_size: index.used_size()?, |             used_database_size: index.used_size()?, | ||||||
|             primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()), |             primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()), | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| use std::collections::BTreeSet; | use std::collections::BTreeSet; | ||||||
| use std::fmt::Write; | use std::fmt::Write; | ||||||
|  |  | ||||||
| use meilisearch_types::batches::Batch; | use meilisearch_types::batches::{Batch, BatchEnqueuedAt}; | ||||||
| use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str}; | use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str}; | ||||||
| use meilisearch_types::heed::{Database, RoTxn}; | use meilisearch_types::heed::{Database, RoTxn}; | ||||||
| use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; | use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; | ||||||
| @@ -341,10 +341,14 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec | |||||||
|  |  | ||||||
| pub fn snapshot_batch(batch: &Batch) -> String { | pub fn snapshot_batch(batch: &Batch) -> String { | ||||||
|     let mut snap = String::new(); |     let mut snap = String::new(); | ||||||
|     let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch; |     let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch; | ||||||
|     if let Some(finished_at) = finished_at { |     if let Some(finished_at) = finished_at { | ||||||
|         assert!(finished_at > started_at); |         assert!(finished_at > started_at); | ||||||
|     } |     } | ||||||
|  |     let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap(); | ||||||
|  |     assert!(*started_at > earliest); | ||||||
|  |     assert!(earliest >= oldest); | ||||||
|  |  | ||||||
|     snap.push('{'); |     snap.push('{'); | ||||||
|     snap.push_str(&format!("uid: {uid}, ")); |     snap.push_str(&format!("uid: {uid}, ")); | ||||||
|     snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap())); |     snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap())); | ||||||
|   | |||||||
| @@ -33,7 +33,7 @@ mod test_utils; | |||||||
| pub mod upgrade; | pub mod upgrade; | ||||||
| mod utils; | mod utils; | ||||||
| pub mod uuid_codec; | pub mod uuid_codec; | ||||||
| mod versioning; | pub mod versioning; | ||||||
|  |  | ||||||
| pub type Result<T, E = Error> = std::result::Result<T, E>; | pub type Result<T, E = Error> = std::result::Result<T, E>; | ||||||
| pub type TaskId = u32; | pub type TaskId = u32; | ||||||
| @@ -51,7 +51,7 @@ pub use features::RoFeatures; | |||||||
| use flate2::bufread::GzEncoder; | use flate2::bufread::GzEncoder; | ||||||
| use flate2::Compression; | use flate2::Compression; | ||||||
| use meilisearch_types::batches::Batch; | use meilisearch_types::batches::Batch; | ||||||
| use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; | use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; | ||||||
| use meilisearch_types::heed::byteorder::BE; | use meilisearch_types::heed::byteorder::BE; | ||||||
| use meilisearch_types::heed::types::I128; | use meilisearch_types::heed::types::I128; | ||||||
| use meilisearch_types::heed::{self, Env, RoTxn}; | use meilisearch_types::heed::{self, Env, RoTxn}; | ||||||
| @@ -770,7 +770,16 @@ impl IndexScheduler { | |||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // TODO: consider using a type alias or a struct embedder/template |     pub fn put_network(&self, network: Network) -> Result<()> { | ||||||
|  |         let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; | ||||||
|  |         self.features.put_network(wtxn, network)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn network(&self) -> Network { | ||||||
|  |         self.features.network() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn embedders( |     pub fn embedders( | ||||||
|         &self, |         &self, | ||||||
|         index_uid: String, |         index_uid: String, | ||||||
|   | |||||||
| @@ -96,6 +96,7 @@ make_enum_progress! { | |||||||
|         StartTheDumpCreation, |         StartTheDumpCreation, | ||||||
|         DumpTheApiKeys, |         DumpTheApiKeys, | ||||||
|         DumpTheTasks, |         DumpTheTasks, | ||||||
|  |         DumpTheBatches, | ||||||
|         DumpTheIndexes, |         DumpTheIndexes, | ||||||
|         DumpTheExperimentalFeatures, |         DumpTheExperimentalFeatures, | ||||||
|         CompressTheDump, |         CompressTheDump, | ||||||
|   | |||||||
| @@ -12,8 +12,8 @@ use time::OffsetDateTime; | |||||||
| use super::{Query, Queue}; | use super::{Query, Queue}; | ||||||
| use crate::processing::ProcessingTasks; | use crate::processing::ProcessingTasks; | ||||||
| use crate::utils::{ | use crate::utils::{ | ||||||
|     insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime, |     insert_task_datetime, keep_ids_within_datetimes, map_bound, | ||||||
|     ProcessingBatch, |     remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch, | ||||||
| }; | }; | ||||||
| use crate::{Error, Result, BEI128}; | use crate::{Error, Result, BEI128}; | ||||||
|  |  | ||||||
| @@ -181,6 +181,7 @@ impl BatchQueue { | |||||||
|                 stats: batch.stats, |                 stats: batch.stats, | ||||||
|                 started_at: batch.started_at, |                 started_at: batch.started_at, | ||||||
|                 finished_at: batch.finished_at, |                 finished_at: batch.finished_at, | ||||||
|  |                 enqueued_at: batch.enqueued_at, | ||||||
|             }, |             }, | ||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
| @@ -234,34 +235,25 @@ impl BatchQueue { | |||||||
|         // What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written |         // What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written | ||||||
|         // to the DB per batch. |         // to the DB per batch. | ||||||
|         if let Some(ref old_batch) = old_batch { |         if let Some(ref old_batch) = old_batch { | ||||||
|             let started_at = old_batch.started_at.unix_timestamp_nanos(); |             if let Some(enqueued_at) = old_batch.enqueued_at { | ||||||
|  |                 remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?; | ||||||
|             // We have either one or two enqueued at to remove |                 remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?; | ||||||
|             let mut exit = old_batch.stats.total_nb_tasks.clamp(0, 2); |             } else { | ||||||
|             let mut iterator = self.enqueued_at.rev_iter_mut(wtxn)?; |                 // If the batch has no `enqueued_at`, the database comes from v1.12 | ||||||
|             while let Some(entry) = iterator.next() { |                 // and we still need to find the date by scanning the database. | ||||||
|                 let (key, mut value) = entry?; |                 remove_n_tasks_datetime_earlier_than( | ||||||
|                 if key > started_at { |                     wtxn, | ||||||
|                     continue; |                     self.enqueued_at, | ||||||
|                 } |                     old_batch.started_at, | ||||||
|                 if value.remove(old_batch.uid) { |                     old_batch.stats.total_nb_tasks.clamp(1, 2) as usize, | ||||||
|                     exit = exit.saturating_sub(1); |                     old_batch.uid, | ||||||
|                     // Safe because the key and value are owned |                 )?; | ||||||
|                     unsafe { |  | ||||||
|                         iterator.put_current(&key, &value)?; |  | ||||||
|                     } |  | ||||||
|                     if exit == 0 { |  | ||||||
|                         break; |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|             } |         // A finished batch MUST contain at least one task and have an enqueued_at | ||||||
|         } |         let enqueued_at = batch.enqueued_at.as_ref().unwrap(); | ||||||
|         if let Some(enqueued_at) = batch.oldest_enqueued_at { |         insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?; | ||||||
|             insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?; |         insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?; | ||||||
|         } |  | ||||||
|         if let Some(enqueued_at) = batch.earliest_enqueued_at { |  | ||||||
|             insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Update the started at and finished at |         // Update the started at and finished at | ||||||
|         if let Some(ref old_batch) = old_batch { |         if let Some(ref old_batch) = old_batch { | ||||||
|   | |||||||
| @@ -102,6 +102,8 @@ fn query_batches_simple() { | |||||||
|         .unwrap(); |         .unwrap(); | ||||||
|     assert_eq!(batches.len(), 1); |     assert_eq!(batches.len(), 1); | ||||||
|     batches[0].started_at = OffsetDateTime::UNIX_EPOCH; |     batches[0].started_at = OffsetDateTime::UNIX_EPOCH; | ||||||
|  |     assert!(batches[0].enqueued_at.is_some()); | ||||||
|  |     batches[0].enqueued_at = None; | ||||||
|     // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 |     // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 | ||||||
|     let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); |     let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); | ||||||
|     snapshot!(batch, @r#" |     snapshot!(batch, @r#" | ||||||
| @@ -123,7 +125,8 @@ fn query_batches_simple() { | |||||||
|         } |         } | ||||||
|       }, |       }, | ||||||
|       "startedAt": "1970-01-01T00:00:00Z", |       "startedAt": "1970-01-01T00:00:00Z", | ||||||
|           "finishedAt": null |       "finishedAt": null, | ||||||
|  |       "enqueuedAt": null | ||||||
|     } |     } | ||||||
|     "#); |     "#); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ mod tasks_test; | |||||||
| mod test; | mod test; | ||||||
|  |  | ||||||
| use std::collections::BTreeMap; | use std::collections::BTreeMap; | ||||||
|  | use std::fs::File as StdFile; | ||||||
| use std::time::Duration; | use std::time::Duration; | ||||||
|  |  | ||||||
| use file_store::FileStore; | use file_store::FileStore; | ||||||
| @@ -216,6 +217,11 @@ impl Queue { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Opens and returns the task's content file. | ||||||
|  |     pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> { | ||||||
|  |         self.file_store.get_update(uuid) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Delete a file from the index scheduler. |     /// Delete a file from the index scheduler. | ||||||
|     /// |     /// | ||||||
|     /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method. |     /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method. | ||||||
|   | |||||||
| @@ -326,7 +326,7 @@ fn test_auto_deletion_of_tasks() { | |||||||
| fn test_task_queue_is_full() { | fn test_task_queue_is_full() { | ||||||
|     let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { |     let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { | ||||||
|         // that's the minimum map size possible |         // that's the minimum map size possible | ||||||
|         config.task_db_size = 1048576; |         config.task_db_size = 1048576 * 3; | ||||||
|         None |         None | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -166,13 +166,41 @@ impl IndexScheduler { | |||||||
|             let processing_batch = &mut processing_batch; |             let processing_batch = &mut processing_batch; | ||||||
|             let progress = progress.clone(); |             let progress = progress.clone(); | ||||||
|             std::thread::scope(|s| { |             std::thread::scope(|s| { | ||||||
|  |                 let p = progress.clone(); | ||||||
|                 let handle = std::thread::Builder::new() |                 let handle = std::thread::Builder::new() | ||||||
|                     .name(String::from("batch-operation")) |                     .name(String::from("batch-operation")) | ||||||
|                     .spawn_scoped(s, move || { |                     .spawn_scoped(s, move || { | ||||||
|                         cloned_index_scheduler.process_batch(batch, processing_batch, progress) |                         cloned_index_scheduler.process_batch(batch, processing_batch, p) | ||||||
|                     }) |                     }) | ||||||
|                     .unwrap(); |                     .unwrap(); | ||||||
|                 handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) |  | ||||||
|  |                 match handle.join() { | ||||||
|  |                     Ok(ret) => { | ||||||
|  |                         if ret.is_err() { | ||||||
|  |                             if let Ok(progress_view) = | ||||||
|  |                                 serde_json::to_string(&progress.as_progress_view()) | ||||||
|  |                             { | ||||||
|  |                                 tracing::warn!("Batch failed while doing: {progress_view}") | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                         ret | ||||||
|  |                     } | ||||||
|  |                     Err(panic) => { | ||||||
|  |                         if let Ok(progress_view) = | ||||||
|  |                             serde_json::to_string(&progress.as_progress_view()) | ||||||
|  |                         { | ||||||
|  |                             tracing::warn!("Batch failed while doing: {progress_view}") | ||||||
|  |                         } | ||||||
|  |                         let msg = match panic.downcast_ref::<&'static str>() { | ||||||
|  |                             Some(s) => *s, | ||||||
|  |                             None => match panic.downcast_ref::<String>() { | ||||||
|  |                                 Some(s) => &s[..], | ||||||
|  |                                 None => "Box<dyn Any>", | ||||||
|  |                             }, | ||||||
|  |                         }; | ||||||
|  |                         Err(Error::ProcessBatchPanicked(msg.to_string())) | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|             }) |             }) | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -2,7 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet}; | |||||||
| use std::panic::{catch_unwind, AssertUnwindSafe}; | use std::panic::{catch_unwind, AssertUnwindSafe}; | ||||||
| use std::sync::atomic::Ordering; | use std::sync::atomic::Ordering; | ||||||
|  |  | ||||||
| use meilisearch_types::batches::BatchId; | use meilisearch_types::batches::{BatchEnqueuedAt, BatchId}; | ||||||
| use meilisearch_types::heed::{RoTxn, RwTxn}; | use meilisearch_types::heed::{RoTxn, RwTxn}; | ||||||
| use meilisearch_types::milli::progress::{Progress, VariableNameStep}; | use meilisearch_types::milli::progress::{Progress, VariableNameStep}; | ||||||
| use meilisearch_types::milli::{self}; | use meilisearch_types::milli::{self}; | ||||||
| @@ -16,7 +16,10 @@ use crate::processing::{ | |||||||
|     InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, |     InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, | ||||||
|     UpdateIndexProgress, |     UpdateIndexProgress, | ||||||
| }; | }; | ||||||
| use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; | use crate::utils::{ | ||||||
|  |     self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task, | ||||||
|  |     ProcessingBatch, | ||||||
|  | }; | ||||||
| use crate::{Error, IndexScheduler, Result, TaskId}; | use crate::{Error, IndexScheduler, Result, TaskId}; | ||||||
|  |  | ||||||
| impl IndexScheduler { | impl IndexScheduler { | ||||||
| @@ -323,8 +326,17 @@ impl IndexScheduler { | |||||||
|                 match ret { |                 match ret { | ||||||
|                     Ok(Ok(())) => (), |                     Ok(Ok(())) => (), | ||||||
|                     Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))), |                     Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))), | ||||||
|                     Err(_e) => { |                     Err(e) => { | ||||||
|                         return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked))); |                         let msg = match e.downcast_ref::<&'static str>() { | ||||||
|  |                             Some(s) => *s, | ||||||
|  |                             None => match e.downcast_ref::<String>() { | ||||||
|  |                                 Some(s) => &s[..], | ||||||
|  |                                 None => "Box<dyn Any>", | ||||||
|  |                             }, | ||||||
|  |                         }; | ||||||
|  |                         return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked( | ||||||
|  |                             msg.to_string(), | ||||||
|  |                         )))); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
| @@ -418,7 +430,6 @@ impl IndexScheduler { | |||||||
|         to_delete_tasks -= &enqueued_tasks; |         to_delete_tasks -= &enqueued_tasks; | ||||||
|  |  | ||||||
|         // 2. We now have a list of tasks to delete, delete them |         // 2. We now have a list of tasks to delete, delete them | ||||||
|  |  | ||||||
|         let mut affected_indexes = HashSet::new(); |         let mut affected_indexes = HashSet::new(); | ||||||
|         let mut affected_statuses = HashSet::new(); |         let mut affected_statuses = HashSet::new(); | ||||||
|         let mut affected_kinds = HashSet::new(); |         let mut affected_kinds = HashSet::new(); | ||||||
| @@ -515,9 +526,51 @@ impl IndexScheduler { | |||||||
|                 tasks -= &to_delete_tasks; |                 tasks -= &to_delete_tasks; | ||||||
|                 // We must remove the batch entirely |                 // We must remove the batch entirely | ||||||
|                 if tasks.is_empty() { |                 if tasks.is_empty() { | ||||||
|  |                     if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? { | ||||||
|  |                         if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at { | ||||||
|  |                             remove_task_datetime( | ||||||
|  |                                 wtxn, | ||||||
|  |                                 self.queue.batches.enqueued_at, | ||||||
|  |                                 earliest, | ||||||
|  |                                 batch_id, | ||||||
|  |                             )?; | ||||||
|  |                             remove_task_datetime( | ||||||
|  |                                 wtxn, | ||||||
|  |                                 self.queue.batches.enqueued_at, | ||||||
|  |                                 oldest, | ||||||
|  |                                 batch_id, | ||||||
|  |                             )?; | ||||||
|  |                         } else { | ||||||
|  |                             // If the batch has no `enqueued_at`, the database comes from v1.12 | ||||||
|  |                             // and we still need to find the date by scanning the database. | ||||||
|  |                             remove_n_tasks_datetime_earlier_than( | ||||||
|  |                                 wtxn, | ||||||
|  |                                 self.queue.batches.enqueued_at, | ||||||
|  |                                 batch.started_at, | ||||||
|  |                                 batch.stats.total_nb_tasks.clamp(1, 2) as usize, | ||||||
|  |                                 batch_id, | ||||||
|  |                             )?; | ||||||
|  |                         } | ||||||
|  |                         remove_task_datetime( | ||||||
|  |                             wtxn, | ||||||
|  |                             self.queue.batches.started_at, | ||||||
|  |                             batch.started_at, | ||||||
|  |                             batch_id, | ||||||
|  |                         )?; | ||||||
|  |                         if let Some(finished_at) = batch.finished_at { | ||||||
|  |                             remove_task_datetime( | ||||||
|  |                                 wtxn, | ||||||
|  |                                 self.queue.batches.finished_at, | ||||||
|  |                                 finished_at, | ||||||
|  |                                 batch_id, | ||||||
|  |                             )?; | ||||||
|  |                         } | ||||||
|  |  | ||||||
|                         self.queue.batches.all_batches.delete(wtxn, &batch_id)?; |                         self.queue.batches.all_batches.delete(wtxn, &batch_id)?; | ||||||
|                         self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?; |                         self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?; | ||||||
|                     } |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|                 // Anyway, we must remove the batch from all its reverse indexes. |                 // Anyway, we must remove the batch from all its reverse indexes. | ||||||
|                 // The only way to do that is to check |                 // The only way to do that is to check | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
|  | use std::collections::BTreeMap; | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::io::BufWriter; | use std::io::BufWriter; | ||||||
| use std::sync::atomic::Ordering; | use std::sync::atomic::Ordering; | ||||||
| @@ -11,7 +12,9 @@ use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; | |||||||
| use time::macros::format_description; | use time::macros::format_description; | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
|  |  | ||||||
| use crate::processing::{AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress}; | use crate::processing::{ | ||||||
|  |     AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress, | ||||||
|  | }; | ||||||
| use crate::{Error, IndexScheduler, Result}; | use crate::{Error, IndexScheduler, Result}; | ||||||
|  |  | ||||||
| impl IndexScheduler { | impl IndexScheduler { | ||||||
| @@ -102,7 +105,40 @@ impl IndexScheduler { | |||||||
|         } |         } | ||||||
|         dump_tasks.flush()?; |         dump_tasks.flush()?; | ||||||
|  |  | ||||||
|         // 3. Dump the indexes |         // 3. Dump the batches | ||||||
|  |         progress.update_progress(DumpCreationProgress::DumpTheBatches); | ||||||
|  |         let mut dump_batches = dump.create_batches_queue()?; | ||||||
|  |  | ||||||
|  |         let (atomic_batch_progress, update_batch_progress) = | ||||||
|  |             AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32); | ||||||
|  |         progress.update_progress(update_batch_progress); | ||||||
|  |  | ||||||
|  |         for ret in self.queue.batches.all_batches.iter(&rtxn)? { | ||||||
|  |             if self.scheduler.must_stop_processing.get() { | ||||||
|  |                 return Err(Error::AbortedTask); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             let (_, mut b) = ret?; | ||||||
|  |             // In the case we're dumping ourselves we want to be marked as finished | ||||||
|  |             // to not loop over ourselves indefinitely. | ||||||
|  |             if b.uid == task.uid { | ||||||
|  |                 let finished_at = OffsetDateTime::now_utc(); | ||||||
|  |  | ||||||
|  |                 // We're going to fake the date because we don't know if everything is going to go well. | ||||||
|  |                 // But we need to dump the task as finished and successful. | ||||||
|  |                 // If something fails, everything will be set appropriately in the end. | ||||||
|  |                 let mut statuses = BTreeMap::new(); | ||||||
|  |                 statuses.insert(Status::Succeeded, b.stats.total_nb_tasks); | ||||||
|  |                 b.stats.status = statuses; | ||||||
|  |                 b.finished_at = Some(finished_at); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             dump_batches.push_batch(&b)?; | ||||||
|  |             atomic_batch_progress.fetch_add(1, Ordering::Relaxed); | ||||||
|  |         } | ||||||
|  |         dump_batches.flush()?; | ||||||
|  |  | ||||||
|  |         // 4. Dump the indexes | ||||||
|         progress.update_progress(DumpCreationProgress::DumpTheIndexes); |         progress.update_progress(DumpCreationProgress::DumpTheIndexes); | ||||||
|         let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; |         let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; | ||||||
|         let mut count = 0; |         let mut count = 0; | ||||||
| @@ -142,7 +178,7 @@ impl IndexScheduler { | |||||||
|             let documents = index |             let documents = index | ||||||
|                 .all_documents(&rtxn) |                 .all_documents(&rtxn) | ||||||
|                 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; |                 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||||
|             // 3.1. Dump the documents |             // 4.1. Dump the documents | ||||||
|             for ret in documents { |             for ret in documents { | ||||||
|                 if self.scheduler.must_stop_processing.get() { |                 if self.scheduler.must_stop_processing.get() { | ||||||
|                     return Err(Error::AbortedTask); |                     return Err(Error::AbortedTask); | ||||||
| @@ -204,7 +240,7 @@ impl IndexScheduler { | |||||||
|                 atomic.fetch_add(1, Ordering::Relaxed); |                 atomic.fetch_add(1, Ordering::Relaxed); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // 3.2. Dump the settings |             // 4.2. Dump the settings | ||||||
|             let settings = meilisearch_types::settings::settings( |             let settings = meilisearch_types::settings::settings( | ||||||
|                 index, |                 index, | ||||||
|                 &rtxn, |                 &rtxn, | ||||||
| @@ -215,10 +251,12 @@ impl IndexScheduler { | |||||||
|             Ok(()) |             Ok(()) | ||||||
|         })?; |         })?; | ||||||
|  |  | ||||||
|         // 4. Dump experimental feature settings |         // 5. Dump experimental feature settings | ||||||
|         progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); |         progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); | ||||||
|         let features = self.features().runtime_features(); |         let features = self.features().runtime_features(); | ||||||
|         dump.create_experimental_features(features)?; |         dump.create_experimental_features(features)?; | ||||||
|  |         let network = self.network(); | ||||||
|  |         dump.create_network(network)?; | ||||||
|  |  | ||||||
|         let dump_uid = started_at.format(format_description!( |         let dump_uid = started_at.format(format_description!( | ||||||
|                     "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" |                     "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" | ||||||
|   | |||||||
| @@ -56,16 +56,13 @@ succeeded [1,] | |||||||
| ### Batches Index Tasks: | ### Batches Index Tasks: | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Enqueued At: | ### Batches Enqueued At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Started At: | ### Batches Started At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Finished At: | ### Batches Finished At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### File Store: | ### File Store: | ||||||
|   | |||||||
| @@ -54,15 +54,12 @@ succeeded [1,] | |||||||
| ### Batches Index Tasks: | ### Batches Index Tasks: | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Enqueued At: | ### Batches Enqueued At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Started At: | ### Batches Started At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Finished At: | ### Batches Finished At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### File Store: | ### File Store: | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ snapshot_kind: text | |||||||
| [] | [] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### All Tasks: | ### All Tasks: | ||||||
| 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} | 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task: simulated panic", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Status: | ### Status: | ||||||
| enqueued [] | enqueued [] | ||||||
|   | |||||||
| @@ -87,7 +87,6 @@ doggo [2,3,] | |||||||
| girafo [4,] | girafo [4,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Enqueued At: | ### Batches Enqueued At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| [timestamp] [2,] | [timestamp] [2,] | ||||||
| [timestamp] [3,] | [timestamp] [3,] | ||||||
| @@ -95,7 +94,6 @@ girafo [4,] | |||||||
| [timestamp] [5,] | [timestamp] [5,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Started At: | ### Batches Started At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| [timestamp] [2,] | [timestamp] [2,] | ||||||
| [timestamp] [3,] | [timestamp] [3,] | ||||||
| @@ -103,7 +101,6 @@ girafo [4,] | |||||||
| [timestamp] [5,] | [timestamp] [5,] | ||||||
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ||||||
| ### Batches Finished At: | ### Batches Finished At: | ||||||
| [timestamp] [0,] |  | ||||||
| [timestamp] [1,] | [timestamp] [1,] | ||||||
| [timestamp] [2,] | [timestamp] [2,] | ||||||
| [timestamp] [3,] | [timestamp] [3,] | ||||||
|   | |||||||
| @@ -903,7 +903,7 @@ fn create_and_list_index() { | |||||||
|  |  | ||||||
|     index_scheduler.index("kefir").unwrap(); |     index_scheduler.index("kefir").unwrap(); | ||||||
|     let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); |     let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); | ||||||
|     snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#" |     snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###" | ||||||
|     [ |     [ | ||||||
|       1, |       1, | ||||||
|       [ |       [ | ||||||
| @@ -912,6 +912,8 @@ fn create_and_list_index() { | |||||||
|           { |           { | ||||||
|             "number_of_documents": 0, |             "number_of_documents": 0, | ||||||
|             "database_size": "[bytes]", |             "database_size": "[bytes]", | ||||||
|  |             "number_of_embeddings": 0, | ||||||
|  |             "number_of_embedded_documents": 0, | ||||||
|             "used_database_size": "[bytes]", |             "used_database_size": "[bytes]", | ||||||
|             "primary_key": null, |             "primary_key": null, | ||||||
|             "field_distribution": {}, |             "field_distribution": {}, | ||||||
| @@ -921,5 +923,5 @@ fn create_and_list_index() { | |||||||
|         ] |         ] | ||||||
|       ] |       ] | ||||||
|     ] |     ] | ||||||
|     "#); |     "###); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -6,8 +6,7 @@ use meili_snap::snapshot; | |||||||
| use meilisearch_types::milli::obkv_to_json; | use meilisearch_types::milli::obkv_to_json; | ||||||
| use meilisearch_types::milli::update::IndexDocumentsMethod::*; | use meilisearch_types::milli::update::IndexDocumentsMethod::*; | ||||||
| use meilisearch_types::milli::update::Setting; | use meilisearch_types::milli::update::Setting; | ||||||
| use meilisearch_types::tasks::Kind; | use meilisearch_types::tasks::{Kind, KindWithContent}; | ||||||
| use meilisearch_types::tasks::KindWithContent; |  | ||||||
|  |  | ||||||
| use crate::insta_snapshot::snapshot_index_scheduler; | use crate::insta_snapshot::snapshot_index_scheduler; | ||||||
| use crate::test_utils::Breakpoint::*; | use crate::test_utils::Breakpoint::*; | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ | |||||||
| use std::collections::{BTreeSet, HashSet}; | use std::collections::{BTreeSet, HashSet}; | ||||||
| use std::ops::Bound; | use std::ops::Bound; | ||||||
|  |  | ||||||
| use meilisearch_types::batches::{Batch, BatchId, BatchStats}; | use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats}; | ||||||
| use meilisearch_types::heed::{Database, RoTxn, RwTxn}; | use meilisearch_types::heed::{Database, RoTxn, RwTxn}; | ||||||
| use meilisearch_types::milli::CboRoaringBitmapCodec; | use meilisearch_types::milli::CboRoaringBitmapCodec; | ||||||
| use meilisearch_types::task_view::DetailsView; | use meilisearch_types::task_view::DetailsView; | ||||||
| @@ -30,8 +30,7 @@ pub struct ProcessingBatch { | |||||||
|     pub kinds: HashSet<Kind>, |     pub kinds: HashSet<Kind>, | ||||||
|     pub indexes: HashSet<String>, |     pub indexes: HashSet<String>, | ||||||
|     pub canceled_by: HashSet<TaskId>, |     pub canceled_by: HashSet<TaskId>, | ||||||
|     pub oldest_enqueued_at: Option<OffsetDateTime>, |     pub enqueued_at: Option<BatchEnqueuedAt>, | ||||||
|     pub earliest_enqueued_at: Option<OffsetDateTime>, |  | ||||||
|     pub started_at: OffsetDateTime, |     pub started_at: OffsetDateTime, | ||||||
|     pub finished_at: Option<OffsetDateTime>, |     pub finished_at: Option<OffsetDateTime>, | ||||||
| } | } | ||||||
| @@ -51,8 +50,7 @@ impl ProcessingBatch { | |||||||
|             kinds: HashSet::default(), |             kinds: HashSet::default(), | ||||||
|             indexes: HashSet::default(), |             indexes: HashSet::default(), | ||||||
|             canceled_by: HashSet::default(), |             canceled_by: HashSet::default(), | ||||||
|             oldest_enqueued_at: None, |             enqueued_at: None, | ||||||
|             earliest_enqueued_at: None, |  | ||||||
|             started_at: OffsetDateTime::now_utc(), |             started_at: OffsetDateTime::now_utc(), | ||||||
|             finished_at: None, |             finished_at: None, | ||||||
|         } |         } | ||||||
| @@ -80,14 +78,18 @@ impl ProcessingBatch { | |||||||
|             if let Some(canceled_by) = task.canceled_by { |             if let Some(canceled_by) = task.canceled_by { | ||||||
|                 self.canceled_by.insert(canceled_by); |                 self.canceled_by.insert(canceled_by); | ||||||
|             } |             } | ||||||
|             self.oldest_enqueued_at = |             match self.enqueued_at.as_mut() { | ||||||
|                 Some(self.oldest_enqueued_at.map_or(task.enqueued_at, |oldest_enqueued_at| { |                 Some(BatchEnqueuedAt { earliest, oldest }) => { | ||||||
|                     task.enqueued_at.min(oldest_enqueued_at) |                     *oldest = task.enqueued_at.min(*oldest); | ||||||
|                 })); |                     *earliest = task.enqueued_at.max(*earliest); | ||||||
|             self.earliest_enqueued_at = |                 } | ||||||
|                 Some(self.earliest_enqueued_at.map_or(task.enqueued_at, |earliest_enqueued_at| { |                 None => { | ||||||
|                     task.enqueued_at.max(earliest_enqueued_at) |                     self.enqueued_at = Some(BatchEnqueuedAt { | ||||||
|                 })); |                         earliest: task.enqueued_at, | ||||||
|  |                         oldest: task.enqueued_at, | ||||||
|  |                     }); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -138,6 +140,7 @@ impl ProcessingBatch { | |||||||
|             stats: self.stats.clone(), |             stats: self.stats.clone(), | ||||||
|             started_at: self.started_at, |             started_at: self.started_at, | ||||||
|             finished_at: self.finished_at, |             finished_at: self.finished_at, | ||||||
|  |             enqueued_at: self.enqueued_at, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -174,6 +177,33 @@ pub(crate) fn remove_task_datetime( | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub(crate) fn remove_n_tasks_datetime_earlier_than( | ||||||
|  |     wtxn: &mut RwTxn, | ||||||
|  |     database: Database<BEI128, CboRoaringBitmapCodec>, | ||||||
|  |     earlier_than: OffsetDateTime, | ||||||
|  |     mut count: usize, | ||||||
|  |     task_id: TaskId, | ||||||
|  | ) -> Result<()> { | ||||||
|  |     let earlier_than = earlier_than.unix_timestamp_nanos(); | ||||||
|  |     let mut iter = database.rev_range_mut(wtxn, &(..earlier_than))?; | ||||||
|  |     while let Some((current, mut existing)) = iter.next().transpose()? { | ||||||
|  |         count -= existing.remove(task_id) as usize; | ||||||
|  |  | ||||||
|  |         if existing.is_empty() { | ||||||
|  |             // safety: We don't keep references to the database | ||||||
|  |             unsafe { iter.del_current()? }; | ||||||
|  |         } else { | ||||||
|  |             // safety: We don't keep references to the database | ||||||
|  |                 unsafe { iter.put_current(&current, &existing)? }; | ||||||
|  |         } | ||||||
|  |         if count == 0 { | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     Ok(()) | ||||||
|  | } | ||||||
|  |  | ||||||
| pub(crate) fn keep_ids_within_datetimes( | pub(crate) fn keep_ids_within_datetimes( | ||||||
|     rtxn: &RoTxn, |     rtxn: &RoTxn, | ||||||
|     ids: &mut RoaringBitmap, |     ids: &mut RoaringBitmap, | ||||||
| @@ -329,14 +359,27 @@ impl crate::IndexScheduler { | |||||||
|                 kind, |                 kind, | ||||||
|             } = task; |             } = task; | ||||||
|             assert_eq!(uid, task.uid); |             assert_eq!(uid, task.uid); | ||||||
|             if let Some(ref batch) = batch_uid { |             if task.status != Status::Enqueued { | ||||||
|  |                 let batch_uid = batch_uid.expect("All non enqueued tasks must be part of a batch"); | ||||||
|                 assert!(self |                 assert!(self | ||||||
|                     .queue |                     .queue | ||||||
|                     .batch_to_tasks_mapping |                     .batch_to_tasks_mapping | ||||||
|                     .get(&rtxn, batch) |                     .get(&rtxn, &batch_uid) | ||||||
|                     .unwrap() |                     .unwrap() | ||||||
|                     .unwrap() |                     .unwrap() | ||||||
|                     .contains(uid)); |                     .contains(uid)); | ||||||
|  |                 let batch = self.queue.batches.get_batch(&rtxn, batch_uid).unwrap().unwrap(); | ||||||
|  |                 assert_eq!(batch.uid, batch_uid); | ||||||
|  |                 if task.status == Status::Processing { | ||||||
|  |                     assert!(batch.progress.is_some()); | ||||||
|  |                 } else { | ||||||
|  |                     assert!(batch.progress.is_none()); | ||||||
|  |                 } | ||||||
|  |                 assert_eq!(batch.started_at, task.started_at.unwrap()); | ||||||
|  |                 assert_eq!(batch.finished_at, task.finished_at); | ||||||
|  |                 let enqueued_at = batch.enqueued_at.unwrap(); | ||||||
|  |                 assert!(task.enqueued_at >= enqueued_at.oldest); | ||||||
|  |                 assert!(task.enqueued_at <= enqueued_at.earliest); | ||||||
|             } |             } | ||||||
|             if let Some(task_index_uid) = &task_index_uid { |             if let Some(task_index_uid) = &task_index_uid { | ||||||
|                 assert!(self |                 assert!(self | ||||||
|   | |||||||
| @@ -1,9 +1,10 @@ | |||||||
| use crate::{upgrade::upgrade_index_scheduler, Result}; | use meilisearch_types::heed::types::Str; | ||||||
| use meilisearch_types::{ | use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn}; | ||||||
|     heed::{types::Str, Database, Env, RoTxn, RwTxn}, | use meilisearch_types::milli::heed_codec::version::VersionCodec; | ||||||
|     milli::heed_codec::version::VersionCodec, | use meilisearch_types::versioning; | ||||||
|     versioning, |  | ||||||
| }; | use crate::upgrade::upgrade_index_scheduler; | ||||||
|  | use crate::Result; | ||||||
|  |  | ||||||
| /// The number of databases used by the queue itself | /// The number of databases used by the queue itself | ||||||
| const NUMBER_OF_DATABASES: u32 = 1; | const NUMBER_OF_DATABASES: u32 = 1; | ||||||
| @@ -21,30 +22,38 @@ pub struct Versioning { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl Versioning { | impl Versioning { | ||||||
|     pub(crate) const fn nb_db() -> u32 { |     pub const fn nb_db() -> u32 { | ||||||
|         NUMBER_OF_DATABASES |         NUMBER_OF_DATABASES | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>> { |     pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>, heed::Error> { | ||||||
|         Ok(self.version.get(rtxn, entry_name::MAIN)?) |         self.version.get(rtxn, entry_name::MAIN) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn set_version(&self, wtxn: &mut RwTxn, version: (u32, u32, u32)) -> Result<()> { |     pub fn set_version( | ||||||
|         Ok(self.version.put(wtxn, entry_name::MAIN, &version)?) |         &self, | ||||||
|  |         wtxn: &mut RwTxn, | ||||||
|  |         version: (u32, u32, u32), | ||||||
|  |     ) -> Result<(), heed::Error> { | ||||||
|  |         self.version.put(wtxn, entry_name::MAIN, &version) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<()> { |     pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<(), heed::Error> { | ||||||
|         let major = versioning::VERSION_MAJOR.parse().unwrap(); |         let major = versioning::VERSION_MAJOR.parse().unwrap(); | ||||||
|         let minor = versioning::VERSION_MINOR.parse().unwrap(); |         let minor = versioning::VERSION_MINOR.parse().unwrap(); | ||||||
|         let patch = versioning::VERSION_PATCH.parse().unwrap(); |         let patch = versioning::VERSION_PATCH.parse().unwrap(); | ||||||
|         self.set_version(wtxn, (major, minor, patch)) |         self.set_version(wtxn, (major, minor, patch)) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Create an index scheduler and start its run loop. |     /// Return `Self` without checking anything about the version | ||||||
|  |     pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> { | ||||||
|  |         let version = env.create_database(wtxn, Some(db_name::VERSION))?; | ||||||
|  |         Ok(Self { version }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> { |     pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> { | ||||||
|         let mut wtxn = env.write_txn()?; |         let mut wtxn = env.write_txn()?; | ||||||
|         let version = env.create_database(&mut wtxn, Some(db_name::VERSION))?; |         let this = Self::raw_new(env, &mut wtxn)?; | ||||||
|         let this = Self { version }; |  | ||||||
|         let from = match this.get_version(&wtxn)? { |         let from = match this.get_version(&wtxn)? { | ||||||
|             Some(version) => version, |             Some(version) => version, | ||||||
|             // fresh DB: use the db version |             // fresh DB: use the db version | ||||||
|   | |||||||
| @@ -24,9 +24,35 @@ pub struct Batch { | |||||||
|     pub started_at: OffsetDateTime, |     pub started_at: OffsetDateTime, | ||||||
|     #[serde(with = "time::serde::rfc3339::option")] |     #[serde(with = "time::serde::rfc3339::option")] | ||||||
|     pub finished_at: Option<OffsetDateTime>, |     pub finished_at: Option<OffsetDateTime>, | ||||||
|  |  | ||||||
|  |     // Enqueued at is never displayed and is only required when removing a batch. | ||||||
|  |     // It's always some except when upgrading from a database pre v1.12 | ||||||
|  |     pub enqueued_at: Option<BatchEnqueuedAt>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)] | impl PartialEq for Batch { | ||||||
|  |     fn eq(&self, other: &Self) -> bool { | ||||||
|  |         let Self { uid, progress, details, stats, started_at, finished_at, enqueued_at } = self; | ||||||
|  |  | ||||||
|  |         *uid == other.uid | ||||||
|  |             && progress.is_none() == other.progress.is_none() | ||||||
|  |             && details == &other.details | ||||||
|  |             && stats == &other.stats | ||||||
|  |             && started_at == &other.started_at | ||||||
|  |             && finished_at == &other.finished_at | ||||||
|  |             && enqueued_at == &other.enqueued_at | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] | ||||||
|  | pub struct BatchEnqueuedAt { | ||||||
|  |     #[serde(with = "time::serde::rfc3339")] | ||||||
|  |     pub earliest: OffsetDateTime, | ||||||
|  |     #[serde(with = "time::serde::rfc3339")] | ||||||
|  |     pub oldest: OffsetDateTime, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] | ||||||
| #[serde(rename_all = "camelCase")] | #[serde(rename_all = "camelCase")] | ||||||
| #[schema(rename_all = "camelCase")] | #[schema(rename_all = "camelCase")] | ||||||
| pub struct BatchStats { | pub struct BatchStats { | ||||||
|   | |||||||
| @@ -193,6 +193,8 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError); | |||||||
| merge_with_error_impl_take_error_message!(ParseTaskStatusError); | merge_with_error_impl_take_error_message!(ParseTaskStatusError); | ||||||
| merge_with_error_impl_take_error_message!(IndexUidFormatError); | merge_with_error_impl_take_error_message!(IndexUidFormatError); | ||||||
| merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight); | merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight); | ||||||
|  | merge_with_error_impl_take_error_message!(InvalidNetworkUrl); | ||||||
|  | merge_with_error_impl_take_error_message!(InvalidNetworkSearchApiKey); | ||||||
| merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); | merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); | ||||||
| merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); | merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); | ||||||
| merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold); | merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold); | ||||||
|   | |||||||
| @@ -260,7 +260,13 @@ InvalidMultiSearchMergeFacets         , InvalidRequest       , BAD_REQUEST ; | |||||||
| InvalidMultiSearchQueryFacets         , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchQueryFacets         , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidMultiSearchQueryPagination     , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchQueryPagination     , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidMultiSearchQueryRankingRules   , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchQueryRankingRules   , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidMultiSearchQueryPosition       , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidMultiSearchRemote              , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidMultiSearchWeight              , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchWeight              , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidNetworkRemotes                 , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidNetworkSelf                    , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidNetworkSearchApiKey            , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidNetworkUrl                     , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchAttributesToSearchOn     , InvalidRequest       , BAD_REQUEST ; | InvalidSearchAttributesToSearchOn     , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ; | InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ; | InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ; | ||||||
| @@ -351,14 +357,22 @@ MissingDocumentId                     , InvalidRequest       , BAD_REQUEST ; | |||||||
| MissingFacetSearchFacetName           , InvalidRequest       , BAD_REQUEST ; | MissingFacetSearchFacetName           , InvalidRequest       , BAD_REQUEST ; | ||||||
| MissingIndexUid                       , InvalidRequest       , BAD_REQUEST ; | MissingIndexUid                       , InvalidRequest       , BAD_REQUEST ; | ||||||
| MissingMasterKey                      , Auth                 , UNAUTHORIZED ; | MissingMasterKey                      , Auth                 , UNAUTHORIZED ; | ||||||
|  | MissingNetworkUrl                     , InvalidRequest       , BAD_REQUEST ; | ||||||
| MissingPayload                        , InvalidRequest       , BAD_REQUEST ; | MissingPayload                        , InvalidRequest       , BAD_REQUEST ; | ||||||
| MissingSearchHybrid                   , InvalidRequest       , BAD_REQUEST ; | MissingSearchHybrid                   , InvalidRequest       , BAD_REQUEST ; | ||||||
| MissingSwapIndexes                    , InvalidRequest       , BAD_REQUEST ; | MissingSwapIndexes                    , InvalidRequest       , BAD_REQUEST ; | ||||||
| MissingTaskFilters                    , InvalidRequest       , BAD_REQUEST ; | MissingTaskFilters                    , InvalidRequest       , BAD_REQUEST ; | ||||||
| NoSpaceLeftOnDevice                   , System               , UNPROCESSABLE_ENTITY; | NoSpaceLeftOnDevice                   , System               , UNPROCESSABLE_ENTITY; | ||||||
| PayloadTooLarge                       , InvalidRequest       , PAYLOAD_TOO_LARGE ; | PayloadTooLarge                       , InvalidRequest       , PAYLOAD_TOO_LARGE ; | ||||||
|  | RemoteBadResponse                     , System               , BAD_GATEWAY ; | ||||||
|  | RemoteBadRequest                      , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | RemoteCouldNotSendRequest             , System               , BAD_GATEWAY ; | ||||||
|  | RemoteInvalidApiKey                   , Auth                 , FORBIDDEN ; | ||||||
|  | RemoteRemoteError                     , System               , BAD_GATEWAY ; | ||||||
|  | RemoteTimeout                         , System               , BAD_GATEWAY ; | ||||||
| TooManySearchRequests                 , System               , SERVICE_UNAVAILABLE ; | TooManySearchRequests                 , System               , SERVICE_UNAVAILABLE ; | ||||||
| TaskNotFound                          , InvalidRequest       , NOT_FOUND ; | TaskNotFound                          , InvalidRequest       , NOT_FOUND ; | ||||||
|  | TaskFileNotFound                      , InvalidRequest       , NOT_FOUND ; | ||||||
| BatchNotFound                         , InvalidRequest       , NOT_FOUND ; | BatchNotFound                         , InvalidRequest       , NOT_FOUND ; | ||||||
| TooManyOpenFiles                      , System               , UNPROCESSABLE_ENTITY ; | TooManyOpenFiles                      , System               , UNPROCESSABLE_ENTITY ; | ||||||
| TooManyVectors                        , InvalidRequest       , BAD_REQUEST ; | TooManyVectors                        , InvalidRequest       , BAD_REQUEST ; | ||||||
| @@ -583,6 +597,18 @@ impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl fmt::Display for deserr_codes::InvalidNetworkUrl { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         write!(f, "the value of `url` is invalid, expected a string.") | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         write!(f, "the value of `searchApiKey` is invalid, expected a string.") | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| #[macro_export] | #[macro_export] | ||||||
| macro_rules! internal_error { | macro_rules! internal_error { | ||||||
|     ($target:ty : $($other:path), *) => { |     ($target:ty : $($other:path), *) => { | ||||||
|   | |||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | use std::collections::BTreeMap; | ||||||
|  |  | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] | #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] | ||||||
| @@ -7,6 +9,8 @@ pub struct RuntimeTogglableFeatures { | |||||||
|     pub logs_route: bool, |     pub logs_route: bool, | ||||||
|     pub edit_documents_by_function: bool, |     pub edit_documents_by_function: bool, | ||||||
|     pub contains_filter: bool, |     pub contains_filter: bool, | ||||||
|  |     pub network: bool, | ||||||
|  |     pub get_task_documents_route: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Default, Debug, Clone, Copy)] | #[derive(Default, Debug, Clone, Copy)] | ||||||
| @@ -15,3 +19,20 @@ pub struct InstanceTogglableFeatures { | |||||||
|     pub logs_route: bool, |     pub logs_route: bool, | ||||||
|     pub contains_filter: bool, |     pub contains_filter: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub struct Remote { | ||||||
|  |     pub url: String, | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub search_api_key: Option<String>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub struct Network { | ||||||
|  |     #[serde(default, rename = "self")] | ||||||
|  |     pub local: Option<String>, | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub remotes: BTreeMap<String, Remote>, | ||||||
|  | } | ||||||
|   | |||||||
| @@ -4,13 +4,14 @@ use std::fmt; | |||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
|  |  | ||||||
| use deserr::Deserr; | use deserr::Deserr; | ||||||
|  | use serde::Serialize; | ||||||
| use utoipa::ToSchema; | use utoipa::ToSchema; | ||||||
|  |  | ||||||
| use crate::error::{Code, ErrorCode}; | use crate::error::{Code, ErrorCode}; | ||||||
|  |  | ||||||
| /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 | /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 | ||||||
| /// bytes long | /// bytes long | ||||||
| #[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, ToSchema)] | #[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, Serialize, ToSchema)] | ||||||
| #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] | #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] | ||||||
| #[schema(value_type = String, example = "movies")] | #[schema(value_type = String, example = "movies")] | ||||||
| pub struct IndexUid(String); | pub struct IndexUid(String); | ||||||
|   | |||||||
| @@ -302,6 +302,12 @@ pub enum Action { | |||||||
|     #[serde(rename = "experimental.update")] |     #[serde(rename = "experimental.update")] | ||||||
|     #[deserr(rename = "experimental.update")] |     #[deserr(rename = "experimental.update")] | ||||||
|     ExperimentalFeaturesUpdate, |     ExperimentalFeaturesUpdate, | ||||||
|  |     #[serde(rename = "network.get")] | ||||||
|  |     #[deserr(rename = "network.get")] | ||||||
|  |     NetworkGet, | ||||||
|  |     #[serde(rename = "network.update")] | ||||||
|  |     #[deserr(rename = "network.update")] | ||||||
|  |     NetworkUpdate, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Action { | impl Action { | ||||||
| @@ -341,6 +347,8 @@ impl Action { | |||||||
|             KEYS_DELETE => Some(Self::KeysDelete), |             KEYS_DELETE => Some(Self::KeysDelete), | ||||||
|             EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet), |             EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet), | ||||||
|             EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate), |             EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate), | ||||||
|  |             NETWORK_GET => Some(Self::NetworkGet), | ||||||
|  |             NETWORK_UPDATE => Some(Self::NetworkUpdate), | ||||||
|             _otherwise => None, |             _otherwise => None, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -386,4 +394,7 @@ pub mod actions { | |||||||
|     pub const KEYS_DELETE: u8 = KeysDelete.repr(); |     pub const KEYS_DELETE: u8 = KeysDelete.repr(); | ||||||
|     pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr(); |     pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr(); | ||||||
|     pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr(); |     pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr(); | ||||||
|  |  | ||||||
|  |     pub const NETWORK_GET: u8 = NetworkGet.repr(); | ||||||
|  |     pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,7 +1,10 @@ | |||||||
| use std::fs; | use std::fs; | ||||||
| use std::io::{self, ErrorKind}; | use std::io::{ErrorKind, Write}; | ||||||
| use std::path::Path; | use std::path::Path; | ||||||
|  |  | ||||||
|  | use milli::heed; | ||||||
|  | use tempfile::NamedTempFile; | ||||||
|  |  | ||||||
| /// The name of the file that contains the version of the database. | /// The name of the file that contains the version of the database. | ||||||
| pub const VERSION_FILE_NAME: &str = "VERSION"; | pub const VERSION_FILE_NAME: &str = "VERSION"; | ||||||
|  |  | ||||||
| @@ -10,37 +13,7 @@ pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR"); | |||||||
| pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH"); | pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH"); | ||||||
|  |  | ||||||
| /// Persists the version of the current Meilisearch binary to a VERSION file | /// Persists the version of the current Meilisearch binary to a VERSION file | ||||||
| pub fn update_version_file_for_dumpless_upgrade( | pub fn create_current_version_file(db_path: &Path) -> anyhow::Result<()> { | ||||||
|     db_path: &Path, |  | ||||||
|     from: (u32, u32, u32), |  | ||||||
|     to: (u32, u32, u32), |  | ||||||
| ) -> Result<(), VersionFileError> { |  | ||||||
|     let (from_major, from_minor, from_patch) = from; |  | ||||||
|     let (to_major, to_minor, to_patch) = to; |  | ||||||
|  |  | ||||||
|     if from_major > to_major |  | ||||||
|         || (from_major == to_major && from_minor > to_minor) |  | ||||||
|         || (from_major == to_major && from_minor == to_minor && from_patch > to_patch) |  | ||||||
|     { |  | ||||||
|         Err(VersionFileError::DowngradeNotSupported { |  | ||||||
|             major: from_major, |  | ||||||
|             minor: from_minor, |  | ||||||
|             patch: from_patch, |  | ||||||
|         }) |  | ||||||
|     } else if from_major < 1 || (from_major == to_major && from_minor < 12) { |  | ||||||
|         Err(VersionFileError::TooOldForAutomaticUpgrade { |  | ||||||
|             major: from_major, |  | ||||||
|             minor: from_minor, |  | ||||||
|             patch: from_patch, |  | ||||||
|         }) |  | ||||||
|     } else { |  | ||||||
|         create_current_version_file(db_path)?; |  | ||||||
|         Ok(()) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Persists the version of the current Meilisearch binary to a VERSION file |  | ||||||
| pub fn create_current_version_file(db_path: &Path) -> io::Result<()> { |  | ||||||
|     create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) |     create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -49,9 +22,14 @@ pub fn create_version_file( | |||||||
|     major: &str, |     major: &str, | ||||||
|     minor: &str, |     minor: &str, | ||||||
|     patch: &str, |     patch: &str, | ||||||
| ) -> io::Result<()> { | ) -> anyhow::Result<()> { | ||||||
|     let version_path = db_path.join(VERSION_FILE_NAME); |     let version_path = db_path.join(VERSION_FILE_NAME); | ||||||
|     fs::write(version_path, format!("{}.{}.{}", major, minor, patch)) |     // In order to persist the file later we must create it in the `data.ms` and not in `/tmp` | ||||||
|  |     let mut file = NamedTempFile::new_in(db_path)?; | ||||||
|  |     file.write_all(format!("{}.{}.{}", major, minor, patch).as_bytes())?; | ||||||
|  |     file.flush()?; | ||||||
|  |     file.persist(version_path)?; | ||||||
|  |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> { | pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> { | ||||||
| @@ -61,7 +39,7 @@ pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> | |||||||
|         Ok(version) => parse_version(&version), |         Ok(version) => parse_version(&version), | ||||||
|         Err(error) => match error.kind() { |         Err(error) => match error.kind() { | ||||||
|             ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile), |             ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile), | ||||||
|             _ => Err(error.into()), |             _ => Err(anyhow::Error::from(error).into()), | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -112,7 +90,9 @@ pub enum VersionFileError { | |||||||
|     DowngradeNotSupported { major: u32, minor: u32, patch: u32 }, |     DowngradeNotSupported { major: u32, minor: u32, patch: u32 }, | ||||||
|     #[error("Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}")] |     #[error("Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}")] | ||||||
|     TooOldForAutomaticUpgrade { major: u32, minor: u32, patch: u32 }, |     TooOldForAutomaticUpgrade { major: u32, minor: u32, patch: u32 }, | ||||||
|  |     #[error("Error while modifying the database: {0}")] | ||||||
|  |     ErrorWhileModifyingTheDatabase(#[from] heed::Error), | ||||||
|  |  | ||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
|     IoError(#[from] std::io::Error), |     AnyhowError(#[from] anyhow::Error), | ||||||
| } | } | ||||||
|   | |||||||
| @@ -31,6 +31,7 @@ use crate::routes::{create_all_stats, Stats}; | |||||||
| use crate::Opt; | use crate::Opt; | ||||||
|  |  | ||||||
| const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; | const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; | ||||||
|  | const MEILI_SERVER_PROVIDER: &str = "MEILI_SERVER_PROVIDER"; | ||||||
|  |  | ||||||
| /// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. | /// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. | ||||||
| fn write_user_id(db_path: &Path, user_id: &InstanceUid) { | fn write_user_id(db_path: &Path, user_id: &InstanceUid) { | ||||||
| @@ -195,6 +196,8 @@ struct Infos { | |||||||
|     experimental_reduce_indexing_memory_usage: bool, |     experimental_reduce_indexing_memory_usage: bool, | ||||||
|     experimental_max_number_of_batched_tasks: usize, |     experimental_max_number_of_batched_tasks: usize, | ||||||
|     experimental_limit_batched_tasks_total_size: u64, |     experimental_limit_batched_tasks_total_size: u64, | ||||||
|  |     experimental_network: bool, | ||||||
|  |     experimental_get_task_documents_route: bool, | ||||||
|     gpu_enabled: bool, |     gpu_enabled: bool, | ||||||
|     db_path: bool, |     db_path: bool, | ||||||
|     import_dump: bool, |     import_dump: bool, | ||||||
| @@ -285,6 +288,8 @@ impl Infos { | |||||||
|             logs_route, |             logs_route, | ||||||
|             edit_documents_by_function, |             edit_documents_by_function, | ||||||
|             contains_filter, |             contains_filter, | ||||||
|  |             network, | ||||||
|  |             get_task_documents_route, | ||||||
|         } = features; |         } = features; | ||||||
|  |  | ||||||
|         // We're going to override all sensitive information. |         // We're going to override all sensitive information. | ||||||
| @@ -302,6 +307,8 @@ impl Infos { | |||||||
|             experimental_replication_parameters, |             experimental_replication_parameters, | ||||||
|             experimental_enable_logs_route: experimental_enable_logs_route | logs_route, |             experimental_enable_logs_route: experimental_enable_logs_route | logs_route, | ||||||
|             experimental_reduce_indexing_memory_usage, |             experimental_reduce_indexing_memory_usage, | ||||||
|  |             experimental_network: network, | ||||||
|  |             experimental_get_task_documents_route: get_task_documents_route, | ||||||
|             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), |             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), | ||||||
|             db_path: db_path != PathBuf::from("./data.ms"), |             db_path: db_path != PathBuf::from("./data.ms"), | ||||||
|             import_dump: import_dump.is_some(), |             import_dump: import_dump.is_some(), | ||||||
| @@ -357,7 +364,7 @@ impl Segment { | |||||||
|                     "cores": sys.cpus().len(), |                     "cores": sys.cpus().len(), | ||||||
|                     "ram_size": sys.total_memory(), |                     "ram_size": sys.total_memory(), | ||||||
|                     "disk_size": disks.iter().map(|disk| disk.total_space()).max(), |                     "disk_size": disks.iter().map(|disk| disk.total_space()).max(), | ||||||
|                     "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), |                     "server_provider": std::env::var(MEILI_SERVER_PROVIDER).ok(), | ||||||
|             }) |             }) | ||||||
|         }); |         }); | ||||||
|         let number_of_documents = |         let number_of_documents = | ||||||
| @@ -380,10 +387,18 @@ impl Segment { | |||||||
|         index_scheduler: Arc<IndexScheduler>, |         index_scheduler: Arc<IndexScheduler>, | ||||||
|         auth_controller: Arc<AuthController>, |         auth_controller: Arc<AuthController>, | ||||||
|     ) { |     ) { | ||||||
|         const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour |         let interval: Duration = match std::env::var(MEILI_SERVER_PROVIDER) { | ||||||
|                                                                  // The first batch must be sent after one hour. |             Ok(provider) if provider.starts_with("meili_cloud:") => { | ||||||
|  |                 Duration::from_secs(60 * 60) // one hour | ||||||
|  |             } | ||||||
|  |             _ => { | ||||||
|  |                 // We're an open source instance | ||||||
|  |                 Duration::from_secs(60 * 60 * 24) // one day | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         let mut interval = |         let mut interval = | ||||||
|             tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL); |             tokio::time::interval_at(tokio::time::Instant::now() + interval, interval); | ||||||
|  |  | ||||||
|         loop { |         loop { | ||||||
|             select! { |             select! { | ||||||
|   | |||||||
| @@ -32,6 +32,7 @@ use analytics::Analytics; | |||||||
| use anyhow::bail; | use anyhow::bail; | ||||||
| use error::PayloadError; | use error::PayloadError; | ||||||
| use extractors::payload::PayloadConfig; | use extractors::payload::PayloadConfig; | ||||||
|  | use index_scheduler::versioning::Versioning; | ||||||
| use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; | use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; | ||||||
| use meilisearch_auth::AuthController; | use meilisearch_auth::AuthController; | ||||||
| use meilisearch_types::milli::constants::VERSION_MAJOR; | use meilisearch_types::milli::constants::VERSION_MAJOR; | ||||||
| @@ -40,10 +41,9 @@ use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMetho | |||||||
| use meilisearch_types::settings::apply_settings_to_builder; | use meilisearch_types::settings::apply_settings_to_builder; | ||||||
| use meilisearch_types::tasks::KindWithContent; | use meilisearch_types::tasks::KindWithContent; | ||||||
| use meilisearch_types::versioning::{ | use meilisearch_types::versioning::{ | ||||||
|     create_current_version_file, get_version, update_version_file_for_dumpless_upgrade, |     create_current_version_file, get_version, VersionFileError, VERSION_MINOR, VERSION_PATCH, | ||||||
|     VersionFileError, VERSION_MINOR, VERSION_PATCH, |  | ||||||
| }; | }; | ||||||
| use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; | use meilisearch_types::{compression, heed, milli, VERSION_FILE_NAME}; | ||||||
| pub use option::Opt; | pub use option::Opt; | ||||||
| use option::ScheduleSnapshot; | use option::ScheduleSnapshot; | ||||||
| use search_queue::SearchQueue; | use search_queue::SearchQueue; | ||||||
| @@ -356,14 +356,19 @@ fn open_or_create_database_unchecked( | |||||||
|  |  | ||||||
| /// Ensures Meilisearch version is compatible with the database, returns an error in case of version mismatch. | /// Ensures Meilisearch version is compatible with the database, returns an error in case of version mismatch. | ||||||
| /// Returns the version that was contained in the version file | /// Returns the version that was contained in the version file | ||||||
| fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<(u32, u32, u32)> { | fn check_version( | ||||||
|  |     opt: &Opt, | ||||||
|  |     index_scheduler_opt: &IndexSchedulerOptions, | ||||||
|  |     binary_version: (u32, u32, u32), | ||||||
|  | ) -> anyhow::Result<(u32, u32, u32)> { | ||||||
|     let (bin_major, bin_minor, bin_patch) = binary_version; |     let (bin_major, bin_minor, bin_patch) = binary_version; | ||||||
|     let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?; |     let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?; | ||||||
|  |  | ||||||
|     if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch { |     if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch { | ||||||
|         if opt.experimental_dumpless_upgrade { |         if opt.experimental_dumpless_upgrade { | ||||||
|             update_version_file_for_dumpless_upgrade( |             update_version_file_for_dumpless_upgrade( | ||||||
|                 &opt.db_path, |                 opt, | ||||||
|  |                 index_scheduler_opt, | ||||||
|                 (db_major, db_minor, db_patch), |                 (db_major, db_minor, db_patch), | ||||||
|                 (bin_major, bin_minor, bin_patch), |                 (bin_major, bin_minor, bin_patch), | ||||||
|             )?; |             )?; | ||||||
| @@ -380,6 +385,57 @@ fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<( | |||||||
|     Ok((db_major, db_minor, db_patch)) |     Ok((db_major, db_minor, db_patch)) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Checks that a dumpless upgrade from `from` to `to` is supported, then persists the version of the current Meilisearch binary to the VERSION file | ||||||
|  | pub fn update_version_file_for_dumpless_upgrade( | ||||||
|  |     opt: &Opt, | ||||||
|  |     index_scheduler_opt: &IndexSchedulerOptions, | ||||||
|  |     from: (u32, u32, u32), | ||||||
|  |     to: (u32, u32, u32), | ||||||
|  | ) -> Result<(), VersionFileError> { | ||||||
|  |     let (from_major, from_minor, from_patch) = from; | ||||||
|  |     let (to_major, to_minor, to_patch) = to; | ||||||
|  |  | ||||||
|  |     // Early exit in case of error | ||||||
|  |     if from_major > to_major | ||||||
|  |         || (from_major == to_major && from_minor > to_minor) | ||||||
|  |         || (from_major == to_major && from_minor == to_minor && from_patch > to_patch) | ||||||
|  |     { | ||||||
|  |         return Err(VersionFileError::DowngradeNotSupported { | ||||||
|  |             major: from_major, | ||||||
|  |             minor: from_minor, | ||||||
|  |             patch: from_patch, | ||||||
|  |         }); | ||||||
|  |     } else if from_major < 1 || (from_major == to_major && from_minor < 12) { | ||||||
|  |         return Err(VersionFileError::TooOldForAutomaticUpgrade { | ||||||
|  |             major: from_major, | ||||||
|  |             minor: from_minor, | ||||||
|  |             patch: from_patch, | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // In the case of v1.12, the index-scheduler didn't store its internal version at the time. | ||||||
|  |     // => We must write it immediately **in the index-scheduler**, before updating the version file; otherwise | ||||||
|  |     //    there is a risk of DB corruption if a restart happens after writing the version file but before | ||||||
|  |     //    writing the version in the index-scheduler. See <https://github.com/meilisearch/meilisearch/issues/5280> | ||||||
|  |     if from_major == 1 && from_minor == 12 { | ||||||
|  |         let env = unsafe { | ||||||
|  |             heed::EnvOpenOptions::new() | ||||||
|  |                 .max_dbs(Versioning::nb_db()) | ||||||
|  |                 .map_size(index_scheduler_opt.task_db_size) | ||||||
|  |                 .open(&index_scheduler_opt.tasks_path) | ||||||
|  |         }?; | ||||||
|  |         let mut wtxn = env.write_txn()?; | ||||||
|  |         let versioning = Versioning::raw_new(&env, &mut wtxn)?; | ||||||
|  |         versioning.set_version(&mut wtxn, (from_major, from_minor, from_patch))?; | ||||||
|  |         wtxn.commit()?; | ||||||
|  |         // Should be instant since we're the only one using the env | ||||||
|  |         env.prepare_for_closing().wait(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     create_current_version_file(&opt.db_path)?; | ||||||
|  |     Ok(()) | ||||||
|  | } | ||||||
|  |  | ||||||
| /// Ensure you're in a valid state and open the IndexScheduler + AuthController for you. | /// Ensure you're in a valid state and open the IndexScheduler + AuthController for you. | ||||||
| fn open_or_create_database( | fn open_or_create_database( | ||||||
|     opt: &Opt, |     opt: &Opt, | ||||||
| @@ -387,7 +443,11 @@ fn open_or_create_database( | |||||||
|     empty_db: bool, |     empty_db: bool, | ||||||
|     binary_version: (u32, u32, u32), |     binary_version: (u32, u32, u32), | ||||||
| ) -> anyhow::Result<(IndexScheduler, AuthController)> { | ) -> anyhow::Result<(IndexScheduler, AuthController)> { | ||||||
|     let version = if !empty_db { check_version(opt, binary_version)? } else { binary_version }; |     let version = if !empty_db { | ||||||
|  |         check_version(opt, &index_scheduler_opt, binary_version)? | ||||||
|  |     } else { | ||||||
|  |         binary_version | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version) |     open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version) | ||||||
| } | } | ||||||
| @@ -431,10 +491,13 @@ fn import_dump( | |||||||
|         keys.push(key); |         keys.push(key); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // 3. Import the runtime features. |     // 3. Import the runtime features and network | ||||||
|     let features = dump_reader.features()?.unwrap_or_default(); |     let features = dump_reader.features()?.unwrap_or_default(); | ||||||
|     index_scheduler.put_runtime_features(features)?; |     index_scheduler.put_runtime_features(features)?; | ||||||
|  |  | ||||||
|  |     let network = dump_reader.network()?.cloned().unwrap_or_default(); | ||||||
|  |     index_scheduler.put_network(network)?; | ||||||
|  |  | ||||||
|     let indexer_config = index_scheduler.indexer_config(); |     let indexer_config = index_scheduler.indexer_config(); | ||||||
|  |  | ||||||
|     // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might |     // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might | ||||||
| @@ -508,9 +571,15 @@ fn import_dump( | |||||||
|         index_scheduler.refresh_index_stats(&uid)?; |         index_scheduler.refresh_index_stats(&uid)?; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // 5. Import the queue | ||||||
|     let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; |     let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; | ||||||
|  |     // 5.1. Import the batches | ||||||
|  |     for ret in dump_reader.batches()? { | ||||||
|  |         let batch = ret?; | ||||||
|  |         index_scheduler_dump.register_dumped_batch(batch)?; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // 5. Import the tasks. |     // 5.2. Import the tasks | ||||||
|     for ret in dump_reader.tasks()? { |     for ret in dump_reader.tasks()? { | ||||||
|         let (task, file) = ret?; |         let (task, file) = ret?; | ||||||
|         index_scheduler_dump.register_dumped_task(task, file)?; |         index_scheduler_dump.register_dumped_task(task, file)?; | ||||||
|   | |||||||
| @@ -50,6 +50,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { | |||||||
|             logs_route: Some(false), |             logs_route: Some(false), | ||||||
|             edit_documents_by_function: Some(false), |             edit_documents_by_function: Some(false), | ||||||
|             contains_filter: Some(false), |             contains_filter: Some(false), | ||||||
|  |             network: Some(false), | ||||||
|  |             get_task_documents_route: Some(false), | ||||||
|         })), |         })), | ||||||
|         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( |         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||||
|             { |             { | ||||||
| @@ -88,6 +90,10 @@ pub struct RuntimeTogglableFeatures { | |||||||
|     pub edit_documents_by_function: Option<bool>, |     pub edit_documents_by_function: Option<bool>, | ||||||
|     #[deserr(default)] |     #[deserr(default)] | ||||||
|     pub contains_filter: Option<bool>, |     pub contains_filter: Option<bool>, | ||||||
|  |     #[deserr(default)] | ||||||
|  |     pub network: Option<bool>, | ||||||
|  |     #[deserr(default)] | ||||||
|  |     pub get_task_documents_route: Option<bool>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures { | impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures { | ||||||
| @@ -97,6 +103,8 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg | |||||||
|             logs_route, |             logs_route, | ||||||
|             edit_documents_by_function, |             edit_documents_by_function, | ||||||
|             contains_filter, |             contains_filter, | ||||||
|  |             network, | ||||||
|  |             get_task_documents_route, | ||||||
|         } = value; |         } = value; | ||||||
|  |  | ||||||
|         Self { |         Self { | ||||||
| @@ -104,6 +112,8 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg | |||||||
|             logs_route: Some(logs_route), |             logs_route: Some(logs_route), | ||||||
|             edit_documents_by_function: Some(edit_documents_by_function), |             edit_documents_by_function: Some(edit_documents_by_function), | ||||||
|             contains_filter: Some(contains_filter), |             contains_filter: Some(contains_filter), | ||||||
|  |             network: Some(network), | ||||||
|  |             get_task_documents_route: Some(get_task_documents_route), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -114,6 +124,8 @@ pub struct PatchExperimentalFeatureAnalytics { | |||||||
|     logs_route: bool, |     logs_route: bool, | ||||||
|     edit_documents_by_function: bool, |     edit_documents_by_function: bool, | ||||||
|     contains_filter: bool, |     contains_filter: bool, | ||||||
|  |     network: bool, | ||||||
|  |     get_task_documents_route: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Aggregate for PatchExperimentalFeatureAnalytics { | impl Aggregate for PatchExperimentalFeatureAnalytics { | ||||||
| @@ -127,6 +139,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { | |||||||
|             logs_route: new.logs_route, |             logs_route: new.logs_route, | ||||||
|             edit_documents_by_function: new.edit_documents_by_function, |             edit_documents_by_function: new.edit_documents_by_function, | ||||||
|             contains_filter: new.contains_filter, |             contains_filter: new.contains_filter, | ||||||
|  |             network: new.network, | ||||||
|  |             get_task_documents_route: new.get_task_documents_route, | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -149,6 +163,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { | |||||||
|             logs_route: Some(false), |             logs_route: Some(false), | ||||||
|             edit_documents_by_function: Some(false), |             edit_documents_by_function: Some(false), | ||||||
|             contains_filter: Some(false), |             contains_filter: Some(false), | ||||||
|  |             network: Some(false), | ||||||
|  |             get_task_documents_route: Some(false), | ||||||
|          })), |          })), | ||||||
|         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( |         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||||
|             { |             { | ||||||
| @@ -181,16 +197,23 @@ async fn patch_features( | |||||||
|             .edit_documents_by_function |             .edit_documents_by_function | ||||||
|             .unwrap_or(old_features.edit_documents_by_function), |             .unwrap_or(old_features.edit_documents_by_function), | ||||||
|         contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter), |         contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter), | ||||||
|  |         network: new_features.0.network.unwrap_or(old_features.network), | ||||||
|  |         get_task_documents_route: new_features | ||||||
|  |             .0 | ||||||
|  |             .get_task_documents_route | ||||||
|  |             .unwrap_or(old_features.get_task_documents_route), | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // explicitly destructure for analytics rather than using the `Serialize` implementation, because |     // explicitly destructure for analytics rather than using the `Serialize` implementation, because | ||||||
|     // the it renames to camelCase, which we don't want for analytics. |     // it renames to camelCase, which we don't want for analytics. | ||||||
|     // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. |     // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. | ||||||
|     let meilisearch_types::features::RuntimeTogglableFeatures { |     let meilisearch_types::features::RuntimeTogglableFeatures { | ||||||
|         metrics, |         metrics, | ||||||
|         logs_route, |         logs_route, | ||||||
|         edit_documents_by_function, |         edit_documents_by_function, | ||||||
|         contains_filter, |         contains_filter, | ||||||
|  |         network, | ||||||
|  |         get_task_documents_route, | ||||||
|     } = new_features; |     } = new_features; | ||||||
|  |  | ||||||
|     analytics.publish( |     analytics.publish( | ||||||
| @@ -199,6 +222,8 @@ async fn patch_features( | |||||||
|             logs_route, |             logs_route, | ||||||
|             edit_documents_by_function, |             edit_documents_by_function, | ||||||
|             contains_filter, |             contains_filter, | ||||||
|  |             network, | ||||||
|  |             get_task_documents_route, | ||||||
|         }, |         }, | ||||||
|         &req, |         &req, | ||||||
|     ); |     ); | ||||||
|   | |||||||
| @@ -496,6 +496,12 @@ pub struct IndexStats { | |||||||
|     pub number_of_documents: u64, |     pub number_of_documents: u64, | ||||||
|     /// Whether or not the index is currently ingesting document |     /// Whether or not the index is currently ingesting document | ||||||
|     pub is_indexing: bool, |     pub is_indexing: bool, | ||||||
|  |     /// Number of embeddings in the index | ||||||
|  |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub number_of_embeddings: Option<u64>, | ||||||
|  |     /// Number of embedded documents in the index | ||||||
|  |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub number_of_embedded_documents: Option<u64>, | ||||||
|     /// Association of every field name with the number of times it occurs in the documents. |     /// Association of every field name with the number of times it occurs in the documents. | ||||||
|     #[schema(value_type = HashMap<String, u64>)] |     #[schema(value_type = HashMap<String, u64>)] | ||||||
|     pub field_distribution: FieldDistribution, |     pub field_distribution: FieldDistribution, | ||||||
| @@ -506,6 +512,8 @@ impl From<index_scheduler::IndexStats> for IndexStats { | |||||||
|         IndexStats { |         IndexStats { | ||||||
|             number_of_documents: stats.inner_stats.number_of_documents, |             number_of_documents: stats.inner_stats.number_of_documents, | ||||||
|             is_indexing: stats.is_indexing, |             is_indexing: stats.is_indexing, | ||||||
|  |             number_of_embeddings: stats.inner_stats.number_of_embeddings, | ||||||
|  |             number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents, | ||||||
|             field_distribution: stats.inner_stats.field_distribution, |             field_distribution: stats.inner_stats.field_distribution, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -524,6 +532,8 @@ impl From<index_scheduler::IndexStats> for IndexStats { | |||||||
|         (status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!( |         (status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!( | ||||||
|             { |             { | ||||||
|                 "numberOfDocuments": 10, |                 "numberOfDocuments": 10, | ||||||
|  |                 "numberOfEmbeddings": 10, | ||||||
|  |                 "numberOfEmbeddedDocuments": 10, | ||||||
|                 "isIndexing": true, |                 "isIndexing": true, | ||||||
|                 "fieldDistribution": { |                 "fieldDistribution": { | ||||||
|                     "genre": 10, |                     "genre": 10, | ||||||
|   | |||||||
| @@ -34,6 +34,7 @@ use crate::routes::features::RuntimeTogglableFeatures; | |||||||
| use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction}; | use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction}; | ||||||
| use crate::routes::indexes::IndexView; | use crate::routes::indexes::IndexView; | ||||||
| use crate::routes::multi_search::SearchResults; | use crate::routes::multi_search::SearchResults; | ||||||
|  | use crate::routes::network::{Network, Remote}; | ||||||
| use crate::routes::swap_indexes::SwapIndexesPayload; | use crate::routes::swap_indexes::SwapIndexesPayload; | ||||||
| use crate::search::{ | use crate::search::{ | ||||||
|     FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, |     FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, | ||||||
| @@ -54,6 +55,7 @@ mod logs; | |||||||
| mod metrics; | mod metrics; | ||||||
| mod multi_search; | mod multi_search; | ||||||
| mod multi_search_analytics; | mod multi_search_analytics; | ||||||
|  | pub mod network; | ||||||
| mod open_api_utils; | mod open_api_utils; | ||||||
| mod snapshot; | mod snapshot; | ||||||
| mod swap_indexes; | mod swap_indexes; | ||||||
| @@ -75,6 +77,7 @@ pub mod tasks; | |||||||
|         (path = "/multi-search", api = multi_search::MultiSearchApi), |         (path = "/multi-search", api = multi_search::MultiSearchApi), | ||||||
|         (path = "/swap-indexes", api = swap_indexes::SwapIndexesApi), |         (path = "/swap-indexes", api = swap_indexes::SwapIndexesApi), | ||||||
|         (path = "/experimental-features", api = features::ExperimentalFeaturesApi), |         (path = "/experimental-features", api = features::ExperimentalFeaturesApi), | ||||||
|  |         (path = "/network", api = network::NetworkApi), | ||||||
|     ), |     ), | ||||||
|     paths(get_health, get_version, get_stats), |     paths(get_health, get_version, get_stats), | ||||||
|     tags( |     tags( | ||||||
| @@ -85,7 +88,7 @@ pub mod tasks; | |||||||
|         url = "/", |         url = "/", | ||||||
|         description = "Local server", |         description = "Local server", | ||||||
|     )), |     )), | ||||||
|     components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind)) |     components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote)) | ||||||
| )] | )] | ||||||
| pub struct MeilisearchApi; | pub struct MeilisearchApi; | ||||||
|  |  | ||||||
| @@ -103,7 +106,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { | |||||||
|         .service(web::scope("/multi-search").configure(multi_search::configure)) |         .service(web::scope("/multi-search").configure(multi_search::configure)) | ||||||
|         .service(web::scope("/swap-indexes").configure(swap_indexes::configure)) |         .service(web::scope("/swap-indexes").configure(swap_indexes::configure)) | ||||||
|         .service(web::scope("/metrics").configure(metrics::configure)) |         .service(web::scope("/metrics").configure(metrics::configure)) | ||||||
|         .service(web::scope("/experimental-features").configure(features::configure)); |         .service(web::scope("/experimental-features").configure(features::configure)) | ||||||
|  |         .service(web::scope("/network").configure(network::configure)); | ||||||
|  |  | ||||||
|     #[cfg(feature = "swagger")] |     #[cfg(feature = "swagger")] | ||||||
|     { |     { | ||||||
| @@ -359,9 +363,9 @@ pub async fn running() -> HttpResponse { | |||||||
| #[derive(Serialize, Debug, ToSchema)] | #[derive(Serialize, Debug, ToSchema)] | ||||||
| #[serde(rename_all = "camelCase")] | #[serde(rename_all = "camelCase")] | ||||||
| pub struct Stats { | pub struct Stats { | ||||||
|     /// The size of the database, in bytes. |     /// The disk space used by the database, in bytes. | ||||||
|     pub database_size: u64, |     pub database_size: u64, | ||||||
|     #[serde(skip)] |     /// The size of the database, in bytes. | ||||||
|     pub used_database_size: u64, |     pub used_database_size: u64, | ||||||
|     /// The date of the last update in the RFC 3339 formats. Can be `null` if no update has ever been processed. |     /// The date of the last update in the RFC 3339 formats. Can be `null` if no update has ever been processed. | ||||||
|     #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] |     #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] | ||||||
| @@ -383,6 +387,7 @@ pub struct Stats { | |||||||
|         (status = 200, description = "The stats of the instance", body = Stats, content_type = "application/json", example = json!( |         (status = 200, description = "The stats of the instance", body = Stats, content_type = "application/json", example = json!( | ||||||
|             { |             { | ||||||
|                 "databaseSize": 567, |                 "databaseSize": 567, | ||||||
|  |                 "usedDatabaseSize": 456, | ||||||
|                 "lastUpdate": "2019-11-20T09:40:33.711324Z", |                 "lastUpdate": "2019-11-20T09:40:33.711324Z", | ||||||
|                 "indexes": { |                 "indexes": { | ||||||
|                     "movies": { |                     "movies": { | ||||||
|   | |||||||
| @@ -20,6 +20,7 @@ use crate::routes::indexes::search::search_kind; | |||||||
| use crate::search::{ | use crate::search::{ | ||||||
|     add_search_rules, perform_federated_search, perform_search, FederatedSearch, |     add_search_rules, perform_federated_search, perform_search, FederatedSearch, | ||||||
|     FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, |     FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, | ||||||
|  |     PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, | ||||||
| }; | }; | ||||||
| use crate::search_queue::SearchQueue; | use crate::search_queue::SearchQueue; | ||||||
|  |  | ||||||
| @@ -48,6 +49,7 @@ pub struct SearchResults { | |||||||
| /// Bundle multiple search queries in a single API request. Use this endpoint to search through multiple indexes at once. | /// Bundle multiple search queries in a single API request. Use this endpoint to search through multiple indexes at once. | ||||||
| #[utoipa::path( | #[utoipa::path( | ||||||
|     post, |     post, | ||||||
|  |     request_body = FederatedSearch, | ||||||
|     path = "", |     path = "", | ||||||
|     tag = "Multi-search", |     tag = "Multi-search", | ||||||
|     security(("Bearer" = ["search", "*"])), |     security(("Bearer" = ["search", "*"])), | ||||||
| @@ -186,18 +188,22 @@ pub async fn multi_search_with_post( | |||||||
|  |  | ||||||
|     let response = match federation { |     let response = match federation { | ||||||
|         Some(federation) => { |         Some(federation) => { | ||||||
|             let search_result = tokio::task::spawn_blocking(move || { |             // check remote header | ||||||
|                 perform_federated_search(&index_scheduler, queries, federation, features) |             let is_proxy = req | ||||||
|             }) |                 .headers() | ||||||
|  |                 .get(PROXY_SEARCH_HEADER) | ||||||
|  |                 .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); | ||||||
|  |             let search_result = | ||||||
|  |                 perform_federated_search(&index_scheduler, queries, federation, features, is_proxy) | ||||||
|                     .await; |                     .await; | ||||||
|             permit.drop().await; |             permit.drop().await; | ||||||
|  |  | ||||||
|             if let Ok(Ok(_)) = search_result { |             if search_result.is_ok() { | ||||||
|                 multi_aggregate.succeed(); |                 multi_aggregate.succeed(); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             analytics.publish(multi_aggregate, &req); |             analytics.publish(multi_aggregate, &req); | ||||||
|             HttpResponse::Ok().json(search_result??) |             HttpResponse::Ok().json(search_result?) | ||||||
|         } |         } | ||||||
|         None => { |         None => { | ||||||
|             // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, |             // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, | ||||||
|   | |||||||
| @@ -13,6 +13,8 @@ pub struct MultiSearchAggregator { | |||||||
|  |  | ||||||
|     // sum of the number of distinct indexes in each single request, use with total_received to compute an avg |     // sum of the number of distinct indexes in each single request, use with total_received to compute an avg | ||||||
|     total_distinct_index_count: usize, |     total_distinct_index_count: usize, | ||||||
|  |     // sum of the number of distinct remotes in each single request, use with total_received to compute an avg | ||||||
|  |     total_distinct_remote_count: usize, | ||||||
|     // number of queries with a single index, use with total_received to compute a proportion |     // number of queries with a single index, use with total_received to compute a proportion | ||||||
|     total_single_index: usize, |     total_single_index: usize, | ||||||
|  |  | ||||||
| @@ -31,15 +33,13 @@ impl MultiSearchAggregator { | |||||||
|     pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { |     pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { | ||||||
|         let use_federation = federated_search.federation.is_some(); |         let use_federation = federated_search.federation.is_some(); | ||||||
|  |  | ||||||
|         let distinct_indexes: HashSet<_> = federated_search |         let mut distinct_indexes = HashSet::with_capacity(federated_search.queries.len()); | ||||||
|             .queries |         let mut distinct_remotes = HashSet::with_capacity(federated_search.queries.len()); | ||||||
|             .iter() |  | ||||||
|             .map(|query| { |  | ||||||
|                 let query = &query; |  | ||||||
|         // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex |         // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex | ||||||
|                 let SearchQueryWithIndex { |         for SearchQueryWithIndex { | ||||||
|             index_uid, |             index_uid, | ||||||
|                     federation_options: _, |             federation_options, | ||||||
|             q: _, |             q: _, | ||||||
|             vector: _, |             vector: _, | ||||||
|             offset: _, |             offset: _, | ||||||
| @@ -66,11 +66,16 @@ impl MultiSearchAggregator { | |||||||
|             hybrid: _, |             hybrid: _, | ||||||
|             ranking_score_threshold: _, |             ranking_score_threshold: _, | ||||||
|             locales: _, |             locales: _, | ||||||
|                 } = query; |         } in &federated_search.queries | ||||||
|  |         { | ||||||
|  |             if let Some(federation_options) = federation_options { | ||||||
|  |                 if let Some(remote) = &federation_options.remote { | ||||||
|  |                     distinct_remotes.insert(remote.as_str()); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|                 index_uid.as_str() |             distinct_indexes.insert(index_uid.as_str()); | ||||||
|             }) |         } | ||||||
|             .collect(); |  | ||||||
|  |  | ||||||
|         let show_ranking_score = |         let show_ranking_score = | ||||||
|             federated_search.queries.iter().any(|query| query.show_ranking_score); |             federated_search.queries.iter().any(|query| query.show_ranking_score); | ||||||
| @@ -81,6 +86,7 @@ impl MultiSearchAggregator { | |||||||
|             total_received: 1, |             total_received: 1, | ||||||
|             total_succeeded: 0, |             total_succeeded: 0, | ||||||
|             total_distinct_index_count: distinct_indexes.len(), |             total_distinct_index_count: distinct_indexes.len(), | ||||||
|  |             total_distinct_remote_count: distinct_remotes.len(), | ||||||
|             total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, |             total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, | ||||||
|             total_search_count: federated_search.queries.len(), |             total_search_count: federated_search.queries.len(), | ||||||
|             show_ranking_score, |             show_ranking_score, | ||||||
| @@ -110,6 +116,8 @@ impl Aggregate for MultiSearchAggregator { | |||||||
|         let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); |         let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); | ||||||
|         let total_distinct_index_count = |         let total_distinct_index_count = | ||||||
|             this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); |             this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); | ||||||
|  |         let total_distinct_remote_count = | ||||||
|  |             this.total_distinct_remote_count.saturating_add(new.total_distinct_remote_count); | ||||||
|         let total_single_index = this.total_single_index.saturating_add(new.total_single_index); |         let total_single_index = this.total_single_index.saturating_add(new.total_single_index); | ||||||
|         let total_search_count = this.total_search_count.saturating_add(new.total_search_count); |         let total_search_count = this.total_search_count.saturating_add(new.total_search_count); | ||||||
|         let show_ranking_score = this.show_ranking_score || new.show_ranking_score; |         let show_ranking_score = this.show_ranking_score || new.show_ranking_score; | ||||||
| @@ -121,6 +129,7 @@ impl Aggregate for MultiSearchAggregator { | |||||||
|             total_received, |             total_received, | ||||||
|             total_succeeded, |             total_succeeded, | ||||||
|             total_distinct_index_count, |             total_distinct_index_count, | ||||||
|  |             total_distinct_remote_count, | ||||||
|             total_single_index, |             total_single_index, | ||||||
|             total_search_count, |             total_search_count, | ||||||
|             show_ranking_score, |             show_ranking_score, | ||||||
| @@ -134,6 +143,7 @@ impl Aggregate for MultiSearchAggregator { | |||||||
|             total_received, |             total_received, | ||||||
|             total_succeeded, |             total_succeeded, | ||||||
|             total_distinct_index_count, |             total_distinct_index_count, | ||||||
|  |             total_distinct_remote_count, | ||||||
|             total_single_index, |             total_single_index, | ||||||
|             total_search_count, |             total_search_count, | ||||||
|             show_ranking_score, |             show_ranking_score, | ||||||
| @@ -152,6 +162,10 @@ impl Aggregate for MultiSearchAggregator { | |||||||
|                 "total_distinct_index_count": total_distinct_index_count, |                 "total_distinct_index_count": total_distinct_index_count, | ||||||
|                 "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early |                 "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early | ||||||
|             }, |             }, | ||||||
|  |             "remotes": { | ||||||
|  |                 "total_distinct_remote_count": total_distinct_remote_count, | ||||||
|  |                 "avg_distinct_remote_count": (total_distinct_remote_count as f64) / (total_received as f64), // not 0 else returned early | ||||||
|  |             }, | ||||||
|             "searches": { |             "searches": { | ||||||
|                 "total_search_count": total_search_count, |                 "total_search_count": total_search_count, | ||||||
|                 "avg_search_count": (total_search_count as f64) / (total_received as f64), |                 "avg_search_count": (total_search_count as f64) / (total_received as f64), | ||||||
|   | |||||||
							
								
								
									
										261
									
								
								crates/meilisearch/src/routes/network.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										261
									
								
								crates/meilisearch/src/routes/network.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,261 @@ | |||||||
|  | use std::collections::BTreeMap; | ||||||
|  |  | ||||||
|  | use actix_web::web::{self, Data}; | ||||||
|  | use actix_web::{HttpRequest, HttpResponse}; | ||||||
|  | use deserr::actix_web::AwebJson; | ||||||
|  | use deserr::Deserr; | ||||||
|  | use index_scheduler::IndexScheduler; | ||||||
|  | use itertools::{EitherOrBoth, Itertools}; | ||||||
|  | use meilisearch_types::deserr::DeserrJsonError; | ||||||
|  | use meilisearch_types::error::deserr_codes::{ | ||||||
|  |     InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl, | ||||||
|  | }; | ||||||
|  | use meilisearch_types::error::ResponseError; | ||||||
|  | use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote}; | ||||||
|  | use meilisearch_types::keys::actions; | ||||||
|  | use meilisearch_types::milli::update::Setting; | ||||||
|  | use serde::Serialize; | ||||||
|  | use tracing::debug; | ||||||
|  | use utoipa::{OpenApi, ToSchema}; | ||||||
|  |  | ||||||
|  | use crate::analytics::{Aggregate, Analytics}; | ||||||
|  | use crate::extractors::authentication::policies::ActionPolicy; | ||||||
|  | use crate::extractors::authentication::GuardedData; | ||||||
|  | use crate::extractors::sequential_extractor::SeqHandler; | ||||||
|  |  | ||||||
|  | // OpenAPI descriptor for this module: it lists the `get_network` and `patch_network` | ||||||
|  | // operations and declares the "Network" tag (description + external docs link) they use. | ||||||
|  | #[derive(OpenApi)] | ||||||
|  | #[openapi( | ||||||
|  |     paths(get_network, patch_network), | ||||||
|  |     tags(( | ||||||
|  |         name = "Network", | ||||||
|  |         description = "The `/network` route allows you to describe the topology of a network of Meilisearch instances. | ||||||
|  |  | ||||||
|  | This route is **synchronous**. This means that no task object will be returned, and any change to the network will be made available immediately.", | ||||||
|  |         external_docs(url = "https://www.meilisearch.com/docs/reference/api/network"), | ||||||
|  |     )), | ||||||
|  | )] | ||||||
|  | pub struct NetworkApi; | ||||||
|  |  | ||||||
|  | /// Registers the handlers of this module on `cfg`. | ||||||
|  | /// | ||||||
|  | /// A single resource at the scope root (`""`) serves `GET` through `get_network` and | ||||||
|  | /// `PATCH` through `patch_network`, the latter wrapped in `SeqHandler`. | ||||||
|  | pub fn configure(cfg: &mut web::ServiceConfig) { | ||||||
|  |     let network_resource = web::resource("") | ||||||
|  |         .route(web::get().to(get_network)) | ||||||
|  |         .route(web::patch().to(SeqHandler(patch_network))); | ||||||
|  |  | ||||||
|  |     cfg.service(network_resource); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Get network topology | ||||||
|  | /// | ||||||
|  | /// Get a list of all Meilisearch instances currently known to this instance. | ||||||
|  | #[utoipa::path( | ||||||
|  |     get, | ||||||
|  |     path = "", | ||||||
|  |     tag = "Network", | ||||||
|  |     security(("Bearer" = ["network.get", "network.*", "*"])), | ||||||
|  |     responses( | ||||||
|  |         (status = OK, description = "Known nodes are returned", body = Network, content_type = "application/json", example = json!( | ||||||
|  |             { | ||||||
|  |             "self": "ms-0", | ||||||
|  |             "remotes": { | ||||||
|  |             "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, | ||||||
|  |             "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, | ||||||
|  |             "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, | ||||||
|  |         } | ||||||
|  |     })), | ||||||
|  |         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||||
|  |             { | ||||||
|  |                 "message": "The Authorization header is missing. It must use the bearer authorization method.", | ||||||
|  |                 "code": "missing_authorization_header", | ||||||
|  |                 "type": "auth", | ||||||
|  |                 "link": "https://docs.meilisearch.com/errors#missing_authorization_header" | ||||||
|  |             } | ||||||
|  |         )), | ||||||
|  |     ) | ||||||
|  | )] | ||||||
|  | async fn get_network( | ||||||
|  |     index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_GET }>, Data<IndexScheduler>>, | ||||||
|  | ) -> Result<HttpResponse, ResponseError> { | ||||||
|  |     // Propagate the error from `check_network` before reading anything. | ||||||
|  |     index_scheduler.features().check_network("Using the /network route")?; | ||||||
|  |  | ||||||
|  |     // The stored network is logged, then serialized as-is into the 200 response. | ||||||
|  |     let topology = index_scheduler.network(); | ||||||
|  |     debug!(returns = ?topology, "Get network"); | ||||||
|  |  | ||||||
|  |     let response = HttpResponse::Ok().json(topology); | ||||||
|  |     Ok(response) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // API-side description of one remote instance. Both fields are `Setting`s and both | ||||||
|  | // default when omitted from the payload (`#[deserr(default)]` / `#[serde(default)]`). | ||||||
|  | // Field names are exposed in camelCase and unknown fields are rejected on input. | ||||||
|  | #[derive(Debug, Deserr, ToSchema, Serialize)] | ||||||
|  | #[deserr(error = DeserrJsonError<InvalidNetworkRemotes>, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | #[schema(rename_all = "camelCase")] | ||||||
|  | pub struct Remote { | ||||||
|  |     // The schema example must be a single URL string to match `value_type = Option<String>`; | ||||||
|  |     // a map of remotes is the example for `Network::remotes`, not for this field. | ||||||
|  |     #[schema(value_type = Option<String>, example = json!("http://localhost:7700"))] | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)] | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub url: Setting<String>, | ||||||
|  |     #[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))] | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)] | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub search_api_key: Setting<String>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // API-side description of the whole network: the named remotes plus the name of the | ||||||
|  | // local instance. The Rust field `local` is exposed as `self` in JSON, in the schema and | ||||||
|  | // on deserialization. Both fields default when omitted from the payload. | ||||||
|  | #[derive(Debug, Deserr, ToSchema, Serialize)] | ||||||
|  | #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | #[schema(rename_all = "camelCase")] | ||||||
|  | pub struct Network { | ||||||
|  |     // The schema example must be a map of remote name -> `Remote` to match | ||||||
|  |     // `value_type = Option<BTreeMap<String, Remote>>`; a bare URL string is not a valid value here. | ||||||
|  |     #[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!({ | ||||||
|  |         "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, | ||||||
|  |         "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, | ||||||
|  |         "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, | ||||||
|  |     }))] | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)] | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub remotes: Setting<BTreeMap<String, Option<Remote>>>, | ||||||
|  |     #[schema(value_type = Option<String>, example = json!("ms-00"), rename = "self")] | ||||||
|  |     #[serde(default, rename = "self")] | ||||||
|  |     #[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)] | ||||||
|  |     pub local: Setting<String>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Remote { | ||||||
|  |     /// Converts this API-side remote into its database representation. | ||||||
|  |     /// | ||||||
|  |     /// `name` is the key of this remote in the `remotes` map; it is only used to build | ||||||
|  |     /// the error message. | ||||||
|  |     /// | ||||||
|  |     /// # Errors | ||||||
|  |     /// | ||||||
|  |     /// Returns a `MissingNetworkUrl` error when `url.set()` yields no value. | ||||||
|  |     pub fn try_into_db_node(self, name: &str) -> Result<DbRemote, ResponseError> { | ||||||
|  |         // `ok_or_else` defers formatting and allocating the error until the URL is | ||||||
|  |         // actually missing, instead of building it on every call. | ||||||
|  |         let url = self.url.set().ok_or_else(|| { | ||||||
|  |             ResponseError::from_msg( | ||||||
|  |                 format!("Missing field `.remotes.{name}.url`"), | ||||||
|  |                 meilisearch_types::error::Code::MissingNetworkUrl, | ||||||
|  |             ) | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         Ok(DbRemote { url, search_api_key: self.search_api_key.set() }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Analytics payload published under the "Network Updated" event name. | ||||||
|  | /// | ||||||
|  | /// It is serialized with the derived `Serialize` implementation, so the field names | ||||||
|  | /// below are the keys of the emitted event. | ||||||
|  | #[derive(Serialize)] | ||||||
|  | pub struct PatchNetworkAnalytics { | ||||||
|  |     // presumably the number of remotes after the patch — confirm against `patch_network` | ||||||
|  |     network_size: usize, | ||||||
|  |     // presumably whether a `self` name is configured after the patch — confirm against `patch_network` | ||||||
|  |     network_has_self: bool, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Aggregate for PatchNetworkAnalytics { | ||||||
|  |     /// Name under which this event is reported. | ||||||
|  |     fn event_name(&self) -> &'static str { | ||||||
|  |         "Network Updated" | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Keeps only the most recent state: every field of the result comes from `new`, | ||||||
|  |     /// and the previously accumulated value is discarded. | ||||||
|  |     fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> { | ||||||
|  |         let Self { network_size, network_has_self } = *new; | ||||||
|  |         Box::new(Self { network_size, network_has_self }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Serializes the payload to JSON, falling back to the default `Value` if | ||||||
|  |     /// serialization fails. | ||||||
|  |     fn into_event(self: Box<Self>) -> serde_json::Value { | ||||||
|  |         match serde_json::to_value(*self) { | ||||||
|  |             Ok(event) => event, | ||||||
|  |             Err(_) => serde_json::Value::default(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Configure Network | ||||||
|  | /// | ||||||
|  | /// Add or remove nodes from network. | ||||||
|  | #[utoipa::path( | ||||||
|  |     patch, | ||||||
|  |     path = "", | ||||||
|  |     tag = "Network", | ||||||
|  |     request_body = Network, | ||||||
|  |     security(("Bearer" = ["network.update", "network.*", "*"])), | ||||||
|  |     responses( | ||||||
|  |         (status = OK, description = "New network state is returned",  body = Network, content_type = "application/json", example = json!( | ||||||
|  |             { | ||||||
|  |                 "self": "ms-0", | ||||||
|  |                 "remotes": { | ||||||
|  |                 "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, | ||||||
|  |                 "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, | ||||||
|  |                 "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, | ||||||
|  |             } | ||||||
|  |         })), | ||||||
|  |         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||||
|  |             { | ||||||
|  |                 "message": "The Authorization header is missing. It must use the bearer authorization method.", | ||||||
|  |                 "code": "missing_authorization_header", | ||||||
|  |                 "type": "auth", | ||||||
|  |                 "link": "https://docs.meilisearch.com/errors#missing_authorization_header" | ||||||
|  |             } | ||||||
|  |         )), | ||||||
|  |     ) | ||||||
|  | )] | ||||||
|  | async fn patch_network( | ||||||
|  |     index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>, | ||||||
|  |     new_network: AwebJson<Network, DeserrJsonError>, | ||||||
|  |     req: HttpRequest, | ||||||
|  |     analytics: Data<Analytics>, | ||||||
|  | ) -> Result<HttpResponse, ResponseError> { | ||||||
|  |     index_scheduler.features().check_network("Using the /network route")?; | ||||||
|  |  | ||||||
|  |     let new_network = new_network.0; | ||||||
|  |     let old_network = index_scheduler.network(); | ||||||
|  |     debug!(parameters = ?new_network, "Patch network"); | ||||||
|  |  | ||||||
|  |     let merged_self = match new_network.local { | ||||||
|  |         Setting::Set(new_self) => Some(new_self), | ||||||
|  |         Setting::Reset => None, | ||||||
|  |         Setting::NotSet => old_network.local, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let merged_remotes = match new_network.remotes { | ||||||
|  |         Setting::Set(new_remotes) => { | ||||||
|  |             let mut merged_remotes = BTreeMap::new(); | ||||||
|  |             for either_or_both in old_network | ||||||
|  |                 .remotes | ||||||
|  |                 .into_iter() | ||||||
|  |                 .merge_join_by(new_remotes.into_iter(), |left, right| left.0.cmp(&right.0)) | ||||||
|  |             { | ||||||
|  |                 match either_or_both { | ||||||
|  |                     EitherOrBoth::Both((key, old), (_, Some(new))) => { | ||||||
|  |                         let DbRemote { url: old_url, search_api_key: old_search_api_key } = old; | ||||||
|  |  | ||||||
|  |                         let Remote { url: new_url, search_api_key: new_search_api_key } = new; | ||||||
|  |  | ||||||
|  |                         let merged = DbRemote { | ||||||
|  |                             url: match new_url { | ||||||
|  |                                 Setting::Set(new_url) => new_url, | ||||||
|  |                                 Setting::Reset => { | ||||||
|  |                                     return Err(ResponseError::from_msg( | ||||||
|  |                                         format!( | ||||||
|  |                                             "Field `.remotes.{key}.url` cannot be set to `null`" | ||||||
|  |                                         ), | ||||||
|  |                                         meilisearch_types::error::Code::InvalidNetworkUrl, | ||||||
|  |                                     )) | ||||||
|  |                                 } | ||||||
|  |                                 Setting::NotSet => old_url, | ||||||
|  |                             }, | ||||||
|  |                             search_api_key: match new_search_api_key { | ||||||
|  |                                 Setting::Set(new_search_api_key) => Some(new_search_api_key), | ||||||
|  |                                 Setting::Reset => None, | ||||||
|  |                                 Setting::NotSet => old_search_api_key, | ||||||
|  |                             }, | ||||||
|  |                         }; | ||||||
|  |                         merged_remotes.insert(key, merged); | ||||||
|  |                     } | ||||||
|  |                     EitherOrBoth::Both((_, _), (_, None)) | EitherOrBoth::Right((_, None)) => {} | ||||||
|  |                     EitherOrBoth::Left((key, node)) => { | ||||||
|  |                         merged_remotes.insert(key, node); | ||||||
|  |                     } | ||||||
|  |                     EitherOrBoth::Right((key, Some(node))) => { | ||||||
|  |                         let node = node.try_into_db_node(&key)?; | ||||||
|  |                         merged_remotes.insert(key, node); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             merged_remotes | ||||||
|  |         } | ||||||
|  |         Setting::Reset => BTreeMap::new(), | ||||||
|  |         Setting::NotSet => old_network.remotes, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     analytics.publish( | ||||||
|  |         PatchNetworkAnalytics { | ||||||
|  |             network_size: merged_remotes.len(), | ||||||
|  |             network_has_self: merged_self.is_some(), | ||||||
|  |         }, | ||||||
|  |         &req, | ||||||
|  |     ); | ||||||
|  |  | ||||||
|  |     let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes }; | ||||||
|  |     index_scheduler.put_network(merged_network.clone())?; | ||||||
|  |     debug!(returns = ?merged_network, "Patch network"); | ||||||
|  |     Ok(HttpResponse::Ok().json(merged_network)) | ||||||
|  | } | ||||||
| @@ -1,3 +1,5 @@ | |||||||
|  | use std::io::ErrorKind; | ||||||
|  |  | ||||||
| use actix_web::web::Data; | use actix_web::web::Data; | ||||||
| use actix_web::{web, HttpRequest, HttpResponse}; | use actix_web::{web, HttpRequest, HttpResponse}; | ||||||
| use deserr::actix_web::AwebQueryParameter; | use deserr::actix_web::AwebQueryParameter; | ||||||
| @@ -16,6 +18,7 @@ use serde::Serialize; | |||||||
| use time::format_description::well_known::Rfc3339; | use time::format_description::well_known::Rfc3339; | ||||||
| use time::macros::format_description; | use time::macros::format_description; | ||||||
| use time::{Date, Duration, OffsetDateTime, Time}; | use time::{Date, Duration, OffsetDateTime, Time}; | ||||||
|  | use tokio::io::AsyncReadExt; | ||||||
| use tokio::task; | use tokio::task; | ||||||
| use utoipa::{IntoParams, OpenApi, ToSchema}; | use utoipa::{IntoParams, OpenApi, ToSchema}; | ||||||
|  |  | ||||||
| @@ -44,7 +47,11 @@ pub fn configure(cfg: &mut web::ServiceConfig) { | |||||||
|             .route(web::delete().to(SeqHandler(delete_tasks))), |             .route(web::delete().to(SeqHandler(delete_tasks))), | ||||||
|     ) |     ) | ||||||
|     .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks)))) |     .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks)))) | ||||||
|     .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); |     .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))) | ||||||
|  |     .service( | ||||||
|  |         web::resource("/{task_id}/documents") | ||||||
|  |             .route(web::get().to(SeqHandler(get_task_documents_file))), | ||||||
|  |     ); | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Deserr, IntoParams)] | #[derive(Debug, Deserr, IntoParams)] | ||||||
| @@ -639,6 +646,76 @@ async fn get_task( | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Get a task's documents. | ||||||
|  | /// | ||||||
|  | /// Get a [task's documents file](https://www.meilisearch.com/docs/learn/async/asynchronous_operations). | ||||||
|  | #[utoipa::path( | ||||||
|  |     get, | ||||||
|  |     path = "/{taskUid}/documents", | ||||||
|  |     tag = "Tasks", | ||||||
|  |     security(("Bearer" = ["tasks.get", "tasks.*", "*"])), | ||||||
|  |     params(("taskUid", format = UInt32, example = 0, description = "The task identifier", nullable = false)), | ||||||
|  |     responses( | ||||||
|  |         (status = 200, description = "The content of the task update", body = serde_json::Value, content_type = "application/x-ndjson"), | ||||||
|  |         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||||
|  |             { | ||||||
|  |                 "message": "The Authorization header is missing. It must use the bearer authorization method.", | ||||||
|  |                 "code": "missing_authorization_header", | ||||||
|  |                 "type": "auth", | ||||||
|  |                 "link": "https://docs.meilisearch.com/errors#missing_authorization_header" | ||||||
|  |             } | ||||||
|  |         )), | ||||||
|  |         (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!( | ||||||
|  |             { | ||||||
|  |                 "message": "Task :taskUid not found.", | ||||||
|  |                 "code": "task_not_found", | ||||||
|  |                 "type": "invalid_request", | ||||||
|  |                 "link": "https://docs.meilisearch.com/errors/#task_not_found" | ||||||
|  |             } | ||||||
|  |         )) | ||||||
|  |     ) | ||||||
|  | )] | ||||||
|  | async fn get_task_documents_file( | ||||||
|  |     index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>, | ||||||
|  |     task_uid: web::Path<String>, | ||||||
|  | ) -> Result<HttpResponse, ResponseError> { | ||||||
|  |     index_scheduler.features().check_get_task_documents_route()?; | ||||||
|  |     let task_uid_string = task_uid.into_inner(); | ||||||
|  |  | ||||||
|  |     let task_uid: TaskId = match task_uid_string.parse() { | ||||||
|  |         Ok(id) => id, | ||||||
|  |         Err(_e) => { | ||||||
|  |             return Err(index_scheduler::Error::InvalidTaskUid { task_uid: task_uid_string }.into()) | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() }; | ||||||
|  |     let filters = index_scheduler.filters(); | ||||||
|  |     let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?; | ||||||
|  |  | ||||||
|  |     if let Some(task) = tasks.first() { | ||||||
|  |         match task.content_uuid() { | ||||||
|  |             Some(uuid) => { | ||||||
|  |                 let mut tfile = match index_scheduler.queue.update_file(uuid) { | ||||||
|  |                     Ok(file) => tokio::fs::File::from_std(file), | ||||||
|  |                     Err(file_store::Error::IoError(e)) if e.kind() == ErrorKind::NotFound => { | ||||||
|  |                         return Err(index_scheduler::Error::TaskFileNotFound(task_uid).into()) | ||||||
|  |                     } | ||||||
|  |                     Err(e) => return Err(e.into()), | ||||||
|  |                 }; | ||||||
|  |                 // Yes, that's awful to put everything in memory when we could have streamed it from | ||||||
|  |                 // disk but it's really (really) complex to do with the current state of async Rust. | ||||||
|  |                 let mut content = String::new(); | ||||||
|  |                 tfile.read_to_string(&mut content).await?; | ||||||
|  |                 Ok(HttpResponse::Ok().content_type("application/x-ndjson").body(content)) | ||||||
|  |             } | ||||||
|  |             None => Err(index_scheduler::Error::TaskFileNotFound(task_uid).into()), | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         Err(index_scheduler::Error::TaskNotFound(task_uid).into()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| pub enum DeserializeDateOption { | pub enum DeserializeDateOption { | ||||||
|     Before, |     Before, | ||||||
|     After, |     After, | ||||||
|   | |||||||
| @@ -1,923 +0,0 @@ | |||||||
| use std::cmp::Ordering; |  | ||||||
| use std::collections::BTreeMap; |  | ||||||
| use std::fmt; |  | ||||||
| use std::iter::Zip; |  | ||||||
| use std::rc::Rc; |  | ||||||
| use std::str::FromStr as _; |  | ||||||
| use std::time::Duration; |  | ||||||
| use std::vec::{IntoIter, Vec}; |  | ||||||
|  |  | ||||||
| use actix_http::StatusCode; |  | ||||||
| use index_scheduler::{IndexScheduler, RoFeatures}; |  | ||||||
| use indexmap::IndexMap; |  | ||||||
| use meilisearch_types::deserr::DeserrJsonError; |  | ||||||
| use meilisearch_types::error::deserr_codes::{ |  | ||||||
|     InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, |  | ||||||
|     InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit, |  | ||||||
|     InvalidSearchOffset, |  | ||||||
| }; |  | ||||||
| use meilisearch_types::error::ResponseError; |  | ||||||
| use meilisearch_types::index_uid::IndexUid; |  | ||||||
| use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; |  | ||||||
| use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; |  | ||||||
| use roaring::RoaringBitmap; |  | ||||||
| use serde::Serialize; |  | ||||||
| use utoipa::ToSchema; |  | ||||||
|  |  | ||||||
| use super::ranking_rules::{self, RankingRules}; |  | ||||||
| use super::{ |  | ||||||
|     compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats, |  | ||||||
|     HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, |  | ||||||
| }; |  | ||||||
| use crate::error::MeilisearchHttpError; |  | ||||||
| use crate::routes::indexes::search::search_kind; |  | ||||||
|  |  | ||||||
/// Value wrapped by `Weight::default()`, i.e. the weight used when a query
/// does not specify one.
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
|  |  | ||||||
// Per-query federation settings, deserialized from camelCase JSON with
// unknown fields rejected.
#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FederationOptions {
    // Multiplier applied to the query's scores when results are merged;
    // falls back to `Weight::default()` when absent, exposed as a plain `f64`
    // in the schema.
    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
    #[schema(value_type = f64)]
    pub weight: Weight,
}
|  |  | ||||||
/// Newtype around an `f64` weight. It is deserialized from an `f64` through
/// its `TryFrom<f64>` implementation, which reports rejected values as
/// `InvalidMultiSearchWeight`.
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
pub struct Weight(f64);
|  |  | ||||||
| impl Default for Weight { |  | ||||||
|     fn default() -> Self { |  | ||||||
|         Weight(DEFAULT_FEDERATED_WEIGHT) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::convert::TryFrom<f64> for Weight { |  | ||||||
|     type Error = InvalidMultiSearchWeight; |  | ||||||
|  |  | ||||||
|     fn try_from(f: f64) -> Result<Self, Self::Error> { |  | ||||||
|         if f < 0.0 { |  | ||||||
|             Err(InvalidMultiSearchWeight) |  | ||||||
|         } else { |  | ||||||
|             Ok(Weight(f)) |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl std::ops::Deref for Weight { |  | ||||||
|     type Target = f64; |  | ||||||
|  |  | ||||||
|     fn deref(&self) -> &Self::Target { |  | ||||||
|         &self.0 |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
// Request-level federation settings, deserialized from camelCase JSON with
// unknown fields rejected.
#[derive(Debug, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct Federation {
    // Defaults to `super::DEFAULT_SEARCH_LIMIT()` when absent.
    #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
    pub limit: usize,
    // Defaults to `super::DEFAULT_SEARCH_OFFSET()` when absent.
    #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
    pub offset: usize,
    // Per-index optional list of strings (presumably facet names; confirm
    // against the consumer). Empty map when absent.
    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
    pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
    // Optional facet-merging settings; `None` when absent.
    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
    pub merge_facets: Option<MergeFacets>,
}
|  |  | ||||||
// Settings consumed by `FederatedFacets::merge`.
#[derive(Copy, Clone, Debug, deserr::Deserr, Default, ToSchema)]
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct MergeFacets {
    // When set, each merged facet's value list is truncated to this length.
    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
    pub max_values_per_facet: Option<usize>,
}
|  |  | ||||||
// Request body pairing a list of per-index queries with optional
// federation settings.
#[derive(Debug, deserr::Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct FederatedSearch {
    pub queries: Vec<SearchQueryWithIndex>,
    // `None` when the field is absent from the request.
    #[deserr(default)]
    pub federation: Option<Federation>,
}
|  |  | ||||||
// Response of a federated search, serialized with camelCase keys.
#[derive(Serialize, Clone, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct FederatedSearchResult {
    pub hits: Vec<SearchHit>,
    pub processing_time_ms: u128,
    // Flattened: the fields of `HitsInfo` appear at the top level of the JSON.
    #[serde(flatten)]
    pub hits_info: HitsInfo,

    // Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub semantic_hit_count: Option<u32>,

    // Omitted from the JSON when `None`; documented in the schema as a map of maps.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)]
    pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
    // Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub facet_stats: Option<BTreeMap<String, FacetStats>>,
    // Omitted from the JSON when no index has facets.
    #[serde(skip_serializing_if = "FederatedFacets::is_empty")]
    pub facets_by_index: FederatedFacets,

    // These fields are only used for analytics purposes
    #[serde(skip)]
    pub degraded: bool,
    #[serde(skip)]
    pub used_negative_operator: bool,
}
|  |  | ||||||
| impl fmt::Debug for FederatedSearchResult { |  | ||||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |  | ||||||
|         let FederatedSearchResult { |  | ||||||
|             hits, |  | ||||||
|             processing_time_ms, |  | ||||||
|             hits_info, |  | ||||||
|             semantic_hit_count, |  | ||||||
|             degraded, |  | ||||||
|             used_negative_operator, |  | ||||||
|             facet_distribution, |  | ||||||
|             facet_stats, |  | ||||||
|             facets_by_index, |  | ||||||
|         } = self; |  | ||||||
|  |  | ||||||
|         let mut debug = f.debug_struct("SearchResult"); |  | ||||||
|         // The most important thing when looking at a search result is the time it took to process |  | ||||||
|         debug.field("processing_time_ms", &processing_time_ms); |  | ||||||
|         debug.field("hits", &format!("[{} hits returned]", hits.len())); |  | ||||||
|         debug.field("hits_info", &hits_info); |  | ||||||
|         if *used_negative_operator { |  | ||||||
|             debug.field("used_negative_operator", used_negative_operator); |  | ||||||
|         } |  | ||||||
|         if *degraded { |  | ||||||
|             debug.field("degraded", degraded); |  | ||||||
|         } |  | ||||||
|         if let Some(facet_distribution) = facet_distribution { |  | ||||||
|             debug.field("facet_distribution", &facet_distribution); |  | ||||||
|         } |  | ||||||
|         if let Some(facet_stats) = facet_stats { |  | ||||||
|             debug.field("facet_stats", &facet_stats); |  | ||||||
|         } |  | ||||||
|         if let Some(semantic_hit_count) = semantic_hit_count { |  | ||||||
|             debug.field("semantic_hit_count", &semantic_hit_count); |  | ||||||
|         } |  | ||||||
|         if !facets_by_index.is_empty() { |  | ||||||
|             debug.field("facets_by_index", &facets_by_index); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         debug.finish() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
/// Borrowed score details paired with the weight to multiply their scores by
/// when comparing two hits.
struct WeightedScore<'a> {
    details: &'a [ScoreDetails],
    weight: f64,
}
|  |  | ||||||
| impl<'a> WeightedScore<'a> { |  | ||||||
|     pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self { |  | ||||||
|         Self { details, weight } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn weighted_global_score(&self) -> f64 { |  | ||||||
|         ScoreDetails::global_score(self.details.iter()) * self.weight |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering { |  | ||||||
|         self.weighted_global_score() |  | ||||||
|             .partial_cmp(&other.weighted_global_score()) |  | ||||||
|             // both are numbers, possibly infinite |  | ||||||
|             .unwrap() |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn compare(&self, other: &Self) -> Ordering { |  | ||||||
|         let mut left_it = ScoreDetails::score_values(self.details.iter()); |  | ||||||
|         let mut right_it = ScoreDetails::score_values(other.details.iter()); |  | ||||||
|  |  | ||||||
|         loop { |  | ||||||
|             let left = left_it.next(); |  | ||||||
|             let right = right_it.next(); |  | ||||||
|  |  | ||||||
|             match (left, right) { |  | ||||||
|                 (None, None) => return Ordering::Equal, |  | ||||||
|                 (None, Some(_)) => return Ordering::Less, |  | ||||||
|                 (Some(_), None) => return Ordering::Greater, |  | ||||||
|                 (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => { |  | ||||||
|                     let left = left * self.weight; |  | ||||||
|                     let right = right * other.weight; |  | ||||||
|                     if (left - right).abs() <= f64::EPSILON { |  | ||||||
|                         continue; |  | ||||||
|                     } |  | ||||||
|                     return left.partial_cmp(&right).unwrap(); |  | ||||||
|                 } |  | ||||||
|                 (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => { |  | ||||||
|                     match left.partial_cmp(right) { |  | ||||||
|                         Some(Ordering::Equal) => continue, |  | ||||||
|                         Some(order) => return order, |  | ||||||
|                         None => return self.compare_weighted_global_scores(other), |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => { |  | ||||||
|                     match left.partial_cmp(right) { |  | ||||||
|                         Some(Ordering::Equal) => continue, |  | ||||||
|                         Some(order) => return order, |  | ||||||
|                         None => { |  | ||||||
|                             return self.compare_weighted_global_scores(other); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 // not comparable details, use global |  | ||||||
|                 (Some(ScoreValue::Score(_)), Some(_)) |  | ||||||
|                 | (Some(_), Some(ScoreValue::Score(_))) |  | ||||||
|                 | (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) |  | ||||||
|                 | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { |  | ||||||
|                     let left_count = left_it.count(); |  | ||||||
|                     let right_count = right_it.count(); |  | ||||||
|                     // compare how many remaining groups of rules each side has. |  | ||||||
|                     // the group with the most remaining groups wins. |  | ||||||
|                     return left_count |  | ||||||
|                         .cmp(&right_count) |  | ||||||
|                         // breaks ties with the global ranking score |  | ||||||
|                         .then_with(|| self.compare_weighted_global_scores(other)); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
/// A search query bundled with its federation options and a query index.
struct QueryByIndex {
    query: SearchQuery,
    federation_options: FederationOptions,
    // NOTE(review): presumably the position of the query in the incoming
    // request; confirm against the code that builds this struct.
    query_index: usize,
}
|  |  | ||||||
/// Raw result of one query: document ids and their score details held in two
/// separate vectors that `SearchResultByQueryIter::new` zips together.
struct SearchResultByQuery<'a> {
    documents_ids: Vec<DocumentId>,
    // Zipped with `documents_ids`, so entry `i` is paired with document `i`.
    document_scores: Vec<Vec<ScoreDetails>>,
    federation_options: FederationOptions,
    hit_maker: HitMaker<'a>,
    query_index: usize,
}
|  |  | ||||||
/// Iterator over the `(document id, score details)` pairs of one query's
/// result; every yielded item also carries the query's options, hit maker and
/// query index.
struct SearchResultByQueryIter<'a> {
    it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
    federation_options: FederationOptions,
    // Reference-counted so each yielded item can hold a handle to the same hit maker.
    hit_maker: Rc<HitMaker<'a>>,
    query_index: usize,
}
|  |  | ||||||
| impl<'a> SearchResultByQueryIter<'a> { |  | ||||||
|     fn new( |  | ||||||
|         SearchResultByQuery { |  | ||||||
|             documents_ids, |  | ||||||
|             document_scores, |  | ||||||
|             federation_options, |  | ||||||
|             hit_maker, |  | ||||||
|             query_index, |  | ||||||
|         }: SearchResultByQuery<'a>, |  | ||||||
|     ) -> Self { |  | ||||||
|         let it = documents_ids.into_iter().zip(document_scores); |  | ||||||
|         Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
/// One document yielded by `SearchResultByQueryIter`, together with the
/// per-query data needed to rank it against documents from other queries.
struct SearchResultByQueryIterItem<'a> {
    docid: DocumentId,
    score: Vec<ScoreDetails>,
    federation_options: FederationOptions,
    hit_maker: Rc<HitMaker<'a>>,
    // Breaks ties between equally scored items in `merge_index_local_results`
    // (the lower index goes first).
    query_index: usize,
}
|  |  | ||||||
| fn merge_index_local_results( |  | ||||||
|     results_by_query: Vec<SearchResultByQuery<'_>>, |  | ||||||
| ) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ { |  | ||||||
|     itertools::kmerge_by( |  | ||||||
|         results_by_query.into_iter().map(SearchResultByQueryIter::new), |  | ||||||
|         |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| { |  | ||||||
|             let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); |  | ||||||
|             let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); |  | ||||||
|  |  | ||||||
|             match left_score.compare(&right_score) { |  | ||||||
|                 // the biggest score goes first |  | ||||||
|                 Ordering::Greater => true, |  | ||||||
|                 // break ties using query index |  | ||||||
|                 Ordering::Equal => left.query_index < right.query_index, |  | ||||||
|                 Ordering::Less => false, |  | ||||||
|             } |  | ||||||
|         }, |  | ||||||
|     ) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| fn merge_index_global_results( |  | ||||||
|     results_by_index: Vec<SearchResultByIndex>, |  | ||||||
| ) -> impl Iterator<Item = SearchHitByIndex> { |  | ||||||
|     itertools::kmerge_by( |  | ||||||
|         results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()), |  | ||||||
|         |left: &SearchHitByIndex, right: &SearchHitByIndex| { |  | ||||||
|             let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); |  | ||||||
|             let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); |  | ||||||
|  |  | ||||||
|             match left_score.compare(&right_score) { |  | ||||||
|                 // the biggest score goes first |  | ||||||
|                 Ordering::Greater => true, |  | ||||||
|                 // break ties using query index |  | ||||||
|                 Ordering::Equal => left.query_index < right.query_index, |  | ||||||
|                 Ordering::Less => false, |  | ||||||
|             } |  | ||||||
|         }, |  | ||||||
|     ) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'a> Iterator for SearchResultByQueryIter<'a> { |  | ||||||
|     type Item = SearchResultByQueryIterItem<'a>; |  | ||||||
|  |  | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |  | ||||||
|         let (docid, score) = self.it.next()?; |  | ||||||
|         Some(SearchResultByQueryIterItem { |  | ||||||
|             docid, |  | ||||||
|             score, |  | ||||||
|             federation_options: self.federation_options, |  | ||||||
|             hit_maker: Rc::clone(&self.hit_maker), |  | ||||||
|             query_index: self.query_index, |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
/// A hit together with the data `merge_index_global_results` needs to rank it
/// against hits from other indexes.
struct SearchHitByIndex {
    hit: SearchHit,
    score: Vec<ScoreDetails>,
    // Supplies the weight applied to `score` during the merge.
    federation_options: FederationOptions,
    // Breaks ties between equally scored hits (the lower index goes first).
    query_index: usize,
}
|  |  | ||||||
/// Search results gathered for a single index.
struct SearchResultByIndex {
    // NOTE(review): presumably the index name/uid; confirm against the code
    // that fills this struct.
    index: String,
    // Consumed by `merge_index_global_results`.
    hits: Vec<SearchHitByIndex>,
    estimated_total_hits: usize,
    degraded: bool,
    used_negative_operator: bool,
    facets: Option<ComputedFacets>,
}
|  |  | ||||||
// Computed facets stored per index, keyed by the `index` string passed to
// `FederatedFacets::insert`.
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
|  |  | ||||||
| impl FederatedFacets { |  | ||||||
|     pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) { |  | ||||||
|         if let Some(facets) = facets { |  | ||||||
|             self.0.insert(index, facets); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn is_empty(&self) -> bool { |  | ||||||
|         self.0.is_empty() |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn merge( |  | ||||||
|         self, |  | ||||||
|         MergeFacets { max_values_per_facet }: MergeFacets, |  | ||||||
|         facet_order: BTreeMap<String, (String, OrderBy)>, |  | ||||||
|     ) -> Option<ComputedFacets> { |  | ||||||
|         if self.is_empty() { |  | ||||||
|             return None; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         let mut distribution: BTreeMap<String, _> = Default::default(); |  | ||||||
|         let mut stats: BTreeMap<String, FacetStats> = Default::default(); |  | ||||||
|  |  | ||||||
|         for facets_by_index in self.0.into_values() { |  | ||||||
|             for (facet, index_distribution) in facets_by_index.distribution { |  | ||||||
|                 match distribution.entry(facet) { |  | ||||||
|                     std::collections::btree_map::Entry::Vacant(entry) => { |  | ||||||
|                         entry.insert(index_distribution); |  | ||||||
|                     } |  | ||||||
|                     std::collections::btree_map::Entry::Occupied(mut entry) => { |  | ||||||
|                         let distribution = entry.get_mut(); |  | ||||||
|  |  | ||||||
|                         for (value, index_count) in index_distribution { |  | ||||||
|                             distribution |  | ||||||
|                                 .entry(value) |  | ||||||
|                                 .and_modify(|count| *count += index_count) |  | ||||||
|                                 .or_insert(index_count); |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             for (facet, index_stats) in facets_by_index.stats { |  | ||||||
|                 match stats.entry(facet) { |  | ||||||
|                     std::collections::btree_map::Entry::Vacant(entry) => { |  | ||||||
|                         entry.insert(index_stats); |  | ||||||
|                     } |  | ||||||
|                     std::collections::btree_map::Entry::Occupied(mut entry) => { |  | ||||||
|                         let stats = entry.get_mut(); |  | ||||||
|  |  | ||||||
|                         stats.min = f64::min(stats.min, index_stats.min); |  | ||||||
|                         stats.max = f64::max(stats.max, index_stats.max); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // fixup order |  | ||||||
|         for (facet, values) in &mut distribution { |  | ||||||
|             let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); |  | ||||||
|  |  | ||||||
|             match order_by { |  | ||||||
|                 OrderBy::Lexicographic => { |  | ||||||
|                     values.sort_unstable_by(|left, _, right, _| left.cmp(right)) |  | ||||||
|                 } |  | ||||||
|                 OrderBy::Count => { |  | ||||||
|                     values.sort_unstable_by(|_, left, _, right| { |  | ||||||
|                         left.cmp(right) |  | ||||||
|                             // biggest first |  | ||||||
|                             .reverse() |  | ||||||
|                     }) |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             if let Some(max_values_per_facet) = max_values_per_facet { |  | ||||||
|                 values.truncate(max_values_per_facet) |  | ||||||
|             }; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         Some(ComputedFacets { distribution, stats }) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| pub fn perform_federated_search( |  | ||||||
|     index_scheduler: &IndexScheduler, |  | ||||||
|     queries: Vec<SearchQueryWithIndex>, |  | ||||||
|     mut federation: Federation, |  | ||||||
|     features: RoFeatures, |  | ||||||
| ) -> Result<FederatedSearchResult, ResponseError> { |  | ||||||
|     let before_search = std::time::Instant::now(); |  | ||||||
|  |  | ||||||
|     // this implementation partition the queries by index to guarantee an important property: |  | ||||||
|     // - all the queries to a particular index use the same read transaction. |  | ||||||
|     // This is an important property, otherwise we cannot guarantee the self-consistency of the results. |  | ||||||
|  |  | ||||||
|     // 1. partition queries by index |  | ||||||
|     let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default(); |  | ||||||
|     for (query_index, federated_query) in queries.into_iter().enumerate() { |  | ||||||
|         if let Some(pagination_field) = federated_query.has_pagination() { |  | ||||||
|             return Err(MeilisearchHttpError::PaginationInFederatedQuery( |  | ||||||
|                 query_index, |  | ||||||
|                 pagination_field, |  | ||||||
|             ) |  | ||||||
|             .into()); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if let Some(facets) = federated_query.has_facets() { |  | ||||||
|             let facets = facets.to_owned(); |  | ||||||
|             return Err(MeilisearchHttpError::FacetsInFederatedQuery( |  | ||||||
|                 query_index, |  | ||||||
|                 federated_query.index_uid.into_inner(), |  | ||||||
|                 facets, |  | ||||||
|             ) |  | ||||||
|             .into()); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); |  | ||||||
|  |  | ||||||
|         queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { |  | ||||||
|             query, |  | ||||||
|             federation_options: federation_options.unwrap_or_default(), |  | ||||||
|             query_index, |  | ||||||
|         }) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // 2. perform queries, merge and make hits index by index |  | ||||||
|     let required_hit_count = federation.limit + federation.offset; |  | ||||||
|  |  | ||||||
|     // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic |  | ||||||
|     // Then in step (3), we'll update its value if there is any semantic search |  | ||||||
|     let mut semantic_hit_count = None; |  | ||||||
|     let mut results_by_index = Vec::with_capacity(queries_by_index.len()); |  | ||||||
|     let mut previous_query_data: Option<(RankingRules, usize, String)> = None; |  | ||||||
|  |  | ||||||
|     // remember the order and name of first index for each facet when merging with index settings |  | ||||||
|     // to detect if the order is inconsistent for a facet. |  | ||||||
|     let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets |  | ||||||
|     { |  | ||||||
|         Some(MergeFacets { .. }) => Some(Default::default()), |  | ||||||
|         _ => None, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     for (index_uid, queries) in queries_by_index { |  | ||||||
|         let first_query_index = queries.first().map(|query| query.query_index); |  | ||||||
|  |  | ||||||
|         let index = match index_scheduler.index(&index_uid) { |  | ||||||
|             Ok(index) => index, |  | ||||||
|             Err(err) => { |  | ||||||
|                 let mut err = ResponseError::from(err); |  | ||||||
|                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but |  | ||||||
|                 // here the resource not found is not part of the URL. |  | ||||||
|                 err.code = StatusCode::BAD_REQUEST; |  | ||||||
|                 if let Some(query_index) = first_query_index { |  | ||||||
|                     err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); |  | ||||||
|                 } |  | ||||||
|                 return Err(err); |  | ||||||
|             } |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         // Important: this is the only transaction we'll use for this index during this federated search |  | ||||||
|         let rtxn = index.read_txn()?; |  | ||||||
|  |  | ||||||
|         let criteria = index.criteria(&rtxn)?; |  | ||||||
|  |  | ||||||
|         let dictionary = index.dictionary(&rtxn)?; |  | ||||||
|         let dictionary: Option<Vec<_>> = |  | ||||||
|             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); |  | ||||||
|         let separators = index.allowed_separators(&rtxn)?; |  | ||||||
|         let separators: Option<Vec<_>> = |  | ||||||
|             separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); |  | ||||||
|  |  | ||||||
|         // each query gets its individual cutoff |  | ||||||
|         let cutoff = index.search_cutoff(&rtxn)?; |  | ||||||
|  |  | ||||||
|         let mut degraded = false; |  | ||||||
|         let mut used_negative_operator = false; |  | ||||||
|         let mut candidates = RoaringBitmap::new(); |  | ||||||
|  |  | ||||||
|         let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten(); |  | ||||||
|  |  | ||||||
|         // TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries |  | ||||||
|         if let Err(mut error) = |  | ||||||
|             check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn) |  | ||||||
|         { |  | ||||||
|             error.message = format!( |  | ||||||
|                 "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", |  | ||||||
|                 if let Some(query_index) = first_query_index { |  | ||||||
|                     format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") |  | ||||||
|                 } else { |  | ||||||
|                     Default::default() |  | ||||||
|                 } |  | ||||||
|             ); |  | ||||||
|             return Err(error); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // 2.1. Compute all candidates for each query in the index |  | ||||||
|         let mut results_by_query = Vec::with_capacity(queries.len()); |  | ||||||
|  |  | ||||||
|         for QueryByIndex { query, federation_options, query_index } in queries { |  | ||||||
|             // use an immediately invoked lambda to capture the result without returning from the function |  | ||||||
|  |  | ||||||
|             let res: Result<(), ResponseError> = (|| { |  | ||||||
|                 let search_kind = |  | ||||||
|                     search_kind(&query, index_scheduler, index_uid.to_string(), &index)?; |  | ||||||
|  |  | ||||||
|                 let canonicalization_kind = match (&search_kind, &query.q) { |  | ||||||
|                     (SearchKind::SemanticOnly { .. }, _) => { |  | ||||||
|                         ranking_rules::CanonicalizationKind::Vector |  | ||||||
|                     } |  | ||||||
|                     (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword, |  | ||||||
|                     _ => ranking_rules::CanonicalizationKind::Placeholder, |  | ||||||
|                 }; |  | ||||||
|  |  | ||||||
|                 let sort = if let Some(sort) = &query.sort { |  | ||||||
|                     let sorts: Vec<_> = |  | ||||||
|                         match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { |  | ||||||
|                             Ok(sorts) => sorts, |  | ||||||
|                             Err(asc_desc_error) => { |  | ||||||
|                                 return Err(milli::Error::from(milli::SortError::from( |  | ||||||
|                                     asc_desc_error, |  | ||||||
|                                 )) |  | ||||||
|                                 .into()) |  | ||||||
|                             } |  | ||||||
|                         }; |  | ||||||
|                     Some(sorts) |  | ||||||
|                 } else { |  | ||||||
|                     None |  | ||||||
|                 }; |  | ||||||
|  |  | ||||||
|                 let ranking_rules = ranking_rules::RankingRules::new( |  | ||||||
|                     criteria.clone(), |  | ||||||
|                     sort, |  | ||||||
|                     query.matching_strategy.into(), |  | ||||||
|                     canonicalization_kind, |  | ||||||
|                 ); |  | ||||||
|  |  | ||||||
|                 if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) = |  | ||||||
|                     previous_query_data.take() |  | ||||||
|                 { |  | ||||||
|                     if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) { |  | ||||||
|                         return Err(error.to_response_error( |  | ||||||
|                             &ranking_rules, |  | ||||||
|                             &previous_ranking_rules, |  | ||||||
|                             query_index, |  | ||||||
|                             previous_query_index, |  | ||||||
|                             &index_uid, |  | ||||||
|                             &previous_index_uid, |  | ||||||
|                         )); |  | ||||||
|                     } |  | ||||||
|                     previous_query_data = if previous_ranking_rules.constraint_count() |  | ||||||
|                         > ranking_rules.constraint_count() |  | ||||||
|                     { |  | ||||||
|                         Some((previous_ranking_rules, previous_query_index, previous_index_uid)) |  | ||||||
|                     } else { |  | ||||||
|                         Some((ranking_rules, query_index, index_uid.clone())) |  | ||||||
|                     }; |  | ||||||
|                 } else { |  | ||||||
|                     previous_query_data = Some((ranking_rules, query_index, index_uid.clone())); |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 match search_kind { |  | ||||||
|                     SearchKind::KeywordOnly => {} |  | ||||||
|                     _ => semantic_hit_count = Some(0), |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); |  | ||||||
|  |  | ||||||
|                 let time_budget = match cutoff { |  | ||||||
|                     Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), |  | ||||||
|                     None => TimeBudget::default(), |  | ||||||
|                 }; |  | ||||||
|  |  | ||||||
|                 let (mut search, _is_finite_pagination, _max_total_hits, _offset) = |  | ||||||
|                     prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?; |  | ||||||
|  |  | ||||||
|                 search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed); |  | ||||||
|                 search.offset(0); |  | ||||||
|                 search.limit(required_hit_count); |  | ||||||
|  |  | ||||||
|                 let (result, _semantic_hit_count) = |  | ||||||
|                     super::search_from_kind(index_uid.to_string(), search_kind, search)?; |  | ||||||
|                 let format = AttributesFormat { |  | ||||||
|                     attributes_to_retrieve: query.attributes_to_retrieve, |  | ||||||
|                     retrieve_vectors, |  | ||||||
|                     attributes_to_highlight: query.attributes_to_highlight, |  | ||||||
|                     attributes_to_crop: query.attributes_to_crop, |  | ||||||
|                     crop_length: query.crop_length, |  | ||||||
|                     crop_marker: query.crop_marker, |  | ||||||
|                     highlight_pre_tag: query.highlight_pre_tag, |  | ||||||
|                     highlight_post_tag: query.highlight_post_tag, |  | ||||||
|                     show_matches_position: query.show_matches_position, |  | ||||||
|                     sort: query.sort, |  | ||||||
|                     show_ranking_score: query.show_ranking_score, |  | ||||||
|                     show_ranking_score_details: query.show_ranking_score_details, |  | ||||||
|                     locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()), |  | ||||||
|                 }; |  | ||||||
|  |  | ||||||
|                 let milli::SearchResult { |  | ||||||
|                     matching_words, |  | ||||||
|                     candidates: query_candidates, |  | ||||||
|                     documents_ids, |  | ||||||
|                     document_scores, |  | ||||||
|                     degraded: query_degraded, |  | ||||||
|                     used_negative_operator: query_used_negative_operator, |  | ||||||
|                 } = result; |  | ||||||
|  |  | ||||||
|                 candidates |= query_candidates; |  | ||||||
|                 degraded |= query_degraded; |  | ||||||
|                 used_negative_operator |= query_used_negative_operator; |  | ||||||
|  |  | ||||||
|                 let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref()); |  | ||||||
|  |  | ||||||
|                 let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); |  | ||||||
|  |  | ||||||
|                 let hit_maker = |  | ||||||
|                     HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| { |  | ||||||
|                         MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())) |  | ||||||
|                     })?; |  | ||||||
|  |  | ||||||
|                 results_by_query.push(SearchResultByQuery { |  | ||||||
|                     federation_options, |  | ||||||
|                     hit_maker, |  | ||||||
|                     query_index, |  | ||||||
|                     documents_ids, |  | ||||||
|                     document_scores, |  | ||||||
|                 }); |  | ||||||
|                 Ok(()) |  | ||||||
|             })(); |  | ||||||
|  |  | ||||||
|             if let Err(mut error) = res { |  | ||||||
|                 error.message = format!("Inside `.queries[{query_index}]`: {}", error.message); |  | ||||||
|                 return Err(error); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         // 2.2. merge inside index |  | ||||||
|         let mut documents_seen = RoaringBitmap::new(); |  | ||||||
|         let merged_result: Result<Vec<_>, ResponseError> = |  | ||||||
|             merge_index_local_results(results_by_query) |  | ||||||
|                 // skip documents we've already seen & mark that we saw the current document |  | ||||||
|                 .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid)) |  | ||||||
|                 .take(required_hit_count) |  | ||||||
|                 // 2.3 make hits |  | ||||||
|                 .map( |  | ||||||
|                     |SearchResultByQueryIterItem { |  | ||||||
|                          docid, |  | ||||||
|                          score, |  | ||||||
|                          federation_options, |  | ||||||
|                          hit_maker, |  | ||||||
|                          query_index, |  | ||||||
|                      }| { |  | ||||||
|                         let mut hit = hit_maker.make_hit(docid, &score)?; |  | ||||||
|                         let weighted_score = |  | ||||||
|                             ScoreDetails::global_score(score.iter()) * (*federation_options.weight); |  | ||||||
|  |  | ||||||
|                         let _federation = serde_json::json!( |  | ||||||
|                             { |  | ||||||
|                                 "indexUid": index_uid, |  | ||||||
|                                 "queriesPosition": query_index, |  | ||||||
|                                 "weightedRankingScore": weighted_score, |  | ||||||
|                             } |  | ||||||
|                         ); |  | ||||||
|                         hit.document.insert("_federation".to_string(), _federation); |  | ||||||
|                         Ok(SearchHitByIndex { hit, score, federation_options, query_index }) |  | ||||||
|                     }, |  | ||||||
|                 ) |  | ||||||
|                 .collect(); |  | ||||||
|  |  | ||||||
|         let merged_result = merged_result?; |  | ||||||
|  |  | ||||||
|         let estimated_total_hits = candidates.len() as usize; |  | ||||||
|  |  | ||||||
|         let facets = facets_by_index |  | ||||||
|             .map(|facets_by_index| { |  | ||||||
|                 compute_facet_distribution_stats( |  | ||||||
|                     &facets_by_index, |  | ||||||
|                     &index, |  | ||||||
|                     &rtxn, |  | ||||||
|                     candidates, |  | ||||||
|                     super::Route::MultiSearch, |  | ||||||
|                 ) |  | ||||||
|             }) |  | ||||||
|             .transpose() |  | ||||||
|             .map_err(|mut error| { |  | ||||||
|                 error.message = format!( |  | ||||||
|                     "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", |  | ||||||
|                     error.message, |  | ||||||
|                     if let Some(query_index) = first_query_index { |  | ||||||
|                         format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") |  | ||||||
|                     } else { |  | ||||||
|                         Default::default() |  | ||||||
|                     } |  | ||||||
|                 ); |  | ||||||
|                 error |  | ||||||
|             })?; |  | ||||||
|  |  | ||||||
|         results_by_index.push(SearchResultByIndex { |  | ||||||
|             index: index_uid, |  | ||||||
|             hits: merged_result, |  | ||||||
|             estimated_total_hits, |  | ||||||
|             degraded, |  | ||||||
|             used_negative_operator, |  | ||||||
|             facets, |  | ||||||
|         }); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index. |  | ||||||
|     for (index_uid, facets) in federation.facets_by_index { |  | ||||||
|         let index = match index_scheduler.index(&index_uid) { |  | ||||||
|             Ok(index) => index, |  | ||||||
|             Err(err) => { |  | ||||||
|                 let mut err = ResponseError::from(err); |  | ||||||
|                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but |  | ||||||
|                 // here the resource not found is not part of the URL. |  | ||||||
|                 err.code = StatusCode::BAD_REQUEST; |  | ||||||
|                 err.message = format!( |  | ||||||
|                     "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", |  | ||||||
|                     err.message |  | ||||||
|                 ); |  | ||||||
|                 return Err(err); |  | ||||||
|             } |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         // Important: this is the only transaction we'll use for this index during this federated search |  | ||||||
|         let rtxn = index.read_txn()?; |  | ||||||
|  |  | ||||||
|         if let Err(mut error) = |  | ||||||
|             check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) |  | ||||||
|         { |  | ||||||
|             error.message = format!( |  | ||||||
|                 "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", |  | ||||||
|             ); |  | ||||||
|             return Err(error); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if let Some(facets) = facets { |  | ||||||
|             if let Err(mut error) = compute_facet_distribution_stats( |  | ||||||
|                 &facets, |  | ||||||
|                 &index, |  | ||||||
|                 &rtxn, |  | ||||||
|                 Default::default(), |  | ||||||
|                 super::Route::MultiSearch, |  | ||||||
|             ) { |  | ||||||
|                 error.message = |  | ||||||
|                     format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); |  | ||||||
|                 return Err(error); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // 3. merge hits and metadata across indexes |  | ||||||
|     // 3.1 merge metadata |  | ||||||
|     let (estimated_total_hits, degraded, used_negative_operator, facets) = { |  | ||||||
|         let mut estimated_total_hits = 0; |  | ||||||
|         let mut degraded = false; |  | ||||||
|         let mut used_negative_operator = false; |  | ||||||
|  |  | ||||||
|         let mut facets: FederatedFacets = FederatedFacets::default(); |  | ||||||
|  |  | ||||||
|         for SearchResultByIndex { |  | ||||||
|             index, |  | ||||||
|             hits: _, |  | ||||||
|             estimated_total_hits: estimated_total_hits_by_index, |  | ||||||
|             facets: facets_by_index, |  | ||||||
|             degraded: degraded_by_index, |  | ||||||
|             used_negative_operator: used_negative_operator_by_index, |  | ||||||
|         } in &mut results_by_index |  | ||||||
|         { |  | ||||||
|             estimated_total_hits += *estimated_total_hits_by_index; |  | ||||||
|             degraded |= *degraded_by_index; |  | ||||||
|             used_negative_operator |= *used_negative_operator_by_index; |  | ||||||
|  |  | ||||||
|             let facets_by_index = std::mem::take(facets_by_index); |  | ||||||
|             let index = std::mem::take(index); |  | ||||||
|  |  | ||||||
|             facets.insert(index, facets_by_index); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         (estimated_total_hits, degraded, used_negative_operator, facets) |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     // 3.2 merge hits |  | ||||||
|     let merged_hits: Vec<_> = merge_index_global_results(results_by_index) |  | ||||||
|         .skip(federation.offset) |  | ||||||
|         .take(federation.limit) |  | ||||||
|         .inspect(|hit| { |  | ||||||
|             if let Some(semantic_hit_count) = &mut semantic_hit_count { |  | ||||||
|                 if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) { |  | ||||||
|                     *semantic_hit_count += 1; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         }) |  | ||||||
|         .map(|hit| hit.hit) |  | ||||||
|         .collect(); |  | ||||||
|  |  | ||||||
|     let (facet_distribution, facet_stats, facets_by_index) = |  | ||||||
|         match federation.merge_facets.zip(facet_order) { |  | ||||||
|             Some((merge_facets, facet_order)) => { |  | ||||||
|                 let facets = facets.merge(merge_facets, facet_order); |  | ||||||
|  |  | ||||||
|                 let (facet_distribution, facet_stats) = facets |  | ||||||
|                     .map(|ComputedFacets { distribution, stats }| (distribution, stats)) |  | ||||||
|                     .unzip(); |  | ||||||
|  |  | ||||||
|                 (facet_distribution, facet_stats, FederatedFacets::default()) |  | ||||||
|             } |  | ||||||
|             None => (None, None, facets), |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|     let search_result = FederatedSearchResult { |  | ||||||
|         hits: merged_hits, |  | ||||||
|         processing_time_ms: before_search.elapsed().as_millis(), |  | ||||||
|         hits_info: HitsInfo::OffsetLimit { |  | ||||||
|             limit: federation.limit, |  | ||||||
|             offset: federation.offset, |  | ||||||
|             estimated_total_hits, |  | ||||||
|         }, |  | ||||||
|         semantic_hit_count, |  | ||||||
|         degraded, |  | ||||||
|         used_negative_operator, |  | ||||||
|         facet_distribution, |  | ||||||
|         facet_stats, |  | ||||||
|         facets_by_index, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     Ok(search_result) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| fn check_facet_order( |  | ||||||
|     facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>, |  | ||||||
|     current_index: &str, |  | ||||||
|     facets_by_index: &Option<Vec<String>>, |  | ||||||
|     index: &milli::Index, |  | ||||||
|     rtxn: &milli::heed::RoTxn<'_>, |  | ||||||
| ) -> Result<(), ResponseError> { |  | ||||||
|     if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) { |  | ||||||
|         let index_facet_order = index.sort_facet_values_by(rtxn)?; |  | ||||||
|         for facet in facets_by_index { |  | ||||||
|             let index_facet_order = index_facet_order.get(facet); |  | ||||||
|             let (previous_index, previous_facet_order) = facet_order |  | ||||||
|                 .entry(facet.to_owned()) |  | ||||||
|                 .or_insert_with(|| (current_index.to_owned(), index_facet_order)); |  | ||||||
|             if previous_facet_order != &index_facet_order { |  | ||||||
|                 return Err(MeilisearchHttpError::InconsistentFacetOrder { |  | ||||||
|                     facet: facet.clone(), |  | ||||||
|                     previous_facet_order: *previous_facet_order, |  | ||||||
|                     previous_uid: previous_index.clone(), |  | ||||||
|                     current_uid: current_index.to_owned(), |  | ||||||
|                     index_facet_order, |  | ||||||
|                 } |  | ||||||
|                 .into()); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
|     Ok(()) |  | ||||||
| } |  | ||||||
							
								
								
									
										10
									
								
								crates/meilisearch/src/search/federated/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								crates/meilisearch/src/search/federated/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | |||||||
|  | mod perform; | ||||||
|  | mod proxy; | ||||||
|  | mod types; | ||||||
|  | mod weighted_scores; | ||||||
|  |  | ||||||
|  | pub use perform::perform_federated_search; | ||||||
|  | pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; | ||||||
|  | pub use types::{ | ||||||
|  |     FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, | ||||||
|  | }; | ||||||
							
								
								
									
										1112
									
								
								crates/meilisearch/src/search/federated/perform.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1112
									
								
								crates/meilisearch/src/search/federated/perform.rs
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										267
									
								
								crates/meilisearch/src/search/federated/proxy.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										267
									
								
								crates/meilisearch/src/search/federated/proxy.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,267 @@ | |||||||
|  | pub use error::ProxySearchError; | ||||||
|  | use error::ReqwestErrorWithoutUrl; | ||||||
|  | use meilisearch_types::features::Remote; | ||||||
|  | use rand::Rng as _; | ||||||
|  | use reqwest::{Client, Response, StatusCode}; | ||||||
|  | use serde::de::DeserializeOwned; | ||||||
|  | use serde_json::Value; | ||||||
|  |  | ||||||
|  | use super::types::{FederatedSearch, FederatedSearchResult, Federation}; | ||||||
|  | use crate::search::SearchQueryWithIndex; | ||||||
|  |  | ||||||
// NOTE(review): presumably the HTTP header name and value used to mark a search request as proxied — confirm against the callers.
|  | pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search"; | ||||||
|  | pub const PROXY_SEARCH_HEADER_VALUE: &str = "true"; | ||||||
|  |  | ||||||
|  | mod error { | ||||||
|  |     use meilisearch_types::error::ResponseError; | ||||||
|  |     use reqwest::StatusCode; | ||||||
|  |  | ||||||
|  |     /// Failure modes of a federated search proxied to a remote host. | ||||||
|  |     /// | ||||||
|  |     /// The `Display` output of each variant is the message reported to the user; | ||||||
|  |     /// `as_response_error` pairs that message with an error code. | ||||||
|  |     #[derive(Debug, thiserror::Error)] | ||||||
|  |     pub enum ProxySearchError { | ||||||
|  |         /// Sending the request failed for a reason other than a timeout. | ||||||
|  |         #[error("{0}")] | ||||||
|  |         CouldNotSendRequest(ReqwestErrorWithoutUrl), | ||||||
|  |         /// The remote answered with `401 Unauthorized` or `403 Forbidden`. | ||||||
|  |         #[error("could not authenticate against the remote host\n  - hint: check that the remote instance was registered with a valid API key having the `search` action")] | ||||||
|  |         AuthenticationError, | ||||||
|  |         /// The body of the response could not be deserialized. | ||||||
|  |         /// | ||||||
|  |         /// `response` is the textual body, or the error met while retrieving it. | ||||||
|  |         #[error( | ||||||
|  |             "could not parse response from the remote host as a federated search response{}\n  - hint: check that the remote instance is a Meilisearch instance running the same version", | ||||||
|  |             response_from_remote(response) | ||||||
|  |         )] | ||||||
|  |         CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> }, | ||||||
|  |         /// The remote answered with a client error (4xx) other than 401/403. | ||||||
|  |         #[error("remote host responded with code {}{}\n  - hint: check that the remote instance has the correct index configuration for that request\n  - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))] | ||||||
|  |         BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> }, | ||||||
|  |         /// The request timed out. | ||||||
|  |         #[error("remote host did not answer before the deadline")] | ||||||
|  |         Timeout, | ||||||
|  |         /// A hit returned by the remote lacks the path named by the payload. | ||||||
|  |         #[error("remote hit does not contain `{0}`\n  - hint: check that the remote instance is a Meilisearch instance running the same version")] | ||||||
|  |         MissingPathInResponse(&'static str), | ||||||
|  |         /// The remote answered with a server error (5xx). | ||||||
|  |         #[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))] | ||||||
|  |         RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> }, | ||||||
|  |         /// A hit returned by the remote holds a value of the wrong type at `path`. | ||||||
|  |         #[error("remote hit contains an unexpected value at path `{path}`: expected {expected_type}, received `{received_value}`\n  - hint: check that the remote instance is a Meilisearch instance running the same version")] | ||||||
|  |         UnexpectedValueInPath { | ||||||
|  |             path: &'static str, | ||||||
|  |             expected_type: &'static str, | ||||||
|  |             received_value: String, | ||||||
|  |         }, | ||||||
|  |         /// The weighted score values of a remote hit could not be deserialized. | ||||||
|  |         #[error("could not parse weighted score values in the remote hit: {0}")] | ||||||
|  |         CouldNotParseWeightedScoreValues(serde_json::Error), | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     impl ProxySearchError { | ||||||
|  |         /// Builds the `ResponseError` made of this error's message and its matching error code. | ||||||
|  |         pub fn as_response_error(&self) -> ResponseError { | ||||||
|  |             use meilisearch_types::error::Code; | ||||||
|  |  | ||||||
|  |             let code = match self { | ||||||
|  |                 Self::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest, | ||||||
|  |                 Self::AuthenticationError => Code::RemoteInvalidApiKey, | ||||||
|  |                 Self::BadRequest { .. } => Code::RemoteBadRequest, | ||||||
|  |                 Self::Timeout => Code::RemoteTimeout, | ||||||
|  |                 Self::RemoteError { .. } => Code::RemoteRemoteError, | ||||||
|  |                 // the remote answered, but not with what was expected | ||||||
|  |                 Self::CouldNotParseResponse { .. } | ||||||
|  |                 | Self::MissingPathInResponse(_) | ||||||
|  |                 | Self::UnexpectedValueInPath { .. } | ||||||
|  |                 | Self::CouldNotParseWeightedScoreValues(_) => Code::RemoteBadResponse, | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             ResponseError::from_msg(self.to_string(), code) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// A `reqwest::Error` whose URL has been stripped with `reqwest::Error::without_url`. | ||||||
|  |     #[derive(Debug, thiserror::Error)] | ||||||
|  |     #[error(transparent)] | ||||||
|  |     pub struct ReqwestErrorWithoutUrl(reqwest::Error); | ||||||
|  |     impl ReqwestErrorWithoutUrl { | ||||||
|  |         /// Wraps `inner` after removing its URL. | ||||||
|  |         pub fn new(inner: reqwest::Error) -> Self { | ||||||
|  |             let stripped = inner.without_url(); | ||||||
|  |             Self(stripped) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Formats the suffix appended to error messages to describe what the remote sent back: | ||||||
|  |     /// the body itself, or the error that prevented retrieving it. | ||||||
|  |     fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String { | ||||||
|  |         let body = match response { | ||||||
|  |             Err(error) => { | ||||||
|  |                 return format!( | ||||||
|  |                     ":\n  - additionally, could not retrieve response from remote: {error}" | ||||||
|  |                 ) | ||||||
|  |             } | ||||||
|  |             Ok(body) => body, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         format!(":\n  - response from remote: {}", body) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Parameters shared by all the proxied searches, see `proxy_search`. | ||||||
|  | #[derive(Clone)] | ||||||
|  | pub struct ProxySearchParams { | ||||||
|  |     /// Optional instant after which attempts time out; `proxy_search` additionally caps it | ||||||
|  |     /// at 5 seconds after the call. | ||||||
|  |     pub deadline: Option<std::time::Instant>, | ||||||
|  |     /// Number of attempts made in the retry loop of `proxy_search`, before its final attempt. | ||||||
|  |     pub try_count: u32, | ||||||
|  |     /// HTTP client used to send the requests. | ||||||
|  |     pub client: reqwest::Client, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Performs a federated search on a remote host and returns the results | ||||||
|  | pub async fn proxy_search( | ||||||
|  |     node: &Remote, | ||||||
|  |     queries: Vec<SearchQueryWithIndex>, | ||||||
|  |     federation: Federation, | ||||||
|  |     params: &ProxySearchParams, | ||||||
|  | ) -> Result<FederatedSearchResult, ProxySearchError> { | ||||||
|  |     let url = format!("{}/multi-search", node.url); | ||||||
|  |  | ||||||
|  |     let federated = FederatedSearch { queries, federation: Some(federation) }; | ||||||
|  |  | ||||||
|  |     let search_api_key = node.search_api_key.as_deref(); | ||||||
|  |  | ||||||
|  |     let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(5); | ||||||
|  |  | ||||||
|  |     let deadline = if let Some(deadline) = params.deadline { | ||||||
|  |         std::time::Instant::min(deadline, max_deadline) | ||||||
|  |     } else { | ||||||
|  |         max_deadline | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     for i in 0..params.try_count { | ||||||
|  |         match try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline).await { | ||||||
|  |             Ok(response) => return Ok(response), | ||||||
|  |             Err(retry) => { | ||||||
|  |                 let duration = retry.into_duration(i)?; | ||||||
|  |                 tokio::time::sleep(duration).await; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline) | ||||||
|  |         .await | ||||||
|  |         .map_err(Retry::into_error) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Makes a single attempt at sending the federated search to `url`. | ||||||
|  | /// | ||||||
|  | /// On failure, the returned `Retry` tells whether another attempt is worth making: | ||||||
|  | /// timeouts, authentication failures and client errors (4xx) are final, while send failures, | ||||||
|  | /// server errors (5xx) and unparsable responses can be retried. | ||||||
|  | async fn try_proxy_search( | ||||||
|  |     url: &str, | ||||||
|  |     search_api_key: Option<&str>, | ||||||
|  |     federated: &FederatedSearch, | ||||||
|  |     client: &Client, | ||||||
|  |     deadline: std::time::Instant, | ||||||
|  | ) -> Result<FederatedSearchResult, Retry> { | ||||||
|  |     // time left before the deadline, zero when it already passed | ||||||
|  |     let remaining = deadline.saturating_duration_since(std::time::Instant::now()); | ||||||
|  |  | ||||||
|  |     let mut request = client.post(url).json(&federated).timeout(remaining); | ||||||
|  |     if let Some(key) = search_api_key { | ||||||
|  |         request = request.bearer_auth(key); | ||||||
|  |     } | ||||||
|  |     let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE); | ||||||
|  |  | ||||||
|  |     let response = match request.send().await { | ||||||
|  |         Ok(response) => response, | ||||||
|  |         Err(error) => { | ||||||
|  |             let retry = if error.is_timeout() { | ||||||
|  |                 Retry::give_up(ProxySearchError::Timeout) | ||||||
|  |             } else { | ||||||
|  |                 Retry::retry_later(ProxySearchError::CouldNotSendRequest( | ||||||
|  |                     ReqwestErrorWithoutUrl::new(error), | ||||||
|  |                 )) | ||||||
|  |             }; | ||||||
|  |             return Err(retry); | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let status_code = response.status(); | ||||||
|  |     if !status_code.is_success() { | ||||||
|  |         if matches!(status_code, StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN) { | ||||||
|  |             return Err(Retry::give_up(ProxySearchError::AuthenticationError)); | ||||||
|  |         } | ||||||
|  |         if status_code.is_client_error() { | ||||||
|  |             let response = parse_error(response).await; | ||||||
|  |             return Err(Retry::give_up(ProxySearchError::BadRequest { status_code, response })); | ||||||
|  |         } | ||||||
|  |         if status_code.is_server_error() { | ||||||
|  |             let response = parse_error(response).await; | ||||||
|  |             return Err(Retry::retry_later(ProxySearchError::RemoteError { | ||||||
|  |                 status_code, | ||||||
|  |                 response, | ||||||
|  |             })); | ||||||
|  |         } | ||||||
|  |         // neither a success nor an error: still try to read the body as a search response | ||||||
|  |         tracing::warn!( | ||||||
|  |             status_code = status_code.as_u16(), | ||||||
|  |             "remote replied with unexpected status code" | ||||||
|  |         ); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     parse_response(response).await.map_err(|response| { | ||||||
|  |         Retry::retry_later(ProxySearchError::CouldNotParseResponse { response }) | ||||||
|  |     }) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Reads the body of a failed request and renders it as text with `parse_bytes_as_error`. | ||||||
|  | /// | ||||||
|  | /// Fails only when the body itself cannot be retrieved. | ||||||
|  | async fn parse_error(response: Response) -> Result<String, ReqwestErrorWithoutUrl> { | ||||||
|  |     let body = response.bytes().await.map_err(ReqwestErrorWithoutUrl::new)?; | ||||||
|  |  | ||||||
|  |     Ok(parse_bytes_as_error(&body)) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Renders `bytes` as text: the re-serialized JSON value when `bytes` is valid JSON, | ||||||
|  | /// the lossy UTF-8 decoding of `bytes` otherwise. | ||||||
|  | fn parse_bytes_as_error(bytes: &[u8]) -> String { | ||||||
|  |     serde_json::from_slice::<Value>(bytes) | ||||||
|  |         .map(|json| json.to_string()) | ||||||
|  |         .unwrap_or_else(|_| String::from_utf8_lossy(bytes).into_owned()) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Reads the body of `response` and deserializes it from JSON as a `T`. | ||||||
|  | /// | ||||||
|  | /// The error is itself a `Result`: `Ok(text)` holds the body that could not be deserialized, | ||||||
|  | /// rendered with `parse_bytes_as_error`, while `Err(_)` means the body could not be retrieved. | ||||||
|  | async fn parse_response<T: DeserializeOwned>( | ||||||
|  |     response: Response, | ||||||
|  | ) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> { | ||||||
|  |     let body = match response.bytes().await { | ||||||
|  |         Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))), | ||||||
|  |         Ok(body) => body, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     serde_json::from_slice::<T>(&body).map_err(|_| Ok(parse_bytes_as_error(&body))) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// A failed attempt at a proxied search, together with what to do about it. | ||||||
|  | pub struct Retry { | ||||||
|  |     /// The error that made the attempt fail. | ||||||
|  |     error: ProxySearchError, | ||||||
|  |     /// Whether another attempt should be made. | ||||||
|  |     strategy: RetryStrategy, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// What to do after a failed attempt. | ||||||
|  | pub enum RetryStrategy { | ||||||
|  |     /// Do not try again: `Retry::into_duration` returns the error. | ||||||
|  |     GiveUp, | ||||||
|  |     /// Try again after the duration computed by `Retry::into_duration`. | ||||||
|  |     Retry, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Retry { | ||||||
|  |     /// Marks `error` as final: no further attempt should be made. | ||||||
|  |     pub fn give_up(error: ProxySearchError) -> Self { | ||||||
|  |         Self { error, strategy: RetryStrategy::GiveUp } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Marks `error` as transient: another attempt can be made later. | ||||||
|  |     pub fn retry_later(error: ProxySearchError) -> Self { | ||||||
|  |         Self { error, strategy: RetryStrategy::Retry } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns how long to wait before the next attempt, or the error when giving up. | ||||||
|  |     /// | ||||||
|  |     /// `attempt` is the zero-based index of the attempt that just failed. The base wait is | ||||||
|  |     /// `10^attempt` nanoseconds capped at 100ms, to which a random duration strictly smaller | ||||||
|  |     /// than that base is added. | ||||||
|  |     pub fn into_duration(self, attempt: u32) -> Result<std::time::Duration, ProxySearchError> { | ||||||
|  |         match self.strategy { | ||||||
|  |             RetryStrategy::GiveUp => Err(self.error), | ||||||
|  |             RetryStrategy::Retry => { | ||||||
|  |                 // `10^attempt` no longer fits in a `u64` from attempt 20 onwards: saturate | ||||||
|  |                 // rather than panicking (debug) or wrapping around, possibly to 0 (release), | ||||||
|  |                 // which would make the random range below empty. | ||||||
|  |                 let retry_duration = | ||||||
|  |                     std::time::Duration::from_nanos(10u64.saturating_pow(attempt)); | ||||||
|  |                 let retry_duration = retry_duration.min(std::time::Duration::from_millis(100)); // don't wait more than 100ms | ||||||
|  |  | ||||||
|  |                 // randomly up to double the retry duration | ||||||
|  |                 let retry_duration = retry_duration | ||||||
|  |                     + rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration); | ||||||
|  |  | ||||||
|  |                 tracing::warn!( | ||||||
|  |                     "Attempt #{}, failed with {}, retrying after {}ms.", | ||||||
|  |                     attempt, | ||||||
|  |                     self.error, | ||||||
|  |                     retry_duration.as_millis() | ||||||
|  |                 ); | ||||||
|  |                 Ok(retry_duration) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Discards the retry strategy and returns the underlying error. | ||||||
|  |     pub fn into_error(self) -> ProxySearchError { | ||||||
|  |         self.error | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										322
									
								
								crates/meilisearch/src/search/federated/types.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										322
									
								
								crates/meilisearch/src/search/federated/types.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,322 @@ | |||||||
|  | use std::collections::btree_map::Entry; | ||||||
|  | use std::collections::BTreeMap; | ||||||
|  | use std::fmt; | ||||||
|  | use std::vec::Vec; | ||||||
|  |  | ||||||
|  | use indexmap::IndexMap; | ||||||
|  | use meilisearch_types::deserr::DeserrJsonError; | ||||||
|  | use meilisearch_types::error::deserr_codes::{ | ||||||
|  |     InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, | ||||||
|  |     InvalidMultiSearchMergeFacets, InvalidMultiSearchQueryPosition, InvalidMultiSearchRemote, | ||||||
|  |     InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, | ||||||
|  | }; | ||||||
|  | use meilisearch_types::error::ResponseError; | ||||||
|  | use meilisearch_types::index_uid::IndexUid; | ||||||
|  | use meilisearch_types::milli::order_by_map::OrderByMap; | ||||||
|  | use meilisearch_types::milli::OrderBy; | ||||||
|  | use serde::{Deserialize, Serialize}; | ||||||
|  | use utoipa::ToSchema; | ||||||
|  |  | ||||||
|  | use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex}; | ||||||
|  |  | ||||||
|  | // Value of `Weight::default()`, i.e. the weight used when a query does not specify one. | ||||||
|  | pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0; | ||||||
|  |  | ||||||
|  | // fields in the response | ||||||
|  | // NOTE(review): presumably `_federation` is the per-hit metadata object and the other | ||||||
|  | // constants are keys inside it — confirm against the code that builds the hits. | ||||||
|  | pub const FEDERATION_HIT: &str = "_federation"; | ||||||
|  | pub const INDEX_UID: &str = "indexUid"; | ||||||
|  | pub const QUERIES_POSITION: &str = "queriesPosition"; | ||||||
|  | pub const WEIGHTED_RANKING_SCORE: &str = "weightedRankingScore"; | ||||||
|  | pub const WEIGHTED_SCORE_VALUES: &str = "weightedScoreValues"; | ||||||
|  | pub const FEDERATION_REMOTE: &str = "remote"; | ||||||
|  |  | ||||||
|  | // Per-query federation options. Every field is optional when deserializing: a missing | ||||||
|  | // field takes its `Default` value, and unknown fields are rejected. | ||||||
|  | #[derive(Debug, Default, Clone, PartialEq, Serialize, deserr::Deserr, ToSchema)] | ||||||
|  | #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  |  | ||||||
|  | pub struct FederationOptions { | ||||||
|  |     // Weight of the query, `DEFAULT_FEDERATED_WEIGHT` when missing. | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)] | ||||||
|  |     #[schema(value_type = f64)] | ||||||
|  |     pub weight: Weight, | ||||||
|  |  | ||||||
|  |     // NOTE(review): presumably the name of the remote that should run the query — confirm. | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchRemote>)] | ||||||
|  |     pub remote: Option<String>, | ||||||
|  |  | ||||||
|  |     // NOTE(review): presumably the position of the query in the original request — confirm. | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchQueryPosition>)] | ||||||
|  |     pub query_position: Option<usize>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Weight of a query in a federated search. | ||||||
|  | /// | ||||||
|  | /// Deserialized from an `f64` through `TryFrom<f64>`, which rejects invalid values with | ||||||
|  | /// `InvalidMultiSearchWeight`; dereferences to the inner `f64`. | ||||||
|  | #[derive(Debug, Clone, Copy, PartialEq, Serialize, deserr::Deserr)] | ||||||
|  | #[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)] | ||||||
|  | pub struct Weight(f64); | ||||||
|  |  | ||||||
|  | impl Default for Weight { | ||||||
|  |     /// The default weight is `DEFAULT_FEDERATED_WEIGHT`. | ||||||
|  |     fn default() -> Self { | ||||||
|  |         Self(DEFAULT_FEDERATED_WEIGHT) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl std::convert::TryFrom<f64> for Weight { | ||||||
|  |     type Error = InvalidMultiSearchWeight; | ||||||
|  |  | ||||||
|  |     /// Builds a weight from `f`, rejecting negative values and NaN. | ||||||
|  |     fn try_from(f: f64) -> Result<Self, Self::Error> { | ||||||
|  |         // NaN compares false against everything, so `f < 0.0` alone would let it through | ||||||
|  |         if f.is_nan() || f < 0.0 { | ||||||
|  |             Err(InvalidMultiSearchWeight) | ||||||
|  |         } else { | ||||||
|  |             Ok(Weight(f)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Gives read-only access to the inner `f64`. | ||||||
|  | impl std::ops::Deref for Weight { | ||||||
|  |     type Target = f64; | ||||||
|  |  | ||||||
|  |     fn deref(&self) -> &Self::Target { | ||||||
|  |         &self.0 | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Federation settings of a federated search request. Unknown fields are rejected when | ||||||
|  | // deserializing. | ||||||
|  | #[derive(Debug, Clone, deserr::Deserr, Serialize, ToSchema)] | ||||||
|  | #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[schema(rename_all = "camelCase")] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub struct Federation { | ||||||
|  |     // Defaults to `DEFAULT_SEARCH_LIMIT()` when missing. | ||||||
|  |     #[deserr(default = super::super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)] | ||||||
|  |     pub limit: usize, | ||||||
|  |     // Defaults to `DEFAULT_SEARCH_OFFSET()` when missing. | ||||||
|  |     #[deserr(default = super::super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] | ||||||
|  |     pub offset: usize, | ||||||
|  |     // Keyed by index uid; empty when missing. | ||||||
|  |     // NOTE(review): the optional lists are presumably facet names — confirm. | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)] | ||||||
|  |     pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>, | ||||||
|  |     // Options consumed by `FederatedFacets::merge`; `None` when missing. | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)] | ||||||
|  |     pub merge_facets: Option<MergeFacets>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Options of `FederatedFacets::merge`. | ||||||
|  | #[derive(Copy, Clone, Debug, deserr::Deserr, Serialize, Default, ToSchema)] | ||||||
|  | #[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[schema(rename_all = "camelCase")] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub struct MergeFacets { | ||||||
|  |     // When set, each merged facet keeps at most this many values. | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)] | ||||||
|  |     pub max_values_per_facet: Option<usize>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Body of a multi-search request: the queries and the optional federation settings. | ||||||
|  | // This is also what `proxy_search` serializes and sends to remote hosts. | ||||||
|  | #[derive(Debug, deserr::Deserr, Serialize, ToSchema)] | ||||||
|  | #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[schema(rename_all = "camelCase")] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub struct FederatedSearch { | ||||||
|  |     pub queries: Vec<SearchQueryWithIndex>, | ||||||
|  |     // `None` when the request has no `federation` field. | ||||||
|  |     #[deserr(default)] | ||||||
|  |     pub federation: Option<Federation>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Result of a federated search. It is deserializable because `proxy_search` parses the | ||||||
|  | // response of remote hosts as this type. | ||||||
|  | #[derive(Serialize, Deserialize, Clone, ToSchema)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | #[schema(rename_all = "camelCase")] | ||||||
|  | pub struct FederatedSearchResult { | ||||||
|  |     pub hits: Vec<SearchHit>, | ||||||
|  |     pub processing_time_ms: u128, | ||||||
|  |     // The fields of `HitsInfo` are inlined at the same level as the other fields. | ||||||
|  |     #[serde(flatten)] | ||||||
|  |     pub hits_info: HitsInfo, | ||||||
|  |  | ||||||
|  |     // The optional fields below are omitted from the output when `None`, and default to | ||||||
|  |     // `None` when missing from the input. | ||||||
|  |     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub semantic_hit_count: Option<u32>, | ||||||
|  |  | ||||||
|  |     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||||
|  |     #[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)] | ||||||
|  |     pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>, | ||||||
|  |     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub facet_stats: Option<BTreeMap<String, FacetStats>>, | ||||||
|  |     // Omitted from the output when empty. | ||||||
|  |     #[serde(default, skip_serializing_if = "FederatedFacets::is_empty")] | ||||||
|  |     pub facets_by_index: FederatedFacets, | ||||||
|  |  | ||||||
|  |     // NOTE(review): presumably keyed by remote name — confirm against the code filling it. | ||||||
|  |     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub remote_errors: Option<BTreeMap<String, ResponseError>>, | ||||||
|  |  | ||||||
|  |     // These fields are only used for analytics purposes | ||||||
|  |     // (never serialized nor deserialized) | ||||||
|  |     #[serde(skip)] | ||||||
|  |     pub degraded: bool, | ||||||
|  |     #[serde(skip)] | ||||||
|  |     pub used_negative_operator: bool, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Compact debug output: the hits are summarized by their count, and the optional or | ||||||
|  | /// flag fields only appear when they are set. | ||||||
|  | impl fmt::Debug for FederatedSearchResult { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         // exhaustive destructuring: a new field that is not handled here fails to compile | ||||||
|  |         let Self { | ||||||
|  |             hits, | ||||||
|  |             processing_time_ms, | ||||||
|  |             hits_info, | ||||||
|  |             semantic_hit_count, | ||||||
|  |             degraded, | ||||||
|  |             used_negative_operator, | ||||||
|  |             facet_distribution, | ||||||
|  |             facet_stats, | ||||||
|  |             facets_by_index, | ||||||
|  |             remote_errors, | ||||||
|  |         } = self; | ||||||
|  |  | ||||||
|  |         let hits_summary = format!("[{} hits returned]", hits.len()); | ||||||
|  |  | ||||||
|  |         let mut out = f.debug_struct("SearchResult"); | ||||||
|  |         // The processing time comes first as it is what matters most in a search result | ||||||
|  |         out.field("processing_time_ms", processing_time_ms) | ||||||
|  |             .field("hits", &hits_summary) | ||||||
|  |             .field("hits_info", hits_info); | ||||||
|  |         if *used_negative_operator { | ||||||
|  |             out.field("used_negative_operator", used_negative_operator); | ||||||
|  |         } | ||||||
|  |         if *degraded { | ||||||
|  |             out.field("degraded", degraded); | ||||||
|  |         } | ||||||
|  |         if let Some(distribution) = facet_distribution { | ||||||
|  |             out.field("facet_distribution", distribution); | ||||||
|  |         } | ||||||
|  |         if let Some(stats) = facet_stats { | ||||||
|  |             out.field("facet_stats", stats); | ||||||
|  |         } | ||||||
|  |         if let Some(count) = semantic_hit_count { | ||||||
|  |             out.field("semantic_hit_count", count); | ||||||
|  |         } | ||||||
|  |         if !facets_by_index.is_empty() { | ||||||
|  |             out.field("facets_by_index", facets_by_index); | ||||||
|  |         } | ||||||
|  |         if let Some(errors) = remote_errors { | ||||||
|  |             out.field("remote_errors", errors); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         out.finish() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Computed facets, keyed by the index they were computed for. | ||||||
|  | #[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] | ||||||
|  | pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>); | ||||||
|  |  | ||||||
|  | impl FederatedFacets { | ||||||
|  |     /// Registers the facets computed for `index`; does nothing when `facets` is `None`. | ||||||
|  |     pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) { | ||||||
|  |         let Some(facets) = facets else { | ||||||
|  |             return; | ||||||
|  |         }; | ||||||
|  |         self.0.insert(index, facets); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns `true` when no index has registered facets. | ||||||
|  |     pub fn is_empty(&self) -> bool { | ||||||
|  |         self.0.is_empty() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Collapses the facets of all indexes into a single `ComputedFacets`. | ||||||
|  |     /// | ||||||
|  |     /// - the counts of a value found under the same facet in several indexes are summed, | ||||||
|  |     /// - the stats of a facet found in several indexes keep the smallest `min` and the | ||||||
|  |     ///   biggest `max`, | ||||||
|  |     /// - the values of each facet are then sorted following `facet_order` (the default | ||||||
|  |     ///   `OrderBy` is used for facets without an entry) and truncated to | ||||||
|  |     ///   `max_values_per_facet` when it is set. | ||||||
|  |     /// | ||||||
|  |     /// Returns `None` when there are no facets at all. | ||||||
|  |     pub fn merge( | ||||||
|  |         self, | ||||||
|  |         MergeFacets { max_values_per_facet }: MergeFacets, | ||||||
|  |         facet_order: BTreeMap<String, (String, OrderBy)>, | ||||||
|  |     ) -> Option<ComputedFacets> { | ||||||
|  |         if self.is_empty() { | ||||||
|  |             return None; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let mut distribution: BTreeMap<String, _> = Default::default(); | ||||||
|  |         let mut stats: BTreeMap<String, FacetStats> = Default::default(); | ||||||
|  |  | ||||||
|  |         for index_facets in self.0.into_values() { | ||||||
|  |             for (facet, index_values) in index_facets.distribution { | ||||||
|  |                 match distribution.entry(facet) { | ||||||
|  |                     // first index exposing this facet: take its values as they are | ||||||
|  |                     Entry::Vacant(slot) => { | ||||||
|  |                         slot.insert(index_values); | ||||||
|  |                     } | ||||||
|  |                     // facet already seen: add up the counts value by value | ||||||
|  |                     Entry::Occupied(mut slot) => { | ||||||
|  |                         let merged_values = slot.get_mut(); | ||||||
|  |                         for (value, index_count) in index_values { | ||||||
|  |                             *merged_values.entry(value).or_default() += index_count; | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             for (facet, index_stats) in index_facets.stats { | ||||||
|  |                 stats | ||||||
|  |                     .entry(facet) | ||||||
|  |                     .and_modify(|merged| { | ||||||
|  |                         merged.min = f64::min(merged.min, index_stats.min); | ||||||
|  |                         merged.max = f64::max(merged.max, index_stats.max); | ||||||
|  |                     }) | ||||||
|  |                     .or_insert(index_stats); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // fixup order | ||||||
|  |         for (facet, values) in distribution.iter_mut() { | ||||||
|  |             let order_by = match facet_order.get(facet) { | ||||||
|  |                 Some((_, order)) => *order, | ||||||
|  |                 None => OrderBy::default(), | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             match order_by { | ||||||
|  |                 OrderBy::Lexicographic => { | ||||||
|  |                     values.sort_unstable_by(|left, _, right, _| left.cmp(right)) | ||||||
|  |                 } | ||||||
|  |                 // biggest first | ||||||
|  |                 OrderBy::Count => values.sort_unstable_by(|_, left, _, right| right.cmp(left)), | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             if let Some(max_values_per_facet) = max_values_per_facet { | ||||||
|  |                 values.truncate(max_values_per_facet); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Some(ComputedFacets { distribution, stats }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Merges the facets coming from a remote into `self`, index by index: stats keep the | ||||||
|  |     /// smallest `min` and the biggest `max`, and the counts of identical values are summed. | ||||||
|  |     pub(crate) fn append(&mut self, FederatedFacets(remote_facets_by_index): FederatedFacets) { | ||||||
|  |         for (index, remote_facets) in remote_facets_by_index { | ||||||
|  |             let merged_facets = self.0.entry(index).or_default(); | ||||||
|  |  | ||||||
|  |             for (remote_facet, remote_stats) in remote_facets.stats { | ||||||
|  |                 merged_facets | ||||||
|  |                     .stats | ||||||
|  |                     .entry(remote_facet) | ||||||
|  |                     .and_modify(|merged| { | ||||||
|  |                         merged.min = f64::min(merged.min, remote_stats.min); | ||||||
|  |                         merged.max = f64::max(merged.max, remote_stats.max); | ||||||
|  |                     }) | ||||||
|  |                     .or_insert(remote_stats); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             for (remote_facet, remote_values) in remote_facets.distribution { | ||||||
|  |                 let merged_values = merged_facets.distribution.entry(remote_facet).or_default(); | ||||||
|  |                 for (remote_value, remote_count) in remote_values { | ||||||
|  |                     *merged_values.entry(remote_value).or_default() += remote_count; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Sorts and truncates the facet values of each index following the order and the | ||||||
|  |     /// maximum number of values registered for that index in `facet_order`. | ||||||
|  |     /// | ||||||
|  |     /// Indexes without an entry in `facet_order` are left untouched. | ||||||
|  |     pub fn sort_and_truncate(&mut self, facet_order: BTreeMap<String, (OrderByMap, usize)>) { | ||||||
|  |         for (index, facets) in self.0.iter_mut() { | ||||||
|  |             if let Some((order_by, max_values_per_facet)) = facet_order.get(index) { | ||||||
|  |                 for (facet, values) in facets.distribution.iter_mut() { | ||||||
|  |                     match order_by.get(facet) { | ||||||
|  |                         OrderBy::Lexicographic => { | ||||||
|  |                             values.sort_unstable_by(|left, _, right, _| left.cmp(right)) | ||||||
|  |                         } | ||||||
|  |                         // biggest first | ||||||
|  |                         OrderBy::Count => { | ||||||
|  |                             values.sort_unstable_by(|_, left, _, right| right.cmp(left)) | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                     values.truncate(*max_values_per_facet); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										88
									
								
								crates/meilisearch/src/search/federated/weighted_scores.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								crates/meilisearch/src/search/federated/weighted_scores.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | |||||||
|  | use std::cmp::Ordering; | ||||||
|  |  | ||||||
|  | use meilisearch_types::milli::score_details::{self, WeightedScoreValue}; | ||||||
|  |  | ||||||
|  | pub fn compare( | ||||||
|  |     mut left_it: impl Iterator<Item = WeightedScoreValue>, | ||||||
|  |     left_weighted_global_score: f64, | ||||||
|  |     mut right_it: impl Iterator<Item = WeightedScoreValue>, | ||||||
|  |     right_weighted_global_score: f64, | ||||||
|  | ) -> Ordering { | ||||||
|  |     loop { | ||||||
|  |         let left = left_it.next(); | ||||||
|  |         let right = right_it.next(); | ||||||
|  |  | ||||||
|  |         match (left, right) { | ||||||
|  |             (None, None) => return Ordering::Equal, | ||||||
|  |             (None, Some(_)) => return Ordering::Less, | ||||||
|  |             (Some(_), None) => return Ordering::Greater, | ||||||
|  |             ( | ||||||
|  |                 Some( | ||||||
|  |                     WeightedScoreValue::WeightedScore(left) | WeightedScoreValue::VectorSort(left), | ||||||
|  |                 ), | ||||||
|  |                 Some( | ||||||
|  |                     WeightedScoreValue::WeightedScore(right) | ||||||
|  |                     | WeightedScoreValue::VectorSort(right), | ||||||
|  |                 ), | ||||||
|  |             ) => { | ||||||
|  |                 if (left - right).abs() <= f64::EPSILON { | ||||||
|  |                     continue; | ||||||
|  |                 } | ||||||
|  |                 return left.partial_cmp(&right).unwrap(); | ||||||
|  |             } | ||||||
|  |             ( | ||||||
|  |                 Some(WeightedScoreValue::Sort { asc: left_asc, value: left }), | ||||||
|  |                 Some(WeightedScoreValue::Sort { asc: right_asc, value: right }), | ||||||
|  |             ) => { | ||||||
|  |                 if left_asc != right_asc { | ||||||
|  |                     return left_weighted_global_score | ||||||
|  |                         .partial_cmp(&right_weighted_global_score) | ||||||
|  |                         .unwrap(); | ||||||
|  |                 } | ||||||
|  |                 match score_details::compare_sort_values(left_asc, &left, &right) { | ||||||
|  |                     Ordering::Equal => continue, | ||||||
|  |                     order => return order, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             ( | ||||||
|  |                 Some(WeightedScoreValue::GeoSort { asc: left_asc, distance: left }), | ||||||
|  |                 Some(WeightedScoreValue::GeoSort { asc: right_asc, distance: right }), | ||||||
|  |             ) => { | ||||||
|  |                 if left_asc != right_asc { | ||||||
|  |                     continue; | ||||||
|  |                 } | ||||||
|  |                 match (left, right) { | ||||||
|  |                     (None, None) => continue, | ||||||
|  |                     (None, Some(_)) => return Ordering::Less, | ||||||
|  |                     (Some(_), None) => return Ordering::Greater, | ||||||
|  |                     (Some(left), Some(right)) => { | ||||||
|  |                         if (left - right).abs() <= f64::EPSILON { | ||||||
|  |                             continue; | ||||||
|  |                         } | ||||||
|  |                         return left.partial_cmp(&right).unwrap(); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             // not comparable details, use global | ||||||
|  |             (Some(WeightedScoreValue::WeightedScore(_)), Some(_)) | ||||||
|  |             | (Some(_), Some(WeightedScoreValue::WeightedScore(_))) | ||||||
|  |             | (Some(WeightedScoreValue::VectorSort(_)), Some(_)) | ||||||
|  |             | (Some(_), Some(WeightedScoreValue::VectorSort(_))) | ||||||
|  |             | (Some(WeightedScoreValue::GeoSort { .. }), Some(WeightedScoreValue::Sort { .. })) | ||||||
|  |             | (Some(WeightedScoreValue::Sort { .. }), Some(WeightedScoreValue::GeoSort { .. })) => { | ||||||
|  |                 let left_count = left_it.count(); | ||||||
|  |                 let right_count = right_it.count(); | ||||||
|  |                 // compare how many remaining groups of rules each side has. | ||||||
|  |                 // the group with the most remaining groups wins. | ||||||
|  |                 return left_count | ||||||
|  |                     .cmp(&right_count) | ||||||
|  |                     // breaks ties with the global ranking score | ||||||
|  |                     .then_with(|| { | ||||||
|  |                         left_weighted_global_score | ||||||
|  |                             .partial_cmp(&right_weighted_global_score) | ||||||
|  |                             .unwrap() | ||||||
|  |                     }); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -30,7 +30,7 @@ use milli::{ | |||||||
|     MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, |     MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, | ||||||
| }; | }; | ||||||
| use regex::Regex; | use regex::Regex; | ||||||
| use serde::Serialize; | use serde::{Deserialize, Serialize}; | ||||||
| use serde_json::{json, Value}; | use serde_json::{json, Value}; | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod mod_test; | mod mod_test; | ||||||
| @@ -41,7 +41,7 @@ use crate::error::MeilisearchHttpError; | |||||||
| mod federated; | mod federated; | ||||||
| pub use federated::{ | pub use federated::{ | ||||||
|     perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, |     perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, | ||||||
|     FederationOptions, MergeFacets, |     FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| mod ranking_rules; | mod ranking_rules; | ||||||
| @@ -119,7 +119,7 @@ pub struct SearchQuery { | |||||||
|     pub locales: Option<Vec<Locale>>, |     pub locales: Option<Vec<Locale>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema)] | #[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema, Serialize)] | ||||||
| #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] | #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] | ||||||
| pub struct RankingScoreThreshold(f64); | pub struct RankingScoreThreshold(f64); | ||||||
| impl std::convert::TryFrom<f64> for RankingScoreThreshold { | impl std::convert::TryFrom<f64> for RankingScoreThreshold { | ||||||
| @@ -275,11 +275,13 @@ impl fmt::Debug for SearchQuery { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema)] | #[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema, Serialize)] | ||||||
| #[deserr(error = DeserrJsonError<InvalidSearchHybridQuery>, rename_all = camelCase, deny_unknown_fields)] | #[deserr(error = DeserrJsonError<InvalidSearchHybridQuery>, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
| pub struct HybridQuery { | pub struct HybridQuery { | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)] | ||||||
|     #[schema(value_type = f32, default)] |     #[schema(value_type = f32, default)] | ||||||
|  |     #[serde(default)] | ||||||
|     pub semantic_ratio: SemanticRatio, |     pub semantic_ratio: SemanticRatio, | ||||||
|     #[deserr(error = DeserrJsonError<InvalidSearchEmbedder>)] |     #[deserr(error = DeserrJsonError<InvalidSearchEmbedder>)] | ||||||
|     pub embedder: String, |     pub embedder: String, | ||||||
| @@ -369,7 +371,7 @@ impl SearchKind { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, PartialEq, Deserr)] | #[derive(Debug, Clone, Copy, PartialEq, Deserr, Serialize)] | ||||||
| #[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)] | #[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)] | ||||||
| pub struct SemanticRatio(f32); | pub struct SemanticRatio(f32); | ||||||
|  |  | ||||||
| @@ -411,8 +413,9 @@ impl SearchQuery { | |||||||
| // This struct contains the fields of `SearchQuery` inline. | // This struct contains the fields of `SearchQuery` inline. | ||||||
| // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields. | // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields. | ||||||
| // The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date. | // The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date. | ||||||
| #[derive(Debug, Clone, PartialEq, Deserr, ToSchema)] | #[derive(Debug, Clone, Serialize, PartialEq, Deserr, ToSchema)] | ||||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
| #[schema(rename_all = "camelCase")] | #[schema(rename_all = "camelCase")] | ||||||
| pub struct SearchQueryWithIndex { | pub struct SearchQueryWithIndex { | ||||||
|     #[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)] |     #[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)] | ||||||
| @@ -493,6 +496,72 @@ impl SearchQueryWithIndex { | |||||||
|         self.facets.as_deref().filter(|v| !v.is_empty()) |         self.facets.as_deref().filter(|v| !v.is_empty()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn from_index_query_federation( | ||||||
|  |         index_uid: IndexUid, | ||||||
|  |         query: SearchQuery, | ||||||
|  |         federation_options: Option<FederationOptions>, | ||||||
|  |     ) -> Self { | ||||||
|  |         let SearchQuery { | ||||||
|  |             q, | ||||||
|  |             vector, | ||||||
|  |             hybrid, | ||||||
|  |             offset, | ||||||
|  |             limit, | ||||||
|  |             page, | ||||||
|  |             hits_per_page, | ||||||
|  |             attributes_to_retrieve, | ||||||
|  |             retrieve_vectors, | ||||||
|  |             attributes_to_crop, | ||||||
|  |             crop_length, | ||||||
|  |             attributes_to_highlight, | ||||||
|  |             show_matches_position, | ||||||
|  |             show_ranking_score, | ||||||
|  |             show_ranking_score_details, | ||||||
|  |             filter, | ||||||
|  |             sort, | ||||||
|  |             distinct, | ||||||
|  |             facets, | ||||||
|  |             highlight_pre_tag, | ||||||
|  |             highlight_post_tag, | ||||||
|  |             crop_marker, | ||||||
|  |             matching_strategy, | ||||||
|  |             attributes_to_search_on, | ||||||
|  |             ranking_score_threshold, | ||||||
|  |             locales, | ||||||
|  |         } = query; | ||||||
|  |  | ||||||
|  |         SearchQueryWithIndex { | ||||||
|  |             index_uid, | ||||||
|  |             q, | ||||||
|  |             vector, | ||||||
|  |             hybrid, | ||||||
|  |             offset: if offset == DEFAULT_SEARCH_OFFSET() { None } else { Some(offset) }, | ||||||
|  |             limit: if limit == DEFAULT_SEARCH_LIMIT() { None } else { Some(limit) }, | ||||||
|  |             page, | ||||||
|  |             hits_per_page, | ||||||
|  |             attributes_to_retrieve, | ||||||
|  |             retrieve_vectors, | ||||||
|  |             attributes_to_crop, | ||||||
|  |             crop_length, | ||||||
|  |             attributes_to_highlight, | ||||||
|  |             show_ranking_score, | ||||||
|  |             show_ranking_score_details, | ||||||
|  |             show_matches_position, | ||||||
|  |             filter, | ||||||
|  |             sort, | ||||||
|  |             distinct, | ||||||
|  |             facets, | ||||||
|  |             highlight_pre_tag, | ||||||
|  |             highlight_post_tag, | ||||||
|  |             crop_marker, | ||||||
|  |             matching_strategy, | ||||||
|  |             attributes_to_search_on, | ||||||
|  |             ranking_score_threshold, | ||||||
|  |             locales, | ||||||
|  |             federation_options, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) { |     pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) { | ||||||
|         let SearchQueryWithIndex { |         let SearchQueryWithIndex { | ||||||
|             index_uid, |             index_uid, | ||||||
| @@ -620,8 +689,9 @@ impl TryFrom<Value> for ExternalDocumentId { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema)] | #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)] | ||||||
| #[deserr(rename_all = camelCase)] | #[deserr(rename_all = camelCase)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
| pub enum MatchingStrategy { | pub enum MatchingStrategy { | ||||||
|     /// Remove query words from last to first |     /// Remove query words from last to first | ||||||
|     Last, |     Last, | ||||||
| @@ -667,19 +737,19 @@ impl From<FacetValuesSort> for OrderBy { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Serialize, PartialEq, ToSchema)] | #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)] | ||||||
| pub struct SearchHit { | pub struct SearchHit { | ||||||
|     #[serde(flatten)] |     #[serde(flatten)] | ||||||
|     #[schema(additional_properties, inline, value_type = HashMap<String, Value>)] |     #[schema(additional_properties, inline, value_type = HashMap<String, Value>)] | ||||||
|     pub document: Document, |     pub document: Document, | ||||||
|     #[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")] |     #[serde(default, rename = "_formatted", skip_serializing_if = "Document::is_empty")] | ||||||
|     #[schema(additional_properties, value_type = HashMap<String, Value>)] |     #[schema(additional_properties, value_type = HashMap<String, Value>)] | ||||||
|     pub formatted: Document, |     pub formatted: Document, | ||||||
|     #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] |     #[serde(default, rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] | ||||||
|     pub matches_position: Option<MatchesPosition>, |     pub matches_position: Option<MatchesPosition>, | ||||||
|     #[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")] |     #[serde(default, rename = "_rankingScore", skip_serializing_if = "Option::is_none")] | ||||||
|     pub ranking_score: Option<f64>, |     pub ranking_score: Option<f64>, | ||||||
|     #[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")] |     #[serde(default, rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")] | ||||||
|     pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>, |     pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -767,7 +837,7 @@ pub struct SearchResultWithIndex { | |||||||
|     pub result: SearchResult, |     pub result: SearchResult, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)] | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)] | ||||||
| #[serde(untagged)] | #[serde(untagged)] | ||||||
| pub enum HitsInfo { | pub enum HitsInfo { | ||||||
|     #[serde(rename_all = "camelCase")] |     #[serde(rename_all = "camelCase")] | ||||||
| @@ -778,7 +848,7 @@ pub enum HitsInfo { | |||||||
|     OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, |     OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Serialize, Debug, Clone, PartialEq, ToSchema)] | #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)] | ||||||
| pub struct FacetStats { | pub struct FacetStats { | ||||||
|     pub min: f64, |     pub min: f64, | ||||||
|     pub max: f64, |     pub max: f64, | ||||||
| @@ -1061,7 +1131,7 @@ pub fn perform_search( | |||||||
|     Ok(result) |     Ok(result) | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Default, Serialize, ToSchema)] | #[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] | ||||||
| pub struct ComputedFacets { | pub struct ComputedFacets { | ||||||
|     #[schema(value_type = BTreeMap<String, BTreeMap<String, u64>>)] |     #[schema(value_type = BTreeMap<String, BTreeMap<String, u64>>)] | ||||||
|     pub distribution: BTreeMap<String, IndexMap<String, u64>>, |     pub distribution: BTreeMap<String, IndexMap<String, u64>>, | ||||||
|   | |||||||
										
											Binary file not shown.
										
									
								
							| @@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() { | |||||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|     meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###" |     meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###" | ||||||
|     { |     { | ||||||
|       "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`", |       "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`", | ||||||
|       "code": "invalid_api_key_actions", |       "code": "invalid_api_key_actions", | ||||||
|       "type": "invalid_request", |       "type": "invalid_request", | ||||||
|       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" |       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" | ||||||
|   | |||||||
| @@ -68,6 +68,8 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&' | |||||||
|             ("GET",     "/keys") =>                                            hashset!{"keys.get", "*"}, |             ("GET",     "/keys") =>                                            hashset!{"keys.get", "*"}, | ||||||
|             ("GET",     "/experimental-features") =>                           hashset!{"experimental.get", "*"}, |             ("GET",     "/experimental-features") =>                           hashset!{"experimental.get", "*"}, | ||||||
|             ("PATCH",   "/experimental-features") =>                           hashset!{"experimental.update", "*"}, |             ("PATCH",   "/experimental-features") =>                           hashset!{"experimental.update", "*"}, | ||||||
|  |             ("GET",   "/network") =>                                           hashset!{"network.get", "*"}, | ||||||
|  |             ("PATCH",   "/network") =>                                         hashset!{"network.update", "*"}, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         authorizations |         authorizations | ||||||
|   | |||||||
| @@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() { | |||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(json_string!(response), @r###" | ||||||
|     { |     { | ||||||
|       "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`", |       "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`", | ||||||
|       "code": "invalid_api_key_actions", |       "code": "invalid_api_key_actions", | ||||||
|       "type": "invalid_request", |       "type": "invalid_request", | ||||||
|       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" |       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" | ||||||
|   | |||||||
| @@ -41,9 +41,8 @@ async fn list_batches() { | |||||||
|     let index = server.index("test"); |     let index = server.index("test"); | ||||||
|     let (task, _status_code) = index.create(None).await; |     let (task, _status_code) = index.create(None).await; | ||||||
|     index.wait_task(task.uid()).await.succeeded(); |     index.wait_task(task.uid()).await.succeeded(); | ||||||
|     index |     let (task, _status_code) = index.create(None).await; | ||||||
|         .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) |     index.wait_task(task.uid()).await.failed(); | ||||||
|         .await; |  | ||||||
|     let (response, code) = index.list_batches().await; |     let (response, code) = index.list_batches().await; | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!( |     assert_eq!( | ||||||
| @@ -96,11 +95,12 @@ async fn list_batches_pagination_and_reverse() { | |||||||
| async fn list_batches_with_star_filters() { | async fn list_batches_with_star_filters() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
|     let index = server.index("test"); |     let index = server.index("test"); | ||||||
|     let (batch, _code) = index.create(None).await; |     let (task, _code) = index.create(None).await; | ||||||
|     index.wait_task(batch.uid()).await.succeeded(); |     index.wait_task(task.uid()).await.succeeded(); | ||||||
|     index |     let index = server.index("test"); | ||||||
|         .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) |     let (task, _code) = index.create(None).await; | ||||||
|         .await; |     index.wait_task(task.uid()).await.failed(); | ||||||
|  |  | ||||||
|     let (response, code) = index.service.get("/batches?indexUids=test").await; |     let (response, code) = index.service.get("/batches?indexUids=test").await; | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!(response["results"].as_array().unwrap().len(), 2); |     assert_eq!(response["results"].as_array().unwrap().len(), 2); | ||||||
| @@ -187,9 +187,6 @@ async fn list_batches_invalid_canceled_by_filter() { | |||||||
|     let index = server.index("test"); |     let index = server.index("test"); | ||||||
|     let (task, _status_code) = index.create(None).await; |     let (task, _status_code) = index.create(None).await; | ||||||
|     index.wait_task(task.uid()).await.succeeded(); |     index.wait_task(task.uid()).await.succeeded(); | ||||||
|     index |  | ||||||
|         .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) |  | ||||||
|         .await; |  | ||||||
|  |  | ||||||
|     let (response, code) = index.filtered_batches(&[], &[], &["0"]).await; |     let (response, code) = index.filtered_batches(&[], &[], &["0"]).await; | ||||||
|     assert_eq!(code, 200, "{}", response); |     assert_eq!(code, 200, "{}", response); | ||||||
| @@ -202,9 +199,8 @@ async fn list_batches_status_and_type_filtered() { | |||||||
|     let index = server.index("test"); |     let index = server.index("test"); | ||||||
|     let (task, _status_code) = index.create(None).await; |     let (task, _status_code) = index.create(None).await; | ||||||
|     index.wait_task(task.uid()).await.succeeded(); |     index.wait_task(task.uid()).await.succeeded(); | ||||||
|     index |     let (task, _status_code) = index.update(Some("id")).await; | ||||||
|         .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) |     index.wait_task(task.uid()).await.succeeded(); | ||||||
|         .await; |  | ||||||
|  |  | ||||||
|     let (response, code) = index.filtered_batches(&["indexCreation"], &["failed"], &[]).await; |     let (response, code) = index.filtered_batches(&["indexCreation"], &["failed"], &[]).await; | ||||||
|     assert_eq!(code, 200, "{}", response); |     assert_eq!(code, 200, "{}", response); | ||||||
| @@ -212,7 +208,7 @@ async fn list_batches_status_and_type_filtered() { | |||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .filtered_batches( |         .filtered_batches( | ||||||
|             &["indexCreation", "documentAdditionOrUpdate"], |             &["indexCreation", "IndexUpdate"], | ||||||
|             &["succeeded", "processing", "enqueued"], |             &["succeeded", "processing", "enqueued"], | ||||||
|             &[], |             &[], | ||||||
|         ) |         ) | ||||||
|   | |||||||
| @@ -88,6 +88,10 @@ impl Server<Owned> { | |||||||
|         self.service.api_key = Some(api_key.as_ref().to_string()); |         self.service.api_key = Some(api_key.as_ref().to_string()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn clear_api_key(&mut self) { | ||||||
|  |         self.service.api_key = None; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Fetch and use the default admin key for nexts http requests. |     /// Fetch and use the default admin key for nexts http requests. | ||||||
|     pub async fn use_admin_key(&mut self, master_key: impl AsRef<str>) { |     pub async fn use_admin_key(&mut self, master_key: impl AsRef<str>) { | ||||||
|         self.use_api_key(master_key); |         self.use_api_key(master_key); | ||||||
| @@ -159,10 +163,18 @@ impl Server<Owned> { | |||||||
|         self.service.get("/tasks").await |         self.service.get("/tasks").await | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub async fn batches(&self) -> (Value, StatusCode) { | ||||||
|  |         self.service.get("/batches").await | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub async fn set_features(&self, value: Value) -> (Value, StatusCode) { |     pub async fn set_features(&self, value: Value) -> (Value, StatusCode) { | ||||||
|         self.service.patch("/experimental-features", value).await |         self.service.patch("/experimental-features", value).await | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub async fn set_network(&self, value: Value) -> (Value, StatusCode) { | ||||||
|  |         self.service.patch("/network", value).await | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub async fn get_metrics(&self) -> (Value, StatusCode) { |     pub async fn get_metrics(&self) -> (Value, StatusCode) { | ||||||
|         self.service.get("/metrics").await |         self.service.get("/metrics").await | ||||||
|     } |     } | ||||||
| @@ -408,6 +420,10 @@ impl<State> Server<State> { | |||||||
|     pub async fn get_features(&self) -> (Value, StatusCode) { |     pub async fn get_features(&self) -> (Value, StatusCode) { | ||||||
|         self.service.get("/experimental-features").await |         self.service.get("/experimental-features").await | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub async fn get_network(&self) -> (Value, StatusCode) { | ||||||
|  |         self.service.get("/network").await | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn default_settings(dir: impl AsRef<Path>) -> Opt { | pub fn default_settings(dir: impl AsRef<Path>) -> Opt { | ||||||
|   | |||||||
| @@ -1803,6 +1803,275 @@ async fn add_documents_with_geo_field() { | |||||||
|       "finishedAt": "[date]" |       "finishedAt": "[date]" | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; | ||||||
|  |  | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), | ||||||
|  |     @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": "1" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "2", | ||||||
|  |           "_geo": null | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "3", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": 1, | ||||||
|  |             "lng": 1 | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "4", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": "1", | ||||||
|  |             "lng": "1" | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 4 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]})) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     // we are expecting docs 4 and 3 first as they have geo | ||||||
|  |     snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), | ||||||
|  |     @r###" | ||||||
|  |     { | ||||||
|  |       "hits": [ | ||||||
|  |         { | ||||||
|  |           "id": "4", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": "1", | ||||||
|  |             "lng": "1" | ||||||
|  |           }, | ||||||
|  |           "_geoDistance": 5522018 | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "3", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": 1, | ||||||
|  |             "lng": 1 | ||||||
|  |           }, | ||||||
|  |           "_geoDistance": 5522018 | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "1" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "2", | ||||||
|  |           "_geo": null | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "query": "", | ||||||
|  |       "processingTimeMs": "[time]", | ||||||
|  |       "limit": 20, | ||||||
|  |       "offset": 0, | ||||||
|  |       "estimatedTotalHits": 4 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn update_documents_with_geo_field() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |     index.update_settings(json!({"sortableAttributes": ["_geo"]})).await; | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |         { | ||||||
|  |             "id": "1", | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "id": "2", | ||||||
|  |             "_geo": null, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "id": "3", | ||||||
|  |             "_geo": { "lat": 1, "lng": 1 }, | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "id": "4", | ||||||
|  |             "_geo": { "lat": "1", "lng": "1" }, | ||||||
|  |         }, | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (task, _status_code) = index.add_documents(documents, None).await; | ||||||
|  |     let response = index.wait_task(task.uid()).await; | ||||||
|  |     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), | ||||||
|  |         @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 1, | ||||||
|  |       "batchUid": 1, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "succeeded", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 4, | ||||||
|  |         "indexedDocuments": 4 | ||||||
|  |       }, | ||||||
|  |       "error": null, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]})) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     // we are expecting docs 4 and 3 first as they have geo | ||||||
|  |     snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), | ||||||
|  |     @r###" | ||||||
|  |     { | ||||||
|  |       "hits": [ | ||||||
|  |         { | ||||||
|  |           "id": "4", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": "1", | ||||||
|  |             "lng": "1" | ||||||
|  |           }, | ||||||
|  |           "_geoDistance": 5522018 | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "3", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": 1, | ||||||
|  |             "lng": 1 | ||||||
|  |           }, | ||||||
|  |           "_geoDistance": 5522018 | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "1" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "2", | ||||||
|  |           "_geo": null | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "query": "", | ||||||
|  |       "processingTimeMs": "[time]", | ||||||
|  |       "limit": 20, | ||||||
|  |       "offset": 0, | ||||||
|  |       "estimatedTotalHits": 4 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let updated_documents = json!([{ | ||||||
|  |       "id": "3", | ||||||
|  |       "doggo": "kefir", | ||||||
|  |     }]); | ||||||
|  |     let (task, _status_code) = index.update_documents(updated_documents, None).await; | ||||||
|  |     let response = index.wait_task(task.uid()).await; | ||||||
|  |     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), | ||||||
|  |         @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 2, | ||||||
|  |       "batchUid": 2, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "succeeded", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 1 | ||||||
|  |       }, | ||||||
|  |       "error": null, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; | ||||||
|  |  | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), | ||||||
|  |     @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": "1" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "2", | ||||||
|  |           "_geo": null | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "3", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": 1, | ||||||
|  |             "lng": 1 | ||||||
|  |           }, | ||||||
|  |           "doggo": "kefir" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "4", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": "1", | ||||||
|  |             "lng": "1" | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 4 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]})) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     // the search response should not have changed: we are expecting docs 4 and 3 first as they have geo | ||||||
|  |     snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), | ||||||
|  |     @r###" | ||||||
|  |     { | ||||||
|  |       "hits": [ | ||||||
|  |         { | ||||||
|  |           "id": "4", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": "1", | ||||||
|  |             "lng": "1" | ||||||
|  |           }, | ||||||
|  |           "_geoDistance": 5522018 | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "3", | ||||||
|  |           "_geo": { | ||||||
|  |             "lat": 1, | ||||||
|  |             "lng": 1 | ||||||
|  |           }, | ||||||
|  |           "doggo": "kefir", | ||||||
|  |           "_geoDistance": 5522018 | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "1" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": "2", | ||||||
|  |           "_geo": null | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "query": "", | ||||||
|  |       "processingTimeMs": "[time]", | ||||||
|  |       "limit": 20, | ||||||
|  |       "offset": 0, | ||||||
|  |       "estimatedTotalHits": 4 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
|   | |||||||
| @@ -161,6 +161,8 @@ async fn delete_document_by_filter() { | |||||||
|     { |     { | ||||||
|       "numberOfDocuments": 4, |       "numberOfDocuments": 4, | ||||||
|       "isIndexing": false, |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|       "fieldDistribution": { |       "fieldDistribution": { | ||||||
|         "color": 3, |         "color": 3, | ||||||
|         "id": 4 |         "id": 4 | ||||||
| @@ -208,6 +210,8 @@ async fn delete_document_by_filter() { | |||||||
|     { |     { | ||||||
|       "numberOfDocuments": 2, |       "numberOfDocuments": 2, | ||||||
|       "isIndexing": false, |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|       "fieldDistribution": { |       "fieldDistribution": { | ||||||
|         "color": 1, |         "color": 1, | ||||||
|         "id": 2 |         "id": 2 | ||||||
| @@ -274,6 +278,8 @@ async fn delete_document_by_filter() { | |||||||
|     { |     { | ||||||
|       "numberOfDocuments": 1, |       "numberOfDocuments": 1, | ||||||
|       "isIndexing": false, |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|       "fieldDistribution": { |       "fieldDistribution": { | ||||||
|         "color": 1, |         "color": 1, | ||||||
|         "id": 1 |         "id": 1 | ||||||
|   | |||||||
| @@ -22,6 +22,7 @@ pub enum GetDump { | |||||||
|     TestV5, |     TestV5, | ||||||
|  |  | ||||||
|     TestV6WithExperimental, |     TestV6WithExperimental, | ||||||
|  |     TestV6WithBatchesAndEnqueuedTasks, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl GetDump { | impl GetDump { | ||||||
| @@ -74,6 +75,10 @@ impl GetDump { | |||||||
|                 "tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump" |                 "tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump" | ||||||
|             ) |             ) | ||||||
|             .into(), |             .into(), | ||||||
|  |             GetDump::TestV6WithBatchesAndEnqueuedTasks => { | ||||||
|  |                 exist_relative_path!("tests/assets/v6_v1.13.0_batches_and_enqueued_tasks.dump") | ||||||
|  |                     .into() | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -27,9 +27,24 @@ async fn import_dump_v1_movie_raw() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "genres": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "overview": 53, | ||||||
|  |         "poster": 53, | ||||||
|  |         "release_date": 53, | ||||||
|  |         "title": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -173,6 +188,8 @@ async fn import_dump_v1_movie_with_settings() { | |||||||
|     { |     { | ||||||
|       "numberOfDocuments": 53, |       "numberOfDocuments": 53, | ||||||
|       "isIndexing": false, |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|       "fieldDistribution": { |       "fieldDistribution": { | ||||||
|         "genres": 53, |         "genres": 53, | ||||||
|         "id": 53, |         "id": 53, | ||||||
| @@ -333,9 +350,24 @@ async fn import_dump_v1_rubygems_with_settings() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "description": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "name": 53, | ||||||
|  |         "summary": 53, | ||||||
|  |         "total_downloads": 53, | ||||||
|  |         "version": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -483,9 +515,24 @@ async fn import_dump_v2_movie_raw() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "genres": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "overview": 53, | ||||||
|  |         "poster": 53, | ||||||
|  |         "release_date": 53, | ||||||
|  |         "title": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -623,9 +670,24 @@ async fn import_dump_v2_movie_with_settings() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "genres": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "overview": 53, | ||||||
|  |         "poster": 53, | ||||||
|  |         "release_date": 53, | ||||||
|  |         "title": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -773,9 +835,24 @@ async fn import_dump_v2_rubygems_with_settings() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "description": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "name": 53, | ||||||
|  |         "summary": 53, | ||||||
|  |         "total_downloads": 53, | ||||||
|  |         "version": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -920,9 +997,24 @@ async fn import_dump_v3_movie_raw() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "genres": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "overview": 53, | ||||||
|  |         "poster": 53, | ||||||
|  |         "release_date": 53, | ||||||
|  |         "title": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -1060,9 +1152,24 @@ async fn import_dump_v3_movie_with_settings() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "genres": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "overview": 53, | ||||||
|  |         "poster": 53, | ||||||
|  |         "release_date": 53, | ||||||
|  |         "title": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -1210,9 +1317,24 @@ async fn import_dump_v3_rubygems_with_settings() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "description": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "name": 53, | ||||||
|  |         "summary": 53, | ||||||
|  |         "total_downloads": 53, | ||||||
|  |         "version": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -1357,9 +1479,24 @@ async fn import_dump_v4_movie_raw() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "genres": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "overview": 53, | ||||||
|  |         "poster": 53, | ||||||
|  |         "release_date": 53, | ||||||
|  |         "title": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -1497,9 +1634,24 @@ async fn import_dump_v4_movie_with_settings() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "genres": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "overview": 53, | ||||||
|  |         "poster": 53, | ||||||
|  |         "release_date": 53, | ||||||
|  |         "title": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -1647,9 +1799,24 @@ async fn import_dump_v4_rubygems_with_settings() { | |||||||
|  |  | ||||||
|     let (stats, code) = index.stats().await; |     let (stats, code) = index.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!( |     snapshot!( | ||||||
|         stats, |       json_string!(stats), | ||||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 53, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "description": 53, | ||||||
|  |         "id": 53, | ||||||
|  |         "name": 53, | ||||||
|  |         "summary": 53, | ||||||
|  |         "total_downloads": 53, | ||||||
|  |         "version": 53 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "### | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (settings, code) = index.settings().await; |     let (settings, code) = index.settings().await; | ||||||
| @@ -1798,9 +1965,17 @@ async fn import_dump_v5() { | |||||||
|         server.wait_task(task["uid"].as_u64().unwrap()).await; |         server.wait_task(task["uid"].as_u64().unwrap()).await; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     let expected_stats = json!({ |     let index1 = server.index("test"); | ||||||
|  |     let index2 = server.index("test2"); | ||||||
|  |  | ||||||
|  |     let (stats, code) = index1.stats().await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|       "numberOfDocuments": 10, |       "numberOfDocuments": 10, | ||||||
|       "isIndexing": false, |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|       "fieldDistribution": { |       "fieldDistribution": { | ||||||
|         "cast": 10, |         "cast": 10, | ||||||
|         "director": 10, |         "director": 10, | ||||||
| @@ -1817,14 +1992,8 @@ async fn import_dump_v5() { | |||||||
|         "vote_average": 10, |         "vote_average": 10, | ||||||
|         "vote_count": 10 |         "vote_count": 10 | ||||||
|       } |       } | ||||||
|     }); |     } | ||||||
|  |     "###); | ||||||
|     let index1 = server.index("test"); |  | ||||||
|     let index2 = server.index("test2"); |  | ||||||
|  |  | ||||||
|     let (stats, code) = index1.stats().await; |  | ||||||
|     snapshot!(code, @"200 OK"); |  | ||||||
|     assert_eq!(stats, expected_stats); |  | ||||||
|  |  | ||||||
|     let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await; |     let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
| @@ -1835,7 +2004,32 @@ async fn import_dump_v5() { | |||||||
|  |  | ||||||
|     let (stats, code) = index2.stats().await; |     let (stats, code) = index2.stats().await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     assert_eq!(stats, expected_stats); |     snapshot!( | ||||||
|  |       json_string!(stats), | ||||||
|  |       @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 10, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "cast": 10, | ||||||
|  |         "director": 10, | ||||||
|  |         "genres": 10, | ||||||
|  |         "id": 10, | ||||||
|  |         "overview": 10, | ||||||
|  |         "popularity": 10, | ||||||
|  |         "poster_path": 10, | ||||||
|  |         "producer": 10, | ||||||
|  |         "production_companies": 10, | ||||||
|  |         "release_date": 10, | ||||||
|  |         "tagline": 10, | ||||||
|  |         "title": 10, | ||||||
|  |         "vote_average": 10, | ||||||
|  |         "vote_count": 10 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|     let (keys, code) = server.list_api_keys("").await; |     let (keys, code) = server.list_api_keys("").await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
| @@ -1908,7 +2102,9 @@ async fn import_dump_v6_containing_experimental_features() { | |||||||
|       "metrics": false, |       "metrics": false, | ||||||
|       "logsRoute": false, |       "logsRoute": false, | ||||||
|       "editDocumentsByFunction": false, |       "editDocumentsByFunction": false, | ||||||
|       "containsFilter": false |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
| @@ -1992,6 +2188,63 @@ async fn import_dump_v6_containing_experimental_features() { | |||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn import_dump_v6_containing_batches_and_enqueued_tasks() { | ||||||
|  |     let temp = tempfile::tempdir().unwrap(); | ||||||
|  |  | ||||||
|  |     let options = Opt { | ||||||
|  |         import_dump: Some(GetDump::TestV6WithBatchesAndEnqueuedTasks.path()), | ||||||
|  |         ..default_settings(temp.path()) | ||||||
|  |     }; | ||||||
|  |     let mut server = Server::new_auth_with_options(options, temp).await; | ||||||
|  |     server.use_api_key("MASTER_KEY"); | ||||||
|  |     server.wait_task(2).await.succeeded(); | ||||||
|  |     let (tasks, _) = server.tasks().await; | ||||||
|  |     snapshot!(json_string!(tasks, { ".results[1].startedAt" => "[date]", ".results[1].finishedAt" => "[date]", ".results[1].duration" => "[date]" }), name: "tasks"); | ||||||
|  |     let (batches, _) = server.batches().await; | ||||||
|  |     snapshot!(json_string!(batches, { ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].duration" => "[date]" }), name: "batches"); | ||||||
|  |  | ||||||
|  |     let (indexes, code) = server.list_indexes(None, None).await; | ||||||
|  |     assert_eq!(code, 200, "{indexes}"); | ||||||
|  |  | ||||||
|  |     assert_eq!(indexes["results"].as_array().unwrap().len(), 1); | ||||||
|  |     assert_eq!(indexes["results"][0]["uid"], json!("kefir")); | ||||||
|  |     assert_eq!(indexes["results"][0]["primaryKey"], json!("id")); | ||||||
|  |  | ||||||
|  |     let (response, code) = server.get_features().await; | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false, | ||||||
|  |       "editDocumentsByFunction": false, | ||||||
|  |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let index = server.index("kefir"); | ||||||
|  |     let (documents, _) = index.get_all_documents_raw("").await; | ||||||
|  |     snapshot!(documents, @r#" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 1, | ||||||
|  |           "dog": "kefir" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 2, | ||||||
|  |           "dog": "intel" | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 2 | ||||||
|  |     } | ||||||
|  |     "#); | ||||||
|  | } | ||||||
|  |  | ||||||
| // In this test we must generate the dump ourselves to ensure the | // In this test we must generate the dump ourselves to ensure the | ||||||
| // `user provided` vectors are well set | // `user provided` vectors are well set | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| @@ -2069,7 +2322,9 @@ async fn generate_and_import_dump_containing_vectors() { | |||||||
|       "metrics": false, |       "metrics": false, | ||||||
|       "logsRoute": false, |       "logsRoute": false, | ||||||
|       "editDocumentsByFunction": false, |       "editDocumentsByFunction": false, | ||||||
|       "containsFilter": false |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -0,0 +1,78 @@ | |||||||
|  | --- | ||||||
|  | source: crates/meilisearch/tests/dumps/mod.rs | ||||||
|  | snapshot_kind: text | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "results": [ | ||||||
|  |     { | ||||||
|  |       "uid": 2, | ||||||
|  |       "progress": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 1 | ||||||
|  |       }, | ||||||
|  |       "stats": { | ||||||
|  |         "totalNbTasks": 1, | ||||||
|  |         "status": { | ||||||
|  |           "succeeded": 1 | ||||||
|  |         }, | ||||||
|  |         "types": { | ||||||
|  |           "documentAdditionOrUpdate": 1 | ||||||
|  |         }, | ||||||
|  |         "indexUids": { | ||||||
|  |           "kefir": 1 | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "duration": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "uid": 1, | ||||||
|  |       "progress": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 1 | ||||||
|  |       }, | ||||||
|  |       "stats": { | ||||||
|  |         "totalNbTasks": 1, | ||||||
|  |         "status": { | ||||||
|  |           "succeeded": 1 | ||||||
|  |         }, | ||||||
|  |         "types": { | ||||||
|  |           "documentAdditionOrUpdate": 1 | ||||||
|  |         }, | ||||||
|  |         "indexUids": { | ||||||
|  |           "kefir": 1 | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "duration": "PT0.144827890S", | ||||||
|  |       "startedAt": "2025-02-04T10:15:21.275640274Z", | ||||||
|  |       "finishedAt": "2025-02-04T10:15:21.420468164Z" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "uid": 0, | ||||||
|  |       "progress": null, | ||||||
|  |       "details": {}, | ||||||
|  |       "stats": { | ||||||
|  |         "totalNbTasks": 1, | ||||||
|  |         "status": { | ||||||
|  |           "succeeded": 1 | ||||||
|  |         }, | ||||||
|  |         "types": { | ||||||
|  |           "indexCreation": 1 | ||||||
|  |         }, | ||||||
|  |         "indexUids": { | ||||||
|  |           "kefir": 1 | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "duration": "PT0.032902186S", | ||||||
|  |       "startedAt": "2025-02-04T10:14:43.559526162Z", | ||||||
|  |       "finishedAt": "2025-02-04T10:14:43.592428348Z" | ||||||
|  |     } | ||||||
|  |   ], | ||||||
|  |   "total": 3, | ||||||
|  |   "limit": 20, | ||||||
|  |   "from": 2, | ||||||
|  |   "next": null | ||||||
|  | } | ||||||
| @@ -0,0 +1,78 @@ | |||||||
|  | --- | ||||||
|  | source: crates/meilisearch/tests/dumps/mod.rs | ||||||
|  | snapshot_kind: text | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "results": [ | ||||||
|  |     { | ||||||
|  |       "uid": 3, | ||||||
|  |       "batchUid": null, | ||||||
|  |       "indexUid": null, | ||||||
|  |       "status": "succeeded", | ||||||
|  |       "type": "dumpCreation", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "dumpUid": null | ||||||
|  |       }, | ||||||
|  |       "error": null, | ||||||
|  |       "duration": "PT0.000629059S", | ||||||
|  |       "enqueuedAt": "2025-02-04T10:22:31.318175268Z", | ||||||
|  |       "startedAt": "2025-02-04T10:22:31.331701375Z", | ||||||
|  |       "finishedAt": "2025-02-04T10:22:31.332330434Z" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "uid": 2, | ||||||
|  |       "batchUid": 2, | ||||||
|  |       "indexUid": "kefir", | ||||||
|  |       "status": "succeeded", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 1 | ||||||
|  |       }, | ||||||
|  |       "error": null, | ||||||
|  |       "duration": "[date]", | ||||||
|  |       "enqueuedAt": "2025-02-04T10:15:49.212484063Z", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "uid": 1, | ||||||
|  |       "batchUid": null, | ||||||
|  |       "indexUid": "kefir", | ||||||
|  |       "status": "succeeded", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 1 | ||||||
|  |       }, | ||||||
|  |       "error": null, | ||||||
|  |       "duration": "PT0.144827890S", | ||||||
|  |       "enqueuedAt": "2025-02-04T10:15:21.258630973Z", | ||||||
|  |       "startedAt": "2025-02-04T10:15:21.275640274Z", | ||||||
|  |       "finishedAt": "2025-02-04T10:15:21.420468164Z" | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |       "uid": 0, | ||||||
|  |       "batchUid": null, | ||||||
|  |       "indexUid": "kefir", | ||||||
|  |       "status": "succeeded", | ||||||
|  |       "type": "indexCreation", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "primaryKey": null | ||||||
|  |       }, | ||||||
|  |       "error": null, | ||||||
|  |       "duration": "PT0.032902186S", | ||||||
|  |       "enqueuedAt": "2025-02-04T10:14:43.550379968Z", | ||||||
|  |       "startedAt": "2025-02-04T10:14:43.559526162Z", | ||||||
|  |       "finishedAt": "2025-02-04T10:14:43.592428348Z" | ||||||
|  |     } | ||||||
|  |   ], | ||||||
|  |   "total": 4, | ||||||
|  |   "limit": 20, | ||||||
|  |   "from": 3, | ||||||
|  |   "next": null | ||||||
|  | } | ||||||
| @@ -21,7 +21,9 @@ async fn experimental_features() { | |||||||
|       "metrics": false, |       "metrics": false, | ||||||
|       "logsRoute": false, |       "logsRoute": false, | ||||||
|       "editDocumentsByFunction": false, |       "editDocumentsByFunction": false, | ||||||
|       "containsFilter": false |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
| @@ -33,7 +35,9 @@ async fn experimental_features() { | |||||||
|       "metrics": true, |       "metrics": true, | ||||||
|       "logsRoute": false, |       "logsRoute": false, | ||||||
|       "editDocumentsByFunction": false, |       "editDocumentsByFunction": false, | ||||||
|       "containsFilter": false |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
| @@ -45,7 +49,9 @@ async fn experimental_features() { | |||||||
|       "metrics": true, |       "metrics": true, | ||||||
|       "logsRoute": false, |       "logsRoute": false, | ||||||
|       "editDocumentsByFunction": false, |       "editDocumentsByFunction": false, | ||||||
|       "containsFilter": false |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
| @@ -58,7 +64,9 @@ async fn experimental_features() { | |||||||
|       "metrics": true, |       "metrics": true, | ||||||
|       "logsRoute": false, |       "logsRoute": false, | ||||||
|       "editDocumentsByFunction": false, |       "editDocumentsByFunction": false, | ||||||
|       "containsFilter": false |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
| @@ -71,7 +79,9 @@ async fn experimental_features() { | |||||||
|       "metrics": true, |       "metrics": true, | ||||||
|       "logsRoute": false, |       "logsRoute": false, | ||||||
|       "editDocumentsByFunction": false, |       "editDocumentsByFunction": false, | ||||||
|       "containsFilter": false |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
| @@ -91,7 +101,9 @@ async fn experimental_feature_metrics() { | |||||||
|       "metrics": true, |       "metrics": true, | ||||||
|       "logsRoute": false, |       "logsRoute": false, | ||||||
|       "editDocumentsByFunction": false, |       "editDocumentsByFunction": false, | ||||||
|       "containsFilter": false |       "containsFilter": false, | ||||||
|  |       "network": false, | ||||||
|  |       "getTaskDocumentsRoute": false | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
| @@ -146,7 +158,7 @@ async fn errors() { | |||||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|     { |     { | ||||||
|       "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`", |       "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`", | ||||||
|       "code": "bad_request", |       "code": "bad_request", | ||||||
|       "type": "invalid_request", |       "type": "invalid_request", | ||||||
|       "link": "https://docs.meilisearch.com/errors#bad_request" |       "link": "https://docs.meilisearch.com/errors#bad_request" | ||||||
|   | |||||||
| @@ -7,6 +7,7 @@ mod dumps; | |||||||
| mod features; | mod features; | ||||||
| mod index; | mod index; | ||||||
| mod logs; | mod logs; | ||||||
|  | mod network; | ||||||
| mod search; | mod search; | ||||||
| mod settings; | mod settings; | ||||||
| mod similar; | mod similar; | ||||||
|   | |||||||
							
								
								
									
										606
									
								
								crates/meilisearch/tests/network/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										606
									
								
								crates/meilisearch/tests/network/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,606 @@ | |||||||
|  | use serde_json::Value::Null; | ||||||
|  |  | ||||||
|  | use crate::common::Server; | ||||||
|  | use crate::json; | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn error_network_not_enabled() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |  | ||||||
|  |     let (response, code) = server.get_network().await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805", | ||||||
|  |       "code": "feature_not_enabled", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = server.set_network(json!({"self": "myself"})).await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805", | ||||||
|  |       "code": "feature_not_enabled", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn errors_on_param() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |  | ||||||
|  |     let (response, code) = server.set_features(json!({"network": true})).await; | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); | ||||||
|  |  | ||||||
|  |     // non-existing param | ||||||
|  |     let (response, code) = server.set_network(json!({"selfie": "myself"})).await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Unknown field `selfie`: expected one of `remotes`, `self`", | ||||||
|  |       "code": "bad_request", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#bad_request" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // self not a string | ||||||
|  |     let (response, code) = server.set_network(json!({"self": 42})).await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.self`: expected a string, but found a positive integer: `42`", | ||||||
|  |       "code": "invalid_network_self", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_network_self" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // remotes not an object | ||||||
|  |     let (response, code) = server.set_network(json!({"remotes": 42})).await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.remotes`: expected an object, but found a positive integer: `42`", | ||||||
|  |       "code": "invalid_network_remotes", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_network_remotes" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // new remote without url | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "new": { | ||||||
|  |                 "searchApiKey": "http://localhost:7700" | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Missing field `.remotes.new.url`", | ||||||
|  |       "code": "missing_network_url", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#missing_network_url" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // remote with url not a string | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "new": { | ||||||
|  |                 "url": 7700 | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.remotes.new.url`: expected a string, but found a positive integer: `7700`", | ||||||
|  |       "code": "invalid_network_url", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_network_url" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // remote with non-existing param | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "new": { | ||||||
|  |                 "url": "http://localhost:7700", | ||||||
|  |                 "doggo": "Intel the Beagle" | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`", | ||||||
|  |       "code": "invalid_network_remotes", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_network_remotes" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // remote with non-string searchApiKey | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "new": { | ||||||
|  |                 "url": "http://localhost:7700", | ||||||
|  |                 "searchApiKey": 1204664602099962445u64, | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.remotes.new.searchApiKey`: expected a string, but found a positive integer: `1204664602099962445`", | ||||||
|  |       "code": "invalid_network_search_api_key", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_network_search_api_key" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // setting `null` on URL a posteriori | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "kefir": { | ||||||
|  |                 "url": "http://localhost:7700", | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": null, | ||||||
|  |       "remotes": { | ||||||
|  |         "kefir": { | ||||||
|  |           "url": "http://localhost:7700", | ||||||
|  |           "searchApiKey": null | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "kefir": { | ||||||
|  |                 "url": Null, | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Field `.remotes.kefir.url` cannot be set to `null`", | ||||||
|  |       "code": "invalid_network_url", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_network_url" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn auth() { | ||||||
|  |     let mut server = Server::new_auth().await; | ||||||
|  |     server.use_api_key("MASTER_KEY"); | ||||||
|  |  | ||||||
|  |     let (response, code) = server.set_features(json!({"network": true})).await; | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); | ||||||
|  |  | ||||||
|  |     let (get_network_key, code) = server | ||||||
|  |         .add_api_key(json!({ | ||||||
|  |           "actions": ["network.get"], | ||||||
|  |           "indexes": ["*"], | ||||||
|  |           "expiresAt": serde_json::Value::Null | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     meili_snap::snapshot!(code, @"201 Created"); | ||||||
|  |     let get_network_key = get_network_key["key"].clone(); | ||||||
|  |  | ||||||
|  |     let (update_network_key, code) = server | ||||||
|  |         .add_api_key(json!({ | ||||||
|  |           "actions": ["network.update"], | ||||||
|  |           "indexes": ["*"], | ||||||
|  |           "expiresAt": serde_json::Value::Null | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     meili_snap::snapshot!(code, @"201 Created"); | ||||||
|  |     let update_network_key = update_network_key["key"].clone(); | ||||||
|  |  | ||||||
|  |     let (search_api_key, code) = server | ||||||
|  |         .add_api_key(json!({ | ||||||
|  |           "actions": ["search"], | ||||||
|  |           "indexes": ["*"], | ||||||
|  |           "expiresAt": serde_json::Value::Null | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     meili_snap::snapshot!(code, @"201 Created"); | ||||||
|  |     let search_api_key = search_api_key["key"].clone(); | ||||||
|  |  | ||||||
|  |     // try with master key | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({ | ||||||
|  |           "self": "master" | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": "master", | ||||||
|  |       "remotes": {} | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = server.get_network().await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  | { | ||||||
|  |   "self": "master", | ||||||
|  |   "remotes": {} | ||||||
|  | } | ||||||
|  | "###); | ||||||
|  |  | ||||||
|  |     // try get with get permission | ||||||
|  |     server.use_api_key(get_network_key.as_str().unwrap()); | ||||||
|  |     let (response, code) = server.get_network().await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  | { | ||||||
|  |   "self": "master", | ||||||
|  |   "remotes": {} | ||||||
|  | } | ||||||
|  | "###); | ||||||
|  |  | ||||||
|  |     // try update with update permission | ||||||
|  |     server.use_api_key(update_network_key.as_str().unwrap()); | ||||||
|  |  | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({ | ||||||
|  |           "self": "api_key" | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  | { | ||||||
|  |   "self": "api_key", | ||||||
|  |   "remotes": {} | ||||||
|  | } | ||||||
|  | "###); | ||||||
|  |  | ||||||
|  |     // try with the other's permission | ||||||
|  |     let (response, code) = server.get_network().await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"403 Forbidden"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "The provided API key is invalid.", | ||||||
|  |       "code": "invalid_api_key", | ||||||
|  |       "type": "auth", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_api_key" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     server.use_api_key(get_network_key.as_str().unwrap()); | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({ | ||||||
|  |           "self": "get_api_key" | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"403 Forbidden"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "The provided API key is invalid.", | ||||||
|  |       "code": "invalid_api_key", | ||||||
|  |       "type": "auth", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_api_key" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     // try either with bad permission | ||||||
|  |     server.use_api_key(search_api_key.as_str().unwrap()); | ||||||
|  |     let (response, code) = server.get_network().await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"403 Forbidden"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "The provided API key is invalid.", | ||||||
|  |       "code": "invalid_api_key", | ||||||
|  |       "type": "auth", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_api_key" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({ | ||||||
|  |           "self": "get_api_key" | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"403 Forbidden"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "The provided API key is invalid.", | ||||||
|  |       "code": "invalid_api_key", | ||||||
|  |       "type": "auth", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_api_key" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn get_and_set_network() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |  | ||||||
|  |     let (response, code) = server.set_features(json!({"network": true})).await; | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); | ||||||
|  |  | ||||||
|  |     let (response, code) = server.get_network().await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": null, | ||||||
|  |       "remotes": {} | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // adding self | ||||||
|  |     let (response, code) = server.set_network(json!({"self": "myself"})).await; | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": "myself", | ||||||
|  |       "remotes": {} | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // adding remotes | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "myself": { | ||||||
|  |                 "url": "http://localhost:7700" | ||||||
|  |             }, | ||||||
|  |             "thy": { | ||||||
|  |                 "url": "http://localhost:7701", | ||||||
|  |                 "searchApiKey": "foo" | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": "myself", | ||||||
|  |       "remotes": { | ||||||
|  |         "myself": { | ||||||
|  |           "url": "http://localhost:7700", | ||||||
|  |           "searchApiKey": null | ||||||
|  |         }, | ||||||
|  |         "thy": { | ||||||
|  |           "url": "http://localhost:7701", | ||||||
|  |           "searchApiKey": "foo" | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // partially updating one remote | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "thy": { | ||||||
|  |                 "searchApiKey": "bar" | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": "myself", | ||||||
|  |       "remotes": { | ||||||
|  |         "myself": { | ||||||
|  |           "url": "http://localhost:7700", | ||||||
|  |           "searchApiKey": null | ||||||
|  |         }, | ||||||
|  |         "thy": { | ||||||
|  |           "url": "http://localhost:7701", | ||||||
|  |           "searchApiKey": "bar" | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // adding one remote | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "them": { | ||||||
|  |                 "url": "http://localhost:7702", | ||||||
|  |                 "searchApiKey": "baz" | ||||||
|  |             } | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": "myself", | ||||||
|  |       "remotes": { | ||||||
|  |         "myself": { | ||||||
|  |           "url": "http://localhost:7700", | ||||||
|  |           "searchApiKey": null | ||||||
|  |         }, | ||||||
|  |         "them": { | ||||||
|  |           "url": "http://localhost:7702", | ||||||
|  |           "searchApiKey": "baz" | ||||||
|  |         }, | ||||||
|  |         "thy": { | ||||||
|  |           "url": "http://localhost:7701", | ||||||
|  |           "searchApiKey": "bar" | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // deleting one remote | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({"remotes": { | ||||||
|  |             "myself": Null, | ||||||
|  |         }})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": "myself", | ||||||
|  |       "remotes": { | ||||||
|  |         "them": { | ||||||
|  |           "url": "http://localhost:7702", | ||||||
|  |           "searchApiKey": "baz" | ||||||
|  |         }, | ||||||
|  |         "thy": { | ||||||
|  |           "url": "http://localhost:7701", | ||||||
|  |           "searchApiKey": "bar" | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // removing self | ||||||
|  |     let (response, code) = server.set_network(json!({"self": Null})).await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": null, | ||||||
|  |       "remotes": { | ||||||
|  |         "them": { | ||||||
|  |           "url": "http://localhost:7702", | ||||||
|  |           "searchApiKey": "baz" | ||||||
|  |         }, | ||||||
|  |         "thy": { | ||||||
|  |           "url": "http://localhost:7701", | ||||||
|  |           "searchApiKey": "bar" | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // setting self again | ||||||
|  |     let (response, code) = server.set_network(json!({"self": "thy"})).await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": "thy", | ||||||
|  |       "remotes": { | ||||||
|  |         "them": { | ||||||
|  |           "url": "http://localhost:7702", | ||||||
|  |           "searchApiKey": "baz" | ||||||
|  |         }, | ||||||
|  |         "thy": { | ||||||
|  |           "url": "http://localhost:7701", | ||||||
|  |           "searchApiKey": "bar" | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // doing nothing | ||||||
|  |     let (response, code) = server.set_network(json!({})).await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |         { | ||||||
|  |           "self": "thy", | ||||||
|  |           "remotes": { | ||||||
|  |             "them": { | ||||||
|  |               "url": "http://localhost:7702", | ||||||
|  |               "searchApiKey": "baz" | ||||||
|  |             }, | ||||||
|  |             "thy": { | ||||||
|  |               "url": "http://localhost:7701", | ||||||
|  |               "searchApiKey": "bar" | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |     // still doing nothing | ||||||
|  |     let (response, code) = server.set_network(json!({"remotes": {}})).await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |         { | ||||||
|  |           "self": "thy", | ||||||
|  |           "remotes": { | ||||||
|  |             "them": { | ||||||
|  |               "url": "http://localhost:7702", | ||||||
|  |               "searchApiKey": "baz" | ||||||
|  |             }, | ||||||
|  |             "thy": { | ||||||
|  |               "url": "http://localhost:7701", | ||||||
|  |               "searchApiKey": "bar" | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |     // good time to check GET | ||||||
|  |     let (response, code) = server.get_network().await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |         { | ||||||
|  |           "self": "thy", | ||||||
|  |           "remotes": { | ||||||
|  |             "them": { | ||||||
|  |               "url": "http://localhost:7702", | ||||||
|  |               "searchApiKey": "baz" | ||||||
|  |             }, | ||||||
|  |             "thy": { | ||||||
|  |               "url": "http://localhost:7701", | ||||||
|  |               "searchApiKey": "bar" | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |     // deleting everything | ||||||
|  |     let (response, code) = server | ||||||
|  |         .set_network(json!({ | ||||||
|  |             "remotes": Null, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "self": "thy", | ||||||
|  |       "remotes": {} | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
| @@ -5,6 +5,8 @@ use crate::common::Server; | |||||||
| use crate::json; | use crate::json; | ||||||
| use crate::search::{SCORE_DOCUMENTS, VECTOR_DOCUMENTS}; | use crate::search::{SCORE_DOCUMENTS, VECTOR_DOCUMENTS}; | ||||||
| 
 | 
 | ||||||
|  | mod proxy; | ||||||
|  | 
 | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn search_empty_list() { | async fn search_empty_list() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
							
								
								
									
										2591
									
								
								crates/meilisearch/tests/search/multi/proxy.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2591
									
								
								crates/meilisearch/tests/search/multi/proxy.rs
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,3 +1,4 @@ | |||||||
|  | use meili_snap::{json_string, snapshot}; | ||||||
| use time::format_description::well_known::Rfc3339; | use time::format_description::well_known::Rfc3339; | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
|  |  | ||||||
| @@ -74,3 +75,253 @@ async fn stats() { | |||||||
|     assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1); |     assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1); | ||||||
|     assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1); |     assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn add_remove_embeddings() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |             "manual": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             }, | ||||||
|  |             "handcrafted": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             }, | ||||||
|  |  | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     // 2 embedded documents for 5 embeddings in total | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }}, | ||||||
|  |       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }}, | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let (stats, _code) = index.stats().await; | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 2, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 5, | ||||||
|  |       "numberOfEmbeddedDocuments": 2, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "id": 2, | ||||||
|  |         "name": 2 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // 2 embedded documents for 3 embeddings in total | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }}, | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.update_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let (stats, _code) = index.stats().await; | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 2, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 3, | ||||||
|  |       "numberOfEmbeddedDocuments": 2, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "id": 2, | ||||||
|  |         "name": 2 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // 2 embedded documents for 2 embeddings in total | ||||||
|  |     let documents = json!([ | ||||||
|  |         {"id": 0, "name": "kefir", "_vectors": { "manual": null, "handcrafted": [0, 0, 0] }}, | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.update_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let (stats, _code) = index.stats().await; | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 2, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 2, | ||||||
|  |       "numberOfEmbeddedDocuments": 2, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "id": 2, | ||||||
|  |         "name": 2 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // 1 embedded document for 2 embeddings in total | ||||||
|  |     let documents = json!([ | ||||||
|  |         {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }}, | ||||||
|  |         {"id": 1, "name": "echo", "_vectors": { "manual": null, "handcrafted": null }}, | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.update_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let (stats, _code) = index.stats().await; | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 2, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 2, | ||||||
|  |       "numberOfEmbeddedDocuments": 1, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "id": 2, | ||||||
|  |         "name": 2 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn add_remove_embedded_documents() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |             "manual": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             }, | ||||||
|  |             "handcrafted": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             }, | ||||||
|  |  | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     // 2 embedded documents for 5 embeddings in total | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }}, | ||||||
|  |       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }}, | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let (stats, _code) = index.stats().await; | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 2, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 5, | ||||||
|  |       "numberOfEmbeddedDocuments": 2, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "id": 2, | ||||||
|  |         "name": 2 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // delete one embedded document, leaving 1 embedded document with 3 embeddings in total | ||||||
|  |     let (response, code) = index.delete_document(0).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let (stats, _code) = index.stats().await; | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 1, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 3, | ||||||
|  |       "numberOfEmbeddedDocuments": 1, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "id": 1, | ||||||
|  |         "name": 1 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn update_embedder_settings() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |  | ||||||
|  |     // 2 embedded documents for 3 embeddings in total | ||||||
|  |     // but no embedders are added in the settings yet so we expect 0 embedded documents for 0 embeddings in total | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }}, | ||||||
|  |       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }}, | ||||||
|  |     ]); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let (stats, _code) = index.stats().await; | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 2, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "id": 2, | ||||||
|  |         "name": 2 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // add embedders to the settings | ||||||
|  |     // 2 embedded documents for 3 embeddings in total | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |             "manual": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             }, | ||||||
|  |             "handcrafted": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             }, | ||||||
|  |  | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     let (stats, _code) = index.stats().await; | ||||||
|  |     snapshot!(json_string!(stats), @r###" | ||||||
|  |     { | ||||||
|  |       "numberOfDocuments": 2, | ||||||
|  |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 3, | ||||||
|  |       "numberOfEmbeddedDocuments": 2, | ||||||
|  |       "fieldDistribution": { | ||||||
|  |         "id": 2, | ||||||
|  |         "name": 2 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -126,14 +126,17 @@ async fn check_the_index_scheduler(server: &Server) { | |||||||
|     "#); |     "#); | ||||||
|     // And their metadata are still right |     // And their metadata are still right | ||||||
|     let (stats, _) = server.stats().await; |     let (stats, _) = server.stats().await; | ||||||
|     snapshot!(stats, @r#" |     snapshot!(stats, @r###" | ||||||
|     { |     { | ||||||
|       "databaseSize": 438272, |       "databaseSize": 438272, | ||||||
|  |       "usedDatabaseSize": 196608, | ||||||
|       "lastUpdate": "2025-01-23T11:36:22.634859166Z", |       "lastUpdate": "2025-01-23T11:36:22.634859166Z", | ||||||
|       "indexes": { |       "indexes": { | ||||||
|         "kefir": { |         "kefir": { | ||||||
|           "numberOfDocuments": 1, |           "numberOfDocuments": 1, | ||||||
|           "isIndexing": false, |           "isIndexing": false, | ||||||
|  |           "numberOfEmbeddings": 0, | ||||||
|  |           "numberOfEmbeddedDocuments": 0, | ||||||
|           "fieldDistribution": { |           "fieldDistribution": { | ||||||
|             "age": 1, |             "age": 1, | ||||||
|             "description": 1, |             "description": 1, | ||||||
| @@ -144,7 +147,7 @@ async fn check_the_index_scheduler(server: &Server) { | |||||||
|         } |         } | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     "#); |     "###); | ||||||
|  |  | ||||||
|     // Wait until the upgrade has been applied to all indexes to avoid flakiness |     // Wait until the upgrade has been applied to all indexes to avoid flakiness | ||||||
|     let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await; |     let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await; | ||||||
| @@ -205,14 +208,17 @@ async fn check_the_index_scheduler(server: &Server) { | |||||||
|     snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41"); |     snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41"); | ||||||
|  |  | ||||||
|     let (stats, _) = server.stats().await; |     let (stats, _) = server.stats().await; | ||||||
|     snapshot!(stats, @r#" |     snapshot!(stats, @r###" | ||||||
|     { |     { | ||||||
|       "databaseSize": 438272, |       "databaseSize": 438272, | ||||||
|  |       "usedDatabaseSize": 196608, | ||||||
|       "lastUpdate": "2025-01-23T11:36:22.634859166Z", |       "lastUpdate": "2025-01-23T11:36:22.634859166Z", | ||||||
|       "indexes": { |       "indexes": { | ||||||
|         "kefir": { |         "kefir": { | ||||||
|           "numberOfDocuments": 1, |           "numberOfDocuments": 1, | ||||||
|           "isIndexing": false, |           "isIndexing": false, | ||||||
|  |           "numberOfEmbeddings": 0, | ||||||
|  |           "numberOfEmbeddedDocuments": 0, | ||||||
|           "fieldDistribution": { |           "fieldDistribution": { | ||||||
|             "age": 1, |             "age": 1, | ||||||
|             "description": 1, |             "description": 1, | ||||||
| @@ -223,13 +229,15 @@ async fn check_the_index_scheduler(server: &Server) { | |||||||
|         } |         } | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     "#); |     "###); | ||||||
|     let index = server.index("kefir"); |     let index = server.index("kefir"); | ||||||
|     let (stats, _) = index.stats().await; |     let (stats, _) = index.stats().await; | ||||||
|     snapshot!(stats, @r#" |     snapshot!(stats, @r###" | ||||||
|     { |     { | ||||||
|       "numberOfDocuments": 1, |       "numberOfDocuments": 1, | ||||||
|       "isIndexing": false, |       "isIndexing": false, | ||||||
|  |       "numberOfEmbeddings": 0, | ||||||
|  |       "numberOfEmbeddedDocuments": 0, | ||||||
|       "fieldDistribution": { |       "fieldDistribution": { | ||||||
|         "age": 1, |         "age": 1, | ||||||
|         "description": 1, |         "description": 1, | ||||||
| @@ -238,7 +246,7 @@ async fn check_the_index_scheduler(server: &Server) { | |||||||
|         "surname": 1 |         "surname": 1 | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     "#); |     "###); | ||||||
|  |  | ||||||
|     // Delete all the tasks of a specific batch |     // Delete all the tasks of a specific batch | ||||||
|     let (task, _) = server.delete_tasks("batchUids=10").await; |     let (task, _) = server.delete_tasks("batchUids=10").await; | ||||||
|   | |||||||
| @@ -32,7 +32,7 @@ async fn field_unavailable_for_source() { | |||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(response, @r###" |     snapshot!(response, @r###" | ||||||
|     { |     { | ||||||
|       "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `dimensions`, `distribution`, `url`, `binaryQuantized`", |       "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `documentTemplateMaxBytes`, `dimensions`, `distribution`, `url`, `binaryQuantized`", | ||||||
|       "code": "invalid_settings_embedders", |       "code": "invalid_settings_embedders", | ||||||
|       "type": "invalid_request", |       "type": "invalid_request", | ||||||
|       "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" |       "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" | ||||||
|   | |||||||
| @@ -1,19 +1,26 @@ | |||||||
| use std::fs::{read_dir, read_to_string, remove_file, File}; | use std::fs::{read_dir, read_to_string, remove_file, File}; | ||||||
| use std::io::BufWriter; | use std::io::{BufWriter, Write as _}; | ||||||
| use std::path::PathBuf; | use std::path::PathBuf; | ||||||
|  | use std::time::Instant; | ||||||
|  |  | ||||||
| use anyhow::Context; | use anyhow::{bail, Context}; | ||||||
| use clap::{Parser, Subcommand}; | use clap::{Parser, Subcommand, ValueEnum}; | ||||||
| use dump::{DumpWriter, IndexMetadata}; | use dump::{DumpWriter, IndexMetadata}; | ||||||
| use file_store::FileStore; | use file_store::FileStore; | ||||||
| use meilisearch_auth::AuthController; | use meilisearch_auth::AuthController; | ||||||
| use meilisearch_types::heed::types::{SerdeJson, Str}; | use meilisearch_types::batches::Batch; | ||||||
| use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; | use meilisearch_types::heed::types::{Bytes, SerdeJson, Str}; | ||||||
|  | use meilisearch_types::heed::{ | ||||||
|  |     CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, | ||||||
|  | }; | ||||||
|  | use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; | ||||||
| use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; | use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; | ||||||
|  | use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | ||||||
| use meilisearch_types::milli::{obkv_to_json, BEU32}; | use meilisearch_types::milli::{obkv_to_json, BEU32}; | ||||||
| use meilisearch_types::tasks::{Status, Task}; | use meilisearch_types::tasks::{Status, Task}; | ||||||
| use meilisearch_types::versioning::{get_version, parse_version}; | use meilisearch_types::versioning::{get_version, parse_version}; | ||||||
| use meilisearch_types::Index; | use meilisearch_types::Index; | ||||||
|  | use serde_json::Value::Object; | ||||||
| use time::macros::format_description; | use time::macros::format_description; | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
| use upgrade::OfflineUpgrade; | use upgrade::OfflineUpgrade; | ||||||
| @@ -65,6 +72,24 @@ enum Command { | |||||||
|         skip_enqueued_tasks: bool, |         skip_enqueued_tasks: bool, | ||||||
|     }, |     }, | ||||||
|  |  | ||||||
|  |     /// Exports the documents of an index in NDJSON format from a Meilisearch index to stdout. | ||||||
|  |     /// | ||||||
|  |     /// This command can be executed on a running Meilisearch database. However, please note that | ||||||
|  |     /// it will maintain a read-only transaction for the duration of the extraction process. | ||||||
|  |     ExportDocuments { | ||||||
|  |         /// The index name to export the documents from. | ||||||
|  |         #[arg(long)] | ||||||
|  |         index_name: String, | ||||||
|  |  | ||||||
|  |         /// Do not export vectors with the documents. | ||||||
|  |         #[arg(long)] | ||||||
|  |         ignore_vectors: bool, | ||||||
|  |  | ||||||
|  |         /// The number of documents to skip. | ||||||
|  |         #[arg(long)] | ||||||
|  |         offset: Option<usize>, | ||||||
|  |     }, | ||||||
|  |  | ||||||
|     /// Attempts to upgrade from one major version to the next without a dump. |     /// Attempts to upgrade from one major version to the next without a dump. | ||||||
|     /// |     /// | ||||||
|     /// Make sure to run this command when Meilisearch is not running! |     /// Make sure to run this command when Meilisearch is not running! | ||||||
| @@ -78,6 +103,46 @@ enum Command { | |||||||
|         #[arg(long)] |         #[arg(long)] | ||||||
|         target_version: String, |         target_version: String, | ||||||
|     }, |     }, | ||||||
|  |  | ||||||
|  |     /// Compact the index by using LMDB. | ||||||
|  |     /// | ||||||
|  |     /// You must run this command while Meilisearch is off. The reason is that Meilisearch keeps the | ||||||
|  |     /// indexes open and this compaction operation writes into another file. Meilisearch will not | ||||||
|  |     /// switch to the new file. | ||||||
|  |     /// | ||||||
|  |     /// **Another possibility** is to keep Meilisearch running to serve search requests, run the | ||||||
|  |     /// compaction and once done, close and immediately reopen Meilisearch. This way Meilisearch | ||||||
|  |     /// will reopen the data.mdb file when rebooting and see the newly compacted file, ignoring | ||||||
|  |     /// the previous non-compacted data. | ||||||
|  |     /// | ||||||
|  |     /// Note that the compaction will open the index, copy and compact the index into another file | ||||||
|  |     /// **on the same disk as the index** and replace the previous index with the newly compacted | ||||||
|  |     /// one. This means that the disk must have enough room for at most two times the index size. | ||||||
|  |     /// | ||||||
|  |     /// To make sure not to lose any data, this tool takes a mutable transaction on the index | ||||||
|  |     /// before running the copy and compaction. This way the current indexation must finish before | ||||||
|  |     /// the compaction operation can start. Once the compaction is done, the big index is replaced | ||||||
|  |     /// by the compacted one and the mutable transaction is released. | ||||||
|  |     CompactIndex { index_name: String }, | ||||||
|  |  | ||||||
|  |     /// Uses the hair dryer to make the dedicated pages hot in cache. | ||||||
|  |     /// | ||||||
|  |     /// To make the index faster we must make sure it is hot in the DB cache: that's the curse of | ||||||
|  |     /// memory-mapping but also its strength. This command is designed to make a specific part of | ||||||
|  |     /// the index hot in cache. | ||||||
|  |     HairDryer { | ||||||
|  |         #[arg(long, value_delimiter = ',')] | ||||||
|  |         index_name: Vec<String>, | ||||||
|  |  | ||||||
|  |         #[arg(long, value_delimiter = ',')] | ||||||
|  |         index_part: Vec<IndexPart>, | ||||||
|  |     }, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Clone, ValueEnum)] | ||||||
|  | enum IndexPart { | ||||||
|  |     /// Will make the arroy index hot. | ||||||
|  |     Arroy, | ||||||
| } | } | ||||||
|  |  | ||||||
| fn main() -> anyhow::Result<()> { | fn main() -> anyhow::Result<()> { | ||||||
| @@ -90,10 +155,17 @@ fn main() -> anyhow::Result<()> { | |||||||
|         Command::ExportADump { dump_dir, skip_enqueued_tasks } => { |         Command::ExportADump { dump_dir, skip_enqueued_tasks } => { | ||||||
|             export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version) |             export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version) | ||||||
|         } |         } | ||||||
|  |         Command::ExportDocuments { index_name, ignore_vectors, offset } => { | ||||||
|  |             export_documents(db_path, index_name, ignore_vectors, offset) | ||||||
|  |         } | ||||||
|         Command::OfflineUpgrade { target_version } => { |         Command::OfflineUpgrade { target_version } => { | ||||||
|             let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; |             let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; | ||||||
|             OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() |             OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() | ||||||
|         } |         } | ||||||
|  |         Command::CompactIndex { index_name } => compact_index(db_path, &index_name), | ||||||
|  |         Command::HairDryer { index_name, index_part } => { | ||||||
|  |             hair_dryer(db_path, &index_name, &index_part) | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -230,22 +302,28 @@ fn export_a_dump( | |||||||
|  |  | ||||||
|     eprintln!("Successfully dumped {count} keys!"); |     eprintln!("Successfully dumped {count} keys!"); | ||||||
|  |  | ||||||
|  |     eprintln!("Dumping the queue"); | ||||||
|     let rtxn = env.read_txn()?; |     let rtxn = env.read_txn()?; | ||||||
|     let all_tasks: Database<BEU32, SerdeJson<Task>> = |     let all_tasks: Database<BEU32, SerdeJson<Task>> = | ||||||
|         try_opening_database(&env, &rtxn, "all-tasks")?; |         try_opening_database(&env, &rtxn, "all-tasks")?; | ||||||
|  |     let all_batches: Database<BEU32, SerdeJson<Batch>> = | ||||||
|  |         try_opening_database(&env, &rtxn, "all-batches")?; | ||||||
|     let index_mapping: Database<Str, UuidCodec> = |     let index_mapping: Database<Str, UuidCodec> = | ||||||
|         try_opening_database(&env, &rtxn, "index-mapping")?; |         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||||
|  |  | ||||||
|     if skip_enqueued_tasks { |     eprintln!("Dumping the tasks"); | ||||||
|         eprintln!("Skip dumping the enqueued tasks..."); |  | ||||||
|     } else { |  | ||||||
|     let mut dump_tasks = dump.create_tasks_queue()?; |     let mut dump_tasks = dump.create_tasks_queue()?; | ||||||
|         let mut count = 0; |     let mut count_tasks = 0; | ||||||
|  |     let mut count_enqueued_tasks = 0; | ||||||
|     for ret in all_tasks.iter(&rtxn)? { |     for ret in all_tasks.iter(&rtxn)? { | ||||||
|         let (_, t) = ret?; |         let (_, t) = ret?; | ||||||
|         let status = t.status; |         let status = t.status; | ||||||
|         let content_file = t.content_uuid(); |         let content_file = t.content_uuid(); | ||||||
|  |  | ||||||
|  |         if status == Status::Enqueued && skip_enqueued_tasks { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let mut dump_content_file = dump_tasks.push_task(&t.into())?; |         let mut dump_content_file = dump_tasks.push_task(&t.into())?; | ||||||
|  |  | ||||||
|         // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. |         // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. | ||||||
| @@ -259,8 +337,7 @@ fn export_a_dump( | |||||||
|                         DocumentsBatchReader::from_reader(content_file).with_context(|| { |                         DocumentsBatchReader::from_reader(content_file).with_context(|| { | ||||||
|                             format!("While reading content file {:?}", content_file_uuid) |                             format!("While reading content file {:?}", content_file_uuid) | ||||||
|                         })?; |                         })?; | ||||||
|                         let (mut cursor, documents_batch_index) = |                     let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); | ||||||
|                             reader.into_cursor_and_fields_index(); |  | ||||||
|                     while let Some(doc) = cursor.next_document().with_context(|| { |                     while let Some(doc) = cursor.next_document().with_context(|| { | ||||||
|                         format!("While iterating on content file {:?}", content_file_uuid) |                         format!("While iterating on content file {:?}", content_file_uuid) | ||||||
|                     })? { |                     })? { | ||||||
| @@ -268,9 +345,7 @@ fn export_a_dump( | |||||||
|                             .push_document(&obkv_to_object(doc, &documents_batch_index)?)?; |                             .push_document(&obkv_to_object(doc, &documents_batch_index)?)?; | ||||||
|                     } |                     } | ||||||
|                 } else { |                 } else { | ||||||
|                         eprintln!( |                     eprintln!("Dumping the enqueued tasks reading them in JSON stream format..."); | ||||||
|                             "Dumping the enqueued tasks reading them in JSON stream format..." |  | ||||||
|                         ); |  | ||||||
|                     for document in |                     for document in | ||||||
|                         serde_json::de::Deserializer::from_reader(content_file).into_iter() |                         serde_json::de::Deserializer::from_reader(content_file).into_iter() | ||||||
|                     { |                     { | ||||||
| @@ -282,18 +357,31 @@ fn export_a_dump( | |||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 dump_content_file.flush()?; |                 dump_content_file.flush()?; | ||||||
|                     count += 1; |                 count_enqueued_tasks += 1; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |         count_tasks += 1; | ||||||
|     } |     } | ||||||
|     dump_tasks.flush()?; |     dump_tasks.flush()?; | ||||||
|  |     eprintln!( | ||||||
|  |         "Successfully dumped {count_tasks} tasks including {count_enqueued_tasks} enqueued tasks!" | ||||||
|  |     ); | ||||||
|  |  | ||||||
|         eprintln!("Successfully dumped {count} enqueued tasks!"); |     // 4. dump the batches | ||||||
|  |     eprintln!("Dumping the batches"); | ||||||
|  |     let mut dump_batches = dump.create_batches_queue()?; | ||||||
|  |     let mut count = 0; | ||||||
|  |  | ||||||
|  |     for ret in all_batches.iter(&rtxn)? { | ||||||
|  |         let (_, b) = ret?; | ||||||
|  |         dump_batches.push_batch(&b)?; | ||||||
|  |         count += 1; | ||||||
|     } |     } | ||||||
|  |     dump_batches.flush()?; | ||||||
|  |     eprintln!("Successfully dumped {count} batches!"); | ||||||
|  |  | ||||||
|  |     // 5. Dump the indexes | ||||||
|     eprintln!("Dumping the indexes..."); |     eprintln!("Dumping the indexes..."); | ||||||
|  |  | ||||||
|     // 4. Dump the indexes |  | ||||||
|     let mut count = 0; |     let mut count = 0; | ||||||
|     for result in index_mapping.iter(&rtxn)? { |     for result in index_mapping.iter(&rtxn)? { | ||||||
|         let (uid, uuid) = result?; |         let (uid, uuid) = result?; | ||||||
| @@ -314,14 +402,14 @@ fn export_a_dump( | |||||||
|         let fields_ids_map = index.fields_ids_map(&rtxn)?; |         let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|         let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); |         let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||||
|  |  | ||||||
|         // 4.1. Dump the documents |         // 5.1. Dump the documents | ||||||
|         for ret in index.all_documents(&rtxn)? { |         for ret in index.all_documents(&rtxn)? { | ||||||
|             let (_id, doc) = ret?; |             let (_id, doc) = ret?; | ||||||
|             let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?; |             let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?; | ||||||
|             index_dumper.push_document(&document)?; |             index_dumper.push_document(&document)?; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // 4.2. Dump the settings |         // 5.2. Dump the settings | ||||||
|         let settings = meilisearch_types::settings::settings( |         let settings = meilisearch_types::settings::settings( | ||||||
|             &index, |             &index, | ||||||
|             &rtxn, |             &rtxn, | ||||||
| @@ -347,3 +435,241 @@ fn export_a_dump( | |||||||
|  |  | ||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { | ||||||
|  |     let index_scheduler_path = db_path.join("tasks"); | ||||||
|  |     let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } | ||||||
|  |         .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; | ||||||
|  |  | ||||||
|  |     let rtxn = env.read_txn()?; | ||||||
|  |     let index_mapping: Database<Str, UuidCodec> = | ||||||
|  |         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||||
|  |  | ||||||
|  |     for result in index_mapping.iter(&rtxn)? { | ||||||
|  |         let (uid, uuid) = result?; | ||||||
|  |  | ||||||
|  |         if uid != index_name { | ||||||
|  |             eprintln!("Found index {uid} and skipping it"); | ||||||
|  |             continue; | ||||||
|  |         } else { | ||||||
|  |             eprintln!("Found index {uid} 🎉"); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let index_path = db_path.join("indexes").join(uuid.to_string()); | ||||||
|  |         let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { | ||||||
|  |             format!("While trying to open the index at path {:?}", index_path.display()) | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         eprintln!("Awaiting for a mutable transaction..."); | ||||||
|  |         let _wtxn = index.write_txn().context("While awaiting for a write transaction")?; | ||||||
|  |  | ||||||
|  |         // We create and immediately drop the file because the | ||||||
|  |         let non_compacted_index_file_path = index_path.join("data.mdb"); | ||||||
|  |         let compacted_index_file_path = index_path.join("data.mdb.cpy"); | ||||||
|  |  | ||||||
|  |         eprintln!("Compacting the index..."); | ||||||
|  |         let before_compaction = Instant::now(); | ||||||
|  |         let new_file = index | ||||||
|  |             .copy_to_file(&compacted_index_file_path, CompactionOption::Enabled) | ||||||
|  |             .with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?; | ||||||
|  |  | ||||||
|  |         let after_size = new_file.metadata()?.len(); | ||||||
|  |         let before_size = std::fs::metadata(&non_compacted_index_file_path) | ||||||
|  |             .with_context(|| { | ||||||
|  |                 format!( | ||||||
|  |                     "While retrieving the metadata of {}", | ||||||
|  |                     non_compacted_index_file_path.display(), | ||||||
|  |                 ) | ||||||
|  |             })? | ||||||
|  |             .len(); | ||||||
|  |  | ||||||
|  |         let reduction = before_size as f64 / after_size as f64; | ||||||
|  |         println!("Compaction successful. Took around {:.2?}", before_compaction.elapsed()); | ||||||
|  |         eprintln!("The index went from {before_size} bytes to {after_size} bytes ({reduction:.2}x reduction)"); | ||||||
|  |  | ||||||
|  |         eprintln!("Replacing the non-compacted index by the compacted one..."); | ||||||
|  |         std::fs::rename(&compacted_index_file_path, &non_compacted_index_file_path).with_context( | ||||||
|  |             || { | ||||||
|  |                 format!( | ||||||
|  |                     "While renaming {} into {}", | ||||||
|  |                     compacted_index_file_path.display(), | ||||||
|  |                     non_compacted_index_file_path.display(), | ||||||
|  |                 ) | ||||||
|  |             }, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         drop(new_file); | ||||||
|  |  | ||||||
|  |         println!("Everything's done 🎉"); | ||||||
|  |         return Ok(()); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bail!("Target index {index_name} not found!") | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn export_documents( | ||||||
|  |     db_path: PathBuf, | ||||||
|  |     index_name: String, | ||||||
|  |     ignore_vectors: bool, | ||||||
|  |     offset: Option<usize>, | ||||||
|  | ) -> anyhow::Result<()> { | ||||||
|  |     let index_scheduler_path = db_path.join("tasks"); | ||||||
|  |     let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } | ||||||
|  |         .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; | ||||||
|  |  | ||||||
|  |     let rtxn = env.read_txn()?; | ||||||
|  |     let index_mapping: Database<Str, UuidCodec> = | ||||||
|  |         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||||
|  |  | ||||||
|  |     for result in index_mapping.iter(&rtxn)? { | ||||||
|  |         let (uid, uuid) = result?; | ||||||
|  |         if uid == index_name { | ||||||
|  |             let index_path = db_path.join("indexes").join(uuid.to_string()); | ||||||
|  |             let index = | ||||||
|  |                 Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { | ||||||
|  |                     format!("While trying to open the index at path {:?}", index_path.display()) | ||||||
|  |                 })?; | ||||||
|  |  | ||||||
|  |             let rtxn = index.read_txn()?; | ||||||
|  |             let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|  |             let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||||
|  |             let embedding_configs = index.embedding_configs(&rtxn)?; | ||||||
|  |  | ||||||
|  |             if let Some(offset) = offset { | ||||||
|  |                 eprintln!("Skipping {offset} documents"); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             let mut stdout = BufWriter::new(std::io::stdout()); | ||||||
|  |             let all_documents = index.documents_ids(&rtxn)?.into_iter().skip(offset.unwrap_or(0)); | ||||||
|  |             for (i, ret) in index.iter_documents(&rtxn, all_documents)?.enumerate() { | ||||||
|  |                 let (id, doc) = ret?; | ||||||
|  |                 let mut document = obkv_to_json(&all_fields, &fields_ids_map, doc)?; | ||||||
|  |  | ||||||
|  |                 if i % 10_000 == 0 { | ||||||
|  |                     eprintln!("Starting the {}th document", i + offset.unwrap_or(0)); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 if !ignore_vectors { | ||||||
|  |                     'inject_vectors: { | ||||||
|  |                         let embeddings = index.embeddings(&rtxn, id)?; | ||||||
|  |  | ||||||
|  |                         if embeddings.is_empty() { | ||||||
|  |                             break 'inject_vectors; | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         let vectors = document | ||||||
|  |                             .entry(RESERVED_VECTORS_FIELD_NAME) | ||||||
|  |                             .or_insert(Object(Default::default())); | ||||||
|  |  | ||||||
|  |                         let Object(vectors) = vectors else { | ||||||
|  |                             return Err(meilisearch_types::milli::Error::UserError( | ||||||
|  |                                 meilisearch_types::milli::UserError::InvalidVectorsMapType { | ||||||
|  |                                     document_id: { | ||||||
|  |                                         if let Ok(Some(Ok(index))) = index | ||||||
|  |                                             .external_id_of(&rtxn, std::iter::once(id)) | ||||||
|  |                                             .map(|it| it.into_iter().next()) | ||||||
|  |                                         { | ||||||
|  |                                             index | ||||||
|  |                                         } else { | ||||||
|  |                                             format!("internal docid={id}") | ||||||
|  |                                         } | ||||||
|  |                                     }, | ||||||
|  |                                     value: vectors.clone(), | ||||||
|  |                                 }, | ||||||
|  |                             ) | ||||||
|  |                             .into()); | ||||||
|  |                         }; | ||||||
|  |  | ||||||
|  |                         for (embedder_name, embeddings) in embeddings { | ||||||
|  |                             let user_provided = embedding_configs | ||||||
|  |                                 .iter() | ||||||
|  |                                 .find(|conf| conf.name == embedder_name) | ||||||
|  |                                 .is_some_and(|conf| conf.user_provided.contains(id)); | ||||||
|  |  | ||||||
|  |                             let embeddings = ExplicitVectors { | ||||||
|  |                                 embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( | ||||||
|  |                                     embeddings, | ||||||
|  |                                 )), | ||||||
|  |                                 regenerate: !user_provided, | ||||||
|  |                             }; | ||||||
|  |                             vectors | ||||||
|  |                                 .insert(embedder_name, serde_json::to_value(embeddings).unwrap()); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 serde_json::to_writer(&mut stdout, &document)?; | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             stdout.flush()?; | ||||||
|  |         } else { | ||||||
|  |             eprintln!("Found index {uid} but it's not the right index..."); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     Ok(()) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn hair_dryer( | ||||||
|  |     db_path: PathBuf, | ||||||
|  |     index_names: &[String], | ||||||
|  |     index_parts: &[IndexPart], | ||||||
|  | ) -> anyhow::Result<()> { | ||||||
|  |     let index_scheduler_path = db_path.join("tasks"); | ||||||
|  |     let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } | ||||||
|  |         .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; | ||||||
|  |  | ||||||
|  |     eprintln!("Trying to get a read transaction on the index scheduler..."); | ||||||
|  |  | ||||||
|  |     let rtxn = env.read_txn()?; | ||||||
|  |     let index_mapping: Database<Str, UuidCodec> = | ||||||
|  |         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||||
|  |  | ||||||
|  |     for result in index_mapping.iter(&rtxn)? { | ||||||
|  |         let (uid, uuid) = result?; | ||||||
|  |         if index_names.iter().any(|i| i == uid) { | ||||||
|  |             let index_path = db_path.join("indexes").join(uuid.to_string()); | ||||||
|  |             let index = | ||||||
|  |                 Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { | ||||||
|  |                     format!("While trying to open the index at path {:?}", index_path.display()) | ||||||
|  |                 })?; | ||||||
|  |  | ||||||
|  |             eprintln!("Trying to get a read transaction on the {uid} index..."); | ||||||
|  |  | ||||||
|  |             let rtxn = index.read_txn()?; | ||||||
|  |             for part in index_parts { | ||||||
|  |                 match part { | ||||||
|  |                     IndexPart::Arroy => { | ||||||
|  |                         let mut count = 0; | ||||||
|  |                         let total = index.vector_arroy.len(&rtxn)?; | ||||||
|  |                         eprintln!("Hair drying arroy for {uid}..."); | ||||||
|  |                         for (i, result) in index | ||||||
|  |                             .vector_arroy | ||||||
|  |                             .remap_types::<Bytes, Bytes>() | ||||||
|  |                             .iter(&rtxn)? | ||||||
|  |                             .enumerate() | ||||||
|  |                         { | ||||||
|  |                             let (key, value) = result?; | ||||||
|  |  | ||||||
|  |                             // All of this just to avoid compiler optimizations 🤞 | ||||||
|  |                             // We must read all the bytes to make the pages hot in cache. | ||||||
|  |                             // <https://doc.rust-lang.org/std/hint/fn.black_box.html> | ||||||
|  |                             count += std::hint::black_box(key.iter().fold(0, |acc, _| acc + 1)); | ||||||
|  |                             count += std::hint::black_box(value.iter().fold(0, |acc, _| acc + 1)); | ||||||
|  |  | ||||||
|  |                             if i % 10_000 == 0 { | ||||||
|  |                                 let perc = (i as f64) / (total as f64) * 100.0; | ||||||
|  |                                 eprintln!("Visited {i}/{total} ({perc:.2}%) keys") | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                         eprintln!("Done hair drying a total of at least {count} bytes."); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             eprintln!("Found index {uid} but it's not the right index..."); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     Ok(()) | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| <p align="center"> | <p align="center"> | ||||||
|   <img alt="the milli logo" src="../assets/milli-logo.svg"> |   <img alt="the milli logo" src="../../assets/milli-logo.svg"> | ||||||
| </p> | </p> | ||||||
|  |  | ||||||
| <p align="center">a concurrent indexer combined with fast and relevant search algorithms</p> | <p align="center">a concurrent indexer combined with fast and relevant search algorithms</p> | ||||||
|   | |||||||
| @@ -22,7 +22,7 @@ use crate::heed_codec::version::VersionCodec; | |||||||
| use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; | use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; | ||||||
| use crate::order_by_map::OrderByMap; | use crate::order_by_map::OrderByMap; | ||||||
| use crate::proximity::ProximityPrecision; | use crate::proximity::ProximityPrecision; | ||||||
| use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; | use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig}; | ||||||
| use crate::{ | use crate::{ | ||||||
|     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, |     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, | ||||||
|     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, |     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, | ||||||
| @@ -1731,6 +1731,18 @@ impl Index { | |||||||
|         let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default(); |         let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default(); | ||||||
|         Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 }) |         Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> { | ||||||
|  |         let mut stats = ArroyStats::default(); | ||||||
|  |         let embedding_configs = self.embedding_configs(rtxn)?; | ||||||
|  |         for config in embedding_configs { | ||||||
|  |             let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); | ||||||
|  |             let reader = | ||||||
|  |                 ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized()); | ||||||
|  |             reader.aggregate_stats(rtxn, &mut stats)?; | ||||||
|  |         } | ||||||
|  |         Ok(stats) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Deserialize, Serialize)] | #[derive(Debug, Deserialize, Serialize)] | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| use std::cmp::Ordering; | use std::cmp::Ordering; | ||||||
|  |  | ||||||
| use itertools::Itertools; | use itertools::Itertools; | ||||||
| use serde::Serialize; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| use crate::distance_between_two_points; | use crate::distance_between_two_points; | ||||||
|  |  | ||||||
| @@ -36,6 +36,15 @@ enum RankOrValue<'a> { | |||||||
|     Score(f64), |     Score(f64), | ||||||
| } | } | ||||||
|  |  | ||||||
/// A serializable score value, as produced by
/// `ScoreDetails::weighted_score_values`.
///
/// Variant and field names are (de)serialized in camelCase.
#[derive(Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum WeightedScoreValue {
    /// Local score of one or several merged ranks, multiplied by the weight.
    WeightedScore(f64),
    /// Value of a sort rule together with its direction; not weighted.
    Sort { asc: bool, value: serde_json::Value },
    /// Distance of a geo sort rule together with its direction; not weighted.
    GeoSort { asc: bool, distance: Option<f64> },
    /// Raw score multiplied by the weight.
    VectorSort(f64),
}
|  |  | ||||||
| impl ScoreDetails { | impl ScoreDetails { | ||||||
|     pub fn local_score(&self) -> Option<f64> { |     pub fn local_score(&self) -> Option<f64> { | ||||||
|         self.rank().map(Rank::local_score) |         self.rank().map(Rank::local_score) | ||||||
| @@ -87,6 +96,30 @@ impl ScoreDetails { | |||||||
|             }) |             }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn weighted_score_values<'a>( | ||||||
|  |         details: impl Iterator<Item = &'a Self> + 'a, | ||||||
|  |         weight: f64, | ||||||
|  |     ) -> impl Iterator<Item = WeightedScoreValue> + 'a { | ||||||
|  |         details | ||||||
|  |             .map(ScoreDetails::rank_or_value) | ||||||
|  |             .coalesce(|left, right| match (left, right) { | ||||||
|  |                 (RankOrValue::Rank(left), RankOrValue::Rank(right)) => { | ||||||
|  |                     Ok(RankOrValue::Rank(Rank::merge(left, right))) | ||||||
|  |                 } | ||||||
|  |                 (left, right) => Err((left, right)), | ||||||
|  |             }) | ||||||
|  |             .map(move |rank_or_value| match rank_or_value { | ||||||
|  |                 RankOrValue::Rank(r) => WeightedScoreValue::WeightedScore(r.local_score() * weight), | ||||||
|  |                 RankOrValue::Sort(s) => { | ||||||
|  |                     WeightedScoreValue::Sort { asc: s.ascending, value: s.value.clone() } | ||||||
|  |                 } | ||||||
|  |                 RankOrValue::GeoSort(g) => { | ||||||
|  |                     WeightedScoreValue::GeoSort { asc: g.ascending, distance: g.distance() } | ||||||
|  |                 } | ||||||
|  |                 RankOrValue::Score(s) => WeightedScoreValue::VectorSort(s * weight), | ||||||
|  |             }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     fn rank_or_value(&self) -> RankOrValue<'_> { |     fn rank_or_value(&self) -> RankOrValue<'_> { | ||||||
|         match self { |         match self { | ||||||
|             ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()), |             ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()), | ||||||
| @@ -423,34 +456,58 @@ pub struct Sort { | |||||||
|     pub value: serde_json::Value, |     pub value: serde_json::Value, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub fn compare_sort_values( | ||||||
|  |     ascending: bool, | ||||||
|  |     left: &serde_json::Value, | ||||||
|  |     right: &serde_json::Value, | ||||||
|  | ) -> Ordering { | ||||||
|  |     use serde_json::Value::*; | ||||||
|  |     match (left, right) { | ||||||
|  |         (Null, Null) => Ordering::Equal, | ||||||
|  |         (Null, _) => Ordering::Less, | ||||||
|  |         (_, Null) => Ordering::Greater, | ||||||
|  |         // numbers are always before strings | ||||||
|  |         (Number(_), String(_)) => Ordering::Greater, | ||||||
|  |         (String(_), Number(_)) => Ordering::Less, | ||||||
|  |         (Number(left), Number(right)) => { | ||||||
|  |             // FIXME: unwrap permitted here? | ||||||
|  |             let order = left | ||||||
|  |                 .as_f64() | ||||||
|  |                 .unwrap() | ||||||
|  |                 .partial_cmp(&right.as_f64().unwrap()) | ||||||
|  |                 .unwrap_or(Ordering::Equal); | ||||||
|  |             // 12 < 42, and when ascending, we want to see 12 first, so the smallest. | ||||||
|  |             // Hence, when ascending, smaller is better | ||||||
|  |             if ascending { | ||||||
|  |                 order.reverse() | ||||||
|  |             } else { | ||||||
|  |                 order | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         (String(left), String(right)) => { | ||||||
|  |             let order = left.cmp(right); | ||||||
|  |             // Taking e.g. "a" and "z" | ||||||
|  |             // "a" < "z", and when ascending, we want to see "a" first, so the smallest. | ||||||
|  |             // Hence, when ascending, smaller is better | ||||||
|  |             if ascending { | ||||||
|  |                 order.reverse() | ||||||
|  |             } else { | ||||||
|  |                 order | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         (left, right) => { | ||||||
|  |             tracing::warn!(%left, %right, "sort values that are neither numbers, strings or null, handling as equal"); | ||||||
|  |             Ordering::Equal | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| impl PartialOrd for Sort { | impl PartialOrd for Sort { | ||||||
|     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||||
|         if self.ascending != other.ascending { |         if self.ascending != other.ascending { | ||||||
|             return None; |             return None; | ||||||
|         } |         } | ||||||
|         match (&self.value, &other.value) { |         Some(compare_sort_values(self.ascending, &self.value, &other.value)) | ||||||
|             (serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal), |  | ||||||
|             (serde_json::Value::Null, _) => Some(Ordering::Less), |  | ||||||
|             (_, serde_json::Value::Null) => Some(Ordering::Greater), |  | ||||||
|             // numbers are always before strings |  | ||||||
|             (serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater), |  | ||||||
|             (serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less), |  | ||||||
|             (serde_json::Value::Number(left), serde_json::Value::Number(right)) => { |  | ||||||
|                 // FIXME: unwrap permitted here? |  | ||||||
|                 let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?; |  | ||||||
|                 // 12 < 42, and when ascending, we want to see 12 first, so the smallest. |  | ||||||
|                 // Hence, when ascending, smaller is better |  | ||||||
|                 Some(if self.ascending { order.reverse() } else { order }) |  | ||||||
|             } |  | ||||||
|             (serde_json::Value::String(left), serde_json::Value::String(right)) => { |  | ||||||
|                 let order = left.cmp(right); |  | ||||||
|                 // Taking e.g. "a" and "z" |  | ||||||
|                 // "a" < "z", and when ascending, we want to see "a" first, so the smallest. |  | ||||||
|                 // Hence, when ascending, smaller is better |  | ||||||
|                 Some(if self.ascending { order.reverse() } else { order }) |  | ||||||
|             } |  | ||||||
|             _ => None, |  | ||||||
|         } |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -11,7 +11,7 @@ use either::Either; | |||||||
| pub use matching_words::MatchingWords; | pub use matching_words::MatchingWords; | ||||||
| use matching_words::{MatchType, PartialMatch}; | use matching_words::{MatchType, PartialMatch}; | ||||||
| use r#match::{Match, MatchPosition}; | use r#match::{Match, MatchPosition}; | ||||||
| use serde::Serialize; | use serde::{Deserialize, Serialize}; | ||||||
| use simple_token_kind::SimpleTokenKind; | use simple_token_kind::SimpleTokenKind; | ||||||
| use utoipa::ToSchema; | use utoipa::ToSchema; | ||||||
|  |  | ||||||
| @@ -101,11 +101,11 @@ impl FormatOptions { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
/// Location of a match, expressed as a start position and a length.
// NOTE(review): the unit of `start` and `length` (bytes vs. characters) is not
// visible from this definition; confirm against the code that fills them in.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
pub struct MatchBounds {
    pub start: usize,
    pub length: usize,
    // Left out of the serialized output when `None`, and read back as `None`
    // when the field is absent from the input.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub indices: Option<Vec<usize>>,
}
|  |  | ||||||
|   | |||||||
| @@ -563,7 +563,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| #[tracing::instrument(level = "trace", skip_all, target = "search::universe")] | #[tracing::instrument(level = "debug", skip_all, target = "search::universe")] | ||||||
| pub fn filtered_universe( | pub fn filtered_universe( | ||||||
|     index: &Index, |     index: &Index, | ||||||
|     txn: &RoTxn<'_>, |     txn: &RoTxn<'_>, | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| use std::sync::atomic::{AtomicBool, Ordering}; | use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; | ||||||
| use std::sync::Arc; | use std::sync::Arc; | ||||||
|  |  | ||||||
| use rayon::{ThreadPool, ThreadPoolBuilder}; | use rayon::{ThreadPool, ThreadPoolBuilder}; | ||||||
| @@ -9,6 +9,8 @@ use thiserror::Error; | |||||||
/// A rayon [`ThreadPool`] paired with a counter of active operations and a
/// flag recording whether the pool caught a panic.
#[derive(Debug)]
pub struct ThreadPoolNoAbort {
    thread_pool: ThreadPool,
    /// The number of active operations.
    active_operations: AtomicUsize,
    /// Set to true if the thread pool caught a panic.
    pool_catched_panic: Arc<AtomicBool>,
}
| @@ -19,7 +21,9 @@ impl ThreadPoolNoAbort { | |||||||
|         OP: FnOnce() -> R + Send, |         OP: FnOnce() -> R + Send, | ||||||
|         R: Send, |         R: Send, | ||||||
|     { |     { | ||||||
|  |         self.active_operations.fetch_add(1, Ordering::Relaxed); | ||||||
|         let output = self.thread_pool.install(op); |         let output = self.thread_pool.install(op); | ||||||
|  |         self.active_operations.fetch_sub(1, Ordering::Relaxed); | ||||||
|         // While resetting the pool panic catcher we return an error if we caught one. |         // While resetting the pool panic catcher we return an error if we caught one. | ||||||
|         if self.pool_catched_panic.swap(false, Ordering::SeqCst) { |         if self.pool_catched_panic.swap(false, Ordering::SeqCst) { | ||||||
|             Err(PanicCatched) |             Err(PanicCatched) | ||||||
| @@ -31,6 +35,11 @@ impl ThreadPoolNoAbort { | |||||||
|     pub fn current_num_threads(&self) -> usize { |     pub fn current_num_threads(&self) -> usize { | ||||||
|         self.thread_pool.current_num_threads() |         self.thread_pool.current_num_threads() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// The number of active operations. | ||||||
|  |     pub fn active_operations(&self) -> usize { | ||||||
|  |         self.active_operations.load(Ordering::Relaxed) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Error, Debug)] | #[derive(Error, Debug)] | ||||||
| @@ -64,6 +73,10 @@ impl ThreadPoolNoAbortBuilder { | |||||||
|             let catched_panic = pool_catched_panic.clone(); |             let catched_panic = pool_catched_panic.clone(); | ||||||
|             move |_result| catched_panic.store(true, Ordering::SeqCst) |             move |_result| catched_panic.store(true, Ordering::SeqCst) | ||||||
|         }); |         }); | ||||||
|         Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic }) |         Ok(ThreadPoolNoAbort { | ||||||
|  |             thread_pool: self.0.build()?, | ||||||
|  |             active_operations: AtomicUsize::new(0), | ||||||
|  |             pool_catched_panic, | ||||||
|  |         }) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -5,6 +5,8 @@ use std::marker::PhantomData; | |||||||
| use std::mem; | use std::mem; | ||||||
| use std::num::NonZeroU16; | use std::num::NonZeroU16; | ||||||
| use std::ops::Range; | use std::ops::Range; | ||||||
|  | use std::sync::atomic::{self, AtomicUsize}; | ||||||
|  | use std::sync::Arc; | ||||||
| use std::time::Duration; | use std::time::Duration; | ||||||
|  |  | ||||||
| use bbqueue::framed::{FrameGrantR, FrameProducer}; | use bbqueue::framed::{FrameGrantR, FrameProducer}; | ||||||
| @@ -71,12 +73,23 @@ pub fn extractor_writer_bbqueue( | |||||||
|         consumer |         consumer | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|  |     let sent_messages_attempts = Arc::new(AtomicUsize::new(0)); | ||||||
|  |     let blocking_sent_messages_attempts = Arc::new(AtomicUsize::new(0)); | ||||||
|  |  | ||||||
|     let (sender, receiver) = flume::bounded(channel_capacity); |     let (sender, receiver) = flume::bounded(channel_capacity); | ||||||
|     let sender = ExtractorBbqueueSender { sender, producers, max_grant }; |     let sender = ExtractorBbqueueSender { | ||||||
|  |         sender, | ||||||
|  |         producers, | ||||||
|  |         max_grant, | ||||||
|  |         sent_messages_attempts: sent_messages_attempts.clone(), | ||||||
|  |         blocking_sent_messages_attempts: blocking_sent_messages_attempts.clone(), | ||||||
|  |     }; | ||||||
|     let receiver = WriterBbqueueReceiver { |     let receiver = WriterBbqueueReceiver { | ||||||
|         receiver, |         receiver, | ||||||
|         look_at_consumer: (0..consumers.len()).cycle(), |         look_at_consumer: (0..consumers.len()).cycle(), | ||||||
|         consumers, |         consumers, | ||||||
|  |         sent_messages_attempts, | ||||||
|  |         blocking_sent_messages_attempts, | ||||||
|     }; |     }; | ||||||
|     (sender, receiver) |     (sender, receiver) | ||||||
| } | } | ||||||
| @@ -92,6 +105,12 @@ pub struct ExtractorBbqueueSender<'a> { | |||||||
|     /// It will never be able to store more than that as the |     /// It will never be able to store more than that as the | ||||||
|     /// buffer cannot split data into two parts. |     /// buffer cannot split data into two parts. | ||||||
|     max_grant: usize, |     max_grant: usize, | ||||||
|  |     /// The total number of attempts to send messages | ||||||
|  |     /// over the bbqueue channel. | ||||||
|  |     sent_messages_attempts: Arc<AtomicUsize>, | ||||||
|  |     /// The number of times an attempt to send a | ||||||
|  |     /// message failed and we had to pause for a bit. | ||||||
|  |     blocking_sent_messages_attempts: Arc<AtomicUsize>, | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct WriterBbqueueReceiver<'a> { | pub struct WriterBbqueueReceiver<'a> { | ||||||
| @@ -104,6 +123,12 @@ pub struct WriterBbqueueReceiver<'a> { | |||||||
|     look_at_consumer: Cycle<Range<usize>>, |     look_at_consumer: Cycle<Range<usize>>, | ||||||
|     /// The BBQueue frames to read when waking-up. |     /// The BBQueue frames to read when waking-up. | ||||||
|     consumers: Vec<bbqueue::framed::FrameConsumer<'a>>, |     consumers: Vec<bbqueue::framed::FrameConsumer<'a>>, | ||||||
|  |     /// The total number of attempts to send messages | ||||||
|  |     /// over the bbqueue channel. | ||||||
|  |     sent_messages_attempts: Arc<AtomicUsize>, | ||||||
|  |     /// The number of times an attempt to send a | ||||||
|  |     /// message failed and we had to pause for a bit. | ||||||
|  |     blocking_sent_messages_attempts: Arc<AtomicUsize>, | ||||||
| } | } | ||||||
|  |  | ||||||
| /// The action to perform on the receiver/writer side. | /// The action to perform on the receiver/writer side. | ||||||
| @@ -169,6 +194,16 @@ impl<'a> WriterBbqueueReceiver<'a> { | |||||||
|         } |         } | ||||||
|         None |         None | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Returns the total count of attempts to send messages through the BBQueue channel. | ||||||
|  |     pub fn sent_messages_attempts(&self) -> usize { | ||||||
|  |         self.sent_messages_attempts.load(atomic::Ordering::Relaxed) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the count of attempts to send messages that had to be paused due to BBQueue being full. | ||||||
|  |     pub fn blocking_sent_messages_attempts(&self) -> usize { | ||||||
|  |         self.blocking_sent_messages_attempts.load(atomic::Ordering::Relaxed) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct FrameWithHeader<'a> { | pub struct FrameWithHeader<'a> { | ||||||
| @@ -458,10 +493,17 @@ impl<'b> ExtractorBbqueueSender<'b> { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Spin loop to have a frame the size we requested. |         // Spin loop to have a frame the size we requested. | ||||||
|         reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { |         reserve_and_write_grant( | ||||||
|  |             &mut producer, | ||||||
|  |             total_length, | ||||||
|  |             &self.sender, | ||||||
|  |             &self.sent_messages_attempts, | ||||||
|  |             &self.blocking_sent_messages_attempts, | ||||||
|  |             |grant| { | ||||||
|                 payload_header.serialize_into(grant); |                 payload_header.serialize_into(grant); | ||||||
|                 Ok(()) |                 Ok(()) | ||||||
|         })?; |             }, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| @@ -500,20 +542,28 @@ impl<'b> ExtractorBbqueueSender<'b> { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Spin loop to have a frame the size we requested. |         // Spin loop to have a frame the size we requested. | ||||||
|         reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { |         reserve_and_write_grant( | ||||||
|  |             &mut producer, | ||||||
|  |             total_length, | ||||||
|  |             &self.sender, | ||||||
|  |             &self.sent_messages_attempts, | ||||||
|  |             &self.blocking_sent_messages_attempts, | ||||||
|  |             |grant| { | ||||||
|                 let header_size = payload_header.header_size(); |                 let header_size = payload_header.header_size(); | ||||||
|                 let (header_bytes, remaining) = grant.split_at_mut(header_size); |                 let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||||
|                 payload_header.serialize_into(header_bytes); |                 payload_header.serialize_into(header_bytes); | ||||||
|  |  | ||||||
|                 if dimensions != 0 { |                 if dimensions != 0 { | ||||||
|                 let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>()); |                     let output_iter = | ||||||
|  |                         remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>()); | ||||||
|                     for (embedding, output) in embeddings.iter().zip(output_iter) { |                     for (embedding, output) in embeddings.iter().zip(output_iter) { | ||||||
|                         output.copy_from_slice(bytemuck::cast_slice(embedding)); |                         output.copy_from_slice(bytemuck::cast_slice(embedding)); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 Ok(()) |                 Ok(()) | ||||||
|         })?; |             }, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| @@ -571,13 +621,20 @@ impl<'b> ExtractorBbqueueSender<'b> { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Spin loop to have a frame the size we requested. |         // Spin loop to have a frame the size we requested. | ||||||
|         reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { |         reserve_and_write_grant( | ||||||
|  |             &mut producer, | ||||||
|  |             total_length, | ||||||
|  |             &self.sender, | ||||||
|  |             &self.sent_messages_attempts, | ||||||
|  |             &self.blocking_sent_messages_attempts, | ||||||
|  |             |grant| { | ||||||
|                 let header_size = payload_header.header_size(); |                 let header_size = payload_header.header_size(); | ||||||
|                 let (header_bytes, remaining) = grant.split_at_mut(header_size); |                 let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||||
|                 payload_header.serialize_into(header_bytes); |                 payload_header.serialize_into(header_bytes); | ||||||
|                 let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); |                 let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); | ||||||
|                 key_value_writer(key_buffer, value_buffer) |                 key_value_writer(key_buffer, value_buffer) | ||||||
|         })?; |             }, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| @@ -619,12 +676,19 @@ impl<'b> ExtractorBbqueueSender<'b> { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Spin loop to have a frame the size we requested. |         // Spin loop to have a frame the size we requested. | ||||||
|         reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { |         reserve_and_write_grant( | ||||||
|  |             &mut producer, | ||||||
|  |             total_length, | ||||||
|  |             &self.sender, | ||||||
|  |             &self.sent_messages_attempts, | ||||||
|  |             &self.blocking_sent_messages_attempts, | ||||||
|  |             |grant| { | ||||||
|                 let header_size = payload_header.header_size(); |                 let header_size = payload_header.header_size(); | ||||||
|                 let (header_bytes, remaining) = grant.split_at_mut(header_size); |                 let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||||
|                 payload_header.serialize_into(header_bytes); |                 payload_header.serialize_into(header_bytes); | ||||||
|                 key_writer(remaining) |                 key_writer(remaining) | ||||||
|         })?; |             }, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| @@ -637,12 +701,18 @@ fn reserve_and_write_grant<F>( | |||||||
|     producer: &mut FrameProducer, |     producer: &mut FrameProducer, | ||||||
|     total_length: usize, |     total_length: usize, | ||||||
|     sender: &flume::Sender<ReceiverAction>, |     sender: &flume::Sender<ReceiverAction>, | ||||||
|  |     sent_messages_attempts: &AtomicUsize, | ||||||
|  |     blocking_sent_messages_attempts: &AtomicUsize, | ||||||
|     f: F, |     f: F, | ||||||
| ) -> crate::Result<()> | ) -> crate::Result<()> | ||||||
| where | where | ||||||
|     F: FnOnce(&mut [u8]) -> crate::Result<()>, |     F: FnOnce(&mut [u8]) -> crate::Result<()>, | ||||||
| { | { | ||||||
|     loop { |     loop { | ||||||
|  |         // An attempt means trying multiple times | ||||||
|  |         // whether it succeeded or not. | ||||||
|  |         sent_messages_attempts.fetch_add(1, atomic::Ordering::Relaxed); | ||||||
|  |  | ||||||
|         for _ in 0..10_000 { |         for _ in 0..10_000 { | ||||||
|             match producer.grant(total_length) { |             match producer.grant(total_length) { | ||||||
|                 Ok(mut grant) => { |                 Ok(mut grant) => { | ||||||
| @@ -666,6 +736,10 @@ where | |||||||
|             return Err(Error::InternalError(InternalError::AbortedIndexation)); |             return Err(Error::InternalError(InternalError::AbortedIndexation)); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         // We made an attempt to send a message in the | ||||||
|  |         // bbqueue channel but it didn't succeed. | ||||||
|  |         blocking_sent_messages_attempts.fetch_add(1, atomic::Ordering::Relaxed); | ||||||
|  |  | ||||||
|         // We prefer to yield and allow the writing thread |         // We prefer to yield and allow the writing thread | ||||||
|         // to do its job, especially beneficial when there |         // to do its job, especially beneficial when there | ||||||
|         // is only one CPU core available. |         // is only one CPU core available. | ||||||
|   | |||||||
| @@ -144,7 +144,7 @@ impl<'doc> Update<'doc> { | |||||||
|         )?) |         )?) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> { |     pub fn only_changed_fields(&self) -> DocumentFromVersions<'_, 'doc> { | ||||||
|         DocumentFromVersions::new(&self.new) |         DocumentFromVersions::new(&self.new) | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -182,7 +182,7 @@ impl<'doc> Update<'doc> { | |||||||
|         let mut cached_current = None; |         let mut cached_current = None; | ||||||
|         let mut updated_selected_field_count = 0; |         let mut updated_selected_field_count = 0; | ||||||
|  |  | ||||||
|         for entry in self.updated().iter_top_level_fields() { |         for entry in self.only_changed_fields().iter_top_level_fields() { | ||||||
|             let (key, updated_value) = entry?; |             let (key, updated_value) = entry?; | ||||||
|  |  | ||||||
|             if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { |             if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { | ||||||
| @@ -241,7 +241,7 @@ impl<'doc> Update<'doc> { | |||||||
|         Ok(has_deleted_fields) |         Ok(has_deleted_fields) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn updated_vectors( |     pub fn only_changed_vectors( | ||||||
|         &self, |         &self, | ||||||
|         doc_alloc: &'doc Bump, |         doc_alloc: &'doc Bump, | ||||||
|         embedders: &'doc EmbeddingConfigs, |         embedders: &'doc EmbeddingConfigs, | ||||||
|   | |||||||
| @@ -199,7 +199,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor { | |||||||
|                         .transpose()?; |                         .transpose()?; | ||||||
|  |  | ||||||
|                     let updated_geo = update |                     let updated_geo = update | ||||||
|                         .updated() |                         .merged(rtxn, index, db_fields_ids_map)? | ||||||
|                         .geo_field()? |                         .geo_field()? | ||||||
|                         .map(|geo| extract_geo_coordinates(external_id, geo)) |                         .map(|geo| extract_geo_coordinates(external_id, geo)) | ||||||
|                         .transpose()?; |                         .transpose()?; | ||||||
|   | |||||||
| @@ -99,7 +99,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> { | |||||||
|                         context.db_fields_ids_map, |                         context.db_fields_ids_map, | ||||||
|                         &context.doc_alloc, |                         &context.doc_alloc, | ||||||
|                     )?; |                     )?; | ||||||
|                     let new_vectors = update.updated_vectors(&context.doc_alloc, self.embedders)?; |                     let new_vectors = | ||||||
|  |                         update.only_changed_vectors(&context.doc_alloc, self.embedders)?; | ||||||
|  |  | ||||||
|                     if let Some(new_vectors) = &new_vectors { |                     if let Some(new_vectors) = &new_vectors { | ||||||
|                         unused_vectors_distribution.append(new_vectors)?; |                         unused_vectors_distribution.append(new_vectors)?; | ||||||
|   | |||||||
| @@ -234,7 +234,7 @@ where | |||||||
|         ); |         ); | ||||||
|         let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); |         let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); | ||||||
|         { |         { | ||||||
|             let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); |             let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors"); | ||||||
|             let _entered = span.enter(); |             let _entered = span.enter(); | ||||||
|  |  | ||||||
|             extract( |             extract( | ||||||
| @@ -247,7 +247,7 @@ where | |||||||
|             )?; |             )?; | ||||||
|         } |         } | ||||||
|         { |         { | ||||||
|             let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); |             let span = tracing::debug_span!(target: "indexing::documents::merge", "vectors"); | ||||||
|             let _entered = span.enter(); |             let _entered = span.enter(); | ||||||
|  |  | ||||||
|             for config in &mut index_embeddings { |             for config in &mut index_embeddings { | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| use std::sync::atomic::AtomicBool; | use std::sync::atomic::AtomicBool; | ||||||
| use std::sync::RwLock; | use std::sync::{Once, RwLock}; | ||||||
| use std::thread::{self, Builder}; | use std::thread::{self, Builder}; | ||||||
|  |  | ||||||
| use big_s::S; | use big_s::S; | ||||||
| @@ -33,6 +33,8 @@ mod post_processing; | |||||||
| mod update_by_function; | mod update_by_function; | ||||||
| mod write; | mod write; | ||||||
|  |  | ||||||
|  | static LOG_MEMORY_METRICS_ONCE: Once = Once::new(); | ||||||
|  |  | ||||||
| /// This is the main function of this crate. | /// This is the main function of this crate. | ||||||
| /// | /// | ||||||
| /// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`]. | /// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`]. | ||||||
| @@ -93,6 +95,15 @@ where | |||||||
|         }, |         }, | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|  |     LOG_MEMORY_METRICS_ONCE.call_once(|| { | ||||||
|  |         tracing::debug!( | ||||||
|  |             "Indexation allocated memory metrics - \ | ||||||
|  |             Total BBQueue size: {total_bbbuffer_capacity}, \ | ||||||
|  |             Total extractor memory: {:?}", | ||||||
|  |             grenad_parameters.max_memory, | ||||||
|  |         ); | ||||||
|  |     }); | ||||||
|  |  | ||||||
|     let (extractor_sender, writer_receiver) = pool |     let (extractor_sender, writer_receiver) = pool | ||||||
|         .install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000)) |         .install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000)) | ||||||
|         .unwrap(); |         .unwrap(); | ||||||
| @@ -179,13 +190,16 @@ where | |||||||
|  |  | ||||||
|         indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase); |         indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase); | ||||||
|  |  | ||||||
|  |         pool.install(|| { | ||||||
|             build_vectors( |             build_vectors( | ||||||
|                 index, |                 index, | ||||||
|                 wtxn, |                 wtxn, | ||||||
|                 index_embeddings, |                 index_embeddings, | ||||||
|                 &mut arroy_writers, |                 &mut arroy_writers, | ||||||
|                 &indexing_context.must_stop_processing, |                 &indexing_context.must_stop_processing, | ||||||
|         )?; |             ) | ||||||
|  |         }) | ||||||
|  |         .unwrap()?; | ||||||
|  |  | ||||||
|         post_processing::post_process( |         post_processing::post_process( | ||||||
|             indexing_context, |             indexing_context, | ||||||
|   | |||||||
| @@ -72,11 +72,23 @@ pub(super) fn write_to_db( | |||||||
|             &mut aligned_embedding, |             &mut aligned_embedding, | ||||||
|         )?; |         )?; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?; |     write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?; | ||||||
|  |  | ||||||
|  |     let direct_attempts = writer_receiver.sent_messages_attempts(); | ||||||
|  |     let blocking_attempts = writer_receiver.blocking_sent_messages_attempts(); | ||||||
|  |     let congestion_pct = (blocking_attempts as f64 / direct_attempts as f64) * 100.0; | ||||||
|  |     tracing::debug!( | ||||||
|  |         "Channel congestion metrics - \ | ||||||
|  |         Attempts: {direct_attempts}, \ | ||||||
|  |         Blocked attempts: {blocking_attempts} \ | ||||||
|  |         ({congestion_pct:.1}% congestion)" | ||||||
|  |     ); | ||||||
|  |  | ||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| #[tracing::instrument(level = "trace", skip_all, target = "indexing::vectors")] | #[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")] | ||||||
| pub(super) fn build_vectors<MSP>( | pub(super) fn build_vectors<MSP>( | ||||||
|     index: &Index, |     index: &Index, | ||||||
|     wtxn: &mut RwTxn<'_>, |     wtxn: &mut RwTxn<'_>, | ||||||
|   | |||||||
| @@ -1,7 +1,9 @@ | |||||||
| mod v1_12; | mod v1_12; | ||||||
|  | mod v1_13; | ||||||
|  |  | ||||||
| use heed::RwTxn; | use heed::RwTxn; | ||||||
| use v1_12::{V1_12_3_To_Current, V1_12_To_V1_12_3}; | use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3}; | ||||||
|  | use v1_13::V1_13_0_To_Current; | ||||||
|  |  | ||||||
| use crate::progress::{Progress, VariableNameStep}; | use crate::progress::{Progress, VariableNameStep}; | ||||||
| use crate::{Index, InternalError, Result}; | use crate::{Index, InternalError, Result}; | ||||||
| @@ -26,11 +28,13 @@ pub fn upgrade( | |||||||
|     progress: Progress, |     progress: Progress, | ||||||
| ) -> Result<bool> { | ) -> Result<bool> { | ||||||
|     let from = index.get_version(wtxn)?.unwrap_or(db_version); |     let from = index.get_version(wtxn)?.unwrap_or(db_version); | ||||||
|     let upgrade_functions: &[&dyn UpgradeIndex] = &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_Current()]; |     let upgrade_functions: &[&dyn UpgradeIndex] = | ||||||
|  |         &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_V1_13_0 {}, &V1_13_0_To_Current()]; | ||||||
|  |  | ||||||
|     let start = match from { |     let start = match from { | ||||||
|         (1, 12, 0..=2) => 0, |         (1, 12, 0..=2) => 0, | ||||||
|         (1, 12, 3..) => 1, |         (1, 12, 3..) => 1, | ||||||
|  |         (1, 13, 0) => 2, | ||||||
|         // We must handle the current version in the match because in case of a failure some indexes may have been upgraded but not others. |         // We must handle the current version in the match because in case of a failure some indexes may have been upgraded but not others. | ||||||
|         (1, 13, _) => return Ok(false), |         (1, 13, _) => return Ok(false), | ||||||
|         (major, minor, patch) => { |         (major, minor, patch) => { | ||||||
|   | |||||||
| @@ -1,11 +1,9 @@ | |||||||
| use heed::RwTxn; | use heed::RwTxn; | ||||||
|  |  | ||||||
| use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; | use super::UpgradeIndex; | ||||||
| use crate::progress::Progress; | use crate::progress::Progress; | ||||||
| use crate::{make_enum_progress, Index, Result}; | use crate::{make_enum_progress, Index, Result}; | ||||||
|  |  | ||||||
| use super::UpgradeIndex; |  | ||||||
|  |  | ||||||
| #[allow(non_camel_case_types)] | #[allow(non_camel_case_types)] | ||||||
| pub(super) struct V1_12_To_V1_12_3 {} | pub(super) struct V1_12_To_V1_12_3 {} | ||||||
|  |  | ||||||
| @@ -33,9 +31,9 @@ impl UpgradeIndex for V1_12_To_V1_12_3 { | |||||||
| } | } | ||||||
|  |  | ||||||
| #[allow(non_camel_case_types)] | #[allow(non_camel_case_types)] | ||||||
| pub(super) struct V1_12_3_To_Current(); | pub(super) struct V1_12_3_To_V1_13_0 {} | ||||||
|  |  | ||||||
| impl UpgradeIndex for V1_12_3_To_Current { | impl UpgradeIndex for V1_12_3_To_V1_13_0 { | ||||||
|     fn upgrade( |     fn upgrade( | ||||||
|         &self, |         &self, | ||||||
|         _wtxn: &mut RwTxn, |         _wtxn: &mut RwTxn, | ||||||
| @@ -43,14 +41,11 @@ impl UpgradeIndex for V1_12_3_To_Current { | |||||||
|         _original: (u32, u32, u32), |         _original: (u32, u32, u32), | ||||||
|         _progress: Progress, |         _progress: Progress, | ||||||
|     ) -> Result<bool> { |     ) -> Result<bool> { | ||||||
|         Ok(false) |         // recompute the indexes stats | ||||||
|  |         Ok(true) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn target_version(&self) -> (u32, u32, u32) { |     fn target_version(&self) -> (u32, u32, u32) { | ||||||
|         ( |         (1, 13, 0) | ||||||
|             VERSION_MAJOR.parse().unwrap(), |  | ||||||
|             VERSION_MINOR.parse().unwrap(), |  | ||||||
|             VERSION_PATCH.parse().unwrap(), |  | ||||||
|         ) |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										29
									
								
								crates/milli/src/update/upgrade/v1_13.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								crates/milli/src/update/upgrade/v1_13.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | |||||||
|  | use heed::RwTxn; | ||||||
|  |  | ||||||
|  | use super::UpgradeIndex; | ||||||
|  | use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; | ||||||
|  | use crate::progress::Progress; | ||||||
|  | use crate::{Index, Result}; | ||||||
|  |  | ||||||
|  | #[allow(non_camel_case_types)] | ||||||
|  | pub(super) struct V1_13_0_To_Current(); | ||||||
|  |  | ||||||
|  | impl UpgradeIndex for V1_13_0_To_Current { | ||||||
|  |     fn upgrade( | ||||||
|  |         &self, | ||||||
|  |         _wtxn: &mut RwTxn, | ||||||
|  |         _index: &Index, | ||||||
|  |         _original: (u32, u32, u32), | ||||||
|  |         _progress: Progress, | ||||||
|  |     ) -> Result<bool> { | ||||||
|  |         Ok(false) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn target_version(&self) -> (u32, u32, u32) { | ||||||
|  |         ( | ||||||
|  |             VERSION_MAJOR.parse().unwrap(), | ||||||
|  |             VERSION_MINOR.parse().unwrap(), | ||||||
|  |             VERSION_PATCH.parse().unwrap(), | ||||||
|  |         ) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -410,8 +410,43 @@ impl ArroyWrapper { | |||||||
|     fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> { |     fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> { | ||||||
|         self.database.remap_data_type() |         self.database.remap_data_type() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn aggregate_stats( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         stats: &mut ArroyStats, | ||||||
|  |     ) -> Result<(), arroy::Error> { | ||||||
|  |         if self.quantized { | ||||||
|  |             for reader in self.readers(rtxn, self.quantized_db()) { | ||||||
|  |                 let reader = reader?; | ||||||
|  |                 let documents = reader.item_ids(); | ||||||
|  |                 if documents.is_empty() { | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |                 stats.documents |= documents; | ||||||
|  |                 stats.number_of_embeddings += documents.len(); | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             for reader in self.readers(rtxn, self.angular_db()) { | ||||||
|  |                 let reader = reader?; | ||||||
|  |                 let documents = reader.item_ids(); | ||||||
|  |                 if documents.is_empty() { | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |                 stats.documents |= documents; | ||||||
|  |                 stats.number_of_embeddings += documents.len(); | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Default, Clone)] | ||||||
|  | pub struct ArroyStats { | ||||||
|  |     pub number_of_embeddings: u64, | ||||||
|  |     pub documents: RoaringBitmap, | ||||||
|  | } | ||||||
| /// One or multiple embeddings stored consecutively in a flat vector. | /// One or multiple embeddings stored consecutively in a flat vector. | ||||||
| pub struct Embeddings<F> { | pub struct Embeddings<F> { | ||||||
|     data: Vec<F>, |     data: Vec<F>, | ||||||
| @@ -611,6 +646,7 @@ impl Embedder { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[tracing::instrument(level = "debug", skip_all, target = "search")] | ||||||
|     pub fn embed_one( |     pub fn embed_one( | ||||||
|         &self, |         &self, | ||||||
|         text: String, |         text: String, | ||||||
|   | |||||||
| @@ -5,7 +5,7 @@ use rayon::slice::ParallelSlice as _; | |||||||
|  |  | ||||||
| use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind}; | use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind}; | ||||||
| use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions}; | use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions}; | ||||||
| use super::DistributionShift; | use super::{DistributionShift, REQUEST_PARALLELISM}; | ||||||
| use crate::error::FaultSource; | use crate::error::FaultSource; | ||||||
| use crate::vector::Embedding; | use crate::vector::Embedding; | ||||||
| use crate::ThreadPoolNoAbort; | use crate::ThreadPoolNoAbort; | ||||||
| @@ -118,6 +118,11 @@ impl Embedder { | |||||||
|         text_chunks: Vec<Vec<String>>, |         text_chunks: Vec<Vec<String>>, | ||||||
|         threads: &ThreadPoolNoAbort, |         threads: &ThreadPoolNoAbort, | ||||||
|     ) -> Result<Vec<Vec<Embedding>>, EmbedError> { |     ) -> Result<Vec<Vec<Embedding>>, EmbedError> { | ||||||
|  |         // This condition helps reduce the number of active rayon jobs | ||||||
|  |         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||||
|  |         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||||
|  |             text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||||
|  |         } else { | ||||||
|             threads |             threads | ||||||
|                 .install(move || { |                 .install(move || { | ||||||
|                     text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect() |                     text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||||
| @@ -127,12 +132,24 @@ impl Embedder { | |||||||
|                     fault: FaultSource::Bug, |                     fault: FaultSource::Bug, | ||||||
|                 })? |                 })? | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub(crate) fn embed_chunks_ref( |     pub(crate) fn embed_chunks_ref( | ||||||
|         &self, |         &self, | ||||||
|         texts: &[&str], |         texts: &[&str], | ||||||
|         threads: &ThreadPoolNoAbort, |         threads: &ThreadPoolNoAbort, | ||||||
|     ) -> Result<Vec<Vec<f32>>, EmbedError> { |     ) -> Result<Vec<Vec<f32>>, EmbedError> { | ||||||
|  |         // This condition helps reduce the number of active rayon jobs | ||||||
|  |         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||||
|  |         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||||
|  |             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||||
|  |                 .chunks(self.prompt_count_in_chunk_hint()) | ||||||
|  |                 .map(move |chunk| self.embed(chunk, None)) | ||||||
|  |                 .collect(); | ||||||
|  |  | ||||||
|  |             let embeddings = embeddings?; | ||||||
|  |             Ok(embeddings.into_iter().flatten().collect()) | ||||||
|  |         } else { | ||||||
|             threads |             threads | ||||||
|                 .install(move || { |                 .install(move || { | ||||||
|                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts |                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||||
| @@ -148,6 +165,7 @@ impl Embedder { | |||||||
|                     fault: FaultSource::Bug, |                     fault: FaultSource::Bug, | ||||||
|                 })? |                 })? | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn chunk_count_hint(&self) -> usize { |     pub fn chunk_count_hint(&self) -> usize { | ||||||
|         self.rest_embedder.chunk_count_hint() |         self.rest_embedder.chunk_count_hint() | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ use rayon::slice::ParallelSlice as _; | |||||||
|  |  | ||||||
| use super::error::{EmbedError, NewEmbedderError}; | use super::error::{EmbedError, NewEmbedderError}; | ||||||
| use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions}; | use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions}; | ||||||
| use super::DistributionShift; | use super::{DistributionShift, REQUEST_PARALLELISM}; | ||||||
| use crate::error::FaultSource; | use crate::error::FaultSource; | ||||||
| use crate::vector::error::EmbedErrorKind; | use crate::vector::error::EmbedErrorKind; | ||||||
| use crate::vector::Embedding; | use crate::vector::Embedding; | ||||||
| @@ -255,6 +255,11 @@ impl Embedder { | |||||||
|         text_chunks: Vec<Vec<String>>, |         text_chunks: Vec<Vec<String>>, | ||||||
|         threads: &ThreadPoolNoAbort, |         threads: &ThreadPoolNoAbort, | ||||||
|     ) -> Result<Vec<Vec<Embedding>>, EmbedError> { |     ) -> Result<Vec<Vec<Embedding>>, EmbedError> { | ||||||
|  |         // This condition helps reduce the number of active rayon jobs | ||||||
|  |         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||||
|  |         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||||
|  |             text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||||
|  |         } else { | ||||||
|             threads |             threads | ||||||
|                 .install(move || { |                 .install(move || { | ||||||
|                     text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect() |                     text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||||
| @@ -264,12 +269,23 @@ impl Embedder { | |||||||
|                     fault: FaultSource::Bug, |                     fault: FaultSource::Bug, | ||||||
|                 })? |                 })? | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub(crate) fn embed_chunks_ref( |     pub(crate) fn embed_chunks_ref( | ||||||
|         &self, |         &self, | ||||||
|         texts: &[&str], |         texts: &[&str], | ||||||
|         threads: &ThreadPoolNoAbort, |         threads: &ThreadPoolNoAbort, | ||||||
|     ) -> Result<Vec<Vec<f32>>, EmbedError> { |     ) -> Result<Vec<Vec<f32>>, EmbedError> { | ||||||
|  |         // This condition helps reduce the number of active rayon jobs | ||||||
|  |         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||||
|  |         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||||
|  |             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||||
|  |                 .chunks(self.prompt_count_in_chunk_hint()) | ||||||
|  |                 .map(move |chunk| self.embed(chunk, None)) | ||||||
|  |                 .collect(); | ||||||
|  |             let embeddings = embeddings?; | ||||||
|  |             Ok(embeddings.into_iter().flatten().collect()) | ||||||
|  |         } else { | ||||||
|             threads |             threads | ||||||
|                 .install(move || { |                 .install(move || { | ||||||
|                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts |                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||||
| @@ -285,6 +301,7 @@ impl Embedder { | |||||||
|                     fault: FaultSource::Bug, |                     fault: FaultSource::Bug, | ||||||
|                 })? |                 })? | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn chunk_count_hint(&self) -> usize { |     pub fn chunk_count_hint(&self) -> usize { | ||||||
|         self.rest_embedder.chunk_count_hint() |         self.rest_embedder.chunk_count_hint() | ||||||
|   | |||||||
| @@ -130,6 +130,7 @@ impl Embedder { | |||||||
|         let client = ureq::AgentBuilder::new() |         let client = ureq::AgentBuilder::new() | ||||||
|             .max_idle_connections(REQUEST_PARALLELISM * 2) |             .max_idle_connections(REQUEST_PARALLELISM * 2) | ||||||
|             .max_idle_connections_per_host(REQUEST_PARALLELISM * 2) |             .max_idle_connections_per_host(REQUEST_PARALLELISM * 2) | ||||||
|  |             .timeout(std::time::Duration::from_secs(30)) | ||||||
|             .build(); |             .build(); | ||||||
|  |  | ||||||
|         let request = Request::new(options.request)?; |         let request = Request::new(options.request)?; | ||||||
| @@ -188,6 +189,11 @@ impl Embedder { | |||||||
|         text_chunks: Vec<Vec<String>>, |         text_chunks: Vec<Vec<String>>, | ||||||
|         threads: &ThreadPoolNoAbort, |         threads: &ThreadPoolNoAbort, | ||||||
|     ) -> Result<Vec<Vec<Embedding>>, EmbedError> { |     ) -> Result<Vec<Vec<Embedding>>, EmbedError> { | ||||||
|  |         // This condition helps reduce the number of active rayon jobs | ||||||
|  |         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||||
|  |         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||||
|  |             text_chunks.into_iter().map(move |chunk| self.embed(chunk, None)).collect() | ||||||
|  |         } else { | ||||||
|             threads |             threads | ||||||
|                 .install(move || { |                 .install(move || { | ||||||
|                     text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect() |                     text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect() | ||||||
| @@ -197,12 +203,24 @@ impl Embedder { | |||||||
|                     fault: FaultSource::Bug, |                     fault: FaultSource::Bug, | ||||||
|                 })? |                 })? | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub(crate) fn embed_chunks_ref( |     pub(crate) fn embed_chunks_ref( | ||||||
|         &self, |         &self, | ||||||
|         texts: &[&str], |         texts: &[&str], | ||||||
|         threads: &ThreadPoolNoAbort, |         threads: &ThreadPoolNoAbort, | ||||||
|     ) -> Result<Vec<Embedding>, EmbedError> { |     ) -> Result<Vec<Embedding>, EmbedError> { | ||||||
|  |         // This condition helps reduce the number of active rayon jobs | ||||||
|  |         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||||
|  |         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||||
|  |             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||||
|  |                 .chunks(self.prompt_count_in_chunk_hint()) | ||||||
|  |                 .map(move |chunk| self.embed_ref(chunk, None)) | ||||||
|  |                 .collect(); | ||||||
|  |  | ||||||
|  |             let embeddings = embeddings?; | ||||||
|  |             Ok(embeddings.into_iter().flatten().collect()) | ||||||
|  |         } else { | ||||||
|             threads |             threads | ||||||
|                 .install(move || { |                 .install(move || { | ||||||
|                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts |                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||||
| @@ -218,6 +236,7 @@ impl Embedder { | |||||||
|                     fault: FaultSource::Bug, |                     fault: FaultSource::Bug, | ||||||
|                 })? |                 })? | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn chunk_count_hint(&self) -> usize { |     pub fn chunk_count_hint(&self) -> usize { | ||||||
|         super::REQUEST_PARALLELISM |         super::REQUEST_PARALLELISM | ||||||
|   | |||||||
| @@ -455,7 +455,7 @@ impl EmbeddingSettings { | |||||||
|                 EmbedderSource::Ollama, |                 EmbedderSource::Ollama, | ||||||
|                 EmbedderSource::Rest, |                 EmbedderSource::Rest, | ||||||
|             ], |             ], | ||||||
|             Self::DOCUMENT_TEMPLATE => &[ |             Self::DOCUMENT_TEMPLATE | Self::DOCUMENT_TEMPLATE_MAX_BYTES => &[ | ||||||
|                 EmbedderSource::HuggingFace, |                 EmbedderSource::HuggingFace, | ||||||
|                 EmbedderSource::OpenAi, |                 EmbedderSource::OpenAi, | ||||||
|                 EmbedderSource::Ollama, |                 EmbedderSource::Ollama, | ||||||
| @@ -490,6 +490,7 @@ impl EmbeddingSettings { | |||||||
|                 Self::MODEL, |                 Self::MODEL, | ||||||
|                 Self::API_KEY, |                 Self::API_KEY, | ||||||
|                 Self::DOCUMENT_TEMPLATE, |                 Self::DOCUMENT_TEMPLATE, | ||||||
|  |                 Self::DOCUMENT_TEMPLATE_MAX_BYTES, | ||||||
|                 Self::DIMENSIONS, |                 Self::DIMENSIONS, | ||||||
|                 Self::DISTRIBUTION, |                 Self::DISTRIBUTION, | ||||||
|                 Self::URL, |                 Self::URL, | ||||||
| @@ -500,6 +501,7 @@ impl EmbeddingSettings { | |||||||
|                 Self::MODEL, |                 Self::MODEL, | ||||||
|                 Self::REVISION, |                 Self::REVISION, | ||||||
|                 Self::DOCUMENT_TEMPLATE, |                 Self::DOCUMENT_TEMPLATE, | ||||||
|  |                 Self::DOCUMENT_TEMPLATE_MAX_BYTES, | ||||||
|                 Self::DISTRIBUTION, |                 Self::DISTRIBUTION, | ||||||
|                 Self::BINARY_QUANTIZED, |                 Self::BINARY_QUANTIZED, | ||||||
|             ], |             ], | ||||||
| @@ -507,6 +509,7 @@ impl EmbeddingSettings { | |||||||
|                 Self::SOURCE, |                 Self::SOURCE, | ||||||
|                 Self::MODEL, |                 Self::MODEL, | ||||||
|                 Self::DOCUMENT_TEMPLATE, |                 Self::DOCUMENT_TEMPLATE, | ||||||
|  |                 Self::DOCUMENT_TEMPLATE_MAX_BYTES, | ||||||
|                 Self::URL, |                 Self::URL, | ||||||
|                 Self::API_KEY, |                 Self::API_KEY, | ||||||
|                 Self::DIMENSIONS, |                 Self::DIMENSIONS, | ||||||
| @@ -521,6 +524,7 @@ impl EmbeddingSettings { | |||||||
|                 Self::API_KEY, |                 Self::API_KEY, | ||||||
|                 Self::DIMENSIONS, |                 Self::DIMENSIONS, | ||||||
|                 Self::DOCUMENT_TEMPLATE, |                 Self::DOCUMENT_TEMPLATE, | ||||||
|  |                 Self::DOCUMENT_TEMPLATE_MAX_BYTES, | ||||||
|                 Self::URL, |                 Self::URL, | ||||||
|                 Self::REQUEST, |                 Self::REQUEST, | ||||||
|                 Self::RESPONSE, |                 Self::RESPONSE, | ||||||
|   | |||||||
| @@ -12,16 +12,6 @@ | |||||||
|     } |     } | ||||||
|   }, |   }, | ||||||
|   "precommands": [ |   "precommands": [ | ||||||
|     { |  | ||||||
|       "route": "experimental-features", |  | ||||||
|       "method": "PATCH", |  | ||||||
|       "body": { |  | ||||||
|         "inline": { |  | ||||||
|           "vectorStore": true |  | ||||||
|         } |  | ||||||
|       }, |  | ||||||
|       "synchronous": "DontWait" |  | ||||||
|     }, |  | ||||||
|     { |     { | ||||||
|       "route": "indexes/movies/settings", |       "route": "indexes/movies/settings", | ||||||
|       "method": "PATCH", |       "method": "PATCH", | ||||||
|   | |||||||
| @@ -12,16 +12,6 @@ | |||||||
|     } |     } | ||||||
|   }, |   }, | ||||||
|   "precommands": [ |   "precommands": [ | ||||||
|     { |  | ||||||
|       "route": "experimental-features", |  | ||||||
|       "method": "PATCH", |  | ||||||
|       "body": { |  | ||||||
|         "inline": { |  | ||||||
|           "vectorStore": true |  | ||||||
|         } |  | ||||||
|       }, |  | ||||||
|       "synchronous": "DontWait" |  | ||||||
|     }, |  | ||||||
|     { |     { | ||||||
|       "route": "indexes/movies/settings", |       "route": "indexes/movies/settings", | ||||||
|       "method": "PATCH", |       "method": "PATCH", | ||||||
|   | |||||||
| @@ -28,10 +28,10 @@ | |||||||
|       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", |       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", | ||||||
|       "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" |       "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" | ||||||
|     }, |     }, | ||||||
|     "hackernews-02-modified-filters.ndjson": { |     "hackernews-modified-number-filters.ndjson": { | ||||||
|       "local_location": null, |       "local_location": null, | ||||||
|       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson", |       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson", | ||||||
|       "sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802" |       "sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c" | ||||||
|     } |     } | ||||||
|   }, |   }, | ||||||
|   "precommands": [ |   "precommands": [ | ||||||
| @@ -102,7 +102,7 @@ | |||||||
|         "route": "indexes/movies/documents", |         "route": "indexes/movies/documents", | ||||||
|         "method": "POST", |         "method": "POST", | ||||||
|         "body": { |         "body": { | ||||||
|           "asset": "hackernews-02-modified-filters.ndjson" |           "asset": "hackernews-modified-number-filters.ndjson" | ||||||
|         }, |         }, | ||||||
|         "synchronous": "WaitForTask" |         "synchronous": "WaitForTask" | ||||||
|       } |       } | ||||||
|   | |||||||
| @@ -28,10 +28,10 @@ | |||||||
|       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", |       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", | ||||||
|       "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" |       "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" | ||||||
|     }, |     }, | ||||||
|     "hackernews-01-modified-filters.ndjson": { |     "hackernews-modified-string-filters.ndjson": { | ||||||
|       "local_location": null, |       "local_location": null, | ||||||
|       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson", |       "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson", | ||||||
|       "sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c" |       "sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802" | ||||||
|     } |     } | ||||||
|   }, |   }, | ||||||
|   "precommands": [ |   "precommands": [ | ||||||
| @@ -102,7 +102,7 @@ | |||||||
|         "route": "indexes/movies/documents", |         "route": "indexes/movies/documents", | ||||||
|         "method": "POST", |         "method": "POST", | ||||||
|         "body": { |         "body": { | ||||||
|           "asset": "hackernews-01-modified-filters.ndjson" |           "asset": "hackernews-modified-string-filters.ndjson" | ||||||
|         }, |         }, | ||||||
|         "synchronous": "WaitForTask" |         "synchronous": "WaitForTask" | ||||||
|       } |       } | ||||||
|   | |||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user