Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-31 16:06:31 +00:00)

Merge #5351

Some checks failed

Run the indexing fuzzer / Setup the action (push) Failing after 2m50s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Look for flaky tests / flaky (push) Failing after 19s
SDKs tests / define-docker-image (push) Failing after 5s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 2s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 12s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Failing after 17s
Test suite / Run Rustfmt (push) Successful in 1m51s
Test suite / Tests almost all features (push) Failing after 7m7s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled

5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops

This PR brings back the changes made in v1.13 into the main branch.

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clémentine <clementine@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>

		| @@ -10,8 +10,10 @@ dump | ||||
| ├── instance-uid.uuid | ||||
| ├── keys.jsonl | ||||
| ├── metadata.json | ||||
| └── tasks | ||||
|     ├── update_files | ||||
|     │   └── [task_id].jsonl | ||||
| ├── tasks | ||||
| │   ├── update_files | ||||
| │   │   └── [task_id].jsonl | ||||
| │   └── queue.jsonl | ||||
| └── batches | ||||
|     └── queue.jsonl | ||||
| ``` | ||||
| ``` | ||||
|   | ||||
| @@ -228,14 +228,16 @@ pub(crate) mod test { | ||||
|  | ||||
|     use big_s::S; | ||||
|     use maplit::{btreemap, btreeset}; | ||||
|     use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats}; | ||||
|     use meilisearch_types::facet_values_sort::FacetValuesSort; | ||||
|     use meilisearch_types::features::RuntimeTogglableFeatures; | ||||
|     use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures}; | ||||
|     use meilisearch_types::index_uid_pattern::IndexUidPattern; | ||||
|     use meilisearch_types::keys::{Action, Key}; | ||||
|     use meilisearch_types::milli; | ||||
|     use meilisearch_types::milli::update::Setting; | ||||
|     use meilisearch_types::settings::{Checked, FacetingSettings, Settings}; | ||||
|     use meilisearch_types::tasks::{Details, Status}; | ||||
|     use meilisearch_types::task_view::DetailsView; | ||||
|     use meilisearch_types::tasks::{Details, Kind, Status}; | ||||
|     use serde_json::{json, Map, Value}; | ||||
|     use time::macros::datetime; | ||||
|     use uuid::Uuid; | ||||
| @@ -305,6 +307,30 @@ pub(crate) mod test { | ||||
|         settings.check() | ||||
|     } | ||||
|  | ||||
|     pub fn create_test_batches() -> Vec<Batch> { | ||||
|         vec![Batch { | ||||
|             uid: 0, | ||||
|             details: DetailsView { | ||||
|                 received_documents: Some(12), | ||||
|                 indexed_documents: Some(Some(10)), | ||||
|                 ..DetailsView::default() | ||||
|             }, | ||||
|             progress: None, | ||||
|             stats: BatchStats { | ||||
|                 total_nb_tasks: 1, | ||||
|                 status: maplit::btreemap! { Status::Succeeded => 1 }, | ||||
|                 types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 }, | ||||
|                 index_uids: maplit::btreemap! { "doggo".to_string() => 1 }, | ||||
|             }, | ||||
|             enqueued_at: Some(BatchEnqueuedAt { | ||||
|                 earliest: datetime!(2022-11-11 0:00 UTC), | ||||
|                 oldest: datetime!(2022-11-11 0:00 UTC), | ||||
|             }), | ||||
|             started_at: datetime!(2022-11-20 0:00 UTC), | ||||
|             finished_at: Some(datetime!(2022-11-21 0:00 UTC)), | ||||
|         }] | ||||
|     } | ||||
|  | ||||
|     pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> { | ||||
|         vec![ | ||||
|             ( | ||||
| @@ -427,6 +453,15 @@ pub(crate) mod test { | ||||
|         index.flush().unwrap(); | ||||
|         index.settings(&settings).unwrap(); | ||||
|  | ||||
|         // ========== pushing the batch queue | ||||
|         let batches = create_test_batches(); | ||||
|  | ||||
|         let mut batch_queue = dump.create_batches_queue().unwrap(); | ||||
|         for batch in &batches { | ||||
|             batch_queue.push_batch(batch).unwrap(); | ||||
|         } | ||||
|         batch_queue.flush().unwrap(); | ||||
|  | ||||
|         // ========== pushing the task queue | ||||
|         let tasks = create_test_tasks(); | ||||
|  | ||||
| @@ -455,6 +490,10 @@ pub(crate) mod test { | ||||
|  | ||||
|         dump.create_experimental_features(features).unwrap(); | ||||
|  | ||||
|         // ========== network | ||||
|         let network = create_test_network(); | ||||
|         dump.create_network(network).unwrap(); | ||||
|  | ||||
|         // create the dump | ||||
|         let mut file = tempfile::tempfile().unwrap(); | ||||
|         dump.persist_to(&mut file).unwrap(); | ||||
| @@ -467,6 +506,13 @@ pub(crate) mod test { | ||||
|         RuntimeTogglableFeatures::default() | ||||
|     } | ||||
|  | ||||
|     fn create_test_network() -> Network { | ||||
|         Network { | ||||
|             local: Some("myself".to_string()), | ||||
|             remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }}, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_creating_and_read_dump() { | ||||
|         let mut file = create_test_dump(); | ||||
| @@ -515,5 +561,9 @@ pub(crate) mod test { | ||||
|         // ==== checking the features | ||||
|         let expected = create_test_features(); | ||||
|         assert_eq!(dump.features().unwrap().unwrap(), expected); | ||||
|  | ||||
|         // ==== checking the network | ||||
|         let expected = create_test_network(); | ||||
|         assert_eq!(&expected, dump.network().unwrap().unwrap()); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -196,6 +196,10 @@ impl CompatV5ToV6 { | ||||
|     pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> { | ||||
|         Ok(None) | ||||
|     } | ||||
|  | ||||
|     pub fn network(&self) -> Result<Option<&v6::Network>> { | ||||
|         Ok(None) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub enum CompatIndexV5ToV6 { | ||||
|   | ||||
| @@ -23,6 +23,7 @@ mod v6; | ||||
| pub type Document = serde_json::Map<String, serde_json::Value>; | ||||
| pub type UpdateFile = dyn Iterator<Item = Result<Document>>; | ||||
|  | ||||
| #[allow(clippy::large_enum_variant)] | ||||
| pub enum DumpReader { | ||||
|     Current(V6Reader), | ||||
|     Compat(CompatV5ToV6), | ||||
| @@ -101,6 +102,13 @@ impl DumpReader { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> { | ||||
|         match self { | ||||
|             DumpReader::Current(current) => Ok(current.batches()), | ||||
|             DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> { | ||||
|         match self { | ||||
|             DumpReader::Current(current) => Ok(current.keys()), | ||||
| @@ -114,6 +122,13 @@ impl DumpReader { | ||||
|             DumpReader::Compat(compat) => compat.features(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn network(&self) -> Result<Option<&v6::Network>> { | ||||
|         match self { | ||||
|             DumpReader::Current(current) => Ok(current.network()), | ||||
|             DumpReader::Compat(compat) => compat.network(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<V6Reader> for DumpReader { | ||||
| @@ -219,6 +234,10 @@ pub(crate) mod test { | ||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00"); | ||||
|         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); | ||||
|  | ||||
|         // batches didn't exists at the time | ||||
|         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||
|  | ||||
|         // tasks | ||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||
| @@ -328,6 +347,7 @@ pub(crate) mod test { | ||||
|         } | ||||
|  | ||||
|         assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default()); | ||||
|         assert_eq!(dump.network().unwrap(), None); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
| @@ -339,6 +359,10 @@ pub(crate) mod test { | ||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00"); | ||||
|         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); | ||||
|  | ||||
|         // batches didn't exists at the time | ||||
|         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||
|  | ||||
|         // tasks | ||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||
| @@ -373,6 +397,27 @@ pub(crate) mod test { | ||||
|         assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn import_dump_v6_network() { | ||||
|         let dump = File::open("tests/assets/v6-with-network.dump").unwrap(); | ||||
|         let dump = DumpReader::open(dump).unwrap(); | ||||
|  | ||||
|         // top level infos | ||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00"); | ||||
|         insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); | ||||
|  | ||||
|         // network | ||||
|  | ||||
|         let network = dump.network().unwrap().unwrap(); | ||||
|         insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0"); | ||||
|         insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700"); | ||||
|         insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true"); | ||||
|         insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701"); | ||||
|         insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true"); | ||||
|         insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io"); | ||||
|         insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo"); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn import_dump_v5() { | ||||
|         let dump = File::open("tests/assets/v5.dump").unwrap(); | ||||
| @@ -382,6 +427,10 @@ pub(crate) mod test { | ||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); | ||||
|         insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); | ||||
|  | ||||
|         // batches didn't exists at the time | ||||
|         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||
|  | ||||
|         // tasks | ||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||
| @@ -462,6 +511,10 @@ pub(crate) mod test { | ||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); | ||||
|         insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); | ||||
|  | ||||
|         // batches didn't exists at the time | ||||
|         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||
|  | ||||
|         // tasks | ||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||
| @@ -539,6 +592,10 @@ pub(crate) mod test { | ||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); | ||||
|         assert_eq!(dump.instance_uid().unwrap(), None); | ||||
|  | ||||
|         // batches didn't exists at the time | ||||
|         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||
|  | ||||
|         // tasks | ||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||
| @@ -632,6 +689,10 @@ pub(crate) mod test { | ||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); | ||||
|         assert_eq!(dump.instance_uid().unwrap(), None); | ||||
|  | ||||
|         // batches didn't exists at the time | ||||
|         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||
|  | ||||
|         // tasks | ||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||
| @@ -725,6 +786,10 @@ pub(crate) mod test { | ||||
|         insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00"); | ||||
|         assert_eq!(dump.instance_uid().unwrap(), None); | ||||
|  | ||||
|         // batches didn't exists at the time | ||||
|         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||
|  | ||||
|         // tasks | ||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||
| @@ -801,6 +866,10 @@ pub(crate) mod test { | ||||
|         assert_eq!(dump.date(), None); | ||||
|         assert_eq!(dump.instance_uid().unwrap(), None); | ||||
|  | ||||
|         // batches didn't exists at the time | ||||
|         let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]"); | ||||
|  | ||||
|         // tasks | ||||
|         let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); | ||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||
|   | ||||
| @@ -18,8 +18,10 @@ pub type Checked = meilisearch_types::settings::Checked; | ||||
| pub type Unchecked = meilisearch_types::settings::Unchecked; | ||||
|  | ||||
| pub type Task = crate::TaskDump; | ||||
| pub type Batch = meilisearch_types::batches::Batch; | ||||
| pub type Key = meilisearch_types::keys::Key; | ||||
| pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures; | ||||
| pub type Network = meilisearch_types::features::Network; | ||||
|  | ||||
| // ===== Other types to clarify the code of the compat module | ||||
| // everything related to the tasks | ||||
| @@ -48,8 +50,10 @@ pub struct V6Reader { | ||||
|     instance_uid: Option<Uuid>, | ||||
|     metadata: Metadata, | ||||
|     tasks: BufReader<File>, | ||||
|     batches: Option<BufReader<File>>, | ||||
|     keys: BufReader<File>, | ||||
|     features: Option<RuntimeTogglableFeatures>, | ||||
|     network: Option<Network>, | ||||
| } | ||||
|  | ||||
| impl V6Reader { | ||||
| @@ -77,13 +81,38 @@ impl V6Reader { | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|         let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) { | ||||
|             Ok(file) => Some(BufReader::new(file)), | ||||
|             // The batch file was only introduced during the v1.13, anything prior to that won't have batches | ||||
|             Err(err) if err.kind() == ErrorKind::NotFound => None, | ||||
|             Err(e) => return Err(e.into()), | ||||
|         }; | ||||
|  | ||||
|         let network_file = match fs::read(dump.path().join("network.json")) { | ||||
|             Ok(network_file) => Some(network_file), | ||||
|             Err(error) => match error.kind() { | ||||
|                 // Allows the file to be missing, this will only result in all experimental features disabled. | ||||
|                 ErrorKind::NotFound => { | ||||
|                     debug!("`network.json` not found in dump"); | ||||
|                     None | ||||
|                 } | ||||
|                 _ => return Err(error.into()), | ||||
|             }, | ||||
|         }; | ||||
|         let network = if let Some(network_file) = network_file { | ||||
|             Some(serde_json::from_reader(&*network_file)?) | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|  | ||||
|         Ok(V6Reader { | ||||
|             metadata: serde_json::from_reader(&*meta_file)?, | ||||
|             instance_uid, | ||||
|             tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), | ||||
|             batches, | ||||
|             keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), | ||||
|             features, | ||||
|             network, | ||||
|             dump, | ||||
|         }) | ||||
|     } | ||||
| @@ -124,7 +153,7 @@ impl V6Reader { | ||||
|         &mut self, | ||||
|     ) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> { | ||||
|         Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { | ||||
|             let task: Task = serde_json::from_str(&line?).unwrap(); | ||||
|             let task: Task = serde_json::from_str(&line?)?; | ||||
|  | ||||
|             let update_file_path = self | ||||
|                 .dump | ||||
| @@ -136,8 +165,7 @@ impl V6Reader { | ||||
|             if update_file_path.exists() { | ||||
|                 Ok(( | ||||
|                     task, | ||||
|                     Some(Box::new(UpdateFile::new(&update_file_path).unwrap()) | ||||
|                         as Box<super::UpdateFile>), | ||||
|                     Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>), | ||||
|                 )) | ||||
|             } else { | ||||
|                 Ok((task, None)) | ||||
| @@ -145,6 +173,16 @@ impl V6Reader { | ||||
|         })) | ||||
|     } | ||||
|  | ||||
|     pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> { | ||||
|         match self.batches.as_mut() { | ||||
|             Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> { | ||||
|                 let batch = serde_json::from_str(&line?)?; | ||||
|                 Ok(batch) | ||||
|             })), | ||||
|             None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> { | ||||
|         Box::new( | ||||
|             (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), | ||||
| @@ -154,6 +192,10 @@ impl V6Reader { | ||||
|     pub fn features(&self) -> Option<RuntimeTogglableFeatures> { | ||||
|         self.features | ||||
|     } | ||||
|  | ||||
|     pub fn network(&self) -> Option<&Network> { | ||||
|         self.network.as_ref() | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct UpdateFile { | ||||
|   | ||||
| @@ -4,7 +4,8 @@ use std::path::PathBuf; | ||||
|  | ||||
| use flate2::write::GzEncoder; | ||||
| use flate2::Compression; | ||||
| use meilisearch_types::features::RuntimeTogglableFeatures; | ||||
| use meilisearch_types::batches::Batch; | ||||
| use meilisearch_types::features::{Network, RuntimeTogglableFeatures}; | ||||
| use meilisearch_types::keys::Key; | ||||
| use meilisearch_types::settings::{Checked, Settings}; | ||||
| use serde_json::{Map, Value}; | ||||
| @@ -54,6 +55,10 @@ impl DumpWriter { | ||||
|         TaskWriter::new(self.dir.path().join("tasks")) | ||||
|     } | ||||
|  | ||||
|     pub fn create_batches_queue(&self) -> Result<BatchWriter> { | ||||
|         BatchWriter::new(self.dir.path().join("batches")) | ||||
|     } | ||||
|  | ||||
|     pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> { | ||||
|         Ok(std::fs::write( | ||||
|             self.dir.path().join("experimental-features.json"), | ||||
| @@ -61,6 +66,10 @@ impl DumpWriter { | ||||
|         )?) | ||||
|     } | ||||
|  | ||||
|     pub fn create_network(&self, network: Network) -> Result<()> { | ||||
|         Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?) | ||||
|     } | ||||
|  | ||||
|     pub fn persist_to(self, mut writer: impl Write) -> Result<()> { | ||||
|         let gz_encoder = GzEncoder::new(&mut writer, Compression::default()); | ||||
|         let mut tar_encoder = tar::Builder::new(gz_encoder); | ||||
| @@ -84,7 +93,7 @@ impl KeyWriter { | ||||
|     } | ||||
|  | ||||
|     pub fn push_key(&mut self, key: &Key) -> Result<()> { | ||||
|         self.keys.write_all(&serde_json::to_vec(key)?)?; | ||||
|         serde_json::to_writer(&mut self.keys, &key)?; | ||||
|         self.keys.write_all(b"\n")?; | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -114,7 +123,7 @@ impl TaskWriter { | ||||
|     /// Pushes tasks in the dump. | ||||
|     /// If the tasks has an associated `update_file` it'll use the `task_id` as its name. | ||||
|     pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> { | ||||
|         self.queue.write_all(&serde_json::to_vec(task)?)?; | ||||
|         serde_json::to_writer(&mut self.queue, &task)?; | ||||
|         self.queue.write_all(b"\n")?; | ||||
|  | ||||
|         Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid)))) | ||||
| @@ -126,6 +135,30 @@ impl TaskWriter { | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct BatchWriter { | ||||
|     queue: BufWriter<File>, | ||||
| } | ||||
|  | ||||
| impl BatchWriter { | ||||
|     pub(crate) fn new(path: PathBuf) -> Result<Self> { | ||||
|         std::fs::create_dir(&path)?; | ||||
|         let queue = File::create(path.join("queue.jsonl"))?; | ||||
|         Ok(BatchWriter { queue: BufWriter::new(queue) }) | ||||
|     } | ||||
|  | ||||
|     /// Pushes batches in the dump. | ||||
|     pub fn push_batch(&mut self, batch: &Batch) -> Result<()> { | ||||
|         serde_json::to_writer(&mut self.queue, &batch)?; | ||||
|         self.queue.write_all(b"\n")?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn flush(mut self) -> Result<()> { | ||||
|         self.queue.flush()?; | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct UpdateFile { | ||||
|     path: PathBuf, | ||||
|     writer: Option<BufWriter<File>>, | ||||
| @@ -137,8 +170,8 @@ impl UpdateFile { | ||||
|     } | ||||
|  | ||||
|     pub fn push_document(&mut self, document: &Document) -> Result<()> { | ||||
|         if let Some(writer) = self.writer.as_mut() { | ||||
|             writer.write_all(&serde_json::to_vec(document)?)?; | ||||
|         if let Some(mut writer) = self.writer.as_mut() { | ||||
|             serde_json::to_writer(&mut writer, &document)?; | ||||
|             writer.write_all(b"\n")?; | ||||
|         } else { | ||||
|             let file = File::create(&self.path).unwrap(); | ||||
| @@ -205,8 +238,8 @@ pub(crate) mod test { | ||||
|     use super::*; | ||||
|     use crate::reader::Document; | ||||
|     use crate::test::{ | ||||
|         create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid, | ||||
|         create_test_settings, create_test_tasks, | ||||
|         create_test_api_keys, create_test_batches, create_test_documents, create_test_dump, | ||||
|         create_test_instance_uid, create_test_settings, create_test_tasks, | ||||
|     }; | ||||
|  | ||||
|     fn create_directory_hierarchy(dir: &Path) -> String { | ||||
| @@ -281,8 +314,10 @@ pub(crate) mod test { | ||||
|         let dump_path = dump.path(); | ||||
|  | ||||
|         // ==== checking global file hierarchy (we want to be sure there isn't too many files or too few) | ||||
|         insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###" | ||||
|         insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r" | ||||
|         . | ||||
|         ├---- batches/ | ||||
|         │    └---- queue.jsonl | ||||
|         ├---- indexes/ | ||||
|         │    └---- doggos/ | ||||
|         │    │    ├---- documents.jsonl | ||||
| @@ -295,8 +330,9 @@ pub(crate) mod test { | ||||
|         ├---- experimental-features.json | ||||
|         ├---- instance_uid.uuid | ||||
|         ├---- keys.jsonl | ||||
|         └---- metadata.json | ||||
|         "###); | ||||
|         ├---- metadata.json | ||||
|         └---- network.json | ||||
|         "); | ||||
|  | ||||
|         // ==== checking the top level infos | ||||
|         let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap(); | ||||
| @@ -349,6 +385,16 @@ pub(crate) mod test { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // ==== checking the batch queue | ||||
|         let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap(); | ||||
|         for (batch, expected) in batches_queue.lines().zip(create_test_batches()) { | ||||
|             let mut batch = serde_json::from_str::<Batch>(batch).unwrap(); | ||||
|             if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) { | ||||
|                 batch.details.settings = None; | ||||
|             } | ||||
|             assert_eq!(batch, expected, "{batch:#?}{expected:#?}"); | ||||
|         } | ||||
|  | ||||
|         // ==== checking the keys | ||||
|         let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap(); | ||||
|         for (key, expected) in keys.lines().zip(create_test_api_keys()) { | ||||
|   | ||||
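
The writer half added above is driven like this when a dump is built. A minimal sketch, assuming the `dump` crate's `DumpWriter` and `Result` types and using only the API introduced in this PR (it mirrors the test code earlier in the diff):

```rust
// Minimal sketch, not part of the diff: drive the new batch-queue writer.
use meilisearch_types::batches::Batch;

fn dump_batch_queue(dump: &dump::DumpWriter, batches: &[Batch]) -> dump::Result<()> {
    // Creates the `batches/` directory and its `queue.jsonl` file in the dump.
    let mut queue = dump.create_batches_queue()?;
    for batch in batches {
        // One JSON object per line, same convention as tasks and keys.
        queue.push_batch(batch)?;
    }
    // `flush` consumes the writer and makes sure the queue reaches disk.
    queue.flush()?;
    Ok(())
}
```

Each batch lands as one JSON line in `batches/queue.jsonl`, matching the layout change shown in the README hunk at the top of this diff.
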
BIN  crates/dump/tests/assets/v6-with-network.dump (new file, binary content not shown)
							| @@ -2,6 +2,7 @@ use std::collections::HashMap; | ||||
| use std::io; | ||||
|  | ||||
| use dump::{KindDump, TaskDump, UpdateFile}; | ||||
| use meilisearch_types::batches::{Batch, BatchId}; | ||||
| use meilisearch_types::heed::RwTxn; | ||||
| use meilisearch_types::milli; | ||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||
| @@ -14,9 +15,15 @@ pub struct Dump<'a> { | ||||
|     index_scheduler: &'a IndexScheduler, | ||||
|     wtxn: RwTxn<'a>, | ||||
|  | ||||
|     batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>, | ||||
|  | ||||
|     indexes: HashMap<String, RoaringBitmap>, | ||||
|     statuses: HashMap<Status, RoaringBitmap>, | ||||
|     kinds: HashMap<Kind, RoaringBitmap>, | ||||
|  | ||||
|     batch_indexes: HashMap<String, RoaringBitmap>, | ||||
|     batch_statuses: HashMap<Status, RoaringBitmap>, | ||||
|     batch_kinds: HashMap<Kind, RoaringBitmap>, | ||||
| } | ||||
|  | ||||
| impl<'a> Dump<'a> { | ||||
| @@ -27,12 +34,72 @@ impl<'a> Dump<'a> { | ||||
|         Ok(Dump { | ||||
|             index_scheduler, | ||||
|             wtxn, | ||||
|             batch_to_task_mapping: HashMap::new(), | ||||
|             indexes: HashMap::new(), | ||||
|             statuses: HashMap::new(), | ||||
|             kinds: HashMap::new(), | ||||
|             batch_indexes: HashMap::new(), | ||||
|             batch_statuses: HashMap::new(), | ||||
|             batch_kinds: HashMap::new(), | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Register a new batch coming from a dump in the scheduler. | ||||
|     /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. | ||||
|     pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> { | ||||
|         self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?; | ||||
|         if let Some(enqueued_at) = batch.enqueued_at { | ||||
|             utils::insert_task_datetime( | ||||
|                 &mut self.wtxn, | ||||
|                 self.index_scheduler.queue.batches.enqueued_at, | ||||
|                 enqueued_at.earliest, | ||||
|                 batch.uid, | ||||
|             )?; | ||||
|             utils::insert_task_datetime( | ||||
|                 &mut self.wtxn, | ||||
|                 self.index_scheduler.queue.batches.enqueued_at, | ||||
|                 enqueued_at.oldest, | ||||
|                 batch.uid, | ||||
|             )?; | ||||
|         } | ||||
|         utils::insert_task_datetime( | ||||
|             &mut self.wtxn, | ||||
|             self.index_scheduler.queue.batches.started_at, | ||||
|             batch.started_at, | ||||
|             batch.uid, | ||||
|         )?; | ||||
|         if let Some(finished_at) = batch.finished_at { | ||||
|             utils::insert_task_datetime( | ||||
|                 &mut self.wtxn, | ||||
|                 self.index_scheduler.queue.batches.finished_at, | ||||
|                 finished_at, | ||||
|                 batch.uid, | ||||
|             )?; | ||||
|         } | ||||
|  | ||||
|         for index in batch.stats.index_uids.keys() { | ||||
|             match self.batch_indexes.get_mut(index) { | ||||
|                 Some(bitmap) => { | ||||
|                     bitmap.insert(batch.uid); | ||||
|                 } | ||||
|                 None => { | ||||
|                     let mut bitmap = RoaringBitmap::new(); | ||||
|                     bitmap.insert(batch.uid); | ||||
|                     self.batch_indexes.insert(index.to_string(), bitmap); | ||||
|                 } | ||||
|             }; | ||||
|         } | ||||
|  | ||||
|         for status in batch.stats.status.keys() { | ||||
|             self.batch_statuses.entry(*status).or_default().insert(batch.uid); | ||||
|         } | ||||
|         for kind in batch.stats.types.keys() { | ||||
|             self.batch_kinds.entry(*kind).or_default().insert(batch.uid); | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Register a new task coming from a dump in the scheduler. | ||||
|     /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. | ||||
|     pub fn register_dumped_task( | ||||
| @@ -149,6 +216,9 @@ impl<'a> Dump<'a> { | ||||
|         }; | ||||
|  | ||||
|         self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?; | ||||
|         if let Some(batch_id) = task.batch_uid { | ||||
|             self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid); | ||||
|         } | ||||
|  | ||||
|         for index in task.indexes() { | ||||
|             match self.indexes.get_mut(index) { | ||||
| @@ -198,6 +268,14 @@ impl<'a> Dump<'a> { | ||||
|  | ||||
|     /// Commit all the changes and exit the importing dump state | ||||
|     pub fn finish(mut self) -> Result<()> { | ||||
|         for (batch_id, task_ids) in self.batch_to_task_mapping { | ||||
|             self.index_scheduler.queue.batch_to_tasks_mapping.put( | ||||
|                 &mut self.wtxn, | ||||
|                 &batch_id, | ||||
|                 &task_ids, | ||||
|             )?; | ||||
|         } | ||||
|  | ||||
|         for (index, bitmap) in self.indexes { | ||||
|             self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; | ||||
|         } | ||||
| @@ -208,6 +286,16 @@ impl<'a> Dump<'a> { | ||||
|             self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?; | ||||
|         } | ||||
|  | ||||
|         for (index, bitmap) in self.batch_indexes { | ||||
|             self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; | ||||
|         } | ||||
|         for (status, bitmap) in self.batch_statuses { | ||||
|             self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?; | ||||
|         } | ||||
|         for (kind, bitmap) in self.batch_kinds { | ||||
|             self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?; | ||||
|         } | ||||
|  | ||||
|         self.wtxn.commit()?; | ||||
|         self.index_scheduler.scheduler.wake_up.signal(); | ||||
|  | ||||
|   | ||||
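
Reading the queue back follows the same shape: the scheduler's `Dump` helper above consumes the batches produced by `DumpReader::batches()`. A rough sketch with module paths approximated and the dump-opening and transaction plumbing assumed, not code from this diff:

```rust
// Rough sketch: replay batches stored in a dump into the index scheduler.
fn import_dumped_batches(
    reader: &mut dump::DumpReader,
    dump: &mut index_scheduler::Dump<'_>,
) -> anyhow::Result<()> {
    for batch in reader.batches()? {
        // Each item is a Result<Batch>; registering a batch indexes it by
        // status, kind, index uid and its enqueued/started/finished dates.
        dump.register_dumped_batch(batch?)?;
    }
    Ok(())
}
```
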
| @@ -109,6 +109,8 @@ pub enum Error { | ||||
|     InvalidIndexUid { index_uid: String }, | ||||
|     #[error("Task `{0}` not found.")] | ||||
|     TaskNotFound(TaskId), | ||||
|     #[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")] | ||||
|     TaskFileNotFound(TaskId), | ||||
|     #[error("Batch `{0}` not found.")] | ||||
|     BatchNotFound(BatchId), | ||||
|     #[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")] | ||||
| @@ -127,8 +129,8 @@ pub enum Error { | ||||
|         _ => format!("{error}") | ||||
|     })] | ||||
|     Milli { error: milli::Error, index_uid: Option<String> }, | ||||
|     #[error("An unexpected crash occurred when processing the task.")] | ||||
|     ProcessBatchPanicked, | ||||
|     #[error("An unexpected crash occurred when processing the task: {0}")] | ||||
|     ProcessBatchPanicked(String), | ||||
|     #[error(transparent)] | ||||
|     FileStore(#[from] file_store::Error), | ||||
|     #[error(transparent)] | ||||
| @@ -189,6 +191,7 @@ impl Error { | ||||
|             | Error::InvalidTaskCanceledBy { .. } | ||||
|             | Error::InvalidIndexUid { .. } | ||||
|             | Error::TaskNotFound(_) | ||||
|             | Error::TaskFileNotFound(_) | ||||
|             | Error::BatchNotFound(_) | ||||
|             | Error::TaskDeletionWithEmptyQuery | ||||
|             | Error::TaskCancelationWithEmptyQuery | ||||
| @@ -196,7 +199,7 @@ impl Error { | ||||
|             | Error::Dump(_) | ||||
|             | Error::Heed(_) | ||||
|             | Error::Milli { .. } | ||||
|             | Error::ProcessBatchPanicked | ||||
|             | Error::ProcessBatchPanicked(_) | ||||
|             | Error::FileStore(_) | ||||
|             | Error::IoError(_) | ||||
|             | Error::Persist(_) | ||||
| @@ -250,6 +253,7 @@ impl ErrorCode for Error { | ||||
|             Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy, | ||||
|             Error::InvalidIndexUid { .. } => Code::InvalidIndexUid, | ||||
|             Error::TaskNotFound(_) => Code::TaskNotFound, | ||||
|             Error::TaskFileNotFound(_) => Code::TaskFileNotFound, | ||||
|             Error::BatchNotFound(_) => Code::BatchNotFound, | ||||
|             Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters, | ||||
|             Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters, | ||||
| @@ -257,7 +261,7 @@ impl ErrorCode for Error { | ||||
|             Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice, | ||||
|             Error::Dump(e) => e.error_code(), | ||||
|             Error::Milli { error, .. } => error.error_code(), | ||||
|             Error::ProcessBatchPanicked => Code::Internal, | ||||
|             Error::ProcessBatchPanicked(_) => Code::Internal, | ||||
|             Error::Heed(e) => e.error_code(), | ||||
|             Error::HeedTransaction(e) => e.error_code(), | ||||
|             Error::FileStore(e) => e.error_code(), | ||||
|   | ||||
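
`ProcessBatchPanicked` now carries the panic message as a `String`. The catch site is outside this diff; as a hedged illustration of where such a message typically comes from, a caught panic payload can be converted like this:

```rust
use std::any::Any;

// Panic payloads are usually a &'static str (panic!("msg")) or a String
// (panic!("{reason}")); anything else falls back to a generic message.
fn panic_payload_to_message(payload: Box<dyn Any + Send>) -> String {
    if let Some(s) = payload.downcast_ref::<&'static str>() {
        (*s).to_string()
    } else if let Some(s) = payload.downcast_ref::<String>() {
        s.clone()
    } else {
        "unknown panic payload".to_string()
    }
}
```
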
| @@ -1,6 +1,6 @@ | ||||
| use std::sync::{Arc, RwLock}; | ||||
|  | ||||
| use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; | ||||
| use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; | ||||
| use meilisearch_types::heed::types::{SerdeJson, Str}; | ||||
| use meilisearch_types::heed::{Database, Env, RwTxn}; | ||||
|  | ||||
| @@ -14,10 +14,16 @@ mod db_name { | ||||
|     pub const EXPERIMENTAL_FEATURES: &str = "experimental-features"; | ||||
| } | ||||
|  | ||||
| mod db_keys { | ||||
|     pub const EXPERIMENTAL_FEATURES: &str = "experimental-features"; | ||||
|     pub const NETWORK: &str = "network"; | ||||
| } | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub(crate) struct FeatureData { | ||||
|     persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>, | ||||
|     runtime: Arc<RwLock<RuntimeTogglableFeatures>>, | ||||
|     network: Arc<RwLock<Network>>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| @@ -86,6 +92,32 @@ impl RoFeatures { | ||||
|             .into()) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn check_network(&self, disabled_action: &'static str) -> Result<()> { | ||||
|         if self.runtime.network { | ||||
|             Ok(()) | ||||
|         } else { | ||||
|             Err(FeatureNotEnabledError { | ||||
|                 disabled_action, | ||||
|                 feature: "network", | ||||
|                 issue_link: "https://github.com/orgs/meilisearch/discussions/805", | ||||
|             } | ||||
|             .into()) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn check_get_task_documents_route(&self) -> Result<()> { | ||||
|         if self.runtime.get_task_documents_route { | ||||
|             Ok(()) | ||||
|         } else { | ||||
|             Err(FeatureNotEnabledError { | ||||
|                 disabled_action: "Getting the documents of an enqueued task", | ||||
|                 feature: "get task documents route", | ||||
|                 issue_link: "https://github.com/orgs/meilisearch/discussions/808", | ||||
|             } | ||||
|             .into()) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl FeatureData { | ||||
| @@ -102,7 +134,7 @@ impl FeatureData { | ||||
|             env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?; | ||||
|  | ||||
|         let persisted_features: RuntimeTogglableFeatures = | ||||
|             runtime_features_db.get(wtxn, db_name::EXPERIMENTAL_FEATURES)?.unwrap_or_default(); | ||||
|             runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default(); | ||||
|         let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features; | ||||
|         let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { | ||||
|             metrics: metrics || persisted_features.metrics, | ||||
| @@ -111,7 +143,14 @@ impl FeatureData { | ||||
|             ..persisted_features | ||||
|         })); | ||||
|  | ||||
|         Ok(Self { persisted: runtime_features_db, runtime }) | ||||
|         let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>(); | ||||
|         let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default(); | ||||
|  | ||||
|         Ok(Self { | ||||
|             persisted: runtime_features_db, | ||||
|             runtime, | ||||
|             network: Arc::new(RwLock::new(network)), | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn put_runtime_features( | ||||
| @@ -119,7 +158,7 @@ impl FeatureData { | ||||
|         mut wtxn: RwTxn, | ||||
|         features: RuntimeTogglableFeatures, | ||||
|     ) -> Result<()> { | ||||
|         self.persisted.put(&mut wtxn, db_name::EXPERIMENTAL_FEATURES, &features)?; | ||||
|         self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?; | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         // safe to unwrap, the lock will only fail if: | ||||
| @@ -140,4 +179,21 @@ impl FeatureData { | ||||
|     pub fn features(&self) -> RoFeatures { | ||||
|         RoFeatures::new(self) | ||||
|     } | ||||
|  | ||||
|     pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> { | ||||
|         self.persisted.remap_data_type::<SerdeJson<Network>>().put( | ||||
|             &mut wtxn, | ||||
|             db_keys::NETWORK, | ||||
|             &new_network, | ||||
|         )?; | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         let mut network = self.network.write().unwrap(); | ||||
|         *network = new_network; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn network(&self) -> Network { | ||||
|         Network::clone(&*self.network.read().unwrap()) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| use std::collections::BTreeMap; | ||||
| use std::env::VarError; | ||||
| use std::path::Path; | ||||
| use std::str::FromStr; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions}; | ||||
| @@ -304,7 +306,18 @@ fn create_or_open_index( | ||||
| ) -> Result<Index> { | ||||
|     let mut options = EnvOpenOptions::new(); | ||||
|     options.map_size(clamp_to_page_size(map_size)); | ||||
|     options.max_readers(1024); | ||||
|  | ||||
|     // You can find more details about this experimental | ||||
|     // environment variable on the following GitHub discussion: | ||||
|     // <https://github.com/orgs/meilisearch/discussions/806> | ||||
|     let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") { | ||||
|         Ok(value) => u32::from_str(&value).unwrap(), | ||||
|         Err(VarError::NotPresent) => 1024, | ||||
|         Err(VarError::NotUnicode(value)) => panic!( | ||||
|             "Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}" | ||||
|         ), | ||||
|     }; | ||||
|     options.max_readers(max_readers); | ||||
|     if enable_mdb_writemap { | ||||
|         unsafe { options.flags(EnvFlags::WRITE_MAP) }; | ||||
|     } | ||||
|   | ||||
| @@ -106,6 +106,12 @@ pub struct IndexStats { | ||||
|     /// are not returned to the disk after a deletion, this number is typically larger than | ||||
|     /// `used_database_size` that only includes the size of the used pages. | ||||
|     pub database_size: u64, | ||||
|     /// Number of embeddings in the index. | ||||
|     /// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch | ||||
|     pub number_of_embeddings: Option<u64>, | ||||
|     /// Number of embedded documents in the index. | ||||
|     /// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch | ||||
|     pub number_of_embedded_documents: Option<u64>, | ||||
|     /// Size taken by the used pages of the index' DB, in bytes. | ||||
|     /// | ||||
|     /// As the DB backend does not return to the disk the pages that are not currently used by the DB, | ||||
| @@ -130,8 +136,11 @@ impl IndexStats { | ||||
|     /// | ||||
|     /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. | ||||
|     pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> { | ||||
|         let arroy_stats = index.arroy_stats(rtxn)?; | ||||
|         Ok(IndexStats { | ||||
|             number_of_documents: index.number_of_documents(rtxn)?, | ||||
|             number_of_embeddings: Some(arroy_stats.number_of_embeddings), | ||||
|             number_of_embedded_documents: Some(arroy_stats.documents.len()), | ||||
|             database_size: index.on_disk_size()?, | ||||
|             used_database_size: index.used_size()?, | ||||
|             primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()), | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| use std::collections::BTreeSet; | ||||
| use std::fmt::Write; | ||||
|  | ||||
| use meilisearch_types::batches::Batch; | ||||
| use meilisearch_types::batches::{Batch, BatchEnqueuedAt}; | ||||
| use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str}; | ||||
| use meilisearch_types::heed::{Database, RoTxn}; | ||||
| use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; | ||||
| @@ -341,10 +341,14 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec | ||||
|  | ||||
| pub fn snapshot_batch(batch: &Batch) -> String { | ||||
|     let mut snap = String::new(); | ||||
|     let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch; | ||||
|     let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch; | ||||
|     if let Some(finished_at) = finished_at { | ||||
|         assert!(finished_at > started_at); | ||||
|     } | ||||
|     let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap(); | ||||
|     assert!(*started_at > earliest); | ||||
|     assert!(earliest >= oldest); | ||||
|  | ||||
|     snap.push('{'); | ||||
|     snap.push_str(&format!("uid: {uid}, ")); | ||||
|     snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap())); | ||||
|   | ||||
| @@ -33,7 +33,7 @@ mod test_utils; | ||||
| pub mod upgrade; | ||||
| mod utils; | ||||
| pub mod uuid_codec; | ||||
| mod versioning; | ||||
| pub mod versioning; | ||||
|  | ||||
| pub type Result<T, E = Error> = std::result::Result<T, E>; | ||||
| pub type TaskId = u32; | ||||
| @@ -51,7 +51,7 @@ pub use features::RoFeatures; | ||||
| use flate2::bufread::GzEncoder; | ||||
| use flate2::Compression; | ||||
| use meilisearch_types::batches::Batch; | ||||
| use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; | ||||
| use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; | ||||
| use meilisearch_types::heed::byteorder::BE; | ||||
| use meilisearch_types::heed::types::I128; | ||||
| use meilisearch_types::heed::{self, Env, RoTxn}; | ||||
| @@ -770,7 +770,16 @@ impl IndexScheduler { | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     // TODO: consider using a type alias or a struct embedder/template | ||||
|     pub fn put_network(&self, network: Network) -> Result<()> { | ||||
|         let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; | ||||
|         self.features.put_network(wtxn, network)?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn network(&self) -> Network { | ||||
|         self.features.network() | ||||
|     } | ||||
|  | ||||
|     pub fn embedders( | ||||
|         &self, | ||||
|         index_uid: String, | ||||
|   | ||||
| @@ -96,6 +96,7 @@ make_enum_progress! { | ||||
|         StartTheDumpCreation, | ||||
|         DumpTheApiKeys, | ||||
|         DumpTheTasks, | ||||
|         DumpTheBatches, | ||||
|         DumpTheIndexes, | ||||
|         DumpTheExperimentalFeatures, | ||||
|         CompressTheDump, | ||||
|   | ||||
| @@ -12,8 +12,8 @@ use time::OffsetDateTime; | ||||
| use super::{Query, Queue}; | ||||
| use crate::processing::ProcessingTasks; | ||||
| use crate::utils::{ | ||||
|     insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime, | ||||
|     ProcessingBatch, | ||||
|     insert_task_datetime, keep_ids_within_datetimes, map_bound, | ||||
|     remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch, | ||||
| }; | ||||
| use crate::{Error, Result, BEI128}; | ||||
|  | ||||
| @@ -181,6 +181,7 @@ impl BatchQueue { | ||||
|                 stats: batch.stats, | ||||
|                 started_at: batch.started_at, | ||||
|                 finished_at: batch.finished_at, | ||||
|                 enqueued_at: batch.enqueued_at, | ||||
|             }, | ||||
|         )?; | ||||
|  | ||||
| @@ -234,34 +235,25 @@ impl BatchQueue { | ||||
|         // What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written | ||||
|         // to the DB per batches. | ||||
|         if let Some(ref old_batch) = old_batch { | ||||
|             let started_at = old_batch.started_at.unix_timestamp_nanos(); | ||||
|  | ||||
|             // We have either one or two enqueued at to remove | ||||
|             let mut exit = old_batch.stats.total_nb_tasks.clamp(0, 2); | ||||
|             let mut iterator = self.enqueued_at.rev_iter_mut(wtxn)?; | ||||
|             while let Some(entry) = iterator.next() { | ||||
|                 let (key, mut value) = entry?; | ||||
|                 if key > started_at { | ||||
|                     continue; | ||||
|                 } | ||||
|                 if value.remove(old_batch.uid) { | ||||
|                     exit = exit.saturating_sub(1); | ||||
|                     // Safe because the key and value are owned | ||||
|                     unsafe { | ||||
|                         iterator.put_current(&key, &value)?; | ||||
|                     } | ||||
|                     if exit == 0 { | ||||
|                         break; | ||||
|                     } | ||||
|                 } | ||||
|             if let Some(enqueued_at) = old_batch.enqueued_at { | ||||
|                 remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?; | ||||
|                 remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?; | ||||
|             } else { | ||||
|                 // If we don't have the enqueued_at in the batch, it means the database comes from v1.12 | ||||
|                 // and we still need to find the date by scrolling the database | ||||
|                 remove_n_tasks_datetime_earlier_than( | ||||
|                     wtxn, | ||||
|                     self.enqueued_at, | ||||
|                     old_batch.started_at, | ||||
|                     old_batch.stats.total_nb_tasks.clamp(1, 2) as usize, | ||||
|                     old_batch.uid, | ||||
|                 )?; | ||||
|             } | ||||
|         } | ||||
|         if let Some(enqueued_at) = batch.oldest_enqueued_at { | ||||
|             insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?; | ||||
|         } | ||||
|         if let Some(enqueued_at) = batch.earliest_enqueued_at { | ||||
|             insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?; | ||||
|         } | ||||
|         // A finished batch MUST contain at least one task and have an enqueued_at | ||||
|         let enqueued_at = batch.enqueued_at.as_ref().unwrap(); | ||||
|         insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?; | ||||
|         insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?; | ||||
|  | ||||
|         // Update the started at and finished at | ||||
|         if let Some(ref old_batch) = old_batch { | ||||
|   | ||||
| @@ -102,30 +102,33 @@ fn query_batches_simple() { | ||||
|         .unwrap(); | ||||
|     assert_eq!(batches.len(), 1); | ||||
|     batches[0].started_at = OffsetDateTime::UNIX_EPOCH; | ||||
|     assert!(batches[0].enqueued_at.is_some()); | ||||
|     batches[0].enqueued_at = None; | ||||
|     // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 | ||||
|     let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); | ||||
|     snapshot!(batch, @r#" | ||||
|         { | ||||
|           "uid": 0, | ||||
|           "details": { | ||||
|             "primaryKey": "mouse" | ||||
|           }, | ||||
|           "stats": { | ||||
|             "totalNbTasks": 1, | ||||
|             "status": { | ||||
|               "processing": 1 | ||||
|             }, | ||||
|             "types": { | ||||
|               "indexCreation": 1 | ||||
|             }, | ||||
|             "indexUids": { | ||||
|               "catto": 1 | ||||
|             } | ||||
|           }, | ||||
|           "startedAt": "1970-01-01T00:00:00Z", | ||||
|           "finishedAt": null | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "details": { | ||||
|         "primaryKey": "mouse" | ||||
|       }, | ||||
|       "stats": { | ||||
|         "totalNbTasks": 1, | ||||
|         "status": { | ||||
|           "processing": 1 | ||||
|         }, | ||||
|         "types": { | ||||
|           "indexCreation": 1 | ||||
|         }, | ||||
|         "indexUids": { | ||||
|           "catto": 1 | ||||
|         } | ||||
|         "#); | ||||
|       }, | ||||
|       "startedAt": "1970-01-01T00:00:00Z", | ||||
|       "finishedAt": null, | ||||
|       "enqueuedAt": null | ||||
|     } | ||||
|     "#); | ||||
|  | ||||
|     let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; | ||||
|     let (batches, _) = index_scheduler | ||||
|   | ||||
| @@ -8,6 +8,7 @@ mod tasks_test; | ||||
| mod test; | ||||
|  | ||||
| use std::collections::BTreeMap; | ||||
| use std::fs::File as StdFile; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use file_store::FileStore; | ||||
| @@ -216,6 +217,11 @@ impl Queue { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Opens and returns the task's content File. | ||||
|     pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> { | ||||
|         self.file_store.get_update(uuid) | ||||
|     } | ||||
|  | ||||
|     /// Delete a file from the index scheduler. | ||||
|     /// | ||||
|     /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method. | ||||
|   | ||||
| @@ -326,7 +326,7 @@ fn test_auto_deletion_of_tasks() { | ||||
| fn test_task_queue_is_full() { | ||||
|     let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| { | ||||
|         // that's the minimum map size possible | ||||
|         config.task_db_size = 1048576; | ||||
|         config.task_db_size = 1048576 * 3; | ||||
|         None | ||||
|     }); | ||||
|  | ||||
|   | ||||
| @@ -166,13 +166,41 @@ impl IndexScheduler { | ||||
|             let processing_batch = &mut processing_batch; | ||||
|             let progress = progress.clone(); | ||||
|             std::thread::scope(|s| { | ||||
|                 let p = progress.clone(); | ||||
|                 let handle = std::thread::Builder::new() | ||||
|                     .name(String::from("batch-operation")) | ||||
|                     .spawn_scoped(s, move || { | ||||
|                         cloned_index_scheduler.process_batch(batch, processing_batch, progress) | ||||
|                         cloned_index_scheduler.process_batch(batch, processing_batch, p) | ||||
|                     }) | ||||
|                     .unwrap(); | ||||
|                 handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) | ||||
|  | ||||
|                 match handle.join() { | ||||
|                     Ok(ret) => { | ||||
|                         if ret.is_err() { | ||||
|                             if let Ok(progress_view) = | ||||
|                                 serde_json::to_string(&progress.as_progress_view()) | ||||
|                             { | ||||
|                                 tracing::warn!("Batch failed while doing: {progress_view}") | ||||
|                             } | ||||
|                         } | ||||
|                         ret | ||||
|                     } | ||||
|                     Err(panic) => { | ||||
|                         if let Ok(progress_view) = | ||||
|                             serde_json::to_string(&progress.as_progress_view()) | ||||
|                         { | ||||
|                             tracing::warn!("Batch failed while doing: {progress_view}") | ||||
|                         } | ||||
|                         let msg = match panic.downcast_ref::<&'static str>() { | ||||
|                             Some(s) => *s, | ||||
|                             None => match panic.downcast_ref::<String>() { | ||||
|                                 Some(s) => &s[..], | ||||
|                                 None => "Box<dyn Any>", | ||||
|                             }, | ||||
|                         }; | ||||
|                         Err(Error::ProcessBatchPanicked(msg.to_string())) | ||||
|                     } | ||||
|                 } | ||||
|             }) | ||||
|         }; | ||||
|  | ||||
|   | ||||
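The same downcast dance appears again in the upgrade path below, so here it is in isolation as a self-contained sketch (the `panic_message` helper is hypothetical and not part of this PR): a panic payload is a `Box<dyn Any + Send>` that, in practice, carries either a `&'static str` or a `String`.

use std::any::Any;
use std::panic::{catch_unwind, AssertUnwindSafe};

// Extract a human-readable message from a panic payload, falling back to the
// same "Box<dyn Any>" placeholder used above.
fn panic_message(panic: &(dyn Any + Send)) -> &str {
    match panic.downcast_ref::<&'static str>() {
        Some(s) => s,
        None => match panic.downcast_ref::<String>() {
            Some(s) => s.as_str(),
            None => "Box<dyn Any>",
        },
    }
}

fn demo() {
    let result = catch_unwind(AssertUnwindSafe(|| panic!("simulated panic")));
    if let Err(panic) = result {
        // Prints "simulated panic", i.e. the message that now ends up inside
        // `Error::ProcessBatchPanicked`.
        eprintln!("{}", panic_message(panic.as_ref()));
    }
}
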
| @@ -2,7 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet}; | ||||
| use std::panic::{catch_unwind, AssertUnwindSafe}; | ||||
| use std::sync::atomic::Ordering; | ||||
|  | ||||
| use meilisearch_types::batches::BatchId; | ||||
| use meilisearch_types::batches::{BatchEnqueuedAt, BatchId}; | ||||
| use meilisearch_types::heed::{RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::progress::{Progress, VariableNameStep}; | ||||
| use meilisearch_types::milli::{self}; | ||||
| @@ -16,7 +16,10 @@ use crate::processing::{ | ||||
|     InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, | ||||
|     UpdateIndexProgress, | ||||
| }; | ||||
| use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; | ||||
| use crate::utils::{ | ||||
|     self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task, | ||||
|     ProcessingBatch, | ||||
| }; | ||||
| use crate::{Error, IndexScheduler, Result, TaskId}; | ||||
|  | ||||
| impl IndexScheduler { | ||||
| @@ -323,8 +326,17 @@ impl IndexScheduler { | ||||
|                 match ret { | ||||
|                     Ok(Ok(())) => (), | ||||
|                     Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))), | ||||
|                     Err(_e) => { | ||||
|                         return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked))); | ||||
|                     Err(e) => { | ||||
|                         let msg = match e.downcast_ref::<&'static str>() { | ||||
|                             Some(s) => *s, | ||||
|                             None => match e.downcast_ref::<String>() { | ||||
|                                 Some(s) => &s[..], | ||||
|                                 None => "Box<dyn Any>", | ||||
|                             }, | ||||
|                         }; | ||||
|                         return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked( | ||||
|                             msg.to_string(), | ||||
|                         )))); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
| @@ -418,7 +430,6 @@ impl IndexScheduler { | ||||
|         to_delete_tasks -= &enqueued_tasks; | ||||
|  | ||||
|         // 2. We now have a list of tasks to delete, delete them | ||||
|  | ||||
|         let mut affected_indexes = HashSet::new(); | ||||
|         let mut affected_statuses = HashSet::new(); | ||||
|         let mut affected_kinds = HashSet::new(); | ||||
| @@ -515,9 +526,51 @@ impl IndexScheduler { | ||||
|                 tasks -= &to_delete_tasks; | ||||
|                 // We must remove the batch entirely | ||||
|                 if tasks.is_empty() { | ||||
|                     self.queue.batches.all_batches.delete(wtxn, &batch_id)?; | ||||
|                     self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?; | ||||
|                     if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? { | ||||
|                         if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at { | ||||
|                             remove_task_datetime( | ||||
|                                 wtxn, | ||||
|                                 self.queue.batches.enqueued_at, | ||||
|                                 earliest, | ||||
|                                 batch_id, | ||||
|                             )?; | ||||
|                             remove_task_datetime( | ||||
|                                 wtxn, | ||||
|                                 self.queue.batches.enqueued_at, | ||||
|                                 oldest, | ||||
|                                 batch_id, | ||||
|                             )?; | ||||
|                         } else { | ||||
|                             // If we don't have the enqueued_at in the batch, it means the database comes from v1.12 | ||||
|                             // and we still need to find the date by scrolling the database | ||||
|                             remove_n_tasks_datetime_earlier_than( | ||||
|                                 wtxn, | ||||
|                                 self.queue.batches.enqueued_at, | ||||
|                                 batch.started_at, | ||||
|                                 batch.stats.total_nb_tasks.clamp(1, 2) as usize, | ||||
|                                 batch_id, | ||||
|                             )?; | ||||
|                         } | ||||
|                         remove_task_datetime( | ||||
|                             wtxn, | ||||
|                             self.queue.batches.started_at, | ||||
|                             batch.started_at, | ||||
|                             batch_id, | ||||
|                         )?; | ||||
|                         if let Some(finished_at) = batch.finished_at { | ||||
|                             remove_task_datetime( | ||||
|                                 wtxn, | ||||
|                                 self.queue.batches.finished_at, | ||||
|                                 finished_at, | ||||
|                                 batch_id, | ||||
|                             )?; | ||||
|                         } | ||||
|  | ||||
|                         self.queue.batches.all_batches.delete(wtxn, &batch_id)?; | ||||
|                         self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?; | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // Anyway, we must remove the batch from all its reverse indexes. | ||||
|                 // The only way to do that is to check | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| use std::collections::BTreeMap; | ||||
| use std::fs::File; | ||||
| use std::io::BufWriter; | ||||
| use std::sync::atomic::Ordering; | ||||
| @@ -11,7 +12,9 @@ use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; | ||||
| use time::macros::format_description; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| use crate::processing::{AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress}; | ||||
| use crate::processing::{ | ||||
|     AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress, | ||||
| }; | ||||
| use crate::{Error, IndexScheduler, Result}; | ||||
|  | ||||
| impl IndexScheduler { | ||||
| @@ -102,7 +105,40 @@ impl IndexScheduler { | ||||
|         } | ||||
|         dump_tasks.flush()?; | ||||
|  | ||||
|         // 3. Dump the indexes | ||||
|         // 3. dump the batches | ||||
|         progress.update_progress(DumpCreationProgress::DumpTheBatches); | ||||
|         let mut dump_batches = dump.create_batches_queue()?; | ||||
|  | ||||
|         let (atomic_batch_progress, update_batch_progress) = | ||||
|             AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32); | ||||
|         progress.update_progress(update_batch_progress); | ||||
|  | ||||
|         for ret in self.queue.batches.all_batches.iter(&rtxn)? { | ||||
|             if self.scheduler.must_stop_processing.get() { | ||||
|                 return Err(Error::AbortedTask); | ||||
|             } | ||||
|  | ||||
|             let (_, mut b) = ret?; | ||||
|             // In the case we're dumping ourselves, we want to be marked as finished | ||||
|             // so that we don't loop over ourselves indefinitely. | ||||
|             if b.uid == task.uid { | ||||
|                 let finished_at = OffsetDateTime::now_utc(); | ||||
|  | ||||
|                 // We're going to fake the date because we don't know if everything is going to go well. | ||||
|                 // But we need to dump the task as finished and successful. | ||||
|                 // If something fails, everything will be set appropriately in the end. | ||||
|                 let mut statuses = BTreeMap::new(); | ||||
|                 statuses.insert(Status::Succeeded, b.stats.total_nb_tasks); | ||||
|                 b.stats.status = statuses; | ||||
|                 b.finished_at = Some(finished_at); | ||||
|             } | ||||
|  | ||||
|             dump_batches.push_batch(&b)?; | ||||
|             atomic_batch_progress.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|         dump_batches.flush()?; | ||||
|  | ||||
|         // 4. Dump the indexes | ||||
|         progress.update_progress(DumpCreationProgress::DumpTheIndexes); | ||||
|         let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; | ||||
|         let mut count = 0; | ||||
| @@ -142,7 +178,7 @@ impl IndexScheduler { | ||||
|             let documents = index | ||||
|                 .all_documents(&rtxn) | ||||
|                 .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||
|             // 3.1. Dump the documents | ||||
|             // 4.1. Dump the documents | ||||
|             for ret in documents { | ||||
|                 if self.scheduler.must_stop_processing.get() { | ||||
|                     return Err(Error::AbortedTask); | ||||
| @@ -204,7 +240,7 @@ impl IndexScheduler { | ||||
|                 atomic.fetch_add(1, Ordering::Relaxed); | ||||
|             } | ||||
|  | ||||
|             // 3.2. Dump the settings | ||||
|             // 4.2. Dump the settings | ||||
|             let settings = meilisearch_types::settings::settings( | ||||
|                 index, | ||||
|                 &rtxn, | ||||
| @@ -215,10 +251,12 @@ impl IndexScheduler { | ||||
|             Ok(()) | ||||
|         })?; | ||||
|  | ||||
|         // 4. Dump experimental feature settings | ||||
|         // 5. Dump experimental feature settings | ||||
|         progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); | ||||
|         let features = self.features().runtime_features(); | ||||
|         dump.create_experimental_features(features)?; | ||||
|         let network = self.network(); | ||||
|         dump.create_network(network)?; | ||||
|  | ||||
|         let dump_uid = started_at.format(format_description!( | ||||
|                     "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" | ||||
|   | ||||
| @@ -56,16 +56,13 @@ succeeded [1,] | ||||
| ### Batches Index Tasks: | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Enqueued At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Started At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Finished At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
|   | ||||
| @@ -54,15 +54,12 @@ succeeded [1,] | ||||
| ### Batches Index Tasks: | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Enqueued At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Started At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Finished At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### File Store: | ||||
|   | ||||
| @@ -7,7 +7,7 @@ snapshot_kind: text | ||||
| [] | ||||
| ---------------------------------------------------------------------- | ||||
| ### All Tasks: | ||||
| 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} | ||||
| 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task: simulated panic", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} | ||||
| ---------------------------------------------------------------------- | ||||
| ### Status: | ||||
| enqueued [] | ||||
|   | ||||
| @@ -87,7 +87,6 @@ doggo [2,3,] | ||||
| girafo [4,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Enqueued At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| [timestamp] [2,] | ||||
| [timestamp] [3,] | ||||
| @@ -95,7 +94,6 @@ girafo [4,] | ||||
| [timestamp] [5,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Started At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| [timestamp] [2,] | ||||
| [timestamp] [3,] | ||||
| @@ -103,7 +101,6 @@ girafo [4,] | ||||
| [timestamp] [5,] | ||||
| ---------------------------------------------------------------------- | ||||
| ### Batches Finished At: | ||||
| [timestamp] [0,] | ||||
| [timestamp] [1,] | ||||
| [timestamp] [2,] | ||||
| [timestamp] [3,] | ||||
|   | ||||
| @@ -903,7 +903,7 @@ fn create_and_list_index() { | ||||
|  | ||||
|     index_scheduler.index("kefir").unwrap(); | ||||
|     let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); | ||||
|     snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#" | ||||
|     snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###" | ||||
|     [ | ||||
|       1, | ||||
|       [ | ||||
| @@ -912,6 +912,8 @@ fn create_and_list_index() { | ||||
|           { | ||||
|             "number_of_documents": 0, | ||||
|             "database_size": "[bytes]", | ||||
|             "number_of_embeddings": 0, | ||||
|             "number_of_embedded_documents": 0, | ||||
|             "used_database_size": "[bytes]", | ||||
|             "primary_key": null, | ||||
|             "field_distribution": {}, | ||||
| @@ -921,5 +923,5 @@ fn create_and_list_index() { | ||||
|         ] | ||||
|       ] | ||||
|     ] | ||||
|     "#); | ||||
|     "###); | ||||
| } | ||||
|   | ||||
| @@ -6,8 +6,7 @@ use meili_snap::snapshot; | ||||
| use meilisearch_types::milli::obkv_to_json; | ||||
| use meilisearch_types::milli::update::IndexDocumentsMethod::*; | ||||
| use meilisearch_types::milli::update::Setting; | ||||
| use meilisearch_types::tasks::Kind; | ||||
| use meilisearch_types::tasks::KindWithContent; | ||||
| use meilisearch_types::tasks::{Kind, KindWithContent}; | ||||
|  | ||||
| use crate::insta_snapshot::snapshot_index_scheduler; | ||||
| use crate::test_utils::Breakpoint::*; | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
| use std::collections::{BTreeSet, HashSet}; | ||||
| use std::ops::Bound; | ||||
|  | ||||
| use meilisearch_types::batches::{Batch, BatchId, BatchStats}; | ||||
| use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats}; | ||||
| use meilisearch_types::heed::{Database, RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::CboRoaringBitmapCodec; | ||||
| use meilisearch_types::task_view::DetailsView; | ||||
| @@ -30,8 +30,7 @@ pub struct ProcessingBatch { | ||||
|     pub kinds: HashSet<Kind>, | ||||
|     pub indexes: HashSet<String>, | ||||
|     pub canceled_by: HashSet<TaskId>, | ||||
|     pub oldest_enqueued_at: Option<OffsetDateTime>, | ||||
|     pub earliest_enqueued_at: Option<OffsetDateTime>, | ||||
|     pub enqueued_at: Option<BatchEnqueuedAt>, | ||||
|     pub started_at: OffsetDateTime, | ||||
|     pub finished_at: Option<OffsetDateTime>, | ||||
| } | ||||
| @@ -51,8 +50,7 @@ impl ProcessingBatch { | ||||
|             kinds: HashSet::default(), | ||||
|             indexes: HashSet::default(), | ||||
|             canceled_by: HashSet::default(), | ||||
|             oldest_enqueued_at: None, | ||||
|             earliest_enqueued_at: None, | ||||
|             enqueued_at: None, | ||||
|             started_at: OffsetDateTime::now_utc(), | ||||
|             finished_at: None, | ||||
|         } | ||||
| @@ -80,14 +78,18 @@ impl ProcessingBatch { | ||||
|             if let Some(canceled_by) = task.canceled_by { | ||||
|                 self.canceled_by.insert(canceled_by); | ||||
|             } | ||||
|             self.oldest_enqueued_at = | ||||
|                 Some(self.oldest_enqueued_at.map_or(task.enqueued_at, |oldest_enqueued_at| { | ||||
|                     task.enqueued_at.min(oldest_enqueued_at) | ||||
|                 })); | ||||
|             self.earliest_enqueued_at = | ||||
|                 Some(self.earliest_enqueued_at.map_or(task.enqueued_at, |earliest_enqueued_at| { | ||||
|                     task.enqueued_at.max(earliest_enqueued_at) | ||||
|                 })); | ||||
|             match self.enqueued_at.as_mut() { | ||||
|                 Some(BatchEnqueuedAt { earliest, oldest }) => { | ||||
|                     *oldest = task.enqueued_at.min(*oldest); | ||||
|                     *earliest = task.enqueued_at.max(*earliest); | ||||
|                 } | ||||
|                 None => { | ||||
|                     self.enqueued_at = Some(BatchEnqueuedAt { | ||||
|                         earliest: task.enqueued_at, | ||||
|                         oldest: task.enqueued_at, | ||||
|                     }); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -138,6 +140,7 @@ impl ProcessingBatch { | ||||
|             stats: self.stats.clone(), | ||||
|             started_at: self.started_at, | ||||
|             finished_at: self.finished_at, | ||||
|             enqueued_at: self.enqueued_at, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -174,6 +177,33 @@ pub(crate) fn remove_task_datetime( | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub(crate) fn remove_n_tasks_datetime_earlier_than( | ||||
|     wtxn: &mut RwTxn, | ||||
|     database: Database<BEI128, CboRoaringBitmapCodec>, | ||||
|     earlier_than: OffsetDateTime, | ||||
|     mut count: usize, | ||||
|     task_id: TaskId, | ||||
| ) -> Result<()> { | ||||
|     let earlier_than = earlier_than.unix_timestamp_nanos(); | ||||
|     let mut iter = database.rev_range_mut(wtxn, &(..earlier_than))?; | ||||
|     while let Some((current, mut existing)) = iter.next().transpose()? { | ||||
|         count -= existing.remove(task_id) as usize; | ||||
|  | ||||
|         if existing.is_empty() { | ||||
|             // safety: We don't keep references to the database | ||||
|             unsafe { iter.del_current()? }; | ||||
|         } else { | ||||
|             // safety: We don't keep references to the database | ||||
|             unsafe { iter.put_current(¤t, &existing)? }; | ||||
|         } | ||||
|         if count == 0 { | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub(crate) fn keep_ids_within_datetimes( | ||||
|     rtxn: &RoTxn, | ||||
|     ids: &mut RoaringBitmap, | ||||
| @@ -329,14 +359,27 @@ impl crate::IndexScheduler { | ||||
|                 kind, | ||||
|             } = task; | ||||
|             assert_eq!(uid, task.uid); | ||||
|             if let Some(ref batch) = batch_uid { | ||||
|             if task.status != Status::Enqueued { | ||||
|                 let batch_uid = batch_uid.expect("All non enqueued tasks must be part of a batch"); | ||||
|                 assert!(self | ||||
|                     .queue | ||||
|                     .batch_to_tasks_mapping | ||||
|                     .get(&rtxn, batch) | ||||
|                     .get(&rtxn, &batch_uid) | ||||
|                     .unwrap() | ||||
|                     .unwrap() | ||||
|                     .contains(uid)); | ||||
|                 let batch = self.queue.batches.get_batch(&rtxn, batch_uid).unwrap().unwrap(); | ||||
|                 assert_eq!(batch.uid, batch_uid); | ||||
|                 if task.status == Status::Processing { | ||||
|                     assert!(batch.progress.is_some()); | ||||
|                 } else { | ||||
|                     assert!(batch.progress.is_none()); | ||||
|                 } | ||||
|                 assert_eq!(batch.started_at, task.started_at.unwrap()); | ||||
|                 assert_eq!(batch.finished_at, task.finished_at); | ||||
|                 let enqueued_at = batch.enqueued_at.unwrap(); | ||||
|                 assert!(task.enqueued_at >= enqueued_at.oldest); | ||||
|                 assert!(task.enqueued_at <= enqueued_at.earliest); | ||||
|             } | ||||
|             if let Some(task_index_uid) = &task_index_uid { | ||||
|                 assert!(self | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| use crate::{upgrade::upgrade_index_scheduler, Result}; | ||||
| use meilisearch_types::{ | ||||
|     heed::{types::Str, Database, Env, RoTxn, RwTxn}, | ||||
|     milli::heed_codec::version::VersionCodec, | ||||
|     versioning, | ||||
| }; | ||||
| use meilisearch_types::heed::types::Str; | ||||
| use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::heed_codec::version::VersionCodec; | ||||
| use meilisearch_types::versioning; | ||||
|  | ||||
| use crate::upgrade::upgrade_index_scheduler; | ||||
| use crate::Result; | ||||
|  | ||||
| /// The number of databases used by the queue itself | ||||
| const NUMBER_OF_DATABASES: u32 = 1; | ||||
| @@ -21,30 +22,38 @@ pub struct Versioning { | ||||
| } | ||||
|  | ||||
| impl Versioning { | ||||
|     pub(crate) const fn nb_db() -> u32 { | ||||
|     pub const fn nb_db() -> u32 { | ||||
|         NUMBER_OF_DATABASES | ||||
|     } | ||||
|  | ||||
|     pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>> { | ||||
|         Ok(self.version.get(rtxn, entry_name::MAIN)?) | ||||
|     pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>, heed::Error> { | ||||
|         self.version.get(rtxn, entry_name::MAIN) | ||||
|     } | ||||
|  | ||||
|     pub fn set_version(&self, wtxn: &mut RwTxn, version: (u32, u32, u32)) -> Result<()> { | ||||
|         Ok(self.version.put(wtxn, entry_name::MAIN, &version)?) | ||||
|     pub fn set_version( | ||||
|         &self, | ||||
|         wtxn: &mut RwTxn, | ||||
|         version: (u32, u32, u32), | ||||
|     ) -> Result<(), heed::Error> { | ||||
|         self.version.put(wtxn, entry_name::MAIN, &version) | ||||
|     } | ||||
|  | ||||
|     pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<()> { | ||||
|     pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<(), heed::Error> { | ||||
|         let major = versioning::VERSION_MAJOR.parse().unwrap(); | ||||
|         let minor = versioning::VERSION_MINOR.parse().unwrap(); | ||||
|         let patch = versioning::VERSION_PATCH.parse().unwrap(); | ||||
|         self.set_version(wtxn, (major, minor, patch)) | ||||
|     } | ||||
|  | ||||
|     /// Create an index scheduler and start its run loop. | ||||
|     /// Return `Self` without checking anything about the version | ||||
|     pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> { | ||||
|         let version = env.create_database(wtxn, Some(db_name::VERSION))?; | ||||
|         Ok(Self { version }) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> { | ||||
|         let mut wtxn = env.write_txn()?; | ||||
|         let version = env.create_database(&mut wtxn, Some(db_name::VERSION))?; | ||||
|         let this = Self { version }; | ||||
|         let this = Self::raw_new(env, &mut wtxn)?; | ||||
|         let from = match this.get_version(&wtxn)? { | ||||
|             Some(version) => version, | ||||
|             // fresh DB: use the db version | ||||
|   | ||||
| @@ -24,9 +24,35 @@ pub struct Batch { | ||||
|     pub started_at: OffsetDateTime, | ||||
|     #[serde(with = "time::serde::rfc3339::option")] | ||||
|     pub finished_at: Option<OffsetDateTime>, | ||||
|  | ||||
|     // Enqueued at is never displayed and is only required when removing a batch. | ||||
|     // It's always `Some` except when upgrading from a database created before v1.12 | ||||
|     pub enqueued_at: Option<BatchEnqueuedAt>, | ||||
| } | ||||
|  | ||||
| #[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)] | ||||
| impl PartialEq for Batch { | ||||
|     fn eq(&self, other: &Self) -> bool { | ||||
|         let Self { uid, progress, details, stats, started_at, finished_at, enqueued_at } = self; | ||||
|  | ||||
|         *uid == other.uid | ||||
|             && progress.is_none() == other.progress.is_none() | ||||
|             && details == &other.details | ||||
|             && stats == &other.stats | ||||
|             && started_at == &other.started_at | ||||
|             && finished_at == &other.finished_at | ||||
|             && enqueued_at == &other.enqueued_at | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] | ||||
| pub struct BatchEnqueuedAt { | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub earliest: OffsetDateTime, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub oldest: OffsetDateTime, | ||||
| } | ||||
|  | ||||
| #[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct BatchStats { | ||||
|   | ||||
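Note the naming of the two fields: as updated in `ProcessingBatch::processing` (utils.rs hunk above), `oldest` holds the minimum `enqueued_at` among the batch's tasks and `earliest` holds the maximum, so the pair bounds every task's enqueue time and is why removing a batch touches at most two keys of the `enqueued_at` database. A small sketch with hypothetical timestamps:

use meilisearch_types::batches::BatchEnqueuedAt;
use time::OffsetDateTime;

// Hypothetical fold over task enqueue times, mirroring ProcessingBatch::processing.
fn bounds(task_times: &[OffsetDateTime]) -> Option<BatchEnqueuedAt> {
    let mut acc: Option<BatchEnqueuedAt> = None;
    for &t in task_times {
        match acc.as_mut() {
            Some(BatchEnqueuedAt { earliest, oldest }) => {
                *oldest = t.min(*oldest); // oldest = smallest enqueued_at seen
                *earliest = t.max(*earliest); // earliest = largest enqueued_at seen
            }
            None => acc = Some(BatchEnqueuedAt { earliest: t, oldest: t }),
        }
    }
    acc
}
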
| @@ -193,6 +193,8 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError); | ||||
| merge_with_error_impl_take_error_message!(ParseTaskStatusError); | ||||
| merge_with_error_impl_take_error_message!(IndexUidFormatError); | ||||
| merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight); | ||||
| merge_with_error_impl_take_error_message!(InvalidNetworkUrl); | ||||
| merge_with_error_impl_take_error_message!(InvalidNetworkSearchApiKey); | ||||
| merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); | ||||
| merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); | ||||
| merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold); | ||||
|   | ||||
| @@ -260,7 +260,13 @@ InvalidMultiSearchMergeFacets         , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchQueryFacets         , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchQueryPagination     , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchQueryRankingRules   , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchQueryPosition       , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchRemote              , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchWeight              , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidNetworkRemotes                 , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidNetworkSelf                    , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidNetworkSearchApiKey            , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidNetworkUrl                     , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidSearchAttributesToSearchOn     , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ; | ||||
| @@ -351,14 +357,22 @@ MissingDocumentId                     , InvalidRequest       , BAD_REQUEST ; | ||||
| MissingFacetSearchFacetName           , InvalidRequest       , BAD_REQUEST ; | ||||
| MissingIndexUid                       , InvalidRequest       , BAD_REQUEST ; | ||||
| MissingMasterKey                      , Auth                 , UNAUTHORIZED ; | ||||
| MissingNetworkUrl                     , InvalidRequest       , BAD_REQUEST ; | ||||
| MissingPayload                        , InvalidRequest       , BAD_REQUEST ; | ||||
| MissingSearchHybrid                   , InvalidRequest       , BAD_REQUEST ; | ||||
| MissingSwapIndexes                    , InvalidRequest       , BAD_REQUEST ; | ||||
| MissingTaskFilters                    , InvalidRequest       , BAD_REQUEST ; | ||||
| NoSpaceLeftOnDevice                   , System               , UNPROCESSABLE_ENTITY; | ||||
| PayloadTooLarge                       , InvalidRequest       , PAYLOAD_TOO_LARGE ; | ||||
| RemoteBadResponse                     , System               , BAD_GATEWAY ; | ||||
| RemoteBadRequest                      , InvalidRequest       , BAD_REQUEST ; | ||||
| RemoteCouldNotSendRequest             , System               , BAD_GATEWAY ; | ||||
| RemoteInvalidApiKey                   , Auth                 , FORBIDDEN ; | ||||
| RemoteRemoteError                     , System               , BAD_GATEWAY ; | ||||
| RemoteTimeout                         , System               , BAD_GATEWAY ; | ||||
| TooManySearchRequests                 , System               , SERVICE_UNAVAILABLE ; | ||||
| TaskNotFound                          , InvalidRequest       , NOT_FOUND ; | ||||
| TaskFileNotFound                      , InvalidRequest       , NOT_FOUND ; | ||||
| BatchNotFound                         , InvalidRequest       , NOT_FOUND ; | ||||
| TooManyOpenFiles                      , System               , UNPROCESSABLE_ENTITY ; | ||||
| TooManyVectors                        , InvalidRequest       , BAD_REQUEST ; | ||||
| @@ -583,6 +597,18 @@ impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for deserr_codes::InvalidNetworkUrl { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         write!(f, "the value of `url` is invalid, expected a string.") | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         write!(f, "the value of `searchApiKey` is invalid, expected a string.") | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[macro_export] | ||||
| macro_rules! internal_error { | ||||
|     ($target:ty : $($other:path), *) => { | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| use std::collections::BTreeMap; | ||||
|  | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] | ||||
| @@ -7,6 +9,8 @@ pub struct RuntimeTogglableFeatures { | ||||
|     pub logs_route: bool, | ||||
|     pub edit_documents_by_function: bool, | ||||
|     pub contains_filter: bool, | ||||
|     pub network: bool, | ||||
|     pub get_task_documents_route: bool, | ||||
| } | ||||
|  | ||||
| #[derive(Default, Debug, Clone, Copy)] | ||||
| @@ -15,3 +19,20 @@ pub struct InstanceTogglableFeatures { | ||||
|     pub logs_route: bool, | ||||
|     pub contains_filter: bool, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Remote { | ||||
|     pub url: String, | ||||
|     #[serde(default)] | ||||
|     pub search_api_key: Option<String>, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Network { | ||||
|     #[serde(default, rename = "self")] | ||||
|     pub local: Option<String>, | ||||
|     #[serde(default)] | ||||
|     pub remotes: BTreeMap<String, Remote>, | ||||
| } | ||||
|   | ||||
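Given the serde attributes above, a `Network` value serializes with camelCase keys and exposes `local` under the reserved key `self`. A minimal round-trip sketch (hypothetical instance names and URL, assuming `serde_json` is available):

use std::collections::BTreeMap;

use meilisearch_types::features::{Network, Remote};

fn network_json_example() -> serde_json::Result<()> {
    let network = Network {
        local: Some("ms-00".to_string()),
        remotes: BTreeMap::from([(
            "ms-01".to_string(),
            Remote { url: "http://localhost:7701".to_string(), search_api_key: None },
        )]),
    };
    let json = serde_json::to_string(&network)?;
    // `local` is serialized under the key "self" and fields are camelCase:
    // {"self":"ms-00","remotes":{"ms-01":{"url":"http://localhost:7701","searchApiKey":null}}}
    let back: Network = serde_json::from_str(&json)?;
    assert_eq!(back, network);
    Ok(())
}
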
| @@ -4,13 +4,14 @@ use std::fmt; | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use deserr::Deserr; | ||||
| use serde::Serialize; | ||||
| use utoipa::ToSchema; | ||||
|  | ||||
| use crate::error::{Code, ErrorCode}; | ||||
|  | ||||
| /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 | ||||
| /// bytes long | ||||
| #[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, ToSchema)] | ||||
| #[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, Serialize, ToSchema)] | ||||
| #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] | ||||
| #[schema(value_type = String, example = "movies")] | ||||
| pub struct IndexUid(String); | ||||
|   | ||||
| @@ -302,6 +302,12 @@ pub enum Action { | ||||
|     #[serde(rename = "experimental.update")] | ||||
|     #[deserr(rename = "experimental.update")] | ||||
|     ExperimentalFeaturesUpdate, | ||||
|     #[serde(rename = "network.get")] | ||||
|     #[deserr(rename = "network.get")] | ||||
|     NetworkGet, | ||||
|     #[serde(rename = "network.update")] | ||||
|     #[deserr(rename = "network.update")] | ||||
|     NetworkUpdate, | ||||
| } | ||||
|  | ||||
| impl Action { | ||||
| @@ -341,6 +347,8 @@ impl Action { | ||||
|             KEYS_DELETE => Some(Self::KeysDelete), | ||||
|             EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet), | ||||
|             EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate), | ||||
|             NETWORK_GET => Some(Self::NetworkGet), | ||||
|             NETWORK_UPDATE => Some(Self::NetworkUpdate), | ||||
|             _otherwise => None, | ||||
|         } | ||||
|     } | ||||
| @@ -386,4 +394,7 @@ pub mod actions { | ||||
|     pub const KEYS_DELETE: u8 = KeysDelete.repr(); | ||||
|     pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr(); | ||||
|     pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr(); | ||||
|  | ||||
|     pub const NETWORK_GET: u8 = NetworkGet.repr(); | ||||
|     pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr(); | ||||
| } | ||||
|   | ||||
| @@ -1,7 +1,10 @@ | ||||
| use std::fs; | ||||
| use std::io::{self, ErrorKind}; | ||||
| use std::io::{ErrorKind, Write}; | ||||
| use std::path::Path; | ||||
|  | ||||
| use milli::heed; | ||||
| use tempfile::NamedTempFile; | ||||
|  | ||||
| /// The name of the file that contains the version of the database. | ||||
| pub const VERSION_FILE_NAME: &str = "VERSION"; | ||||
|  | ||||
| @@ -10,37 +13,7 @@ pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR"); | ||||
| pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH"); | ||||
|  | ||||
| /// Persists the version of the current Meilisearch binary to a VERSION file | ||||
| pub fn update_version_file_for_dumpless_upgrade( | ||||
|     db_path: &Path, | ||||
|     from: (u32, u32, u32), | ||||
|     to: (u32, u32, u32), | ||||
| ) -> Result<(), VersionFileError> { | ||||
|     let (from_major, from_minor, from_patch) = from; | ||||
|     let (to_major, to_minor, to_patch) = to; | ||||
|  | ||||
|     if from_major > to_major | ||||
|         || (from_major == to_major && from_minor > to_minor) | ||||
|         || (from_major == to_major && from_minor == to_minor && from_patch > to_patch) | ||||
|     { | ||||
|         Err(VersionFileError::DowngradeNotSupported { | ||||
|             major: from_major, | ||||
|             minor: from_minor, | ||||
|             patch: from_patch, | ||||
|         }) | ||||
|     } else if from_major < 1 || (from_major == to_major && from_minor < 12) { | ||||
|         Err(VersionFileError::TooOldForAutomaticUpgrade { | ||||
|             major: from_major, | ||||
|             minor: from_minor, | ||||
|             patch: from_patch, | ||||
|         }) | ||||
|     } else { | ||||
|         create_current_version_file(db_path)?; | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Persists the version of the current Meilisearch binary to a VERSION file | ||||
| pub fn create_current_version_file(db_path: &Path) -> io::Result<()> { | ||||
| pub fn create_current_version_file(db_path: &Path) -> anyhow::Result<()> { | ||||
|     create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) | ||||
| } | ||||
|  | ||||
| @@ -49,9 +22,14 @@ pub fn create_version_file( | ||||
|     major: &str, | ||||
|     minor: &str, | ||||
|     patch: &str, | ||||
| ) -> io::Result<()> { | ||||
| ) -> anyhow::Result<()> { | ||||
|     let version_path = db_path.join(VERSION_FILE_NAME); | ||||
|     fs::write(version_path, format!("{}.{}.{}", major, minor, patch)) | ||||
|     // In order to persist the file later we must create it in the `data.ms` and not in `/tmp` | ||||
|     let mut file = NamedTempFile::new_in(db_path)?; | ||||
|     file.write_all(format!("{}.{}.{}", major, minor, patch).as_bytes())?; | ||||
|     file.flush()?; | ||||
|     file.persist(version_path)?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> { | ||||
| @@ -61,7 +39,7 @@ pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> | ||||
|         Ok(version) => parse_version(&version), | ||||
|         Err(error) => match error.kind() { | ||||
|             ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile), | ||||
|             _ => Err(error.into()), | ||||
|             _ => Err(anyhow::Error::from(error).into()), | ||||
|         }, | ||||
|     } | ||||
| } | ||||
| @@ -112,7 +90,9 @@ pub enum VersionFileError { | ||||
|     DowngradeNotSupported { major: u32, minor: u32, patch: u32 }, | ||||
|     #[error("Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}")] | ||||
|     TooOldForAutomaticUpgrade { major: u32, minor: u32, patch: u32 }, | ||||
|     #[error("Error while modifying the database: {0}")] | ||||
|     ErrorWhileModifyingTheDatabase(#[from] heed::Error), | ||||
|  | ||||
|     #[error(transparent)] | ||||
|     IoError(#[from] std::io::Error), | ||||
|     AnyhowError(#[from] anyhow::Error), | ||||
| } | ||||
|   | ||||
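The new `create_version_file` above writes through a temporary file created inside the database directory and then persists it over `VERSION`, so the final step is a same-filesystem rename rather than a possibly partial write. A generic sketch of that pattern (hypothetical helper, not part of this PR, assuming the `tempfile` and `anyhow` crates):

use std::io::Write;
use std::path::Path;

use tempfile::NamedTempFile;

fn write_file_atomically(dir: &Path, file_name: &str, contents: &[u8]) -> anyhow::Result<()> {
    // Create the temporary file in the destination directory so that
    // `persist` is a rename on the same filesystem rather than a copy.
    let mut file = NamedTempFile::new_in(dir)?;
    file.write_all(contents)?;
    file.flush()?;
    file.persist(dir.join(file_name))?;
    Ok(())
}
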
| @@ -31,6 +31,7 @@ use crate::routes::{create_all_stats, Stats}; | ||||
| use crate::Opt; | ||||
|  | ||||
| const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; | ||||
| const MEILI_SERVER_PROVIDER: &str = "MEILI_SERVER_PROVIDER"; | ||||
|  | ||||
| /// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. | ||||
| fn write_user_id(db_path: &Path, user_id: &InstanceUid) { | ||||
| @@ -195,6 +196,8 @@ struct Infos { | ||||
|     experimental_reduce_indexing_memory_usage: bool, | ||||
|     experimental_max_number_of_batched_tasks: usize, | ||||
|     experimental_limit_batched_tasks_total_size: u64, | ||||
|     experimental_network: bool, | ||||
|     experimental_get_task_documents_route: bool, | ||||
|     gpu_enabled: bool, | ||||
|     db_path: bool, | ||||
|     import_dump: bool, | ||||
| @@ -285,6 +288,8 @@ impl Infos { | ||||
|             logs_route, | ||||
|             edit_documents_by_function, | ||||
|             contains_filter, | ||||
|             network, | ||||
|             get_task_documents_route, | ||||
|         } = features; | ||||
|  | ||||
|         // We're going to override every sensible information. | ||||
| @@ -302,6 +307,8 @@ impl Infos { | ||||
|             experimental_replication_parameters, | ||||
|             experimental_enable_logs_route: experimental_enable_logs_route | logs_route, | ||||
|             experimental_reduce_indexing_memory_usage, | ||||
|             experimental_network: network, | ||||
|             experimental_get_task_documents_route: get_task_documents_route, | ||||
|             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), | ||||
|             db_path: db_path != PathBuf::from("./data.ms"), | ||||
|             import_dump: import_dump.is_some(), | ||||
| @@ -357,7 +364,7 @@ impl Segment { | ||||
|                     "cores": sys.cpus().len(), | ||||
|                     "ram_size": sys.total_memory(), | ||||
|                     "disk_size": disks.iter().map(|disk| disk.total_space()).max(), | ||||
|                     "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), | ||||
|                     "server_provider": std::env::var(MEILI_SERVER_PROVIDER).ok(), | ||||
|             }) | ||||
|         }); | ||||
|         let number_of_documents = | ||||
| @@ -380,10 +387,18 @@ impl Segment { | ||||
|         index_scheduler: Arc<IndexScheduler>, | ||||
|         auth_controller: Arc<AuthController>, | ||||
|     ) { | ||||
|         const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour | ||||
|                                                                  // The first batch must be sent after one hour. | ||||
|         let interval: Duration = match std::env::var(MEILI_SERVER_PROVIDER) { | ||||
|             Ok(provider) if provider.starts_with("meili_cloud:") => { | ||||
|                 Duration::from_secs(60 * 60) // one hour | ||||
|             } | ||||
|             _ => { | ||||
|                 // We're an open source instance | ||||
|                 Duration::from_secs(60 * 60 * 24) // one day | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         let mut interval = | ||||
|             tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL); | ||||
|             tokio::time::interval_at(tokio::time::Instant::now() + interval, interval); | ||||
|  | ||||
|         loop { | ||||
|             select! { | ||||
|   | ||||
| @@ -32,6 +32,7 @@ use analytics::Analytics; | ||||
| use anyhow::bail; | ||||
| use error::PayloadError; | ||||
| use extractors::payload::PayloadConfig; | ||||
| use index_scheduler::versioning::Versioning; | ||||
| use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; | ||||
| use meilisearch_auth::AuthController; | ||||
| use meilisearch_types::milli::constants::VERSION_MAJOR; | ||||
| @@ -40,10 +41,9 @@ use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMetho | ||||
| use meilisearch_types::settings::apply_settings_to_builder; | ||||
| use meilisearch_types::tasks::KindWithContent; | ||||
| use meilisearch_types::versioning::{ | ||||
|     create_current_version_file, get_version, update_version_file_for_dumpless_upgrade, | ||||
|     VersionFileError, VERSION_MINOR, VERSION_PATCH, | ||||
|     create_current_version_file, get_version, VersionFileError, VERSION_MINOR, VERSION_PATCH, | ||||
| }; | ||||
| use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; | ||||
| use meilisearch_types::{compression, heed, milli, VERSION_FILE_NAME}; | ||||
| pub use option::Opt; | ||||
| use option::ScheduleSnapshot; | ||||
| use search_queue::SearchQueue; | ||||
| @@ -356,14 +356,19 @@ fn open_or_create_database_unchecked( | ||||
|  | ||||
| /// Ensures Meilisearch version is compatible with the database, returns an error in case of version mismatch. | ||||
| /// Returns the version that was contained in the version file | ||||
| fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<(u32, u32, u32)> { | ||||
| fn check_version( | ||||
|     opt: &Opt, | ||||
|     index_scheduler_opt: &IndexSchedulerOptions, | ||||
|     binary_version: (u32, u32, u32), | ||||
| ) -> anyhow::Result<(u32, u32, u32)> { | ||||
|     let (bin_major, bin_minor, bin_patch) = binary_version; | ||||
|     let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?; | ||||
|  | ||||
|     if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch { | ||||
|         if opt.experimental_dumpless_upgrade { | ||||
|             update_version_file_for_dumpless_upgrade( | ||||
|                 &opt.db_path, | ||||
|                 opt, | ||||
|                 index_scheduler_opt, | ||||
|                 (db_major, db_minor, db_patch), | ||||
|                 (bin_major, bin_minor, bin_patch), | ||||
|             )?; | ||||
| @@ -380,6 +385,57 @@ fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<( | ||||
|     Ok((db_major, db_minor, db_patch)) | ||||
| } | ||||
|  | ||||
| /// Persists the version of the current Meilisearch binary to a VERSION file | ||||
| pub fn update_version_file_for_dumpless_upgrade( | ||||
|     opt: &Opt, | ||||
|     index_scheduler_opt: &IndexSchedulerOptions, | ||||
|     from: (u32, u32, u32), | ||||
|     to: (u32, u32, u32), | ||||
| ) -> Result<(), VersionFileError> { | ||||
|     let (from_major, from_minor, from_patch) = from; | ||||
|     let (to_major, to_minor, to_patch) = to; | ||||
|  | ||||
|     // Early exit in case of error | ||||
|     if from_major > to_major | ||||
|         || (from_major == to_major && from_minor > to_minor) | ||||
|         || (from_major == to_major && from_minor == to_minor && from_patch > to_patch) | ||||
|     { | ||||
|         return Err(VersionFileError::DowngradeNotSupported { | ||||
|             major: from_major, | ||||
|             minor: from_minor, | ||||
|             patch: from_patch, | ||||
|         }); | ||||
|     } else if from_major < 1 || (from_major == to_major && from_minor < 12) { | ||||
|         return Err(VersionFileError::TooOldForAutomaticUpgrade { | ||||
|             major: from_major, | ||||
|             minor: from_minor, | ||||
|             patch: from_patch, | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     // In the case of v1.12, the index-scheduler didn't store its internal version at the time. | ||||
|     // => We must write it immediately **in the index-scheduler**; otherwise, once we update the version file, | ||||
|     //    there is a risk of DB corruption if a restart happens after writing the version file but before | ||||
|     //    writing the version in the index-scheduler. See <https://github.com/meilisearch/meilisearch/issues/5280> | ||||
|     if from_major == 1 && from_minor == 12 { | ||||
|         let env = unsafe { | ||||
|             heed::EnvOpenOptions::new() | ||||
|                 .max_dbs(Versioning::nb_db()) | ||||
|                 .map_size(index_scheduler_opt.task_db_size) | ||||
|                 .open(&index_scheduler_opt.tasks_path) | ||||
|         }?; | ||||
|         let mut wtxn = env.write_txn()?; | ||||
|         let versioning = Versioning::raw_new(&env, &mut wtxn)?; | ||||
|         versioning.set_version(&mut wtxn, (from_major, from_minor, from_patch))?; | ||||
|         wtxn.commit()?; | ||||
|         // Should be instant since we're the only one using the env | ||||
|         env.prepare_for_closing().wait(); | ||||
|     } | ||||
|  | ||||
|     create_current_version_file(&opt.db_path)?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Ensure you're in a valid state and open the IndexScheduler + AuthController for you. | ||||
| fn open_or_create_database( | ||||
|     opt: &Opt, | ||||
| @@ -387,7 +443,11 @@ fn open_or_create_database( | ||||
|     empty_db: bool, | ||||
|     binary_version: (u32, u32, u32), | ||||
| ) -> anyhow::Result<(IndexScheduler, AuthController)> { | ||||
|     let version = if !empty_db { check_version(opt, binary_version)? } else { binary_version }; | ||||
|     let version = if !empty_db { | ||||
|         check_version(opt, &index_scheduler_opt, binary_version)? | ||||
|     } else { | ||||
|         binary_version | ||||
|     }; | ||||
|  | ||||
|     open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version) | ||||
| } | ||||
| @@ -431,10 +491,13 @@ fn import_dump( | ||||
|         keys.push(key); | ||||
|     } | ||||
|  | ||||
|     // 3. Import the runtime features. | ||||
|     // 3. Import the runtime features and network | ||||
|     let features = dump_reader.features()?.unwrap_or_default(); | ||||
|     index_scheduler.put_runtime_features(features)?; | ||||
|  | ||||
|     let network = dump_reader.network()?.cloned().unwrap_or_default(); | ||||
|     index_scheduler.put_network(network)?; | ||||
|  | ||||
|     let indexer_config = index_scheduler.indexer_config(); | ||||
|  | ||||
|     // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might | ||||
| @@ -508,9 +571,15 @@ fn import_dump( | ||||
|         index_scheduler.refresh_index_stats(&uid)?; | ||||
|     } | ||||
|  | ||||
|     // 5. Import the queue | ||||
|     let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; | ||||
|     // 5.1. Import the batches | ||||
|     for ret in dump_reader.batches()? { | ||||
|         let batch = ret?; | ||||
|         index_scheduler_dump.register_dumped_batch(batch)?; | ||||
|     } | ||||
|  | ||||
|     // 5. Import the tasks. | ||||
|     // 5.2. Import the tasks | ||||
|     for ret in dump_reader.tasks()? { | ||||
|         let (task, file) = ret?; | ||||
|         index_scheduler_dump.register_dumped_task(task, file)?; | ||||
|   | ||||
| @@ -50,6 +50,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { | ||||
|             logs_route: Some(false), | ||||
|             edit_documents_by_function: Some(false), | ||||
|             contains_filter: Some(false), | ||||
|             network: Some(false), | ||||
|             get_task_documents_route: Some(false), | ||||
|         })), | ||||
|         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||
|             { | ||||
| @@ -88,6 +90,10 @@ pub struct RuntimeTogglableFeatures { | ||||
|     pub edit_documents_by_function: Option<bool>, | ||||
|     #[deserr(default)] | ||||
|     pub contains_filter: Option<bool>, | ||||
|     #[deserr(default)] | ||||
|     pub network: Option<bool>, | ||||
|     #[deserr(default)] | ||||
|     pub get_task_documents_route: Option<bool>, | ||||
| } | ||||
|  | ||||
| impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures { | ||||
| @@ -97,6 +103,8 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg | ||||
|             logs_route, | ||||
|             edit_documents_by_function, | ||||
|             contains_filter, | ||||
|             network, | ||||
|             get_task_documents_route, | ||||
|         } = value; | ||||
|  | ||||
|         Self { | ||||
| @@ -104,6 +112,8 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg | ||||
|             logs_route: Some(logs_route), | ||||
|             edit_documents_by_function: Some(edit_documents_by_function), | ||||
|             contains_filter: Some(contains_filter), | ||||
|             network: Some(network), | ||||
|             get_task_documents_route: Some(get_task_documents_route), | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -114,6 +124,8 @@ pub struct PatchExperimentalFeatureAnalytics { | ||||
|     logs_route: bool, | ||||
|     edit_documents_by_function: bool, | ||||
|     contains_filter: bool, | ||||
|     network: bool, | ||||
|     get_task_documents_route: bool, | ||||
| } | ||||
|  | ||||
| impl Aggregate for PatchExperimentalFeatureAnalytics { | ||||
| @@ -127,6 +139,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { | ||||
|             logs_route: new.logs_route, | ||||
|             edit_documents_by_function: new.edit_documents_by_function, | ||||
|             contains_filter: new.contains_filter, | ||||
|             network: new.network, | ||||
|             get_task_documents_route: new.get_task_documents_route, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
| @@ -149,6 +163,8 @@ impl Aggregate for PatchExperimentalFeatureAnalytics { | ||||
|             logs_route: Some(false), | ||||
|             edit_documents_by_function: Some(false), | ||||
|             contains_filter: Some(false), | ||||
|             network: Some(false), | ||||
|             get_task_documents_route: Some(false), | ||||
|          })), | ||||
|         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||
|             { | ||||
| @@ -181,16 +197,23 @@ async fn patch_features( | ||||
|             .edit_documents_by_function | ||||
|             .unwrap_or(old_features.edit_documents_by_function), | ||||
|         contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter), | ||||
|         network: new_features.0.network.unwrap_or(old_features.network), | ||||
|         get_task_documents_route: new_features | ||||
|             .0 | ||||
|             .get_task_documents_route | ||||
|             .unwrap_or(old_features.get_task_documents_route), | ||||
|     }; | ||||
|  | ||||
|     // explicitly destructure for analytics rather than using the `Serialize` implementation, because | ||||
|     // the it renames to camelCase, which we don't want for analytics. | ||||
|     // it renames to camelCase, which we don't want for analytics. | ||||
|     // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. | ||||
|     let meilisearch_types::features::RuntimeTogglableFeatures { | ||||
|         metrics, | ||||
|         logs_route, | ||||
|         edit_documents_by_function, | ||||
|         contains_filter, | ||||
|         network, | ||||
|         get_task_documents_route, | ||||
|     } = new_features; | ||||
|  | ||||
|     analytics.publish( | ||||
| @@ -199,6 +222,8 @@ async fn patch_features( | ||||
|             logs_route, | ||||
|             edit_documents_by_function, | ||||
|             contains_filter, | ||||
|             network, | ||||
|             get_task_documents_route, | ||||
|         }, | ||||
|         &req, | ||||
|     ); | ||||
|   | ||||
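As an illustration of how these two flags get toggled at runtime, here is a minimal sketch of a `PATCH /experimental-features` payload. It assumes the camelCase renames noted in the analytics comment above and a local instance at the default address; it is a usage sketch, not code from this change.

use serde_json::json;

fn main() {
    // Body for: PATCH http://localhost:7700/experimental-features
    // (send with an `Authorization: Bearer <admin key>` header)
    let body = json!({
        "network": true,
        "getTaskDocumentsRoute": true
    });
    println!("{body}");
}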
| @@ -496,6 +496,12 @@ pub struct IndexStats { | ||||
|     pub number_of_documents: u64, | ||||
|     /// Whether or not the index is currently ingesting documents | ||||
|     pub is_indexing: bool, | ||||
|     /// Number of embeddings in the index | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub number_of_embeddings: Option<u64>, | ||||
|     /// Number of embedded documents in the index | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub number_of_embedded_documents: Option<u64>, | ||||
|     /// Association of every field name with the number of times it occurs in the documents. | ||||
|     #[schema(value_type = HashMap<String, u64>)] | ||||
|     pub field_distribution: FieldDistribution, | ||||
| @@ -506,6 +512,8 @@ impl From<index_scheduler::IndexStats> for IndexStats { | ||||
|         IndexStats { | ||||
|             number_of_documents: stats.inner_stats.number_of_documents, | ||||
|             is_indexing: stats.is_indexing, | ||||
|             number_of_embeddings: stats.inner_stats.number_of_embeddings, | ||||
|             number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents, | ||||
|             field_distribution: stats.inner_stats.field_distribution, | ||||
|         } | ||||
|     } | ||||
| @@ -524,6 +532,8 @@ impl From<index_scheduler::IndexStats> for IndexStats { | ||||
|         (status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!( | ||||
|             { | ||||
|                 "numberOfDocuments": 10, | ||||
|                 "numberOfEmbeddings": 10, | ||||
|                 "numberOfEmbeddedDocuments": 10, | ||||
|                 "isIndexing": true, | ||||
|                 "fieldDistribution": { | ||||
|                     "genre": 10, | ||||
|   | ||||
| @@ -34,6 +34,7 @@ use crate::routes::features::RuntimeTogglableFeatures; | ||||
| use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction}; | ||||
| use crate::routes::indexes::IndexView; | ||||
| use crate::routes::multi_search::SearchResults; | ||||
| use crate::routes::network::{Network, Remote}; | ||||
| use crate::routes::swap_indexes::SwapIndexesPayload; | ||||
| use crate::search::{ | ||||
|     FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, | ||||
| @@ -54,6 +55,7 @@ mod logs; | ||||
| mod metrics; | ||||
| mod multi_search; | ||||
| mod multi_search_analytics; | ||||
| pub mod network; | ||||
| mod open_api_utils; | ||||
| mod snapshot; | ||||
| mod swap_indexes; | ||||
| @@ -75,6 +77,7 @@ pub mod tasks; | ||||
|         (path = "/multi-search", api = multi_search::MultiSearchApi), | ||||
|         (path = "/swap-indexes", api = swap_indexes::SwapIndexesApi), | ||||
|         (path = "/experimental-features", api = features::ExperimentalFeaturesApi), | ||||
|         (path = "/network", api = network::NetworkApi), | ||||
|     ), | ||||
|     paths(get_health, get_version, get_stats), | ||||
|     tags( | ||||
| @@ -85,7 +88,7 @@ pub mod tasks; | ||||
|         url = "/", | ||||
|         description = "Local server", | ||||
|     )), | ||||
|     components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind)) | ||||
|     components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote)) | ||||
| )] | ||||
| pub struct MeilisearchApi; | ||||
|  | ||||
| @@ -103,7 +106,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { | ||||
|         .service(web::scope("/multi-search").configure(multi_search::configure)) | ||||
|         .service(web::scope("/swap-indexes").configure(swap_indexes::configure)) | ||||
|         .service(web::scope("/metrics").configure(metrics::configure)) | ||||
|         .service(web::scope("/experimental-features").configure(features::configure)); | ||||
|         .service(web::scope("/experimental-features").configure(features::configure)) | ||||
|         .service(web::scope("/network").configure(network::configure)); | ||||
|  | ||||
|     #[cfg(feature = "swagger")] | ||||
|     { | ||||
| @@ -359,9 +363,9 @@ pub async fn running() -> HttpResponse { | ||||
| #[derive(Serialize, Debug, ToSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Stats { | ||||
|     /// The size of the database, in bytes. | ||||
|     /// The disk space used by the database, in bytes. | ||||
|     pub database_size: u64, | ||||
|     #[serde(skip)] | ||||
|     /// The part of the database that is actually used, in bytes. | ||||
|     pub used_database_size: u64, | ||||
|     /// The date of the last update in the RFC 3339 format. Can be `null` if no update has ever been processed. | ||||
|     #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] | ||||
| @@ -383,6 +387,7 @@ pub struct Stats { | ||||
|         (status = 200, description = "The stats of the instance", body = Stats, content_type = "application/json", example = json!( | ||||
|             { | ||||
|                 "databaseSize": 567, | ||||
|                 "usedDatabaseSize": 456, | ||||
|                 "lastUpdate": "2019-11-20T09:40:33.711324Z", | ||||
|                 "indexes": { | ||||
|                     "movies": { | ||||
|   | ||||
| @@ -20,6 +20,7 @@ use crate::routes::indexes::search::search_kind; | ||||
| use crate::search::{ | ||||
|     add_search_rules, perform_federated_search, perform_search, FederatedSearch, | ||||
|     FederatedSearchResult, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, | ||||
|     PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, | ||||
| }; | ||||
| use crate::search_queue::SearchQueue; | ||||
|  | ||||
| @@ -48,6 +49,7 @@ pub struct SearchResults { | ||||
| /// Bundle multiple search queries in a single API request. Use this endpoint to search through multiple indexes at once. | ||||
| #[utoipa::path( | ||||
|     post, | ||||
|     request_body = FederatedSearch, | ||||
|     path = "", | ||||
|     tag = "Multi-search", | ||||
|     security(("Bearer" = ["search", "*"])), | ||||
| @@ -186,18 +188,22 @@ pub async fn multi_search_with_post( | ||||
|  | ||||
|     let response = match federation { | ||||
|         Some(federation) => { | ||||
|             let search_result = tokio::task::spawn_blocking(move || { | ||||
|                 perform_federated_search(&index_scheduler, queries, federation, features) | ||||
|             }) | ||||
|             .await; | ||||
|             // check remote header | ||||
|             let is_proxy = req | ||||
|                 .headers() | ||||
|                 .get(PROXY_SEARCH_HEADER) | ||||
|                 .is_some_and(|value| value.as_bytes() == PROXY_SEARCH_HEADER_VALUE.as_bytes()); | ||||
|             let search_result = | ||||
|                 perform_federated_search(&index_scheduler, queries, federation, features, is_proxy) | ||||
|                     .await; | ||||
|             permit.drop().await; | ||||
|  | ||||
|             if let Ok(Ok(_)) = search_result { | ||||
|             if search_result.is_ok() { | ||||
|                 multi_aggregate.succeed(); | ||||
|             } | ||||
|  | ||||
|             analytics.publish(multi_aggregate, &req); | ||||
|             HttpResponse::Ok().json(search_result??) | ||||
|             HttpResponse::Ok().json(search_result?) | ||||
|         } | ||||
|         None => { | ||||
|             // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, | ||||
|   | ||||
| @@ -13,6 +13,8 @@ pub struct MultiSearchAggregator { | ||||
|  | ||||
|     // sum of the number of distinct indexes in each single request, use with total_received to compute an avg | ||||
|     total_distinct_index_count: usize, | ||||
|     // sum of the number of distinct remotes in each single request, use with total_received to compute an avg | ||||
|     total_distinct_remote_count: usize, | ||||
|     // number of queries with a single index, use with total_received to compute a proportion | ||||
|     total_single_index: usize, | ||||
|  | ||||
| @@ -31,46 +33,49 @@ impl MultiSearchAggregator { | ||||
|     pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { | ||||
|         let use_federation = federated_search.federation.is_some(); | ||||
|  | ||||
|         let distinct_indexes: HashSet<_> = federated_search | ||||
|             .queries | ||||
|             .iter() | ||||
|             .map(|query| { | ||||
|                 let query = &query; | ||||
|                 // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex | ||||
|                 let SearchQueryWithIndex { | ||||
|                     index_uid, | ||||
|                     federation_options: _, | ||||
|                     q: _, | ||||
|                     vector: _, | ||||
|                     offset: _, | ||||
|                     limit: _, | ||||
|                     page: _, | ||||
|                     hits_per_page: _, | ||||
|                     attributes_to_retrieve: _, | ||||
|                     retrieve_vectors: _, | ||||
|                     attributes_to_crop: _, | ||||
|                     crop_length: _, | ||||
|                     attributes_to_highlight: _, | ||||
|                     show_ranking_score: _, | ||||
|                     show_ranking_score_details: _, | ||||
|                     show_matches_position: _, | ||||
|                     filter: _, | ||||
|                     sort: _, | ||||
|                     distinct: _, | ||||
|                     facets: _, | ||||
|                     highlight_pre_tag: _, | ||||
|                     highlight_post_tag: _, | ||||
|                     crop_marker: _, | ||||
|                     matching_strategy: _, | ||||
|                     attributes_to_search_on: _, | ||||
|                     hybrid: _, | ||||
|                     ranking_score_threshold: _, | ||||
|                     locales: _, | ||||
|                 } = query; | ||||
|         let mut distinct_indexes = HashSet::with_capacity(federated_search.queries.len()); | ||||
|         let mut distinct_remotes = HashSet::with_capacity(federated_search.queries.len()); | ||||
|  | ||||
|                 index_uid.as_str() | ||||
|             }) | ||||
|             .collect(); | ||||
|         // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex | ||||
|         for SearchQueryWithIndex { | ||||
|             index_uid, | ||||
|             federation_options, | ||||
|             q: _, | ||||
|             vector: _, | ||||
|             offset: _, | ||||
|             limit: _, | ||||
|             page: _, | ||||
|             hits_per_page: _, | ||||
|             attributes_to_retrieve: _, | ||||
|             retrieve_vectors: _, | ||||
|             attributes_to_crop: _, | ||||
|             crop_length: _, | ||||
|             attributes_to_highlight: _, | ||||
|             show_ranking_score: _, | ||||
|             show_ranking_score_details: _, | ||||
|             show_matches_position: _, | ||||
|             filter: _, | ||||
|             sort: _, | ||||
|             distinct: _, | ||||
|             facets: _, | ||||
|             highlight_pre_tag: _, | ||||
|             highlight_post_tag: _, | ||||
|             crop_marker: _, | ||||
|             matching_strategy: _, | ||||
|             attributes_to_search_on: _, | ||||
|             hybrid: _, | ||||
|             ranking_score_threshold: _, | ||||
|             locales: _, | ||||
|         } in &federated_search.queries | ||||
|         { | ||||
|             if let Some(federation_options) = federation_options { | ||||
|                 if let Some(remote) = &federation_options.remote { | ||||
|                     distinct_remotes.insert(remote.as_str()); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             distinct_indexes.insert(index_uid.as_str()); | ||||
|         } | ||||
|  | ||||
|         let show_ranking_score = | ||||
|             federated_search.queries.iter().any(|query| query.show_ranking_score); | ||||
| @@ -81,6 +86,7 @@ impl MultiSearchAggregator { | ||||
|             total_received: 1, | ||||
|             total_succeeded: 0, | ||||
|             total_distinct_index_count: distinct_indexes.len(), | ||||
|             total_distinct_remote_count: distinct_remotes.len(), | ||||
|             total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, | ||||
|             total_search_count: federated_search.queries.len(), | ||||
|             show_ranking_score, | ||||
| @@ -110,6 +116,8 @@ impl Aggregate for MultiSearchAggregator { | ||||
|         let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); | ||||
|         let total_distinct_index_count = | ||||
|             this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); | ||||
|         let total_distinct_remote_count = | ||||
|             this.total_distinct_remote_count.saturating_add(new.total_distinct_remote_count); | ||||
|         let total_single_index = this.total_single_index.saturating_add(new.total_single_index); | ||||
|         let total_search_count = this.total_search_count.saturating_add(new.total_search_count); | ||||
|         let show_ranking_score = this.show_ranking_score || new.show_ranking_score; | ||||
| @@ -121,6 +129,7 @@ impl Aggregate for MultiSearchAggregator { | ||||
|             total_received, | ||||
|             total_succeeded, | ||||
|             total_distinct_index_count, | ||||
|             total_distinct_remote_count, | ||||
|             total_single_index, | ||||
|             total_search_count, | ||||
|             show_ranking_score, | ||||
| @@ -134,6 +143,7 @@ impl Aggregate for MultiSearchAggregator { | ||||
|             total_received, | ||||
|             total_succeeded, | ||||
|             total_distinct_index_count, | ||||
|             total_distinct_remote_count, | ||||
|             total_single_index, | ||||
|             total_search_count, | ||||
|             show_ranking_score, | ||||
| @@ -152,6 +162,10 @@ impl Aggregate for MultiSearchAggregator { | ||||
|                 "total_distinct_index_count": total_distinct_index_count, | ||||
|                 "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early | ||||
|             }, | ||||
|             "remotes": { | ||||
|                 "total_distinct_remote_count": total_distinct_remote_count, | ||||
|                 "avg_distinct_remote_count": (total_distinct_remote_count as f64) / (total_received as f64), // not 0 else returned early | ||||
|             }, | ||||
|             "searches": { | ||||
|                 "total_search_count": total_search_count, | ||||
|                 "avg_search_count": (total_search_count as f64) / (total_received as f64), | ||||
|   | ||||
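The new `total_distinct_remote_count` metric tracks the `remote` federation option that individual queries can now carry. As a rough sketch of such a request (field names assumed from the `federation_options.remote` destructuring above, with the usual camelCase rename), a federated `/multi-search` body mixing a local query and one proxied to a declared remote could look like this:

use serde_json::json;

fn main() {
    // Body for: POST http://localhost:7700/multi-search
    let body = json!({
        "federation": {},
        "queries": [
            { "indexUid": "movies", "q": "wonder" },
            {
                "indexUid": "movies",
                "q": "wonder",
                // "ms-1" must be a remote declared through the /network route
                "federationOptions": { "remote": "ms-1" }
            }
        ]
    });
    println!("{body}");
}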
							
								
								
									
crates/meilisearch/src/routes/network.rs (new file, 261 lines added)
							| @@ -0,0 +1,261 @@ | ||||
| use std::collections::BTreeMap; | ||||
|  | ||||
| use actix_web::web::{self, Data}; | ||||
| use actix_web::{HttpRequest, HttpResponse}; | ||||
| use deserr::actix_web::AwebJson; | ||||
| use deserr::Deserr; | ||||
| use index_scheduler::IndexScheduler; | ||||
| use itertools::{EitherOrBoth, Itertools}; | ||||
| use meilisearch_types::deserr::DeserrJsonError; | ||||
| use meilisearch_types::error::deserr_codes::{ | ||||
|     InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl, | ||||
| }; | ||||
| use meilisearch_types::error::ResponseError; | ||||
| use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote}; | ||||
| use meilisearch_types::keys::actions; | ||||
| use meilisearch_types::milli::update::Setting; | ||||
| use serde::Serialize; | ||||
| use tracing::debug; | ||||
| use utoipa::{OpenApi, ToSchema}; | ||||
|  | ||||
| use crate::analytics::{Aggregate, Analytics}; | ||||
| use crate::extractors::authentication::policies::ActionPolicy; | ||||
| use crate::extractors::authentication::GuardedData; | ||||
| use crate::extractors::sequential_extractor::SeqHandler; | ||||
|  | ||||
| #[derive(OpenApi)] | ||||
| #[openapi( | ||||
|     paths(get_network, patch_network), | ||||
|     tags(( | ||||
|         name = "Network", | ||||
|         description = "The `/network` route allows you to describe the topology of a network of Meilisearch instances. | ||||
|  | ||||
| This route is **synchronous**. This means that no task object will be returned, and any change to the network will be made available immediately.", | ||||
|         external_docs(url = "https://www.meilisearch.com/docs/reference/api/network"), | ||||
|     )), | ||||
| )] | ||||
| pub struct NetworkApi; | ||||
|  | ||||
| pub fn configure(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service( | ||||
|         web::resource("") | ||||
|             .route(web::get().to(get_network)) | ||||
|             .route(web::patch().to(SeqHandler(patch_network))), | ||||
|     ); | ||||
| } | ||||
|  | ||||
| /// Get network topology | ||||
| /// | ||||
| /// Get a list of all Meilisearch instances currently known to this instance. | ||||
| #[utoipa::path( | ||||
|     get, | ||||
|     path = "", | ||||
|     tag = "Network", | ||||
|     security(("Bearer" = ["network.get", "network.*", "*"])), | ||||
|     responses( | ||||
|         (status = OK, description = "Known nodes are returned", body = Network, content_type = "application/json", example = json!( | ||||
|             { | ||||
|             "self": "ms-0", | ||||
|             "remotes": { | ||||
|             "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, | ||||
|             "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, | ||||
|             "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, | ||||
|         } | ||||
|     })), | ||||
|         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||
|             { | ||||
|                 "message": "The Authorization header is missing. It must use the bearer authorization method.", | ||||
|                 "code": "missing_authorization_header", | ||||
|                 "type": "auth", | ||||
|                 "link": "https://docs.meilisearch.com/errors#missing_authorization_header" | ||||
|             } | ||||
|         )), | ||||
|     ) | ||||
| )] | ||||
| async fn get_network( | ||||
|     index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_GET }>, Data<IndexScheduler>>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     index_scheduler.features().check_network("Using the /network route")?; | ||||
|  | ||||
|     let network = index_scheduler.network(); | ||||
|     debug!(returns = ?network, "Get network"); | ||||
|     Ok(HttpResponse::Ok().json(network)) | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Deserr, ToSchema, Serialize)] | ||||
| #[deserr(error = DeserrJsonError<InvalidNetworkRemotes>, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct Remote { | ||||
|     #[schema(value_type = Option<String>, example = json!({ | ||||
|         "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, | ||||
|         "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, | ||||
|         "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, | ||||
|     }))] | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)] | ||||
|     #[serde(default)] | ||||
|     pub url: Setting<String>, | ||||
|     #[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))] | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)] | ||||
|     #[serde(default)] | ||||
|     pub search_api_key: Setting<String>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Deserr, ToSchema, Serialize)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct Network { | ||||
|     #[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!("http://localhost:7700"))] | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)] | ||||
|     #[serde(default)] | ||||
|     pub remotes: Setting<BTreeMap<String, Option<Remote>>>, | ||||
|     #[schema(value_type = Option<String>, example = json!("ms-00"), rename = "self")] | ||||
|     #[serde(default, rename = "self")] | ||||
|     #[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)] | ||||
|     pub local: Setting<String>, | ||||
| } | ||||
|  | ||||
| impl Remote { | ||||
|     pub fn try_into_db_node(self, name: &str) -> Result<DbRemote, ResponseError> { | ||||
|         Ok(DbRemote { | ||||
|             url: self.url.set().ok_or(ResponseError::from_msg( | ||||
|                 format!("Missing field `.remotes.{name}.url`"), | ||||
|                 meilisearch_types::error::Code::MissingNetworkUrl, | ||||
|             ))?, | ||||
|             search_api_key: self.search_api_key.set(), | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Serialize)] | ||||
| pub struct PatchNetworkAnalytics { | ||||
|     network_size: usize, | ||||
|     network_has_self: bool, | ||||
| } | ||||
|  | ||||
| impl Aggregate for PatchNetworkAnalytics { | ||||
|     fn event_name(&self) -> &'static str { | ||||
|         "Network Updated" | ||||
|     } | ||||
|  | ||||
|     fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> { | ||||
|         Box::new(Self { network_size: new.network_size, network_has_self: new.network_has_self }) | ||||
|     } | ||||
|  | ||||
|     fn into_event(self: Box<Self>) -> serde_json::Value { | ||||
|         serde_json::to_value(*self).unwrap_or_default() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Configure Network | ||||
| /// | ||||
| /// Add or remove nodes from the network. | ||||
| #[utoipa::path( | ||||
|     patch, | ||||
|     path = "", | ||||
|     tag = "Network", | ||||
|     request_body = Network, | ||||
|     security(("Bearer" = ["network.update", "network.*", "*"])), | ||||
|     responses( | ||||
|         (status = OK, description = "New network state is returned",  body = Network, content_type = "application/json", example = json!( | ||||
|             { | ||||
|                 "self": "ms-0", | ||||
|                 "remotes": { | ||||
|                 "ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset }, | ||||
|                 "ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) }, | ||||
|                 "ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) }, | ||||
|             } | ||||
|         })), | ||||
|         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||
|             { | ||||
|                 "message": "The Authorization header is missing. It must use the bearer authorization method.", | ||||
|                 "code": "missing_authorization_header", | ||||
|                 "type": "auth", | ||||
|                 "link": "https://docs.meilisearch.com/errors#missing_authorization_header" | ||||
|             } | ||||
|         )), | ||||
|     ) | ||||
| )] | ||||
| async fn patch_network( | ||||
|     index_scheduler: GuardedData<ActionPolicy<{ actions::NETWORK_UPDATE }>, Data<IndexScheduler>>, | ||||
|     new_network: AwebJson<Network, DeserrJsonError>, | ||||
|     req: HttpRequest, | ||||
|     analytics: Data<Analytics>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     index_scheduler.features().check_network("Using the /network route")?; | ||||
|  | ||||
|     let new_network = new_network.0; | ||||
|     let old_network = index_scheduler.network(); | ||||
|     debug!(parameters = ?new_network, "Patch network"); | ||||
|  | ||||
|     let merged_self = match new_network.local { | ||||
|         Setting::Set(new_self) => Some(new_self), | ||||
|         Setting::Reset => None, | ||||
|         Setting::NotSet => old_network.local, | ||||
|     }; | ||||
|  | ||||
|     let merged_remotes = match new_network.remotes { | ||||
|         Setting::Set(new_remotes) => { | ||||
|             let mut merged_remotes = BTreeMap::new(); | ||||
|             for either_or_both in old_network | ||||
|                 .remotes | ||||
|                 .into_iter() | ||||
|                 .merge_join_by(new_remotes.into_iter(), |left, right| left.0.cmp(&right.0)) | ||||
|             { | ||||
|                 match either_or_both { | ||||
|                     EitherOrBoth::Both((key, old), (_, Some(new))) => { | ||||
|                         let DbRemote { url: old_url, search_api_key: old_search_api_key } = old; | ||||
|  | ||||
|                         let Remote { url: new_url, search_api_key: new_search_api_key } = new; | ||||
|  | ||||
|                         let merged = DbRemote { | ||||
|                             url: match new_url { | ||||
|                                 Setting::Set(new_url) => new_url, | ||||
|                                 Setting::Reset => { | ||||
|                                     return Err(ResponseError::from_msg( | ||||
|                                         format!( | ||||
|                                             "Field `.remotes.{key}.url` cannot be set to `null`" | ||||
|                                         ), | ||||
|                                         meilisearch_types::error::Code::InvalidNetworkUrl, | ||||
|                                     )) | ||||
|                                 } | ||||
|                                 Setting::NotSet => old_url, | ||||
|                             }, | ||||
|                             search_api_key: match new_search_api_key { | ||||
|                                 Setting::Set(new_search_api_key) => Some(new_search_api_key), | ||||
|                                 Setting::Reset => None, | ||||
|                                 Setting::NotSet => old_search_api_key, | ||||
|                             }, | ||||
|                         }; | ||||
|                         merged_remotes.insert(key, merged); | ||||
|                     } | ||||
|                     EitherOrBoth::Both((_, _), (_, None)) | EitherOrBoth::Right((_, None)) => {} | ||||
|                     EitherOrBoth::Left((key, node)) => { | ||||
|                         merged_remotes.insert(key, node); | ||||
|                     } | ||||
|                     EitherOrBoth::Right((key, Some(node))) => { | ||||
|                         let node = node.try_into_db_node(&key)?; | ||||
|                         merged_remotes.insert(key, node); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             merged_remotes | ||||
|         } | ||||
|         Setting::Reset => BTreeMap::new(), | ||||
|         Setting::NotSet => old_network.remotes, | ||||
|     }; | ||||
|  | ||||
|     analytics.publish( | ||||
|         PatchNetworkAnalytics { | ||||
|             network_size: merged_remotes.len(), | ||||
|             network_has_self: merged_self.is_some(), | ||||
|         }, | ||||
|         &req, | ||||
|     ); | ||||
|  | ||||
|     let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes }; | ||||
|     index_scheduler.put_network(merged_network.clone())?; | ||||
|     debug!(returns = ?merged_network, "Patch network"); | ||||
|     Ok(HttpResponse::Ok().json(merged_network)) | ||||
| } | ||||
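To make the merge rules of `patch_network` concrete: a field that is absent keeps its previous value (`Setting::NotSet`), an explicit `null` resets it (`Setting::Reset`), a whole remote set to `null` is dropped from the map, and a newly added remote must provide a `url`. A hypothetical `PATCH /network` body exercising those cases, assuming an existing network with `self = "ms-0"` and remotes `ms-1` and `ms-2`:

use serde_json::json;

fn main() {
    // Body for: PATCH http://localhost:7700/network
    let body = json!({
        // "self" is omitted -> NotSet, so the previous "ms-0" is kept
        "remotes": {
            // existing remote: url is omitted (kept), only the key is replaced
            "ms-1": { "searchApiKey": "new-key" },
            // explicit null -> "ms-2" is removed from the merged network
            "ms-2": null,
            // new remote: "url" is mandatory; a null url is rejected
            "ms-3": { "url": "http://localhost:7703" }
        }
    });
    println!("{body}");
}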
| @@ -1,3 +1,5 @@ | ||||
| use std::io::ErrorKind; | ||||
|  | ||||
| use actix_web::web::Data; | ||||
| use actix_web::{web, HttpRequest, HttpResponse}; | ||||
| use deserr::actix_web::AwebQueryParameter; | ||||
| @@ -16,6 +18,7 @@ use serde::Serialize; | ||||
| use time::format_description::well_known::Rfc3339; | ||||
| use time::macros::format_description; | ||||
| use time::{Date, Duration, OffsetDateTime, Time}; | ||||
| use tokio::io::AsyncReadExt; | ||||
| use tokio::task; | ||||
| use utoipa::{IntoParams, OpenApi, ToSchema}; | ||||
|  | ||||
| @@ -44,7 +47,11 @@ pub fn configure(cfg: &mut web::ServiceConfig) { | ||||
|             .route(web::delete().to(SeqHandler(delete_tasks))), | ||||
|     ) | ||||
|     .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks)))) | ||||
|     .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); | ||||
|     .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))) | ||||
|     .service( | ||||
|         web::resource("/{task_id}/documents") | ||||
|             .route(web::get().to(SeqHandler(get_task_documents_file))), | ||||
|     ); | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Deserr, IntoParams)] | ||||
| @@ -639,6 +646,76 @@ async fn get_task( | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Get a task's documents. | ||||
| /// | ||||
| /// Get a [task's documents file](https://www.meilisearch.com/docs/learn/async/asynchronous_operations). | ||||
| #[utoipa::path( | ||||
|     get, | ||||
|     path = "/{taskUid}/documents", | ||||
|     tag = "Tasks", | ||||
|     security(("Bearer" = ["tasks.get", "tasks.*", "*"])), | ||||
|     params(("taskUid", format = UInt32, example = 0, description = "The task identifier", nullable = false)), | ||||
|     responses( | ||||
|         (status = 200, description = "The content of the task update", body = serde_json::Value, content_type = "application/x-ndjson"), | ||||
|         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( | ||||
|             { | ||||
|                 "message": "The Authorization header is missing. It must use the bearer authorization method.", | ||||
|                 "code": "missing_authorization_header", | ||||
|                 "type": "auth", | ||||
|                 "link": "https://docs.meilisearch.com/errors#missing_authorization_header" | ||||
|             } | ||||
|         )), | ||||
|         (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!( | ||||
|             { | ||||
|                 "message": "Task :taskUid not found.", | ||||
|                 "code": "task_not_found", | ||||
|                 "type": "invalid_request", | ||||
|                 "link": "https://docs.meilisearch.com/errors/#task_not_found" | ||||
|             } | ||||
|         )) | ||||
|     ) | ||||
| )] | ||||
| async fn get_task_documents_file( | ||||
|     index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>, | ||||
|     task_uid: web::Path<String>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     index_scheduler.features().check_get_task_documents_route()?; | ||||
|     let task_uid_string = task_uid.into_inner(); | ||||
|  | ||||
|     let task_uid: TaskId = match task_uid_string.parse() { | ||||
|         Ok(id) => id, | ||||
|         Err(_e) => { | ||||
|             return Err(index_scheduler::Error::InvalidTaskUid { task_uid: task_uid_string }.into()) | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() }; | ||||
|     let filters = index_scheduler.filters(); | ||||
|     let (tasks, _) = index_scheduler.get_tasks_from_authorized_indexes(&query, filters)?; | ||||
|  | ||||
|     if let Some(task) = tasks.first() { | ||||
|         match task.content_uuid() { | ||||
|             Some(uuid) => { | ||||
|                 let mut tfile = match index_scheduler.queue.update_file(uuid) { | ||||
|                     Ok(file) => tokio::fs::File::from_std(file), | ||||
|                     Err(file_store::Error::IoError(e)) if e.kind() == ErrorKind::NotFound => { | ||||
|                         return Err(index_scheduler::Error::TaskFileNotFound(task_uid).into()) | ||||
|                     } | ||||
|                     Err(e) => return Err(e.into()), | ||||
|                 }; | ||||
|                 // Yes, that's awful to put everything in memory when we could have streamed it from | ||||
|                 // disk but it's really (really) complex to do with the current state of async Rust. | ||||
|                 let mut content = String::new(); | ||||
|                 tfile.read_to_string(&mut content).await?; | ||||
|                 Ok(HttpResponse::Ok().content_type("application/x-ndjson").body(content)) | ||||
|             } | ||||
|             None => Err(index_scheduler::Error::TaskFileNotFound(task_uid).into()), | ||||
|         } | ||||
|     } else { | ||||
|         Err(index_scheduler::Error::TaskNotFound(task_uid).into()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub enum DeserializeDateOption { | ||||
|     Before, | ||||
|     After, | ||||
|   | ||||
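Once the `getTaskDocumentsRoute` experimental feature is enabled, the update file of a document task can be fetched back as ndjson through the new route. A minimal client-side sketch, assuming the `reqwest` crate with its `blocking` feature and placeholder host, key, and task uid:

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = reqwest::blocking::Client::new();
    let ndjson = client
        .get("http://localhost:7700/tasks/0/documents") // task uid 0 as an example
        .bearer_auth("MASTER_KEY") // any key carrying the tasks.get action
        .send()?
        .error_for_status()?
        .text()?;
    for line in ndjson.lines() {
        println!("{line}"); // one JSON document per line
    }
    Ok(())
}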
| @@ -1,923 +0,0 @@ | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::BTreeMap; | ||||
| use std::fmt; | ||||
| use std::iter::Zip; | ||||
| use std::rc::Rc; | ||||
| use std::str::FromStr as _; | ||||
| use std::time::Duration; | ||||
| use std::vec::{IntoIter, Vec}; | ||||
|  | ||||
| use actix_http::StatusCode; | ||||
| use index_scheduler::{IndexScheduler, RoFeatures}; | ||||
| use indexmap::IndexMap; | ||||
| use meilisearch_types::deserr::DeserrJsonError; | ||||
| use meilisearch_types::error::deserr_codes::{ | ||||
|     InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, | ||||
|     InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit, | ||||
|     InvalidSearchOffset, | ||||
| }; | ||||
| use meilisearch_types::error::ResponseError; | ||||
| use meilisearch_types::index_uid::IndexUid; | ||||
| use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; | ||||
| use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; | ||||
| use roaring::RoaringBitmap; | ||||
| use serde::Serialize; | ||||
| use utoipa::ToSchema; | ||||
|  | ||||
| use super::ranking_rules::{self, RankingRules}; | ||||
| use super::{ | ||||
|     compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats, | ||||
|     HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, | ||||
| }; | ||||
| use crate::error::MeilisearchHttpError; | ||||
| use crate::routes::indexes::search::search_kind; | ||||
|  | ||||
| pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0; | ||||
|  | ||||
| #[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| pub struct FederationOptions { | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)] | ||||
|     #[schema(value_type = f64)] | ||||
|     pub weight: Weight, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] | ||||
| #[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)] | ||||
| pub struct Weight(f64); | ||||
|  | ||||
| impl Default for Weight { | ||||
|     fn default() -> Self { | ||||
|         Weight(DEFAULT_FEDERATED_WEIGHT) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl std::convert::TryFrom<f64> for Weight { | ||||
|     type Error = InvalidMultiSearchWeight; | ||||
|  | ||||
|     fn try_from(f: f64) -> Result<Self, Self::Error> { | ||||
|         if f < 0.0 { | ||||
|             Err(InvalidMultiSearchWeight) | ||||
|         } else { | ||||
|             Ok(Weight(f)) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl std::ops::Deref for Weight { | ||||
|     type Target = f64; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, deserr::Deserr, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct Federation { | ||||
|     #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)] | ||||
|     pub limit: usize, | ||||
|     #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] | ||||
|     pub offset: usize, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)] | ||||
|     pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)] | ||||
|     pub merge_facets: Option<MergeFacets>, | ||||
| } | ||||
|  | ||||
| #[derive(Copy, Clone, Debug, deserr::Deserr, Default, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct MergeFacets { | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)] | ||||
|     pub max_values_per_facet: Option<usize>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, deserr::Deserr, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct FederatedSearch { | ||||
|     pub queries: Vec<SearchQueryWithIndex>, | ||||
|     #[deserr(default)] | ||||
|     pub federation: Option<Federation>, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Clone, ToSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct FederatedSearchResult { | ||||
|     pub hits: Vec<SearchHit>, | ||||
|     pub processing_time_ms: u128, | ||||
|     #[serde(flatten)] | ||||
|     pub hits_info: HitsInfo, | ||||
|  | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub semantic_hit_count: Option<u32>, | ||||
|  | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     #[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)] | ||||
|     pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>, | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub facet_stats: Option<BTreeMap<String, FacetStats>>, | ||||
|     #[serde(skip_serializing_if = "FederatedFacets::is_empty")] | ||||
|     pub facets_by_index: FederatedFacets, | ||||
|  | ||||
|     // These fields are only used for analytics purposes | ||||
|     #[serde(skip)] | ||||
|     pub degraded: bool, | ||||
|     #[serde(skip)] | ||||
|     pub used_negative_operator: bool, | ||||
| } | ||||
|  | ||||
| impl fmt::Debug for FederatedSearchResult { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         let FederatedSearchResult { | ||||
|             hits, | ||||
|             processing_time_ms, | ||||
|             hits_info, | ||||
|             semantic_hit_count, | ||||
|             degraded, | ||||
|             used_negative_operator, | ||||
|             facet_distribution, | ||||
|             facet_stats, | ||||
|             facets_by_index, | ||||
|         } = self; | ||||
|  | ||||
|         let mut debug = f.debug_struct("SearchResult"); | ||||
|         // The most important thing when looking at a search result is the time it took to process | ||||
|         debug.field("processing_time_ms", &processing_time_ms); | ||||
|         debug.field("hits", &format!("[{} hits returned]", hits.len())); | ||||
|         debug.field("hits_info", &hits_info); | ||||
|         if *used_negative_operator { | ||||
|             debug.field("used_negative_operator", used_negative_operator); | ||||
|         } | ||||
|         if *degraded { | ||||
|             debug.field("degraded", degraded); | ||||
|         } | ||||
|         if let Some(facet_distribution) = facet_distribution { | ||||
|             debug.field("facet_distribution", &facet_distribution); | ||||
|         } | ||||
|         if let Some(facet_stats) = facet_stats { | ||||
|             debug.field("facet_stats", &facet_stats); | ||||
|         } | ||||
|         if let Some(semantic_hit_count) = semantic_hit_count { | ||||
|             debug.field("semantic_hit_count", &semantic_hit_count); | ||||
|         } | ||||
|         if !facets_by_index.is_empty() { | ||||
|             debug.field("facets_by_index", &facets_by_index); | ||||
|         } | ||||
|  | ||||
|         debug.finish() | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct WeightedScore<'a> { | ||||
|     details: &'a [ScoreDetails], | ||||
|     weight: f64, | ||||
| } | ||||
|  | ||||
| impl<'a> WeightedScore<'a> { | ||||
|     pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self { | ||||
|         Self { details, weight } | ||||
|     } | ||||
|  | ||||
|     pub fn weighted_global_score(&self) -> f64 { | ||||
|         ScoreDetails::global_score(self.details.iter()) * self.weight | ||||
|     } | ||||
|  | ||||
|     pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering { | ||||
|         self.weighted_global_score() | ||||
|             .partial_cmp(&other.weighted_global_score()) | ||||
|             // both are numbers, possibly infinite | ||||
|             .unwrap() | ||||
|     } | ||||
|  | ||||
|     pub fn compare(&self, other: &Self) -> Ordering { | ||||
|         let mut left_it = ScoreDetails::score_values(self.details.iter()); | ||||
|         let mut right_it = ScoreDetails::score_values(other.details.iter()); | ||||
|  | ||||
|         loop { | ||||
|             let left = left_it.next(); | ||||
|             let right = right_it.next(); | ||||
|  | ||||
|             match (left, right) { | ||||
|                 (None, None) => return Ordering::Equal, | ||||
|                 (None, Some(_)) => return Ordering::Less, | ||||
|                 (Some(_), None) => return Ordering::Greater, | ||||
|                 (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => { | ||||
|                     let left = left * self.weight; | ||||
|                     let right = right * other.weight; | ||||
|                     if (left - right).abs() <= f64::EPSILON { | ||||
|                         continue; | ||||
|                     } | ||||
|                     return left.partial_cmp(&right).unwrap(); | ||||
|                 } | ||||
|                 (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => { | ||||
|                     match left.partial_cmp(right) { | ||||
|                         Some(Ordering::Equal) => continue, | ||||
|                         Some(order) => return order, | ||||
|                         None => return self.compare_weighted_global_scores(other), | ||||
|                     } | ||||
|                 } | ||||
|                 (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => { | ||||
|                     match left.partial_cmp(right) { | ||||
|                         Some(Ordering::Equal) => continue, | ||||
|                         Some(order) => return order, | ||||
|                         None => { | ||||
|                             return self.compare_weighted_global_scores(other); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 // not comparable details, use global | ||||
|                 (Some(ScoreValue::Score(_)), Some(_)) | ||||
|                 | (Some(_), Some(ScoreValue::Score(_))) | ||||
|                 | (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) | ||||
|                 | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { | ||||
|                     let left_count = left_it.count(); | ||||
|                     let right_count = right_it.count(); | ||||
|                     // compare how many remaining groups of rules each side has. | ||||
|                     // the group with the most remaining groups wins. | ||||
|                     return left_count | ||||
|                         .cmp(&right_count) | ||||
|                         // breaks ties with the global ranking score | ||||
|                         .then_with(|| self.compare_weighted_global_scores(other)); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct QueryByIndex { | ||||
|     query: SearchQuery, | ||||
|     federation_options: FederationOptions, | ||||
|     query_index: usize, | ||||
| } | ||||
|  | ||||
| struct SearchResultByQuery<'a> { | ||||
|     documents_ids: Vec<DocumentId>, | ||||
|     document_scores: Vec<Vec<ScoreDetails>>, | ||||
|     federation_options: FederationOptions, | ||||
|     hit_maker: HitMaker<'a>, | ||||
|     query_index: usize, | ||||
| } | ||||
|  | ||||
| struct SearchResultByQueryIter<'a> { | ||||
|     it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>, | ||||
|     federation_options: FederationOptions, | ||||
|     hit_maker: Rc<HitMaker<'a>>, | ||||
|     query_index: usize, | ||||
| } | ||||
|  | ||||
| impl<'a> SearchResultByQueryIter<'a> { | ||||
|     fn new( | ||||
|         SearchResultByQuery { | ||||
|             documents_ids, | ||||
|             document_scores, | ||||
|             federation_options, | ||||
|             hit_maker, | ||||
|             query_index, | ||||
|         }: SearchResultByQuery<'a>, | ||||
|     ) -> Self { | ||||
|         let it = documents_ids.into_iter().zip(document_scores); | ||||
|         Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index } | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct SearchResultByQueryIterItem<'a> { | ||||
|     docid: DocumentId, | ||||
|     score: Vec<ScoreDetails>, | ||||
|     federation_options: FederationOptions, | ||||
|     hit_maker: Rc<HitMaker<'a>>, | ||||
|     query_index: usize, | ||||
| } | ||||
|  | ||||
| fn merge_index_local_results( | ||||
|     results_by_query: Vec<SearchResultByQuery<'_>>, | ||||
| ) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ { | ||||
|     itertools::kmerge_by( | ||||
|         results_by_query.into_iter().map(SearchResultByQueryIter::new), | ||||
|         |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| { | ||||
|             let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); | ||||
|             let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); | ||||
|  | ||||
|             match left_score.compare(&right_score) { | ||||
|                 // the biggest score goes first | ||||
|                 Ordering::Greater => true, | ||||
|                 // break ties using query index | ||||
|                 Ordering::Equal => left.query_index < right.query_index, | ||||
|                 Ordering::Less => false, | ||||
|             } | ||||
|         }, | ||||
|     ) | ||||
| } | ||||
|  | ||||
| fn merge_index_global_results( | ||||
|     results_by_index: Vec<SearchResultByIndex>, | ||||
| ) -> impl Iterator<Item = SearchHitByIndex> { | ||||
|     itertools::kmerge_by( | ||||
|         results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()), | ||||
|         |left: &SearchHitByIndex, right: &SearchHitByIndex| { | ||||
|             let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); | ||||
|             let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); | ||||
|  | ||||
|             match left_score.compare(&right_score) { | ||||
|                 // the biggest score goes first | ||||
|                 Ordering::Greater => true, | ||||
|                 // break ties using query index | ||||
|                 Ordering::Equal => left.query_index < right.query_index, | ||||
|                 Ordering::Less => false, | ||||
|             } | ||||
|         }, | ||||
|     ) | ||||
| } | ||||
|  | ||||
| impl<'a> Iterator for SearchResultByQueryIter<'a> { | ||||
|     type Item = SearchResultByQueryIterItem<'a>; | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         let (docid, score) = self.it.next()?; | ||||
|         Some(SearchResultByQueryIterItem { | ||||
|             docid, | ||||
|             score, | ||||
|             federation_options: self.federation_options, | ||||
|             hit_maker: Rc::clone(&self.hit_maker), | ||||
|             query_index: self.query_index, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct SearchHitByIndex { | ||||
|     hit: SearchHit, | ||||
|     score: Vec<ScoreDetails>, | ||||
|     federation_options: FederationOptions, | ||||
|     query_index: usize, | ||||
| } | ||||
|  | ||||
| struct SearchResultByIndex { | ||||
|     index: String, | ||||
|     hits: Vec<SearchHitByIndex>, | ||||
|     estimated_total_hits: usize, | ||||
|     degraded: bool, | ||||
|     used_negative_operator: bool, | ||||
|     facets: Option<ComputedFacets>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default, Serialize, ToSchema)] | ||||
| pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>); | ||||
|  | ||||
| impl FederatedFacets { | ||||
|     pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) { | ||||
|         if let Some(facets) = facets { | ||||
|             self.0.insert(index, facets); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn is_empty(&self) -> bool { | ||||
|         self.0.is_empty() | ||||
|     } | ||||
|  | ||||
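|     /// Merges the facets of all indexes into a single distribution: counts of identical facet | ||||
|     /// values are summed, stats keep the global min/max, and the resulting values are re-sorted | ||||
|     /// and truncated according to `facet_order` and `max_values_per_facet`. | ||||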
|     pub fn merge( | ||||
|         self, | ||||
|         MergeFacets { max_values_per_facet }: MergeFacets, | ||||
|         facet_order: BTreeMap<String, (String, OrderBy)>, | ||||
|     ) -> Option<ComputedFacets> { | ||||
|         if self.is_empty() { | ||||
|             return None; | ||||
|         } | ||||
|  | ||||
|         let mut distribution: BTreeMap<String, _> = Default::default(); | ||||
|         let mut stats: BTreeMap<String, FacetStats> = Default::default(); | ||||
|  | ||||
|         for facets_by_index in self.0.into_values() { | ||||
|             for (facet, index_distribution) in facets_by_index.distribution { | ||||
|                 match distribution.entry(facet) { | ||||
|                     std::collections::btree_map::Entry::Vacant(entry) => { | ||||
|                         entry.insert(index_distribution); | ||||
|                     } | ||||
|                     std::collections::btree_map::Entry::Occupied(mut entry) => { | ||||
|                         let distribution = entry.get_mut(); | ||||
|  | ||||
|                         for (value, index_count) in index_distribution { | ||||
|                             distribution | ||||
|                                 .entry(value) | ||||
|                                 .and_modify(|count| *count += index_count) | ||||
|                                 .or_insert(index_count); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             for (facet, index_stats) in facets_by_index.stats { | ||||
|                 match stats.entry(facet) { | ||||
|                     std::collections::btree_map::Entry::Vacant(entry) => { | ||||
|                         entry.insert(index_stats); | ||||
|                     } | ||||
|                     std::collections::btree_map::Entry::Occupied(mut entry) => { | ||||
|                         let stats = entry.get_mut(); | ||||
|  | ||||
|                         stats.min = f64::min(stats.min, index_stats.min); | ||||
|                         stats.max = f64::max(stats.max, index_stats.max); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // fixup order | ||||
|         for (facet, values) in &mut distribution { | ||||
|             let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); | ||||
|  | ||||
|             match order_by { | ||||
|                 OrderBy::Lexicographic => { | ||||
|                     values.sort_unstable_by(|left, _, right, _| left.cmp(right)) | ||||
|                 } | ||||
|                 OrderBy::Count => { | ||||
|                     values.sort_unstable_by(|_, left, _, right| { | ||||
|                         left.cmp(right) | ||||
|                             // biggest first | ||||
|                             .reverse() | ||||
|                     }) | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             if let Some(max_values_per_facet) = max_values_per_facet { | ||||
|                 values.truncate(max_values_per_facet) | ||||
|             }; | ||||
|         } | ||||
|  | ||||
|         Some(ComputedFacets { distribution, stats }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn perform_federated_search( | ||||
|     index_scheduler: &IndexScheduler, | ||||
|     queries: Vec<SearchQueryWithIndex>, | ||||
|     mut federation: Federation, | ||||
|     features: RoFeatures, | ||||
| ) -> Result<FederatedSearchResult, ResponseError> { | ||||
|     let before_search = std::time::Instant::now(); | ||||
|  | ||||
|     // this implementation partitions the queries by index to guarantee an important property: | ||||
|     // - all the queries to a particular index use the same read transaction. | ||||
|     // Without this property, we could not guarantee the self-consistency of the results. | ||||
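|     // For illustration (hypothetical request): with queries | ||||
|     //   [{"indexUid": "movies", "q": "wonder"}, {"indexUid": "comics", "q": "wonder"}, {"indexUid": "movies", "q": "woman"}] | ||||
|     // the partition below yields { "movies": [queries 0 and 2], "comics": [query 1] }, and each | ||||
|     // group is then executed against a single read transaction of its index. | ||||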
|  | ||||
|     // 1. partition queries by index | ||||
|     let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default(); | ||||
|     for (query_index, federated_query) in queries.into_iter().enumerate() { | ||||
|         if let Some(pagination_field) = federated_query.has_pagination() { | ||||
|             return Err(MeilisearchHttpError::PaginationInFederatedQuery( | ||||
|                 query_index, | ||||
|                 pagination_field, | ||||
|             ) | ||||
|             .into()); | ||||
|         } | ||||
|  | ||||
|         if let Some(facets) = federated_query.has_facets() { | ||||
|             let facets = facets.to_owned(); | ||||
|             return Err(MeilisearchHttpError::FacetsInFederatedQuery( | ||||
|                 query_index, | ||||
|                 federated_query.index_uid.into_inner(), | ||||
|                 facets, | ||||
|             ) | ||||
|             .into()); | ||||
|         } | ||||
|  | ||||
|         let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); | ||||
|  | ||||
|         queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { | ||||
|             query, | ||||
|             federation_options: federation_options.unwrap_or_default(), | ||||
|             query_index, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     // 2. perform queries, merge and make hits index by index | ||||
|     let required_hit_count = federation.limit + federation.offset; | ||||
|  | ||||
|     // In step (2), semantic_hit_count is set to Some(0) if any search kind uses semantic search. | ||||
|     // Then in step (3), it is incremented for every returned hit that carries a vector score. | ||||
|     let mut semantic_hit_count = None; | ||||
|     let mut results_by_index = Vec::with_capacity(queries_by_index.len()); | ||||
|     let mut previous_query_data: Option<(RankingRules, usize, String)> = None; | ||||
|  | ||||
|     // when facets are merged, remember for each facet the name and order of the first index that | ||||
|     // uses it, so we can detect when another index sorts the same facet inconsistently. | ||||
|     let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets | ||||
|     { | ||||
|         Some(MergeFacets { .. }) => Some(Default::default()), | ||||
|         _ => None, | ||||
|     }; | ||||
|  | ||||
|     for (index_uid, queries) in queries_by_index { | ||||
|         let first_query_index = queries.first().map(|query| query.query_index); | ||||
|  | ||||
|         let index = match index_scheduler.index(&index_uid) { | ||||
|             Ok(index) => index, | ||||
|             Err(err) => { | ||||
|                 let mut err = ResponseError::from(err); | ||||
|                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but | ||||
|                 // here the resource not found is not part of the URL. | ||||
|                 err.code = StatusCode::BAD_REQUEST; | ||||
|                 if let Some(query_index) = first_query_index { | ||||
|                     err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); | ||||
|                 } | ||||
|                 return Err(err); | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         // Important: this is the only transaction we'll use for this index during this federated search | ||||
|         let rtxn = index.read_txn()?; | ||||
|  | ||||
|         let criteria = index.criteria(&rtxn)?; | ||||
|  | ||||
|         let dictionary = index.dictionary(&rtxn)?; | ||||
|         let dictionary: Option<Vec<_>> = | ||||
|             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         let separators = index.allowed_separators(&rtxn)?; | ||||
|         let separators: Option<Vec<_>> = | ||||
|             separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|  | ||||
|         // the index-level search cutoff, from which each query builds its own time budget | ||||
|         let cutoff = index.search_cutoff(&rtxn)?; | ||||
|  | ||||
|         let mut degraded = false; | ||||
|         let mut used_negative_operator = false; | ||||
|         let mut candidates = RoaringBitmap::new(); | ||||
|  | ||||
|         let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten(); | ||||
|  | ||||
|         // TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries | ||||
|         if let Err(mut error) = | ||||
|             check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn) | ||||
|         { | ||||
|             error.message = format!( | ||||
|                 "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", | ||||
|                 if let Some(query_index) = first_query_index { | ||||
|                     format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") | ||||
|                 } else { | ||||
|                     Default::default() | ||||
|                 } | ||||
|             ); | ||||
|             return Err(error); | ||||
|         } | ||||
|  | ||||
|         // 2.1. Compute all candidates for each query in the index | ||||
|         let mut results_by_query = Vec::with_capacity(queries.len()); | ||||
|  | ||||
|         for QueryByIndex { query, federation_options, query_index } in queries { | ||||
|             // use an immediately invoked lambda to capture the result without returning from the function | ||||
|  | ||||
|             let res: Result<(), ResponseError> = (|| { | ||||
|                 let search_kind = | ||||
|                     search_kind(&query, index_scheduler, index_uid.to_string(), &index)?; | ||||
|  | ||||
|                 let canonicalization_kind = match (&search_kind, &query.q) { | ||||
|                     (SearchKind::SemanticOnly { .. }, _) => { | ||||
|                         ranking_rules::CanonicalizationKind::Vector | ||||
|                     } | ||||
|                     (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword, | ||||
|                     _ => ranking_rules::CanonicalizationKind::Placeholder, | ||||
|                 }; | ||||
|  | ||||
|                 let sort = if let Some(sort) = &query.sort { | ||||
|                     let sorts: Vec<_> = | ||||
|                         match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { | ||||
|                             Ok(sorts) => sorts, | ||||
|                             Err(asc_desc_error) => { | ||||
|                                 return Err(milli::Error::from(milli::SortError::from( | ||||
|                                     asc_desc_error, | ||||
|                                 )) | ||||
|                                 .into()) | ||||
|                             } | ||||
|                         }; | ||||
|                     Some(sorts) | ||||
|                 } else { | ||||
|                     None | ||||
|                 }; | ||||
|  | ||||
|                 let ranking_rules = ranking_rules::RankingRules::new( | ||||
|                     criteria.clone(), | ||||
|                     sort, | ||||
|                     query.matching_strategy.into(), | ||||
|                     canonicalization_kind, | ||||
|                 ); | ||||
|  | ||||
|                 if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) = | ||||
|                     previous_query_data.take() | ||||
|                 { | ||||
|                     if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) { | ||||
|                         return Err(error.to_response_error( | ||||
|                             &ranking_rules, | ||||
|                             &previous_ranking_rules, | ||||
|                             query_index, | ||||
|                             previous_query_index, | ||||
|                             &index_uid, | ||||
|                             &previous_index_uid, | ||||
|                         )); | ||||
|                     } | ||||
|                     previous_query_data = if previous_ranking_rules.constraint_count() | ||||
|                         > ranking_rules.constraint_count() | ||||
|                     { | ||||
|                         Some((previous_ranking_rules, previous_query_index, previous_index_uid)) | ||||
|                     } else { | ||||
|                         Some((ranking_rules, query_index, index_uid.clone())) | ||||
|                     }; | ||||
|                 } else { | ||||
|                     previous_query_data = Some((ranking_rules, query_index, index_uid.clone())); | ||||
|                 } | ||||
|  | ||||
|                 match search_kind { | ||||
|                     SearchKind::KeywordOnly => {} | ||||
|                     _ => semantic_hit_count = Some(0), | ||||
|                 } | ||||
|  | ||||
|                 let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors); | ||||
|  | ||||
|                 let time_budget = match cutoff { | ||||
|                     Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), | ||||
|                     None => TimeBudget::default(), | ||||
|                 }; | ||||
|  | ||||
|                 let (mut search, _is_finite_pagination, _max_total_hits, _offset) = | ||||
|                     prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?; | ||||
|  | ||||
|                 search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed); | ||||
|                 search.offset(0); | ||||
|                 search.limit(required_hit_count); | ||||
|  | ||||
|                 let (result, _semantic_hit_count) = | ||||
|                     super::search_from_kind(index_uid.to_string(), search_kind, search)?; | ||||
|                 let format = AttributesFormat { | ||||
|                     attributes_to_retrieve: query.attributes_to_retrieve, | ||||
|                     retrieve_vectors, | ||||
|                     attributes_to_highlight: query.attributes_to_highlight, | ||||
|                     attributes_to_crop: query.attributes_to_crop, | ||||
|                     crop_length: query.crop_length, | ||||
|                     crop_marker: query.crop_marker, | ||||
|                     highlight_pre_tag: query.highlight_pre_tag, | ||||
|                     highlight_post_tag: query.highlight_post_tag, | ||||
|                     show_matches_position: query.show_matches_position, | ||||
|                     sort: query.sort, | ||||
|                     show_ranking_score: query.show_ranking_score, | ||||
|                     show_ranking_score_details: query.show_ranking_score_details, | ||||
|                     locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()), | ||||
|                 }; | ||||
|  | ||||
|                 let milli::SearchResult { | ||||
|                     matching_words, | ||||
|                     candidates: query_candidates, | ||||
|                     documents_ids, | ||||
|                     document_scores, | ||||
|                     degraded: query_degraded, | ||||
|                     used_negative_operator: query_used_negative_operator, | ||||
|                 } = result; | ||||
|  | ||||
|                 candidates |= query_candidates; | ||||
|                 degraded |= query_degraded; | ||||
|                 used_negative_operator |= query_used_negative_operator; | ||||
|  | ||||
|                 let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref()); | ||||
|  | ||||
|                 let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); | ||||
|  | ||||
|                 let hit_maker = | ||||
|                     HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| { | ||||
|                         MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())) | ||||
|                     })?; | ||||
|  | ||||
|                 results_by_query.push(SearchResultByQuery { | ||||
|                     federation_options, | ||||
|                     hit_maker, | ||||
|                     query_index, | ||||
|                     documents_ids, | ||||
|                     document_scores, | ||||
|                 }); | ||||
|                 Ok(()) | ||||
|             })(); | ||||
|  | ||||
|             if let Err(mut error) = res { | ||||
|                 error.message = format!("Inside `.queries[{query_index}]`: {}", error.message); | ||||
|                 return Err(error); | ||||
|             } | ||||
|         } | ||||
|         // 2.2. merge inside index | ||||
|         let mut documents_seen = RoaringBitmap::new(); | ||||
|         let merged_result: Result<Vec<_>, ResponseError> = | ||||
|             merge_index_local_results(results_by_query) | ||||
|                 // skip documents we've already seen & mark that we saw the current document | ||||
|                 .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid)) | ||||
|                 .take(required_hit_count) | ||||
|                 // 2.3 make hits | ||||
|                 .map( | ||||
|                     |SearchResultByQueryIterItem { | ||||
|                          docid, | ||||
|                          score, | ||||
|                          federation_options, | ||||
|                          hit_maker, | ||||
|                          query_index, | ||||
|                      }| { | ||||
|                         let mut hit = hit_maker.make_hit(docid, &score)?; | ||||
|                         let weighted_score = | ||||
|                             ScoreDetails::global_score(score.iter()) * (*federation_options.weight); | ||||
|  | ||||
|                         let _federation = serde_json::json!( | ||||
|                             { | ||||
|                                 "indexUid": index_uid, | ||||
|                                 "queriesPosition": query_index, | ||||
|                                 "weightedRankingScore": weighted_score, | ||||
|                             } | ||||
|                         ); | ||||
|                         hit.document.insert("_federation".to_string(), _federation); | ||||
|                         Ok(SearchHitByIndex { hit, score, federation_options, query_index }) | ||||
|                     }, | ||||
|                 ) | ||||
|                 .collect(); | ||||
|  | ||||
|         let merged_result = merged_result?; | ||||
|  | ||||
|         let estimated_total_hits = candidates.len() as usize; | ||||
|  | ||||
|         let facets = facets_by_index | ||||
|             .map(|facets_by_index| { | ||||
|                 compute_facet_distribution_stats( | ||||
|                     &facets_by_index, | ||||
|                     &index, | ||||
|                     &rtxn, | ||||
|                     candidates, | ||||
|                     super::Route::MultiSearch, | ||||
|                 ) | ||||
|             }) | ||||
|             .transpose() | ||||
|             .map_err(|mut error| { | ||||
|                 error.message = format!( | ||||
|                     "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", | ||||
|                     error.message, | ||||
|                     if let Some(query_index) = first_query_index { | ||||
|                         format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") | ||||
|                     } else { | ||||
|                         Default::default() | ||||
|                     } | ||||
|                 ); | ||||
|                 error | ||||
|             })?; | ||||
|  | ||||
|         results_by_index.push(SearchResultByIndex { | ||||
|             index: index_uid, | ||||
|             hits: merged_result, | ||||
|             estimated_total_hits, | ||||
|             degraded, | ||||
|             used_negative_operator, | ||||
|             facets, | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     // bonus step: make sure to return an error if `facetsByIndex` requests a non-faceted field or an unknown index, even if no query actually uses that index. | ||||
|     for (index_uid, facets) in federation.facets_by_index { | ||||
|         let index = match index_scheduler.index(&index_uid) { | ||||
|             Ok(index) => index, | ||||
|             Err(err) => { | ||||
|                 let mut err = ResponseError::from(err); | ||||
|                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but | ||||
|                 // here the resource not found is not part of the URL. | ||||
|                 err.code = StatusCode::BAD_REQUEST; | ||||
|                 err.message = format!( | ||||
|                     "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", | ||||
|                     err.message | ||||
|                 ); | ||||
|                 return Err(err); | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         // Important: this is the only transaction we'll use for this index during this federated search | ||||
|         let rtxn = index.read_txn()?; | ||||
|  | ||||
|         if let Err(mut error) = | ||||
|             check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) | ||||
|         { | ||||
|             error.message = format!( | ||||
|                 "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", | ||||
|             ); | ||||
|             return Err(error); | ||||
|         } | ||||
|  | ||||
|         if let Some(facets) = facets { | ||||
|             if let Err(mut error) = compute_facet_distribution_stats( | ||||
|                 &facets, | ||||
|                 &index, | ||||
|                 &rtxn, | ||||
|                 Default::default(), | ||||
|                 super::Route::MultiSearch, | ||||
|             ) { | ||||
|                 error.message = | ||||
|                     format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); | ||||
|                 return Err(error); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // 3. merge hits and metadata across indexes | ||||
|     // 3.1 merge metadata | ||||
|     let (estimated_total_hits, degraded, used_negative_operator, facets) = { | ||||
|         let mut estimated_total_hits = 0; | ||||
|         let mut degraded = false; | ||||
|         let mut used_negative_operator = false; | ||||
|  | ||||
|         let mut facets: FederatedFacets = FederatedFacets::default(); | ||||
|  | ||||
|         for SearchResultByIndex { | ||||
|             index, | ||||
|             hits: _, | ||||
|             estimated_total_hits: estimated_total_hits_by_index, | ||||
|             facets: facets_by_index, | ||||
|             degraded: degraded_by_index, | ||||
|             used_negative_operator: used_negative_operator_by_index, | ||||
|         } in &mut results_by_index | ||||
|         { | ||||
|             estimated_total_hits += *estimated_total_hits_by_index; | ||||
|             degraded |= *degraded_by_index; | ||||
|             used_negative_operator |= *used_negative_operator_by_index; | ||||
|  | ||||
|             let facets_by_index = std::mem::take(facets_by_index); | ||||
|             let index = std::mem::take(index); | ||||
|  | ||||
|             facets.insert(index, facets_by_index); | ||||
|         } | ||||
|  | ||||
|         (estimated_total_hits, degraded, used_negative_operator, facets) | ||||
|     }; | ||||
|  | ||||
|     // 3.2 merge hits | ||||
|     let merged_hits: Vec<_> = merge_index_global_results(results_by_index) | ||||
|         .skip(federation.offset) | ||||
|         .take(federation.limit) | ||||
|         .inspect(|hit| { | ||||
|             if let Some(semantic_hit_count) = &mut semantic_hit_count { | ||||
|                 if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) { | ||||
|                     *semantic_hit_count += 1; | ||||
|                 } | ||||
|             } | ||||
|         }) | ||||
|         .map(|hit| hit.hit) | ||||
|         .collect(); | ||||
|  | ||||
|     let (facet_distribution, facet_stats, facets_by_index) = | ||||
|         match federation.merge_facets.zip(facet_order) { | ||||
|             Some((merge_facets, facet_order)) => { | ||||
|                 let facets = facets.merge(merge_facets, facet_order); | ||||
|  | ||||
|                 let (facet_distribution, facet_stats) = facets | ||||
|                     .map(|ComputedFacets { distribution, stats }| (distribution, stats)) | ||||
|                     .unzip(); | ||||
|  | ||||
|                 (facet_distribution, facet_stats, FederatedFacets::default()) | ||||
|             } | ||||
|             None => (None, None, facets), | ||||
|         }; | ||||
|  | ||||
|     let search_result = FederatedSearchResult { | ||||
|         hits: merged_hits, | ||||
|         processing_time_ms: before_search.elapsed().as_millis(), | ||||
|         hits_info: HitsInfo::OffsetLimit { | ||||
|             limit: federation.limit, | ||||
|             offset: federation.offset, | ||||
|             estimated_total_hits, | ||||
|         }, | ||||
|         semantic_hit_count, | ||||
|         degraded, | ||||
|         used_negative_operator, | ||||
|         facet_distribution, | ||||
|         facet_stats, | ||||
|         facets_by_index, | ||||
|     }; | ||||
|  | ||||
|     Ok(search_result) | ||||
| } | ||||
|  | ||||
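| // When facets are merged across indexes, checks that `current_index` sorts each requested facet | ||||
| // the same way as the first index that used it; `facet_order` records, per facet, that first | ||||
| // index and its order. Does nothing when `facet_order` is `None` (facets are not merged). | ||||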
| fn check_facet_order( | ||||
|     facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>, | ||||
|     current_index: &str, | ||||
|     facets_by_index: &Option<Vec<String>>, | ||||
|     index: &milli::Index, | ||||
|     rtxn: &milli::heed::RoTxn<'_>, | ||||
| ) -> Result<(), ResponseError> { | ||||
|     if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) { | ||||
|         let index_facet_order = index.sort_facet_values_by(rtxn)?; | ||||
|         for facet in facets_by_index { | ||||
|             let index_facet_order = index_facet_order.get(facet); | ||||
|             let (previous_index, previous_facet_order) = facet_order | ||||
|                 .entry(facet.to_owned()) | ||||
|                 .or_insert_with(|| (current_index.to_owned(), index_facet_order)); | ||||
|             if previous_facet_order != &index_facet_order { | ||||
|                 return Err(MeilisearchHttpError::InconsistentFacetOrder { | ||||
|                     facet: facet.clone(), | ||||
|                     previous_facet_order: *previous_facet_order, | ||||
|                     previous_uid: previous_index.clone(), | ||||
|                     current_uid: current_index.to_owned(), | ||||
|                     index_facet_order, | ||||
|                 } | ||||
|                 .into()); | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
|     Ok(()) | ||||
| } | ||||
							
								
								
									
crates/meilisearch/src/search/federated/mod.rs (new file, 10 lines)
							| @@ -0,0 +1,10 @@ | ||||
| mod perform; | ||||
| mod proxy; | ||||
| mod types; | ||||
| mod weighted_scores; | ||||
|  | ||||
| pub use perform::perform_federated_search; | ||||
| pub use proxy::{PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE}; | ||||
| pub use types::{ | ||||
|     FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets, | ||||
| }; | ||||
							
								
								
									
crates/meilisearch/src/search/federated/perform.rs (new file, 1112 lines; file diff suppressed because it is too large)
							
								
								
									
crates/meilisearch/src/search/federated/proxy.rs (new file, 267 lines)
							| @@ -0,0 +1,267 @@ | ||||
| pub use error::ProxySearchError; | ||||
| use error::ReqwestErrorWithoutUrl; | ||||
| use meilisearch_types::features::Remote; | ||||
| use rand::Rng as _; | ||||
| use reqwest::{Client, Response, StatusCode}; | ||||
| use serde::de::DeserializeOwned; | ||||
| use serde_json::Value; | ||||
|  | ||||
| use super::types::{FederatedSearch, FederatedSearchResult, Federation}; | ||||
| use crate::search::SearchQueryWithIndex; | ||||
|  | ||||
| pub const PROXY_SEARCH_HEADER: &str = "Meili-Proxy-Search"; | ||||
| pub const PROXY_SEARCH_HEADER_VALUE: &str = "true"; | ||||
|  | ||||
| mod error { | ||||
|     use meilisearch_types::error::ResponseError; | ||||
|     use reqwest::StatusCode; | ||||
|  | ||||
|     #[derive(Debug, thiserror::Error)] | ||||
|     pub enum ProxySearchError { | ||||
|         #[error("{0}")] | ||||
|         CouldNotSendRequest(ReqwestErrorWithoutUrl), | ||||
|         #[error("could not authenticate against the remote host\n  - hint: check that the remote instance was registered with a valid API key having the `search` action")] | ||||
|         AuthenticationError, | ||||
|         #[error( | ||||
|             "could not parse response from the remote host as a federated search response{}\n  - hint: check that the remote instance is a Meilisearch instance running the same version", | ||||
|             response_from_remote(response) | ||||
|         )] | ||||
|         CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> }, | ||||
|         #[error("remote host responded with code {}{}\n  - hint: check that the remote instance has the correct index configuration for that request\n  - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))] | ||||
|         BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> }, | ||||
|         #[error("remote host did not answer before the deadline")] | ||||
|         Timeout, | ||||
|         #[error("remote hit does not contain `{0}`\n  - hint: check that the remote instance is a Meilisearch instance running the same version")] | ||||
|         MissingPathInResponse(&'static str), | ||||
|         #[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))] | ||||
|         RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> }, | ||||
|         #[error("remote hit contains an unexpected value at path `{path}`: expected {expected_type}, received `{received_value}`\n  - hint: check that the remote instance is a Meilisearch instance running the same version")] | ||||
|         UnexpectedValueInPath { | ||||
|             path: &'static str, | ||||
|             expected_type: &'static str, | ||||
|             received_value: String, | ||||
|         }, | ||||
|         #[error("could not parse weighted score values in the remote hit: {0}")] | ||||
|         CouldNotParseWeightedScoreValues(serde_json::Error), | ||||
|     } | ||||
|  | ||||
|     impl ProxySearchError { | ||||
|         pub fn as_response_error(&self) -> ResponseError { | ||||
|             use meilisearch_types::error::Code; | ||||
|             let message = self.to_string(); | ||||
|             let code = match self { | ||||
|                 ProxySearchError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest, | ||||
|                 ProxySearchError::AuthenticationError => Code::RemoteInvalidApiKey, | ||||
|                 ProxySearchError::BadRequest { .. } => Code::RemoteBadRequest, | ||||
|                 ProxySearchError::Timeout => Code::RemoteTimeout, | ||||
|                 ProxySearchError::RemoteError { .. } => Code::RemoteRemoteError, | ||||
|                 ProxySearchError::CouldNotParseResponse { .. } | ||||
|                 | ProxySearchError::MissingPathInResponse(_) | ||||
|                 | ProxySearchError::UnexpectedValueInPath { .. } | ||||
|                 | ProxySearchError::CouldNotParseWeightedScoreValues(_) => Code::RemoteBadResponse, | ||||
|             }; | ||||
|             ResponseError::from_msg(message, code) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[derive(Debug, thiserror::Error)] | ||||
|     #[error(transparent)] | ||||
|     pub struct ReqwestErrorWithoutUrl(reqwest::Error); | ||||
|     impl ReqwestErrorWithoutUrl { | ||||
|         pub fn new(inner: reqwest::Error) -> Self { | ||||
|             Self(inner.without_url()) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String { | ||||
|         match response { | ||||
|             Ok(response) => { | ||||
|                 format!(":\n  - response from remote: {}", response) | ||||
|             } | ||||
|             Err(error) => { | ||||
|                 format!(":\n  - additionally, could not retrieve response from remote: {error}") | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct ProxySearchParams { | ||||
|     pub deadline: Option<std::time::Instant>, | ||||
|     pub try_count: u32, | ||||
|     pub client: reqwest::Client, | ||||
| } | ||||
|  | ||||
| /// Performs a federated search on a remote host and returns the results | ||||
| pub async fn proxy_search( | ||||
|     node: &Remote, | ||||
|     queries: Vec<SearchQueryWithIndex>, | ||||
|     federation: Federation, | ||||
|     params: &ProxySearchParams, | ||||
| ) -> Result<FederatedSearchResult, ProxySearchError> { | ||||
|     let url = format!("{}/multi-search", node.url); | ||||
|  | ||||
|     let federated = FederatedSearch { queries, federation: Some(federation) }; | ||||
|  | ||||
|     let search_api_key = node.search_api_key.as_deref(); | ||||
|  | ||||
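|     // Never wait on the remote for more than 5 seconds, even if the caller allows a later deadline. | ||||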
|     let max_deadline = std::time::Instant::now() + std::time::Duration::from_secs(5); | ||||
|  | ||||
|     let deadline = if let Some(deadline) = params.deadline { | ||||
|         std::time::Instant::min(deadline, max_deadline) | ||||
|     } else { | ||||
|         max_deadline | ||||
|     }; | ||||
|  | ||||
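|     // Make up to `try_count` attempts with backoff, then a final attempt whose error, if any, is | ||||
|     // returned as-is. | ||||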
|     for i in 0..params.try_count { | ||||
|         match try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline).await { | ||||
|             Ok(response) => return Ok(response), | ||||
|             Err(retry) => { | ||||
|                 let duration = retry.into_duration(i)?; | ||||
|                 tokio::time::sleep(duration).await; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     try_proxy_search(&url, search_api_key, &federated, ¶ms.client, deadline) | ||||
|         .await | ||||
|         .map_err(Retry::into_error) | ||||
| } | ||||
|  | ||||
| async fn try_proxy_search( | ||||
|     url: &str, | ||||
|     search_api_key: Option<&str>, | ||||
|     federated: &FederatedSearch, | ||||
|     client: &Client, | ||||
|     deadline: std::time::Instant, | ||||
| ) -> Result<FederatedSearchResult, Retry> { | ||||
|     let timeout = deadline.saturating_duration_since(std::time::Instant::now()); | ||||
|  | ||||
|     let request = client.post(url).json(&federated).timeout(timeout); | ||||
|     let request = if let Some(search_api_key) = search_api_key { | ||||
|         request.bearer_auth(search_api_key) | ||||
|     } else { | ||||
|         request | ||||
|     }; | ||||
|     let request = request.header(PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE); | ||||
|  | ||||
|     let response = request.send().await; | ||||
|     let response = match response { | ||||
|         Ok(response) => response, | ||||
|         Err(error) if error.is_timeout() => return Err(Retry::give_up(ProxySearchError::Timeout)), | ||||
|         Err(error) => { | ||||
|             return Err(Retry::retry_later(ProxySearchError::CouldNotSendRequest( | ||||
|                 ReqwestErrorWithoutUrl::new(error), | ||||
|             ))) | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     match response.status() { | ||||
|         status_code if status_code.is_success() => (), | ||||
|         StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => { | ||||
|             return Err(Retry::give_up(ProxySearchError::AuthenticationError)) | ||||
|         } | ||||
|         status_code if status_code.is_client_error() => { | ||||
|             let response = parse_error(response).await; | ||||
|             return Err(Retry::give_up(ProxySearchError::BadRequest { status_code, response })); | ||||
|         } | ||||
|         status_code if status_code.is_server_error() => { | ||||
|             let response = parse_error(response).await; | ||||
|             return Err(Retry::retry_later(ProxySearchError::RemoteError { | ||||
|                 status_code, | ||||
|                 response, | ||||
|             })); | ||||
|         } | ||||
|         status_code => { | ||||
|             tracing::warn!( | ||||
|                 status_code = status_code.as_u16(), | ||||
|                 "remote replied with unexpected status code" | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let response = match parse_response(response).await { | ||||
|         Ok(response) => response, | ||||
|         Err(response) => { | ||||
|             return Err(Retry::retry_later(ProxySearchError::CouldNotParseResponse { response })) | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// Parses the body of a failed response, preferring JSON and falling back to the raw text. | ||||
| async fn parse_error(response: Response) -> Result<String, ReqwestErrorWithoutUrl> { | ||||
|     let bytes = match response.bytes().await { | ||||
|         Ok(bytes) => bytes, | ||||
|         Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)), | ||||
|     }; | ||||
|  | ||||
|     Ok(parse_bytes_as_error(&bytes)) | ||||
| } | ||||
|  | ||||
| fn parse_bytes_as_error(bytes: &[u8]) -> String { | ||||
|     match serde_json::from_slice::<Value>(bytes) { | ||||
|         Ok(value) => value.to_string(), | ||||
|         Err(_) => String::from_utf8_lossy(bytes).into_owned(), | ||||
|     } | ||||
| } | ||||
|  | ||||
| async fn parse_response<T: DeserializeOwned>( | ||||
|     response: Response, | ||||
| ) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> { | ||||
|     let bytes = match response.bytes().await { | ||||
|         Ok(bytes) => bytes, | ||||
|         Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))), | ||||
|     }; | ||||
|  | ||||
|     match serde_json::from_slice::<T>(&bytes) { | ||||
|         Ok(value) => Ok(value), | ||||
|         Err(_) => Err(Ok(parse_bytes_as_error(&bytes))), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct Retry { | ||||
|     error: ProxySearchError, | ||||
|     strategy: RetryStrategy, | ||||
| } | ||||
|  | ||||
| pub enum RetryStrategy { | ||||
|     GiveUp, | ||||
|     Retry, | ||||
| } | ||||
|  | ||||
| impl Retry { | ||||
|     pub fn give_up(error: ProxySearchError) -> Self { | ||||
|         Self { error, strategy: RetryStrategy::GiveUp } | ||||
|     } | ||||
|  | ||||
|     pub fn retry_later(error: ProxySearchError) -> Self { | ||||
|         Self { error, strategy: RetryStrategy::Retry } | ||||
|     } | ||||
|  | ||||
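|     /// Returns how long to wait before the next attempt, or the error itself when the strategy is | ||||
|     /// to give up. The base delay is 10^attempt nanoseconds capped at 100ms, plus a random jitter | ||||
|     /// of up to the base delay (e.g. attempt 7: 10ms base, so between 10ms and 20ms in total). | ||||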
|     pub fn into_duration(self, attempt: u32) -> Result<std::time::Duration, ProxySearchError> { | ||||
|         match self.strategy { | ||||
|             RetryStrategy::GiveUp => Err(self.error), | ||||
|             RetryStrategy::Retry => { | ||||
|                 let retry_duration = std::time::Duration::from_nanos((10u64).pow(attempt)); | ||||
|                 let retry_duration = retry_duration.min(std::time::Duration::from_millis(100)); // don't wait more than 100ms | ||||
|  | ||||
|                 // add random jitter: anywhere up to double the retry duration | ||||
|                 let retry_duration = retry_duration | ||||
|                     + rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration); | ||||
|  | ||||
|                 tracing::warn!( | ||||
|                     "Attempt #{}, failed with {}, retrying after {}ms.", | ||||
|                     attempt, | ||||
|                     self.error, | ||||
|                     retry_duration.as_millis() | ||||
|                 ); | ||||
|                 Ok(retry_duration) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn into_error(self) -> ProxySearchError { | ||||
|         self.error | ||||
|     } | ||||
| } | ||||
							
								
								
									
crates/meilisearch/src/search/federated/types.rs (new file, 322 lines)
							| @@ -0,0 +1,322 @@ | ||||
| use std::collections::btree_map::Entry; | ||||
| use std::collections::BTreeMap; | ||||
| use std::fmt; | ||||
| use std::vec::Vec; | ||||
|  | ||||
| use indexmap::IndexMap; | ||||
| use meilisearch_types::deserr::DeserrJsonError; | ||||
| use meilisearch_types::error::deserr_codes::{ | ||||
|     InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, | ||||
|     InvalidMultiSearchMergeFacets, InvalidMultiSearchQueryPosition, InvalidMultiSearchRemote, | ||||
|     InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, | ||||
| }; | ||||
| use meilisearch_types::error::ResponseError; | ||||
| use meilisearch_types::index_uid::IndexUid; | ||||
| use meilisearch_types::milli::order_by_map::OrderByMap; | ||||
| use meilisearch_types::milli::OrderBy; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use utoipa::ToSchema; | ||||
|  | ||||
| use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex}; | ||||
|  | ||||
| pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0; | ||||
|  | ||||
| // field names used for the `_federation` metadata injected into each returned hit | ||||
| pub const FEDERATION_HIT: &str = "_federation"; | ||||
| pub const INDEX_UID: &str = "indexUid"; | ||||
| pub const QUERIES_POSITION: &str = "queriesPosition"; | ||||
| pub const WEIGHTED_RANKING_SCORE: &str = "weightedRankingScore"; | ||||
| pub const WEIGHTED_SCORE_VALUES: &str = "weightedScoreValues"; | ||||
| pub const FEDERATION_REMOTE: &str = "remote"; | ||||
|  | ||||
| #[derive(Debug, Default, Clone, PartialEq, Serialize, deserr::Deserr, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct FederationOptions { | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)] | ||||
|     #[schema(value_type = f64)] | ||||
|     pub weight: Weight, | ||||
|  | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchRemote>)] | ||||
|     pub remote: Option<String>, | ||||
|  | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchQueryPosition>)] | ||||
|     pub query_position: Option<usize>, | ||||
| } | ||||
|  | ||||
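| /// Multiplier applied to the ranking scores of the hits returned by the query it is attached to. | ||||
| /// Only values greater than or equal to 0.0 are accepted; the default is `DEFAULT_FEDERATED_WEIGHT` (1.0). | ||||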
| #[derive(Debug, Clone, Copy, PartialEq, Serialize, deserr::Deserr)] | ||||
| #[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)] | ||||
| pub struct Weight(f64); | ||||
|  | ||||
| impl Default for Weight { | ||||
|     fn default() -> Self { | ||||
|         Weight(DEFAULT_FEDERATED_WEIGHT) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl std::convert::TryFrom<f64> for Weight { | ||||
|     type Error = InvalidMultiSearchWeight; | ||||
|  | ||||
|     fn try_from(f: f64) -> Result<Self, Self::Error> { | ||||
|         if f < 0.0 { | ||||
|             Err(InvalidMultiSearchWeight) | ||||
|         } else { | ||||
|             Ok(Weight(f)) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl std::ops::Deref for Weight { | ||||
|     type Target = f64; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, deserr::Deserr, Serialize, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Federation { | ||||
|     #[deserr(default = super::super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)] | ||||
|     pub limit: usize, | ||||
|     #[deserr(default = super::super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] | ||||
|     pub offset: usize, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)] | ||||
|     pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)] | ||||
|     pub merge_facets: Option<MergeFacets>, | ||||
| } | ||||
|  | ||||
| #[derive(Copy, Clone, Debug, deserr::Deserr, Serialize, Default, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct MergeFacets { | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)] | ||||
|     pub max_values_per_facet: Option<usize>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, deserr::Deserr, Serialize, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct FederatedSearch { | ||||
|     pub queries: Vec<SearchQueryWithIndex>, | ||||
|     #[deserr(default)] | ||||
|     pub federation: Option<Federation>, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Deserialize, Clone, ToSchema)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct FederatedSearchResult { | ||||
|     pub hits: Vec<SearchHit>, | ||||
|     pub processing_time_ms: u128, | ||||
|     #[serde(flatten)] | ||||
|     pub hits_info: HitsInfo, | ||||
|  | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub semantic_hit_count: Option<u32>, | ||||
|  | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     #[schema(value_type = Option<BTreeMap<String, BTreeMap<String, u64>>>)] | ||||
|     pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>, | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub facet_stats: Option<BTreeMap<String, FacetStats>>, | ||||
|     #[serde(default, skip_serializing_if = "FederatedFacets::is_empty")] | ||||
|     pub facets_by_index: FederatedFacets, | ||||
|  | ||||
|     #[serde(default, skip_serializing_if = "Option::is_none")] | ||||
|     pub remote_errors: Option<BTreeMap<String, ResponseError>>, | ||||
|  | ||||
|     // These fields are only used for analytics purposes | ||||
|     #[serde(skip)] | ||||
|     pub degraded: bool, | ||||
|     #[serde(skip)] | ||||
|     pub used_negative_operator: bool, | ||||
| } | ||||
|  | ||||
| impl fmt::Debug for FederatedSearchResult { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         let FederatedSearchResult { | ||||
|             hits, | ||||
|             processing_time_ms, | ||||
|             hits_info, | ||||
|             semantic_hit_count, | ||||
|             degraded, | ||||
|             used_negative_operator, | ||||
|             facet_distribution, | ||||
|             facet_stats, | ||||
|             facets_by_index, | ||||
|             remote_errors, | ||||
|         } = self; | ||||
|  | ||||
|         let mut debug = f.debug_struct("SearchResult"); | ||||
|         // The most important thing when looking at a search result is the time it took to process | ||||
|         debug.field("processing_time_ms", &processing_time_ms); | ||||
|         debug.field("hits", &format!("[{} hits returned]", hits.len())); | ||||
|         debug.field("hits_info", &hits_info); | ||||
|         if *used_negative_operator { | ||||
|             debug.field("used_negative_operator", used_negative_operator); | ||||
|         } | ||||
|         if *degraded { | ||||
|             debug.field("degraded", degraded); | ||||
|         } | ||||
|         if let Some(facet_distribution) = facet_distribution { | ||||
|             debug.field("facet_distribution", &facet_distribution); | ||||
|         } | ||||
|         if let Some(facet_stats) = facet_stats { | ||||
|             debug.field("facet_stats", &facet_stats); | ||||
|         } | ||||
|         if let Some(semantic_hit_count) = semantic_hit_count { | ||||
|             debug.field("semantic_hit_count", &semantic_hit_count); | ||||
|         } | ||||
|         if !facets_by_index.is_empty() { | ||||
|             debug.field("facets_by_index", &facets_by_index); | ||||
|         } | ||||
|         if let Some(remote_errors) = remote_errors { | ||||
|             debug.field("remote_errors", &remote_errors); | ||||
|         } | ||||
|  | ||||
|         debug.finish() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] | ||||
| pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>); | ||||
|  | ||||
| impl FederatedFacets { | ||||
|     pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) { | ||||
|         if let Some(facets) = facets { | ||||
|             self.0.insert(index, facets); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn is_empty(&self) -> bool { | ||||
|         self.0.is_empty() | ||||
|     } | ||||
|  | ||||
|     pub fn merge( | ||||
|         self, | ||||
|         MergeFacets { max_values_per_facet }: MergeFacets, | ||||
|         facet_order: BTreeMap<String, (String, OrderBy)>, | ||||
|     ) -> Option<ComputedFacets> { | ||||
|         if self.is_empty() { | ||||
|             return None; | ||||
|         } | ||||
|  | ||||
|         let mut distribution: BTreeMap<String, _> = Default::default(); | ||||
|         let mut stats: BTreeMap<String, FacetStats> = Default::default(); | ||||
|  | ||||
|         for facets_by_index in self.0.into_values() { | ||||
|             for (facet, index_distribution) in facets_by_index.distribution { | ||||
|                 match distribution.entry(facet) { | ||||
|                     Entry::Vacant(entry) => { | ||||
|                         entry.insert(index_distribution); | ||||
|                     } | ||||
|                     Entry::Occupied(mut entry) => { | ||||
|                         let distribution = entry.get_mut(); | ||||
|  | ||||
|                         for (value, index_count) in index_distribution { | ||||
|                             distribution | ||||
|                                 .entry(value) | ||||
|                                 .and_modify(|count| *count += index_count) | ||||
|                                 .or_insert(index_count); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             for (facet, index_stats) in facets_by_index.stats { | ||||
|                 match stats.entry(facet) { | ||||
|                     Entry::Vacant(entry) => { | ||||
|                         entry.insert(index_stats); | ||||
|                     } | ||||
|                     Entry::Occupied(mut entry) => { | ||||
|                         let stats = entry.get_mut(); | ||||
|  | ||||
|                         stats.min = f64::min(stats.min, index_stats.min); | ||||
|                         stats.max = f64::max(stats.max, index_stats.max); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // fixup order | ||||
|         for (facet, values) in &mut distribution { | ||||
|             let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); | ||||
|  | ||||
|             match order_by { | ||||
|                 OrderBy::Lexicographic => { | ||||
|                     values.sort_unstable_by(|left, _, right, _| left.cmp(right)) | ||||
|                 } | ||||
|                 OrderBy::Count => { | ||||
|                     values.sort_unstable_by(|_, left, _, right| { | ||||
|                         left.cmp(right) | ||||
|                             // biggest first | ||||
|                             .reverse() | ||||
|                     }) | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             if let Some(max_values_per_facet) = max_values_per_facet { | ||||
|                 values.truncate(max_values_per_facet) | ||||
|             }; | ||||
|         } | ||||
|  | ||||
|         Some(ComputedFacets { distribution, stats }) | ||||
|     } | ||||
|  | ||||
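|     /// Merges facets received from a remote instance into the local ones, index by index: stats | ||||
|     /// keep the global min/max and distribution counts of identical values are summed. | ||||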
|     pub(crate) fn append(&mut self, FederatedFacets(remote_facets_by_index): FederatedFacets) { | ||||
|         for (index, remote_facets) in remote_facets_by_index { | ||||
|             let merged_facets = self.0.entry(index).or_default(); | ||||
|  | ||||
|             for (remote_facet, remote_stats) in remote_facets.stats { | ||||
|                 match merged_facets.stats.entry(remote_facet) { | ||||
|                     Entry::Vacant(vacant_entry) => { | ||||
|                         vacant_entry.insert(remote_stats); | ||||
|                     } | ||||
|                     Entry::Occupied(mut occupied_entry) => { | ||||
|                         let stats = occupied_entry.get_mut(); | ||||
|                         stats.min = f64::min(stats.min, remote_stats.min); | ||||
|                         stats.max = f64::max(stats.max, remote_stats.max); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             for (remote_facet, remote_values) in remote_facets.distribution { | ||||
|                 let merged_facet = merged_facets.distribution.entry(remote_facet).or_default(); | ||||
|                 for (remote_value, remote_count) in remote_values { | ||||
|                     let count = merged_facet.entry(remote_value).or_default(); | ||||
|                     *count += remote_count; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
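|     /// Re-sorts and truncates the facet values of each index according to that index's own facet | ||||
|     /// order and `maxValuesPerFacet` limit, both provided through `facet_order`. | ||||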
|     pub fn sort_and_truncate(&mut self, facet_order: BTreeMap<String, (OrderByMap, usize)>) { | ||||
|         for (index, facets) in &mut self.0 { | ||||
|             let Some((order_by, max_values_per_facet)) = facet_order.get(index) else { | ||||
|                 continue; | ||||
|             }; | ||||
|             for (facet, values) in &mut facets.distribution { | ||||
|                 match order_by.get(facet) { | ||||
|                     OrderBy::Lexicographic => { | ||||
|                         values.sort_unstable_by(|left, _, right, _| left.cmp(right)) | ||||
|                     } | ||||
|                     OrderBy::Count => { | ||||
|                         values.sort_unstable_by(|_, left, _, right| { | ||||
|                             left.cmp(right) | ||||
|                                 // biggest first | ||||
|                                 .reverse() | ||||
|                         }) | ||||
|                     } | ||||
|                 } | ||||
|                 values.truncate(*max_values_per_facet); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
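The merge rule implemented above is deliberately simple: facet value counts coming from different hosts are summed, and numeric facet stats keep the smallest min and the largest max before the final sort-and-truncate pass. A minimal, self-contained sketch of the counting part on plain maps (simplified types, not part of this diff):

    use std::collections::BTreeMap;

    // counts for the same facet value are summed across remotes
    fn merge_counts(local: &mut BTreeMap<String, u64>, remote: BTreeMap<String, u64>) {
        for (value, count) in remote {
            *local.entry(value).or_default() += count;
        }
    }

    fn main() {
        let mut local = BTreeMap::from([("blue".to_string(), 2_u64)]);
        merge_counts(&mut local, BTreeMap::from([("blue".to_string(), 3), ("red".to_string(), 1)]));
        assert_eq!(local["blue"], 5);
        assert_eq!(local["red"], 1);
    }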
							
								
								
									
88  crates/meilisearch/src/search/federated/weighted_scores.rs  Normal file
							| @@ -0,0 +1,88 @@ | ||||
| use std::cmp::Ordering; | ||||
|  | ||||
| use meilisearch_types::milli::score_details::{self, WeightedScoreValue}; | ||||
|  | ||||
| pub fn compare( | ||||
|     mut left_it: impl Iterator<Item = WeightedScoreValue>, | ||||
|     left_weighted_global_score: f64, | ||||
|     mut right_it: impl Iterator<Item = WeightedScoreValue>, | ||||
|     right_weighted_global_score: f64, | ||||
| ) -> Ordering { | ||||
|     loop { | ||||
|         let left = left_it.next(); | ||||
|         let right = right_it.next(); | ||||
|  | ||||
|         match (left, right) { | ||||
|             (None, None) => return Ordering::Equal, | ||||
|             (None, Some(_)) => return Ordering::Less, | ||||
|             (Some(_), None) => return Ordering::Greater, | ||||
|             ( | ||||
|                 Some( | ||||
|                     WeightedScoreValue::WeightedScore(left) | WeightedScoreValue::VectorSort(left), | ||||
|                 ), | ||||
|                 Some( | ||||
|                     WeightedScoreValue::WeightedScore(right) | ||||
|                     | WeightedScoreValue::VectorSort(right), | ||||
|                 ), | ||||
|             ) => { | ||||
|                 if (left - right).abs() <= f64::EPSILON { | ||||
|                     continue; | ||||
|                 } | ||||
|                 return left.partial_cmp(&right).unwrap(); | ||||
|             } | ||||
|             ( | ||||
|                 Some(WeightedScoreValue::Sort { asc: left_asc, value: left }), | ||||
|                 Some(WeightedScoreValue::Sort { asc: right_asc, value: right }), | ||||
|             ) => { | ||||
|                 if left_asc != right_asc { | ||||
|                     return left_weighted_global_score | ||||
|                         .partial_cmp(&right_weighted_global_score) | ||||
|                         .unwrap(); | ||||
|                 } | ||||
|                 match score_details::compare_sort_values(left_asc, &left, &right) { | ||||
|                     Ordering::Equal => continue, | ||||
|                     order => return order, | ||||
|                 } | ||||
|             } | ||||
|             ( | ||||
|                 Some(WeightedScoreValue::GeoSort { asc: left_asc, distance: left }), | ||||
|                 Some(WeightedScoreValue::GeoSort { asc: right_asc, distance: right }), | ||||
|             ) => { | ||||
|                 if left_asc != right_asc { | ||||
|                     continue; | ||||
|                 } | ||||
|                 match (left, right) { | ||||
|                     (None, None) => continue, | ||||
|                     (None, Some(_)) => return Ordering::Less, | ||||
|                     (Some(_), None) => return Ordering::Greater, | ||||
|                     (Some(left), Some(right)) => { | ||||
|                         if (left - right).abs() <= f64::EPSILON { | ||||
|                             continue; | ||||
|                         } | ||||
|                         return left.partial_cmp(&right).unwrap(); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             // score details are not comparable: compare the remaining rule groups, then the global score | ||||
|             (Some(WeightedScoreValue::WeightedScore(_)), Some(_)) | ||||
|             | (Some(_), Some(WeightedScoreValue::WeightedScore(_))) | ||||
|             | (Some(WeightedScoreValue::VectorSort(_)), Some(_)) | ||||
|             | (Some(_), Some(WeightedScoreValue::VectorSort(_))) | ||||
|             | (Some(WeightedScoreValue::GeoSort { .. }), Some(WeightedScoreValue::Sort { .. })) | ||||
|             | (Some(WeightedScoreValue::Sort { .. }), Some(WeightedScoreValue::GeoSort { .. })) => { | ||||
|                 let left_count = left_it.count(); | ||||
|                 let right_count = right_it.count(); | ||||
|                 // compare how many remaining groups of ranking rules each side has; | ||||
|                 // the side with the most remaining groups compares greater. | ||||
|                 return left_count | ||||
|                     .cmp(&right_count) | ||||
|                     // breaks ties with the global ranking score | ||||
|                     .then_with(|| { | ||||
|                         left_weighted_global_score | ||||
|                             .partial_cmp(&right_weighted_global_score) | ||||
|                             .unwrap() | ||||
|                     }); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
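For intuition, the comparison walks both score lists in lockstep: equal leading values are skipped, the first differing rule decides, and only incomparable details fall back to the remaining-group count and the global score. A hypothetical call, assuming only the `WeightedScore` variant shown above:

    use std::cmp::Ordering;
    use meilisearch_types::milli::score_details::WeightedScoreValue;

    fn example() {
        // first rule ties (0.9 vs 0.9) and is skipped; the second rule differs,
        // so `compare` returns `Ordering::Less` because the left score is lower
        let left = vec![WeightedScoreValue::WeightedScore(0.9), WeightedScoreValue::WeightedScore(0.5)];
        let right = vec![WeightedScoreValue::WeightedScore(0.9), WeightedScoreValue::WeightedScore(0.7)];
        assert_eq!(compare(left.into_iter(), 0.70, right.into_iter(), 0.80), Ordering::Less);
    }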
| @@ -30,7 +30,7 @@ use milli::{ | ||||
|     MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, | ||||
| }; | ||||
| use regex::Regex; | ||||
| use serde::Serialize; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde_json::{json, Value}; | ||||
| #[cfg(test)] | ||||
| mod mod_test; | ||||
| @@ -41,7 +41,7 @@ use crate::error::MeilisearchHttpError; | ||||
| mod federated; | ||||
| pub use federated::{ | ||||
|     perform_federated_search, FederatedSearch, FederatedSearchResult, Federation, | ||||
|     FederationOptions, MergeFacets, | ||||
|     FederationOptions, MergeFacets, PROXY_SEARCH_HEADER, PROXY_SEARCH_HEADER_VALUE, | ||||
| }; | ||||
|  | ||||
| mod ranking_rules; | ||||
| @@ -119,7 +119,7 @@ pub struct SearchQuery { | ||||
|     pub locales: Option<Vec<Locale>>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema)] | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Deserr, ToSchema, Serialize)] | ||||
| #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] | ||||
| pub struct RankingScoreThreshold(f64); | ||||
| impl std::convert::TryFrom<f64> for RankingScoreThreshold { | ||||
| @@ -275,11 +275,13 @@ impl fmt::Debug for SearchQuery { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema)] | ||||
| #[derive(Debug, Clone, Default, PartialEq, Deserr, ToSchema, Serialize)] | ||||
| #[deserr(error = DeserrJsonError<InvalidSearchHybridQuery>, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct HybridQuery { | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)] | ||||
|     #[schema(value_type = f32, default)] | ||||
|     #[serde(default)] | ||||
|     pub semantic_ratio: SemanticRatio, | ||||
|     #[deserr(error = DeserrJsonError<InvalidSearchEmbedder>)] | ||||
|     pub embedder: String, | ||||
| @@ -369,7 +371,7 @@ impl SearchKind { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Deserr)] | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Deserr, Serialize)] | ||||
| #[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)] | ||||
| pub struct SemanticRatio(f32); | ||||
|  | ||||
| @@ -411,8 +413,9 @@ impl SearchQuery { | ||||
| // This struct contains the fields of `SearchQuery` inline. | ||||
| // This is because neither deserr nor serde supports `flatten` when using `deny_unknown_fields`. | ||||
| // The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date. | ||||
| #[derive(Debug, Clone, PartialEq, Deserr, ToSchema)] | ||||
| #[derive(Debug, Clone, Serialize, PartialEq, Deserr, ToSchema)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| #[schema(rename_all = "camelCase")] | ||||
| pub struct SearchQueryWithIndex { | ||||
|     #[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)] | ||||
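The "remain up to date" guarantee mentioned in the comment above comes from exhaustive destructuring in the conversion functions: if a field is added to `SearchQuery` but not handled in `SearchQueryWithIndex` (or vice versa), the `let SearchQuery { ... } = query;` pattern stops compiling. A generic sketch of the trick with hypothetical types, not taken from this diff:

    struct A { x: u32, y: String }
    struct B { x: u32, y: String, extra: bool }

    impl From<A> for B {
        fn from(a: A) -> B {
            // exhaustive destructuring: adding a field to `A` without handling it
            // here is a compile error, so the two structs cannot silently drift apart
            let A { x, y } = a;
            B { x, y, extra: false }
        }
    }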
| @@ -493,6 +496,72 @@ impl SearchQueryWithIndex { | ||||
|         self.facets.as_deref().filter(|v| !v.is_empty()) | ||||
|     } | ||||
|  | ||||
|     pub fn from_index_query_federation( | ||||
|         index_uid: IndexUid, | ||||
|         query: SearchQuery, | ||||
|         federation_options: Option<FederationOptions>, | ||||
|     ) -> Self { | ||||
|         let SearchQuery { | ||||
|             q, | ||||
|             vector, | ||||
|             hybrid, | ||||
|             offset, | ||||
|             limit, | ||||
|             page, | ||||
|             hits_per_page, | ||||
|             attributes_to_retrieve, | ||||
|             retrieve_vectors, | ||||
|             attributes_to_crop, | ||||
|             crop_length, | ||||
|             attributes_to_highlight, | ||||
|             show_matches_position, | ||||
|             show_ranking_score, | ||||
|             show_ranking_score_details, | ||||
|             filter, | ||||
|             sort, | ||||
|             distinct, | ||||
|             facets, | ||||
|             highlight_pre_tag, | ||||
|             highlight_post_tag, | ||||
|             crop_marker, | ||||
|             matching_strategy, | ||||
|             attributes_to_search_on, | ||||
|             ranking_score_threshold, | ||||
|             locales, | ||||
|         } = query; | ||||
|  | ||||
|         SearchQueryWithIndex { | ||||
|             index_uid, | ||||
|             q, | ||||
|             vector, | ||||
|             hybrid, | ||||
|             offset: if offset == DEFAULT_SEARCH_OFFSET() { None } else { Some(offset) }, | ||||
|             limit: if limit == DEFAULT_SEARCH_LIMIT() { None } else { Some(limit) }, | ||||
|             page, | ||||
|             hits_per_page, | ||||
|             attributes_to_retrieve, | ||||
|             retrieve_vectors, | ||||
|             attributes_to_crop, | ||||
|             crop_length, | ||||
|             attributes_to_highlight, | ||||
|             show_ranking_score, | ||||
|             show_ranking_score_details, | ||||
|             show_matches_position, | ||||
|             filter, | ||||
|             sort, | ||||
|             distinct, | ||||
|             facets, | ||||
|             highlight_pre_tag, | ||||
|             highlight_post_tag, | ||||
|             crop_marker, | ||||
|             matching_strategy, | ||||
|             attributes_to_search_on, | ||||
|             ranking_score_threshold, | ||||
|             locales, | ||||
|             federation_options, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) { | ||||
|         let SearchQueryWithIndex { | ||||
|             index_uid, | ||||
| @@ -620,8 +689,9 @@ impl TryFrom<Value> for ExternalDocumentId { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema)] | ||||
| #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, ToSchema, Serialize)] | ||||
| #[deserr(rename_all = camelCase)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub enum MatchingStrategy { | ||||
|     /// Remove query words from last to first | ||||
|     Last, | ||||
| @@ -667,19 +737,19 @@ impl From<FacetValuesSort> for OrderBy { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Serialize, PartialEq, ToSchema)] | ||||
| #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)] | ||||
| pub struct SearchHit { | ||||
|     #[serde(flatten)] | ||||
|     #[schema(additional_properties, inline, value_type = HashMap<String, Value>)] | ||||
|     pub document: Document, | ||||
|     #[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")] | ||||
|     #[serde(default, rename = "_formatted", skip_serializing_if = "Document::is_empty")] | ||||
|     #[schema(additional_properties, value_type = HashMap<String, Value>)] | ||||
|     pub formatted: Document, | ||||
|     #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] | ||||
|     #[serde(default, rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] | ||||
|     pub matches_position: Option<MatchesPosition>, | ||||
|     #[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")] | ||||
|     #[serde(default, rename = "_rankingScore", skip_serializing_if = "Option::is_none")] | ||||
|     pub ranking_score: Option<f64>, | ||||
|     #[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")] | ||||
|     #[serde(default, rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")] | ||||
|     pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>, | ||||
| } | ||||
|  | ||||
| @@ -767,7 +837,7 @@ pub struct SearchResultWithIndex { | ||||
|     pub result: SearchResult, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)] | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)] | ||||
| #[serde(untagged)] | ||||
| pub enum HitsInfo { | ||||
|     #[serde(rename_all = "camelCase")] | ||||
| @@ -778,7 +848,7 @@ pub enum HitsInfo { | ||||
|     OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Debug, Clone, PartialEq, ToSchema)] | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)] | ||||
| pub struct FacetStats { | ||||
|     pub min: f64, | ||||
|     pub max: f64, | ||||
| @@ -1061,7 +1131,7 @@ pub fn perform_search( | ||||
|     Ok(result) | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Default, Serialize, ToSchema)] | ||||
| #[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] | ||||
| pub struct ComputedFacets { | ||||
|     #[schema(value_type = BTreeMap<String, BTreeMap<String, u64>>)] | ||||
|     pub distribution: BTreeMap<String, IndexMap<String, u64>>, | ||||
|   | ||||
										
Binary file not shown.
							| @@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() { | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###" | ||||
|     { | ||||
|       "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`", | ||||
|       "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`", | ||||
|       "code": "invalid_api_key_actions", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" | ||||
|   | ||||
| @@ -68,6 +68,8 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&' | ||||
|             ("GET",     "/keys") =>                                            hashset!{"keys.get", "*"}, | ||||
|             ("GET",     "/experimental-features") =>                           hashset!{"experimental.get", "*"}, | ||||
|             ("PATCH",   "/experimental-features") =>                           hashset!{"experimental.update", "*"}, | ||||
|             ("GET",   "/network") =>                                           hashset!{"network.get", "*"}, | ||||
|             ("PATCH",   "/network") =>                                         hashset!{"network.update", "*"}, | ||||
|         }; | ||||
|  | ||||
|         authorizations | ||||
|   | ||||
| @@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() { | ||||
|     snapshot!(code, @"400 Bad Request"); | ||||
|     snapshot!(json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`", | ||||
|       "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `network.get`, `network.update`", | ||||
|       "code": "invalid_api_key_actions", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions" | ||||
|   | ||||
| @@ -41,9 +41,8 @@ async fn list_batches() { | ||||
|     let index = server.index("test"); | ||||
|     let (task, _status_code) = index.create(None).await; | ||||
|     index.wait_task(task.uid()).await.succeeded(); | ||||
|     index | ||||
|         .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) | ||||
|         .await; | ||||
|     let (task, _status_code) = index.create(None).await; | ||||
|     index.wait_task(task.uid()).await.failed(); | ||||
|     let (response, code) = index.list_batches().await; | ||||
|     assert_eq!(code, 200); | ||||
|     assert_eq!( | ||||
| @@ -96,11 +95,12 @@ async fn list_batches_pagination_and_reverse() { | ||||
| async fn list_batches_with_star_filters() { | ||||
|     let server = Server::new().await; | ||||
|     let index = server.index("test"); | ||||
|     let (batch, _code) = index.create(None).await; | ||||
|     index.wait_task(batch.uid()).await.succeeded(); | ||||
|     index | ||||
|         .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) | ||||
|         .await; | ||||
|     let (task, _code) = index.create(None).await; | ||||
|     index.wait_task(task.uid()).await.succeeded(); | ||||
|     let index = server.index("test"); | ||||
|     let (task, _code) = index.create(None).await; | ||||
|     index.wait_task(task.uid()).await.failed(); | ||||
|  | ||||
|     let (response, code) = index.service.get("/batches?indexUids=test").await; | ||||
|     assert_eq!(code, 200); | ||||
|     assert_eq!(response["results"].as_array().unwrap().len(), 2); | ||||
| @@ -187,9 +187,6 @@ async fn list_batches_invalid_canceled_by_filter() { | ||||
|     let index = server.index("test"); | ||||
|     let (task, _status_code) = index.create(None).await; | ||||
|     index.wait_task(task.uid()).await.succeeded(); | ||||
|     index | ||||
|         .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) | ||||
|         .await; | ||||
|  | ||||
|     let (response, code) = index.filtered_batches(&[], &[], &["0"]).await; | ||||
|     assert_eq!(code, 200, "{}", response); | ||||
| @@ -202,9 +199,8 @@ async fn list_batches_status_and_type_filtered() { | ||||
|     let index = server.index("test"); | ||||
|     let (task, _status_code) = index.create(None).await; | ||||
|     index.wait_task(task.uid()).await.succeeded(); | ||||
|     index | ||||
|         .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) | ||||
|         .await; | ||||
|     let (task, _status_code) = index.update(Some("id")).await; | ||||
|     index.wait_task(task.uid()).await.succeeded(); | ||||
|  | ||||
|     let (response, code) = index.filtered_batches(&["indexCreation"], &["failed"], &[]).await; | ||||
|     assert_eq!(code, 200, "{}", response); | ||||
| @@ -212,7 +208,7 @@ async fn list_batches_status_and_type_filtered() { | ||||
|  | ||||
|     let (response, code) = index | ||||
|         .filtered_batches( | ||||
|             &["indexCreation", "documentAdditionOrUpdate"], | ||||
|             &["indexCreation", "IndexUpdate"], | ||||
|             &["succeeded", "processing", "enqueued"], | ||||
|             &[], | ||||
|         ) | ||||
|   | ||||
| @@ -88,6 +88,10 @@ impl Server<Owned> { | ||||
|         self.service.api_key = Some(api_key.as_ref().to_string()); | ||||
|     } | ||||
|  | ||||
|     pub fn clear_api_key(&mut self) { | ||||
|         self.service.api_key = None; | ||||
|     } | ||||
|  | ||||
|     /// Fetch and use the default admin key for the next HTTP requests. | ||||
|     pub async fn use_admin_key(&mut self, master_key: impl AsRef<str>) { | ||||
|         self.use_api_key(master_key); | ||||
| @@ -159,10 +163,18 @@ impl Server<Owned> { | ||||
|         self.service.get("/tasks").await | ||||
|     } | ||||
|  | ||||
|     pub async fn batches(&self) -> (Value, StatusCode) { | ||||
|         self.service.get("/batches").await | ||||
|     } | ||||
|  | ||||
|     pub async fn set_features(&self, value: Value) -> (Value, StatusCode) { | ||||
|         self.service.patch("/experimental-features", value).await | ||||
|     } | ||||
|  | ||||
|     pub async fn set_network(&self, value: Value) -> (Value, StatusCode) { | ||||
|         self.service.patch("/network", value).await | ||||
|     } | ||||
|  | ||||
|     pub async fn get_metrics(&self) -> (Value, StatusCode) { | ||||
|         self.service.get("/metrics").await | ||||
|     } | ||||
| @@ -408,6 +420,10 @@ impl<State> Server<State> { | ||||
|     pub async fn get_features(&self) -> (Value, StatusCode) { | ||||
|         self.service.get("/experimental-features").await | ||||
|     } | ||||
|  | ||||
|     pub async fn get_network(&self) -> (Value, StatusCode) { | ||||
|         self.service.get("/network").await | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn default_settings(dir: impl AsRef<Path>) -> Opt { | ||||
|   | ||||
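A hypothetical test snippet using the new `/network` helpers added above; the expected status codes and the payload shape are assumptions, not taken from this section:

    // hypothetical sketch only: exercise the new helpers
    let (network, code) = server.get_network().await;
    assert_eq!(code, 200, "{network}");

    let (response, code) = server.set_network(json!({})).await;
    assert_eq!(code, 200, "{response}");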
| @@ -1803,6 +1803,275 @@ async fn add_documents_with_geo_field() { | ||||
|       "finishedAt": "[date]" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; | ||||
|  | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), | ||||
|     @r###" | ||||
|     { | ||||
|       "results": [ | ||||
|         { | ||||
|           "id": "1" | ||||
|         }, | ||||
|         { | ||||
|           "id": "2", | ||||
|           "_geo": null | ||||
|         }, | ||||
|         { | ||||
|           "id": "3", | ||||
|           "_geo": { | ||||
|             "lat": 1, | ||||
|             "lng": 1 | ||||
|           } | ||||
|         }, | ||||
|         { | ||||
|           "id": "4", | ||||
|           "_geo": { | ||||
|             "lat": "1", | ||||
|             "lng": "1" | ||||
|           } | ||||
|         } | ||||
|       ], | ||||
|       "offset": 0, | ||||
|       "limit": 20, | ||||
|       "total": 4 | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]})) | ||||
|         .await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     // we are expecting docs 4 and 3 first as they have geo | ||||
|     snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), | ||||
|     @r###" | ||||
|     { | ||||
|       "hits": [ | ||||
|         { | ||||
|           "id": "4", | ||||
|           "_geo": { | ||||
|             "lat": "1", | ||||
|             "lng": "1" | ||||
|           }, | ||||
|           "_geoDistance": 5522018 | ||||
|         }, | ||||
|         { | ||||
|           "id": "3", | ||||
|           "_geo": { | ||||
|             "lat": 1, | ||||
|             "lng": 1 | ||||
|           }, | ||||
|           "_geoDistance": 5522018 | ||||
|         }, | ||||
|         { | ||||
|           "id": "1" | ||||
|         }, | ||||
|         { | ||||
|           "id": "2", | ||||
|           "_geo": null | ||||
|         } | ||||
|       ], | ||||
|       "query": "", | ||||
|       "processingTimeMs": "[time]", | ||||
|       "limit": 20, | ||||
|       "offset": 0, | ||||
|       "estimatedTotalHits": 4 | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn update_documents_with_geo_field() { | ||||
|     let server = Server::new().await; | ||||
|     let index = server.index("doggo"); | ||||
|     index.update_settings(json!({"sortableAttributes": ["_geo"]})).await; | ||||
|  | ||||
|     let documents = json!([ | ||||
|         { | ||||
|             "id": "1", | ||||
|         }, | ||||
|         { | ||||
|             "id": "2", | ||||
|             "_geo": null, | ||||
|         }, | ||||
|         { | ||||
|             "id": "3", | ||||
|             "_geo": { "lat": 1, "lng": 1 }, | ||||
|         }, | ||||
|         { | ||||
|             "id": "4", | ||||
|             "_geo": { "lat": "1", "lng": "1" }, | ||||
|         }, | ||||
|     ]); | ||||
|  | ||||
|     let (task, _status_code) = index.add_documents(documents, None).await; | ||||
|     let response = index.wait_task(task.uid()).await; | ||||
|     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), | ||||
|         @r###" | ||||
|     { | ||||
|       "uid": 1, | ||||
|       "batchUid": 1, | ||||
|       "indexUid": "doggo", | ||||
|       "status": "succeeded", | ||||
|       "type": "documentAdditionOrUpdate", | ||||
|       "canceledBy": null, | ||||
|       "details": { | ||||
|         "receivedDocuments": 4, | ||||
|         "indexedDocuments": 4 | ||||
|       }, | ||||
|       "error": null, | ||||
|       "duration": "[duration]", | ||||
|       "enqueuedAt": "[date]", | ||||
|       "startedAt": "[date]", | ||||
|       "finishedAt": "[date]" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]})) | ||||
|         .await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     // we are expecting docs 4 and 3 first as they have geo | ||||
|     snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), | ||||
|     @r###" | ||||
|     { | ||||
|       "hits": [ | ||||
|         { | ||||
|           "id": "4", | ||||
|           "_geo": { | ||||
|             "lat": "1", | ||||
|             "lng": "1" | ||||
|           }, | ||||
|           "_geoDistance": 5522018 | ||||
|         }, | ||||
|         { | ||||
|           "id": "3", | ||||
|           "_geo": { | ||||
|             "lat": 1, | ||||
|             "lng": 1 | ||||
|           }, | ||||
|           "_geoDistance": 5522018 | ||||
|         }, | ||||
|         { | ||||
|           "id": "1" | ||||
|         }, | ||||
|         { | ||||
|           "id": "2", | ||||
|           "_geo": null | ||||
|         } | ||||
|       ], | ||||
|       "query": "", | ||||
|       "processingTimeMs": "[time]", | ||||
|       "limit": 20, | ||||
|       "offset": 0, | ||||
|       "estimatedTotalHits": 4 | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let updated_documents = json!([{ | ||||
|       "id": "3", | ||||
|       "doggo": "kefir", | ||||
|     }]); | ||||
|     let (task, _status_code) = index.update_documents(updated_documents, None).await; | ||||
|     let response = index.wait_task(task.uid()).await; | ||||
|     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), | ||||
|         @r###" | ||||
|     { | ||||
|       "uid": 2, | ||||
|       "batchUid": 2, | ||||
|       "indexUid": "doggo", | ||||
|       "status": "succeeded", | ||||
|       "type": "documentAdditionOrUpdate", | ||||
|       "canceledBy": null, | ||||
|       "details": { | ||||
|         "receivedDocuments": 1, | ||||
|         "indexedDocuments": 1 | ||||
|       }, | ||||
|       "error": null, | ||||
|       "duration": "[duration]", | ||||
|       "enqueuedAt": "[date]", | ||||
|       "startedAt": "[date]", | ||||
|       "finishedAt": "[date]" | ||||
|     } | ||||
|     "###); | ||||
|     let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; | ||||
|  | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), | ||||
|     @r###" | ||||
|     { | ||||
|       "results": [ | ||||
|         { | ||||
|           "id": "1" | ||||
|         }, | ||||
|         { | ||||
|           "id": "2", | ||||
|           "_geo": null | ||||
|         }, | ||||
|         { | ||||
|           "id": "3", | ||||
|           "_geo": { | ||||
|             "lat": 1, | ||||
|             "lng": 1 | ||||
|           }, | ||||
|           "doggo": "kefir" | ||||
|         }, | ||||
|         { | ||||
|           "id": "4", | ||||
|           "_geo": { | ||||
|             "lat": "1", | ||||
|             "lng": "1" | ||||
|           } | ||||
|         } | ||||
|       ], | ||||
|       "offset": 0, | ||||
|       "limit": 20, | ||||
|       "total": 4 | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (response, code) = index | ||||
|         .search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]})) | ||||
|         .await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     // the search response should not have changed: we are expecting docs 4 and 3 first as they have geo | ||||
|     snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), | ||||
|     @r###" | ||||
|     { | ||||
|       "hits": [ | ||||
|         { | ||||
|           "id": "4", | ||||
|           "_geo": { | ||||
|             "lat": "1", | ||||
|             "lng": "1" | ||||
|           }, | ||||
|           "_geoDistance": 5522018 | ||||
|         }, | ||||
|         { | ||||
|           "id": "3", | ||||
|           "_geo": { | ||||
|             "lat": 1, | ||||
|             "lng": 1 | ||||
|           }, | ||||
|           "doggo": "kefir", | ||||
|           "_geoDistance": 5522018 | ||||
|         }, | ||||
|         { | ||||
|           "id": "1" | ||||
|         }, | ||||
|         { | ||||
|           "id": "2", | ||||
|           "_geo": null | ||||
|         } | ||||
|       ], | ||||
|       "query": "", | ||||
|       "processingTimeMs": "[time]", | ||||
|       "limit": 20, | ||||
|       "offset": 0, | ||||
|       "estimatedTotalHits": 4 | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
|   | ||||
| @@ -161,6 +161,8 @@ async fn delete_document_by_filter() { | ||||
|     { | ||||
|       "numberOfDocuments": 4, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "color": 3, | ||||
|         "id": 4 | ||||
| @@ -208,6 +210,8 @@ async fn delete_document_by_filter() { | ||||
|     { | ||||
|       "numberOfDocuments": 2, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "color": 1, | ||||
|         "id": 2 | ||||
| @@ -274,6 +278,8 @@ async fn delete_document_by_filter() { | ||||
|     { | ||||
|       "numberOfDocuments": 1, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "color": 1, | ||||
|         "id": 1 | ||||
|   | ||||
| @@ -22,6 +22,7 @@ pub enum GetDump { | ||||
|     TestV5, | ||||
|  | ||||
|     TestV6WithExperimental, | ||||
|     TestV6WithBatchesAndEnqueuedTasks, | ||||
| } | ||||
|  | ||||
| impl GetDump { | ||||
| @@ -74,6 +75,10 @@ impl GetDump { | ||||
|                 "tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump" | ||||
|             ) | ||||
|             .into(), | ||||
|             GetDump::TestV6WithBatchesAndEnqueuedTasks => { | ||||
|                 exist_relative_path!("tests/assets/v6_v1.13.0_batches_and_enqueued_tasks.dump") | ||||
|                     .into() | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -27,9 +27,24 @@ async fn import_dump_v1_movie_raw() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "genres": 53, | ||||
|         "id": 53, | ||||
|         "overview": 53, | ||||
|         "poster": 53, | ||||
|         "release_date": 53, | ||||
|         "title": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -173,6 +188,8 @@ async fn import_dump_v1_movie_with_settings() { | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "genres": 53, | ||||
|         "id": 53, | ||||
| @@ -333,9 +350,24 @@ async fn import_dump_v1_rubygems_with_settings() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "description": 53, | ||||
|         "id": 53, | ||||
|         "name": 53, | ||||
|         "summary": 53, | ||||
|         "total_downloads": 53, | ||||
|         "version": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -483,9 +515,24 @@ async fn import_dump_v2_movie_raw() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "genres": 53, | ||||
|         "id": 53, | ||||
|         "overview": 53, | ||||
|         "poster": 53, | ||||
|         "release_date": 53, | ||||
|         "title": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -623,9 +670,24 @@ async fn import_dump_v2_movie_with_settings() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "genres": 53, | ||||
|         "id": 53, | ||||
|         "overview": 53, | ||||
|         "poster": 53, | ||||
|         "release_date": 53, | ||||
|         "title": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -773,9 +835,24 @@ async fn import_dump_v2_rubygems_with_settings() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "description": 53, | ||||
|         "id": 53, | ||||
|         "name": 53, | ||||
|         "summary": 53, | ||||
|         "total_downloads": 53, | ||||
|         "version": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -920,9 +997,24 @@ async fn import_dump_v3_movie_raw() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "genres": 53, | ||||
|         "id": 53, | ||||
|         "overview": 53, | ||||
|         "poster": 53, | ||||
|         "release_date": 53, | ||||
|         "title": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -1060,9 +1152,24 @@ async fn import_dump_v3_movie_with_settings() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "genres": 53, | ||||
|         "id": 53, | ||||
|         "overview": 53, | ||||
|         "poster": 53, | ||||
|         "release_date": 53, | ||||
|         "title": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -1210,9 +1317,24 @@ async fn import_dump_v3_rubygems_with_settings() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "description": 53, | ||||
|         "id": 53, | ||||
|         "name": 53, | ||||
|         "summary": 53, | ||||
|         "total_downloads": 53, | ||||
|         "version": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -1357,9 +1479,24 @@ async fn import_dump_v4_movie_raw() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "genres": 53, | ||||
|         "id": 53, | ||||
|         "overview": 53, | ||||
|         "poster": 53, | ||||
|         "release_date": 53, | ||||
|         "title": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -1497,9 +1634,24 @@ async fn import_dump_v4_movie_with_settings() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "genres": 53, | ||||
|         "id": 53, | ||||
|         "overview": 53, | ||||
|         "poster": 53, | ||||
|         "release_date": 53, | ||||
|         "title": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -1647,9 +1799,24 @@ async fn import_dump_v4_rubygems_with_settings() { | ||||
|  | ||||
|     let (stats, code) = index.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!( | ||||
|         stats, | ||||
|         json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 53, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "description": 53, | ||||
|         "id": 53, | ||||
|         "name": 53, | ||||
|         "summary": 53, | ||||
|         "total_downloads": 53, | ||||
|         "version": 53 | ||||
|       } | ||||
|     } | ||||
|     "### | ||||
|     ); | ||||
|  | ||||
|     let (settings, code) = index.settings().await; | ||||
| @@ -1798,33 +1965,35 @@ async fn import_dump_v5() { | ||||
|         server.wait_task(task["uid"].as_u64().unwrap()).await; | ||||
|     } | ||||
|  | ||||
|     let expected_stats = json!({ | ||||
|         "numberOfDocuments": 10, | ||||
|         "isIndexing": false, | ||||
|         "fieldDistribution": { | ||||
|             "cast": 10, | ||||
|             "director": 10, | ||||
|             "genres": 10, | ||||
|             "id": 10, | ||||
|             "overview": 10, | ||||
|             "popularity": 10, | ||||
|             "poster_path": 10, | ||||
|             "producer": 10, | ||||
|             "production_companies": 10, | ||||
|             "release_date": 10, | ||||
|             "tagline": 10, | ||||
|             "title": 10, | ||||
|             "vote_average": 10, | ||||
|             "vote_count": 10 | ||||
|         } | ||||
|     }); | ||||
|  | ||||
|     let index1 = server.index("test"); | ||||
|     let index2 = server.index("test2"); | ||||
|  | ||||
|     let (stats, code) = index1.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!(stats, expected_stats); | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 10, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "cast": 10, | ||||
|         "director": 10, | ||||
|         "genres": 10, | ||||
|         "id": 10, | ||||
|         "overview": 10, | ||||
|         "popularity": 10, | ||||
|         "poster_path": 10, | ||||
|         "producer": 10, | ||||
|         "production_companies": 10, | ||||
|         "release_date": 10, | ||||
|         "tagline": 10, | ||||
|         "title": 10, | ||||
|         "vote_average": 10, | ||||
|         "vote_count": 10 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
| @@ -1835,7 +2004,32 @@ async fn import_dump_v5() { | ||||
|  | ||||
|     let (stats, code) = index2.stats().await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
|     assert_eq!(stats, expected_stats); | ||||
|     snapshot!( | ||||
|       json_string!(stats), | ||||
|       @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 10, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "cast": 10, | ||||
|         "director": 10, | ||||
|         "genres": 10, | ||||
|         "id": 10, | ||||
|         "overview": 10, | ||||
|         "popularity": 10, | ||||
|         "poster_path": 10, | ||||
|         "producer": 10, | ||||
|         "production_companies": 10, | ||||
|         "release_date": 10, | ||||
|         "tagline": 10, | ||||
|         "title": 10, | ||||
|         "vote_average": 10, | ||||
|         "vote_count": 10 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (keys, code) = server.list_api_keys("").await; | ||||
|     snapshot!(code, @"200 OK"); | ||||
| @@ -1908,7 +2102,9 @@ async fn import_dump_v6_containing_experimental_features() { | ||||
|       "metrics": false, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
| @@ -1992,6 +2188,63 @@ async fn import_dump_v6_containing_experimental_features() { | ||||
|         .await; | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn import_dump_v6_containing_batches_and_enqueued_tasks() { | ||||
|     let temp = tempfile::tempdir().unwrap(); | ||||
|  | ||||
|     let options = Opt { | ||||
|         import_dump: Some(GetDump::TestV6WithBatchesAndEnqueuedTasks.path()), | ||||
|         ..default_settings(temp.path()) | ||||
|     }; | ||||
|     let mut server = Server::new_auth_with_options(options, temp).await; | ||||
|     server.use_api_key("MASTER_KEY"); | ||||
|     server.wait_task(2).await.succeeded(); | ||||
|     let (tasks, _) = server.tasks().await; | ||||
|     snapshot!(json_string!(tasks, { ".results[1].startedAt" => "[date]", ".results[1].finishedAt" => "[date]", ".results[1].duration" => "[date]" }), name: "tasks"); | ||||
|     let (batches, _) = server.batches().await; | ||||
|     snapshot!(json_string!(batches, { ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].duration" => "[date]" }), name: "batches"); | ||||
|  | ||||
|     let (indexes, code) = server.list_indexes(None, None).await; | ||||
|     assert_eq!(code, 200, "{indexes}"); | ||||
|  | ||||
|     assert_eq!(indexes["results"].as_array().unwrap().len(), 1); | ||||
|     assert_eq!(indexes["results"][0]["uid"], json!("kefir")); | ||||
|     assert_eq!(indexes["results"][0]["primaryKey"], json!("id")); | ||||
|  | ||||
|     let (response, code) = server.get_features().await; | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "metrics": false, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let index = server.index("kefir"); | ||||
|     let (documents, _) = index.get_all_documents_raw("").await; | ||||
|     snapshot!(documents, @r#" | ||||
|     { | ||||
|       "results": [ | ||||
|         { | ||||
|           "id": 1, | ||||
|           "dog": "kefir" | ||||
|         }, | ||||
|         { | ||||
|           "id": 2, | ||||
|           "dog": "intel" | ||||
|         } | ||||
|       ], | ||||
|       "offset": 0, | ||||
|       "limit": 20, | ||||
|       "total": 2 | ||||
|     } | ||||
|     "#); | ||||
| } | ||||
|  | ||||
| // In this test we must generate the dump ourselves to ensure the | ||||
| // `user provided` vectors are well set | ||||
| #[actix_rt::test] | ||||
| @@ -2069,7 +2322,9 @@ async fn generate_and_import_dump_containing_vectors() { | ||||
|       "metrics": false, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|   | ||||
| @@ -0,0 +1,78 @@ | ||||
| --- | ||||
| source: crates/meilisearch/tests/dumps/mod.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| { | ||||
|   "results": [ | ||||
|     { | ||||
|       "uid": 2, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "receivedDocuments": 1, | ||||
|         "indexedDocuments": 1 | ||||
|       }, | ||||
|       "stats": { | ||||
|         "totalNbTasks": 1, | ||||
|         "status": { | ||||
|           "succeeded": 1 | ||||
|         }, | ||||
|         "types": { | ||||
|           "documentAdditionOrUpdate": 1 | ||||
|         }, | ||||
|         "indexUids": { | ||||
|           "kefir": 1 | ||||
|         } | ||||
|       }, | ||||
|       "duration": "[date]", | ||||
|       "startedAt": "[date]", | ||||
|       "finishedAt": "[date]" | ||||
|     }, | ||||
|     { | ||||
|       "uid": 1, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "receivedDocuments": 1, | ||||
|         "indexedDocuments": 1 | ||||
|       }, | ||||
|       "stats": { | ||||
|         "totalNbTasks": 1, | ||||
|         "status": { | ||||
|           "succeeded": 1 | ||||
|         }, | ||||
|         "types": { | ||||
|           "documentAdditionOrUpdate": 1 | ||||
|         }, | ||||
|         "indexUids": { | ||||
|           "kefir": 1 | ||||
|         } | ||||
|       }, | ||||
|       "duration": "PT0.144827890S", | ||||
|       "startedAt": "2025-02-04T10:15:21.275640274Z", | ||||
|       "finishedAt": "2025-02-04T10:15:21.420468164Z" | ||||
|     }, | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": {}, | ||||
|       "stats": { | ||||
|         "totalNbTasks": 1, | ||||
|         "status": { | ||||
|           "succeeded": 1 | ||||
|         }, | ||||
|         "types": { | ||||
|           "indexCreation": 1 | ||||
|         }, | ||||
|         "indexUids": { | ||||
|           "kefir": 1 | ||||
|         } | ||||
|       }, | ||||
|       "duration": "PT0.032902186S", | ||||
|       "startedAt": "2025-02-04T10:14:43.559526162Z", | ||||
|       "finishedAt": "2025-02-04T10:14:43.592428348Z" | ||||
|     } | ||||
|   ], | ||||
|   "total": 3, | ||||
|   "limit": 20, | ||||
|   "from": 2, | ||||
|   "next": null | ||||
| } | ||||
| @@ -0,0 +1,78 @@ | ||||
| --- | ||||
| source: crates/meilisearch/tests/dumps/mod.rs | ||||
| snapshot_kind: text | ||||
| --- | ||||
| { | ||||
|   "results": [ | ||||
|     { | ||||
|       "uid": 3, | ||||
|       "batchUid": null, | ||||
|       "indexUid": null, | ||||
|       "status": "succeeded", | ||||
|       "type": "dumpCreation", | ||||
|       "canceledBy": null, | ||||
|       "details": { | ||||
|         "dumpUid": null | ||||
|       }, | ||||
|       "error": null, | ||||
|       "duration": "PT0.000629059S", | ||||
|       "enqueuedAt": "2025-02-04T10:22:31.318175268Z", | ||||
|       "startedAt": "2025-02-04T10:22:31.331701375Z", | ||||
|       "finishedAt": "2025-02-04T10:22:31.332330434Z" | ||||
|     }, | ||||
|     { | ||||
|       "uid": 2, | ||||
|       "batchUid": 2, | ||||
|       "indexUid": "kefir", | ||||
|       "status": "succeeded", | ||||
|       "type": "documentAdditionOrUpdate", | ||||
|       "canceledBy": null, | ||||
|       "details": { | ||||
|         "receivedDocuments": 1, | ||||
|         "indexedDocuments": 1 | ||||
|       }, | ||||
|       "error": null, | ||||
|       "duration": "[date]", | ||||
|       "enqueuedAt": "2025-02-04T10:15:49.212484063Z", | ||||
|       "startedAt": "[date]", | ||||
|       "finishedAt": "[date]" | ||||
|     }, | ||||
|     { | ||||
|       "uid": 1, | ||||
|       "batchUid": null, | ||||
|       "indexUid": "kefir", | ||||
|       "status": "succeeded", | ||||
|       "type": "documentAdditionOrUpdate", | ||||
|       "canceledBy": null, | ||||
|       "details": { | ||||
|         "receivedDocuments": 1, | ||||
|         "indexedDocuments": 1 | ||||
|       }, | ||||
|       "error": null, | ||||
|       "duration": "PT0.144827890S", | ||||
|       "enqueuedAt": "2025-02-04T10:15:21.258630973Z", | ||||
|       "startedAt": "2025-02-04T10:15:21.275640274Z", | ||||
|       "finishedAt": "2025-02-04T10:15:21.420468164Z" | ||||
|     }, | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "batchUid": null, | ||||
|       "indexUid": "kefir", | ||||
|       "status": "succeeded", | ||||
|       "type": "indexCreation", | ||||
|       "canceledBy": null, | ||||
|       "details": { | ||||
|         "primaryKey": null | ||||
|       }, | ||||
|       "error": null, | ||||
|       "duration": "PT0.032902186S", | ||||
|       "enqueuedAt": "2025-02-04T10:14:43.550379968Z", | ||||
|       "startedAt": "2025-02-04T10:14:43.559526162Z", | ||||
|       "finishedAt": "2025-02-04T10:14:43.592428348Z" | ||||
|     } | ||||
|   ], | ||||
|   "total": 4, | ||||
|   "limit": 20, | ||||
|   "from": 3, | ||||
|   "next": null | ||||
| } | ||||
| @@ -21,7 +21,9 @@ async fn experimental_features() { | ||||
|       "metrics": false, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
| @@ -33,7 +35,9 @@ async fn experimental_features() { | ||||
|       "metrics": true, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
| @@ -45,7 +49,9 @@ async fn experimental_features() { | ||||
|       "metrics": true, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
| @@ -58,7 +64,9 @@ async fn experimental_features() { | ||||
|       "metrics": true, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
| @@ -71,7 +79,9 @@ async fn experimental_features() { | ||||
|       "metrics": true, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
| @@ -91,7 +101,9 @@ async fn experimental_feature_metrics() { | ||||
|       "metrics": true, | ||||
|       "logsRoute": false, | ||||
|       "editDocumentsByFunction": false, | ||||
|       "containsFilter": false | ||||
|       "containsFilter": false, | ||||
|       "network": false, | ||||
|       "getTaskDocumentsRoute": false | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
| @@ -146,7 +158,7 @@ async fn errors() { | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`", | ||||
|       "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`", | ||||
|       "code": "bad_request", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#bad_request" | ||||
|   | ||||
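The updated error message above enumerates every accepted experimental feature flag, now including `network` and `getTaskDocumentsRoute`. As a rough illustration of how an unknown key gets rejected with such a message, here is a serde-based sketch; it is not Meilisearch's actual deserr-based implementation and the struct name is made up:

```rust
use serde::Deserialize;

// Hypothetical mirror of the experimental-features payload used in these tests.
#[derive(Debug, Default, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields, default)]
struct RuntimeFeatures {
    metrics: bool,
    logs_route: bool,
    edit_documents_by_function: bool,
    contains_filter: bool,
    network: bool,
    get_task_documents_route: bool,
}

fn main() {
    // A valid payload only needs the flags it wants to toggle.
    let ok: RuntimeFeatures = serde_json::from_str(r#"{ "network": true }"#).unwrap();
    assert!(ok.network && !ok.metrics);

    // An unknown key is rejected with a message listing the accepted fields,
    // similar to the `bad_request` error captured in the snapshot above.
    let err = serde_json::from_str::<RuntimeFeatures>(r#"{ "NotAFeature": true }"#).unwrap_err();
    println!("{err}");
}
```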
| @@ -7,6 +7,7 @@ mod dumps; | ||||
| mod features; | ||||
| mod index; | ||||
| mod logs; | ||||
| mod network; | ||||
| mod search; | ||||
| mod settings; | ||||
| mod similar; | ||||
|   | ||||
							
								
								
									
crates/meilisearch/tests/network/mod.rs (new file, 606 lines)
							| @@ -0,0 +1,606 @@ | ||||
| use serde_json::Value::Null; | ||||
|  | ||||
| use crate::common::Server; | ||||
| use crate::json; | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn error_network_not_enabled() { | ||||
|     let server = Server::new().await; | ||||
|  | ||||
|     let (response, code) = server.get_network().await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805", | ||||
|       "code": "feature_not_enabled", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (response, code) = server.set_network(json!({"self": "myself"})).await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Using the /network route requires enabling the `network` experimental feature. See https://github.com/orgs/meilisearch/discussions/805", | ||||
|       "code": "feature_not_enabled", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn errors_on_param() { | ||||
|     let server = Server::new().await; | ||||
|  | ||||
|     let (response, code) = server.set_features(json!({"network": true})).await; | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); | ||||
|  | ||||
|     // non-existing param | ||||
|     let (response, code) = server.set_network(json!({"selfie": "myself"})).await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Unknown field `selfie`: expected one of `remotes`, `self`", | ||||
|       "code": "bad_request", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#bad_request" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // self not a string | ||||
|     let (response, code) = server.set_network(json!({"self": 42})).await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Invalid value type at `.self`: expected a string, but found a positive integer: `42`", | ||||
|       "code": "invalid_network_self", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_network_self" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // remotes not an object | ||||
|     let (response, code) = server.set_network(json!({"remotes": 42})).await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Invalid value type at `.remotes`: expected an object, but found a positive integer: `42`", | ||||
|       "code": "invalid_network_remotes", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_network_remotes" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // new remote without url | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "new": { | ||||
|                 "searchApiKey": "http://localhost:7700" | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Missing field `.remotes.new.url`", | ||||
|       "code": "missing_network_url", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#missing_network_url" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // remote with url not a string | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "new": { | ||||
|                 "url": 7700 | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Invalid value type at `.remotes.new.url`: expected a string, but found a positive integer: `7700`", | ||||
|       "code": "invalid_network_url", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_network_url" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // remote with non-existing param | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "new": { | ||||
|                 "url": "http://localhost:7700", | ||||
|                 "doggo": "Intel the Beagle" | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`", | ||||
|       "code": "invalid_network_remotes", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_network_remotes" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // remote with non-string searchApiKey | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "new": { | ||||
|                 "url": "http://localhost:7700", | ||||
|                 "searchApiKey": 1204664602099962445u64, | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Invalid value type at `.remotes.new.searchApiKey`: expected a string, but found a positive integer: `1204664602099962445`", | ||||
|       "code": "invalid_network_search_api_key", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_network_search_api_key" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // setting `null` on URL a posteriori | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "kefir": { | ||||
|                 "url": "http://localhost:7700", | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": null, | ||||
|       "remotes": { | ||||
|         "kefir": { | ||||
|           "url": "http://localhost:7700", | ||||
|           "searchApiKey": null | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "kefir": { | ||||
|                 "url": Null, | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"400 Bad Request"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "Field `.remotes.kefir.url` cannot be set to `null`", | ||||
|       "code": "invalid_network_url", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_network_url" | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn auth() { | ||||
|     let mut server = Server::new_auth().await; | ||||
|     server.use_api_key("MASTER_KEY"); | ||||
|  | ||||
|     let (response, code) = server.set_features(json!({"network": true})).await; | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); | ||||
|  | ||||
|     let (get_network_key, code) = server | ||||
|         .add_api_key(json!({ | ||||
|           "actions": ["network.get"], | ||||
|           "indexes": ["*"], | ||||
|           "expiresAt": serde_json::Value::Null | ||||
|         })) | ||||
|         .await; | ||||
|     meili_snap::snapshot!(code, @"201 Created"); | ||||
|     let get_network_key = get_network_key["key"].clone(); | ||||
|  | ||||
|     let (update_network_key, code) = server | ||||
|         .add_api_key(json!({ | ||||
|           "actions": ["network.update"], | ||||
|           "indexes": ["*"], | ||||
|           "expiresAt": serde_json::Value::Null | ||||
|         })) | ||||
|         .await; | ||||
|     meili_snap::snapshot!(code, @"201 Created"); | ||||
|     let update_network_key = update_network_key["key"].clone(); | ||||
|  | ||||
|     let (search_api_key, code) = server | ||||
|         .add_api_key(json!({ | ||||
|           "actions": ["search"], | ||||
|           "indexes": ["*"], | ||||
|           "expiresAt": serde_json::Value::Null | ||||
|         })) | ||||
|         .await; | ||||
|     meili_snap::snapshot!(code, @"201 Created"); | ||||
|     let search_api_key = search_api_key["key"].clone(); | ||||
|  | ||||
|     // try with master key | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({ | ||||
|           "self": "master" | ||||
|         })) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": "master", | ||||
|       "remotes": {} | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (response, code) = server.get_network().await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
| { | ||||
|   "self": "master", | ||||
|   "remotes": {} | ||||
| } | ||||
| "###); | ||||
|  | ||||
|     // try get with get permission | ||||
|     server.use_api_key(get_network_key.as_str().unwrap()); | ||||
|     let (response, code) = server.get_network().await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
| { | ||||
|   "self": "master", | ||||
|   "remotes": {} | ||||
| } | ||||
| "###); | ||||
|  | ||||
|     // try update with update permission | ||||
|     server.use_api_key(update_network_key.as_str().unwrap()); | ||||
|  | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({ | ||||
|           "self": "api_key" | ||||
|         })) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
| { | ||||
|   "self": "api_key", | ||||
|   "remotes": {} | ||||
| } | ||||
| "###); | ||||
|  | ||||
|     // try with the other's permission | ||||
|     let (response, code) = server.get_network().await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"403 Forbidden"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "The provided API key is invalid.", | ||||
|       "code": "invalid_api_key", | ||||
|       "type": "auth", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_api_key" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     server.use_api_key(get_network_key.as_str().unwrap()); | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({ | ||||
|           "self": "get_api_key" | ||||
|         })) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"403 Forbidden"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "The provided API key is invalid.", | ||||
|       "code": "invalid_api_key", | ||||
|       "type": "auth", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_api_key" | ||||
|     } | ||||
|     "###); | ||||
|     // try either with bad permission | ||||
|     server.use_api_key(search_api_key.as_str().unwrap()); | ||||
|     let (response, code) = server.get_network().await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"403 Forbidden"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "The provided API key is invalid.", | ||||
|       "code": "invalid_api_key", | ||||
|       "type": "auth", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_api_key" | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({ | ||||
|           "self": "get_api_key" | ||||
|         })) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"403 Forbidden"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "message": "The provided API key is invalid.", | ||||
|       "code": "invalid_api_key", | ||||
|       "type": "auth", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_api_key" | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn get_and_set_network() { | ||||
|     let server = Server::new().await; | ||||
|  | ||||
|     let (response, code) = server.set_features(json!({"network": true})).await; | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response["network"]), @r#"true"#); | ||||
|  | ||||
|     let (response, code) = server.get_network().await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": null, | ||||
|       "remotes": {} | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // adding self | ||||
|     let (response, code) = server.set_network(json!({"self": "myself"})).await; | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": "myself", | ||||
|       "remotes": {} | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // adding remotes | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "myself": { | ||||
|                 "url": "http://localhost:7700" | ||||
|             }, | ||||
|             "thy": { | ||||
|                 "url": "http://localhost:7701", | ||||
|                 "searchApiKey": "foo" | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": "myself", | ||||
|       "remotes": { | ||||
|         "myself": { | ||||
|           "url": "http://localhost:7700", | ||||
|           "searchApiKey": null | ||||
|         }, | ||||
|         "thy": { | ||||
|           "url": "http://localhost:7701", | ||||
|           "searchApiKey": "foo" | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // partially updating one remote | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "thy": { | ||||
|                 "searchApiKey": "bar" | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": "myself", | ||||
|       "remotes": { | ||||
|         "myself": { | ||||
|           "url": "http://localhost:7700", | ||||
|           "searchApiKey": null | ||||
|         }, | ||||
|         "thy": { | ||||
|           "url": "http://localhost:7701", | ||||
|           "searchApiKey": "bar" | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // adding one remote | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "them": { | ||||
|                 "url": "http://localhost:7702", | ||||
|                 "searchApiKey": "baz" | ||||
|             } | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": "myself", | ||||
|       "remotes": { | ||||
|         "myself": { | ||||
|           "url": "http://localhost:7700", | ||||
|           "searchApiKey": null | ||||
|         }, | ||||
|         "them": { | ||||
|           "url": "http://localhost:7702", | ||||
|           "searchApiKey": "baz" | ||||
|         }, | ||||
|         "thy": { | ||||
|           "url": "http://localhost:7701", | ||||
|           "searchApiKey": "bar" | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // deleting one remote | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({"remotes": { | ||||
|             "myself": Null, | ||||
|         }})) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": "myself", | ||||
|       "remotes": { | ||||
|         "them": { | ||||
|           "url": "http://localhost:7702", | ||||
|           "searchApiKey": "baz" | ||||
|         }, | ||||
|         "thy": { | ||||
|           "url": "http://localhost:7701", | ||||
|           "searchApiKey": "bar" | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // removing self | ||||
|     let (response, code) = server.set_network(json!({"self": Null})).await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": null, | ||||
|       "remotes": { | ||||
|         "them": { | ||||
|           "url": "http://localhost:7702", | ||||
|           "searchApiKey": "baz" | ||||
|         }, | ||||
|         "thy": { | ||||
|           "url": "http://localhost:7701", | ||||
|           "searchApiKey": "bar" | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // setting self again | ||||
|     let (response, code) = server.set_network(json!({"self": "thy"})).await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": "thy", | ||||
|       "remotes": { | ||||
|         "them": { | ||||
|           "url": "http://localhost:7702", | ||||
|           "searchApiKey": "baz" | ||||
|         }, | ||||
|         "thy": { | ||||
|           "url": "http://localhost:7701", | ||||
|           "searchApiKey": "bar" | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // doing nothing | ||||
|     let (response, code) = server.set_network(json!({})).await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|         { | ||||
|           "self": "thy", | ||||
|           "remotes": { | ||||
|             "them": { | ||||
|               "url": "http://localhost:7702", | ||||
|               "searchApiKey": "baz" | ||||
|             }, | ||||
|             "thy": { | ||||
|               "url": "http://localhost:7701", | ||||
|               "searchApiKey": "bar" | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         "###); | ||||
|  | ||||
|     // still doing nothing | ||||
|     let (response, code) = server.set_network(json!({"remotes": {}})).await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|         { | ||||
|           "self": "thy", | ||||
|           "remotes": { | ||||
|             "them": { | ||||
|               "url": "http://localhost:7702", | ||||
|               "searchApiKey": "baz" | ||||
|             }, | ||||
|             "thy": { | ||||
|               "url": "http://localhost:7701", | ||||
|               "searchApiKey": "bar" | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         "###); | ||||
|  | ||||
|     // good time to check GET | ||||
|     let (response, code) = server.get_network().await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|         { | ||||
|           "self": "thy", | ||||
|           "remotes": { | ||||
|             "them": { | ||||
|               "url": "http://localhost:7702", | ||||
|               "searchApiKey": "baz" | ||||
|             }, | ||||
|             "thy": { | ||||
|               "url": "http://localhost:7701", | ||||
|               "searchApiKey": "bar" | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         "###); | ||||
|  | ||||
|     // deleting everything | ||||
|     let (response, code) = server | ||||
|         .set_network(json!({ | ||||
|             "remotes": Null, | ||||
|         })) | ||||
|         .await; | ||||
|  | ||||
|     meili_snap::snapshot!(code, @"200 OK"); | ||||
|     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||
|     { | ||||
|       "self": "thy", | ||||
|       "remotes": {} | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
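The `get_and_set_network` test above relies on patch semantics: an absent key keeps the previous value, an explicit `null` removes it, and remotes are merged by name. Below is a minimal sketch of that delete-via-null behaviour using plain serde_json and hypothetical types; it deliberately omits the per-field remote merge (updating only `searchApiKey` of an existing remote) that the real route also supports:

```rust
use std::collections::BTreeMap;

use serde::Deserialize;
use serde_json::{json, Value};

#[derive(Debug, Default)]
struct Network {
    self_name: Option<String>,
    remotes: BTreeMap<String, Remote>,
}

#[derive(Debug, Deserialize)]
struct Remote {
    url: String,
    #[serde(rename = "searchApiKey")]
    search_api_key: Option<String>,
}

fn apply_patch(network: &mut Network, patch: &Value) {
    // `"self": null` clears the name, a string replaces it, absence keeps it.
    match patch.get("self") {
        Some(Value::Null) => network.self_name = None,
        Some(Value::String(name)) => network.self_name = Some(name.clone()),
        _ => {}
    }
    // `"remotes": null` clears everything; a `null` entry deletes that remote,
    // any other value inserts or replaces it.
    match patch.get("remotes") {
        Some(Value::Null) => network.remotes.clear(),
        Some(Value::Object(entries)) => {
            for (name, value) in entries {
                if value.is_null() {
                    network.remotes.remove(name);
                } else if let Ok(remote) = Remote::deserialize(value) {
                    network.remotes.insert(name.clone(), remote);
                }
            }
        }
        _ => {}
    }
}

fn main() {
    let mut network = Network::default();
    apply_patch(&mut network, &json!({ "self": "myself" }));
    apply_patch(
        &mut network,
        &json!({ "remotes": { "thy": { "url": "http://localhost:7701", "searchApiKey": "foo" } } }),
    );
    // Deleting the remote and the self name, mirroring the end of the test.
    apply_patch(&mut network, &json!({ "self": null, "remotes": { "thy": null } }));
    assert!(network.self_name.is_none() && network.remotes.is_empty());
    println!("{network:?}");
}
```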
| @@ -5,6 +5,8 @@ use crate::common::Server; | ||||
| use crate::json; | ||||
| use crate::search::{SCORE_DOCUMENTS, VECTOR_DOCUMENTS}; | ||||
|  | ||||
| mod proxy; | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn search_empty_list() { | ||||
|     let server = Server::new().await; | ||||
							
								
								
									
crates/meilisearch/tests/search/multi/proxy.rs (new file, 2591 lines; file diff suppressed because it is too large)
							| @@ -1,3 +1,4 @@ | ||||
| use meili_snap::{json_string, snapshot}; | ||||
| use time::format_description::well_known::Rfc3339; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| @@ -74,3 +75,253 @@ async fn stats() { | ||||
|     assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1); | ||||
|     assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn add_remove_embeddings() { | ||||
|     let server = Server::new().await; | ||||
|     let index = server.index("doggo"); | ||||
|  | ||||
|     let (response, code) = index | ||||
|         .update_settings(json!({ | ||||
|           "embedders": { | ||||
|             "manual": { | ||||
|                 "source": "userProvided", | ||||
|                 "dimensions": 3, | ||||
|             }, | ||||
|             "handcrafted": { | ||||
|                 "source": "userProvided", | ||||
|                 "dimensions": 3, | ||||
|             }, | ||||
|  | ||||
|           }, | ||||
|         })) | ||||
|         .await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     server.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     // 2 embedded documents for 5 embeddings in total | ||||
|     let documents = json!([ | ||||
|       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }}, | ||||
|       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }}, | ||||
|     ]); | ||||
|  | ||||
|     let (response, code) = index.add_documents(documents, None).await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     index.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     let (stats, _code) = index.stats().await; | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 2, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 5, | ||||
|       "numberOfEmbeddedDocuments": 2, | ||||
|       "fieldDistribution": { | ||||
|         "id": 2, | ||||
|         "name": 2 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // 2 embedded documents for 3 embeddings in total | ||||
|     let documents = json!([ | ||||
|       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }}, | ||||
|     ]); | ||||
|  | ||||
|     let (response, code) = index.update_documents(documents, None).await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     index.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     let (stats, _code) = index.stats().await; | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 2, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 3, | ||||
|       "numberOfEmbeddedDocuments": 2, | ||||
|       "fieldDistribution": { | ||||
|         "id": 2, | ||||
|         "name": 2 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // 2 embedded documents for 2 embeddings in total | ||||
|     let documents = json!([ | ||||
|         {"id": 0, "name": "kefir", "_vectors": { "manual": null, "handcrafted": [0, 0, 0] }}, | ||||
|     ]); | ||||
|  | ||||
|     let (response, code) = index.update_documents(documents, None).await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     index.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     let (stats, _code) = index.stats().await; | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 2, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 2, | ||||
|       "numberOfEmbeddedDocuments": 2, | ||||
|       "fieldDistribution": { | ||||
|         "id": 2, | ||||
|         "name": 2 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // 1 embedded document for 2 embeddings in total | ||||
|     let documents = json!([ | ||||
|         {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }}, | ||||
|         {"id": 1, "name": "echo", "_vectors": { "manual": null, "handcrafted": null }}, | ||||
|     ]); | ||||
|  | ||||
|     let (response, code) = index.update_documents(documents, None).await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     index.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     let (stats, _code) = index.stats().await; | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 2, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 2, | ||||
|       "numberOfEmbeddedDocuments": 1, | ||||
|       "fieldDistribution": { | ||||
|         "id": 2, | ||||
|         "name": 2 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn add_remove_embedded_documents() { | ||||
|     let server = Server::new().await; | ||||
|     let index = server.index("doggo"); | ||||
|  | ||||
|     let (response, code) = index | ||||
|         .update_settings(json!({ | ||||
|           "embedders": { | ||||
|             "manual": { | ||||
|                 "source": "userProvided", | ||||
|                 "dimensions": 3, | ||||
|             }, | ||||
|             "handcrafted": { | ||||
|                 "source": "userProvided", | ||||
|                 "dimensions": 3, | ||||
|             }, | ||||
|  | ||||
|           }, | ||||
|         })) | ||||
|         .await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     server.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     // 2 embedded documents for 5 embeddings in total | ||||
|     let documents = json!([ | ||||
|       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }}, | ||||
|       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }}, | ||||
|     ]); | ||||
|  | ||||
|     let (response, code) = index.add_documents(documents, None).await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     index.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     let (stats, _code) = index.stats().await; | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 2, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 5, | ||||
|       "numberOfEmbeddedDocuments": 2, | ||||
|       "fieldDistribution": { | ||||
|         "id": 2, | ||||
|         "name": 2 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // delete one embedded document, leaving 1 embedded document with 3 embeddings in total | ||||
|     let (response, code) = index.delete_document(0).await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     index.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     let (stats, _code) = index.stats().await; | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 1, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 3, | ||||
|       "numberOfEmbeddedDocuments": 1, | ||||
|       "fieldDistribution": { | ||||
|         "id": 1, | ||||
|         "name": 1 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn update_embedder_settings() { | ||||
|     let server = Server::new().await; | ||||
|     let index = server.index("doggo"); | ||||
|  | ||||
|     // 2 embedded documents for 3 embeddings in total | ||||
|     // but no embedders are added in the settings yet so we expect 0 embedded documents for 0 embeddings in total | ||||
|     let documents = json!([ | ||||
|       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }}, | ||||
|       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }}, | ||||
|     ]); | ||||
|  | ||||
|     let (response, code) = index.add_documents(documents, None).await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     index.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     let (stats, _code) = index.stats().await; | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 2, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "id": 2, | ||||
|         "name": 2 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
|  | ||||
|     // add embedders to the settings | ||||
|     // 2 embedded documents for 3 embeddings in total | ||||
|     let (response, code) = index | ||||
|         .update_settings(json!({ | ||||
|           "embedders": { | ||||
|             "manual": { | ||||
|                 "source": "userProvided", | ||||
|                 "dimensions": 3, | ||||
|             }, | ||||
|             "handcrafted": { | ||||
|                 "source": "userProvided", | ||||
|                 "dimensions": 3, | ||||
|             }, | ||||
|  | ||||
|           }, | ||||
|         })) | ||||
|         .await; | ||||
|     snapshot!(code, @"202 Accepted"); | ||||
|     server.wait_task(response.uid()).await.succeeded(); | ||||
|  | ||||
|     let (stats, _code) = index.stats().await; | ||||
|     snapshot!(json_string!(stats), @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 2, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 3, | ||||
|       "numberOfEmbeddedDocuments": 2, | ||||
|       "fieldDistribution": { | ||||
|         "id": 2, | ||||
|         "name": 2 | ||||
|       } | ||||
|     } | ||||
|     "###); | ||||
| } | ||||
|   | ||||
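These stats tests hinge on how `numberOfEmbeddings` is counted: each embedder entry under `_vectors` may be `null`, a single vector, or an array of vectors, and every individual vector counts once. A hypothetical helper (not the actual milli implementation) reproducing that arithmetic on the documents used above:

```rust
use serde_json::{json, Value};

// Counts the embeddings declared in a document's `_vectors` field.
fn count_embeddings(vectors: &Value) -> usize {
    let Some(embedders) = vectors.as_object() else { return 0 };
    embedders
        .values()
        .map(|entry| match entry {
            Value::Null => 0,
            // `[[1, 1, 1], [2, 2, 2]]` is two embeddings for one embedder.
            Value::Array(items) if items.first().is_some_and(Value::is_array) => items.len(),
            // `[1, 1, 1]` is a single embedding.
            Value::Array(_) => 1,
            _ => 0,
        })
        .sum()
}

fn main() {
    let kefir = json!({ "manual": [0, 0, 0], "handcrafted": [0, 0, 0] });
    let echo = json!({ "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] });
    // 2 + 3 = 5, matching the first `numberOfEmbeddings` snapshot above.
    assert_eq!(count_embeddings(&kefir) + count_embeddings(&echo), 5);

    let echo_updated = json!({ "manual": [1, 1, 1], "handcrafted": null });
    // Replacing echo drops its handcrafted embeddings: 2 + 1 = 3.
    assert_eq!(count_embeddings(&kefir) + count_embeddings(&echo_updated), 3);
    println!("embedding counts match the snapshots");
}
```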
| @@ -126,14 +126,17 @@ async fn check_the_index_scheduler(server: &Server) { | ||||
|     "#); | ||||
|     // And their metadata are still right | ||||
|     let (stats, _) = server.stats().await; | ||||
|     snapshot!(stats, @r#" | ||||
|     snapshot!(stats, @r###" | ||||
|     { | ||||
|       "databaseSize": 438272, | ||||
|       "usedDatabaseSize": 196608, | ||||
|       "lastUpdate": "2025-01-23T11:36:22.634859166Z", | ||||
|       "indexes": { | ||||
|         "kefir": { | ||||
|           "numberOfDocuments": 1, | ||||
|           "isIndexing": false, | ||||
|           "numberOfEmbeddings": 0, | ||||
|           "numberOfEmbeddedDocuments": 0, | ||||
|           "fieldDistribution": { | ||||
|             "age": 1, | ||||
|             "description": 1, | ||||
| @@ -144,7 +147,7 @@ async fn check_the_index_scheduler(server: &Server) { | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "#); | ||||
|     "###); | ||||
|  | ||||
|     // Wait until the upgrade has been applied to all indexes to avoid flakyness | ||||
|     let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await; | ||||
| @@ -205,14 +208,17 @@ async fn check_the_index_scheduler(server: &Server) { | ||||
|     snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41"); | ||||
|  | ||||
|     let (stats, _) = server.stats().await; | ||||
|     snapshot!(stats, @r#" | ||||
|     snapshot!(stats, @r###" | ||||
|     { | ||||
|       "databaseSize": 438272, | ||||
|       "usedDatabaseSize": 196608, | ||||
|       "lastUpdate": "2025-01-23T11:36:22.634859166Z", | ||||
|       "indexes": { | ||||
|         "kefir": { | ||||
|           "numberOfDocuments": 1, | ||||
|           "isIndexing": false, | ||||
|           "numberOfEmbeddings": 0, | ||||
|           "numberOfEmbeddedDocuments": 0, | ||||
|           "fieldDistribution": { | ||||
|             "age": 1, | ||||
|             "description": 1, | ||||
| @@ -223,13 +229,15 @@ async fn check_the_index_scheduler(server: &Server) { | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     "#); | ||||
|     "###); | ||||
|     let index = server.index("kefir"); | ||||
|     let (stats, _) = index.stats().await; | ||||
|     snapshot!(stats, @r#" | ||||
|     snapshot!(stats, @r###" | ||||
|     { | ||||
|       "numberOfDocuments": 1, | ||||
|       "isIndexing": false, | ||||
|       "numberOfEmbeddings": 0, | ||||
|       "numberOfEmbeddedDocuments": 0, | ||||
|       "fieldDistribution": { | ||||
|         "age": 1, | ||||
|         "description": 1, | ||||
| @@ -238,7 +246,7 @@ async fn check_the_index_scheduler(server: &Server) { | ||||
|         "surname": 1 | ||||
|       } | ||||
|     } | ||||
|     "#); | ||||
|     "###); | ||||
|  | ||||
|     // Delete all the tasks of a specific batch | ||||
|     let (task, _) = server.delete_tasks("batchUids=10").await; | ||||
|   | ||||
| @@ -32,7 +32,7 @@ async fn field_unavailable_for_source() { | ||||
|     snapshot!(code, @"400 Bad Request"); | ||||
|     snapshot!(response, @r###" | ||||
|     { | ||||
|       "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `dimensions`, `distribution`, `url`, `binaryQuantized`", | ||||
|       "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `documentTemplateMaxBytes`, `dimensions`, `distribution`, `url`, `binaryQuantized`", | ||||
|       "code": "invalid_settings_embedders", | ||||
|       "type": "invalid_request", | ||||
|       "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" | ||||
|   | ||||
| @@ -1,19 +1,26 @@ | ||||
| use std::fs::{read_dir, read_to_string, remove_file, File}; | ||||
| use std::io::BufWriter; | ||||
| use std::io::{BufWriter, Write as _}; | ||||
| use std::path::PathBuf; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use clap::{Parser, Subcommand}; | ||||
| use anyhow::{bail, Context}; | ||||
| use clap::{Parser, Subcommand, ValueEnum}; | ||||
| use dump::{DumpWriter, IndexMetadata}; | ||||
| use file_store::FileStore; | ||||
| use meilisearch_auth::AuthController; | ||||
| use meilisearch_types::heed::types::{SerdeJson, Str}; | ||||
| use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; | ||||
| use meilisearch_types::batches::Batch; | ||||
| use meilisearch_types::heed::types::{Bytes, SerdeJson, Str}; | ||||
| use meilisearch_types::heed::{ | ||||
|     CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, | ||||
| }; | ||||
| use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; | ||||
| use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; | ||||
| use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | ||||
| use meilisearch_types::milli::{obkv_to_json, BEU32}; | ||||
| use meilisearch_types::tasks::{Status, Task}; | ||||
| use meilisearch_types::versioning::{get_version, parse_version}; | ||||
| use meilisearch_types::Index; | ||||
| use serde_json::Value::Object; | ||||
| use time::macros::format_description; | ||||
| use time::OffsetDateTime; | ||||
| use upgrade::OfflineUpgrade; | ||||
| @@ -65,6 +72,24 @@ enum Command { | ||||
|         skip_enqueued_tasks: bool, | ||||
|     }, | ||||
|  | ||||
|     /// Exports the documents of a Meilisearch index to stdout in NDJSON format. | ||||
|     /// | ||||
|     /// This command can be executed on a running Meilisearch database. However, please note that | ||||
|     /// it will maintain a read-only transaction for the duration of the extraction process. | ||||
|     ExportDocuments { | ||||
|         /// The index name to export the documents from. | ||||
|         #[arg(long)] | ||||
|         index_name: String, | ||||
|  | ||||
|         /// Do not export vectors with the documents. | ||||
|         #[arg(long)] | ||||
|         ignore_vectors: bool, | ||||
|  | ||||
|         /// The number of documents to skip. | ||||
|         #[arg(long)] | ||||
|         offset: Option<usize>, | ||||
|     }, | ||||
|  | ||||
|     /// Attempts to upgrade from one major version to the next without a dump. | ||||
|     /// | ||||
|     /// Make sure to run this command when Meilisearch is not running! | ||||
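The `ExportDocuments` subcommand described in the hunk above streams one JSON object per line to stdout. A minimal sketch of that NDJSON streaming, using in-memory sample documents instead of an LMDB read transaction and reducing the `ignore_vectors` handling to dropping the `_vectors` key:

```rust
use std::io::{self, Write};

use serde_json::json;

fn main() -> io::Result<()> {
    // Stand-ins for the documents read from the index under a read-only transaction.
    let documents = vec![
        json!({ "id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] } }),
        json!({ "id": 1, "name": "echo" }),
    ];
    let ignore_vectors = true;
    let offset = 0usize;

    let stdout = io::stdout();
    let mut out = io::BufWriter::new(stdout.lock());
    for mut document in documents.into_iter().skip(offset) {
        if ignore_vectors {
            if let Some(object) = document.as_object_mut() {
                // Equivalent of not re-exporting the stored embeddings.
                object.remove("_vectors");
            }
        }
        // NDJSON: one compact JSON document per line.
        let line = serde_json::to_string(&document).expect("documents serialize to JSON");
        writeln!(out, "{line}")?;
    }
    out.flush()
}
```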
| @@ -78,6 +103,46 @@ enum Command { | ||||
|         #[arg(long)] | ||||
|         target_version: String, | ||||
|     }, | ||||
|  | ||||
|     /// Compacts the index using LMDB's compaction feature. | ||||
|     /// | ||||
|     /// You must run this command while Meilisearch is off. The reason is that Meilisearch keeps | ||||
|     /// its indexes opened and this compaction operation writes into another file: Meilisearch | ||||
|     /// will not switch to the new file. | ||||
|     /// | ||||
|     /// **Another possibility** is to keep Meilisearch running to serve search requests, run the | ||||
|     /// compaction and, once done, close and immediately reopen Meilisearch. This way Meilisearch | ||||
|     /// will reopen the data.mdb file when rebooting and see the newly compacted file, ignoring | ||||
|     /// the previous non-compacted data. | ||||
|     /// | ||||
|     /// Note that the compaction will open the index, copy and compact the index into another file | ||||
|     /// **on the same disk as the index** and replace the previous index with the newly compacted | ||||
|     /// one. This means that the disk must have enough room for at most two times the index size. | ||||
|     /// | ||||
|     /// To make sure not to lose any data, this tool takes a mutable transaction on the index | ||||
|     /// before running the copy and compaction. This way the current indexation must finish before | ||||
|     /// the compaction operation can start. Once the compaction is done, the big index is replaced | ||||
|     /// by the compacted one and the mutable transaction is released. | ||||
|     CompactIndex { index_name: String }, | ||||
|  | ||||
|     /// Uses the hair dryer to make the dedicated pages hot in the cache. | ||||
|     /// | ||||
|     /// To make the index faster we must make sure it is hot in the DB cache: that is both the | ||||
|     /// curse and the strength of memory-mapping. This command is designed to make a specific | ||||
|     /// part of the index hot in cache. | ||||
|     HairDryer { | ||||
|         #[arg(long, value_delimiter = ',')] | ||||
|         index_name: Vec<String>, | ||||
|  | ||||
|         #[arg(long, value_delimiter = ',')] | ||||
|         index_part: Vec<IndexPart>, | ||||
|     }, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, ValueEnum)] | ||||
| enum IndexPart { | ||||
|     /// Will make the arroy index hot. | ||||
|     Arroy, | ||||
| } | ||||
|  | ||||
| fn main() -> anyhow::Result<()> { | ||||
| @@ -90,10 +155,17 @@ fn main() -> anyhow::Result<()> { | ||||
|         Command::ExportADump { dump_dir, skip_enqueued_tasks } => { | ||||
|             export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version) | ||||
|         } | ||||
|         Command::ExportDocuments { index_name, ignore_vectors, offset } => { | ||||
|             export_documents(db_path, index_name, ignore_vectors, offset) | ||||
|         } | ||||
|         Command::OfflineUpgrade { target_version } => { | ||||
|             let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; | ||||
|             OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() | ||||
|         } | ||||
|         Command::CompactIndex { index_name } => compact_index(db_path, &index_name), | ||||
|         Command::HairDryer { index_name, index_part } => { | ||||
|             hair_dryer(db_path, &index_name, &index_part) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
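The `CompactIndex` doc comment above boils down to: hold a write transaction, copy the environment into a compacted sibling file on the same filesystem, then rename it over the original. Here is a plain-std sketch of that copy-then-rename swap on throwaway files in the temp directory; the sizes are fabricated and no LMDB environment is involved:

```rust
use std::fs;
use std::io::Write;

fn main() -> std::io::Result<()> {
    let dir = std::env::temp_dir();
    let original = dir.join("data.mdb");
    let compacted = dir.join("data.mdb.cpy");

    // Stand-in for the live, fragmented index file.
    fs::write(&original, vec![0u8; 4096])?;

    // "Compact" into a sibling file on the same filesystem so the final rename
    // cannot cross devices.
    let mut copy = fs::File::create(&compacted)?;
    copy.write_all(&[0u8; 1024])?;
    copy.sync_all()?;
    drop(copy);

    let before = fs::metadata(&original)?.len();
    let after = fs::metadata(&compacted)?.len();
    println!("{before} bytes -> {after} bytes ({:.2}x reduction)", before as f64 / after as f64);

    // Replace the original with the compacted copy, like the tool does once the
    // copy is finished and while it still holds the index write transaction.
    fs::rename(&compacted, &original)?;

    fs::remove_file(&original) // clean up the demo file
}
```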
| @@ -230,70 +302,86 @@ fn export_a_dump( | ||||
|  | ||||
|     eprintln!("Successfully dumped {count} keys!"); | ||||
|  | ||||
|     eprintln!("Dumping the queue"); | ||||
|     let rtxn = env.read_txn()?; | ||||
|     let all_tasks: Database<BEU32, SerdeJson<Task>> = | ||||
|         try_opening_database(&env, &rtxn, "all-tasks")?; | ||||
|     let all_batches: Database<BEU32, SerdeJson<Batch>> = | ||||
|         try_opening_database(&env, &rtxn, "all-batches")?; | ||||
|     let index_mapping: Database<Str, UuidCodec> = | ||||
|         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||
|  | ||||
|     if skip_enqueued_tasks { | ||||
|         eprintln!("Skip dumping the enqueued tasks..."); | ||||
|     } else { | ||||
|         let mut dump_tasks = dump.create_tasks_queue()?; | ||||
|         let mut count = 0; | ||||
|         for ret in all_tasks.iter(&rtxn)? { | ||||
|             let (_, t) = ret?; | ||||
|             let status = t.status; | ||||
|             let content_file = t.content_uuid(); | ||||
|     eprintln!("Dumping the tasks"); | ||||
|     let mut dump_tasks = dump.create_tasks_queue()?; | ||||
|     let mut count_tasks = 0; | ||||
|     let mut count_enqueued_tasks = 0; | ||||
|     for ret in all_tasks.iter(&rtxn)? { | ||||
|         let (_, t) = ret?; | ||||
|         let status = t.status; | ||||
|         let content_file = t.content_uuid(); | ||||
|  | ||||
|             let mut dump_content_file = dump_tasks.push_task(&t.into())?; | ||||
|         if status == Status::Enqueued && skip_enqueued_tasks { | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|             // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. | ||||
|             if let Some(content_file_uuid) = content_file { | ||||
|                 if status == Status::Enqueued { | ||||
|                     let content_file = file_store.get_update(content_file_uuid)?; | ||||
|         let mut dump_content_file = dump_tasks.push_task(&t.into())?; | ||||
|  | ||||
|                     if (detected_version.0, detected_version.1, detected_version.2) < (1, 12, 0) { | ||||
|                         eprintln!("Dumping the enqueued tasks reading them in obkv format..."); | ||||
|                         let reader = | ||||
|                             DocumentsBatchReader::from_reader(content_file).with_context(|| { | ||||
|                                 format!("While reading content file {:?}", content_file_uuid) | ||||
|                             })?; | ||||
|                         let (mut cursor, documents_batch_index) = | ||||
|                             reader.into_cursor_and_fields_index(); | ||||
|                         while let Some(doc) = cursor.next_document().with_context(|| { | ||||
|                             format!("While iterating on content file {:?}", content_file_uuid) | ||||
|                         })? { | ||||
|                             dump_content_file | ||||
|                                 .push_document(&obkv_to_object(doc, &documents_batch_index)?)?; | ||||
|                         } | ||||
|                     } else { | ||||
|                         eprintln!( | ||||
|                             "Dumping the enqueued tasks reading them in JSON stream format..." | ||||
|                         ); | ||||
|                         for document in | ||||
|                             serde_json::de::Deserializer::from_reader(content_file).into_iter() | ||||
|                         { | ||||
|                             let document = document.with_context(|| { | ||||
|                                 format!("While reading content file {:?}", content_file_uuid) | ||||
|                             })?; | ||||
|                             dump_content_file.push_document(&document)?; | ||||
|                         } | ||||
|         // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. | ||||
|         if let Some(content_file_uuid) = content_file { | ||||
|             if status == Status::Enqueued { | ||||
|                 let content_file = file_store.get_update(content_file_uuid)?; | ||||
|  | ||||
|                 if (detected_version.0, detected_version.1, detected_version.2) < (1, 12, 0) { | ||||
|                     eprintln!("Dumping the enqueued tasks reading them in obkv format..."); | ||||
|                     let reader = | ||||
|                         DocumentsBatchReader::from_reader(content_file).with_context(|| { | ||||
|                             format!("While reading content file {:?}", content_file_uuid) | ||||
|                         })?; | ||||
|                     let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); | ||||
|                     while let Some(doc) = cursor.next_document().with_context(|| { | ||||
|                         format!("While iterating on content file {:?}", content_file_uuid) | ||||
|                     })? { | ||||
|                         dump_content_file | ||||
|                             .push_document(&obkv_to_object(doc, &documents_batch_index)?)?; | ||||
|                     } | ||||
|                 } else { | ||||
|                     eprintln!("Dumping the enqueued tasks reading them in JSON stream format..."); | ||||
|                     for document in | ||||
|                         serde_json::de::Deserializer::from_reader(content_file).into_iter() | ||||
|                     { | ||||
|                         let document = document.with_context(|| { | ||||
|                             format!("While reading content file {:?}", content_file_uuid) | ||||
|                         })?; | ||||
|                         dump_content_file.push_document(&document)?; | ||||
|                     } | ||||
|  | ||||
|                     dump_content_file.flush()?; | ||||
|                     count += 1; | ||||
|                 } | ||||
|  | ||||
|                 dump_content_file.flush()?; | ||||
|                 count_enqueued_tasks += 1; | ||||
|             } | ||||
|         } | ||||
|         dump_tasks.flush()?; | ||||
|  | ||||
|         eprintln!("Successfully dumped {count} enqueued tasks!"); | ||||
|         count_tasks += 1; | ||||
|     } | ||||
|     dump_tasks.flush()?; | ||||
|     eprintln!( | ||||
|         "Successfully dumped {count_tasks} tasks including {count_enqueued_tasks} enqueued tasks!" | ||||
|     ); | ||||
|  | ||||
|     // 4. dump the batches | ||||
|     eprintln!("Dumping the batches"); | ||||
|     let mut dump_batches = dump.create_batches_queue()?; | ||||
|     let mut count = 0; | ||||
|  | ||||
|     for ret in all_batches.iter(&rtxn)? { | ||||
|         let (_, b) = ret?; | ||||
|         dump_batches.push_batch(&b)?; | ||||
|         count += 1; | ||||
|     } | ||||
|     dump_batches.flush()?; | ||||
|     eprintln!("Successfully dumped {count} batches!"); | ||||
|  | ||||
|     // 5. Dump the indexes | ||||
|     eprintln!("Dumping the indexes..."); | ||||
|  | ||||
|     // 4. Dump the indexes | ||||
|     let mut count = 0; | ||||
|     for result in index_mapping.iter(&rtxn)? { | ||||
|         let (uid, uuid) = result?; | ||||
| @@ -314,14 +402,14 @@ fn export_a_dump( | ||||
|         let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||
|         let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||
|  | ||||
|         // 4.1. Dump the documents | ||||
|         // 5.1. Dump the documents | ||||
|         for ret in index.all_documents(&rtxn)? { | ||||
|             let (_id, doc) = ret?; | ||||
|             let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?; | ||||
|             index_dumper.push_document(&document)?; | ||||
|         } | ||||
|  | ||||
|         // 4.2. Dump the settings | ||||
|         // 5.2. Dump the settings | ||||
|         let settings = meilisearch_types::settings::settings( | ||||
|             &index, | ||||
|             &rtxn, | ||||
| @@ -347,3 +435,241 @@ fn export_a_dump( | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { | ||||
|     let index_scheduler_path = db_path.join("tasks"); | ||||
|     let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } | ||||
|         .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; | ||||
|  | ||||
|     let rtxn = env.read_txn()?; | ||||
|     let index_mapping: Database<Str, UuidCodec> = | ||||
|         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||
|  | ||||
|     for result in index_mapping.iter(&rtxn)? { | ||||
|         let (uid, uuid) = result?; | ||||
|  | ||||
|         if uid != index_name { | ||||
|             eprintln!("Found index {uid}, skipping it"); | ||||
|             continue; | ||||
|         } else { | ||||
|             eprintln!("Found index {uid} 🎉"); | ||||
|         } | ||||
|  | ||||
|         let index_path = db_path.join("indexes").join(uuid.to_string()); | ||||
|         let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { | ||||
|             format!("While trying to open the index at path {:?}", index_path.display()) | ||||
|         })?; | ||||
|  | ||||
|         eprintln!("Waiting for a write transaction..."); | ||||
|         let _wtxn = index.write_txn().context("While waiting for a write transaction")?; | ||||
|  | ||||
|         // Compute the paths of the live index file and of its compacted copy. | ||||
|         let non_compacted_index_file_path = index_path.join("data.mdb"); | ||||
|         let compacted_index_file_path = index_path.join("data.mdb.cpy"); | ||||
|  | ||||
|         eprintln!("Compacting the index..."); | ||||
|         let before_compaction = Instant::now(); | ||||
|         let new_file = index | ||||
|             .copy_to_file(&compacted_index_file_path, CompactionOption::Enabled) | ||||
|             .with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?; | ||||
|  | ||||
|         let after_size = new_file.metadata()?.len(); | ||||
|         let before_size = std::fs::metadata(&non_compacted_index_file_path) | ||||
|             .with_context(|| { | ||||
|                 format!( | ||||
|                     "While retrieving the metadata of {}", | ||||
|                     non_compacted_index_file_path.display(), | ||||
|                 ) | ||||
|             })? | ||||
|             .len(); | ||||
|  | ||||
|         let reduction = before_size as f64 / after_size as f64; | ||||
|         println!("Compaction successful. Took around {:.2?}", before_compaction.elapsed()); | ||||
|         eprintln!("The index went from {before_size} bytes to {after_size} bytes ({reduction:.2}x reduction)"); | ||||
|  | ||||
|         eprintln!("Replacing the non-compacted index by the compacted one..."); | ||||
|         std::fs::rename(&compacted_index_file_path, &non_compacted_index_file_path).with_context( | ||||
|             || { | ||||
|                 format!( | ||||
|                     "While renaming {} into {}", | ||||
|                     compacted_index_file_path.display(), | ||||
|                     non_compacted_index_file_path.display(), | ||||
|                 ) | ||||
|             }, | ||||
|         )?; | ||||
|  | ||||
|         drop(new_file); | ||||
|  | ||||
|         println!("Everything's done 🎉"); | ||||
|         return Ok(()); | ||||
|     } | ||||
|  | ||||
|     bail!("Target index {index_name} not found!") | ||||
| } | ||||
|  | ||||
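Side note on the compaction step above: `Index::copy_to_file` wraps LMDB's copy-with-compaction, and the write transaction taken just before it blocks concurrent writers for the duration. A minimal sketch of the same flow written directly against heed, assuming an already-opened `Env` whose `data.mdb` lives in `dir` (the names and the missing write lock are simplifications of this sketch, not of the tool):

    use std::path::Path;
    use std::time::Instant;

    use heed::{CompactionOption, Env};

    fn compact_env(env: &Env, dir: &Path) -> anyhow::Result<()> {
        let live = dir.join("data.mdb");
        let copy = dir.join("data.mdb.cpy");

        let started = Instant::now();
        // Copy the environment into a fresh file, dropping free pages along the way.
        let file = env.copy_to_file(&copy, CompactionOption::Enabled)?;

        let after = file.metadata()?.len();
        let before = std::fs::metadata(&live)?.len();
        eprintln!(
            "compacted {before} -> {after} bytes ({:.2}x) in {:.2?}",
            before as f64 / after as f64,
            started.elapsed()
        );

        // Swap the compacted copy in place of the live file.
        std::fs::rename(&copy, &live)?;
        drop(file);
        Ok(())
    }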
| fn export_documents( | ||||
|     db_path: PathBuf, | ||||
|     index_name: String, | ||||
|     ignore_vectors: bool, | ||||
|     offset: Option<usize>, | ||||
| ) -> anyhow::Result<()> { | ||||
|     let index_scheduler_path = db_path.join("tasks"); | ||||
|     let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } | ||||
|         .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; | ||||
|  | ||||
|     let rtxn = env.read_txn()?; | ||||
|     let index_mapping: Database<Str, UuidCodec> = | ||||
|         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||
|  | ||||
|     for result in index_mapping.iter(&rtxn)? { | ||||
|         let (uid, uuid) = result?; | ||||
|         if uid == index_name { | ||||
|             let index_path = db_path.join("indexes").join(uuid.to_string()); | ||||
|             let index = | ||||
|                 Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { | ||||
|                     format!("While trying to open the index at path {:?}", index_path.display()) | ||||
|                 })?; | ||||
|  | ||||
|             let rtxn = index.read_txn()?; | ||||
|             let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||
|             let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||
|             let embedding_configs = index.embedding_configs(&rtxn)?; | ||||
|  | ||||
|             if let Some(offset) = offset { | ||||
|                 eprintln!("Skipping {offset} documents"); | ||||
|             } | ||||
|  | ||||
|             let mut stdout = BufWriter::new(std::io::stdout()); | ||||
|             let all_documents = index.documents_ids(&rtxn)?.into_iter().skip(offset.unwrap_or(0)); | ||||
|             for (i, ret) in index.iter_documents(&rtxn, all_documents)?.enumerate() { | ||||
|                 let (id, doc) = ret?; | ||||
|                 let mut document = obkv_to_json(&all_fields, &fields_ids_map, doc)?; | ||||
|  | ||||
|                 if i % 10_000 == 0 { | ||||
|                     eprintln!("Starting the {}th document", i + offset.unwrap_or(0)); | ||||
|                 } | ||||
|  | ||||
|                 if !ignore_vectors { | ||||
|                     'inject_vectors: { | ||||
|                         let embeddings = index.embeddings(&rtxn, id)?; | ||||
|  | ||||
|                         if embeddings.is_empty() { | ||||
|                             break 'inject_vectors; | ||||
|                         } | ||||
|  | ||||
|                         let vectors = document | ||||
|                             .entry(RESERVED_VECTORS_FIELD_NAME) | ||||
|                             .or_insert(Object(Default::default())); | ||||
|  | ||||
|                         let Object(vectors) = vectors else { | ||||
|                             return Err(meilisearch_types::milli::Error::UserError( | ||||
|                                 meilisearch_types::milli::UserError::InvalidVectorsMapType { | ||||
|                                     document_id: { | ||||
|                                         if let Ok(Some(Ok(index))) = index | ||||
|                                             .external_id_of(&rtxn, std::iter::once(id)) | ||||
|                                             .map(|it| it.into_iter().next()) | ||||
|                                         { | ||||
|                                             index | ||||
|                                         } else { | ||||
|                                             format!("internal docid={id}") | ||||
|                                         } | ||||
|                                     }, | ||||
|                                     value: vectors.clone(), | ||||
|                                 }, | ||||
|                             ) | ||||
|                             .into()); | ||||
|                         }; | ||||
|  | ||||
|                         for (embedder_name, embeddings) in embeddings { | ||||
|                             let user_provided = embedding_configs | ||||
|                                 .iter() | ||||
|                                 .find(|conf| conf.name == embedder_name) | ||||
|                                 .is_some_and(|conf| conf.user_provided.contains(id)); | ||||
|  | ||||
|                             let embeddings = ExplicitVectors { | ||||
|                                 embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( | ||||
|                                     embeddings, | ||||
|                                 )), | ||||
|                                 regenerate: !user_provided, | ||||
|                             }; | ||||
|                             vectors | ||||
|                                 .insert(embedder_name, serde_json::to_value(embeddings).unwrap()); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 serde_json::to_writer(&mut stdout, &document)?; | ||||
|             } | ||||
|  | ||||
|             stdout.flush()?; | ||||
|         } else { | ||||
|             eprintln!("Found index {uid} but it's not the right index..."); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
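For context on the `_vectors` injection in `export_documents` above: every exported document gains a `_vectors` object keyed by embedder name, carrying the embeddings and a `regenerate` flag set to the inverse of `user_provided`. An illustrative sketch of the resulting shape (the embedder name and values are made up; only the structure mirrors what is written to stdout):

    use serde_json::json;

    fn example_exported_document() -> serde_json::Value {
        json!({
            "id": 42,
            "title": "some document",
            "_vectors": {
                "default": {
                    "embeddings": [[0.12, -0.34, 0.56]],
                    "regenerate": false
                }
            }
        })
    }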
| fn hair_dryer( | ||||
|     db_path: PathBuf, | ||||
|     index_names: &[String], | ||||
|     index_parts: &[IndexPart], | ||||
| ) -> anyhow::Result<()> { | ||||
|     let index_scheduler_path = db_path.join("tasks"); | ||||
|     let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } | ||||
|         .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; | ||||
|  | ||||
|     eprintln!("Trying to get a read transaction on the index scheduler..."); | ||||
|  | ||||
|     let rtxn = env.read_txn()?; | ||||
|     let index_mapping: Database<Str, UuidCodec> = | ||||
|         try_opening_database(&env, &rtxn, "index-mapping")?; | ||||
|  | ||||
|     for result in index_mapping.iter(&rtxn)? { | ||||
|         let (uid, uuid) = result?; | ||||
|         if index_names.iter().any(|i| i == uid) { | ||||
|             let index_path = db_path.join("indexes").join(uuid.to_string()); | ||||
|             let index = | ||||
|                 Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { | ||||
|                     format!("While trying to open the index at path {:?}", index_path.display()) | ||||
|                 })?; | ||||
|  | ||||
|             eprintln!("Trying to get a read transaction on the {uid} index..."); | ||||
|  | ||||
|             let rtxn = index.read_txn()?; | ||||
|             for part in index_parts { | ||||
|                 match part { | ||||
|                     IndexPart::Arroy => { | ||||
|                         let mut count = 0; | ||||
|                         let total = index.vector_arroy.len(&rtxn)?; | ||||
|                         eprintln!("Hair drying arroy for {uid}..."); | ||||
|                         for (i, result) in index | ||||
|                             .vector_arroy | ||||
|                             .remap_types::<Bytes, Bytes>() | ||||
|                             .iter(&rtxn)? | ||||
|                             .enumerate() | ||||
|                         { | ||||
|                             let (key, value) = result?; | ||||
|  | ||||
|                             // All of this just to avoid compiler optimizations 🤞 | ||||
|                             // We must read all the bytes to make the pages hot in cache. | ||||
|                             // <https://doc.rust-lang.org/std/hint/fn.black_box.html> | ||||
|                             count += std::hint::black_box(key.iter().fold(0, |acc, _| acc + 1)); | ||||
|                             count += std::hint::black_box(value.iter().fold(0, |acc, _| acc + 1)); | ||||
|  | ||||
|                             if i % 10_000 == 0 { | ||||
|                                 let perc = (i as f64) / (total as f64) * 100.0; | ||||
|                                 eprintln!("Visited {i}/{total} ({perc:.2}%) keys") | ||||
|                             } | ||||
|                         } | ||||
|                         eprintln!("Done hair drying a total of at least {count} bytes."); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } else { | ||||
|             eprintln!("Found index {uid} but it's not the right index..."); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|   | ||||
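The `black_box` folds in `hair_dryer` exist purely to force every memory-mapped page to be read; a stripped-down sketch of the same trick on an arbitrary byte slice:

    /// Fold over every byte and route the count through `black_box` so the
    /// compiler cannot elide the reads, pulling the backing pages into the
    /// OS page cache.
    fn touch_all_bytes(bytes: &[u8]) -> usize {
        std::hint::black_box(bytes.iter().fold(0usize, |acc, _| acc + 1))
    }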
| @@ -1,5 +1,5 @@ | ||||
| <p align="center"> | ||||
|   <img alt="the milli logo" src="../assets/milli-logo.svg"> | ||||
|   <img alt="the milli logo" src="../../assets/milli-logo.svg"> | ||||
| </p> | ||||
|  | ||||
| <p align="center">a concurrent indexer combined with fast and relevant search algorithms</p> | ||||
|   | ||||
| @@ -22,7 +22,7 @@ use crate::heed_codec::version::VersionCodec; | ||||
| use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; | ||||
| use crate::order_by_map::OrderByMap; | ||||
| use crate::proximity::ProximityPrecision; | ||||
| use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; | ||||
| use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig}; | ||||
| use crate::{ | ||||
|     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, | ||||
|     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, | ||||
| @@ -1731,6 +1731,18 @@ impl Index { | ||||
|         let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default(); | ||||
|         Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 }) | ||||
|     } | ||||
|  | ||||
|     pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> { | ||||
|         let mut stats = ArroyStats::default(); | ||||
|         let embedding_configs = self.embedding_configs(rtxn)?; | ||||
|         for config in embedding_configs { | ||||
|             let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); | ||||
|             let reader = | ||||
|                 ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized()); | ||||
|             reader.aggregate_stats(rtxn, &mut stats)?; | ||||
|         } | ||||
|         Ok(stats) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Deserialize, Serialize)] | ||||
|   | ||||
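A sketch of how the new `arroy_stats` helper might be consumed, assuming the crate is used as `milli` and the index is already open (the field names come from the `ArroyStats` struct added later in this change set):

    fn print_vector_stats(index: &milli::Index) -> milli::Result<()> {
        let rtxn = index.read_txn()?;
        let stats = index.arroy_stats(&rtxn)?;
        eprintln!(
            "{} embeddings spread across {} documents",
            stats.number_of_embeddings,
            stats.documents.len()
        );
        Ok(())
    }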
| @@ -1,7 +1,7 @@ | ||||
| use std::cmp::Ordering; | ||||
|  | ||||
| use itertools::Itertools; | ||||
| use serde::Serialize; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::distance_between_two_points; | ||||
|  | ||||
| @@ -36,6 +36,15 @@ enum RankOrValue<'a> { | ||||
|     Score(f64), | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub enum WeightedScoreValue { | ||||
|     WeightedScore(f64), | ||||
|     Sort { asc: bool, value: serde_json::Value }, | ||||
|     GeoSort { asc: bool, distance: Option<f64> }, | ||||
|     VectorSort(f64), | ||||
| } | ||||
|  | ||||
| impl ScoreDetails { | ||||
|     pub fn local_score(&self) -> Option<f64> { | ||||
|         self.rank().map(Rank::local_score) | ||||
| @@ -87,6 +96,30 @@ impl ScoreDetails { | ||||
|             }) | ||||
|     } | ||||
|  | ||||
|     pub fn weighted_score_values<'a>( | ||||
|         details: impl Iterator<Item = &'a Self> + 'a, | ||||
|         weight: f64, | ||||
|     ) -> impl Iterator<Item = WeightedScoreValue> + 'a { | ||||
|         details | ||||
|             .map(ScoreDetails::rank_or_value) | ||||
|             .coalesce(|left, right| match (left, right) { | ||||
|                 (RankOrValue::Rank(left), RankOrValue::Rank(right)) => { | ||||
|                     Ok(RankOrValue::Rank(Rank::merge(left, right))) | ||||
|                 } | ||||
|                 (left, right) => Err((left, right)), | ||||
|             }) | ||||
|             .map(move |rank_or_value| match rank_or_value { | ||||
|                 RankOrValue::Rank(r) => WeightedScoreValue::WeightedScore(r.local_score() * weight), | ||||
|                 RankOrValue::Sort(s) => { | ||||
|                     WeightedScoreValue::Sort { asc: s.ascending, value: s.value.clone() } | ||||
|                 } | ||||
|                 RankOrValue::GeoSort(g) => { | ||||
|                     WeightedScoreValue::GeoSort { asc: g.ascending, distance: g.distance() } | ||||
|                 } | ||||
|                 RankOrValue::Score(s) => WeightedScoreValue::VectorSort(s * weight), | ||||
|             }) | ||||
|     } | ||||
|  | ||||
|     fn rank_or_value(&self) -> RankOrValue<'_> { | ||||
|         match self { | ||||
|             ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()), | ||||
| @@ -423,34 +456,58 @@ pub struct Sort { | ||||
|     pub value: serde_json::Value, | ||||
| } | ||||
|  | ||||
| pub fn compare_sort_values( | ||||
|     ascending: bool, | ||||
|     left: &serde_json::Value, | ||||
|     right: &serde_json::Value, | ||||
| ) -> Ordering { | ||||
|     use serde_json::Value::*; | ||||
|     match (left, right) { | ||||
|         (Null, Null) => Ordering::Equal, | ||||
|         (Null, _) => Ordering::Less, | ||||
|         (_, Null) => Ordering::Greater, | ||||
|         // numbers are always before strings | ||||
|         (Number(_), String(_)) => Ordering::Greater, | ||||
|         (String(_), Number(_)) => Ordering::Less, | ||||
|         (Number(left), Number(right)) => { | ||||
|             // FIXME: unwrap permitted here? | ||||
|             let order = left | ||||
|                 .as_f64() | ||||
|                 .unwrap() | ||||
|                 .partial_cmp(&right.as_f64().unwrap()) | ||||
|                 .unwrap_or(Ordering::Equal); | ||||
|             // 12 < 42, and when ascending, we want to see 12 first, so the smallest. | ||||
|             // Hence, when ascending, smaller is better | ||||
|             if ascending { | ||||
|                 order.reverse() | ||||
|             } else { | ||||
|                 order | ||||
|             } | ||||
|         } | ||||
|         (String(left), String(right)) => { | ||||
|             let order = left.cmp(right); | ||||
|             // Taking e.g. "a" and "z" | ||||
|             // "a" < "z", and when ascending, we want to see "a" first, so the smallest. | ||||
|             // Hence, when ascending, smaller is better | ||||
|             if ascending { | ||||
|                 order.reverse() | ||||
|             } else { | ||||
|                 order | ||||
|             } | ||||
|         } | ||||
|         (left, right) => { | ||||
|             tracing::warn!(%left, %right, "sort values that are neither numbers, strings or null, handling as equal"); | ||||
|             Ordering::Equal | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl PartialOrd for Sort { | ||||
|     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
|         if self.ascending != other.ascending { | ||||
|             return None; | ||||
|         } | ||||
|         match (&self.value, &other.value) { | ||||
|             (serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal), | ||||
|             (serde_json::Value::Null, _) => Some(Ordering::Less), | ||||
|             (_, serde_json::Value::Null) => Some(Ordering::Greater), | ||||
|             // numbers are always before strings | ||||
|             (serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater), | ||||
|             (serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less), | ||||
|             (serde_json::Value::Number(left), serde_json::Value::Number(right)) => { | ||||
|                 // FIXME: unwrap permitted here? | ||||
|                 let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?; | ||||
|                 // 12 < 42, and when ascending, we want to see 12 first, so the smallest. | ||||
|                 // Hence, when ascending, smaller is better | ||||
|                 Some(if self.ascending { order.reverse() } else { order }) | ||||
|             } | ||||
|             (serde_json::Value::String(left), serde_json::Value::String(right)) => { | ||||
|                 let order = left.cmp(right); | ||||
|                 // Taking e.g. "a" and "z" | ||||
|                 // "a" < "z", and when ascending, we want to see "a" first, so the smallest. | ||||
|                 // Hence, when ascending, smaller is better | ||||
|                 Some(if self.ascending { order.reverse() } else { order }) | ||||
|             } | ||||
|             _ => None, | ||||
|         } | ||||
|         Some(compare_sort_values(self.ascending, &self.value, &other.value)) | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
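The branches of `compare_sort_values` above all encode the same convention: `Ordering::Greater` means "better", so ascending sorts reverse the natural order of the values while nulls always lose. Hypothetical expectations, assuming this module's `compare_sort_values` is in scope:

    use std::cmp::Ordering;

    use serde_json::json;

    fn sort_value_examples() {
        // 12 < 42 and ascending wants 12 first, so 12 is "better" (Greater).
        assert_eq!(compare_sort_values(true, &json!(12), &json!(42)), Ordering::Greater);
        // Descending keeps the natural order: 12 ranks below 42.
        assert_eq!(compare_sort_values(false, &json!(12), &json!(42)), Ordering::Less);
        // Nulls rank below everything else, whatever the direction.
        assert_eq!(compare_sort_values(true, &json!(null), &json!("a")), Ordering::Less);
    }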
| @@ -11,7 +11,7 @@ use either::Either; | ||||
| pub use matching_words::MatchingWords; | ||||
| use matching_words::{MatchType, PartialMatch}; | ||||
| use r#match::{Match, MatchPosition}; | ||||
| use serde::Serialize; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use simple_token_kind::SimpleTokenKind; | ||||
| use utoipa::ToSchema; | ||||
|  | ||||
| @@ -101,11 +101,11 @@ impl FormatOptions { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Debug, Clone, PartialEq, Eq, ToSchema)] | ||||
| #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)] | ||||
| pub struct MatchBounds { | ||||
|     pub start: usize, | ||||
|     pub length: usize, | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     #[serde(skip_serializing_if = "Option::is_none", default)] | ||||
|     pub indices: Option<Vec<usize>>, | ||||
| } | ||||
|  | ||||
|   | ||||
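Adding `Deserialize` and a `default` on `indices` lets `MatchBounds` round-trip through JSON even when the field was skipped at serialization time. A hypothetical round-trip check, assuming `MatchBounds` is in scope:

    use serde_json::json;

    fn matchbounds_roundtrip() -> serde_json::Result<()> {
        // `indices` is absent from the payload; `default` fills in `None`.
        let bounds: MatchBounds = serde_json::from_value(json!({ "start": 3, "length": 7 }))?;
        assert_eq!((bounds.start, bounds.length), (3, 7));
        assert!(bounds.indices.is_none());
        Ok(())
    }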
| @@ -563,7 +563,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| #[tracing::instrument(level = "trace", skip_all, target = "search::universe")] | ||||
| #[tracing::instrument(level = "debug", skip_all, target = "search::universe")] | ||||
| pub fn filtered_universe( | ||||
|     index: &Index, | ||||
|     txn: &RoTxn<'_>, | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| use std::sync::atomic::{AtomicBool, Ordering}; | ||||
| use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use rayon::{ThreadPool, ThreadPoolBuilder}; | ||||
| @@ -9,6 +9,8 @@ use thiserror::Error; | ||||
| #[derive(Debug)] | ||||
| pub struct ThreadPoolNoAbort { | ||||
|     thread_pool: ThreadPool, | ||||
|     /// The number of active operations. | ||||
|     active_operations: AtomicUsize, | ||||
|     /// Set to true if the thread pool caught a panic. | ||||
|     pool_catched_panic: Arc<AtomicBool>, | ||||
| } | ||||
| @@ -19,7 +21,9 @@ impl ThreadPoolNoAbort { | ||||
|         OP: FnOnce() -> R + Send, | ||||
|         R: Send, | ||||
|     { | ||||
|         self.active_operations.fetch_add(1, Ordering::Relaxed); | ||||
|         let output = self.thread_pool.install(op); | ||||
|         self.active_operations.fetch_sub(1, Ordering::Relaxed); | ||||
|         // While resetting the pool panic catcher, we return an error if we caught one. | ||||
|         if self.pool_catched_panic.swap(false, Ordering::SeqCst) { | ||||
|             Err(PanicCatched) | ||||
| @@ -31,6 +35,11 @@ impl ThreadPoolNoAbort { | ||||
|     pub fn current_num_threads(&self) -> usize { | ||||
|         self.thread_pool.current_num_threads() | ||||
|     } | ||||
|  | ||||
|     /// The number of active operations. | ||||
|     pub fn active_operations(&self) -> usize { | ||||
|         self.active_operations.load(Ordering::Relaxed) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Error, Debug)] | ||||
| @@ -64,6 +73,10 @@ impl ThreadPoolNoAbortBuilder { | ||||
|             let catched_panic = pool_catched_panic.clone(); | ||||
|             move |_result| catched_panic.store(true, Ordering::SeqCst) | ||||
|         }); | ||||
|         Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic }) | ||||
|         Ok(ThreadPoolNoAbort { | ||||
|             thread_pool: self.0.build()?, | ||||
|             active_operations: AtomicUsize::new(0), | ||||
|             pool_catched_panic, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|   | ||||
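The new `active_operations` counter is what later lets callers choose between nesting into the pool and staying on the current thread. A sketch of that intended usage, with a hypothetical saturation `limit` (the embedders further down use `REQUEST_PARALLELISM` for this):

    fn run_maybe_parallel<OP, R>(
        pool: &ThreadPoolNoAbort,
        limit: usize,
        op: OP,
    ) -> Result<R, PanicCatched>
    where
        OP: FnOnce() -> R + Send,
        R: Send,
    {
        if pool.active_operations() >= limit {
            // Pool already saturated: run inline rather than queueing another
            // rayon job that would only block a worker thread.
            Ok(op())
        } else {
            pool.install(op)
        }
    }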
| @@ -5,6 +5,8 @@ use std::marker::PhantomData; | ||||
| use std::mem; | ||||
| use std::num::NonZeroU16; | ||||
| use std::ops::Range; | ||||
| use std::sync::atomic::{self, AtomicUsize}; | ||||
| use std::sync::Arc; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use bbqueue::framed::{FrameGrantR, FrameProducer}; | ||||
| @@ -71,12 +73,23 @@ pub fn extractor_writer_bbqueue( | ||||
|         consumer | ||||
|     }); | ||||
|  | ||||
|     let sent_messages_attempts = Arc::new(AtomicUsize::new(0)); | ||||
|     let blocking_sent_messages_attempts = Arc::new(AtomicUsize::new(0)); | ||||
|  | ||||
|     let (sender, receiver) = flume::bounded(channel_capacity); | ||||
|     let sender = ExtractorBbqueueSender { sender, producers, max_grant }; | ||||
|     let sender = ExtractorBbqueueSender { | ||||
|         sender, | ||||
|         producers, | ||||
|         max_grant, | ||||
|         sent_messages_attempts: sent_messages_attempts.clone(), | ||||
|         blocking_sent_messages_attempts: blocking_sent_messages_attempts.clone(), | ||||
|     }; | ||||
|     let receiver = WriterBbqueueReceiver { | ||||
|         receiver, | ||||
|         look_at_consumer: (0..consumers.len()).cycle(), | ||||
|         consumers, | ||||
|         sent_messages_attempts, | ||||
|         blocking_sent_messages_attempts, | ||||
|     }; | ||||
|     (sender, receiver) | ||||
| } | ||||
| @@ -92,6 +105,12 @@ pub struct ExtractorBbqueueSender<'a> { | ||||
|     /// It will never be able to store more than that as the | ||||
|     /// buffer cannot split data into two parts. | ||||
|     max_grant: usize, | ||||
|     /// The total number of attempts to send messages | ||||
|     /// over the bbqueue channel. | ||||
|     sent_messages_attempts: Arc<AtomicUsize>, | ||||
|     /// The number of times an attempt to send a | ||||
|     /// messages failed and we had to pause for a bit. | ||||
|     /// message failed and we had to pause for a bit. | ||||
| } | ||||
|  | ||||
| pub struct WriterBbqueueReceiver<'a> { | ||||
| @@ -104,6 +123,12 @@ pub struct WriterBbqueueReceiver<'a> { | ||||
|     look_at_consumer: Cycle<Range<usize>>, | ||||
|     /// The BBQueue frames to read when waking-up. | ||||
|     consumers: Vec<bbqueue::framed::FrameConsumer<'a>>, | ||||
|     /// The total number of attempts to send messages | ||||
|     /// over the bbqueue channel. | ||||
|     sent_messages_attempts: Arc<AtomicUsize>, | ||||
|     /// The number of times an attempt to send a | ||||
|     /// message failed and we had to pause for a bit. | ||||
|     blocking_sent_messages_attempts: Arc<AtomicUsize>, | ||||
| } | ||||
|  | ||||
| /// The action to perform on the receiver/writer side. | ||||
| @@ -169,6 +194,16 @@ impl<'a> WriterBbqueueReceiver<'a> { | ||||
|         } | ||||
|         None | ||||
|     } | ||||
|  | ||||
|     /// Returns the total count of attempts to send messages through the BBQueue channel. | ||||
|     pub fn sent_messages_attempts(&self) -> usize { | ||||
|         self.sent_messages_attempts.load(atomic::Ordering::Relaxed) | ||||
|     } | ||||
|  | ||||
|     /// Returns the count of attempts to send messages that had to be paused due to BBQueue being full. | ||||
|     pub fn blocking_sent_messages_attempts(&self) -> usize { | ||||
|         self.blocking_sent_messages_attempts.load(atomic::Ordering::Relaxed) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct FrameWithHeader<'a> { | ||||
| @@ -458,10 +493,17 @@ impl<'b> ExtractorBbqueueSender<'b> { | ||||
|         } | ||||
|  | ||||
|         // Spin loop to have a frame the size we requested. | ||||
|         reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { | ||||
|             payload_header.serialize_into(grant); | ||||
|             Ok(()) | ||||
|         })?; | ||||
|         reserve_and_write_grant( | ||||
|             &mut producer, | ||||
|             total_length, | ||||
|             &self.sender, | ||||
|             &self.sent_messages_attempts, | ||||
|             &self.blocking_sent_messages_attempts, | ||||
|             |grant| { | ||||
|                 payload_header.serialize_into(grant); | ||||
|                 Ok(()) | ||||
|             }, | ||||
|         )?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -500,20 +542,28 @@ impl<'b> ExtractorBbqueueSender<'b> { | ||||
|         } | ||||
|  | ||||
|         // Spin loop to have a frame the size we requested. | ||||
|         reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { | ||||
|             let header_size = payload_header.header_size(); | ||||
|             let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||
|             payload_header.serialize_into(header_bytes); | ||||
|         reserve_and_write_grant( | ||||
|             &mut producer, | ||||
|             total_length, | ||||
|             &self.sender, | ||||
|             &self.sent_messages_attempts, | ||||
|             &self.blocking_sent_messages_attempts, | ||||
|             |grant| { | ||||
|                 let header_size = payload_header.header_size(); | ||||
|                 let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||
|                 payload_header.serialize_into(header_bytes); | ||||
|  | ||||
|             if dimensions != 0 { | ||||
|                 let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>()); | ||||
|                 for (embedding, output) in embeddings.iter().zip(output_iter) { | ||||
|                     output.copy_from_slice(bytemuck::cast_slice(embedding)); | ||||
|                 if dimensions != 0 { | ||||
|                     let output_iter = | ||||
|                         remaining.chunks_exact_mut(dimensions * mem::size_of::<f32>()); | ||||
|                     for (embedding, output) in embeddings.iter().zip(output_iter) { | ||||
|                         output.copy_from_slice(bytemuck::cast_slice(embedding)); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             Ok(()) | ||||
|         })?; | ||||
|                 Ok(()) | ||||
|             }, | ||||
|         )?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -571,13 +621,20 @@ impl<'b> ExtractorBbqueueSender<'b> { | ||||
|         } | ||||
|  | ||||
|         // Spin loop to have a frame the size we requested. | ||||
|         reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { | ||||
|             let header_size = payload_header.header_size(); | ||||
|             let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||
|             payload_header.serialize_into(header_bytes); | ||||
|             let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); | ||||
|             key_value_writer(key_buffer, value_buffer) | ||||
|         })?; | ||||
|         reserve_and_write_grant( | ||||
|             &mut producer, | ||||
|             total_length, | ||||
|             &self.sender, | ||||
|             &self.sent_messages_attempts, | ||||
|             &self.blocking_sent_messages_attempts, | ||||
|             |grant| { | ||||
|                 let header_size = payload_header.header_size(); | ||||
|                 let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||
|                 payload_header.serialize_into(header_bytes); | ||||
|                 let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); | ||||
|                 key_value_writer(key_buffer, value_buffer) | ||||
|             }, | ||||
|         )?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -619,12 +676,19 @@ impl<'b> ExtractorBbqueueSender<'b> { | ||||
|         } | ||||
|  | ||||
|         // Spin loop to have a frame the size we requested. | ||||
|         reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { | ||||
|             let header_size = payload_header.header_size(); | ||||
|             let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||
|             payload_header.serialize_into(header_bytes); | ||||
|             key_writer(remaining) | ||||
|         })?; | ||||
|         reserve_and_write_grant( | ||||
|             &mut producer, | ||||
|             total_length, | ||||
|             &self.sender, | ||||
|             &self.sent_messages_attempts, | ||||
|             &self.blocking_sent_messages_attempts, | ||||
|             |grant| { | ||||
|                 let header_size = payload_header.header_size(); | ||||
|                 let (header_bytes, remaining) = grant.split_at_mut(header_size); | ||||
|                 payload_header.serialize_into(header_bytes); | ||||
|                 key_writer(remaining) | ||||
|             }, | ||||
|         )?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -637,12 +701,18 @@ fn reserve_and_write_grant<F>( | ||||
|     producer: &mut FrameProducer, | ||||
|     total_length: usize, | ||||
|     sender: &flume::Sender<ReceiverAction>, | ||||
|     sent_messages_attempts: &AtomicUsize, | ||||
|     blocking_sent_messages_attempts: &AtomicUsize, | ||||
|     f: F, | ||||
| ) -> crate::Result<()> | ||||
| where | ||||
|     F: FnOnce(&mut [u8]) -> crate::Result<()>, | ||||
| { | ||||
|     loop { | ||||
|         // An attempt means trying multiple times, | ||||
|         // whether it succeeded or not. | ||||
|         sent_messages_attempts.fetch_add(1, atomic::Ordering::Relaxed); | ||||
|  | ||||
|         for _ in 0..10_000 { | ||||
|             match producer.grant(total_length) { | ||||
|                 Ok(mut grant) => { | ||||
| @@ -666,6 +736,10 @@ where | ||||
|             return Err(Error::InternalError(InternalError::AbortedIndexation)); | ||||
|         } | ||||
|  | ||||
|         // We made an attempt to send a message in the | ||||
|         // bbqueue channel but it didn't succeed. | ||||
|         blocking_sent_messages_attempts.fetch_add(1, atomic::Ordering::Relaxed); | ||||
|  | ||||
|         // We prefer to yield and allow the writing thread | ||||
|         // to do its job, especially beneficial when there | ||||
|         // is only one CPU core available. | ||||
|   | ||||
| @@ -144,7 +144,7 @@ impl<'doc> Update<'doc> { | ||||
|         )?) | ||||
|     } | ||||
|  | ||||
|     pub fn updated(&self) -> DocumentFromVersions<'_, 'doc> { | ||||
|     pub fn only_changed_fields(&self) -> DocumentFromVersions<'_, 'doc> { | ||||
|         DocumentFromVersions::new(&self.new) | ||||
|     } | ||||
|  | ||||
| @@ -182,7 +182,7 @@ impl<'doc> Update<'doc> { | ||||
|         let mut cached_current = None; | ||||
|         let mut updated_selected_field_count = 0; | ||||
|  | ||||
|         for entry in self.updated().iter_top_level_fields() { | ||||
|         for entry in self.only_changed_fields().iter_top_level_fields() { | ||||
|             let (key, updated_value) = entry?; | ||||
|  | ||||
|             if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { | ||||
| @@ -241,7 +241,7 @@ impl<'doc> Update<'doc> { | ||||
|         Ok(has_deleted_fields) | ||||
|     } | ||||
|  | ||||
|     pub fn updated_vectors( | ||||
|     pub fn only_changed_vectors( | ||||
|         &self, | ||||
|         doc_alloc: &'doc Bump, | ||||
|         embedders: &'doc EmbeddingConfigs, | ||||
|   | ||||
| @@ -199,7 +199,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor { | ||||
|                         .transpose()?; | ||||
|  | ||||
|                     let updated_geo = update | ||||
|                         .updated() | ||||
|                         .merged(rtxn, index, db_fields_ids_map)? | ||||
|                         .geo_field()? | ||||
|                         .map(|geo| extract_geo_coordinates(external_id, geo)) | ||||
|                         .transpose()?; | ||||
|   | ||||
| @@ -99,7 +99,8 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> { | ||||
|                         context.db_fields_ids_map, | ||||
|                         &context.doc_alloc, | ||||
|                     )?; | ||||
|                     let new_vectors = update.updated_vectors(&context.doc_alloc, self.embedders)?; | ||||
|                     let new_vectors = | ||||
|                         update.only_changed_vectors(&context.doc_alloc, self.embedders)?; | ||||
|  | ||||
|                     if let Some(new_vectors) = &new_vectors { | ||||
|                         unused_vectors_distribution.append(new_vectors)?; | ||||
|   | ||||
| @@ -234,7 +234,7 @@ where | ||||
|         ); | ||||
|         let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); | ||||
|         { | ||||
|             let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); | ||||
|             let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors"); | ||||
|             let _entered = span.enter(); | ||||
|  | ||||
|             extract( | ||||
| @@ -247,7 +247,7 @@ where | ||||
|             )?; | ||||
|         } | ||||
|         { | ||||
|             let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); | ||||
|             let span = tracing::debug_span!(target: "indexing::documents::merge", "vectors"); | ||||
|             let _entered = span.enter(); | ||||
|  | ||||
|             for config in &mut index_embeddings { | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| use std::sync::atomic::AtomicBool; | ||||
| use std::sync::RwLock; | ||||
| use std::sync::{Once, RwLock}; | ||||
| use std::thread::{self, Builder}; | ||||
|  | ||||
| use big_s::S; | ||||
| @@ -33,6 +33,8 @@ mod post_processing; | ||||
| mod update_by_function; | ||||
| mod write; | ||||
|  | ||||
| static LOG_MEMORY_METRICS_ONCE: Once = Once::new(); | ||||
|  | ||||
| /// This is the main function of this crate. | ||||
| /// | ||||
| /// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`]. | ||||
| @@ -93,6 +95,15 @@ where | ||||
|         }, | ||||
|     ); | ||||
|  | ||||
|     LOG_MEMORY_METRICS_ONCE.call_once(|| { | ||||
|         tracing::debug!( | ||||
|             "Indexation allocated memory metrics - \ | ||||
|             Total BBQueue size: {total_bbbuffer_capacity}, \ | ||||
|             Total extractor memory: {:?}", | ||||
|             grenad_parameters.max_memory, | ||||
|         ); | ||||
|     }); | ||||
|  | ||||
|     let (extractor_sender, writer_receiver) = pool | ||||
|         .install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000)) | ||||
|         .unwrap(); | ||||
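The `Once` guard above makes the memory-metrics line a process-wide, one-shot log rather than one line per indexing run. A minimal sketch of the pattern in isolation (hypothetical function and argument names):

    use std::sync::Once;

    static LOGGED: Once = Once::new();

    fn log_memory_metrics_once(bbqueue_capacity: usize, max_memory: Option<usize>) {
        // Only the first call in the process emits the line; later calls are no-ops.
        LOGGED.call_once(|| {
            tracing::debug!(
                "Total BBQueue size: {bbqueue_capacity}, total extractor memory: {max_memory:?}"
            );
        });
    }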
| @@ -179,13 +190,16 @@ where | ||||
|  | ||||
|         indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase); | ||||
|  | ||||
|         build_vectors( | ||||
|             index, | ||||
|             wtxn, | ||||
|             index_embeddings, | ||||
|             &mut arroy_writers, | ||||
|             &indexing_context.must_stop_processing, | ||||
|         )?; | ||||
|         pool.install(|| { | ||||
|             build_vectors( | ||||
|                 index, | ||||
|                 wtxn, | ||||
|                 index_embeddings, | ||||
|                 &mut arroy_writers, | ||||
|                 &indexing_context.must_stop_processing, | ||||
|             ) | ||||
|         }) | ||||
|         .unwrap()?; | ||||
|  | ||||
|         post_processing::post_process( | ||||
|             indexing_context, | ||||
|   | ||||
| @@ -72,11 +72,23 @@ pub(super) fn write_to_db( | ||||
|             &mut aligned_embedding, | ||||
|         )?; | ||||
|     } | ||||
|  | ||||
|     write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?; | ||||
|  | ||||
|     let direct_attempts = writer_receiver.sent_messages_attempts(); | ||||
|     let blocking_attempts = writer_receiver.blocking_sent_messages_attempts(); | ||||
|     let congestion_pct = (blocking_attempts as f64 / direct_attempts as f64) * 100.0; | ||||
|     tracing::debug!( | ||||
|         "Channel congestion metrics - \ | ||||
|         Attempts: {direct_attempts}, \ | ||||
|         Blocked attempts: {blocking_attempts} \ | ||||
|         ({congestion_pct:.1}% congestion)" | ||||
|     ); | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| #[tracing::instrument(level = "trace", skip_all, target = "indexing::vectors")] | ||||
| #[tracing::instrument(level = "debug", skip_all, target = "indexing::vectors")] | ||||
| pub(super) fn build_vectors<MSP>( | ||||
|     index: &Index, | ||||
|     wtxn: &mut RwTxn<'_>, | ||||
|   | ||||
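The congestion line above divides blocked attempts by total attempts. A tiny helper mirroring that computation, with a zero-attempt guard that is an addition of this sketch, not of the change set:

    fn congestion_percentage(attempts: usize, blocked: usize) -> f64 {
        if attempts == 0 {
            0.0
        } else {
            blocked as f64 / attempts as f64 * 100.0
        }
    }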
| @@ -1,7 +1,9 @@ | ||||
| mod v1_12; | ||||
| mod v1_13; | ||||
|  | ||||
| use heed::RwTxn; | ||||
| use v1_12::{V1_12_3_To_Current, V1_12_To_V1_12_3}; | ||||
| use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3}; | ||||
| use v1_13::V1_13_0_To_Current; | ||||
|  | ||||
| use crate::progress::{Progress, VariableNameStep}; | ||||
| use crate::{Index, InternalError, Result}; | ||||
| @@ -26,11 +28,13 @@ pub fn upgrade( | ||||
|     progress: Progress, | ||||
| ) -> Result<bool> { | ||||
|     let from = index.get_version(wtxn)?.unwrap_or(db_version); | ||||
|     let upgrade_functions: &[&dyn UpgradeIndex] = &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_Current()]; | ||||
|     let upgrade_functions: &[&dyn UpgradeIndex] = | ||||
|         &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_V1_13_0 {}, &V1_13_0_To_Current()]; | ||||
|  | ||||
|     let start = match from { | ||||
|         (1, 12, 0..=2) => 0, | ||||
|         (1, 12, 3..) => 1, | ||||
|         (1, 13, 0) => 2, | ||||
|         // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other. | ||||
|         (1, 13, _) => return Ok(false), | ||||
|         (major, minor, patch) => { | ||||
|   | ||||
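For readers of the dispatch above: each entry of `upgrade_functions` moves the index one version step forward and `start` selects where to enter the chain. A loose sketch of how such a chain is typically driven, assuming the values computed above and that `Progress` can be cloned (this is not the exact code of the crate):

    fn run_upgrades(
        upgrade_functions: &[&dyn UpgradeIndex],
        start: usize,
        wtxn: &mut heed::RwTxn<'_>,
        index: &Index,
        from: (u32, u32, u32),
        progress: Progress,
    ) -> Result<bool> {
        let mut must_regenerate_stats = false;
        for upgrade in upgrade_functions.iter().skip(start) {
            // Each step may request that the index stats be recomputed afterwards.
            must_regenerate_stats |= upgrade.upgrade(wtxn, index, from, progress.clone())?;
        }
        Ok(must_regenerate_stats)
    }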
| @@ -1,11 +1,9 @@ | ||||
| use heed::RwTxn; | ||||
|  | ||||
| use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; | ||||
| use super::UpgradeIndex; | ||||
| use crate::progress::Progress; | ||||
| use crate::{make_enum_progress, Index, Result}; | ||||
|  | ||||
| use super::UpgradeIndex; | ||||
|  | ||||
| #[allow(non_camel_case_types)] | ||||
| pub(super) struct V1_12_To_V1_12_3 {} | ||||
|  | ||||
| @@ -33,9 +31,9 @@ impl UpgradeIndex for V1_12_To_V1_12_3 { | ||||
| } | ||||
|  | ||||
| #[allow(non_camel_case_types)] | ||||
| pub(super) struct V1_12_3_To_Current(); | ||||
| pub(super) struct V1_12_3_To_V1_13_0 {} | ||||
|  | ||||
| impl UpgradeIndex for V1_12_3_To_Current { | ||||
| impl UpgradeIndex for V1_12_3_To_V1_13_0 { | ||||
|     fn upgrade( | ||||
|         &self, | ||||
|         _wtxn: &mut RwTxn, | ||||
| @@ -43,14 +41,11 @@ impl UpgradeIndex for V1_12_3_To_Current { | ||||
|         _original: (u32, u32, u32), | ||||
|         _progress: Progress, | ||||
|     ) -> Result<bool> { | ||||
|         Ok(false) | ||||
|         // recompute the index stats | ||||
|         Ok(true) | ||||
|     } | ||||
|  | ||||
|     fn target_version(&self) -> (u32, u32, u32) { | ||||
|         ( | ||||
|             VERSION_MAJOR.parse().unwrap(), | ||||
|             VERSION_MINOR.parse().unwrap(), | ||||
|             VERSION_PATCH.parse().unwrap(), | ||||
|         ) | ||||
|         (1, 13, 0) | ||||
|     } | ||||
| } | ||||
|   | ||||
							
								
								
									
crates/milli/src/update/upgrade/v1_13.rs (new file, 29 lines)
| @@ -0,0 +1,29 @@ | ||||
| use heed::RwTxn; | ||||
|  | ||||
| use super::UpgradeIndex; | ||||
| use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; | ||||
| use crate::progress::Progress; | ||||
| use crate::{Index, Result}; | ||||
|  | ||||
| #[allow(non_camel_case_types)] | ||||
| pub(super) struct V1_13_0_To_Current(); | ||||
|  | ||||
| impl UpgradeIndex for V1_13_0_To_Current { | ||||
|     fn upgrade( | ||||
|         &self, | ||||
|         _wtxn: &mut RwTxn, | ||||
|         _index: &Index, | ||||
|         _original: (u32, u32, u32), | ||||
|         _progress: Progress, | ||||
|     ) -> Result<bool> { | ||||
|         Ok(false) | ||||
|     } | ||||
|  | ||||
|     fn target_version(&self) -> (u32, u32, u32) { | ||||
|         ( | ||||
|             VERSION_MAJOR.parse().unwrap(), | ||||
|             VERSION_MINOR.parse().unwrap(), | ||||
|             VERSION_PATCH.parse().unwrap(), | ||||
|         ) | ||||
|     } | ||||
| } | ||||
| @@ -410,8 +410,43 @@ impl ArroyWrapper { | ||||
|     fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> { | ||||
|         self.database.remap_data_type() | ||||
|     } | ||||
|  | ||||
|     pub fn aggregate_stats( | ||||
|         &self, | ||||
|         rtxn: &RoTxn, | ||||
|         stats: &mut ArroyStats, | ||||
|     ) -> Result<(), arroy::Error> { | ||||
|         if self.quantized { | ||||
|             for reader in self.readers(rtxn, self.quantized_db()) { | ||||
|                 let reader = reader?; | ||||
|                 let documents = reader.item_ids(); | ||||
|                 if documents.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|                 stats.documents |= documents; | ||||
|                 stats.number_of_embeddings += documents.len(); | ||||
|             } | ||||
|         } else { | ||||
|             for reader in self.readers(rtxn, self.angular_db()) { | ||||
|                 let reader = reader?; | ||||
|                 let documents = reader.item_ids(); | ||||
|                 if documents.is_empty() { | ||||
|                     break; | ||||
|                 } | ||||
|                 stats.documents |= documents; | ||||
|                 stats.number_of_embeddings += documents.len(); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Default, Clone)] | ||||
| pub struct ArroyStats { | ||||
|     pub number_of_embeddings: u64, | ||||
|     pub documents: RoaringBitmap, | ||||
| } | ||||
| /// One or multiple embeddings stored consecutively in a flat vector. | ||||
| pub struct Embeddings<F> { | ||||
|     data: Vec<F>, | ||||
| @@ -611,6 +646,7 @@ impl Embedder { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[tracing::instrument(level = "debug", skip_all, target = "search")] | ||||
|     pub fn embed_one( | ||||
|         &self, | ||||
|         text: String, | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use rayon::slice::ParallelSlice as _; | ||||
|  | ||||
| use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind}; | ||||
| use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions}; | ||||
| use super::DistributionShift; | ||||
| use super::{DistributionShift, REQUEST_PARALLELISM}; | ||||
| use crate::error::FaultSource; | ||||
| use crate::vector::Embedding; | ||||
| use crate::ThreadPoolNoAbort; | ||||
| @@ -118,14 +118,20 @@ impl Embedder { | ||||
|         text_chunks: Vec<Vec<String>>, | ||||
|         threads: &ThreadPoolNoAbort, | ||||
|     ) -> Result<Vec<Vec<Embedding>>, EmbedError> { | ||||
|         threads | ||||
|             .install(move || { | ||||
|                 text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||
|             }) | ||||
|             .map_err(|error| EmbedError { | ||||
|                 kind: EmbedErrorKind::PanicInThreadPool(error), | ||||
|                 fault: FaultSource::Bug, | ||||
|             })? | ||||
|         // This condition helps reduce the number of active rayon jobs | ||||
|         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||
|         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||
|             text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||
|         } else { | ||||
|             threads | ||||
|                 .install(move || { | ||||
|                     text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||
|                 }) | ||||
|                 .map_err(|error| EmbedError { | ||||
|                     kind: EmbedErrorKind::PanicInThreadPool(error), | ||||
|                     fault: FaultSource::Bug, | ||||
|                 })? | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn embed_chunks_ref( | ||||
| @@ -133,20 +139,32 @@ impl Embedder { | ||||
|         texts: &[&str], | ||||
|         threads: &ThreadPoolNoAbort, | ||||
|     ) -> Result<Vec<Vec<f32>>, EmbedError> { | ||||
|         threads | ||||
|             .install(move || { | ||||
|                 let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||
|                     .par_chunks(self.prompt_count_in_chunk_hint()) | ||||
|                     .map(move |chunk| self.embed(chunk, None)) | ||||
|                     .collect(); | ||||
|         // This condition helps reduce the number of active rayon jobs | ||||
|         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||
|         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||
|             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||
|                 .chunks(self.prompt_count_in_chunk_hint()) | ||||
|                 .map(move |chunk| self.embed(chunk, None)) | ||||
|                 .collect(); | ||||
|  | ||||
|                 let embeddings = embeddings?; | ||||
|                 Ok(embeddings.into_iter().flatten().collect()) | ||||
|             }) | ||||
|             .map_err(|error| EmbedError { | ||||
|                 kind: EmbedErrorKind::PanicInThreadPool(error), | ||||
|                 fault: FaultSource::Bug, | ||||
|             })? | ||||
|             let embeddings = embeddings?; | ||||
|             Ok(embeddings.into_iter().flatten().collect()) | ||||
|         } else { | ||||
|             threads | ||||
|                 .install(move || { | ||||
|                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||
|                         .par_chunks(self.prompt_count_in_chunk_hint()) | ||||
|                         .map(move |chunk| self.embed(chunk, None)) | ||||
|                         .collect(); | ||||
|  | ||||
|                     let embeddings = embeddings?; | ||||
|                     Ok(embeddings.into_iter().flatten().collect()) | ||||
|                 }) | ||||
|                 .map_err(|error| EmbedError { | ||||
|                     kind: EmbedErrorKind::PanicInThreadPool(error), | ||||
|                     fault: FaultSource::Bug, | ||||
|                 })? | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn chunk_count_hint(&self) -> usize { | ||||
|   | ||||
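The guard above (repeated for the Ollama and REST embedders below) swaps rayon's `par_chunks` for a plain sequential `chunks` walk once the pool already runs `REQUEST_PARALLELISM` operations, so saturated indexing stops piling up blocked rayon jobs that each hold an LMDB read transaction. A distilled sketch of the two paths, with a hypothetical `embed_chunk` standing in for the real per-chunk call:

    use rayon::prelude::*;

    // Hypothetical stand-in for the real per-chunk embedding request.
    fn embed_chunk(chunk: &[&str]) -> Result<Vec<Vec<f32>>, String> {
        Ok(chunk.iter().map(|_| vec![0.0_f32]).collect())
    }

    fn embed_all(
        texts: &[&str],
        chunk_size: usize,
        saturated: bool,
    ) -> Result<Vec<Vec<f32>>, String> {
        let nested: Result<Vec<Vec<Vec<f32>>>, String> = if saturated {
            // Sequential path: no new rayon job, no extra worker blocked on I/O.
            texts.chunks(chunk_size).map(embed_chunk).collect()
        } else {
            texts.par_chunks(chunk_size).map(embed_chunk).collect()
        };
        Ok(nested?.into_iter().flatten().collect())
    }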
| @@ -7,7 +7,7 @@ use rayon::slice::ParallelSlice as _; | ||||
|  | ||||
| use super::error::{EmbedError, NewEmbedderError}; | ||||
| use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions}; | ||||
| use super::DistributionShift; | ||||
| use super::{DistributionShift, REQUEST_PARALLELISM}; | ||||
| use crate::error::FaultSource; | ||||
| use crate::vector::error::EmbedErrorKind; | ||||
| use crate::vector::Embedding; | ||||
| @@ -255,14 +255,20 @@ impl Embedder { | ||||
|         text_chunks: Vec<Vec<String>>, | ||||
|         threads: &ThreadPoolNoAbort, | ||||
|     ) -> Result<Vec<Vec<Embedding>>, EmbedError> { | ||||
|         threads | ||||
|             .install(move || { | ||||
|                 text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||
|             }) | ||||
|             .map_err(|error| EmbedError { | ||||
|                 kind: EmbedErrorKind::PanicInThreadPool(error), | ||||
|                 fault: FaultSource::Bug, | ||||
|             })? | ||||
|         // This condition helps reduce the number of active rayon jobs | ||||
|         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||
|         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||
|             text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||
|         } else { | ||||
|             threads | ||||
|                 .install(move || { | ||||
|                     text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect() | ||||
|                 }) | ||||
|                 .map_err(|error| EmbedError { | ||||
|                     kind: EmbedErrorKind::PanicInThreadPool(error), | ||||
|                     fault: FaultSource::Bug, | ||||
|                 })? | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn embed_chunks_ref( | ||||
| @@ -270,20 +276,31 @@ impl Embedder { | ||||
|         texts: &[&str], | ||||
|         threads: &ThreadPoolNoAbort, | ||||
|     ) -> Result<Vec<Vec<f32>>, EmbedError> { | ||||
|         threads | ||||
|             .install(move || { | ||||
|                 let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||
|                     .par_chunks(self.prompt_count_in_chunk_hint()) | ||||
|                     .map(move |chunk| self.embed(chunk, None)) | ||||
|                     .collect(); | ||||
|         // This condition helps reduce the number of active rayon jobs | ||||
|         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows. | ||||
|         if threads.active_operations() >= REQUEST_PARALLELISM { | ||||
|             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||
|                 .chunks(self.prompt_count_in_chunk_hint()) | ||||
|                 .map(move |chunk| self.embed(chunk, None)) | ||||
|                 .collect(); | ||||
|             let embeddings = embeddings?; | ||||
|             Ok(embeddings.into_iter().flatten().collect()) | ||||
|         } else { | ||||
|             threads | ||||
|                 .install(move || { | ||||
|                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts | ||||
|                         .par_chunks(self.prompt_count_in_chunk_hint()) | ||||
|                         .map(move |chunk| self.embed(chunk, None)) | ||||
|                         .collect(); | ||||
|  | ||||
|                 let embeddings = embeddings?; | ||||
|                 Ok(embeddings.into_iter().flatten().collect()) | ||||
|             }) | ||||
|             .map_err(|error| EmbedError { | ||||
|                 kind: EmbedErrorKind::PanicInThreadPool(error), | ||||
|                 fault: FaultSource::Bug, | ||||
|             })? | ||||
|                     let embeddings = embeddings?; | ||||
|                     Ok(embeddings.into_iter().flatten().collect()) | ||||
|                 }) | ||||
|                 .map_err(|error| EmbedError { | ||||
|                     kind: EmbedErrorKind::PanicInThreadPool(error), | ||||
|                     fault: FaultSource::Bug, | ||||
|                 })? | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn chunk_count_hint(&self) -> usize { | ||||
|   | ||||
| @@ -130,6 +130,7 @@ impl Embedder { | ||||
|         let client = ureq::AgentBuilder::new() | ||||
|             .max_idle_connections(REQUEST_PARALLELISM * 2) | ||||
|             .max_idle_connections_per_host(REQUEST_PARALLELISM * 2) | ||||
|             .timeout(std::time::Duration::from_secs(30)) | ||||
|             .build(); | ||||
|  | ||||
|         let request = Request::new(options.request)?; | ||||
@@ -188,14 +189,20 @@ impl Embedder
         text_chunks: Vec<Vec<String>>,
         threads: &ThreadPoolNoAbort,
     ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
-        threads
-            .install(move || {
-                text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
-            })
-            .map_err(|error| EmbedError {
-                kind: EmbedErrorKind::PanicInThreadPool(error),
-                fault: FaultSource::Bug,
-            })?
+        // This condition helps reduce the number of active rayon jobs
+        // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
+        if threads.active_operations() >= REQUEST_PARALLELISM {
+            text_chunks.into_iter().map(move |chunk| self.embed(chunk, None)).collect()
+        } else {
+            threads
+                .install(move || {
+                    text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
+                })
+                .map_err(|error| EmbedError {
+                    kind: EmbedErrorKind::PanicInThreadPool(error),
+                    fault: FaultSource::Bug,
+                })?
+        }
     }

     pub(crate) fn embed_chunks_ref(
@@ -203,20 +210,32 @@ impl Embedder
         texts: &[&str],
         threads: &ThreadPoolNoAbort,
     ) -> Result<Vec<Embedding>, EmbedError> {
-        threads
-            .install(move || {
-                let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
-                    .par_chunks(self.prompt_count_in_chunk_hint())
-                    .map(move |chunk| self.embed_ref(chunk, None))
-                    .collect();
+        // This condition helps reduce the number of active rayon jobs
+        // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
+        if threads.active_operations() >= REQUEST_PARALLELISM {
+            let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
+                .chunks(self.prompt_count_in_chunk_hint())
+                .map(move |chunk| self.embed_ref(chunk, None))
+                .collect();

-                let embeddings = embeddings?;
-                Ok(embeddings.into_iter().flatten().collect())
-            })
-            .map_err(|error| EmbedError {
-                kind: EmbedErrorKind::PanicInThreadPool(error),
-                fault: FaultSource::Bug,
-            })?
+            let embeddings = embeddings?;
+            Ok(embeddings.into_iter().flatten().collect())
+        } else {
+            threads
+                .install(move || {
+                    let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
+                        .par_chunks(self.prompt_count_in_chunk_hint())
+                        .map(move |chunk| self.embed_ref(chunk, None))
+                        .collect();
+
+                    let embeddings = embeddings?;
+                    Ok(embeddings.into_iter().flatten().collect())
+                })
+                .map_err(|error| EmbedError {
+                    kind: EmbedErrorKind::PanicInThreadPool(error),
+                    fault: FaultSource::Bug,
+                })?
+        }
     }

     pub fn chunk_count_hint(&self) -> usize {
@@ -455,7 +455,7 @@ impl EmbeddingSettings {
                 EmbedderSource::Ollama,
                 EmbedderSource::Rest,
             ],
-            Self::DOCUMENT_TEMPLATE => &[
+            Self::DOCUMENT_TEMPLATE | Self::DOCUMENT_TEMPLATE_MAX_BYTES => &[
                 EmbedderSource::HuggingFace,
                 EmbedderSource::OpenAi,
                 EmbedderSource::Ollama,
@@ -490,6 +490,7 @@ impl EmbeddingSettings {
                 Self::MODEL,
                 Self::API_KEY,
                 Self::DOCUMENT_TEMPLATE,
+                Self::DOCUMENT_TEMPLATE_MAX_BYTES,
                 Self::DIMENSIONS,
                 Self::DISTRIBUTION,
                 Self::URL,
@@ -500,6 +501,7 @@ impl EmbeddingSettings {
                 Self::MODEL,
                 Self::REVISION,
                 Self::DOCUMENT_TEMPLATE,
+                Self::DOCUMENT_TEMPLATE_MAX_BYTES,
                 Self::DISTRIBUTION,
                 Self::BINARY_QUANTIZED,
             ],
@@ -507,6 +509,7 @@ impl EmbeddingSettings {
                 Self::SOURCE,
                 Self::MODEL,
                 Self::DOCUMENT_TEMPLATE,
+                Self::DOCUMENT_TEMPLATE_MAX_BYTES,
                 Self::URL,
                 Self::API_KEY,
                 Self::DIMENSIONS,
@@ -521,6 +524,7 @@ impl EmbeddingSettings {
                 Self::API_KEY,
                 Self::DIMENSIONS,
                 Self::DOCUMENT_TEMPLATE,
+                Self::DOCUMENT_TEMPLATE_MAX_BYTES,
                 Self::URL,
                 Self::REQUEST,
                 Self::RESPONSE,
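The settings hunks only widen each source's list of accepted fields, so documentTemplateMaxBytes can now be supplied anywhere documentTemplate already was. A hedged sketch of such an embedder-settings payload built with serde_json; the embedder name, model, and the 400-byte limit are illustrative values, not taken from this diff:

// Hypothetical embedder-settings payload: the field names follow the public
// embedder settings (documentTemplate / documentTemplateMaxBytes); the
// embedder name, model, and byte limit are made-up example values.
use serde_json::json;

fn main() {
    let settings = json!({
        "embedders": {
            "default": {
                "source": "openAi",
                "model": "text-embedding-3-small",
                "documentTemplate": "{{doc.title}}",
                // Now accepted alongside documentTemplate for this source.
                "documentTemplateMaxBytes": 400
            }
        }
    });
    println!("{}", serde_json::to_string_pretty(&settings).unwrap());
}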