Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-10-30 23:46:28 +00:00
			
		
		
		
	Merge #5147
5147: Batch progress r=dureuill a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/5068 ## What does this PR do? - ... ## PR checklist Please check if your PR fulfills the following requirements: - [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [ ] Have you read the contributing guidelines? - [ ] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
		| @@ -8,6 +8,7 @@ use bumpalo::Bump; | ||||
| use criterion::{criterion_group, criterion_main, Criterion}; | ||||
| use milli::documents::PrimaryKey; | ||||
| use milli::heed::{EnvOpenOptions, RwTxn}; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -151,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -166,7 +167,7 @@ fn indexing_songs_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -218,7 +219,7 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -233,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -263,7 +264,7 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -278,7 +279,7 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -332,7 +333,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -347,7 +348,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -409,7 +410,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -424,7 +425,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -454,7 +455,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -469,7 +470,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -495,7 +496,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -510,7 +511,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -563,7 +564,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -578,7 +579,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -630,7 +631,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -645,7 +646,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -697,7 +698,7 @@ fn indexing_wiki(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -712,7 +713,7 @@ fn indexing_wiki(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -763,7 +764,7 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -778,7 +779,7 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -808,7 +809,7 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -823,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -876,7 +877,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -891,7 +892,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -953,7 +954,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -968,7 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -999,7 +1000,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1014,7 +1015,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1041,7 +1042,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1056,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1108,7 +1109,7 @@ fn indexing_movies_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1123,7 +1124,7 @@ fn indexing_movies_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1174,7 +1175,7 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1189,7 +1190,7 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1219,7 +1220,7 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1234,7 +1235,7 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1287,7 +1288,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1302,7 +1303,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1350,7 +1351,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi | ||||
|             &document_changes, | ||||
|             EmbeddingConfigs::default(), | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -1400,7 +1401,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1415,7 +1416,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1445,7 +1446,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1460,7 +1461,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1486,7 +1487,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1501,7 +1502,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1576,7 +1577,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1591,7 +1592,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1667,7 +1668,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1682,7 +1683,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1750,7 +1751,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1765,7 +1766,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1817,7 +1818,7 @@ fn indexing_geo(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1832,7 +1833,7 @@ fn indexing_geo(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1883,7 +1884,7 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1898,7 +1899,7 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1928,7 +1929,7 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1943,7 +1944,7 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1996,7 +1997,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -2011,7 +2012,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,7 @@ use bumpalo::Bump; | ||||
| use criterion::BenchmarkId; | ||||
| use memmap2::Mmap; | ||||
| use milli::heed::EnvOpenOptions; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -110,7 +111,7 @@ pub fn base_setup(conf: &Conf) -> Index { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -125,7 +126,7 @@ pub fn base_setup(conf: &Conf) -> Index { | ||||
|         &document_changes, | ||||
|         EmbeddingConfigs::default(), | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,7 @@ use either::Either; | ||||
| use fuzzers::Operation; | ||||
| use milli::documents::mmap_from_objects; | ||||
| use milli::heed::EnvOpenOptions; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -128,7 +129,7 @@ fn main() { | ||||
|                                     None, | ||||
|                                     &mut new_fields_ids_map, | ||||
|                                     &|| false, | ||||
|                                     &|_progress| (), | ||||
|                                     Progress::default(), | ||||
|                                 ) | ||||
|                                 .unwrap(); | ||||
|  | ||||
| @@ -143,7 +144,7 @@ fn main() { | ||||
|                                 &document_changes, | ||||
|                                 embedders, | ||||
|                                 &|| false, | ||||
|                                 &|_| (), | ||||
|                                 &Progress::default(), | ||||
|                             ) | ||||
|                             .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -15,6 +15,7 @@ anyhow = "1.0.86" | ||||
| bincode = "1.3.3" | ||||
| bumpalo = "3.16.0" | ||||
| bumparaw-collections = "0.1.2" | ||||
| convert_case = "0.6.0" | ||||
| csv = "1.3.0" | ||||
| derive_builder = "0.20.0" | ||||
| dump = { path = "../dump" } | ||||
|   | ||||
| @@ -22,8 +22,7 @@ use std::ffi::OsStr; | ||||
| use std::fmt; | ||||
| use std::fs::{self, File}; | ||||
| use std::io::BufWriter; | ||||
| use std::sync::atomic::{self, AtomicU64}; | ||||
| use std::time::Duration; | ||||
| use std::sync::atomic::Ordering; | ||||
|  | ||||
| use bumpalo::collections::CollectIn; | ||||
| use bumpalo::Bump; | ||||
| @@ -32,6 +31,7 @@ use meilisearch_types::batches::BatchId; | ||||
| use meilisearch_types::heed::{RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; | ||||
| use meilisearch_types::milli::heed::CompactionOption; | ||||
| use meilisearch_types::milli::progress::Progress; | ||||
| use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; | ||||
| use meilisearch_types::milli::update::{ | ||||
|     DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings, | ||||
| @@ -41,9 +41,7 @@ use meilisearch_types::milli::vector::parsed_vectors::{ | ||||
| }; | ||||
| use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder}; | ||||
| use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; | ||||
| use meilisearch_types::tasks::{ | ||||
|     Details, IndexSwap, Kind, KindWithContent, Status, Task, TaskProgress, | ||||
| }; | ||||
| use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; | ||||
| use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; | ||||
| use roaring::RoaringBitmap; | ||||
| use time::macros::format_description; | ||||
| @@ -51,6 +49,13 @@ use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::autobatcher::{self, BatchKind}; | ||||
| use crate::processing::{ | ||||
|     AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, AtomicUpdateFileStep, CreateIndexProgress, | ||||
|     DeleteIndexProgress, DocumentDeletionProgress, DocumentEditionProgress, | ||||
|     DocumentOperationProgress, DumpCreationProgress, InnerSwappingTwoIndexes, SettingsProgress, | ||||
|     SnapshotCreationProgress, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, | ||||
|     UpdateIndexProgress, VariableNameStep, | ||||
| }; | ||||
| use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; | ||||
| use crate::{Error, IndexScheduler, Result, TaskId}; | ||||
|  | ||||
| @@ -561,11 +566,12 @@ impl IndexScheduler { | ||||
|     /// The list of tasks that were processed. The metadata of each task in the returned | ||||
|     /// list is updated accordingly, with the exception of its date fields | ||||
|     /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). | ||||
|     #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))] | ||||
|     #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))] | ||||
|     pub(crate) fn process_batch( | ||||
|         &self, | ||||
|         batch: Batch, | ||||
|         current_batch: &mut ProcessingBatch, | ||||
|         progress: Progress, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         #[cfg(test)] | ||||
|         { | ||||
| @@ -585,8 +591,13 @@ impl IndexScheduler { | ||||
|                     }; | ||||
|  | ||||
|                 let rtxn = self.env.read_txn()?; | ||||
|                 let mut canceled_tasks = | ||||
|                     self.cancel_matched_tasks(&rtxn, task.uid, current_batch, matched_tasks)?; | ||||
|                 let mut canceled_tasks = self.cancel_matched_tasks( | ||||
|                     &rtxn, | ||||
|                     task.uid, | ||||
|                     current_batch, | ||||
|                     matched_tasks, | ||||
|                     &progress, | ||||
|                 )?; | ||||
|  | ||||
|                 task.status = Status::Succeeded; | ||||
|                 match &mut task.details { | ||||
| @@ -617,7 +628,8 @@ impl IndexScheduler { | ||||
|                 } | ||||
|  | ||||
|                 let mut wtxn = self.env.write_txn()?; | ||||
|                 let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?; | ||||
|                 let mut deleted_tasks = | ||||
|                     self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?; | ||||
|                 wtxn.commit()?; | ||||
|  | ||||
|                 for task in tasks.iter_mut() { | ||||
| @@ -643,6 +655,8 @@ impl IndexScheduler { | ||||
|                 Ok(tasks) | ||||
|             } | ||||
|             Batch::SnapshotCreation(mut tasks) => { | ||||
|                 progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation); | ||||
|  | ||||
|                 fs::create_dir_all(&self.snapshots_path)?; | ||||
|                 let temp_snapshot_dir = tempfile::tempdir()?; | ||||
|  | ||||
| @@ -663,6 +677,7 @@ impl IndexScheduler { | ||||
|                 // two read operations as the task processing is synchronous. | ||||
|  | ||||
|                 // 2.1 First copy the LMDB env of the index-scheduler | ||||
|                 progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); | ||||
|                 let dst = temp_snapshot_dir.path().join("tasks"); | ||||
|                 fs::create_dir_all(&dst)?; | ||||
|                 self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; | ||||
| @@ -675,18 +690,29 @@ impl IndexScheduler { | ||||
|                 fs::create_dir_all(&update_files_dir)?; | ||||
|  | ||||
|                 // 2.4 Only copy the update files of the enqueued tasks | ||||
|                 for task_id in self.get_status(&rtxn, Status::Enqueued)? { | ||||
|                 progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles); | ||||
|                 let enqueued = self.get_status(&rtxn, Status::Enqueued)?; | ||||
|                 let (atomic, update_file_progress) = | ||||
|                     AtomicUpdateFileStep::new(enqueued.len() as u32); | ||||
|                 progress.update_progress(update_file_progress); | ||||
|                 for task_id in enqueued { | ||||
|                     let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||
|                     if let Some(content_uuid) = task.content_uuid() { | ||||
|                         let src = self.file_store.get_update_path(content_uuid); | ||||
|                         let dst = update_files_dir.join(content_uuid.to_string()); | ||||
|                         fs::copy(src, dst)?; | ||||
|                     } | ||||
|                     atomic.fetch_add(1, Ordering::Relaxed); | ||||
|                 } | ||||
|  | ||||
|                 // 3. Snapshot every indexes | ||||
|                 for result in self.index_mapper.index_mapping.iter(&rtxn)? { | ||||
|                 progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes); | ||||
|                 let index_mapping = self.index_mapper.index_mapping; | ||||
|                 let nb_indexes = index_mapping.len(&rtxn)? as u32; | ||||
|  | ||||
|                 for (i, result) in index_mapping.iter(&rtxn)?.enumerate() { | ||||
|                     let (name, uuid) = result?; | ||||
|                     progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes)); | ||||
|                     let index = self.index_mapper.index(&rtxn, name)?; | ||||
|                     let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); | ||||
|                     fs::create_dir_all(&dst)?; | ||||
| @@ -698,6 +724,7 @@ impl IndexScheduler { | ||||
|                 drop(rtxn); | ||||
|  | ||||
|                 // 4. Snapshot the auth LMDB env | ||||
|                 progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); | ||||
|                 let dst = temp_snapshot_dir.path().join("auth"); | ||||
|                 fs::create_dir_all(&dst)?; | ||||
|                 // TODO We can't use the open_auth_store_env function here but we should | ||||
| @@ -710,6 +737,7 @@ impl IndexScheduler { | ||||
|                 auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; | ||||
|  | ||||
|                 // 5. Copy and tarball the flat snapshot | ||||
|                 progress.update_progress(SnapshotCreationProgress::CreateTheTarball); | ||||
|                 // 5.1 Find the original name of the database | ||||
|                 // TODO find a better way to get this path | ||||
|                 let mut base_path = self.env.path().to_owned(); | ||||
| @@ -742,6 +770,7 @@ impl IndexScheduler { | ||||
|                 Ok(tasks) | ||||
|             } | ||||
|             Batch::Dump(mut task) => { | ||||
|                 progress.update_progress(DumpCreationProgress::StartTheDumpCreation); | ||||
|                 let started_at = OffsetDateTime::now_utc(); | ||||
|                 let (keys, instance_uid) = | ||||
|                     if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind { | ||||
| @@ -752,6 +781,7 @@ impl IndexScheduler { | ||||
|                 let dump = dump::DumpWriter::new(*instance_uid)?; | ||||
|  | ||||
|                 // 1. dump the keys | ||||
|                 progress.update_progress(DumpCreationProgress::DumpTheApiKeys); | ||||
|                 let mut dump_keys = dump.create_keys()?; | ||||
|                 for key in keys { | ||||
|                     dump_keys.push_key(key)?; | ||||
| @@ -761,7 +791,13 @@ impl IndexScheduler { | ||||
|                 let rtxn = self.env.read_txn()?; | ||||
|  | ||||
|                 // 2. dump the tasks | ||||
|                 progress.update_progress(DumpCreationProgress::DumpTheTasks); | ||||
|                 let mut dump_tasks = dump.create_tasks_queue()?; | ||||
|  | ||||
|                 let (atomic, update_task_progress) = | ||||
|                     AtomicTaskStep::new(self.all_tasks.len(&rtxn)? as u32); | ||||
|                 progress.update_progress(update_task_progress); | ||||
|  | ||||
|                 for ret in self.all_tasks.iter(&rtxn)? { | ||||
|                     if self.must_stop_processing.get() { | ||||
|                         return Err(Error::AbortedTask); | ||||
| @@ -811,11 +847,22 @@ impl IndexScheduler { | ||||
|                             dump_content_file.flush()?; | ||||
|                         } | ||||
|                     } | ||||
|                     atomic.fetch_add(1, Ordering::Relaxed); | ||||
|                 } | ||||
|                 dump_tasks.flush()?; | ||||
|  | ||||
|                 // 3. Dump the indexes | ||||
|                 progress.update_progress(DumpCreationProgress::DumpTheIndexes); | ||||
|                 let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; | ||||
|                 let mut count = 0; | ||||
|                 self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> { | ||||
|                     progress.update_progress(VariableNameStep::new( | ||||
|                         uid.to_string(), | ||||
|                         count, | ||||
|                         nb_indexes, | ||||
|                     )); | ||||
|                     count += 1; | ||||
|  | ||||
|                     let rtxn = index.read_txn()?; | ||||
|                     let metadata = IndexMetadata { | ||||
|                         uid: uid.to_owned(), | ||||
| @@ -835,6 +882,12 @@ impl IndexScheduler { | ||||
|                         .embedding_configs(&rtxn) | ||||
|                         .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||
|  | ||||
|                     let nb_documents = index | ||||
|                         .number_of_documents(&rtxn) | ||||
|                         .map_err(|e| Error::from_milli(e, Some(uid.to_string())))? | ||||
|                         as u32; | ||||
|                     let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents); | ||||
|                     progress.update_progress(update_document_progress); | ||||
|                     let documents = index | ||||
|                         .all_documents(&rtxn) | ||||
|                         .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; | ||||
| @@ -904,6 +957,7 @@ impl IndexScheduler { | ||||
|                         } | ||||
|  | ||||
|                         index_dumper.push_document(&document)?; | ||||
|                         atomic.fetch_add(1, Ordering::Relaxed); | ||||
|                     } | ||||
|  | ||||
|                     // 3.2. Dump the settings | ||||
| @@ -918,6 +972,7 @@ impl IndexScheduler { | ||||
|                 })?; | ||||
|  | ||||
|                 // 4. Dump experimental feature settings | ||||
|                 progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); | ||||
|                 let features = self.features().runtime_features(); | ||||
|                 dump.create_experimental_features(features)?; | ||||
|  | ||||
| @@ -928,6 +983,7 @@ impl IndexScheduler { | ||||
|                 if self.must_stop_processing.get() { | ||||
|                     return Err(Error::AbortedTask); | ||||
|                 } | ||||
|                 progress.update_progress(DumpCreationProgress::CompressTheDump); | ||||
|                 let path = self.dumps_path.join(format!("{}.dump", dump_uid)); | ||||
|                 let file = File::create(path)?; | ||||
|                 dump.persist_to(BufWriter::new(file))?; | ||||
| @@ -953,7 +1009,7 @@ impl IndexScheduler { | ||||
|                     .set_currently_updating_index(Some((index_uid.clone(), index.clone()))); | ||||
|  | ||||
|                 let mut index_wtxn = index.write_txn()?; | ||||
|                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; | ||||
|                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?; | ||||
|  | ||||
|                 { | ||||
|                     let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); | ||||
| @@ -987,6 +1043,8 @@ impl IndexScheduler { | ||||
|                 Ok(tasks) | ||||
|             } | ||||
|             Batch::IndexCreation { index_uid, primary_key, task } => { | ||||
|                 progress.update_progress(CreateIndexProgress::CreatingTheIndex); | ||||
|  | ||||
|                 let wtxn = self.env.write_txn()?; | ||||
|                 if self.index_mapper.exists(&wtxn, &index_uid)? { | ||||
|                     return Err(Error::IndexAlreadyExists(index_uid)); | ||||
| @@ -996,9 +1054,11 @@ impl IndexScheduler { | ||||
|                 self.process_batch( | ||||
|                     Batch::IndexUpdate { index_uid, primary_key, task }, | ||||
|                     current_batch, | ||||
|                     progress, | ||||
|                 ) | ||||
|             } | ||||
|             Batch::IndexUpdate { index_uid, primary_key, mut task } => { | ||||
|                 progress.update_progress(UpdateIndexProgress::UpdatingTheIndex); | ||||
|                 let rtxn = self.env.read_txn()?; | ||||
|                 let index = self.index_mapper.index(&rtxn, &index_uid)?; | ||||
|  | ||||
| @@ -1051,6 +1111,7 @@ impl IndexScheduler { | ||||
|                 Ok(vec![task]) | ||||
|             } | ||||
|             Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { | ||||
|                 progress.update_progress(DeleteIndexProgress::DeletingTheIndex); | ||||
|                 let wtxn = self.env.write_txn()?; | ||||
|  | ||||
|                 // it's possible that the index doesn't exist | ||||
| @@ -1084,6 +1145,8 @@ impl IndexScheduler { | ||||
|                 Ok(tasks) | ||||
|             } | ||||
|             Batch::IndexSwap { mut task } => { | ||||
|                 progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap); | ||||
|  | ||||
|                 let mut wtxn = self.env.write_txn()?; | ||||
|                 let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind { | ||||
|                     swaps | ||||
| @@ -1110,8 +1173,20 @@ impl IndexScheduler { | ||||
|                         )); | ||||
|                     } | ||||
|                 } | ||||
|                 for swap in swaps { | ||||
|                     self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?; | ||||
|                 progress.update_progress(SwappingTheIndexes::SwappingTheIndexes); | ||||
|                 for (step, swap) in swaps.iter().enumerate() { | ||||
|                     progress.update_progress(VariableNameStep::new( | ||||
|                         format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1), | ||||
|                         step as u32, | ||||
|                         swaps.len() as u32, | ||||
|                     )); | ||||
|                     self.apply_index_swap( | ||||
|                         &mut wtxn, | ||||
|                         &progress, | ||||
|                         task.uid, | ||||
|                         &swap.indexes.0, | ||||
|                         &swap.indexes.1, | ||||
|                     )?; | ||||
|                 } | ||||
|                 wtxn.commit()?; | ||||
|                 task.status = Status::Succeeded; | ||||
| @@ -1121,7 +1196,15 @@ impl IndexScheduler { | ||||
|     } | ||||
|  | ||||
|     /// Swap the index `lhs` with the index `rhs`. | ||||
|     fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> { | ||||
|     fn apply_index_swap( | ||||
|         &self, | ||||
|         wtxn: &mut RwTxn, | ||||
|         progress: &Progress, | ||||
|         task_id: u32, | ||||
|         lhs: &str, | ||||
|         rhs: &str, | ||||
|     ) -> Result<()> { | ||||
|         progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks); | ||||
|         // 1. Verify that both lhs and rhs are existing indexes | ||||
|         let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?; | ||||
|         if !index_lhs_exists { | ||||
| @@ -1139,14 +1222,21 @@ impl IndexScheduler { | ||||
|         index_rhs_task_ids.remove_range(task_id..); | ||||
|  | ||||
|         // 3. before_name -> new_name in the task's KindWithContent | ||||
|         for task_id in &index_lhs_task_ids | &index_rhs_task_ids { | ||||
|         progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks); | ||||
|         let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids; | ||||
|         let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32); | ||||
|         progress.update_progress(task_progress); | ||||
|  | ||||
|         for task_id in tasks_to_update { | ||||
|             let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||
|             swap_index_uid_in_task(&mut task, (lhs, rhs)); | ||||
|             self.all_tasks.put(wtxn, &task_id, &task)?; | ||||
|             atomic.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|  | ||||
|         // 4. remove the task from indexuid = before_name | ||||
|         // 5. add the task to indexuid = after_name | ||||
|         progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata); | ||||
|         self.update_index(wtxn, lhs, |lhs_tasks| { | ||||
|             *lhs_tasks -= &index_lhs_task_ids; | ||||
|             *lhs_tasks |= &index_rhs_task_ids; | ||||
| @@ -1168,7 +1258,7 @@ impl IndexScheduler { | ||||
|     /// The list of processed tasks. | ||||
|     #[tracing::instrument( | ||||
|         level = "trace", | ||||
|         skip(self, index_wtxn, index), | ||||
|         skip(self, index_wtxn, index, progress), | ||||
|         target = "indexing::scheduler" | ||||
|     )] | ||||
|     fn apply_index_operation<'i>( | ||||
| @@ -1176,44 +1266,12 @@ impl IndexScheduler { | ||||
|         index_wtxn: &mut RwTxn<'i>, | ||||
|         index: &'i Index, | ||||
|         operation: IndexOperation, | ||||
|         progress: Progress, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         let indexer_alloc = Bump::new(); | ||||
|  | ||||
|         let started_processing_at = std::time::Instant::now(); | ||||
|         let secs_since_started_processing_at = AtomicU64::new(0); | ||||
|         const PRINT_SECS_DELTA: u64 = 5; | ||||
|  | ||||
|         let processing_tasks = self.processing_tasks.clone(); | ||||
|         let must_stop_processing = self.must_stop_processing.clone(); | ||||
|         let send_progress = |progress| { | ||||
|             let now = std::time::Instant::now(); | ||||
|             let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed); | ||||
|             let previous = started_processing_at + Duration::from_secs(elapsed); | ||||
|             let elapsed = now - previous; | ||||
|  | ||||
|             if elapsed.as_secs() < PRINT_SECS_DELTA { | ||||
|                 return; | ||||
|             } | ||||
|  | ||||
|             secs_since_started_processing_at | ||||
|                 .store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed); | ||||
|  | ||||
|             let TaskProgress { | ||||
|                 current_step, | ||||
|                 finished_steps, | ||||
|                 total_steps, | ||||
|                 finished_substeps, | ||||
|                 total_substeps, | ||||
|             } = processing_tasks.write().unwrap().update_progress(progress); | ||||
|  | ||||
|             tracing::info!( | ||||
|                 current_step, | ||||
|                 finished_steps, | ||||
|                 total_steps, | ||||
|                 finished_substeps, | ||||
|                 total_substeps | ||||
|             ); | ||||
|         }; | ||||
|  | ||||
|         match operation { | ||||
|             IndexOperation::DocumentClear { index_uid, mut tasks } => { | ||||
| @@ -1245,6 +1303,7 @@ impl IndexScheduler { | ||||
|                 operations, | ||||
|                 mut tasks, | ||||
|             } => { | ||||
|                 progress.update_progress(DocumentOperationProgress::RetrievingConfig); | ||||
|                 // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. | ||||
|                 // this is made difficult by the fact we're doing private clones of the index scheduler and sending it | ||||
|                 // to a fresh thread. | ||||
| @@ -1300,6 +1359,7 @@ impl IndexScheduler { | ||||
|                     } | ||||
|                 }; | ||||
|  | ||||
|                 progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges); | ||||
|                 let (document_changes, operation_stats, primary_key) = indexer | ||||
|                     .into_changes( | ||||
|                         &indexer_alloc, | ||||
| @@ -1308,7 +1368,7 @@ impl IndexScheduler { | ||||
|                         primary_key.as_deref(), | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| must_stop_processing.get(), | ||||
|                         &send_progress, | ||||
|                         progress.clone(), | ||||
|                     ) | ||||
|                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||
|  | ||||
| @@ -1344,6 +1404,7 @@ impl IndexScheduler { | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 progress.update_progress(DocumentOperationProgress::Indexing); | ||||
|                 if tasks.iter().any(|res| res.error.is_none()) { | ||||
|                     indexer::index( | ||||
|                         index_wtxn, | ||||
| @@ -1356,7 +1417,7 @@ impl IndexScheduler { | ||||
|                         &document_changes, | ||||
|                         embedders, | ||||
|                         &|| must_stop_processing.get(), | ||||
|                         &send_progress, | ||||
|                         &progress, | ||||
|                     ) | ||||
|                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||
|  | ||||
| @@ -1373,6 +1434,8 @@ impl IndexScheduler { | ||||
|                 Ok(tasks) | ||||
|             } | ||||
|             IndexOperation::DocumentEdition { index_uid, mut task } => { | ||||
|                 progress.update_progress(DocumentEditionProgress::RetrievingConfig); | ||||
|  | ||||
|                 let (filter, code) = if let KindWithContent::DocumentEdition { | ||||
|                     filter_expr, | ||||
|                     context: _, | ||||
| @@ -1446,6 +1509,7 @@ impl IndexScheduler { | ||||
|                     }; | ||||
|  | ||||
|                     let candidates_count = candidates.len(); | ||||
|                     progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges); | ||||
|                     let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone()); | ||||
|                     let document_changes = pool | ||||
|                         .install(|| { | ||||
| @@ -1459,6 +1523,7 @@ impl IndexScheduler { | ||||
|                         .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||
|                     let embedders = self.embedders(index_uid.clone(), embedders)?; | ||||
|  | ||||
|                     progress.update_progress(DocumentEditionProgress::Indexing); | ||||
|                     indexer::index( | ||||
|                         index_wtxn, | ||||
|                         index, | ||||
| @@ -1470,7 +1535,7 @@ impl IndexScheduler { | ||||
|                         &document_changes, | ||||
|                         embedders, | ||||
|                         &|| must_stop_processing.get(), | ||||
|                         &send_progress, | ||||
|                         &progress, | ||||
|                     ) | ||||
|                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||
|  | ||||
| @@ -1511,6 +1576,8 @@ impl IndexScheduler { | ||||
|                 Ok(vec![task]) | ||||
|             } | ||||
|             IndexOperation::DocumentDeletion { mut tasks, index_uid } => { | ||||
|                 progress.update_progress(DocumentDeletionProgress::RetrievingConfig); | ||||
|  | ||||
|                 let mut to_delete = RoaringBitmap::new(); | ||||
|                 let external_documents_ids = index.external_documents_ids(); | ||||
|  | ||||
| @@ -1601,6 +1668,7 @@ impl IndexScheduler { | ||||
|                         } | ||||
|                     }; | ||||
|  | ||||
|                     progress.update_progress(DocumentDeletionProgress::DeleteDocuments); | ||||
|                     let mut indexer = indexer::DocumentDeletion::new(); | ||||
|                     let candidates_count = to_delete.len(); | ||||
|                     indexer.delete_documents_by_docids(to_delete); | ||||
| @@ -1610,6 +1678,7 @@ impl IndexScheduler { | ||||
|                         .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||
|                     let embedders = self.embedders(index_uid.clone(), embedders)?; | ||||
|  | ||||
|                     progress.update_progress(DocumentDeletionProgress::Indexing); | ||||
|                     indexer::index( | ||||
|                         index_wtxn, | ||||
|                         index, | ||||
| @@ -1621,7 +1690,7 @@ impl IndexScheduler { | ||||
|                         &document_changes, | ||||
|                         embedders, | ||||
|                         &|| must_stop_processing.get(), | ||||
|                         &send_progress, | ||||
|                         &progress, | ||||
|                     ) | ||||
|                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||
|  | ||||
| @@ -1638,6 +1707,7 @@ impl IndexScheduler { | ||||
|                 Ok(tasks) | ||||
|             } | ||||
|             IndexOperation::Settings { index_uid, settings, mut tasks } => { | ||||
|                 progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings); | ||||
|                 let indexer_config = self.index_mapper.indexer_config(); | ||||
|                 let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); | ||||
|  | ||||
| @@ -1651,6 +1721,7 @@ impl IndexScheduler { | ||||
|                     task.status = Status::Succeeded; | ||||
|                 } | ||||
|  | ||||
|                 progress.update_progress(SettingsProgress::ApplyTheSettings); | ||||
|                 builder | ||||
|                     .execute( | ||||
|                         |indexing_step| tracing::debug!(update = ?indexing_step), | ||||
| @@ -1673,12 +1744,14 @@ impl IndexScheduler { | ||||
|                         index_uid: index_uid.clone(), | ||||
|                         tasks: cleared_tasks, | ||||
|                     }, | ||||
|                     progress.clone(), | ||||
|                 )?; | ||||
|  | ||||
|                 let settings_tasks = self.apply_index_operation( | ||||
|                     index_wtxn, | ||||
|                     index, | ||||
|                     IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, | ||||
|                     progress, | ||||
|                 )?; | ||||
|  | ||||
|                 let mut tasks = settings_tasks; | ||||
| @@ -1695,15 +1768,18 @@ impl IndexScheduler { | ||||
|         &self, | ||||
|         wtxn: &mut RwTxn, | ||||
|         matched_tasks: &RoaringBitmap, | ||||
|         progress: &Progress, | ||||
|     ) -> Result<RoaringBitmap> { | ||||
|         progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime); | ||||
|  | ||||
|         // 1. Remove from this list the tasks that we are not allowed to delete | ||||
|         let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?; | ||||
|         let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); | ||||
|  | ||||
|         let all_task_ids = self.all_task_ids(wtxn)?; | ||||
|         let mut to_delete_tasks = all_task_ids & matched_tasks; | ||||
|         to_delete_tasks -= processing_tasks; | ||||
|         to_delete_tasks -= enqueued_tasks; | ||||
|         to_delete_tasks -= &**processing_tasks; | ||||
|         to_delete_tasks -= &enqueued_tasks; | ||||
|  | ||||
|         // 2. We now have a list of tasks to delete, delete them | ||||
|  | ||||
| @@ -1714,6 +1790,8 @@ impl IndexScheduler { | ||||
|         // The tasks that have been removed *per batches*. | ||||
|         let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new(); | ||||
|  | ||||
|         let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); | ||||
|         progress.update_progress(task_progress); | ||||
|         for task_id in to_delete_tasks.iter() { | ||||
|             let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; | ||||
|  | ||||
| @@ -1737,22 +1815,35 @@ impl IndexScheduler { | ||||
|             if let Some(batch_uid) = task.batch_uid { | ||||
|                 affected_batches.entry(batch_uid).or_default().insert(task_id); | ||||
|             } | ||||
|             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|  | ||||
|         progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata); | ||||
|         let (atomic_progress, task_progress) = AtomicTaskStep::new( | ||||
|             (affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32, | ||||
|         ); | ||||
|         progress.update_progress(task_progress); | ||||
|         for index in affected_indexes.iter() { | ||||
|             self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?; | ||||
|             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|  | ||||
|         for status in affected_statuses.iter() { | ||||
|             self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?; | ||||
|             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|  | ||||
|         for kind in affected_kinds.iter() { | ||||
|             self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?; | ||||
|             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|  | ||||
|         progress.update_progress(TaskDeletionProgress::DeletingTasks); | ||||
|         let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); | ||||
|         progress.update_progress(task_progress); | ||||
|         for task in to_delete_tasks.iter() { | ||||
|             self.all_tasks.delete(wtxn, &task)?; | ||||
|             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|         for canceled_by in affected_canceled_by { | ||||
|             if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? { | ||||
| @@ -1764,6 +1855,9 @@ impl IndexScheduler { | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         progress.update_progress(TaskDeletionProgress::DeletingBatches); | ||||
|         let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32); | ||||
|         progress.update_progress(batch_progress); | ||||
|         for (batch_id, to_delete_tasks) in affected_batches { | ||||
|             if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? { | ||||
|                 tasks -= &to_delete_tasks; | ||||
| @@ -1805,6 +1899,7 @@ impl IndexScheduler { | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             atomic_progress.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|  | ||||
|         Ok(to_delete_tasks) | ||||
| @@ -1819,21 +1914,36 @@ impl IndexScheduler { | ||||
|         cancel_task_id: TaskId, | ||||
|         current_batch: &mut ProcessingBatch, | ||||
|         matched_tasks: &RoaringBitmap, | ||||
|         progress: &Progress, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         progress.update_progress(TaskCancelationProgress::RetrievingTasks); | ||||
|  | ||||
|         // 1. Remove from this list the tasks that we are not allowed to cancel | ||||
|         //    Notice that only the _enqueued_ ones are cancelable and we should | ||||
|         //    have already aborted the indexation of the _processing_ ones | ||||
|         let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?; | ||||
|         let tasks_to_cancel = cancelable_tasks & matched_tasks; | ||||
|  | ||||
|         // 2. We now have a list of tasks to cancel, cancel them | ||||
|         let mut tasks = self.get_existing_tasks(rtxn, tasks_to_cancel.iter())?; | ||||
|         let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); | ||||
|         progress.update_progress(progress_obj); | ||||
|  | ||||
|         // 2. We now have a list of tasks to cancel, cancel them | ||||
|         let mut tasks = self.get_existing_tasks( | ||||
|             rtxn, | ||||
|             tasks_to_cancel.iter().inspect(|_| { | ||||
|                 task_progress.fetch_add(1, Ordering::Relaxed); | ||||
|             }), | ||||
|         )?; | ||||
|  | ||||
|         progress.update_progress(TaskCancelationProgress::UpdatingTasks); | ||||
|         let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); | ||||
|         progress.update_progress(progress_obj); | ||||
|         for task in tasks.iter_mut() { | ||||
|             task.status = Status::Canceled; | ||||
|             task.canceled_by = Some(cancel_task_id); | ||||
|             task.details = task.details.as_ref().map(|d| d.to_failed()); | ||||
|             current_batch.processing(Some(task)); | ||||
|             task_progress.fetch_add(1, Ordering::Relaxed); | ||||
|         } | ||||
|  | ||||
|         Ok(tasks) | ||||
|   | ||||
| @@ -3,10 +3,6 @@ use std::sync::{Arc, RwLock}; | ||||
| use std::time::Duration; | ||||
| use std::{fs, thread}; | ||||
|  | ||||
| use self::index_map::IndexMap; | ||||
| use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; | ||||
| use crate::uuid_codec::UuidCodec; | ||||
| use crate::{Error, Result}; | ||||
| use meilisearch_types::heed::types::{SerdeJson, Str}; | ||||
| use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli; | ||||
| @@ -17,6 +13,11 @@ use time::OffsetDateTime; | ||||
| use tracing::error; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use self::index_map::IndexMap; | ||||
| use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; | ||||
| use crate::uuid_codec::UuidCodec; | ||||
| use crate::{Error, Result}; | ||||
|  | ||||
| mod index_map; | ||||
|  | ||||
| const INDEX_MAPPING: &str = "index-mapping"; | ||||
|   | ||||
| @@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec | ||||
|  | ||||
| pub fn snapshot_batch(batch: &Batch) -> String { | ||||
|     let mut snap = String::new(); | ||||
|     let Batch { uid, details, stats, started_at, finished_at } = batch; | ||||
|     let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch; | ||||
|     if let Some(finished_at) = finished_at { | ||||
|         assert!(finished_at > started_at); | ||||
|     } | ||||
|   | ||||
| @@ -26,6 +26,7 @@ mod index_mapper; | ||||
| #[cfg(test)] | ||||
| mod insta_snapshot; | ||||
| mod lru; | ||||
| mod processing; | ||||
| mod utils; | ||||
| pub mod uuid_codec; | ||||
|  | ||||
| @@ -56,12 +57,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; | ||||
| use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::documents::DocumentsBatchBuilder; | ||||
| use meilisearch_types::milli::index::IndexEmbeddingConfig; | ||||
| use meilisearch_types::milli::update::new::indexer::document_changes::Progress; | ||||
| use meilisearch_types::milli::update::IndexerConfig; | ||||
| use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; | ||||
| use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; | ||||
| use meilisearch_types::task_view::TaskView; | ||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress}; | ||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||
| use processing::ProcessingTasks; | ||||
| use rayon::current_num_threads; | ||||
| use rayon::prelude::{IntoParallelIterator, ParallelIterator}; | ||||
| use roaring::RoaringBitmap; | ||||
| @@ -72,7 +73,8 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index_mapper::IndexMapper; | ||||
| use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch}; | ||||
| use crate::processing::{AtomicTaskStep, BatchProgress}; | ||||
| use crate::utils::{check_index_swap_validity, clamp_to_page_size}; | ||||
|  | ||||
| pub(crate) type BEI128 = I128<BE>; | ||||
|  | ||||
| @@ -163,48 +165,6 @@ impl Query { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct ProcessingTasks { | ||||
|     batch: Option<ProcessingBatch>, | ||||
|     /// The list of tasks ids that are currently running. | ||||
|     processing: RoaringBitmap, | ||||
|     /// The progress on processing tasks | ||||
|     progress: Option<TaskProgress>, | ||||
| } | ||||
|  | ||||
| impl ProcessingTasks { | ||||
|     /// Creates an empty `ProcessingAt` struct. | ||||
|     fn new() -> ProcessingTasks { | ||||
|         ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None } | ||||
|     } | ||||
|  | ||||
|     /// Stores the currently processing tasks, and the date time at which it started. | ||||
|     fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) { | ||||
|         self.batch = Some(processing_batch); | ||||
|         self.processing = processing; | ||||
|     } | ||||
|  | ||||
|     fn update_progress(&mut self, progress: Progress) -> TaskProgress { | ||||
|         self.progress.get_or_insert_with(TaskProgress::default).update(progress) | ||||
|     } | ||||
|  | ||||
|     /// Set the processing tasks to an empty list | ||||
|     fn stop_processing(&mut self) -> Self { | ||||
|         self.progress = None; | ||||
|  | ||||
|         Self { | ||||
|             batch: std::mem::take(&mut self.batch), | ||||
|             processing: std::mem::take(&mut self.processing), | ||||
|             progress: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns `true` if there, at least, is one task that is currently processing that we must stop. | ||||
|     fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool { | ||||
|         !self.processing.is_disjoint(canceled_tasks) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Default, Clone, Debug)] | ||||
| struct MustStopProcessing(Arc<AtomicBool>); | ||||
|  | ||||
| @@ -813,7 +773,7 @@ impl IndexScheduler { | ||||
|             let mut batch_tasks = RoaringBitmap::new(); | ||||
|             for batch_uid in batch_uids { | ||||
|                 if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) { | ||||
|                     batch_tasks |= &processing_tasks; | ||||
|                     batch_tasks |= &*processing_tasks; | ||||
|                 } else { | ||||
|                     batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?; | ||||
|                 } | ||||
| @@ -827,13 +787,13 @@ impl IndexScheduler { | ||||
|                 match status { | ||||
|                     // special case for Processing tasks | ||||
|                     Status::Processing => { | ||||
|                         status_tasks |= &processing_tasks; | ||||
|                         status_tasks |= &*processing_tasks; | ||||
|                     } | ||||
|                     status => status_tasks |= &self.get_status(rtxn, *status)?, | ||||
|                 }; | ||||
|             } | ||||
|             if !status.contains(&Status::Processing) { | ||||
|                 tasks -= &processing_tasks; | ||||
|                 tasks -= &*processing_tasks; | ||||
|             } | ||||
|             tasks &= status_tasks; | ||||
|         } | ||||
| @@ -882,7 +842,7 @@ impl IndexScheduler { | ||||
|         // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. | ||||
|         tasks = { | ||||
|             let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = | ||||
|                 (&tasks - &processing_tasks, &tasks & &processing_tasks); | ||||
|                 (&tasks - &*processing_tasks, &tasks & &*processing_tasks); | ||||
|  | ||||
|             // special case for Processing tasks | ||||
|             // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds | ||||
| @@ -1090,7 +1050,7 @@ impl IndexScheduler { | ||||
|         // Once we have filtered the two subsets, we put them back together and assign it back to `batches`. | ||||
|         batches = { | ||||
|             let (mut filtered_non_processing_batches, mut filtered_processing_batches) = | ||||
|                 (&batches - &processing.processing, &batches & &processing.processing); | ||||
|                 (&batches - &*processing.processing, &batches & &*processing.processing); | ||||
|  | ||||
|             // special case for Processing batches | ||||
|             // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds | ||||
| @@ -1606,7 +1566,8 @@ impl IndexScheduler { | ||||
|  | ||||
|         // We reset the must_stop flag to be sure that we don't stop processing tasks | ||||
|         self.must_stop_processing.reset(); | ||||
|         self.processing_tasks | ||||
|         let progress = self | ||||
|             .processing_tasks | ||||
|             .write() | ||||
|             .unwrap() | ||||
|             // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches | ||||
| @@ -1619,11 +1580,12 @@ impl IndexScheduler { | ||||
|         let res = { | ||||
|             let cloned_index_scheduler = self.private_clone(); | ||||
|             let processing_batch = &mut processing_batch; | ||||
|             let progress = progress.clone(); | ||||
|             std::thread::scope(|s| { | ||||
|                 let handle = std::thread::Builder::new() | ||||
|                     .name(String::from("batch-operation")) | ||||
|                     .spawn_scoped(s, move || { | ||||
|                         cloned_index_scheduler.process_batch(batch, processing_batch) | ||||
|                         cloned_index_scheduler.process_batch(batch, processing_batch, progress) | ||||
|                     }) | ||||
|                     .unwrap(); | ||||
|                 handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) | ||||
| @@ -1636,6 +1598,7 @@ impl IndexScheduler { | ||||
|         #[cfg(test)] | ||||
|         self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; | ||||
|  | ||||
|         progress.update_progress(BatchProgress::WritingTasksToDisk); | ||||
|         processing_batch.finished(); | ||||
|         let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; | ||||
|         let mut canceled = RoaringBitmap::new(); | ||||
| @@ -1645,12 +1608,15 @@ impl IndexScheduler { | ||||
|                 #[cfg(test)] | ||||
|                 self.breakpoint(Breakpoint::ProcessBatchSucceeded); | ||||
|  | ||||
|                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); | ||||
|                 progress.update_progress(task_progress_obj); | ||||
|                 let mut success = 0; | ||||
|                 let mut failure = 0; | ||||
|                 let mut canceled_by = None; | ||||
|  | ||||
|                 #[allow(unused_variables)] | ||||
|                 for (i, mut task) in tasks.into_iter().enumerate() { | ||||
|                     task_progress.fetch_add(1, Ordering::Relaxed); | ||||
|                     processing_batch.update(&mut task); | ||||
|                     if task.status == Status::Canceled { | ||||
|                         canceled.insert(task.uid); | ||||
| @@ -1718,8 +1684,12 @@ impl IndexScheduler { | ||||
|             Err(err) => { | ||||
|                 #[cfg(test)] | ||||
|                 self.breakpoint(Breakpoint::ProcessBatchFailed); | ||||
|                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); | ||||
|                 progress.update_progress(task_progress_obj); | ||||
|  | ||||
|                 let error: ResponseError = err.into(); | ||||
|                 for id in ids.iter() { | ||||
|                     task_progress.fetch_add(1, Ordering::Relaxed); | ||||
|                     let mut task = self | ||||
|                         .get_task(&wtxn, id) | ||||
|                         .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? | ||||
|   | ||||
							
								
								
									
										316
									
								
								crates/index-scheduler/src/processing.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										316
									
								
								crates/index-scheduler/src/processing.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,316 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use enum_iterator::Sequence; | ||||
| use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step}; | ||||
| use meilisearch_types::milli::{make_atomic_progress, make_enum_progress}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::utils::ProcessingBatch; | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct ProcessingTasks { | ||||
|     pub batch: Option<Arc<ProcessingBatch>>, | ||||
|     /// The list of tasks ids that are currently running. | ||||
|     pub processing: Arc<RoaringBitmap>, | ||||
|     /// The progress on processing tasks | ||||
|     pub progress: Option<Progress>, | ||||
| } | ||||
|  | ||||
| impl ProcessingTasks { | ||||
|     /// Creates an empty `ProcessingAt` struct. | ||||
|     pub fn new() -> ProcessingTasks { | ||||
|         ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None } | ||||
|     } | ||||
|  | ||||
|     pub fn get_progress_view(&self) -> Option<ProgressView> { | ||||
|         Some(self.progress.as_ref()?.as_progress_view()) | ||||
|     } | ||||
|  | ||||
|     /// Stores the currently processing tasks, and the date time at which it started. | ||||
|     pub fn start_processing( | ||||
|         &mut self, | ||||
|         processing_batch: ProcessingBatch, | ||||
|         processing: RoaringBitmap, | ||||
|     ) -> Progress { | ||||
|         self.batch = Some(Arc::new(processing_batch)); | ||||
|         self.processing = Arc::new(processing); | ||||
|         let progress = Progress::default(); | ||||
|         progress.update_progress(BatchProgress::ProcessingTasks); | ||||
|         self.progress = Some(progress.clone()); | ||||
|  | ||||
|         progress | ||||
|     } | ||||
|  | ||||
|     /// Set the processing tasks to an empty list | ||||
|     pub fn stop_processing(&mut self) -> Self { | ||||
|         self.progress = None; | ||||
|  | ||||
|         Self { | ||||
|             batch: std::mem::take(&mut self.batch), | ||||
|             processing: std::mem::take(&mut self.processing), | ||||
|             progress: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns `true` if there, at least, is one task that is currently processing that we must stop. | ||||
|     pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool { | ||||
|         !self.processing.is_disjoint(canceled_tasks) | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum BatchProgress { | ||||
|         ProcessingTasks, | ||||
|         WritingTasksToDisk, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum TaskCancelationProgress { | ||||
|         RetrievingTasks, | ||||
|         UpdatingTasks, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum TaskDeletionProgress { | ||||
|         DeletingTasksDateTime, | ||||
|         DeletingTasksMetadata, | ||||
|         DeletingTasks, | ||||
|         DeletingBatches, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum SnapshotCreationProgress { | ||||
|         StartTheSnapshotCreation, | ||||
|         SnapshotTheIndexScheduler, | ||||
|         SnapshotTheUpdateFiles, | ||||
|         SnapshotTheIndexes, | ||||
|         SnapshotTheApiKeys, | ||||
|         CreateTheTarball, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum DumpCreationProgress { | ||||
|         StartTheDumpCreation, | ||||
|         DumpTheApiKeys, | ||||
|         DumpTheTasks, | ||||
|         DumpTheIndexes, | ||||
|         DumpTheExperimentalFeatures, | ||||
|         CompressTheDump, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum CreateIndexProgress { | ||||
|         CreatingTheIndex, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum UpdateIndexProgress { | ||||
|         UpdatingTheIndex, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum DeleteIndexProgress { | ||||
|         DeletingTheIndex, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum SwappingTheIndexes { | ||||
|         EnsuringCorrectnessOfTheSwap, | ||||
|         SwappingTheIndexes, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum InnerSwappingTwoIndexes { | ||||
|         RetrieveTheTasks, | ||||
|         UpdateTheTasks, | ||||
|         UpdateTheIndexesMetadata, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum DocumentOperationProgress { | ||||
|         RetrievingConfig, | ||||
|         ComputingDocumentChanges, | ||||
|         Indexing, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum DocumentEditionProgress { | ||||
|         RetrievingConfig, | ||||
|         ComputingDocumentChanges, | ||||
|         Indexing, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum DocumentDeletionProgress { | ||||
|         RetrievingConfig, | ||||
|         DeleteDocuments, | ||||
|         Indexing, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_enum_progress! { | ||||
|     pub enum SettingsProgress { | ||||
|         RetrievingAndMergingTheSettings, | ||||
|         ApplyTheSettings, | ||||
|     } | ||||
| } | ||||
|  | ||||
| make_atomic_progress!(Task alias AtomicTaskStep => "task" ); | ||||
| make_atomic_progress!(Document alias AtomicDocumentStep => "document" ); | ||||
| make_atomic_progress!(Batch alias AtomicBatchStep => "batch" ); | ||||
| make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" ); | ||||
|  | ||||
| pub struct VariableNameStep { | ||||
|     name: String, | ||||
|     current: u32, | ||||
|     total: u32, | ||||
| } | ||||
|  | ||||
| impl VariableNameStep { | ||||
|     pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self { | ||||
|         Self { name: name.into(), current, total } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Step for VariableNameStep { | ||||
|     fn name(&self) -> Cow<'static, str> { | ||||
|         self.name.clone().into() | ||||
|     } | ||||
|  | ||||
|     fn current(&self) -> u32 { | ||||
|         self.current | ||||
|     } | ||||
|  | ||||
|     fn total(&self) -> u32 { | ||||
|         self.total | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::sync::atomic::Ordering; | ||||
|  | ||||
|     use meili_snap::{json_string, snapshot}; | ||||
|  | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn one_level() { | ||||
|         let mut processing = ProcessingTasks::new(); | ||||
|         processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new()); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "currentStep": "processing tasks", | ||||
|               "finished": 0, | ||||
|               "total": 2 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 0.0 | ||||
|         } | ||||
|         "#); | ||||
|         processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "currentStep": "writing tasks to disk", | ||||
|               "finished": 1, | ||||
|               "total": 2 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 50.0 | ||||
|         } | ||||
|         "#); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn task_progress() { | ||||
|         let mut processing = ProcessingTasks::new(); | ||||
|         processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new()); | ||||
|         let (atomic, tasks) = AtomicTaskStep::new(10); | ||||
|         processing.progress.as_ref().unwrap().update_progress(tasks); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "currentStep": "processing tasks", | ||||
|               "finished": 0, | ||||
|               "total": 2 | ||||
|             }, | ||||
|             { | ||||
|               "currentStep": "task", | ||||
|               "finished": 0, | ||||
|               "total": 10 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 0.0 | ||||
|         } | ||||
|         "#); | ||||
|         atomic.fetch_add(6, Ordering::Relaxed); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "currentStep": "processing tasks", | ||||
|               "finished": 0, | ||||
|               "total": 2 | ||||
|             }, | ||||
|             { | ||||
|               "currentStep": "task", | ||||
|               "finished": 6, | ||||
|               "total": 10 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 30.000002 | ||||
|         } | ||||
|         "#); | ||||
|         processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "currentStep": "writing tasks to disk", | ||||
|               "finished": 1, | ||||
|               "total": 2 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 50.0 | ||||
|         } | ||||
|         "#); | ||||
|         let (atomic, tasks) = AtomicTaskStep::new(5); | ||||
|         processing.progress.as_ref().unwrap().update_progress(tasks); | ||||
|         atomic.fetch_add(4, Ordering::Relaxed); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "currentStep": "writing tasks to disk", | ||||
|               "finished": 1, | ||||
|               "total": 2 | ||||
|             }, | ||||
|             { | ||||
|               "currentStep": "task", | ||||
|               "finished": 4, | ||||
|               "total": 5 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 90.0 | ||||
|         } | ||||
|         "#); | ||||
|     } | ||||
| } | ||||
| @@ -134,6 +134,7 @@ impl ProcessingBatch { | ||||
|     pub fn to_batch(&self) -> Batch { | ||||
|         Batch { | ||||
|             uid: self.uid, | ||||
|             progress: None, | ||||
|             details: self.details.clone(), | ||||
|             stats: self.stats.clone(), | ||||
|             started_at: self.started_at, | ||||
| @@ -187,6 +188,7 @@ impl IndexScheduler { | ||||
|             &batch.uid, | ||||
|             &Batch { | ||||
|                 uid: batch.uid, | ||||
|                 progress: None, | ||||
|                 details: batch.details, | ||||
|                 stats: batch.stats, | ||||
|                 started_at: batch.started_at, | ||||
| @@ -273,7 +275,9 @@ impl IndexScheduler { | ||||
|             .into_iter() | ||||
|             .map(|batch_id| { | ||||
|                 if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) { | ||||
|                     Ok(processing.batch.as_ref().unwrap().to_batch()) | ||||
|                     let mut batch = processing.batch.as_ref().unwrap().to_batch(); | ||||
|                     batch.progress = processing.get_progress_view(); | ||||
|                     Ok(batch) | ||||
|                 } else { | ||||
|                     self.get_batch(rtxn, batch_id) | ||||
|                         .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) | ||||
|   | ||||
| @@ -1,16 +1,16 @@ | ||||
| use milli::progress::ProgressView; | ||||
| use serde::Serialize; | ||||
| use time::{Duration, OffsetDateTime}; | ||||
|  | ||||
| use crate::{ | ||||
|     batches::{Batch, BatchId, BatchStats}, | ||||
|     task_view::DetailsView, | ||||
|     tasks::serialize_duration, | ||||
| }; | ||||
| use crate::batches::{Batch, BatchId, BatchStats}; | ||||
| use crate::task_view::DetailsView; | ||||
| use crate::tasks::serialize_duration; | ||||
|  | ||||
| #[derive(Debug, Clone, Serialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct BatchView { | ||||
|     pub uid: BatchId, | ||||
|     pub progress: Option<ProgressView>, | ||||
|     pub details: DetailsView, | ||||
|     pub stats: BatchStats, | ||||
|     #[serde(serialize_with = "serialize_duration", default)] | ||||
| @@ -25,6 +25,7 @@ impl BatchView { | ||||
|     pub fn from_batch(batch: &Batch) -> Self { | ||||
|         Self { | ||||
|             uid: batch.uid, | ||||
|             progress: batch.progress.clone(), | ||||
|             details: batch.details.clone(), | ||||
|             stats: batch.stats.clone(), | ||||
|             duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at), | ||||
|   | ||||
| @@ -1,12 +1,11 @@ | ||||
| use std::collections::BTreeMap; | ||||
|  | ||||
| use milli::progress::ProgressView; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| use crate::{ | ||||
|     task_view::DetailsView, | ||||
|     tasks::{Kind, Status}, | ||||
| }; | ||||
| use crate::task_view::DetailsView; | ||||
| use crate::tasks::{Kind, Status}; | ||||
|  | ||||
| pub type BatchId = u32; | ||||
|  | ||||
| @@ -15,6 +14,8 @@ pub type BatchId = u32; | ||||
| pub struct Batch { | ||||
|     pub uid: BatchId, | ||||
|  | ||||
|     #[serde(skip)] | ||||
|     pub progress: Option<ProgressView>, | ||||
|     pub details: DetailsView, | ||||
|     pub stats: BatchStats, | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,6 @@ use std::fmt::{Display, Write}; | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use enum_iterator::Sequence; | ||||
| use milli::update::new::indexer::document_changes::Progress; | ||||
| use milli::update::IndexDocumentsMethod; | ||||
| use milli::Object; | ||||
| use roaring::RoaringBitmap; | ||||
| @@ -41,62 +40,6 @@ pub struct Task { | ||||
|     pub kind: KindWithContent, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct TaskProgress { | ||||
|     pub current_step: &'static str, | ||||
|     pub finished_steps: u16, | ||||
|     pub total_steps: u16, | ||||
|     pub finished_substeps: Option<u32>, | ||||
|     pub total_substeps: Option<u32>, | ||||
| } | ||||
|  | ||||
| impl Default for TaskProgress { | ||||
|     fn default() -> Self { | ||||
|         Self::new() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl TaskProgress { | ||||
|     pub fn new() -> Self { | ||||
|         Self { | ||||
|             current_step: "start", | ||||
|             finished_steps: 0, | ||||
|             total_steps: 1, | ||||
|             finished_substeps: None, | ||||
|             total_substeps: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn update(&mut self, progress: Progress) -> TaskProgress { | ||||
|         if self.finished_steps > progress.finished_steps { | ||||
|             return *self; | ||||
|         } | ||||
|  | ||||
|         if self.current_step != progress.step_name { | ||||
|             self.current_step = progress.step_name | ||||
|         } | ||||
|  | ||||
|         self.total_steps = progress.total_steps; | ||||
|  | ||||
|         if self.finished_steps < progress.finished_steps { | ||||
|             self.finished_substeps = None; | ||||
|             self.total_substeps = None; | ||||
|         } | ||||
|         self.finished_steps = progress.finished_steps; | ||||
|         if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep { | ||||
|             if let Some(task_finished_substeps) = self.finished_substeps { | ||||
|                 if task_finished_substeps > finished_substeps { | ||||
|                     return *self; | ||||
|                 } | ||||
|             } | ||||
|             self.finished_substeps = Some(finished_substeps); | ||||
|             self.total_substeps = Some(total_substeps); | ||||
|         } | ||||
|         *self | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Task { | ||||
|     pub fn index_uid(&self) -> Option<&str> { | ||||
|         use KindWithContent::*; | ||||
|   | ||||
| @@ -1,18 +1,18 @@ | ||||
| use actix_web::{ | ||||
|     web::{self, Data}, | ||||
|     HttpResponse, | ||||
| }; | ||||
| use actix_web::web::{self, Data}; | ||||
| use actix_web::HttpResponse; | ||||
| use deserr::actix_web::AwebQueryParameter; | ||||
| use index_scheduler::{IndexScheduler, Query}; | ||||
| use meilisearch_types::{ | ||||
|     batch_view::BatchView, batches::BatchId, deserr::DeserrQueryParamError, error::ResponseError, | ||||
|     keys::actions, | ||||
| }; | ||||
| use meilisearch_types::batch_view::BatchView; | ||||
| use meilisearch_types::batches::BatchId; | ||||
| use meilisearch_types::deserr::DeserrQueryParamError; | ||||
| use meilisearch_types::error::ResponseError; | ||||
| use meilisearch_types::keys::actions; | ||||
| use serde::Serialize; | ||||
|  | ||||
| use crate::extractors::{authentication::GuardedData, sequential_extractor::SeqHandler}; | ||||
|  | ||||
| use super::{tasks::TasksFilterQuery, ActionPolicy}; | ||||
| use super::tasks::TasksFilterQuery; | ||||
| use super::ActionPolicy; | ||||
| use crate::extractors::authentication::GuardedData; | ||||
| use crate::extractors::sequential_extractor::SeqHandler; | ||||
|  | ||||
| pub fn configure(cfg: &mut web::ServiceConfig) { | ||||
|     cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches)))) | ||||
|   | ||||
| @@ -284,6 +284,7 @@ async fn test_summarized_document_addition_or_update() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "receivedDocuments": 1, | ||||
|         "indexedDocuments": 1 | ||||
| @@ -314,6 +315,7 @@ async fn test_summarized_document_addition_or_update() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 1, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "receivedDocuments": 1, | ||||
|         "indexedDocuments": 1 | ||||
| @@ -349,6 +351,7 @@ async fn test_summarized_delete_documents_by_batch() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "providedIds": 3, | ||||
|         "deletedDocuments": 0 | ||||
| @@ -380,6 +383,7 @@ async fn test_summarized_delete_documents_by_batch() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 2, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "providedIds": 1, | ||||
|         "deletedDocuments": 0 | ||||
| @@ -416,6 +420,7 @@ async fn test_summarized_delete_documents_by_filter() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "providedIds": 0, | ||||
|         "deletedDocuments": 0, | ||||
| @@ -448,6 +453,7 @@ async fn test_summarized_delete_documents_by_filter() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 2, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "providedIds": 0, | ||||
|         "deletedDocuments": 0, | ||||
| @@ -480,6 +486,7 @@ async fn test_summarized_delete_documents_by_filter() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 4, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "providedIds": 0, | ||||
|         "deletedDocuments": 0, | ||||
| @@ -516,6 +523,7 @@ async fn test_summarized_delete_document_by_id() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "providedIds": 1, | ||||
|         "deletedDocuments": 0 | ||||
| @@ -547,6 +555,7 @@ async fn test_summarized_delete_document_by_id() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 2, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "providedIds": 1, | ||||
|         "deletedDocuments": 0 | ||||
| @@ -594,6 +603,7 @@ async fn test_summarized_settings_update() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "displayedAttributes": [ | ||||
|           "doggos", | ||||
| @@ -638,6 +648,7 @@ async fn test_summarized_index_creation() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": {}, | ||||
|       "stats": { | ||||
|         "totalNbTasks": 1, | ||||
| @@ -665,6 +676,7 @@ async fn test_summarized_index_creation() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 1, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "primaryKey": "doggos" | ||||
|       }, | ||||
| @@ -809,6 +821,7 @@ async fn test_summarized_index_update() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": {}, | ||||
|       "stats": { | ||||
|         "totalNbTasks": 1, | ||||
| @@ -836,6 +849,7 @@ async fn test_summarized_index_update() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 1, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "primaryKey": "bones" | ||||
|       }, | ||||
| @@ -868,6 +882,7 @@ async fn test_summarized_index_update() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 3, | ||||
|       "progress": null, | ||||
|       "details": {}, | ||||
|       "stats": { | ||||
|         "totalNbTasks": 1, | ||||
| @@ -895,6 +910,7 @@ async fn test_summarized_index_update() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 4, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "primaryKey": "bones" | ||||
|       }, | ||||
| @@ -932,6 +948,7 @@ async fn test_summarized_index_swap() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "swaps": [ | ||||
|           { | ||||
| @@ -972,6 +989,7 @@ async fn test_summarized_index_swap() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 3, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "swaps": [ | ||||
|           { | ||||
| @@ -1014,6 +1032,7 @@ async fn test_summarized_batch_cancelation() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 1, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "matchedTasks": 1, | ||||
|         "canceledTasks": 0, | ||||
| @@ -1051,6 +1070,7 @@ async fn test_summarized_batch_deletion() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 1, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "matchedTasks": 1, | ||||
|         "deletedTasks": 1, | ||||
| @@ -1084,6 +1104,7 @@ async fn test_summarized_dump_creation() { | ||||
|         @r#" | ||||
|     { | ||||
|       "uid": 0, | ||||
|       "progress": null, | ||||
|       "details": { | ||||
|         "dumpUid": "[dumpUid]" | ||||
|       }, | ||||
|   | ||||
| @@ -7,7 +7,6 @@ use std::path::{Path, PathBuf}; | ||||
|  | ||||
| use anyhow::{bail, Context}; | ||||
| use meilisearch_types::versioning::create_version_file; | ||||
|  | ||||
| use v1_10::v1_9_to_v1_10; | ||||
| use v1_12::v1_11_to_v1_12; | ||||
|  | ||||
|   | ||||
| @@ -1,18 +1,13 @@ | ||||
| use anyhow::bail; | ||||
| use std::path::Path; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use meilisearch_types::{ | ||||
|     heed::{ | ||||
|         types::{SerdeJson, Str}, | ||||
|         Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, | ||||
|     }, | ||||
|     milli::index::{db_name, main_key}, | ||||
| }; | ||||
|  | ||||
| use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; | ||||
| use anyhow::{bail, Context}; | ||||
| use meilisearch_types::heed::types::{SerdeJson, Str}; | ||||
| use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; | ||||
| use meilisearch_types::milli::index::{db_name, main_key}; | ||||
|  | ||||
| use super::v1_9; | ||||
| use crate::uuid_codec::UuidCodec; | ||||
| use crate::{try_opening_database, try_opening_poly_database}; | ||||
|  | ||||
| pub type FieldDistribution = std::collections::BTreeMap<String, u64>; | ||||
|  | ||||
|   | ||||
| @@ -7,12 +7,12 @@ | ||||
| use std::path::Path; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use meilisearch_types::{ | ||||
|     heed::{types::Str, Database, EnvOpenOptions}, | ||||
|     milli::index::db_name, | ||||
| }; | ||||
| use meilisearch_types::heed::types::Str; | ||||
| use meilisearch_types::heed::{Database, EnvOpenOptions}; | ||||
| use meilisearch_types::milli::index::db_name; | ||||
|  | ||||
| use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; | ||||
| use crate::uuid_codec::UuidCodec; | ||||
| use crate::{try_opening_database, try_opening_poly_database}; | ||||
|  | ||||
| pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { | ||||
|     println!("Upgrading from v1.10.0 to v1.11.0"); | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| //! The breaking changes that happened between the v1.11 and the v1.12 are: | ||||
| //! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900 | ||||
|  | ||||
| use std::{io::BufWriter, path::Path}; | ||||
| use std::io::BufWriter; | ||||
| use std::path::Path; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use file_store::FileStore; | ||||
|   | ||||
| @@ -1734,6 +1734,7 @@ pub(crate) mod tests { | ||||
|  | ||||
|     use crate::error::{Error, InternalError}; | ||||
|     use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::update::new::indexer; | ||||
|     use crate::update::settings::InnerIndexSettings; | ||||
|     use crate::update::{ | ||||
| @@ -1810,7 +1811,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             )?; | ||||
|  | ||||
|             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) { | ||||
| @@ -1829,7 +1830,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap()?; | ||||
| @@ -1901,7 +1902,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             )?; | ||||
|  | ||||
|             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) { | ||||
| @@ -1920,7 +1921,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap()?; | ||||
| @@ -1982,7 +1983,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2001,7 +2002,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| should_abort.load(Relaxed), | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap() | ||||
|   | ||||
| @@ -31,6 +31,7 @@ pub mod vector; | ||||
| #[macro_use] | ||||
| pub mod snapshot_tests; | ||||
| mod fieldids_weights_map; | ||||
| pub mod progress; | ||||
|  | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| use std::convert::{TryFrom, TryInto}; | ||||
|   | ||||
							
								
								
									
										152
									
								
								crates/milli/src/progress.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										152
									
								
								crates/milli/src/progress.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,152 @@ | ||||
| use std::any::TypeId; | ||||
| use std::borrow::Cow; | ||||
| use std::sync::atomic::{AtomicU32, Ordering}; | ||||
| use std::sync::{Arc, RwLock}; | ||||
|  | ||||
| use serde::Serialize; | ||||
|  | ||||
| pub trait Step: 'static + Send + Sync { | ||||
|     fn name(&self) -> Cow<'static, str>; | ||||
|     fn current(&self) -> u32; | ||||
|     fn total(&self) -> u32; | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Default)] | ||||
| pub struct Progress { | ||||
|     steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>, | ||||
| } | ||||
|  | ||||
| impl Progress { | ||||
|     pub fn update_progress<P: Step>(&self, sub_progress: P) { | ||||
|         let mut steps = self.steps.write().unwrap(); | ||||
|         let step_type = TypeId::of::<P>(); | ||||
|         if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) { | ||||
|             steps.truncate(idx); | ||||
|         } | ||||
|         steps.push((step_type, Box::new(sub_progress))); | ||||
|     } | ||||
|  | ||||
|     // TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types | ||||
|     pub fn as_progress_view(&self) -> ProgressView { | ||||
|         let steps = self.steps.read().unwrap(); | ||||
|  | ||||
|         let mut percentage = 0.0; | ||||
|         let mut prev_factors = 1.0; | ||||
|  | ||||
|         let mut step_view = Vec::with_capacity(steps.len()); | ||||
|         for (_, step) in steps.iter() { | ||||
|             prev_factors *= step.total() as f32; | ||||
|             percentage += step.current() as f32 / prev_factors; | ||||
|  | ||||
|             step_view.push(ProgressStepView { | ||||
|                 current_step: step.name(), | ||||
|                 finished: step.current(), | ||||
|                 total: step.total(), | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         ProgressView { steps: step_view, percentage: percentage * 100.0 } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// This trait lets you use the AtomicSubStep defined right below. | ||||
| /// The name must be a const that never changed but that can't be enforced by the type system because it make the trait non object-safe. | ||||
| /// By forcing the Default trait + the &'static str we make it harder to miss-use the trait. | ||||
| pub trait NamedStep: 'static + Send + Sync + Default { | ||||
|     fn name(&self) -> &'static str; | ||||
| } | ||||
|  | ||||
| /// Structure to quickly define steps that need very quick, lockless updating of their current step. | ||||
| /// You can use this struct if: | ||||
| /// - The name of the step doesn't change | ||||
| /// - The total number of steps doesn't change | ||||
| pub struct AtomicSubStep<Name: NamedStep> { | ||||
|     unit_name: Name, | ||||
|     current: Arc<AtomicU32>, | ||||
|     total: u32, | ||||
| } | ||||
|  | ||||
| impl<Name: NamedStep> AtomicSubStep<Name> { | ||||
|     pub fn new(total: u32) -> (Arc<AtomicU32>, Self) { | ||||
|         let current = Arc::new(AtomicU32::new(0)); | ||||
|         (current.clone(), Self { current, total, unit_name: Name::default() }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<Name: NamedStep> Step for AtomicSubStep<Name> { | ||||
|     fn name(&self) -> Cow<'static, str> { | ||||
|         self.unit_name.name().into() | ||||
|     } | ||||
|  | ||||
|     fn current(&self) -> u32 { | ||||
|         self.current.load(Ordering::Relaxed) | ||||
|     } | ||||
|  | ||||
|     fn total(&self) -> u32 { | ||||
|         self.total | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[macro_export] | ||||
| macro_rules! make_enum_progress { | ||||
|     ($visibility:vis enum $name:ident { $($variant:ident,)+ }) => { | ||||
|         #[repr(u8)] | ||||
|         #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] | ||||
|         #[allow(clippy::enum_variant_names)] | ||||
|         $visibility enum $name { | ||||
|             $($variant),+ | ||||
|         } | ||||
|  | ||||
|         impl Step for $name { | ||||
|             fn name(&self) -> Cow<'static, str> { | ||||
|                 use convert_case::Casing; | ||||
|  | ||||
|                 match self { | ||||
|                     $( | ||||
|                         $name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into() | ||||
|                     ),+ | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             fn current(&self) -> u32 { | ||||
|                 *self as u32 | ||||
|             } | ||||
|  | ||||
|             fn total(&self) -> u32 { | ||||
|                 Self::CARDINALITY as u32 | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
| } | ||||
|  | ||||
| #[macro_export] | ||||
| macro_rules! make_atomic_progress { | ||||
|     ($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => { | ||||
|         #[derive(Default, Debug, Clone, Copy)] | ||||
|         pub struct $struct_name {} | ||||
|         impl NamedStep for $struct_name { | ||||
|             fn name(&self) -> &'static str { | ||||
|                 $step_name | ||||
|             } | ||||
|         } | ||||
|         pub type $atomic_struct_name = AtomicSubStep<$struct_name>; | ||||
|     }; | ||||
| } | ||||
|  | ||||
| make_atomic_progress!(Document alias AtomicDocumentStep => "document" ); | ||||
| make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" ); | ||||
|  | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct ProgressView { | ||||
|     pub steps: Vec<ProgressStepView>, | ||||
|     pub percentage: f32, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct ProgressStepView { | ||||
|     pub current_step: Cow<'static, str>, | ||||
|     pub finished: u32, | ||||
|     pub total: u32, | ||||
| } | ||||
| @@ -5,6 +5,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::{btreemap, hashset}; | ||||
|  | ||||
| use crate::progress::Progress; | ||||
| use crate::update::new::indexer; | ||||
| use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use crate::vector::EmbeddingConfigs; | ||||
| @@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -91,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -766,6 +766,7 @@ mod tests { | ||||
|     use crate::documents::mmap_from_objects; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::index::IndexEmbeddingConfig; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::search::TermsMatchingStrategy; | ||||
|     use crate::update::new::indexer; | ||||
|     use crate::update::Setting; | ||||
| @@ -1964,7 +1965,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2148,7 +2149,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2163,7 +2164,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2210,7 +2211,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2225,7 +2226,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2263,7 +2264,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2278,7 +2279,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2315,7 +2316,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2330,7 +2331,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2369,7 +2370,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2384,7 +2385,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2428,7 +2429,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2443,7 +2444,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2480,7 +2481,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2495,7 +2496,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2532,7 +2533,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2547,7 +2548,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2726,7 +2727,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2741,7 +2742,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2785,7 +2786,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2800,7 +2801,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2841,7 +2842,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2856,7 +2857,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
|   | ||||
| @@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd; | ||||
| use crate::update::new::channel::FieldIdDocidFacetSender; | ||||
| use crate::update::new::extract::perm_json_p; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::ref_cell_ext::RefCellExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str { | ||||
|  | ||||
| impl FacetedDocidsExtractor { | ||||
|     #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")] | ||||
|     pub fn run_extraction< | ||||
|         'pl, | ||||
|         'fid, | ||||
|         'indexer, | ||||
|         'index, | ||||
|         'extractor, | ||||
|         DC: DocumentChanges<'pl>, | ||||
|         MSP, | ||||
|         SP, | ||||
|     >( | ||||
|     pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         sender: &FieldIdDocidFacetSender, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let index = indexing_context.index; | ||||
|         let rtxn = index.read_txn()?; | ||||
|   | ||||
| @@ -15,23 +15,22 @@ pub use geo::*; | ||||
| pub use searchable::*; | ||||
| pub use vectors::EmbeddingExtractor; | ||||
|  | ||||
| use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress}; | ||||
| use super::steps::Step; | ||||
| use super::indexer::document_changes::{DocumentChanges, IndexingContext}; | ||||
| use super::steps::IndexingStep; | ||||
| use super::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::GrenadParameters; | ||||
| use crate::Result; | ||||
|  | ||||
| pub trait DocidsExtractor { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync; | ||||
|         MSP: Fn() -> bool + Sync; | ||||
| } | ||||
|  | ||||
| /// TODO move in permissive json pointer | ||||
|   | ||||
| @@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; | ||||
| use crate::update::new::extract::cache::BalancedCaches; | ||||
| use crate::update::new::extract::perm_json_p::contained_in; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::ref_cell_ext::RefCellExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { | ||||
| pub struct WordDocidsExtractors; | ||||
|  | ||||
| impl WordDocidsExtractors { | ||||
|     pub fn run_extraction< | ||||
|         'pl, | ||||
|         'fid, | ||||
|         'indexer, | ||||
|         'index, | ||||
|         'extractor, | ||||
|         DC: DocumentChanges<'pl>, | ||||
|         MSP, | ||||
|         SP, | ||||
|     >( | ||||
|     pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<WordDocidsCaches<'extractor>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let index = indexing_context.index; | ||||
|         let rtxn = index.read_txn()?; | ||||
|   | ||||
| @@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer}; | ||||
| use super::cache::BalancedCaches; | ||||
| use super::DocidsExtractor; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> | ||||
| } | ||||
|  | ||||
| pub trait SearchableExtractor: Sized + Sync { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let rtxn = indexing_context.index.read_txn()?; | ||||
|         let stop_words = indexing_context.index.stop_words(&rtxn)?; | ||||
| @@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync { | ||||
| } | ||||
|  | ||||
| impl<T: SearchableExtractor> DocidsExtractor for T { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         Self::run_extraction( | ||||
|             grenad_parameters, | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| use std::cell::{Cell, RefCell}; | ||||
| use std::sync::atomic::Ordering; | ||||
| use std::sync::{Arc, RwLock}; | ||||
|  | ||||
| use bumpalo::Bump; | ||||
| @@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator; | ||||
|  | ||||
| use super::super::document_change::DocumentChange; | ||||
| use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; | ||||
| use crate::progress::{AtomicDocumentStep, Progress}; | ||||
| use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; | ||||
| use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result}; | ||||
|  | ||||
| @@ -133,10 +135,8 @@ pub struct IndexingContext< | ||||
|     'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|     'index,   // covariant lifetime of the index | ||||
|     MSP, | ||||
|     SP, | ||||
| > where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     pub index: &'index Index, | ||||
|     pub db_fields_ids_map: &'indexer FieldsIdsMap, | ||||
| @@ -144,7 +144,7 @@ pub struct IndexingContext< | ||||
|     pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>, | ||||
|     pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>, | ||||
|     pub must_stop_processing: &'indexer MSP, | ||||
|     pub send_progress: &'indexer SP, | ||||
|     pub progress: &'indexer Progress, | ||||
| } | ||||
|  | ||||
| impl< | ||||
| @@ -152,18 +152,15 @@ impl< | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > Copy | ||||
|     for IndexingContext< | ||||
|         'fid,     // invariant lifetime of fields ids map | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > | ||||
| where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
| } | ||||
|  | ||||
| @@ -172,18 +169,15 @@ impl< | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > Clone | ||||
|     for IndexingContext< | ||||
|         'fid,     // invariant lifetime of fields ids map | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > | ||||
| where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     fn clone(&self) -> Self { | ||||
|         *self | ||||
| @@ -202,7 +196,6 @@ pub fn extract< | ||||
|     EX, | ||||
|     DC: DocumentChanges<'pl>, | ||||
|     MSP, | ||||
|     SP, | ||||
| >( | ||||
|     document_changes: &DC, | ||||
|     extractor: &EX, | ||||
| @@ -213,18 +206,18 @@ pub fn extract< | ||||
|         doc_allocs, | ||||
|         fields_ids_map_store, | ||||
|         must_stop_processing, | ||||
|         send_progress, | ||||
|     }: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         progress, | ||||
|     }: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|     extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|     datastore: &'data ThreadLocal<EX::Data>, | ||||
|     step: Step, | ||||
|     step: IndexingStep, | ||||
| ) -> Result<()> | ||||
| where | ||||
|     EX: Extractor<'extractor>, | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     tracing::trace!("We are resetting the extractor allocators"); | ||||
|     progress.update_progress(step); | ||||
|     // Clean up and reuse the extractor allocs | ||||
|     for extractor_alloc in extractor_allocs.iter_mut() { | ||||
|         tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes()); | ||||
| @@ -232,9 +225,11 @@ where | ||||
|     } | ||||
|  | ||||
|     let total_documents = document_changes.len() as u32; | ||||
|     let (step, progress_step) = AtomicDocumentStep::new(total_documents); | ||||
|     progress.update_progress(progress_step); | ||||
|  | ||||
|     let pi = document_changes.iter(CHUNK_SIZE); | ||||
|     pi.enumerate().try_arc_for_each_try_init( | ||||
|     pi.try_arc_for_each_try_init( | ||||
|         || { | ||||
|             DocumentChangeContext::new( | ||||
|                 index, | ||||
| @@ -247,13 +242,10 @@ where | ||||
|                 move |index_alloc| extractor.init_data(index_alloc), | ||||
|             ) | ||||
|         }, | ||||
|         |context, (finished_documents, items)| { | ||||
|         |context, items| { | ||||
|             if (must_stop_processing)() { | ||||
|                 return Err(Arc::new(InternalError::AbortedIndexation.into())); | ||||
|             } | ||||
|             let finished_documents = (finished_documents * CHUNK_SIZE) as u32; | ||||
|  | ||||
|             (send_progress)(Progress::from_step_substep(step, finished_documents, total_documents)); | ||||
|  | ||||
|             // Clean up and reuse the document-specific allocator | ||||
|             context.doc_alloc.reset(); | ||||
| @@ -264,6 +256,7 @@ where | ||||
|             }); | ||||
|  | ||||
|             let res = extractor.process(changes, context).map_err(Arc::new); | ||||
|             step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed); | ||||
|  | ||||
|             // send back the doc_alloc in the pool | ||||
|             context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc)); | ||||
| @@ -271,32 +264,7 @@ where | ||||
|             res | ||||
|         }, | ||||
|     )?; | ||||
|  | ||||
|     (send_progress)(Progress::from_step_substep(step, total_documents, total_documents)); | ||||
|     step.store(total_documents, Ordering::Relaxed); | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub struct Progress { | ||||
|     pub finished_steps: u16, | ||||
|     pub total_steps: u16, | ||||
|     pub step_name: &'static str, | ||||
|     pub finished_total_substep: Option<(u32, u32)>, | ||||
| } | ||||
|  | ||||
| impl Progress { | ||||
|     pub fn from_step(step: Step) -> Self { | ||||
|         Self { | ||||
|             finished_steps: step.finished_steps(), | ||||
|             total_steps: Step::total_steps(), | ||||
|             step_name: step.name(), | ||||
|             finished_total_substep: None, | ||||
|         } | ||||
|     } | ||||
|     pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self { | ||||
|         Self { | ||||
|             finished_total_substep: Some((finished_substep, total_substep)), | ||||
|             ..Progress::from_step(step) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -92,11 +92,12 @@ mod test { | ||||
|  | ||||
|     use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::update::new::indexer::document_changes::{ | ||||
|         extract, DocumentChangeContext, Extractor, IndexingContext, | ||||
|     }; | ||||
|     use crate::update::new::indexer::DocumentDeletion; | ||||
|     use crate::update::new::steps::Step; | ||||
|     use crate::update::new::steps::IndexingStep; | ||||
|     use crate::update::new::thread_local::{MostlySend, ThreadLocal}; | ||||
|     use crate::update::new::DocumentChange; | ||||
|     use crate::DocumentId; | ||||
| @@ -164,7 +165,7 @@ mod test { | ||||
|             doc_allocs: &doc_allocs, | ||||
|             fields_ids_map_store: &fields_ids_map_store, | ||||
|             must_stop_processing: &(|| false), | ||||
|             send_progress: &(|_progress| {}), | ||||
|             progress: &Progress::default(), | ||||
|         }; | ||||
|  | ||||
|         for _ in 0..3 { | ||||
| @@ -176,7 +177,7 @@ mod test { | ||||
|                 context, | ||||
|                 &mut extractor_allocs, | ||||
|                 &datastore, | ||||
|                 Step::ExtractingDocuments, | ||||
|                 IndexingStep::ExtractingDocuments, | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| use std::sync::atomic::Ordering; | ||||
|  | ||||
| use bumpalo::collections::CollectIn; | ||||
| use bumpalo::Bump; | ||||
| use bumparaw_collections::RawMap; | ||||
| @@ -10,11 +12,12 @@ use serde_json::value::RawValue; | ||||
| use serde_json::Deserializer; | ||||
|  | ||||
| use super::super::document_change::DocumentChange; | ||||
| use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress}; | ||||
| use super::document_changes::{DocumentChangeContext, DocumentChanges}; | ||||
| use super::retrieve_or_guess_primary_key; | ||||
| use crate::documents::PrimaryKey; | ||||
| use crate::progress::{AtomicPayloadStep, Progress}; | ||||
| use crate::update::new::document::Versions; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::MostlySend; | ||||
| use crate::update::new::{Deletion, Insertion, Update}; | ||||
| use crate::update::{AvailableIds, IndexDocumentsMethod}; | ||||
| @@ -45,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|  | ||||
|     #[allow(clippy::too_many_arguments)] | ||||
|     #[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")] | ||||
|     pub fn into_changes<MSP, SP>( | ||||
|     pub fn into_changes<MSP>( | ||||
|         self, | ||||
|         indexer: &'pl Bump, | ||||
|         index: &Index, | ||||
| @@ -53,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|         primary_key_from_op: Option<&'pl str>, | ||||
|         new_fields_ids_map: &mut FieldsIdsMap, | ||||
|         must_stop_processing: &MSP, | ||||
|         send_progress: &SP, | ||||
|         progress: Progress, | ||||
|     ) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)> | ||||
|     where | ||||
|         MSP: Fn() -> bool, | ||||
|         SP: Fn(Progress), | ||||
|     { | ||||
|         progress.update_progress(IndexingStep::PreparingPayloads); | ||||
|         let Self { operations, method } = self; | ||||
|  | ||||
|         let documents_ids = index.documents_ids(rtxn)?; | ||||
| @@ -68,16 +71,14 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|         let mut primary_key = None; | ||||
|  | ||||
|         let payload_count = operations.len(); | ||||
|         let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32); | ||||
|         progress.update_progress(progress_step); | ||||
|  | ||||
|         for (payload_index, operation) in operations.into_iter().enumerate() { | ||||
|             if must_stop_processing() { | ||||
|                 return Err(InternalError::AbortedIndexation.into()); | ||||
|             } | ||||
|             send_progress(Progress::from_step_substep( | ||||
|                 Step::PreparingPayloads, | ||||
|                 payload_index as u32, | ||||
|                 payload_count as u32, | ||||
|             )); | ||||
|             step.store(payload_index as u32, Ordering::Relaxed); | ||||
|  | ||||
|             let mut bytes = 0; | ||||
|             let result = match operation { | ||||
| @@ -118,12 +119,7 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|             }; | ||||
|             operations_stats.push(PayloadStats { document_count, bytes, error }); | ||||
|         } | ||||
|  | ||||
|         send_progress(Progress::from_step_substep( | ||||
|             Step::PreparingPayloads, | ||||
|             payload_count as u32, | ||||
|             payload_count as u32, | ||||
|         )); | ||||
|         step.store(payload_count as u32, Ordering::Relaxed); | ||||
|  | ||||
|         // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone | ||||
|         let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> = | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use std::thread::{self, Builder}; | ||||
|  | ||||
| use big_s::S; | ||||
| use bumparaw_collections::RawMap; | ||||
| use document_changes::{extract, DocumentChanges, IndexingContext, Progress}; | ||||
| use document_changes::{extract, DocumentChanges, IndexingContext}; | ||||
| pub use document_deletion::DocumentDeletion; | ||||
| pub use document_operation::{DocumentOperation, PayloadStats}; | ||||
| use hashbrown::HashMap; | ||||
| @@ -22,7 +22,7 @@ use super::channel::*; | ||||
| use super::extract::*; | ||||
| use super::facet_search_builder::FacetSearchBuilder; | ||||
| use super::merger::FacetFieldIdsDelta; | ||||
| use super::steps::Step; | ||||
| use super::steps::IndexingStep; | ||||
| use super::thread_local::ThreadLocal; | ||||
| use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder}; | ||||
| use super::words_prefix_docids::{ | ||||
| @@ -33,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||
| use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; | ||||
| use crate::progress::Progress; | ||||
| use crate::proximity::ProximityPrecision; | ||||
| use crate::update::del_add::DelAdd; | ||||
| use crate::update::new::extract::EmbeddingExtractor; | ||||
| @@ -60,7 +61,7 @@ mod update_by_function; | ||||
| /// | ||||
| /// TODO return stats | ||||
| #[allow(clippy::too_many_arguments)] // clippy: 😝 | ||||
| pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( | ||||
| pub fn index<'pl, 'indexer, 'index, DC, MSP>( | ||||
|     wtxn: &mut RwTxn, | ||||
|     index: &'index Index, | ||||
|     pool: &ThreadPoolNoAbort, | ||||
| @@ -71,12 +72,11 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( | ||||
|     document_changes: &DC, | ||||
|     embedders: EmbeddingConfigs, | ||||
|     must_stop_processing: &'indexer MSP, | ||||
|     send_progress: &'indexer SP, | ||||
|     progress: &'indexer Progress, | ||||
| ) -> Result<()> | ||||
| where | ||||
|     DC: DocumentChanges<'pl>, | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     let mut bbbuffers = Vec::new(); | ||||
|     let finished_extraction = AtomicBool::new(false); | ||||
| @@ -125,7 +125,7 @@ where | ||||
|         doc_allocs: &doc_allocs, | ||||
|         fields_ids_map_store: &fields_ids_map_store, | ||||
|         must_stop_processing, | ||||
|         send_progress, | ||||
|         progress, | ||||
|     }; | ||||
|  | ||||
|     let mut index_embeddings = index.embedding_configs(wtxn)?; | ||||
| @@ -159,7 +159,7 @@ where | ||||
|                         indexing_context, | ||||
|                         &mut extractor_allocs, | ||||
|                         &datastore, | ||||
|                         Step::ExtractingDocuments, | ||||
|                         IndexingStep::ExtractingDocuments, | ||||
|                     )?; | ||||
|                 } | ||||
|                 { | ||||
| @@ -191,7 +191,7 @@ where | ||||
|                                 indexing_context, | ||||
|                                 &mut extractor_allocs, | ||||
|                                 &extractor_sender.field_id_docid_facet_sender(), | ||||
|                                 Step::ExtractingFacets | ||||
|                                 IndexingStep::ExtractingFacets | ||||
|                             )? | ||||
|                     }; | ||||
|  | ||||
| @@ -224,7 +224,7 @@ where | ||||
|                             document_changes, | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             Step::ExtractingWords | ||||
|                             IndexingStep::ExtractingWords | ||||
|                         )? | ||||
|                     }; | ||||
|  | ||||
| @@ -302,7 +302,7 @@ where | ||||
|                             document_changes, | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             Step::ExtractingWordProximity, | ||||
|                             IndexingStep::ExtractingWordProximity, | ||||
|                         )? | ||||
|                     }; | ||||
|  | ||||
| @@ -338,7 +338,7 @@ where | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             &datastore, | ||||
|                             Step::ExtractingEmbeddings, | ||||
|                             IndexingStep::ExtractingEmbeddings, | ||||
|                         )?; | ||||
|                     } | ||||
|                     { | ||||
| @@ -371,7 +371,7 @@ where | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             &datastore, | ||||
|                             Step::WritingGeoPoints | ||||
|                             IndexingStep::WritingGeoPoints | ||||
|                         )?; | ||||
|                     } | ||||
|  | ||||
| @@ -383,9 +383,7 @@ where | ||||
|                         &indexing_context.must_stop_processing, | ||||
|                     )?; | ||||
|                 } | ||||
|  | ||||
|                 (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); | ||||
|  | ||||
|                 indexing_context.progress.update_progress(IndexingStep::WritingToDatabase); | ||||
|                 finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); | ||||
|  | ||||
|                 Result::Ok((facet_field_ids_delta, index_embeddings)) | ||||
| @@ -485,7 +483,7 @@ where | ||||
|             )?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); | ||||
|         indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors); | ||||
|  | ||||
|         let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?; | ||||
|  | ||||
| @@ -498,10 +496,7 @@ where | ||||
|                 break 'vectors; | ||||
|             } | ||||
|  | ||||
|             (indexing_context.send_progress)(Progress::from_step( | ||||
|                 Step::WritingEmbeddingsToDatabase, | ||||
|             )); | ||||
|  | ||||
|             indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase); | ||||
|             let mut rng = rand::rngs::StdRng::seed_from_u64(42); | ||||
|             for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { | ||||
|                 let dimensions = *dimensions; | ||||
| @@ -517,21 +512,19 @@ where | ||||
|             index.put_embedding_configs(wtxn, index_embeddings)?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets)); | ||||
|  | ||||
|         indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets); | ||||
|         if index.facet_search(wtxn)? { | ||||
|             compute_facet_search_database(index, wtxn, global_fields_ids_map)?; | ||||
|         } | ||||
|  | ||||
|         compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords)); | ||||
|  | ||||
|         indexing_context.progress.update_progress(IndexingStep::PostProcessingWords); | ||||
|         if let Some(prefix_delta) = compute_word_fst(index, wtxn)? { | ||||
|             compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::Finalizing)); | ||||
|         indexing_context.progress.update_progress(IndexingStep::Finalizing); | ||||
|  | ||||
|         Ok(()) as Result<_> | ||||
|     })?; | ||||
|   | ||||
| @@ -1,8 +1,12 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use enum_iterator::Sequence; | ||||
|  | ||||
| use crate::progress::Step; | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] | ||||
| #[repr(u16)] | ||||
| pub enum Step { | ||||
| #[repr(u8)] | ||||
| pub enum IndexingStep { | ||||
|     PreparingPayloads, | ||||
|     ExtractingDocuments, | ||||
|     ExtractingFacets, | ||||
| @@ -18,30 +22,31 @@ pub enum Step { | ||||
|     Finalizing, | ||||
| } | ||||
|  | ||||
| impl Step { | ||||
|     pub fn name(&self) -> &'static str { | ||||
| impl Step for IndexingStep { | ||||
|     fn name(&self) -> Cow<'static, str> { | ||||
|         match self { | ||||
|             Step::PreparingPayloads => "preparing update file", | ||||
|             Step::ExtractingDocuments => "extracting documents", | ||||
|             Step::ExtractingFacets => "extracting facets", | ||||
|             Step::ExtractingWords => "extracting words", | ||||
|             Step::ExtractingWordProximity => "extracting word proximity", | ||||
|             Step::ExtractingEmbeddings => "extracting embeddings", | ||||
|             Step::WritingGeoPoints => "writing geo points", | ||||
|             Step::WritingToDatabase => "writing to database", | ||||
|             Step::WaitingForExtractors => "waiting for extractors", | ||||
|             Step::WritingEmbeddingsToDatabase => "writing embeddings to database", | ||||
|             Step::PostProcessingFacets => "post-processing facets", | ||||
|             Step::PostProcessingWords => "post-processing words", | ||||
|             Step::Finalizing => "finalizing", | ||||
|             IndexingStep::PreparingPayloads => "preparing update file", | ||||
|             IndexingStep::ExtractingDocuments => "extracting documents", | ||||
|             IndexingStep::ExtractingFacets => "extracting facets", | ||||
|             IndexingStep::ExtractingWords => "extracting words", | ||||
|             IndexingStep::ExtractingWordProximity => "extracting word proximity", | ||||
|             IndexingStep::ExtractingEmbeddings => "extracting embeddings", | ||||
|             IndexingStep::WritingGeoPoints => "writing geo points", | ||||
|             IndexingStep::WritingToDatabase => "writing to database", | ||||
|             IndexingStep::WaitingForExtractors => "waiting for extractors", | ||||
|             IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database", | ||||
|             IndexingStep::PostProcessingFacets => "post-processing facets", | ||||
|             IndexingStep::PostProcessingWords => "post-processing words", | ||||
|             IndexingStep::Finalizing => "finalizing", | ||||
|         } | ||||
|         .into() | ||||
|     } | ||||
|  | ||||
|     pub fn finished_steps(self) -> u16 { | ||||
|         self as u16 | ||||
|     fn current(&self) -> u32 { | ||||
|         *self as u32 | ||||
|     } | ||||
|  | ||||
|     pub const fn total_steps() -> u16 { | ||||
|         Self::CARDINALITY as u16 | ||||
|     fn total(&self) -> u32 { | ||||
|         Self::CARDINALITY as u32 | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::hashset; | ||||
| use milli::documents::mmap_from_objects; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -57,7 +58,7 @@ fn test_facet_distribution_with_no_facet_values() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -72,7 +73,7 @@ fn test_facet_distribution_with_no_facet_values() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -7,6 +7,7 @@ use bumpalo::Bump; | ||||
| use either::{Either, Left, Right}; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::{btreemap, hashset}; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -90,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -109,7 +110,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use itertools::Itertools; | ||||
| use maplit::hashset; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -326,7 +327,7 @@ fn criteria_ascdesc() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -341,7 +342,7 @@ fn criteria_ascdesc() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use std::collections::BTreeSet; | ||||
| use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use milli::documents::mmap_from_objects; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -135,7 +136,7 @@ fn test_typo_disabled_on_word() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -150,7 +151,7 @@ fn test_typo_disabled_on_word() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user