mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	inital implementation of the progress
This commit is contained in:
		| @@ -8,6 +8,7 @@ use bumpalo::Bump; | ||||
| use criterion::{criterion_group, criterion_main, Criterion}; | ||||
| use milli::documents::PrimaryKey; | ||||
| use milli::heed::{EnvOpenOptions, RwTxn}; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -151,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -166,7 +167,7 @@ fn indexing_songs_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -218,7 +219,7 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -233,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -263,7 +264,7 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -278,7 +279,7 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -332,7 +333,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -347,7 +348,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -409,7 +410,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -424,7 +425,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -454,7 +455,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -469,7 +470,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -495,7 +496,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -510,7 +511,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -563,7 +564,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -578,7 +579,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -630,7 +631,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -645,7 +646,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -697,7 +698,7 @@ fn indexing_wiki(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -712,7 +713,7 @@ fn indexing_wiki(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -763,7 +764,7 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -778,7 +779,7 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -808,7 +809,7 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -823,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -876,7 +877,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -891,7 +892,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -953,7 +954,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -968,7 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -999,7 +1000,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1014,7 +1015,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1041,7 +1042,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1056,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1108,7 +1109,7 @@ fn indexing_movies_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1123,7 +1124,7 @@ fn indexing_movies_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1174,7 +1175,7 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1189,7 +1190,7 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1219,7 +1220,7 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1234,7 +1235,7 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1287,7 +1288,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1302,7 +1303,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1350,7 +1351,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi | ||||
|             &document_changes, | ||||
|             EmbeddingConfigs::default(), | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -1400,7 +1401,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1415,7 +1416,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1445,7 +1446,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1460,7 +1461,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1486,7 +1487,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1501,7 +1502,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1576,7 +1577,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1591,7 +1592,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1667,7 +1668,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1682,7 +1683,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1750,7 +1751,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1765,7 +1766,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1817,7 +1818,7 @@ fn indexing_geo(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1832,7 +1833,7 @@ fn indexing_geo(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1883,7 +1884,7 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1898,7 +1899,7 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1928,7 +1929,7 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -1943,7 +1944,7 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
| @@ -1996,7 +1997,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) { | ||||
|                         None, | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| false, | ||||
|                         &|_progress| (), | ||||
|                         Progress::default(), | ||||
|                     ) | ||||
|                     .unwrap(); | ||||
|  | ||||
| @@ -2011,7 +2012,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) { | ||||
|                     &document_changes, | ||||
|                     EmbeddingConfigs::default(), | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|                 .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,7 @@ use bumpalo::Bump; | ||||
| use criterion::BenchmarkId; | ||||
| use memmap2::Mmap; | ||||
| use milli::heed::EnvOpenOptions; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -110,7 +111,7 @@ pub fn base_setup(conf: &Conf) -> Index { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -125,7 +126,7 @@ pub fn base_setup(conf: &Conf) -> Index { | ||||
|         &document_changes, | ||||
|         EmbeddingConfigs::default(), | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,7 @@ use either::Either; | ||||
| use fuzzers::Operation; | ||||
| use milli::documents::mmap_from_objects; | ||||
| use milli::heed::EnvOpenOptions; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -128,7 +129,7 @@ fn main() { | ||||
|                                     None, | ||||
|                                     &mut new_fields_ids_map, | ||||
|                                     &|| false, | ||||
|                                     &|_progress| (), | ||||
|                                     Progress::default(), | ||||
|                                 ) | ||||
|                                 .unwrap(); | ||||
|  | ||||
| @@ -143,7 +144,7 @@ fn main() { | ||||
|                                 &document_changes, | ||||
|                                 embedders, | ||||
|                                 &|| false, | ||||
|                                 &|_| (), | ||||
|                                 &Progress::default(), | ||||
|                             ) | ||||
|                             .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -22,8 +22,6 @@ use std::ffi::OsStr; | ||||
| use std::fmt; | ||||
| use std::fs::{self, File}; | ||||
| use std::io::BufWriter; | ||||
| use std::sync::atomic::{self, AtomicU64}; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use bumpalo::collections::CollectIn; | ||||
| use bumpalo::Bump; | ||||
| @@ -32,6 +30,7 @@ use meilisearch_types::batches::BatchId; | ||||
| use meilisearch_types::heed::{RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; | ||||
| use meilisearch_types::milli::heed::CompactionOption; | ||||
| use meilisearch_types::milli::progress::Progress; | ||||
| use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; | ||||
| use meilisearch_types::milli::update::{ | ||||
|     DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings, | ||||
| @@ -41,9 +40,7 @@ use meilisearch_types::milli::vector::parsed_vectors::{ | ||||
| }; | ||||
| use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder}; | ||||
| use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; | ||||
| use meilisearch_types::tasks::{ | ||||
|     Details, IndexSwap, Kind, KindWithContent, Status, Task, TaskProgress, | ||||
| }; | ||||
| use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; | ||||
| use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; | ||||
| use roaring::RoaringBitmap; | ||||
| use time::macros::format_description; | ||||
| @@ -561,11 +558,12 @@ impl IndexScheduler { | ||||
|     /// The list of tasks that were processed. The metadata of each task in the returned | ||||
|     /// list is updated accordingly, with the exception of the its date fields | ||||
|     /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). | ||||
|     #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))] | ||||
|     #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))] | ||||
|     pub(crate) fn process_batch( | ||||
|         &self, | ||||
|         batch: Batch, | ||||
|         current_batch: &mut ProcessingBatch, | ||||
|         progress: Progress, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         #[cfg(test)] | ||||
|         { | ||||
| @@ -953,7 +951,7 @@ impl IndexScheduler { | ||||
|                     .set_currently_updating_index(Some((index_uid.clone(), index.clone()))); | ||||
|  | ||||
|                 let mut index_wtxn = index.write_txn()?; | ||||
|                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; | ||||
|                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?; | ||||
|  | ||||
|                 { | ||||
|                     let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); | ||||
| @@ -996,6 +994,7 @@ impl IndexScheduler { | ||||
|                 self.process_batch( | ||||
|                     Batch::IndexUpdate { index_uid, primary_key, task }, | ||||
|                     current_batch, | ||||
|                     progress, | ||||
|                 ) | ||||
|             } | ||||
|             Batch::IndexUpdate { index_uid, primary_key, mut task } => { | ||||
| @@ -1168,7 +1167,7 @@ impl IndexScheduler { | ||||
|     /// The list of processed tasks. | ||||
|     #[tracing::instrument( | ||||
|         level = "trace", | ||||
|         skip(self, index_wtxn, index), | ||||
|         skip(self, index_wtxn, index, progress), | ||||
|         target = "indexing::scheduler" | ||||
|     )] | ||||
|     fn apply_index_operation<'i>( | ||||
| @@ -1176,44 +1175,12 @@ impl IndexScheduler { | ||||
|         index_wtxn: &mut RwTxn<'i>, | ||||
|         index: &'i Index, | ||||
|         operation: IndexOperation, | ||||
|         progress: Progress, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         let indexer_alloc = Bump::new(); | ||||
|  | ||||
|         let started_processing_at = std::time::Instant::now(); | ||||
|         let secs_since_started_processing_at = AtomicU64::new(0); | ||||
|         const PRINT_SECS_DELTA: u64 = 5; | ||||
|  | ||||
|         let processing_tasks = self.processing_tasks.clone(); | ||||
|         let must_stop_processing = self.must_stop_processing.clone(); | ||||
|         let send_progress = |progress| { | ||||
|             let now = std::time::Instant::now(); | ||||
|             let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed); | ||||
|             let previous = started_processing_at + Duration::from_secs(elapsed); | ||||
|             let elapsed = now - previous; | ||||
|  | ||||
|             if elapsed.as_secs() < PRINT_SECS_DELTA { | ||||
|                 return; | ||||
|             } | ||||
|  | ||||
|             secs_since_started_processing_at | ||||
|                 .store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed); | ||||
|  | ||||
|             let TaskProgress { | ||||
|                 current_step, | ||||
|                 finished_steps, | ||||
|                 total_steps, | ||||
|                 finished_substeps, | ||||
|                 total_substeps, | ||||
|             } = processing_tasks.write().unwrap().update_progress(progress); | ||||
|  | ||||
|             tracing::info!( | ||||
|                 current_step, | ||||
|                 finished_steps, | ||||
|                 total_steps, | ||||
|                 finished_substeps, | ||||
|                 total_substeps | ||||
|             ); | ||||
|         }; | ||||
|  | ||||
|         match operation { | ||||
|             IndexOperation::DocumentClear { index_uid, mut tasks } => { | ||||
| @@ -1308,7 +1275,7 @@ impl IndexScheduler { | ||||
|                         primary_key.as_deref(), | ||||
|                         &mut new_fields_ids_map, | ||||
|                         &|| must_stop_processing.get(), | ||||
|                         &send_progress, | ||||
|                         progress.clone(), | ||||
|                     ) | ||||
|                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||
|  | ||||
| @@ -1356,7 +1323,7 @@ impl IndexScheduler { | ||||
|                         &document_changes, | ||||
|                         embedders, | ||||
|                         &|| must_stop_processing.get(), | ||||
|                         &send_progress, | ||||
|                         &progress, | ||||
|                     ) | ||||
|                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; | ||||
|  | ||||
| @@ -1470,7 +1437,7 @@ impl IndexScheduler { | ||||
|                         &document_changes, | ||||
|                         embedders, | ||||
|                         &|| must_stop_processing.get(), | ||||
|                         &send_progress, | ||||
|                         &progress, | ||||
|                     ) | ||||
|                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||
|  | ||||
| @@ -1621,7 +1588,7 @@ impl IndexScheduler { | ||||
|                         &document_changes, | ||||
|                         embedders, | ||||
|                         &|| must_stop_processing.get(), | ||||
|                         &send_progress, | ||||
|                         &progress, | ||||
|                     ) | ||||
|                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; | ||||
|  | ||||
| @@ -1673,12 +1640,14 @@ impl IndexScheduler { | ||||
|                         index_uid: index_uid.clone(), | ||||
|                         tasks: cleared_tasks, | ||||
|                     }, | ||||
|                     progress.clone(), | ||||
|                 )?; | ||||
|  | ||||
|                 let settings_tasks = self.apply_index_operation( | ||||
|                     index_wtxn, | ||||
|                     index, | ||||
|                     IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, | ||||
|                     progress, | ||||
|                 )?; | ||||
|  | ||||
|                 let mut tasks = settings_tasks; | ||||
| @@ -1702,8 +1671,8 @@ impl IndexScheduler { | ||||
|  | ||||
|         let all_task_ids = self.all_task_ids(wtxn)?; | ||||
|         let mut to_delete_tasks = all_task_ids & matched_tasks; | ||||
|         to_delete_tasks -= processing_tasks; | ||||
|         to_delete_tasks -= enqueued_tasks; | ||||
|         to_delete_tasks -= &**processing_tasks; | ||||
|         to_delete_tasks -= &enqueued_tasks; | ||||
|  | ||||
|         // 2. We now have a list of tasks to delete, delete them | ||||
|  | ||||
|   | ||||
| @@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec | ||||
|  | ||||
| pub fn snapshot_batch(batch: &Batch) -> String { | ||||
|     let mut snap = String::new(); | ||||
|     let Batch { uid, details, stats, started_at, finished_at } = batch; | ||||
|     let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch; | ||||
|     if let Some(finished_at) = finished_at { | ||||
|         assert!(finished_at > started_at); | ||||
|     } | ||||
|   | ||||
| @@ -26,6 +26,7 @@ mod index_mapper; | ||||
| #[cfg(test)] | ||||
| mod insta_snapshot; | ||||
| mod lru; | ||||
| mod processing; | ||||
| mod utils; | ||||
| pub mod uuid_codec; | ||||
|  | ||||
| @@ -56,12 +57,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; | ||||
| use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; | ||||
| use meilisearch_types::milli::documents::DocumentsBatchBuilder; | ||||
| use meilisearch_types::milli::index::IndexEmbeddingConfig; | ||||
| use meilisearch_types::milli::update::new::indexer::document_changes::Progress; | ||||
| use meilisearch_types::milli::update::IndexerConfig; | ||||
| use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; | ||||
| use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; | ||||
| use meilisearch_types::task_view::TaskView; | ||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress}; | ||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||
| use processing::ProcessingTasks; | ||||
| use rayon::current_num_threads; | ||||
| use rayon::prelude::{IntoParallelIterator, ParallelIterator}; | ||||
| use roaring::RoaringBitmap; | ||||
| @@ -72,7 +73,8 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index_mapper::IndexMapper; | ||||
| use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch}; | ||||
| use crate::processing::{AtomicTaskStep, BatchProgress}; | ||||
| use crate::utils::{check_index_swap_validity, clamp_to_page_size}; | ||||
|  | ||||
| pub(crate) type BEI128 = I128<BE>; | ||||
|  | ||||
| @@ -163,48 +165,6 @@ impl Query { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct ProcessingTasks { | ||||
|     batch: Option<ProcessingBatch>, | ||||
|     /// The list of tasks ids that are currently running. | ||||
|     processing: RoaringBitmap, | ||||
|     /// The progress on processing tasks | ||||
|     progress: Option<TaskProgress>, | ||||
| } | ||||
|  | ||||
| impl ProcessingTasks { | ||||
|     /// Creates an empty `ProcessingAt` struct. | ||||
|     fn new() -> ProcessingTasks { | ||||
|         ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None } | ||||
|     } | ||||
|  | ||||
|     /// Stores the currently processing tasks, and the date time at which it started. | ||||
|     fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) { | ||||
|         self.batch = Some(processing_batch); | ||||
|         self.processing = processing; | ||||
|     } | ||||
|  | ||||
|     fn update_progress(&mut self, progress: Progress) -> TaskProgress { | ||||
|         self.progress.get_or_insert_with(TaskProgress::default).update(progress) | ||||
|     } | ||||
|  | ||||
|     /// Set the processing tasks to an empty list | ||||
|     fn stop_processing(&mut self) -> Self { | ||||
|         self.progress = None; | ||||
|  | ||||
|         Self { | ||||
|             batch: std::mem::take(&mut self.batch), | ||||
|             processing: std::mem::take(&mut self.processing), | ||||
|             progress: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns `true` if there, at least, is one task that is currently processing that we must stop. | ||||
|     fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool { | ||||
|         !self.processing.is_disjoint(canceled_tasks) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Default, Clone, Debug)] | ||||
| struct MustStopProcessing(Arc<AtomicBool>); | ||||
|  | ||||
| @@ -813,7 +773,7 @@ impl IndexScheduler { | ||||
|             let mut batch_tasks = RoaringBitmap::new(); | ||||
|             for batch_uid in batch_uids { | ||||
|                 if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) { | ||||
|                     batch_tasks |= &processing_tasks; | ||||
|                     batch_tasks |= &*processing_tasks; | ||||
|                 } else { | ||||
|                     batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?; | ||||
|                 } | ||||
| @@ -827,13 +787,13 @@ impl IndexScheduler { | ||||
|                 match status { | ||||
|                     // special case for Processing tasks | ||||
|                     Status::Processing => { | ||||
|                         status_tasks |= &processing_tasks; | ||||
|                         status_tasks |= &*processing_tasks; | ||||
|                     } | ||||
|                     status => status_tasks |= &self.get_status(rtxn, *status)?, | ||||
|                 }; | ||||
|             } | ||||
|             if !status.contains(&Status::Processing) { | ||||
|                 tasks -= &processing_tasks; | ||||
|                 tasks -= &*processing_tasks; | ||||
|             } | ||||
|             tasks &= status_tasks; | ||||
|         } | ||||
| @@ -882,7 +842,7 @@ impl IndexScheduler { | ||||
|         // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. | ||||
|         tasks = { | ||||
|             let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = | ||||
|                 (&tasks - &processing_tasks, &tasks & &processing_tasks); | ||||
|                 (&tasks - &*processing_tasks, &tasks & &*processing_tasks); | ||||
|  | ||||
|             // special case for Processing tasks | ||||
|             // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds | ||||
| @@ -1090,7 +1050,7 @@ impl IndexScheduler { | ||||
|         // Once we have filtered the two subsets, we put them back together and assign it back to `batches`. | ||||
|         batches = { | ||||
|             let (mut filtered_non_processing_batches, mut filtered_processing_batches) = | ||||
|                 (&batches - &processing.processing, &batches & &processing.processing); | ||||
|                 (&batches - &*processing.processing, &batches & &*processing.processing); | ||||
|  | ||||
|             // special case for Processing batches | ||||
|             // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds | ||||
| @@ -1606,7 +1566,8 @@ impl IndexScheduler { | ||||
|  | ||||
|         // We reset the must_stop flag to be sure that we don't stop processing tasks | ||||
|         self.must_stop_processing.reset(); | ||||
|         self.processing_tasks | ||||
|         let progress = self | ||||
|             .processing_tasks | ||||
|             .write() | ||||
|             .unwrap() | ||||
|             // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches | ||||
| @@ -1619,11 +1580,12 @@ impl IndexScheduler { | ||||
|         let res = { | ||||
|             let cloned_index_scheduler = self.private_clone(); | ||||
|             let processing_batch = &mut processing_batch; | ||||
|             let progress = progress.clone(); | ||||
|             std::thread::scope(|s| { | ||||
|                 let handle = std::thread::Builder::new() | ||||
|                     .name(String::from("batch-operation")) | ||||
|                     .spawn_scoped(s, move || { | ||||
|                         cloned_index_scheduler.process_batch(batch, processing_batch) | ||||
|                         cloned_index_scheduler.process_batch(batch, processing_batch, progress) | ||||
|                     }) | ||||
|                     .unwrap(); | ||||
|                 handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) | ||||
| @@ -1636,6 +1598,7 @@ impl IndexScheduler { | ||||
|         #[cfg(test)] | ||||
|         self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; | ||||
|  | ||||
|         progress.update_progress(BatchProgress::WritingTasksToDisk); | ||||
|         processing_batch.finished(); | ||||
|         let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; | ||||
|         let mut canceled = RoaringBitmap::new(); | ||||
| @@ -1645,12 +1608,15 @@ impl IndexScheduler { | ||||
|                 #[cfg(test)] | ||||
|                 self.breakpoint(Breakpoint::ProcessBatchSucceeded); | ||||
|  | ||||
|                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); | ||||
|                 progress.update_progress(task_progress_obj); | ||||
|                 let mut success = 0; | ||||
|                 let mut failure = 0; | ||||
|                 let mut canceled_by = None; | ||||
|  | ||||
|                 #[allow(unused_variables)] | ||||
|                 for (i, mut task) in tasks.into_iter().enumerate() { | ||||
|                     task_progress.fetch_add(1, Ordering::Relaxed); | ||||
|                     processing_batch.update(&mut task); | ||||
|                     if task.status == Status::Canceled { | ||||
|                         canceled.insert(task.uid); | ||||
| @@ -1718,8 +1684,12 @@ impl IndexScheduler { | ||||
|             Err(err) => { | ||||
|                 #[cfg(test)] | ||||
|                 self.breakpoint(Breakpoint::ProcessBatchFailed); | ||||
|                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); | ||||
|                 progress.update_progress(task_progress_obj); | ||||
|  | ||||
|                 let error: ResponseError = err.into(); | ||||
|                 for id in ids.iter() { | ||||
|                     task_progress.fetch_add(1, Ordering::Relaxed); | ||||
|                     let mut task = self | ||||
|                         .get_task(&wtxn, id) | ||||
|                         .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? | ||||
|   | ||||
							
								
								
									
										205
									
								
								crates/index-scheduler/src/processing.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										205
									
								
								crates/index-scheduler/src/processing.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,205 @@ | ||||
| use crate::utils::ProcessingBatch; | ||||
| use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step}; | ||||
| use roaring::RoaringBitmap; | ||||
| use std::{borrow::Cow, sync::Arc}; | ||||
|  | ||||
| #[derive(Clone)] | ||||
| pub struct ProcessingTasks { | ||||
|     pub batch: Option<Arc<ProcessingBatch>>, | ||||
|     /// The list of tasks ids that are currently running. | ||||
|     pub processing: Arc<RoaringBitmap>, | ||||
|     /// The progress on processing tasks | ||||
|     pub progress: Option<Progress>, | ||||
| } | ||||
|  | ||||
| impl ProcessingTasks { | ||||
|     /// Creates an empty `ProcessingAt` struct. | ||||
|     pub fn new() -> ProcessingTasks { | ||||
|         ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None } | ||||
|     } | ||||
|  | ||||
|     pub fn get_progress_view(&self) -> Option<ProgressView> { | ||||
|         Some(self.progress.as_ref()?.as_progress_view()) | ||||
|     } | ||||
|  | ||||
|     /// Stores the currently processing tasks, and the date time at which it started. | ||||
|     pub fn start_processing( | ||||
|         &mut self, | ||||
|         processing_batch: ProcessingBatch, | ||||
|         processing: RoaringBitmap, | ||||
|     ) -> Progress { | ||||
|         self.batch = Some(Arc::new(processing_batch)); | ||||
|         self.processing = Arc::new(processing); | ||||
|         let progress = Progress::default(); | ||||
|         progress.update_progress(BatchProgress::ProcessingTasks); | ||||
|         self.progress = Some(progress.clone()); | ||||
|  | ||||
|         progress | ||||
|     } | ||||
|  | ||||
|     /// Set the processing tasks to an empty list | ||||
|     pub fn stop_processing(&mut self) -> Self { | ||||
|         self.progress = None; | ||||
|  | ||||
|         Self { | ||||
|             batch: std::mem::take(&mut self.batch), | ||||
|             processing: std::mem::take(&mut self.processing), | ||||
|             progress: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Returns `true` if there, at least, is one task that is currently processing that we must stop. | ||||
|     pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool { | ||||
|         !self.processing.is_disjoint(canceled_tasks) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[repr(u8)] | ||||
| #[derive(Copy, Clone)] | ||||
| pub enum BatchProgress { | ||||
|     ProcessingTasks, | ||||
|     WritingTasksToDisk, | ||||
| } | ||||
|  | ||||
| impl Step for BatchProgress { | ||||
|     fn name(&self) -> Cow<'static, str> { | ||||
|         match self { | ||||
|             BatchProgress::ProcessingTasks => Cow::Borrowed("processing tasks"), | ||||
|             BatchProgress::WritingTasksToDisk => Cow::Borrowed("writing tasks to disk"), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn current(&self) -> u32 { | ||||
|         *self as u8 as u32 | ||||
|     } | ||||
|  | ||||
|     fn total(&self) -> u32 { | ||||
|         2 | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Default)] | ||||
| pub struct Task {} | ||||
|  | ||||
| impl NamedStep for Task { | ||||
|     fn name(&self) -> &'static str { | ||||
|         "task" | ||||
|     } | ||||
| } | ||||
| pub type AtomicTaskStep = AtomicSubStep<Task>; | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::sync::atomic::Ordering; | ||||
|  | ||||
|     use meili_snap::{json_string, snapshot}; | ||||
|  | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn one_level() { | ||||
|         let mut processing = ProcessingTasks::new(); | ||||
|         processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new()); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "name": "processing tasks", | ||||
|               "finished": 0, | ||||
|               "total": 2 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 0.0 | ||||
|         } | ||||
|         "#); | ||||
|         processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "name": "writing tasks to disk", | ||||
|               "finished": 1, | ||||
|               "total": 2 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 50.0 | ||||
|         } | ||||
|         "#); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn task_progress() { | ||||
|         let mut processing = ProcessingTasks::new(); | ||||
|         processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new()); | ||||
|         let (atomic, tasks) = AtomicTaskStep::new(10); | ||||
|         processing.progress.as_ref().unwrap().update_progress(tasks); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "name": "processing tasks", | ||||
|               "finished": 0, | ||||
|               "total": 2 | ||||
|             }, | ||||
|             { | ||||
|               "name": "task", | ||||
|               "finished": 0, | ||||
|               "total": 10 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 0.0 | ||||
|         } | ||||
|         "#); | ||||
|         atomic.fetch_add(6, Ordering::Relaxed); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "name": "processing tasks", | ||||
|               "finished": 0, | ||||
|               "total": 2 | ||||
|             }, | ||||
|             { | ||||
|               "name": "task", | ||||
|               "finished": 6, | ||||
|               "total": 10 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 30.000002 | ||||
|         } | ||||
|         "#); | ||||
|         processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "name": "writing tasks to disk", | ||||
|               "finished": 1, | ||||
|               "total": 2 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 50.0 | ||||
|         } | ||||
|         "#); | ||||
|         let (atomic, tasks) = AtomicTaskStep::new(5); | ||||
|         processing.progress.as_ref().unwrap().update_progress(tasks); | ||||
|         atomic.fetch_add(4, Ordering::Relaxed); | ||||
|         snapshot!(json_string!(processing.get_progress_view()), @r#" | ||||
|         { | ||||
|           "steps": [ | ||||
|             { | ||||
|               "name": "writing tasks to disk", | ||||
|               "finished": 1, | ||||
|               "total": 2 | ||||
|             }, | ||||
|             { | ||||
|               "name": "task", | ||||
|               "finished": 4, | ||||
|               "total": 5 | ||||
|             } | ||||
|           ], | ||||
|           "percentage": 90.0 | ||||
|         } | ||||
|         "#); | ||||
|     } | ||||
| } | ||||
| @@ -134,6 +134,7 @@ impl ProcessingBatch { | ||||
|     pub fn to_batch(&self) -> Batch { | ||||
|         Batch { | ||||
|             uid: self.uid, | ||||
|             progress: None, | ||||
|             details: self.details.clone(), | ||||
|             stats: self.stats.clone(), | ||||
|             started_at: self.started_at, | ||||
| @@ -187,6 +188,7 @@ impl IndexScheduler { | ||||
|             &batch.uid, | ||||
|             &Batch { | ||||
|                 uid: batch.uid, | ||||
|                 progress: None, | ||||
|                 details: batch.details, | ||||
|                 stats: batch.stats, | ||||
|                 started_at: batch.started_at, | ||||
| @@ -273,7 +275,10 @@ impl IndexScheduler { | ||||
|             .into_iter() | ||||
|             .map(|batch_id| { | ||||
|                 if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) { | ||||
|                     Ok(processing.batch.as_ref().unwrap().to_batch()) | ||||
|                     let mut batch = processing.batch.as_ref().unwrap().to_batch(); | ||||
|                     println!("here with progress: {}", processing.progress.is_some()); | ||||
|                     batch.progress = processing.get_progress_view(); | ||||
|                     Ok(batch) | ||||
|                 } else { | ||||
|                     self.get_batch(rtxn, batch_id) | ||||
|                         .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| use milli::progress::ProgressView; | ||||
| use serde::Serialize; | ||||
| use time::{Duration, OffsetDateTime}; | ||||
|  | ||||
| @@ -11,6 +12,7 @@ use crate::{ | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct BatchView { | ||||
|     pub uid: BatchId, | ||||
|     pub progress: Option<ProgressView>, | ||||
|     pub details: DetailsView, | ||||
|     pub stats: BatchStats, | ||||
|     #[serde(serialize_with = "serialize_duration", default)] | ||||
| @@ -25,6 +27,7 @@ impl BatchView { | ||||
|     pub fn from_batch(batch: &Batch) -> Self { | ||||
|         Self { | ||||
|             uid: batch.uid, | ||||
|             progress: batch.progress.clone(), | ||||
|             details: batch.details.clone(), | ||||
|             stats: batch.stats.clone(), | ||||
|             duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at), | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| use std::collections::BTreeMap; | ||||
|  | ||||
| use milli::progress::ProgressView; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| @@ -15,6 +16,8 @@ pub type BatchId = u32; | ||||
| pub struct Batch { | ||||
|     pub uid: BatchId, | ||||
|  | ||||
|     #[serde(skip_deserializing)] | ||||
|     pub progress: Option<ProgressView>, | ||||
|     pub details: DetailsView, | ||||
|     pub stats: BatchStats, | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,6 @@ use std::fmt::{Display, Write}; | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use enum_iterator::Sequence; | ||||
| use milli::update::new::indexer::document_changes::Progress; | ||||
| use milli::update::IndexDocumentsMethod; | ||||
| use milli::Object; | ||||
| use roaring::RoaringBitmap; | ||||
| @@ -41,62 +40,6 @@ pub struct Task { | ||||
|     pub kind: KindWithContent, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct TaskProgress { | ||||
|     pub current_step: &'static str, | ||||
|     pub finished_steps: u16, | ||||
|     pub total_steps: u16, | ||||
|     pub finished_substeps: Option<u32>, | ||||
|     pub total_substeps: Option<u32>, | ||||
| } | ||||
|  | ||||
| impl Default for TaskProgress { | ||||
|     fn default() -> Self { | ||||
|         Self::new() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl TaskProgress { | ||||
|     pub fn new() -> Self { | ||||
|         Self { | ||||
|             current_step: "start", | ||||
|             finished_steps: 0, | ||||
|             total_steps: 1, | ||||
|             finished_substeps: None, | ||||
|             total_substeps: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn update(&mut self, progress: Progress) -> TaskProgress { | ||||
|         if self.finished_steps > progress.finished_steps { | ||||
|             return *self; | ||||
|         } | ||||
|  | ||||
|         if self.current_step != progress.step_name { | ||||
|             self.current_step = progress.step_name | ||||
|         } | ||||
|  | ||||
|         self.total_steps = progress.total_steps; | ||||
|  | ||||
|         if self.finished_steps < progress.finished_steps { | ||||
|             self.finished_substeps = None; | ||||
|             self.total_substeps = None; | ||||
|         } | ||||
|         self.finished_steps = progress.finished_steps; | ||||
|         if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep { | ||||
|             if let Some(task_finished_substeps) = self.finished_substeps { | ||||
|                 if task_finished_substeps > finished_substeps { | ||||
|                     return *self; | ||||
|                 } | ||||
|             } | ||||
|             self.finished_substeps = Some(finished_substeps); | ||||
|             self.total_substeps = Some(total_substeps); | ||||
|         } | ||||
|         *self | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Task { | ||||
|     pub fn index_uid(&self) -> Option<&str> { | ||||
|         use KindWithContent::*; | ||||
|   | ||||
| @@ -1734,6 +1734,7 @@ pub(crate) mod tests { | ||||
|  | ||||
|     use crate::error::{Error, InternalError}; | ||||
|     use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::update::new::indexer; | ||||
|     use crate::update::settings::InnerIndexSettings; | ||||
|     use crate::update::{ | ||||
| @@ -1810,7 +1811,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             )?; | ||||
|  | ||||
|             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) { | ||||
| @@ -1829,7 +1830,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap()?; | ||||
| @@ -1901,7 +1902,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             )?; | ||||
|  | ||||
|             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) { | ||||
| @@ -1920,7 +1921,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap()?; | ||||
| @@ -1982,7 +1983,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2001,7 +2002,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| should_abort.load(Relaxed), | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap() | ||||
|   | ||||
| @@ -31,6 +31,7 @@ pub mod vector; | ||||
| #[macro_use] | ||||
| pub mod snapshot_tests; | ||||
| mod fieldids_weights_map; | ||||
| pub mod progress; | ||||
|  | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| use std::convert::{TryFrom, TryInto}; | ||||
|   | ||||
							
								
								
									
										116
									
								
								crates/milli/src/progress.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								crates/milli/src/progress.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | ||||
| use std::{ | ||||
|     any::TypeId, | ||||
|     borrow::Cow, | ||||
|     sync::{ | ||||
|         atomic::{AtomicU32, Ordering}, | ||||
|         Arc, RwLock, | ||||
|     }, | ||||
| }; | ||||
|  | ||||
| use serde::Serialize; | ||||
|  | ||||
| pub trait Step: 'static + Send + Sync { | ||||
|     fn name(&self) -> Cow<'static, str>; | ||||
|     fn current(&self) -> u32; | ||||
|     fn total(&self) -> u32; | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Default)] | ||||
| pub struct Progress { | ||||
|     steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>, | ||||
| } | ||||
|  | ||||
| impl Progress { | ||||
|     pub fn update_progress<P: Step>(&self, sub_progress: P) { | ||||
|         let mut steps = self.steps.write().unwrap(); | ||||
|         let step_type = TypeId::of::<P>(); | ||||
|         if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) { | ||||
|             steps.truncate(idx); | ||||
|         } | ||||
|         steps.push((step_type, Box::new(sub_progress))); | ||||
|     } | ||||
|  | ||||
|     // TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types | ||||
|     pub fn as_progress_view(&self) -> ProgressView { | ||||
|         let steps = self.steps.read().unwrap(); | ||||
|  | ||||
|         let mut percentage = 0.0; | ||||
|         let mut prev_factors = 1.0; | ||||
|  | ||||
|         let mut step_view = Vec::new(); | ||||
|         for (_, step) in steps.iter() { | ||||
|             prev_factors *= step.total() as f32; | ||||
|             percentage += step.current() as f32 / prev_factors; | ||||
|  | ||||
|             step_view.push(ProgressStepView { | ||||
|                 name: step.name(), | ||||
|                 finished: step.current(), | ||||
|                 total: step.total(), | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         ProgressView { steps: step_view, percentage: percentage * 100.0 } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// This trait lets you use the AtomicSubStep defined right below. | ||||
| /// The name must be a const that never changed but that can't be enforced by the type system because it make the trait non object-safe. | ||||
| /// By forcing the Default trait + the &'static str we make it harder to miss-use the trait. | ||||
| pub trait NamedStep: 'static + Send + Sync + Default { | ||||
|     fn name(&self) -> &'static str; | ||||
| } | ||||
|  | ||||
| /// Structure to quickly define steps that need very quick, lockless updating of their current step. | ||||
| /// You can use this struct if: | ||||
| /// - The name of the step doesn't change | ||||
| /// - The total number of steps doesn't change | ||||
| pub struct AtomicSubStep<Name: NamedStep> { | ||||
|     name: Name, | ||||
|     current: Arc<AtomicU32>, | ||||
|     total: u32, | ||||
| } | ||||
|  | ||||
| impl<Name: NamedStep> AtomicSubStep<Name> { | ||||
|     pub fn new(total: u32) -> (Arc<AtomicU32>, Self) { | ||||
|         let current = Arc::new(AtomicU32::new(0)); | ||||
|         (current.clone(), Self { current, total, name: Name::default() }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<Name: NamedStep> Step for AtomicSubStep<Name> { | ||||
|     fn name(&self) -> Cow<'static, str> { | ||||
|         self.name.name().into() | ||||
|     } | ||||
|  | ||||
|     fn current(&self) -> u32 { | ||||
|         self.current.load(Ordering::Relaxed) | ||||
|     } | ||||
|  | ||||
|     fn total(&self) -> u32 { | ||||
|         self.total | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Default)] | ||||
| pub struct Document {} | ||||
|  | ||||
| impl NamedStep for Document { | ||||
|     fn name(&self) -> &'static str { | ||||
|         "document" | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub type AtomicDocumentStep = AtomicSubStep<Document>; | ||||
|  | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| pub struct ProgressView { | ||||
|     steps: Vec<ProgressStepView>, | ||||
|     percentage: f32, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| pub struct ProgressStepView { | ||||
|     name: Cow<'static, str>, | ||||
|     finished: u32, | ||||
|     total: u32, | ||||
| } | ||||
| @@ -5,6 +5,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::{btreemap, hashset}; | ||||
|  | ||||
| use crate::progress::Progress; | ||||
| use crate::update::new::indexer; | ||||
| use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use crate::vector::EmbeddingConfigs; | ||||
| @@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -91,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -766,6 +766,7 @@ mod tests { | ||||
|     use crate::documents::mmap_from_objects; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::index::IndexEmbeddingConfig; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::search::TermsMatchingStrategy; | ||||
|     use crate::update::new::indexer; | ||||
|     use crate::update::Setting; | ||||
| @@ -1964,7 +1965,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2148,7 +2149,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2163,7 +2164,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2210,7 +2211,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2225,7 +2226,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2263,7 +2264,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2278,7 +2279,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2315,7 +2316,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2330,7 +2331,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2369,7 +2370,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2384,7 +2385,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2428,7 +2429,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2443,7 +2444,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2480,7 +2481,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2495,7 +2496,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2532,7 +2533,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2547,7 +2548,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2726,7 +2727,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2741,7 +2742,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2785,7 +2786,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2800,7 +2801,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2841,7 +2842,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2856,7 +2857,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
|   | ||||
| @@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd; | ||||
| use crate::update::new::channel::FieldIdDocidFacetSender; | ||||
| use crate::update::new::extract::perm_json_p; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::ref_cell_ext::RefCellExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str { | ||||
|  | ||||
| impl FacetedDocidsExtractor { | ||||
|     #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")] | ||||
|     pub fn run_extraction< | ||||
|         'pl, | ||||
|         'fid, | ||||
|         'indexer, | ||||
|         'index, | ||||
|         'extractor, | ||||
|         DC: DocumentChanges<'pl>, | ||||
|         MSP, | ||||
|         SP, | ||||
|     >( | ||||
|     pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         sender: &FieldIdDocidFacetSender, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let index = indexing_context.index; | ||||
|         let rtxn = index.read_txn()?; | ||||
|   | ||||
| @@ -15,23 +15,22 @@ pub use geo::*; | ||||
| pub use searchable::*; | ||||
| pub use vectors::EmbeddingExtractor; | ||||
|  | ||||
| use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress}; | ||||
| use super::steps::Step; | ||||
| use super::indexer::document_changes::{DocumentChanges, IndexingContext}; | ||||
| use super::steps::IndexingStep; | ||||
| use super::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::GrenadParameters; | ||||
| use crate::Result; | ||||
|  | ||||
| pub trait DocidsExtractor { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync; | ||||
|         MSP: Fn() -> bool + Sync; | ||||
| } | ||||
|  | ||||
| /// TODO move in permissive json pointer | ||||
|   | ||||
| @@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; | ||||
| use crate::update::new::extract::cache::BalancedCaches; | ||||
| use crate::update::new::extract::perm_json_p::contained_in; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::ref_cell_ext::RefCellExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { | ||||
| pub struct WordDocidsExtractors; | ||||
|  | ||||
| impl WordDocidsExtractors { | ||||
|     pub fn run_extraction< | ||||
|         'pl, | ||||
|         'fid, | ||||
|         'indexer, | ||||
|         'index, | ||||
|         'extractor, | ||||
|         DC: DocumentChanges<'pl>, | ||||
|         MSP, | ||||
|         SP, | ||||
|     >( | ||||
|     pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<WordDocidsCaches<'extractor>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let index = indexing_context.index; | ||||
|         let rtxn = index.read_txn()?; | ||||
|   | ||||
| @@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer}; | ||||
| use super::cache::BalancedCaches; | ||||
| use super::DocidsExtractor; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> | ||||
| } | ||||
|  | ||||
| pub trait SearchableExtractor: Sized + Sync { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let rtxn = indexing_context.index.read_txn()?; | ||||
|         let stop_words = indexing_context.index.stop_words(&rtxn)?; | ||||
| @@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync { | ||||
| } | ||||
|  | ||||
| impl<T: SearchableExtractor> DocidsExtractor for T { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         Self::run_extraction( | ||||
|             grenad_parameters, | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| use std::cell::{Cell, RefCell}; | ||||
| use std::sync::atomic::Ordering; | ||||
| use std::sync::{Arc, RwLock}; | ||||
|  | ||||
| use bumpalo::Bump; | ||||
| @@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator; | ||||
|  | ||||
| use super::super::document_change::DocumentChange; | ||||
| use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; | ||||
| use crate::progress::{AtomicDocumentStep, Progress}; | ||||
| use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; | ||||
| use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result}; | ||||
|  | ||||
| @@ -133,10 +135,8 @@ pub struct IndexingContext< | ||||
|     'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|     'index,   // covariant lifetime of the index | ||||
|     MSP, | ||||
|     SP, | ||||
| > where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     pub index: &'index Index, | ||||
|     pub db_fields_ids_map: &'indexer FieldsIdsMap, | ||||
| @@ -144,7 +144,8 @@ pub struct IndexingContext< | ||||
|     pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>, | ||||
|     pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>, | ||||
|     pub must_stop_processing: &'indexer MSP, | ||||
|     pub send_progress: &'indexer SP, | ||||
|     // TODO: TAMO: Rename field to progress | ||||
|     pub send_progress: &'indexer Progress, | ||||
| } | ||||
|  | ||||
| impl< | ||||
| @@ -152,18 +153,15 @@ impl< | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > Copy | ||||
|     for IndexingContext< | ||||
|         'fid,     // invariant lifetime of fields ids map | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > | ||||
| where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
| } | ||||
|  | ||||
| @@ -172,18 +170,15 @@ impl< | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > Clone | ||||
|     for IndexingContext< | ||||
|         'fid,     // invariant lifetime of fields ids map | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > | ||||
| where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     fn clone(&self) -> Self { | ||||
|         *self | ||||
| @@ -202,7 +197,6 @@ pub fn extract< | ||||
|     EX, | ||||
|     DC: DocumentChanges<'pl>, | ||||
|     MSP, | ||||
|     SP, | ||||
| >( | ||||
|     document_changes: &DC, | ||||
|     extractor: &EX, | ||||
| @@ -214,17 +208,17 @@ pub fn extract< | ||||
|         fields_ids_map_store, | ||||
|         must_stop_processing, | ||||
|         send_progress, | ||||
|     }: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|     }: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|     extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|     datastore: &'data ThreadLocal<EX::Data>, | ||||
|     step: Step, | ||||
|     step: IndexingStep, | ||||
| ) -> Result<()> | ||||
| where | ||||
|     EX: Extractor<'extractor>, | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     tracing::trace!("We are resetting the extractor allocators"); | ||||
|     send_progress.update_progress(step); | ||||
|     // Clean up and reuse the extractor allocs | ||||
|     for extractor_alloc in extractor_allocs.iter_mut() { | ||||
|         tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes()); | ||||
| @@ -232,6 +226,8 @@ where | ||||
|     } | ||||
|  | ||||
|     let total_documents = document_changes.len() as u32; | ||||
|     let (step, progress_step) = AtomicDocumentStep::new(total_documents); | ||||
|     send_progress.update_progress(progress_step); | ||||
|  | ||||
|     let pi = document_changes.iter(CHUNK_SIZE); | ||||
|     pi.enumerate().try_arc_for_each_try_init( | ||||
| @@ -253,7 +249,7 @@ where | ||||
|             } | ||||
|             let finished_documents = (finished_documents * CHUNK_SIZE) as u32; | ||||
|  | ||||
|             (send_progress)(Progress::from_step_substep(step, finished_documents, total_documents)); | ||||
|             step.store(finished_documents, Ordering::Relaxed); | ||||
|  | ||||
|             // Clean up and reuse the document-specific allocator | ||||
|             context.doc_alloc.reset(); | ||||
| @@ -271,32 +267,7 @@ where | ||||
|             res | ||||
|         }, | ||||
|     )?; | ||||
|  | ||||
|     (send_progress)(Progress::from_step_substep(step, total_documents, total_documents)); | ||||
|     step.store(total_documents, Ordering::Relaxed); | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub struct Progress { | ||||
|     pub finished_steps: u16, | ||||
|     pub total_steps: u16, | ||||
|     pub step_name: &'static str, | ||||
|     pub finished_total_substep: Option<(u32, u32)>, | ||||
| } | ||||
|  | ||||
| impl Progress { | ||||
|     pub fn from_step(step: Step) -> Self { | ||||
|         Self { | ||||
|             finished_steps: step.finished_steps(), | ||||
|             total_steps: Step::total_steps(), | ||||
|             step_name: step.name(), | ||||
|             finished_total_substep: None, | ||||
|         } | ||||
|     } | ||||
|     pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self { | ||||
|         Self { | ||||
|             finished_total_substep: Some((finished_substep, total_substep)), | ||||
|             ..Progress::from_step(step) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -92,11 +92,12 @@ mod test { | ||||
|  | ||||
|     use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::update::new::indexer::document_changes::{ | ||||
|         extract, DocumentChangeContext, Extractor, IndexingContext, | ||||
|     }; | ||||
|     use crate::update::new::indexer::DocumentDeletion; | ||||
|     use crate::update::new::steps::Step; | ||||
|     use crate::update::new::steps::IndexingStep; | ||||
|     use crate::update::new::thread_local::{MostlySend, ThreadLocal}; | ||||
|     use crate::update::new::DocumentChange; | ||||
|     use crate::DocumentId; | ||||
| @@ -164,7 +165,7 @@ mod test { | ||||
|             doc_allocs: &doc_allocs, | ||||
|             fields_ids_map_store: &fields_ids_map_store, | ||||
|             must_stop_processing: &(|| false), | ||||
|             send_progress: &(|_progress| {}), | ||||
|             send_progress: &Progress::default(), | ||||
|         }; | ||||
|  | ||||
|         for _ in 0..3 { | ||||
| @@ -176,7 +177,7 @@ mod test { | ||||
|                 context, | ||||
|                 &mut extractor_allocs, | ||||
|                 &datastore, | ||||
|                 Step::ExtractingDocuments, | ||||
|                 IndexingStep::ExtractingDocuments, | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| use std::sync::atomic::Ordering; | ||||
|  | ||||
| use bumpalo::collections::CollectIn; | ||||
| use bumpalo::Bump; | ||||
| use bumparaw_collections::RawMap; | ||||
| @@ -10,11 +12,12 @@ use serde_json::value::RawValue; | ||||
| use serde_json::Deserializer; | ||||
|  | ||||
| use super::super::document_change::DocumentChange; | ||||
| use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress}; | ||||
| use super::document_changes::{DocumentChangeContext, DocumentChanges}; | ||||
| use super::retrieve_or_guess_primary_key; | ||||
| use crate::documents::PrimaryKey; | ||||
| use crate::progress::{AtomicSubStep, Progress}; | ||||
| use crate::update::new::document::Versions; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::MostlySend; | ||||
| use crate::update::new::{Deletion, Insertion, Update}; | ||||
| use crate::update::{AvailableIds, IndexDocumentsMethod}; | ||||
| @@ -45,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|  | ||||
|     #[allow(clippy::too_many_arguments)] | ||||
|     #[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")] | ||||
|     pub fn into_changes<MSP, SP>( | ||||
|     pub fn into_changes<MSP>( | ||||
|         self, | ||||
|         indexer: &'pl Bump, | ||||
|         index: &Index, | ||||
| @@ -53,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|         primary_key_from_op: Option<&'pl str>, | ||||
|         new_fields_ids_map: &mut FieldsIdsMap, | ||||
|         must_stop_processing: &MSP, | ||||
|         send_progress: &SP, | ||||
|         progress: Progress, | ||||
|     ) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)> | ||||
|     where | ||||
|         MSP: Fn() -> bool, | ||||
|         SP: Fn(Progress), | ||||
|     { | ||||
|         progress.update_progress(IndexingStep::PreparingPayloads); | ||||
|         let Self { operations, method } = self; | ||||
|  | ||||
|         let documents_ids = index.documents_ids(rtxn)?; | ||||
| @@ -68,16 +71,15 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|         let mut primary_key = None; | ||||
|  | ||||
|         let payload_count = operations.len(); | ||||
|         let (step, progress_step) = | ||||
|             AtomicSubStep::<crate::progress::Document>::new(payload_count as u32); | ||||
|         progress.update_progress(progress_step); | ||||
|  | ||||
|         for (payload_index, operation) in operations.into_iter().enumerate() { | ||||
|             if must_stop_processing() { | ||||
|                 return Err(InternalError::AbortedIndexation.into()); | ||||
|             } | ||||
|             send_progress(Progress::from_step_substep( | ||||
|                 Step::PreparingPayloads, | ||||
|                 payload_index as u32, | ||||
|                 payload_count as u32, | ||||
|             )); | ||||
|             step.store(payload_index as u32, Ordering::Relaxed); | ||||
|  | ||||
|             let mut bytes = 0; | ||||
|             let result = match operation { | ||||
| @@ -118,12 +120,7 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|             }; | ||||
|             operations_stats.push(PayloadStats { document_count, bytes, error }); | ||||
|         } | ||||
|  | ||||
|         send_progress(Progress::from_step_substep( | ||||
|             Step::PreparingPayloads, | ||||
|             payload_count as u32, | ||||
|             payload_count as u32, | ||||
|         )); | ||||
|         step.store(payload_count as u32, Ordering::Relaxed); | ||||
|  | ||||
|         // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone | ||||
|         let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> = | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use std::thread::{self, Builder}; | ||||
|  | ||||
| use big_s::S; | ||||
| use bumparaw_collections::RawMap; | ||||
| use document_changes::{extract, DocumentChanges, IndexingContext, Progress}; | ||||
| use document_changes::{extract, DocumentChanges, IndexingContext}; | ||||
| pub use document_deletion::DocumentDeletion; | ||||
| pub use document_operation::{DocumentOperation, PayloadStats}; | ||||
| use hashbrown::HashMap; | ||||
| @@ -22,7 +22,7 @@ use super::channel::*; | ||||
| use super::extract::*; | ||||
| use super::facet_search_builder::FacetSearchBuilder; | ||||
| use super::merger::FacetFieldIdsDelta; | ||||
| use super::steps::Step; | ||||
| use super::steps::IndexingStep; | ||||
| use super::thread_local::ThreadLocal; | ||||
| use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder}; | ||||
| use super::words_prefix_docids::{ | ||||
| @@ -33,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||
| use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; | ||||
| use crate::progress::Progress; | ||||
| use crate::proximity::ProximityPrecision; | ||||
| use crate::update::del_add::DelAdd; | ||||
| use crate::update::new::extract::EmbeddingExtractor; | ||||
| @@ -60,7 +61,7 @@ mod update_by_function; | ||||
| /// | ||||
| /// TODO return stats | ||||
| #[allow(clippy::too_many_arguments)] // clippy: 😝 | ||||
| pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( | ||||
| pub fn index<'pl, 'indexer, 'index, DC, MSP>( | ||||
|     wtxn: &mut RwTxn, | ||||
|     index: &'index Index, | ||||
|     pool: &ThreadPoolNoAbort, | ||||
| @@ -71,12 +72,11 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( | ||||
|     document_changes: &DC, | ||||
|     embedders: EmbeddingConfigs, | ||||
|     must_stop_processing: &'indexer MSP, | ||||
|     send_progress: &'indexer SP, | ||||
|     send_progress: &'indexer Progress, | ||||
| ) -> Result<()> | ||||
| where | ||||
|     DC: DocumentChanges<'pl>, | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     let mut bbbuffers = Vec::new(); | ||||
|     let finished_extraction = AtomicBool::new(false); | ||||
| @@ -159,7 +159,7 @@ where | ||||
|                         indexing_context, | ||||
|                         &mut extractor_allocs, | ||||
|                         &datastore, | ||||
|                         Step::ExtractingDocuments, | ||||
|                         IndexingStep::ExtractingDocuments, | ||||
|                     )?; | ||||
|                 } | ||||
|                 { | ||||
| @@ -191,7 +191,7 @@ where | ||||
|                                 indexing_context, | ||||
|                                 &mut extractor_allocs, | ||||
|                                 &extractor_sender.field_id_docid_facet_sender(), | ||||
|                                 Step::ExtractingFacets | ||||
|                                 IndexingStep::ExtractingFacets | ||||
|                             )? | ||||
|                     }; | ||||
|  | ||||
| @@ -224,7 +224,7 @@ where | ||||
|                             document_changes, | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             Step::ExtractingWords | ||||
|                             IndexingStep::ExtractingWords | ||||
|                         )? | ||||
|                     }; | ||||
|  | ||||
| @@ -302,7 +302,7 @@ where | ||||
|                             document_changes, | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             Step::ExtractingWordProximity, | ||||
|                             IndexingStep::ExtractingWordProximity, | ||||
|                         )? | ||||
|                     }; | ||||
|  | ||||
| @@ -338,7 +338,7 @@ where | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             &datastore, | ||||
|                             Step::ExtractingEmbeddings, | ||||
|                             IndexingStep::ExtractingEmbeddings, | ||||
|                         )?; | ||||
|                     } | ||||
|                     { | ||||
| @@ -371,7 +371,7 @@ where | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             &datastore, | ||||
|                             Step::WritingGeoPoints | ||||
|                             IndexingStep::WritingGeoPoints | ||||
|                         )?; | ||||
|                     } | ||||
|  | ||||
| @@ -383,9 +383,7 @@ where | ||||
|                         &indexing_context.must_stop_processing, | ||||
|                     )?; | ||||
|                 } | ||||
|  | ||||
|                 (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); | ||||
|  | ||||
|                 indexing_context.send_progress.update_progress(IndexingStep::WritingToDatabase); | ||||
|                 finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); | ||||
|  | ||||
|                 Result::Ok((facet_field_ids_delta, index_embeddings)) | ||||
| @@ -485,7 +483,7 @@ where | ||||
|             )?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); | ||||
|         indexing_context.send_progress.update_progress(IndexingStep::WaitingForExtractors); | ||||
|  | ||||
|         let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?; | ||||
|  | ||||
| @@ -498,10 +496,9 @@ where | ||||
|                 break 'vectors; | ||||
|             } | ||||
|  | ||||
|             (indexing_context.send_progress)(Progress::from_step( | ||||
|                 Step::WritingEmbeddingsToDatabase, | ||||
|             )); | ||||
|  | ||||
|             indexing_context | ||||
|                 .send_progress | ||||
|                 .update_progress(IndexingStep::WritingEmbeddingsToDatabase); | ||||
|             let mut rng = rand::rngs::StdRng::seed_from_u64(42); | ||||
|             for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { | ||||
|                 let dimensions = *dimensions; | ||||
| @@ -517,21 +514,19 @@ where | ||||
|             index.put_embedding_configs(wtxn, index_embeddings)?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets)); | ||||
|  | ||||
|         indexing_context.send_progress.update_progress(IndexingStep::PostProcessingFacets); | ||||
|         if index.facet_search(wtxn)? { | ||||
|             compute_facet_search_database(index, wtxn, global_fields_ids_map)?; | ||||
|         } | ||||
|  | ||||
|         compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords)); | ||||
|  | ||||
|         indexing_context.send_progress.update_progress(IndexingStep::PostProcessingWords); | ||||
|         if let Some(prefix_delta) = compute_word_fst(index, wtxn)? { | ||||
|             compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::Finalizing)); | ||||
|         indexing_context.send_progress.update_progress(IndexingStep::Finalizing); | ||||
|  | ||||
|         Ok(()) as Result<_> | ||||
|     })?; | ||||
|   | ||||
| @@ -1,8 +1,12 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use enum_iterator::Sequence; | ||||
|  | ||||
| use crate::progress::Step; | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] | ||||
| #[repr(u16)] | ||||
| pub enum Step { | ||||
| #[repr(u8)] | ||||
| pub enum IndexingStep { | ||||
|     PreparingPayloads, | ||||
|     ExtractingDocuments, | ||||
|     ExtractingFacets, | ||||
| @@ -18,30 +22,31 @@ pub enum Step { | ||||
|     Finalizing, | ||||
| } | ||||
|  | ||||
| impl Step { | ||||
|     pub fn name(&self) -> &'static str { | ||||
| impl Step for IndexingStep { | ||||
|     fn name(&self) -> Cow<'static, str> { | ||||
|         match self { | ||||
|             Step::PreparingPayloads => "preparing update file", | ||||
|             Step::ExtractingDocuments => "extracting documents", | ||||
|             Step::ExtractingFacets => "extracting facets", | ||||
|             Step::ExtractingWords => "extracting words", | ||||
|             Step::ExtractingWordProximity => "extracting word proximity", | ||||
|             Step::ExtractingEmbeddings => "extracting embeddings", | ||||
|             Step::WritingGeoPoints => "writing geo points", | ||||
|             Step::WritingToDatabase => "writing to database", | ||||
|             Step::WaitingForExtractors => "waiting for extractors", | ||||
|             Step::WritingEmbeddingsToDatabase => "writing embeddings to database", | ||||
|             Step::PostProcessingFacets => "post-processing facets", | ||||
|             Step::PostProcessingWords => "post-processing words", | ||||
|             Step::Finalizing => "finalizing", | ||||
|             IndexingStep::PreparingPayloads => "preparing update file", | ||||
|             IndexingStep::ExtractingDocuments => "extracting documents", | ||||
|             IndexingStep::ExtractingFacets => "extracting facets", | ||||
|             IndexingStep::ExtractingWords => "extracting words", | ||||
|             IndexingStep::ExtractingWordProximity => "extracting word proximity", | ||||
|             IndexingStep::ExtractingEmbeddings => "extracting embeddings", | ||||
|             IndexingStep::WritingGeoPoints => "writing geo points", | ||||
|             IndexingStep::WritingToDatabase => "writing to database", | ||||
|             IndexingStep::WaitingForExtractors => "waiting for extractors", | ||||
|             IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database", | ||||
|             IndexingStep::PostProcessingFacets => "post-processing facets", | ||||
|             IndexingStep::PostProcessingWords => "post-processing words", | ||||
|             IndexingStep::Finalizing => "finalizing", | ||||
|         } | ||||
|         .into() | ||||
|     } | ||||
|  | ||||
|     pub fn finished_steps(self) -> u16 { | ||||
|         self as u16 | ||||
|     fn current(&self) -> u32 { | ||||
|         *self as u32 | ||||
|     } | ||||
|  | ||||
|     pub const fn total_steps() -> u16 { | ||||
|         Self::CARDINALITY as u16 | ||||
|     fn total(&self) -> u32 { | ||||
|         Self::CARDINALITY as u32 | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::hashset; | ||||
| use milli::documents::mmap_from_objects; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -57,7 +58,7 @@ fn test_facet_distribution_with_no_facet_values() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -72,7 +73,7 @@ fn test_facet_distribution_with_no_facet_values() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -7,6 +7,7 @@ use bumpalo::Bump; | ||||
| use either::{Either, Left, Right}; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::{btreemap, hashset}; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -90,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -109,7 +110,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use itertools::Itertools; | ||||
| use maplit::hashset; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -326,7 +327,7 @@ fn criteria_ascdesc() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -341,7 +342,7 @@ fn criteria_ascdesc() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use std::collections::BTreeSet; | ||||
| use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use milli::documents::mmap_from_objects; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -135,7 +136,7 @@ fn test_typo_disabled_on_word() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -150,7 +151,7 @@ fn test_typo_disabled_on_word() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user