Add a log on the time taken by the incremental facet updating

Disable the facet search
Merge #4318
2025-07-21 13:51:05 +00:00 · 2024-01-25 17:48:31 +01:00 · 2024-01-25 17:47:33 +01:00 · 2024-01-15 09:37:31 +00:00 · 2024-01-11 21:35:30 +01:00 · 2024-01-11 21:35:06 +01:00
5 changed files with 96 additions and 108 deletions
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -600,11 +600,12 @@ pub fn settings(
        ),
    };

-    let embedders = index
+    let embedders: BTreeMap<_, _> = index
        .embedding_configs(rtxn)?
        .into_iter()
        .map(|(name, config)| (name, Setting::Set(config.into())))
        .collect();
+    let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };

    Ok(Settings {
        displayed_attributes: match displayed_attributes {
@@ -631,7 +632,7 @@ pub fn settings(
        typo_tolerance: Setting::Set(typo_tolerance),
        faceting: Setting::Set(faceting),
        pagination: Setting::Set(pagination),
-        embedders: Setting::Set(embedders),
+        embedders,
        _kind: PhantomData,
    })
 }
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@@ -77,8 +77,7 @@ async fn import_dump_v1_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -239,8 +238,7 @@ async fn import_dump_v1_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -387,8 +385,7 @@ async fn import_dump_v1_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -521,8 +518,7 @@ async fn import_dump_v2_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -667,8 +663,7 @@ async fn import_dump_v2_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -812,8 +807,7 @@ async fn import_dump_v2_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -946,8 +940,7 @@ async fn import_dump_v3_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1092,8 +1085,7 @@ async fn import_dump_v3_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1237,8 +1229,7 @@ async fn import_dump_v3_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1371,8 +1362,7 @@ async fn import_dump_v4_movie_raw() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1517,8 +1507,7 @@ async fn import_dump_v4_movie_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1662,8 +1651,7 @@ async fn import_dump_v4_rubygems_with_settings() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###
    );
@@ -1907,8 +1895,7 @@ async fn import_dump_v6_containing_experimental_features() {
      },
      "pagination": {
        "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
    }
    "###);

--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -54,7 +54,7 @@ async fn get_settings() {
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 16);
+    assert_eq!(settings.keys().len(), 15);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@@ -83,7 +83,6 @@ async fn get_settings() {
            "maxTotalHits": 1000,
        })
    );
-    assert_eq!(settings["embedders"], json!({}));
    assert_eq!(settings["proximityPrecision"], json!("byWord"));
 }

--- a/milli/src/update/facet/incremental.rs
+++ b/milli/src/update/facet/incremental.rs
@@ -61,6 +61,7 @@ impl FacetsUpdateIncremental {
        }
    }

+    #[logging_timer::time("FacetsUpdateIncremental::{}")]
    pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
        let mut cursor = self.delta_data.into_cursor()?;
        while let Some((key, value)) = cursor.move_on_next()? {
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@@ -170,91 +170,91 @@ impl<'i> FacetsUpdate<'i> {
            incremental_update.execute(wtxn)?;
        }

-        // We clear the list of normalized-for-search facets
-        // and the previous FSTs to compute everything from scratch
-        self.index.facet_id_normalized_string_strings.clear(wtxn)?;
-        self.index.facet_id_string_fst.clear(wtxn)?;
+        // // We clear the list of normalized-for-search facets
+        // // and the previous FSTs to compute everything from scratch
+        // self.index.facet_id_normalized_string_strings.clear(wtxn)?;
+        // self.index.facet_id_string_fst.clear(wtxn)?;

-        // As we can't use the same write transaction to read and write in two different databases
-        // we must create a temporary sorter that we will write into LMDB afterward.
-        // As multiple unnormalized facet values can become the same normalized facet value
-        // we must merge them together.
-        let mut sorter = create_sorter(
-            SortAlgorithm::Unstable,
-            merge_btreeset_string,
-            CompressionType::None,
-            None,
-            None,
-            None,
-        );
+        // // As we can't use the same write transaction to read and write in two different databases
+        // // we must create a temporary sorter that we will write into LMDB afterward.
+        // // As multiple unnormalized facet values can become the same normalized facet value
+        // // we must merge them together.
+        // let mut sorter = create_sorter(
+        //     SortAlgorithm::Unstable,
+        //     merge_btreeset_string,
+        //     CompressionType::None,
+        //     None,
+        //     None,
+        //     None,
+        // );

-        // We iterate on the list of original, semi-normalized, facet values
-        // and normalize them for search, inserting them in LMDB in any given order.
-        let options = NormalizerOption { lossy: true, ..Default::default() };
-        let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
-        for result in database.iter(wtxn)? {
-            let (facet_group_key, ()) = result?;
-            if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
-                let mut normalized_facet = left_bound.normalize(&options);
-                let normalized_truncated_facet: String;
-                if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
-                    normalized_truncated_facet = normalized_facet
-                        .char_indices()
-                        .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
-                        .map(|(_, c)| c)
-                        .collect();
-                    normalized_facet = normalized_truncated_facet.into();
-                }
-                let set = BTreeSet::from_iter(std::iter::once(left_bound));
-                let key = (field_id, normalized_facet.as_ref());
-                let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
-                let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
-                sorter.insert(key, val)?;
-            }
-        }
+        // // We iterate on the list of original, semi-normalized, facet values
+        // // and normalize them for search, inserting them in LMDB in any given order.
+        // let options = NormalizerOption { lossy: true, ..Default::default() };
+        // let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
+        // for result in database.iter(wtxn)? {
+        //     let (facet_group_key, ()) = result?;
+        //     if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
+        //         let mut normalized_facet = left_bound.normalize(&options);
+        //         let normalized_truncated_facet: String;
+        //         if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
+        //             normalized_truncated_facet = normalized_facet
+        //                 .char_indices()
+        //                 .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
+        //                 .map(|(_, c)| c)
+        //                 .collect();
+        //             normalized_facet = normalized_truncated_facet.into();
+        //         }
+        //         let set = BTreeSet::from_iter(std::iter::once(left_bound));
+        //         let key = (field_id, normalized_facet.as_ref());
+        //         let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+        //         let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
+        //         sorter.insert(key, val)?;
+        //     }
+        // }

-        // In this loop we don't need to take care of merging bitmaps
-        // as the grenad sorter already merged them for us.
-        let mut merger_iter = sorter.into_stream_merger_iter()?;
-        while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
-            self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
-                wtxn,
-                key_bytes,
-                btreeset_bytes,
-            )?;
-        }
+        // // In this loop we don't need to take care of merging bitmaps
+        // // as the grenad sorter already merged them for us.
+        // let mut merger_iter = sorter.into_stream_merger_iter()?;
+        // while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
+        //     self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
+        //         wtxn,
+        //         key_bytes,
+        //         btreeset_bytes,
+        //     )?;
+        // }

-        // We compute one FST by string facet
-        let mut text_fsts = vec![];
-        let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
-        let database =
-            self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
-        for result in database.iter(wtxn)? {
-            let ((field_id, normalized_facet), _) = result?;
-            current_fst = match current_fst.take() {
-                Some((fid, fst_builder)) if fid != field_id => {
-                    let fst = fst_builder.into_set();
-                    text_fsts.push((fid, fst));
-                    Some((field_id, fst::SetBuilder::memory()))
-                }
-                Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
-                None => Some((field_id, fst::SetBuilder::memory())),
-            };
+        // // We compute one FST by string facet
+        // let mut text_fsts = vec![];
+        // let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
+        // let database =
+        //     self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
+        // for result in database.iter(wtxn)? {
+        //     let ((field_id, normalized_facet), _) = result?;
+        //     current_fst = match current_fst.take() {
+        //         Some((fid, fst_builder)) if fid != field_id => {
+        //             let fst = fst_builder.into_set();
+        //             text_fsts.push((fid, fst));
+        //             Some((field_id, fst::SetBuilder::memory()))
+        //         }
+        //         Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
+        //         None => Some((field_id, fst::SetBuilder::memory())),
+        //     };

-            if let Some((_, fst_builder)) = current_fst.as_mut() {
-                fst_builder.insert(normalized_facet)?;
-            }
-        }
+        //     if let Some((_, fst_builder)) = current_fst.as_mut() {
+        //         fst_builder.insert(normalized_facet)?;
+        //     }
+        // }

-        if let Some((field_id, fst_builder)) = current_fst {
-            let fst = fst_builder.into_set();
-            text_fsts.push((field_id, fst));
-        }
+        // if let Some((field_id, fst_builder)) = current_fst {
+        //     let fst = fst_builder.into_set();
+        //     text_fsts.push((field_id, fst));
+        // }

-        // We write those FSTs in LMDB now
-        for (field_id, fst) in text_fsts {
-            self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
-        }
+        // // We write those FSTs in LMDB now
+        // for (field_id, fst) in text_fsts {
+        //     self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
+        // }

        Ok(())
    }
Author	SHA1	Message	Date
Clément Renault	bceaf4f981	Add a log on the time taken by the incremental facet updating	2024-01-25 17:48:31 +01:00
Clément Renault	d29b301618	Disable the facet search	2024-01-25 17:47:33 +01:00
meili-bors[bot]	a6fa0b97ec	Merge #4318 4318: Hide embedders r=ManyTheFish a=dureuill Hides `embedders` when it is an empty dictionary. Manual tests: - getting settings with empty embedders: not displayed - getting settings with non-empty embedders: displayed like before - dump with empty embedders: can be imported - dump with non-empty embedders: can be imported Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2024-01-15 09:37:31 +00:00
Louis Dureuil	38abfec611	Fix tests	2024-01-11 21:35:30 +01:00
Louis Dureuil	84a5c304fc	Don't display the embedders setting when it is an empty dict	2024-01-11 21:35:06 +01:00