make sure we NEVER ever write the cli defined webhook to the database or dumps

This commit is contained in:
Tamo
2025-08-05 18:55:32 +02:00
parent 1ff6da63e8
commit 899be9c3ff
9 changed files with 130 additions and 63 deletions

View File

@ -25,7 +25,7 @@ pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings; pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures; pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network; pub type Network = meilisearch_types::features::Network;
pub type Webhooks = meilisearch_types::webhooks::Webhooks; pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;
// ===== Other types to clarify the code of the compat module // ===== Other types to clarify the code of the compat module
// everything related to the tasks // everything related to the tasks

View File

@ -8,7 +8,7 @@ use meilisearch_types::batches::Batch;
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures}; use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key; use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings}; use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::webhooks::Webhooks; use meilisearch_types::webhooks::WebhooksDumpView;
use serde_json::{Map, Value}; use serde_json::{Map, Value};
use tempfile::TempDir; use tempfile::TempDir;
use time::OffsetDateTime; use time::OffsetDateTime;
@ -75,11 +75,7 @@ impl DumpWriter {
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?) Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
} }
pub fn create_webhooks(&self, mut webhooks: Webhooks) -> Result<()> { pub fn create_webhooks(&self, webhooks: WebhooksDumpView) -> Result<()> {
if webhooks == Webhooks::default() {
return Ok(());
}
webhooks.webhooks.remove(&Uuid::nil()); // Don't store the cli webhook
Ok(std::fs::write( Ok(std::fs::write(
self.dir.path().join("webhooks.json"), self.dir.path().join("webhooks.json"),
serde_json::to_string(&webhooks)?, serde_json::to_string(&webhooks)?,

View File

@ -30,9 +30,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper, index_mapper,
features: _, features: _,
cli_webhook_url: _, webhooks: _,
cli_webhook_authorization: _,
cached_webhooks: _,
test_breakpoint_sdr: _, test_breakpoint_sdr: _,
planned_failures: _, planned_failures: _,
run_loop_iteration: _, run_loop_iteration: _,

View File

@ -65,13 +65,14 @@ use meilisearch_types::milli::vector::{
use meilisearch_types::milli::{self, Index}; use meilisearch_types::milli::{self, Index};
use meilisearch_types::task_view::TaskView; use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{KindWithContent, Task}; use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::webhooks::{Webhook, Webhooks}; use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig; use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks; use processing::ProcessingTasks;
pub use queue::Query; pub use queue::Query;
use queue::Queue; use queue::Queue;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use scheduler::Scheduler; use scheduler::Scheduler;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime; use time::OffsetDateTime;
use uuid::Uuid; use uuid::Uuid;
use versioning::Versioning; use versioning::Versioning;
@ -184,12 +185,8 @@ pub struct IndexScheduler {
/// A database to store single-keyed data that is persisted across restarts. /// A database to store single-keyed data that is persisted across restarts.
persisted: Database<Str, Str>, persisted: Database<Str, Str>,
/// The webhook url that was set by the CLI. /// Webhook, loaded and stored in the `persisted` database
cli_webhook_url: Option<String>, webhooks: Arc<Webhooks>,
/// The Authorization header to send to the webhook URL that was set by the CLI.
cli_webhook_authorization: Option<String>,
/// Webhook
cached_webhooks: Arc<RwLock<Webhooks>>,
/// A map to retrieve the runtime representation of an embedder depending on its configuration. /// A map to retrieve the runtime representation of an embedder depending on its configuration.
/// ///
@ -231,10 +228,7 @@ impl IndexScheduler {
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps, experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
persisted: self.persisted, persisted: self.persisted,
cli_webhook_url: self.cli_webhook_url.clone(), webhooks: self.webhooks.clone(),
cli_webhook_authorization: self.cli_webhook_authorization.clone(),
cached_webhooks: self.cached_webhooks.clone(),
embedders: self.embedders.clone(), embedders: self.embedders.clone(),
#[cfg(test)] #[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(), test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
@ -313,7 +307,8 @@ impl IndexScheduler {
let persisted = env.create_database(&mut wtxn, Some(db_name::PERSISTED))?; let persisted = env.create_database(&mut wtxn, Some(db_name::PERSISTED))?;
let webhooks_db = persisted.remap_data_type::<SerdeJson<Webhooks>>(); let webhooks_db = persisted.remap_data_type::<SerdeJson<Webhooks>>();
let mut webhooks = webhooks_db.get(&wtxn, db_keys::WEBHOOKS)?.unwrap_or_default(); let mut webhooks = webhooks_db.get(&wtxn, db_keys::WEBHOOKS)?.unwrap_or_default();
webhooks.webhooks.remove(&Uuid::nil()); // remove the cli webhook if it was saved by mistake webhooks
.with_cli(options.cli_webhook_url.clone(), options.cli_webhook_authorization.clone());
wtxn.commit()?; wtxn.commit()?;
@ -331,11 +326,7 @@ impl IndexScheduler {
.indexer_config .indexer_config
.experimental_no_edition_2024_for_dumps, .experimental_no_edition_2024_for_dumps,
persisted, persisted,
webhooks: Arc::new(webhooks),
cached_webhooks: Arc::new(RwLock::new(webhooks)),
cli_webhook_url: options.cli_webhook_url,
cli_webhook_authorization: options.cli_webhook_authorization,
embedders: Default::default(), embedders: Default::default(),
#[cfg(test)] #[cfg(test)]
@ -829,8 +820,8 @@ impl IndexScheduler {
} }
} }
let webhooks = self.webhooks(); let webhooks = self.webhooks.get_all();
if webhooks.webhooks.is_empty() { if webhooks.is_empty() {
return; return;
} }
let this = self.private_clone(); let this = self.private_clone();
@ -845,7 +836,7 @@ impl IndexScheduler {
}; };
std::thread::spawn(move || { std::thread::spawn(move || {
for (uuid, Webhook { url, headers }) in webhooks.webhooks.iter() { for (uuid, Webhook { url, headers }) in webhooks.iter() {
let task_reader = TaskReader { let task_reader = TaskReader {
rtxn: &rtxn, rtxn: &rtxn,
index_scheduler: &this, index_scheduler: &this,
@ -899,27 +890,27 @@ impl IndexScheduler {
self.features.network() self.features.network()
} }
pub fn put_webhooks(&self, mut webhooks: Webhooks) -> Result<()> { pub fn update_runtime_webhooks(&self, runtime: RuntimeWebhooks) -> Result<()> {
let webhooks = Webhooks::from_runtime(runtime);
let mut wtxn = self.env.write_txn()?; let mut wtxn = self.env.write_txn()?;
let webhooks_db = self.persisted.remap_data_type::<SerdeJson<Webhooks>>(); let webhooks_db = self.persisted.remap_data_type::<SerdeJson<Webhooks>>();
webhooks.webhooks.remove(&Uuid::nil()); // the cli webhook must not be saved
webhooks_db.put(&mut wtxn, db_keys::WEBHOOKS, &webhooks)?; webhooks_db.put(&mut wtxn, db_keys::WEBHOOKS, &webhooks)?;
wtxn.commit()?; wtxn.commit()?;
*self.cached_webhooks.write().unwrap() = webhooks; self.webhooks.update_runtime(webhooks.into_runtime());
Ok(()) Ok(())
} }
pub fn webhooks(&self) -> Webhooks { pub fn webhooks_dump_view(&self) -> WebhooksDumpView {
let webhooks = self.cached_webhooks.read().unwrap_or_else(|poisoned| poisoned.into_inner()); // We must not dump the cli api key
let mut webhooks = Webhooks::clone(&*webhooks); WebhooksDumpView { webhooks: self.webhooks.get_runtime() }
if let Some(url) = self.cli_webhook_url.as_ref().cloned() { }
let mut headers = BTreeMap::new();
if let Some(auth) = self.cli_webhook_authorization.as_ref().cloned() { pub fn webhooks_view(&self) -> WebhooksView {
headers.insert(String::from("Authorization"), auth); WebhooksView { webhooks: self.webhooks.get_all() }
} }
webhooks.webhooks.insert(Uuid::nil(), Webhook { url, headers });
} pub fn retrieve_runtime_webhooks(&self) -> RuntimeWebhooks {
webhooks self.webhooks.get_runtime()
} }
pub fn embedders( pub fn embedders(
@ -1050,3 +1041,72 @@ pub struct IndexStats {
/// Internal stats computed from the index. /// Internal stats computed from the index.
pub inner_stats: index_mapper::IndexStats, pub inner_stats: index_mapper::IndexStats,
} }
/// These structure are not meant to be exposed to the end user, if needed, use the meilisearch-types::webhooks structure instead.
/// /!\ Everytime you deserialize this structure you should fill the cli_webhook later on with the `with_cli` method. /!\
#[derive(Debug, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct Webhooks {
// The cli webhook should *never* be stored in a database.
// It represent a state that only exists for this execution of meilisearch
#[serde(skip)]
pub cli: Option<CliWebhook>,
#[serde(default)]
pub runtime: RwLock<RuntimeWebhooks>,
}
type RuntimeWebhooks = BTreeMap<Uuid, Webhook>;
impl Webhooks {
pub fn with_cli(&mut self, url: Option<String>, auth: Option<String>) {
if let Some(url) = url {
let webhook = CliWebhook { url, auth };
self.cli = Some(webhook);
}
}
pub fn from_runtime(webhooks: RuntimeWebhooks) -> Self {
Self { cli: None, runtime: RwLock::new(webhooks) }
}
pub fn into_runtime(self) -> RuntimeWebhooks {
// safe because we own self and it cannot be cloned
self.runtime.into_inner().unwrap()
}
pub fn update_runtime(&self, webhooks: RuntimeWebhooks) {
*self.runtime.write().unwrap() = webhooks;
}
/// Returns all the webhooks in an unified view. The cli webhook is represented with an uuid set to 0
pub fn get_all(&self) -> BTreeMap<Uuid, Webhook> {
self.cli
.as_ref()
.map(|wh| (Uuid::nil(), Webhook::from(wh)))
.into_iter()
.chain(self.runtime.read().unwrap().iter().map(|(uuid, wh)| (*uuid, wh.clone())))
.collect()
}
/// Returns all the runtime webhooks.
pub fn get_runtime(&self) -> BTreeMap<Uuid, Webhook> {
self.runtime.read().unwrap().iter().map(|(uuid, wh)| (*uuid, wh.clone())).collect()
}
}
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
struct CliWebhook {
pub url: String,
pub auth: Option<String>,
}
impl From<&CliWebhook> for Webhook {
fn from(webhook: &CliWebhook) -> Self {
let mut headers = BTreeMap::new();
if let Some(ref auth) = webhook.auth {
headers.insert("Authorization".to_string(), auth.to_string());
}
Self { url: webhook.url.to_string(), headers }
}
}

View File

@ -272,7 +272,7 @@ impl IndexScheduler {
// 7. Dump the webhooks // 7. Dump the webhooks
progress.update_progress(DumpCreationProgress::DumpTheWebhooks); progress.update_progress(DumpCreationProgress::DumpTheWebhooks);
let webhooks = self.webhooks(); let webhooks = self.webhooks_dump_view();
dump.create_webhooks(webhooks)?; dump.create_webhooks(webhooks)?;
let dump_uid = started_at.format(format_description!( let dump_uid = started_at.format(format_description!(

View File

@ -11,9 +11,18 @@ pub struct Webhook {
pub headers: BTreeMap<String, String>, pub headers: BTreeMap<String, String>,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] #[derive(Debug, Serialize, Default, Clone, PartialEq)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct Webhooks { pub struct WebhooksView {
#[serde(default)]
pub webhooks: BTreeMap<Uuid, Webhook>,
}
// Same as the WebhooksView instead it should never contains the CLI webhooks.
// It's the right structure to use in the dump
#[derive(Debug, Deserialize, Serialize, Default, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct WebhooksDumpView {
#[serde(default)] #[serde(default)]
pub webhooks: BTreeMap<Uuid, Webhook>, pub webhooks: BTreeMap<Uuid, Webhook>,
} }

View File

@ -493,7 +493,7 @@ fn import_dump(
// 2. Import the webhooks // 2. Import the webhooks
if let Some(webhooks) = dump_reader.webhooks() { if let Some(webhooks) = dump_reader.webhooks() {
index_scheduler.put_webhooks(webhooks.clone())?; index_scheduler.update_runtime_webhooks(webhooks.webhooks.clone())?;
} }
// 3. Import the `Key`s. // 3. Import the `Key`s.

View File

@ -146,7 +146,7 @@ pub(super) struct WebhookResults {
async fn get_webhooks( async fn get_webhooks(
index_scheduler: GuardedData<ActionPolicy<{ actions::WEBHOOKS_GET }>, Data<IndexScheduler>>, index_scheduler: GuardedData<ActionPolicy<{ actions::WEBHOOKS_GET }>, Data<IndexScheduler>>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let webhooks = index_scheduler.webhooks(); let webhooks = index_scheduler.webhooks_view();
let results = webhooks let results = webhooks
.webhooks .webhooks
.into_iter() .into_iter()
@ -326,7 +326,7 @@ async fn get_webhook(
uuid: Path<String>, uuid: Path<String>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let uuid = Uuid::from_str(&uuid.into_inner()).map_err(InvalidUuid)?; let uuid = Uuid::from_str(&uuid.into_inner()).map_err(InvalidUuid)?;
let mut webhooks = index_scheduler.webhooks(); let mut webhooks = index_scheduler.webhooks_view();
let webhook = webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?; let webhook = webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
let webhook = WebhookWithMetadata::from(uuid, webhook); let webhook = WebhookWithMetadata::from(uuid, webhook);
@ -368,8 +368,8 @@ async fn post_webhook(
return Err(TooManyHeaders(uuid).into()); return Err(TooManyHeaders(uuid).into());
} }
let mut webhooks = index_scheduler.webhooks(); let mut webhooks = index_scheduler.retrieve_runtime_webhooks();
if webhooks.webhooks.len() >= 20 { if webhooks.len() >= 20 {
return Err(TooManyWebhooks.into()); return Err(TooManyWebhooks.into());
} }
@ -383,8 +383,8 @@ async fn post_webhook(
}; };
check_changed(uuid, &webhook)?; check_changed(uuid, &webhook)?;
webhooks.webhooks.insert(uuid, webhook.clone()); webhooks.insert(uuid, webhook.clone());
index_scheduler.put_webhooks(webhooks)?; index_scheduler.update_runtime_webhooks(webhooks)?;
analytics.publish(PatchWebhooksAnalytics::post_webhook(), &req); analytics.publish(PatchWebhooksAnalytics::post_webhook(), &req);
@ -426,13 +426,17 @@ async fn patch_webhook(
let webhook_settings = webhook_settings.into_inner(); let webhook_settings = webhook_settings.into_inner();
debug!(parameters = ?(uuid, &webhook_settings), "Patch webhook"); debug!(parameters = ?(uuid, &webhook_settings), "Patch webhook");
let mut webhooks = index_scheduler.webhooks(); if uuid.is_nil() {
let old_webhook = webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?; return Err(ImmutableWebhook(uuid).into());
}
let mut webhooks = index_scheduler.retrieve_runtime_webhooks();
let old_webhook = webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
let webhook = patch_webhook_inner(&uuid, old_webhook, webhook_settings)?; let webhook = patch_webhook_inner(&uuid, old_webhook, webhook_settings)?;
check_changed(uuid, &webhook)?; check_changed(uuid, &webhook)?;
webhooks.webhooks.insert(uuid, webhook.clone()); webhooks.insert(uuid, webhook.clone());
index_scheduler.put_webhooks(webhooks)?; index_scheduler.update_runtime_webhooks(webhooks)?;
analytics.publish(PatchWebhooksAnalytics::patch_webhook(), &req); analytics.publish(PatchWebhooksAnalytics::patch_webhook(), &req);
@ -468,9 +472,9 @@ async fn delete_webhook(
return Err(ImmutableWebhook(uuid).into()); return Err(ImmutableWebhook(uuid).into());
} }
let mut webhooks = index_scheduler.webhooks(); let mut webhooks = index_scheduler.retrieve_runtime_webhooks();
webhooks.webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?; webhooks.remove(&uuid).ok_or(WebhookNotFound(uuid))?;
index_scheduler.put_webhooks(webhooks)?; index_scheduler.update_runtime_webhooks(webhooks)?;
analytics.publish(PatchWebhooksAnalytics::delete_webhook(), &req); analytics.publish(PatchWebhooksAnalytics::delete_webhook(), &req);

View File

@ -283,7 +283,6 @@ async fn reserved_names() {
let (value, code) = server let (value, code) = server
.patch_webhook(Uuid::nil().to_string(), json!({ "url": "http://localhost:8080" })) .patch_webhook(Uuid::nil().to_string(), json!({ "url": "http://localhost:8080" }))
.await; .await;
snapshot!(code, @"400 Bad Request");
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"message": "Webhook `[uuid]` is immutable. The webhook defined from the command line cannot be modified using the API.", "message": "Webhook `[uuid]` is immutable. The webhook defined from the command line cannot be modified using the API.",
@ -292,9 +291,9 @@ async fn reserved_names() {
"link": "https://docs.meilisearch.com/errors#immutable_webhook" "link": "https://docs.meilisearch.com/errors#immutable_webhook"
} }
"#); "#);
snapshot!(code, @"400 Bad Request");
let (value, code) = server.delete_webhook(Uuid::nil().to_string()).await; let (value, code) = server.delete_webhook(Uuid::nil().to_string()).await;
snapshot!(code, @"400 Bad Request");
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"message": "Webhook `[uuid]` is immutable. The webhook defined from the command line cannot be modified using the API.", "message": "Webhook `[uuid]` is immutable. The webhook defined from the command line cannot be modified using the API.",
@ -303,6 +302,7 @@ async fn reserved_names() {
"link": "https://docs.meilisearch.com/errors#immutable_webhook" "link": "https://docs.meilisearch.com/errors#immutable_webhook"
} }
"#); "#);
snapshot!(code, @"400 Bad Request");
} }
#[actix_web::test] #[actix_web::test]