From 9a3c26bb0a2101dd2f3d4ebc460c1fa7f3333935 Mon Sep 17 00:00:00 2001 From: discord9 Date: Thu, 27 Feb 2025 14:26:58 +0800 Subject: [PATCH] metrics: better bucket&longer timeout --- src/flow/src/metrics.rs | 18 ++++++++++++++++-- src/flow/src/recording_rules/engine.rs | 4 ++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/flow/src/metrics.rs b/src/flow/src/metrics.rs index 21a02683c9..2144519ed1 100644 --- a/src/flow/src/metrics.rs +++ b/src/flow/src/metrics.rs @@ -31,13 +31,27 @@ lazy_static! { pub static ref METRIC_FLOW_RULE_ENGINE_QUERY_TIME: HistogramVec = register_histogram_vec!( "greptime_flow_rule_engine_query_time", "flow rule engine query time", - &["flow_id"] + &["flow_id"], + vec![ + 0.0, + 1., + 3., + 5., + 10., + 20., + 30., + 60., + 2. * 60., + 5. * 60., + 10. * 60. + ] ) .unwrap(); pub static ref METRIC_FLOW_RULE_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!( "greptime_flow_rule_engine_slow_query", "flow rule engine slow query", - &["flow_id", "sql", "peer"] + &["flow_id", "sql", "peer"], + vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.] 
) .unwrap(); pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge = diff --git a/src/flow/src/recording_rules/engine.rs b/src/flow/src/recording_rules/engine.rs index d3c0fa0d40..e5a59862e9 100644 --- a/src/flow/src/recording_rules/engine.rs +++ b/src/flow/src/recording_rules/engine.rs @@ -38,8 +38,8 @@ use crate::recording_rules::{find_plan_time_window_bound, sql_to_df_plan}; use crate::Error; /// TODO(discord9): make those constants configurable -/// The default rule engine query timeout is 5 minutes -pub const DEFAULT_RULE_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(5 * 60); +/// The default rule engine query timeout is 10 minutes +pub const DEFAULT_RULE_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(10 * 60); /// will output a warn log for any query that runs for more than 1 minute, and also every 1 minute when that query is still running pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);