fix: unable to record slow query (#6590)

* refactor: add process manager for prometheus query

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* refactor: modify the `register_query()` API to accept a parsed statement (`catalog::process_manager::QueryStatement`)

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* refactor: add the slow query timer in the `Ticket` of ProcessManager

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* test: add integration tests

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* refactor: add process manager in `do_exec_plan()`

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* tests: add `test_postgres_slow_query` integration test

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* chore: polish the code

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* refactor: create a query ticket and slow query timer if the statement is a query in `query_statement()`

Signed-off-by: zyy17 <zyylsxm@gmail.com>

* fix: sqlness errors

Signed-off-by: zyy17 <zyylsxm@gmail.com>

---------

Signed-off-by: zyy17 <zyylsxm@gmail.com>
This commit is contained in:
zyy17
2025-08-05 20:35:12 -07:00
committed by GitHub
parent cc35bab5e4
commit 3a9f0220b5
25 changed files with 525 additions and 177 deletions

View File

@@ -15,6 +15,7 @@
use std::collections::BTreeMap;
use std::io::Write;
use std::str::FromStr;
use std::time::Duration;
use api::prom_store::remote::label_matcher::Type as MatcherType;
use api::prom_store::remote::{
@@ -23,10 +24,13 @@ use api::prom_store::remote::{
use auth::user_provider_from_option;
use axum::http::{HeaderName, HeaderValue, StatusCode};
use chrono::Utc;
use common_catalog::consts::{trace_services_table_name, TRACE_TABLE_NAME};
use common_catalog::consts::{
trace_services_table_name, DEFAULT_PRIVATE_SCHEMA_NAME, TRACE_TABLE_NAME,
};
use common_error::status_code::StatusCode as ErrorCode;
use flate2::write::GzEncoder;
use flate2::Compression;
use frontend::slow_query_recorder::{SLOW_QUERY_TABLE_NAME, SLOW_QUERY_TABLE_QUERY_COLUMN_NAME};
use log_query::{Context, Limit, LogQuery, TimeFilter};
use loki_proto::logproto::{EntryAdapter, LabelPairAdapter, PushRequest, StreamAdapter};
use loki_proto::prost_types::Timestamp;
@@ -55,6 +59,7 @@ use tests_integration::test_util::{
setup_test_http_app_with_frontend_and_user_provider, setup_test_prom_app_with_frontend,
StorageType,
};
use urlencoding::encode;
use yaml_rust::YamlLoader;
#[macro_export]
@@ -88,6 +93,7 @@ macro_rules! http_tests {
test_http_auth,
test_sql_api,
test_http_sql_slow_query,
test_prometheus_promql_api,
test_prom_http_api,
test_metrics_api,
@@ -542,6 +548,29 @@ pub async fn test_sql_api(store_type: StorageType) {
guard.remove_all().await;
}
/// Issues an expensive recursive-CTE statement through the HTTP `/v1/sql`
/// endpoint and then checks the slow query table for that statement.
///
/// After the request returns `200 OK` the test sleeps for five seconds to give
/// the recorder time to persist the entry, then hands a `SELECT` over the
/// query column to `validate_data` together with the expected JSON rows
/// (a single row holding the original statement text).
pub async fn test_http_sql_slow_query(store_type: StorageType) {
    let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "sql_api").await;
    let client = TestClient::new(app).await;

    // Statement that is meant to be slow enough to be picked up by the recorder.
    let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(random()) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte";

    // The statement travels as a URL query parameter, so it must be percent-encoded.
    let request_uri = format!("/v1/sql?sql={}", encode(slow_query));
    let response = client.get(&request_uri).send().await;
    assert_eq!(response.status(), StatusCode::OK);

    // Wait for the slow query to be recorded.
    tokio::time::sleep(Duration::from_secs(5)).await;

    let slow_query_table = format!("{DEFAULT_PRIVATE_SCHEMA_NAME}.{SLOW_QUERY_TABLE_NAME}");
    let select_sql =
        format!("SELECT {SLOW_QUERY_TABLE_QUERY_COLUMN_NAME} FROM {slow_query_table}");
    let expected_rows = format!(r#"[["{slow_query}"]]"#);
    validate_data(
        "test_http_sql_slow_query",
        &client,
        &select_sql,
        &expected_rows,
    )
    .await;

    guard.remove_all().await;
}
pub async fn test_prometheus_promql_api(store_type: StorageType) {
let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "promql_api").await;
let client = TestClient::new(app).await;
@@ -1305,7 +1334,7 @@ write_interval = "30s"
[slow_query]
enable = true
record_type = "system_table"
threshold = "30s"
threshold = "1s"
sample_ratio = 1.0
ttl = "30d"

View File

@@ -16,6 +16,12 @@ use std::collections::HashMap;
use auth::user_provider_from_option;
use chrono::{DateTime, NaiveDate, NaiveDateTime, SecondsFormat, Utc};
use common_catalog::consts::DEFAULT_PRIVATE_SCHEMA_NAME;
use frontend::slow_query_recorder::{
SLOW_QUERY_TABLE_COST_COLUMN_NAME, SLOW_QUERY_TABLE_IS_PROMQL_COLUMN_NAME,
SLOW_QUERY_TABLE_NAME, SLOW_QUERY_TABLE_QUERY_COLUMN_NAME,
SLOW_QUERY_TABLE_THRESHOLD_COLUMN_NAME,
};
use sqlx::mysql::{MySqlConnection, MySqlDatabaseError, MySqlPoolOptions};
use sqlx::postgres::{PgDatabaseError, PgPoolOptions};
use sqlx::{Connection, Executor, Row};
@@ -64,10 +70,12 @@ macro_rules! sql_tests {
test_mysql_crud,
test_mysql_timezone,
test_mysql_async_timestamp,
test_mysql_slow_query,
test_postgres_auth,
test_postgres_crud,
test_postgres_timezone,
test_postgres_bytea,
test_postgres_slow_query,
test_postgres_datestyle,
test_postgres_parameter_inference,
test_postgres_array_types,
@@ -580,6 +588,56 @@ pub async fn test_postgres_crud(store_type: StorageType) {
let _ = fe_pg_server.shutdown().await;
guard.remove_all().await;
}
/// Runs an expensive recursive-CTE statement over the MySQL protocol and
/// verifies the row that shows up in the slow query table.
///
/// The test expects exactly one recorded row whose cost and threshold are
/// positive, whose cost exceeds the threshold, whose query text equals the
/// statement that was sent, and whose PromQL flag is `false`.
pub async fn test_mysql_slow_query(store_type: StorageType) {
    common_telemetry::init_default_ut_logging();

    let (mut guard, fe_mysql_server) =
        setup_mysql_server(store_type, "test_mysql_slow_query").await;
    let server_addr = fe_mysql_server.bind_addr().unwrap().to_string();
    let dsn = format!("mysql://{server_addr}/public");
    let pool = MySqlPoolOptions::new()
        .max_connections(2)
        .connect(&dsn)
        .await
        .unwrap();

    // Intended to run for longer than the slow query threshold (at least 1s).
    let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(random()) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte";

    // Execute it; only the side effect (the recorded entry) matters here.
    let _ = sqlx::query(slow_query).fetch_all(&pool).await.unwrap();

    // Give the recorder time to persist the entry.
    tokio::time::sleep(std::time::Duration::from_secs(5)).await;

    let slow_query_table = format!("{DEFAULT_PRIVATE_SCHEMA_NAME}.{SLOW_QUERY_TABLE_NAME}");
    let select_sql = format!(
        "SELECT {SLOW_QUERY_TABLE_COST_COLUMN_NAME}, {SLOW_QUERY_TABLE_THRESHOLD_COLUMN_NAME}, {SLOW_QUERY_TABLE_QUERY_COLUMN_NAME}, {SLOW_QUERY_TABLE_IS_PROMQL_COLUMN_NAME} FROM {slow_query_table}"
    );
    let recorded = sqlx::query(&select_sql).fetch_all(&pool).await.unwrap();
    assert_eq!(recorded.len(), 1);

    // Columns are read positionally, in the order they were selected above.
    let entry = &recorded[0];
    let cost = entry.get::<u64, _>(0);
    let threshold = entry.get::<u64, _>(1);
    let recorded_query = entry.get::<String, _>(2);
    let is_promql = entry.get::<bool, _>(3);

    assert!(cost > 0 && threshold > 0 && cost > threshold);
    assert_eq!(recorded_query, slow_query);
    assert!(!is_promql);

    let _ = fe_mysql_server.shutdown().await;
    guard.remove_all().await;
}
pub async fn test_postgres_bytea(store_type: StorageType) {
let (mut guard, fe_pg_server) = setup_pg_server(store_type, "test_postgres_bytea").await;
let addr = fe_pg_server.bind_addr().unwrap().to_string();
@@ -650,6 +708,46 @@ pub async fn test_postgres_bytea(store_type: StorageType) {
guard.remove_all().await;
}
/// Runs an expensive recursive-CTE statement over the PostgreSQL protocol and
/// verifies the row that shows up in the slow query table.
///
/// The test expects exactly one recorded row whose cost and threshold are
/// positive, whose cost exceeds the threshold, whose query text equals the
/// statement that was sent, and whose PromQL flag is `false`.
pub async fn test_postgres_slow_query(store_type: StorageType) {
    let (mut guard, fe_pg_server) = setup_pg_server(store_type, "test_postgres_slow_query").await;
    let server_addr = fe_pg_server.bind_addr().unwrap().to_string();
    let dsn = format!("postgres://{server_addr}/public");
    let pool = PgPoolOptions::new()
        .max_connections(2)
        .connect(&dsn)
        .await
        .unwrap();

    // Statement that is meant to be slow enough to be picked up by the recorder.
    let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(random()) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte";
    sqlx::query(slow_query).fetch_all(&pool).await.unwrap();

    // Wait for the slow query to be recorded.
    tokio::time::sleep(std::time::Duration::from_secs(5)).await;

    let slow_query_table = format!("{DEFAULT_PRIVATE_SCHEMA_NAME}.{SLOW_QUERY_TABLE_NAME}");
    let select_sql = format!(
        "SELECT {SLOW_QUERY_TABLE_COST_COLUMN_NAME}, {SLOW_QUERY_TABLE_THRESHOLD_COLUMN_NAME}, {SLOW_QUERY_TABLE_QUERY_COLUMN_NAME}, {SLOW_QUERY_TABLE_IS_PROMQL_COLUMN_NAME} FROM {slow_query_table}"
    );
    let recorded = sqlx::query(&select_sql).fetch_all(&pool).await.unwrap();
    assert_eq!(recorded.len(), 1);

    // Columns are read positionally, in the order they were selected above.
    // The numeric columns are decoded as signed 64-bit integers on this protocol.
    let entry = &recorded[0];
    let cost = entry.get::<i64, _>(0);
    let threshold = entry.get::<i64, _>(1);
    let recorded_query = entry.get::<String, _>(2);
    let is_promql = entry.get::<bool, _>(3);

    assert!(cost > 0 && threshold > 0 && cost > threshold);
    assert_eq!(recorded_query, slow_query);
    assert!(!is_promql);

    let _ = fe_pg_server.shutdown().await;
    guard.remove_all().await;
}
pub async fn test_postgres_datestyle(store_type: StorageType) {
let (mut guard, fe_pg_server) = setup_pg_server(store_type, "test_postgres_datestyle").await;
let addr = fe_pg_server.bind_addr().unwrap().to_string();