feat: add StatementStatistics for slow query logging implementation (#4719)
* feat: log slow query
* feat: log slow query for sql
* refactor: add slow query logging options
* ci: fix errors
* feat: add StatementStatistics
* chore: revert modification of servers crate
* docs: update config docs
* fix: clippy errors
Cargo.lock (generated)
@@ -2289,6 +2289,7 @@ dependencies = [
"common-error",
"console-subscriber",
"greptime-proto",
"humantime-serde",
"lazy_static",
"once_cell",
"opentelemetry 0.21.0",
@@ -163,6 +163,10 @@
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |

@@ -249,6 +253,10 @@
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |

@@ -314,6 +322,10 @@
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |

@@ -466,6 +478,10 @@
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |

@@ -512,5 +528,9 @@
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
@@ -586,6 +586,19 @@ log_format = "text"
[logging.tracing_sample_ratio]
default_ratio = 1.0

## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false

## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"

## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0

## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]

@@ -84,6 +84,19 @@ log_format = "text"
[logging.tracing_sample_ratio]
default_ratio = 1.0

## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false

## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"

## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0

## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
## The tokio console address.

@@ -191,6 +191,19 @@ log_format = "text"
[logging.tracing_sample_ratio]
default_ratio = 1.0

## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false

## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"

## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0

## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]

@@ -178,6 +178,19 @@ log_format = "text"
[logging.tracing_sample_ratio]
default_ratio = 1.0

## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false

## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"

## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0

## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]

@@ -630,6 +630,19 @@ log_format = "text"
[logging.tracing_sample_ratio]
default_ratio = 1.0

## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false

## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"

## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0

## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]
@@ -36,6 +36,7 @@ use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use frontend::server::Services;
use meta_client::{MetaClientOptions, MetaClientType};
use query::stats::StatementStatistics;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
@@ -352,6 +353,7 @@ impl StartCommand {
catalog_manager,
Arc::new(client),
meta_client,
StatementStatistics::new(opts.logging.slow_query.clone()),
)
.with_plugin(plugins.clone())
.with_local_cache_invalidator(layered_cache_registry)
@@ -55,6 +55,7 @@ use frontend::service_config::{
};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use mito2::config::MitoConfig;
use query::stats::StatementStatistics;
use serde::{Deserialize, Serialize};
use servers::export_metrics::ExportMetricsOption;
use servers::grpc::GrpcOptions;
@@ -557,6 +558,7 @@ impl StartCommand {
catalog_manager.clone(),
node_manager.clone(),
ddl_task_executor.clone(),
StatementStatistics::new(opts.logging.slow_query.clone()),
)
.with_plugin(plugins.clone())
.try_build()
@@ -17,6 +17,7 @@ backtrace = "0.3"
common-error.workspace = true
console-subscriber = { version = "0.1", optional = true }
greptime-proto.workspace = true
humantime-serde.workspace = true
lazy_static.workspace = true
once_cell.workspace = true
opentelemetry = { version = "0.21.0", default-features = false, features = [
@@ -15,6 +15,7 @@
//! logging stuffs, inspired by databend
use std::env;
use std::sync::{Arc, Mutex, Once};
use std::time::Duration;

use once_cell::sync::{Lazy, OnceCell};
use opentelemetry::{global, KeyValue};
@@ -26,7 +27,7 @@ use serde::{Deserialize, Serialize};
use tracing_appender::non_blocking::WorkerGuard;
use tracing_appender::rolling::{RollingFileAppender, Rotation};
use tracing_log::LogTracer;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::filter::{FilterFn, Targets};
use tracing_subscriber::fmt::Layer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::prelude::*;
@@ -64,6 +65,24 @@ pub struct LoggingOptions {

/// The tracing sample ratio.
pub tracing_sample_ratio: Option<TracingSampleOptions>,

/// The logging options of slow query.
pub slow_query: SlowQueryOptions,
}

/// The options of slow query.
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct SlowQueryOptions {
/// Whether to enable slow query log.
pub enable: bool,

/// The threshold of slow queries.
#[serde(with = "humantime_serde")]
pub threshold: Option<Duration>,

/// The sample ratio of slow queries.
pub sample_ratio: Option<f64>,
}

#[derive(Clone, Debug, Copy, PartialEq, Eq, Serialize, Deserialize)]
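
A minimal sketch (not part of the patch) of how the `[logging.slow_query]` TOML section shown in the example configs above maps onto this struct. The field names mirror `SlowQueryOptions`; the `toml`, `serde`, and `humantime-serde` crates are assumed here purely for illustration.

use std::time::Duration;

use serde::Deserialize;

// Simplified stand-in for the `SlowQueryOptions` defined in the hunk above.
#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct SlowQueryOptions {
    enable: bool,
    #[serde(with = "humantime_serde")]
    threshold: Option<Duration>,
    sample_ratio: Option<f64>,
}

fn main() {
    // Mirrors the `[logging.slow_query]` section from the example configs.
    let section = r#"
        enable = true
        threshold = "10s"
        sample_ratio = 0.5
    "#;
    let opts: SlowQueryOptions = toml::from_str(section).unwrap();
    assert!(opts.enable);
    assert_eq!(opts.threshold, Some(Duration::from_secs(10)));
    assert_eq!(opts.sample_ratio, Some(0.5));
}

Because the struct carries `#[serde(default)]`, omitting `threshold` or `sample_ratio` leaves them as `None`, which is why the docs list them as "Unset" by default.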
@@ -96,6 +115,7 @@ impl Default for LoggingOptions {
otlp_endpoint: None,
tracing_sample_ratio: None,
append_stdout: true,
slow_query: SlowQueryOptions::default(),
}
}
}
@@ -235,6 +255,42 @@ pub fn init_global_logging(
None
};

let slow_query_logging_layer = if !opts.dir.is_empty() && opts.slow_query.enable {
let rolling_appender =
RollingFileAppender::new(Rotation::HOURLY, &opts.dir, "greptimedb-slow-queries");
let (writer, guard) = tracing_appender::non_blocking(rolling_appender);
guards.push(guard);

// Only logs if the field contains "slow".
let slow_query_filter = FilterFn::new(|metadata| {
metadata
.fields()
.iter()
.any(|field| field.name().contains("slow"))
});

if opts.log_format == LogFormat::Json {
Some(
Layer::new()
.json()
.with_writer(writer)
.with_ansi(false)
.with_filter(slow_query_filter)
.boxed(),
)
} else {
Some(
Layer::new()
.with_writer(writer)
.with_ansi(false)
.with_filter(slow_query_filter)
.boxed(),
)
}
} else {
None
};

// resolve log level settings from:
// - options from command line or config files
// - environment variable: RUST_LOG
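
For context, a small self-contained sketch (not from the patch, assuming only the `tracing-appender` crate) of the writer/guard pair created above. The `WorkerGuard` must stay alive — the patch pushes it into `guards` — or buffered slow-query lines may be lost when the process exits; the helper name and directory below are illustrative only.

use tracing_appender::non_blocking::{NonBlocking, WorkerGuard};
use tracing_appender::rolling::{RollingFileAppender, Rotation};

// Hypothetical helper for illustration; the directory is an assumption.
fn slow_query_writer(dir: &str) -> (NonBlocking, WorkerGuard) {
    let appender = RollingFileAppender::new(Rotation::HOURLY, dir, "greptimedb-slow-queries");
    // Hand log lines to a background thread via a non-blocking writer.
    tracing_appender::non_blocking(appender)
}

fn main() {
    let (_writer, _guard) = slow_query_writer("/tmp/greptimedb-logs");
    // `_writer` would be passed to a fmt layer via `.with_writer(...)`;
    // dropping `_guard` flushes the queue and stops the background worker.
}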
@@ -279,6 +335,7 @@
.with(stdout_logging_layer)
.with(file_logging_layer)
.with(err_file_logging_layer)
.with(slow_query_logging_layer)
};

// consume the `tracing_opts` to avoid "unused" warnings.
@@ -289,7 +346,8 @@
.with(dyn_filter)
.with(stdout_logging_layer)
.with(file_logging_layer)
.with(err_file_logging_layer);
.with(err_file_logging_layer)
.with(slow_query_logging_layer);

if opts.enable_otlp_tracing {
global::set_text_map_propagator(TraceContextPropagator::new());
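
One detail worth noting: `slow_query_logging_layer` is an `Option`, yet it is passed to `.with(...)` unchanged. That works because `tracing-subscriber` implements `Layer` for `Option<L>`, so a disabled feature simply contributes `None` as a no-op layer. A small sketch of that pattern (assumed crates: `tracing`, `tracing-subscriber`; names are illustrative):

use tracing_subscriber::fmt;
use tracing_subscriber::prelude::*;

fn main() {
    let enable_slow_log = false; // stands in for `opts.slow_query.enable`
    // `Option<L>` implements `Layer`, so a disabled feature can contribute `None`
    // and `.with(...)` still type-checks; `None` behaves as a no-op layer.
    let slow_query_layer = enable_slow_log.then(|| fmt::layer().with_writer(std::io::stderr));

    tracing_subscriber::registry()
        .with(fmt::layer())
        .with(slow_query_layer)
        .init();

    tracing::info!("logging initialized");
}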
@@ -152,6 +152,17 @@ macro_rules! trace {
};
}

#[macro_export]
macro_rules! slow {
(target: $target:expr, $($arg:tt)+) => {
$crate::log!(target: $target, slow = true, $crate::tracing::Level::INFO, $($arg)+)
};

($($arg:tt)+) => {
$crate::log!($crate::tracing::Level::INFO, slow = true, $($arg)+)
};
}

#[cfg(test)]
mod tests {
use common_error::mock::MockError;
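
The `slow!` macro above emits an ordinary INFO event but tags it with a `slow = true` field; that field is exactly what the `FilterFn` in `init_global_logging` matches on, so these events are routed to the dedicated `greptimedb-slow-queries` appender. A standalone sketch of the same field-based routing, assuming only the `tracing` and `tracing-subscriber` crates:

use tracing::info;
use tracing_subscriber::filter::FilterFn;
use tracing_subscriber::fmt;
use tracing_subscriber::prelude::*;

fn main() {
    // Pass only events that carry a field whose name contains "slow".
    let slow_only = FilterFn::new(|metadata| {
        metadata
            .fields()
            .iter()
            .any(|field| field.name().contains("slow"))
    });

    tracing_subscriber::registry()
        .with(fmt::layer().with_writer(std::io::stderr).with_filter(slow_only))
        .init();

    // Roughly what `slow!(cost = ..., sql = ...)` expands to: an INFO event tagged `slow = true`.
    info!(slow = true, cost = 1500u64, sql = "SELECT ...", "slow query");

    // No `slow` field, so the filtered layer above ignores this event.
    info!(cost = 10u64, "fast query");
}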
@@ -37,6 +37,7 @@ use operator::delete::Deleter;
use operator::insert::Inserter;
use operator::statement::StatementExecutor;
use partition::manager::PartitionRuleManager;
use query::stats::StatementStatistics;
use query::{QueryEngine, QueryEngineFactory};
use servers::error::{AlreadyStartedSnafu, StartGrpcSnafu, TcpBindSnafu, TcpIncomingSnafu};
use servers::server::Server;
@@ -475,6 +476,7 @@ impl FrontendInvoker {
layered_cache_registry.clone(),
inserter.clone(),
table_route_cache,
StatementStatistics::default(),
));

let invoker = FrontendInvoker::new(inserter, deleter, statement_executor);
@@ -33,6 +33,7 @@ use operator::statement::{StatementExecutor, StatementExecutorRef};
use operator::table::TableMutationOperator;
use partition::manager::PartitionRuleManager;
use pipeline::pipeline_operator::PipelineOperator;
use query::stats::StatementStatistics;
use query::QueryEngineFactory;
use servers::server::ServerHandlers;
use snafu::OptionExt;
@@ -55,6 +56,7 @@ pub struct FrontendBuilder {
plugins: Option<Plugins>,
procedure_executor: ProcedureExecutorRef,
heartbeat_task: Option<HeartbeatTask>,
stats: StatementStatistics,
}

impl FrontendBuilder {
@@ -65,6 +67,7 @@ impl FrontendBuilder {
catalog_manager: CatalogManagerRef,
node_manager: NodeManagerRef,
procedure_executor: ProcedureExecutorRef,
stats: StatementStatistics,
) -> Self {
Self {
options,
@@ -76,6 +79,7 @@ impl FrontendBuilder {
plugins: None,
procedure_executor,
heartbeat_task: None,
stats,
}
}
@@ -181,6 +185,7 @@ impl FrontendBuilder {
local_cache_invalidator,
inserter.clone(),
table_route_cache,
self.stats,
));

let pipeline_operator = Arc::new(PipelineOperator::new(
@@ -45,6 +45,7 @@ use common_time::Timestamp;
use datafusion_expr::LogicalPlan;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use query::parser::QueryStatement;
use query::stats::StatementStatistics;
use query::QueryEngineRef;
use session::context::{Channel, QueryContextRef};
use session::table_name::table_idents_to_full_name;
@@ -80,11 +81,13 @@ pub struct StatementExecutor {
partition_manager: PartitionRuleManagerRef,
cache_invalidator: CacheInvalidatorRef,
inserter: InserterRef,
stats: StatementStatistics,
}

pub type StatementExecutorRef = Arc<StatementExecutor>;

impl StatementExecutor {
#[allow(clippy::too_many_arguments)]
pub fn new(
catalog_manager: CatalogManagerRef,
query_engine: QueryEngineRef,
@@ -93,6 +96,7 @@ impl StatementExecutor {
cache_invalidator: CacheInvalidatorRef,
inserter: InserterRef,
table_route_cache: TableRouteCacheRef,
stats: StatementStatistics,
) -> Self {
Self {
catalog_manager,
@@ -104,6 +108,7 @@ impl StatementExecutor {
partition_manager: Arc::new(PartitionRuleManager::new(kv_backend, table_route_cache)),
cache_invalidator,
inserter,
stats,
}
}
@@ -113,6 +118,7 @@ impl StatementExecutor {
stmt: QueryStatement,
query_ctx: QueryContextRef,
) -> Result<Output> {
let _slow_query_timer = self.stats.start_slow_query_timer(stmt.clone());
match stmt {
QueryStatement::Sql(stmt) => self.execute_sql(stmt, query_ctx).await,
QueryStatement::Promql(_) => self.plan_exec(stmt, query_ctx).await,
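
The binding matters here: `_slow_query_timer` is a named variable, so the returned guard lives until the statement finishes and its `Drop` impl (in `src/query/src/stats.rs` below) can measure the elapsed time; `let _ = ...` would drop the guard immediately and measure nothing. A minimal, standard-library-only sketch of the drop-guard pattern (the struct below is a simplified stand-in, not the patch's type):

use std::time::{Duration, Instant};

// Simplified stand-in for the patch's SlowQueryTimer.
struct SlowQueryTimer {
    start: Instant,
    threshold: Duration,
}

impl Drop for SlowQueryTimer {
    fn drop(&mut self) {
        let elapsed = self.start.elapsed();
        if elapsed > self.threshold {
            eprintln!(
                "slow query: cost={}ms threshold={}ms",
                elapsed.as_millis(),
                self.threshold.as_millis()
            );
        }
    }
}

fn main() {
    // Named binding: the timer survives until the end of `main`.
    let _timer = SlowQueryTimer {
        start: Instant::now(),
        threshold: Duration::from_millis(50),
    };
    // Stands in for actually executing the statement.
    std::thread::sleep(Duration::from_millis(100));
} // `_timer` is dropped here and the slow query is reported.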
@@ -54,6 +54,7 @@ prometheus.workspace = true
promql.workspace = true
promql-parser.workspace = true
prost.workspace = true
rand.workspace = true
regex.workspace = true
session.workspace = true
snafu.workspace = true
@@ -35,6 +35,7 @@ pub mod query_engine;
mod range_select;
pub mod region_query;
pub mod sql;
pub mod stats;

#[cfg(test)]
mod tests;
src/query/src/stats.rs (new file, 98 lines)
@@ -0,0 +1,98 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::time::Duration;

use common_telemetry::logging::SlowQueryOptions;
use common_telemetry::slow;
use rand::random;

use crate::parser::QueryStatement;

/// StatementStatistics is used to collect statistics for a statement.
#[derive(Default, Clone, Debug)]
pub struct StatementStatistics {
    /// slow_query is used to configure slow query log.
    pub slow_query: SlowQueryOptions,
}

impl StatementStatistics {
    pub fn new(slow_query_options: SlowQueryOptions) -> Self {
        Self {
            slow_query: slow_query_options,
        }
    }

    pub fn start_slow_query_timer(&self, stmt: QueryStatement) -> Option<SlowQueryTimer> {
        if self.slow_query.enable {
            Some(SlowQueryTimer {
                start: std::time::Instant::now(),
                stmt,
                threshold: self.slow_query.threshold,
                sample_ratio: self.slow_query.sample_ratio,
            })
        } else {
            None
        }
    }
}

/// SlowQueryTimer is used to log slow query when it's dropped.
pub struct SlowQueryTimer {
    start: std::time::Instant,
    stmt: QueryStatement,
    threshold: Option<Duration>,
    sample_ratio: Option<f64>,
}

impl SlowQueryTimer {
    fn log_slow_query(&self, elapsed: Duration, threshold: Duration) {
        match &self.stmt {
            QueryStatement::Sql(stmt) => {
                slow!(
                    cost = elapsed.as_millis() as u64,
                    threshold = threshold.as_millis() as u64,
                    sql = stmt.to_string()
                );
            }
            QueryStatement::Promql(stmt) => {
                slow!(
                    cost = elapsed.as_millis() as u64,
                    threshold = threshold.as_millis() as u64,
                    // TODO(zyy17): It's better to implement Display for EvalStmt for pretty print.
                    promql = format!("{:?}", stmt)
                );
            }
        }
    }
}

impl Drop for SlowQueryTimer {
    fn drop(&mut self) {
        if let Some(threshold) = self.threshold {
            let elapsed = self.start.elapsed();
            if elapsed > threshold {
                if let Some(ratio) = self.sample_ratio {
                    // Generate a random number in [0, 1) and compare it with sample_ratio.
                    if ratio >= 1.0 || random::<f64>() <= ratio {
                        self.log_slow_query(elapsed, threshold);
                    }
                } else {
                    // If sample_ratio is not set, log all slow queries.
                    self.log_slow_query(elapsed, threshold);
                }
            }
        }
    }
}
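
The sampling branch in `Drop` above keeps roughly `sample_ratio` of slow queries: `random::<f64>()` is uniform in [0, 1), so comparing it against the ratio passes about that fraction of the time, while an unset ratio logs everything. A quick sketch of that decision in isolation (assumes only the `rand` crate; the helper name is illustrative):

use rand::random;

fn should_log(sample_ratio: Option<f64>) -> bool {
    match sample_ratio {
        // Unset ratio: log every slow query.
        None => true,
        // Ratios >= 1.0 always log; otherwise log with probability `ratio`.
        Some(ratio) => ratio >= 1.0 || random::<f64>() <= ratio,
    }
}

fn main() {
    let logged = (0..100_000).filter(|_| should_log(Some(0.2))).count();
    // Expect roughly 20_000 of the 100_000 simulated slow queries to be logged.
    println!("logged {logged} of 100000");
}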
@@ -48,6 +48,7 @@ use meta_client::client::MetaClientBuilder;
use meta_srv::cluster::MetaPeerClientRef;
use meta_srv::metasrv::{Metasrv, MetasrvOptions, SelectorRef};
use meta_srv::mocks::MockInfo;
use query::stats::StatementStatistics;
use servers::grpc::flight::FlightCraftWrapper;
use servers::grpc::region_server::RegionServerRequestHandler;
use servers::heartbeat_options::HeartbeatOptions;
@@ -393,6 +394,7 @@ impl GreptimeDbClusterBuilder {
catalog_manager,
datanode_clients,
meta_client,
StatementStatistics::default(),
)
.with_local_cache_invalidator(cache_registry)
.with_heartbeat_task(heartbeat_task)
@@ -40,6 +40,7 @@ use flow::FlownodeBuilder;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance, StandaloneDatanodeManager};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use query::stats::StatementStatistics;
use servers::Mode;
use snafu::ResultExt;
@@ -215,6 +216,7 @@ impl GreptimeDbStandaloneBuilder {
catalog_manager.clone(),
node_manager.clone(),
ddl_task_executor.clone(),
StatementStatistics::default(),
)
.with_plugin(plugins)
.try_build()
@@ -836,6 +836,9 @@ retry_delay = "500ms"
append_stdout = true
enable_otlp_tracing = false

[logging.slow_query]
enable = false

[[region_engine]]

[region_engine.mito]