feat: Enable distributed tracing in greptimedb (#2755)

* feat: implement distributed tracing

* fix: change usage of span

* fix: use otlp as exporter

* chore: update dependencies

* chore: add span info

* chore: add alias

* chore: use instrument instead of trace
This commit is contained in:
WU Jingdi
2023-11-17 16:51:57 +08:00
committed by GitHub
parent ac4b6cd7f0
commit 500e299e40
58 changed files with 602 additions and 394 deletions

View File

@@ -34,6 +34,7 @@ use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{
EmptyRecordBatchStream, RecordBatch, RecordBatches, SendableRecordBatchStream,
};
use common_telemetry::tracing;
use datafusion::common::Column;
use datafusion::physical_plan::analyze::AnalyzeExec;
use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
@@ -77,6 +78,7 @@ impl DatafusionQueryEngine {
Self { state, plugins }
}
#[tracing::instrument(skip_all)]
async fn exec_query_plan(
&self,
plan: LogicalPlan,
@@ -97,6 +99,7 @@ impl DatafusionQueryEngine {
Ok(Output::Stream(self.execute_stream(&ctx, &physical_plan)?))
}
#[tracing::instrument(skip_all)]
async fn exec_dml_statement(
&self,
dml: DmlStatement,
@@ -147,6 +150,7 @@ impl DatafusionQueryEngine {
Ok(Output::AffectedRows(affected_rows))
}
#[tracing::instrument(skip_all)]
async fn delete<'a>(
&self,
table_name: &ResolvedTableReference<'a>,
@@ -189,6 +193,7 @@ impl DatafusionQueryEngine {
.await
}
#[tracing::instrument(skip_all)]
async fn insert<'a>(
&self,
table_name: &ResolvedTableReference<'a>,
@@ -285,6 +290,7 @@ impl QueryEngine for DatafusionQueryEngine {
}
impl LogicalOptimizer for DatafusionQueryEngine {
#[tracing::instrument(skip_all)]
fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
let _timer = metrics::METRIC_OPTIMIZE_LOGICAL_ELAPSED.start_timer();
match plan {
@@ -305,6 +311,7 @@ impl LogicalOptimizer for DatafusionQueryEngine {
#[async_trait::async_trait]
impl PhysicalPlanner for DatafusionQueryEngine {
#[tracing::instrument(skip_all)]
async fn create_physical_plan(
&self,
ctx: &mut QueryEngineContext,
@@ -338,6 +345,7 @@ impl PhysicalPlanner for DatafusionQueryEngine {
}
impl PhysicalOptimizer for DatafusionQueryEngine {
#[tracing::instrument(skip_all)]
fn optimize_physical_plan(
&self,
ctx: &mut QueryEngineContext,
@@ -385,6 +393,7 @@ impl PhysicalOptimizer for DatafusionQueryEngine {
}
impl QueryExecutor for DatafusionQueryEngine {
#[tracing::instrument(skip_all)]
fn execute_stream(
&self,
ctx: &QueryEngineContext,

View File

@@ -27,7 +27,8 @@ use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::{
DfSendableRecordBatchStream, RecordBatch, RecordBatchStreamAdaptor, SendableRecordBatchStream,
};
use common_telemetry::trace_id;
use common_telemetry::tracing;
use common_telemetry::tracing_context::TracingContext;
use datafusion::physical_plan::metrics::{
Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, Time,
};
@@ -155,6 +156,7 @@ impl MergeScanExec {
})
}
#[tracing::instrument(skip_all)]
pub fn to_stream(&self, context: Arc<TaskContext>) -> Result<SendableRecordBatchStream> {
let substrait_plan = self.substrait_plan.to_vec();
let regions = self.regions.clone();
@@ -163,7 +165,8 @@ impl MergeScanExec {
let schema = Self::arrow_schema_to_schema(self.schema())?;
let dbname = context.task_id().unwrap_or_default();
let trace_id = trace_id().unwrap_or_default();
let tracing_context = TracingContext::from_current_span().to_w3c();
let stream = Box::pin(stream!({
METRIC_MERGE_SCAN_REGIONS.observe(regions.len() as f64);
@@ -174,8 +177,7 @@ impl MergeScanExec {
for region_id in regions {
let request = QueryRequest {
header: Some(RegionRequestHeader {
trace_id,
span_id: 0,
tracing_context: tracing_context.clone(),
dbname: dbname.clone(),
}),
region_id: region_id.into(),

View File

@@ -17,6 +17,7 @@ use std::sync::Arc;
use async_trait::async_trait;
use catalog::table_source::DfTableSourceProvider;
use common_error::ext::BoxedError;
use common_telemetry::tracing;
use datafusion::execution::context::SessionState;
use datafusion_sql::planner::{ParserOptions, SqlToRel};
use promql::planner::PromPlanner;
@@ -51,6 +52,7 @@ impl DfLogicalPlanner {
}
}
#[tracing::instrument(skip_all)]
async fn plan_sql(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
let df_stmt = (&stmt).try_into().context(SqlSnafu)?;
@@ -85,6 +87,7 @@ impl DfLogicalPlanner {
Ok(LogicalPlan::DfPlan(plan))
}
#[tracing::instrument(skip_all)]
async fn plan_pql(&self, stmt: EvalStmt, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
let table_provider = DfTableSourceProvider::new(
self.engine_state.catalog_manager().clone(),
@@ -101,6 +104,7 @@ impl DfLogicalPlanner {
#[async_trait]
impl LogicalPlanner for DfLogicalPlanner {
#[tracing::instrument(skip_all)]
async fn plan(&self, stmt: QueryStatement, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
match stmt {
QueryStatement::Sql(stmt) => self.plan_sql(stmt, query_ctx).await,