feat: support explain analyze verbose (#5763)

* Add explain_verbose to QueryContext

* feat: fmt plan by display type

* feat: update proto to use ExplainOptions

* feat: display more info in verbose mode

* chore: fix clippy

* test: add sqlness test

* test: update sqlness result

* chore: update proto version

* chore: Simplify QueryContextBuilder::explain_options using get_or_insert_default
This commit is contained in:
Yingwen
2025-03-25 11:48:36 +08:00
committed by GitHub
parent 3b547d9d13
commit d88482b996
15 changed files with 261 additions and 28 deletions

View File

@@ -50,12 +50,13 @@ pub struct DistAnalyzeExec {
input: Arc<dyn ExecutionPlan>,
schema: SchemaRef,
properties: PlanProperties,
verbose: bool,
format: AnalyzeFormat,
}
impl DistAnalyzeExec {
/// Create a new DistAnalyzeExec
pub fn new(input: Arc<dyn ExecutionPlan>, format: AnalyzeFormat) -> Self {
pub fn new(input: Arc<dyn ExecutionPlan>, verbose: bool, format: AnalyzeFormat) -> Self {
let schema = SchemaRef::new(Schema::new(vec![
Field::new(STAGE, DataType::UInt32, true),
Field::new(NODE, DataType::UInt32, true),
@@ -66,6 +67,7 @@ impl DistAnalyzeExec {
input,
schema,
properties,
verbose,
format,
}
}
@@ -116,7 +118,11 @@ impl ExecutionPlan for DistAnalyzeExec {
self: Arc<Self>,
mut children: Vec<Arc<dyn ExecutionPlan>>,
) -> DfResult<Arc<dyn ExecutionPlan>> {
Ok(Arc::new(Self::new(children.pop().unwrap(), self.format)))
Ok(Arc::new(Self::new(
children.pop().unwrap(),
self.verbose,
self.format,
)))
}
fn execute(
@@ -138,6 +144,7 @@ impl ExecutionPlan for DistAnalyzeExec {
// Finish the input stream and create the output
let format = self.format;
let verbose = self.verbose;
let mut input_stream = coalesce_partition_plan.execute(0, context)?;
let output = async move {
let mut total_rows = 0;
@@ -145,7 +152,7 @@ impl ExecutionPlan for DistAnalyzeExec {
total_rows += batch.num_rows();
}
create_output_batch(total_rows, captured_input, captured_schema, format)
create_output_batch(total_rows, captured_input, captured_schema, format, verbose)
};
Ok(Box::pin(RecordBatchStreamAdapter::new(
@@ -205,11 +212,12 @@ fn create_output_batch(
input: Arc<dyn ExecutionPlan>,
schema: SchemaRef,
format: AnalyzeFormat,
verbose: bool,
) -> DfResult<DfRecordBatch> {
let mut builder = AnalyzeOutputBuilder::new(schema);
// Treat the current stage as stage 0. Fetch its metrics
let mut collector = MetricCollector::default();
let mut collector = MetricCollector::new(verbose);
// Safety: metric collector won't return error
accept(input.as_ref(), &mut collector).unwrap();
let stage_0_metrics = collector.record_batch_metrics;

View File

@@ -367,8 +367,15 @@ impl DatafusionQueryEngine {
} else {
AnalyzeFormat::TEXT
};
// Sets the verbose flag of the query context.
// The MergeScanExec plan uses the verbose flag to determine whether to print the plan in verbose mode.
ctx.query_ctx().set_explain_verbose(analyze_plan.verbose());
Arc::new(DistAnalyzeExec::new(analyze_plan.input().clone(), format))
Arc::new(DistAnalyzeExec::new(
analyze_plan.input().clone(),
analyze_plan.verbose(),
format,
))
// let mut new_plan = analyze_plan.input().clone();
// for optimizer in state.physical_optimizers() {
// new_plan = optimizer
@@ -511,6 +518,7 @@ impl QueryExecutor for DatafusionQueryEngine {
.map_err(BoxedError::new)
.context(QueryExecutionSnafu)?;
stream.set_metrics2(plan.clone());
stream.set_explain_verbose(ctx.query_ctx().explain_verbose());
let stream = OnDone::new(Box::pin(stream), move || {
exec_timer.observe_duration();
});
@@ -537,6 +545,7 @@ impl QueryExecutor for DatafusionQueryEngine {
.map_err(BoxedError::new)
.context(QueryExecutionSnafu)?;
stream.set_metrics2(plan.clone());
stream.set_explain_verbose(ctx.query_ctx().explain_verbose());
let stream = OnDone::new(Box::pin(stream), move || {
exec_timer.observe_duration();
});