mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-24 00:40:40 +00:00
chore: upgrade DataFusion family (#7558)
* chore: upgrade DataFusion family Signed-off-by: luofucong <luofc@foxmail.com> * use main proto Signed-off-by: luofucong <luofc@foxmail.com> * fix ci Signed-off-by: luofucong <luofc@foxmail.com> --------- Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
@@ -255,9 +255,9 @@ fn metrics_to_string(metrics: RecordBatchMetrics, format: AnalyzeFormat) -> DfRe
|
||||
match format {
|
||||
AnalyzeFormat::JSON => Ok(JsonMetrics::from_record_batch_metrics(metrics).to_string()),
|
||||
AnalyzeFormat::TEXT => Ok(metrics.to_string()),
|
||||
AnalyzeFormat::GRAPHVIZ => Err(DataFusionError::NotImplemented(
|
||||
"GRAPHVIZ format is not supported for metrics output".to_string(),
|
||||
)),
|
||||
format => Err(DataFusionError::NotImplemented(format!(
|
||||
"AnalyzeFormat {format}",
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -316,18 +316,15 @@ impl DatafusionQueryEngine {
|
||||
return state
|
||||
.create_physical_plan(logical_plan)
|
||||
.await
|
||||
.context(error::DatafusionSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu);
|
||||
.map_err(Into::into);
|
||||
}
|
||||
|
||||
// analyze first
|
||||
let analyzed_plan = state
|
||||
.analyzer()
|
||||
.execute_and_check(logical_plan.clone(), state.config_options(), |_, _| {})
|
||||
.context(error::DatafusionSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)?;
|
||||
let analyzed_plan = state.analyzer().execute_and_check(
|
||||
logical_plan.clone(),
|
||||
state.config_options(),
|
||||
|_, _| {},
|
||||
)?;
|
||||
|
||||
logger.after_analyze = Some(analyzed_plan.clone());
|
||||
|
||||
@@ -341,10 +338,7 @@ impl DatafusionQueryEngine {
|
||||
} else {
|
||||
state
|
||||
.optimizer()
|
||||
.optimize(analyzed_plan, state, |_, _| {})
|
||||
.context(error::DatafusionSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)?
|
||||
.optimize(analyzed_plan, state, |_, _| {})?
|
||||
};
|
||||
|
||||
common_telemetry::debug!("Create physical plan, optimized plan: {optimized_plan}");
|
||||
@@ -371,19 +365,10 @@ impl DatafusionQueryEngine {
|
||||
// Optimized by extension rules
|
||||
let optimized_plan = self
|
||||
.state
|
||||
.optimize_by_extension_rules(plan.clone(), context)
|
||||
.context(error::DatafusionSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)?;
|
||||
.optimize_by_extension_rules(plan.clone(), context)?;
|
||||
|
||||
// Optimized by datafusion optimizer
|
||||
let optimized_plan = self
|
||||
.state
|
||||
.session_state()
|
||||
.optimize(&optimized_plan)
|
||||
.context(error::DatafusionSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)?;
|
||||
let optimized_plan = self.state.session_state().optimize(&optimized_plan)?;
|
||||
|
||||
Ok(optimized_plan)
|
||||
}
|
||||
@@ -516,11 +501,7 @@ impl QueryEngine for DatafusionQueryEngine {
|
||||
}
|
||||
|
||||
fn read_table(&self, table: TableRef) -> Result<DataFrame> {
|
||||
self.state
|
||||
.read_table(table)
|
||||
.context(error::DatafusionSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)
|
||||
self.state.read_table(table).map_err(Into::into)
|
||||
}
|
||||
|
||||
fn engine_context(&self, query_ctx: QueryContextRef) -> QueryEngineContext {
|
||||
@@ -543,7 +524,8 @@ impl QueryEngine for DatafusionQueryEngine {
|
||||
}
|
||||
|
||||
// configure execution options
|
||||
state.config_mut().options_mut().execution.time_zone = query_ctx.timezone().to_string();
|
||||
state.config_mut().options_mut().execution.time_zone =
|
||||
Some(query_ctx.timezone().to_string());
|
||||
|
||||
// usually it's impossible to have both `set variable` set by sql client and
|
||||
// hint in header by grpc client, so only need to deal with them separately
|
||||
@@ -619,11 +601,7 @@ impl QueryExecutor for DatafusionQueryEngine {
|
||||
Ok(Box::pin(EmptyRecordBatchStream::new(schema)))
|
||||
}
|
||||
1 => {
|
||||
let df_stream = plan
|
||||
.execute(0, task_ctx)
|
||||
.context(error::DatafusionSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)?;
|
||||
let df_stream = plan.execute(0, task_ctx)?;
|
||||
let mut stream = RecordBatchStreamAdapter::try_new_with_span(df_stream, span)
|
||||
.context(error::ConvertDfRecordBatchStreamSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
@@ -652,11 +630,7 @@ impl QueryExecutor for DatafusionQueryEngine {
|
||||
.output_partitioning()
|
||||
.partition_count()
|
||||
);
|
||||
let df_stream = merged_plan
|
||||
.execute(0, task_ctx)
|
||||
.context(error::DatafusionSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(QueryExecutionSnafu)?;
|
||||
let df_stream = merged_plan.execute(0, task_ctx)?;
|
||||
let mut stream = RecordBatchStreamAdapter::try_new_with_span(df_stream, span)
|
||||
.context(error::ConvertDfRecordBatchStreamSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
|
||||
@@ -25,7 +25,7 @@ use snafu::{Location, Snafu};
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum InnerError {
|
||||
#[snafu(display("DataFusion error"))]
|
||||
#[snafu(transparent)]
|
||||
Datafusion {
|
||||
#[snafu(source)]
|
||||
error: DataFusionError,
|
||||
|
||||
@@ -1170,7 +1170,7 @@ fn test_simplify_select_now_expression() {
|
||||
let expected = [
|
||||
"Projection: now()",
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
r#"Projection: TimestampNanosecond(<TIME>, Some("+00:00")) AS now()"#,
|
||||
r#"Projection: TimestampNanosecond(<TIME>, None) AS now()"#,
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
|
||||
@@ -143,7 +143,7 @@ mod tests {
|
||||
let plan = create_test_plan_with_project(proj);
|
||||
let result = StringNormalizationRule.analyze(plan, config).unwrap();
|
||||
let expected = format!(
|
||||
"Projection: CAST(Utf8(\"2017-07-23 13:10:11\") AS Timestamp({:#?}, None))\n TableScan: t",
|
||||
"Projection: CAST(Utf8(\"2017-07-23 13:10:11\") AS Timestamp({}))\n TableScan: t",
|
||||
time_unit
|
||||
);
|
||||
assert_eq!(expected, result.to_string());
|
||||
@@ -162,7 +162,7 @@ mod tests {
|
||||
.analyze(int_to_timestamp_plan, config)
|
||||
.unwrap();
|
||||
let expected = String::from(
|
||||
"Projection: CAST(Int64(158412331400600000) AS Timestamp(Nanosecond, None))\n TableScan: t",
|
||||
"Projection: CAST(Int64(158412331400600000) AS Timestamp(ns))\n TableScan: t",
|
||||
);
|
||||
assert_eq!(expected, result.to_string());
|
||||
|
||||
|
||||
@@ -4687,11 +4687,11 @@ mod test {
|
||||
assert_eq!(
|
||||
plan.display_indent_schema().to_string(),
|
||||
"PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-1000, None) AND metrics.timestamp <= TimestampMillisecond(100001000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(Millisecond, None)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]"
|
||||
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-1000, None) AND metrics.timestamp <= TimestampMillisecond(100001000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(ms)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]"
|
||||
);
|
||||
let plan = PromPlanner::stmt_to_plan(
|
||||
DfTableSourceProvider::new(
|
||||
@@ -4717,14 +4717,14 @@ mod test {
|
||||
assert_eq!(
|
||||
plan.display_indent_schema().to_string(),
|
||||
"Filter: prom_avg_over_time(timestamp_range,field) IS NOT NULL [timestamp:Timestamp(Millisecond, None), prom_avg_over_time(timestamp_range,field):Float64;N, tag:Utf8]\
|
||||
\n Projection: metrics.timestamp, prom_avg_over_time(timestamp_range, field) AS prom_avg_over_time(timestamp_range,field), metrics.tag [timestamp:Timestamp(Millisecond, None), prom_avg_over_time(timestamp_range,field):Float64;N, tag:Utf8]\
|
||||
\n PromRangeManipulate: req range=[0..100000000], interval=[5000], eval range=[5000], time index=[timestamp], values=[\"field\"] [field:Dictionary(Int64, Float64);N, tag:Utf8, timestamp:Timestamp(Millisecond, None), timestamp_range:Dictionary(Int64, Timestamp(Millisecond, None))]\
|
||||
\n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-6000, None) AND metrics.timestamp <= TimestampMillisecond(100001000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(Millisecond, None)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]"
|
||||
\n Projection: metrics.timestamp, prom_avg_over_time(timestamp_range, field) AS prom_avg_over_time(timestamp_range,field), metrics.tag [timestamp:Timestamp(Millisecond, None), prom_avg_over_time(timestamp_range,field):Float64;N, tag:Utf8]\
|
||||
\n PromRangeManipulate: req range=[0..100000000], interval=[5000], eval range=[5000], time index=[timestamp], values=[\"field\"] [field:Dictionary(Int64, Float64);N, tag:Utf8, timestamp:Timestamp(Millisecond, None), timestamp_range:Dictionary(Int64, Timestamp(Millisecond, None))]\
|
||||
\n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-6000, None) AND metrics.timestamp <= TimestampMillisecond(100001000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(ms)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(Millisecond, None)]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user