chore: update datafusion family (#6675)

* chore: update datafusion family

Signed-off-by: luofucong <luofc@foxmail.com>

* fix ci

Signed-off-by: luofucong <luofc@foxmail.com>

* use official otel-arrow-rust

Signed-off-by: luofucong <luofc@foxmail.com>

* rebase

Signed-off-by: luofucong <luofc@foxmail.com>

* use the official orc-rust

Signed-off-by: luofucong <luofc@foxmail.com>

* resolve PR comments

Signed-off-by: luofucong <luofc@foxmail.com>

* remove the empty lines

Signed-off-by: luofucong <luofc@foxmail.com>

* try following PR comments

Signed-off-by: luofucong <luofc@foxmail.com>

---------

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2025-08-15 20:41:49 +08:00
committed by GitHub
parent dfc29eb3b3
commit f9d2a89a0c
198 changed files with 4932 additions and 4196 deletions

View File

@@ -24,8 +24,7 @@ use common_query::logical_plan::breakup_insert_plan;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::Timestamp;
use datafusion::optimizer::analyzer::count_wildcard_rule::CountWildcardRule;
use datafusion::optimizer::AnalyzerRule;
use datafusion::datasource::DefaultTableSource;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::DFSchemaRef;
@@ -40,6 +39,7 @@ use snafu::{ensure, OptionExt, ResultExt};
use sql::parser::{ParseOptions, ParserContext};
use sql::statements::statement::Statement;
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use table::table::adapter::DfTableProviderAdapter;
use tokio::sync::oneshot;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::time::Instant;
@@ -252,7 +252,11 @@ impl BatchingTask {
.await?;
let new_query = self
.gen_query_with_time_window(engine.clone(), &table.meta.schema, max_window_cnt)
.gen_query_with_time_window(
engine.clone(),
&table.table_info().meta.schema,
max_window_cnt,
)
.await?;
let insert_into = if let Some((new_query, _column_cnt)) = new_query {
@@ -274,6 +278,10 @@ impl BatchingTask {
}
);
}
let table_provider = Arc::new(DfTableProviderAdapter::new(table));
let table_source = Arc::new(DefaultTableSource::new(table_provider));
// the update_at & time index placeholder columns (if they exist) should have default values
LogicalPlan::Dml(DmlStatement::new(
datafusion_common::TableReference::Full {
@@ -281,7 +289,7 @@ impl BatchingTask {
schema: self.config.sink_table_name[1].clone().into(),
table: self.config.sink_table_name[2].clone().into(),
},
df_schema,
table_source,
WriteOp::Insert(datafusion_expr::dml::InsertOp::Append),
Arc::new(new_query),
))
@@ -324,7 +332,7 @@ impl BatchingTask {
let schema = &self.config.sink_table_name[1];
// fix all table refs by making them fully qualified, i.e. "table_name" => "catalog_name.schema_name.table_name"
let fixed_plan = plan
let plan = plan
.clone()
.transform_down_with_subqueries(|p| {
if let LogicalPlan::TableScan(mut table_scan) = p {
@@ -340,16 +348,6 @@ impl BatchingTask {
})?
.data;
let expanded_plan = CountWildcardRule::new()
.analyze(fixed_plan.clone(), &Default::default())
.with_context(|_| DatafusionSnafu {
context: format!(
"Failed to expand wildcard in logical plan, plan={:?}",
fixed_plan
),
})?;
let plan = expanded_plan;
let mut peer_desc = None;
let res = {

View File

@@ -36,7 +36,7 @@ use snafu::{ensure, OptionExt, ResultExt};
use sql::parser::{ParseOptions, ParserContext};
use sql::statements::statement::Statement;
use sql::statements::tql::Tql;
use table::metadata::TableInfo;
use table::TableRef;
use crate::adapter::AUTO_CREATED_PLACEHOLDER_TS_COL;
use crate::df_optimizer::apply_df_optimizer;
@@ -46,7 +46,7 @@ use crate::{Error, TableName};
pub async fn get_table_info_df_schema(
catalog_mr: CatalogManagerRef,
table_name: TableName,
) -> Result<(Arc<TableInfo>, Arc<DFSchema>), Error> {
) -> Result<(TableRef, Arc<DFSchema>), Error> {
let full_table_name = table_name.clone().join(".");
let table = catalog_mr
.table(&table_name[0], &table_name[1], &table_name[2], None)
@@ -56,7 +56,7 @@ pub async fn get_table_info_df_schema(
.context(TableNotFoundSnafu {
name: &full_table_name,
})?;
let table_info = table.table_info().clone();
let table_info = table.table_info();
let schema = table_info.meta.schema.clone();
@@ -72,7 +72,7 @@ pub async fn get_table_info_df_schema(
),
})?,
);
Ok((table_info, df_schema))
Ok((table, df_schema))
}
/// Convert sql to datafusion logical plan
@@ -426,7 +426,7 @@ impl TreeNodeRewriter for AddFilterRewriter {
return Ok(Transformed::no(node));
}
match node {
LogicalPlan::Filter(mut filter) if !filter.having => {
LogicalPlan::Filter(mut filter) => {
filter.predicate = filter.predicate.and(self.extra_filter.clone());
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))

View File

@@ -29,7 +29,6 @@ use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
use datafusion::optimizer::common_subexpr_eliminate::CommonSubexprEliminate;
use datafusion::optimizer::optimize_projections::OptimizeProjections;
use datafusion::optimizer::simplify_expressions::SimplifyExpressions;
use datafusion::optimizer::unwrap_cast_in_comparison::UnwrapCastInComparison;
use datafusion::optimizer::utils::NamePreserver;
use datafusion::optimizer::{Analyzer, AnalyzerRule, Optimizer, OptimizerContext};
use datafusion_common::tree_node::{
@@ -38,8 +37,8 @@ use datafusion_common::tree_node::{
use datafusion_common::{Column, DFSchema, ScalarValue};
use datafusion_expr::utils::merge_schema;
use datafusion_expr::{
BinaryExpr, ColumnarValue, Expr, Operator, Projection, ScalarFunctionArgs, ScalarUDFImpl,
Signature, TypeSignature, Volatility,
BinaryExpr, ColumnarValue, Expr, Literal, Operator, Projection, ScalarFunctionArgs,
ScalarUDFImpl, Signature, TypeSignature, Volatility,
};
use query::optimizer::count_wildcard::CountWildcardToTimeIndexRule;
use query::parser::QueryLanguageParser;
@@ -80,7 +79,6 @@ pub async fn apply_df_optimizer(
Arc::new(OptimizeProjections::new()),
Arc::new(CommonSubexprEliminate::new()),
Arc::new(SimplifyExpressions::new()),
Arc::new(UnwrapCastInComparison::new()),
]);
let plan = optimizer
.optimize(plan, &ctx, |_, _| {})
@@ -305,11 +303,11 @@ impl TreeNodeRewriter for ExpandAvgRewriter<'_> {
BinaryExpr::new(Box::new(sum_cast), Operator::Divide, Box::new(count_expr));
let div_expr = Box::new(Expr::BinaryExpr(div));
let zero = Box::new(Expr::Literal(ScalarValue::Int64(Some(0))));
let zero = Box::new(0.lit());
let not_zero =
BinaryExpr::new(Box::new(count_expr_ref), Operator::NotEq, zero.clone());
let not_zero = Box::new(Expr::BinaryExpr(not_zero));
let null = Box::new(Expr::Literal(ScalarValue::Null));
let null = Box::new(Expr::Literal(ScalarValue::Null, None));
let case_when =
datafusion_expr::Case::new(None, vec![(not_zero, div_expr)], Some(null));