mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-31 12:20:38 +00:00
chore: update datafusion family (#6675)
* chore: update datafusion family
  Signed-off-by: luofucong <luofc@foxmail.com>
* fix ci
  Signed-off-by: luofucong <luofc@foxmail.com>
* use official otel-arrow-rust
  Signed-off-by: luofucong <luofc@foxmail.com>
* rebase
  Signed-off-by: luofucong <luofc@foxmail.com>
* use the official orc-rust
  Signed-off-by: luofucong <luofc@foxmail.com>
* resolve PR comments
  Signed-off-by: luofucong <luofc@foxmail.com>
* remove the empty lines
  Signed-off-by: luofucong <luofc@foxmail.com>
* try following PR comments
  Signed-off-by: luofucong <luofc@foxmail.com>
---------
Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
@@ -24,8 +24,7 @@ use common_query::logical_plan::breakup_insert_plan;
|
||||
use common_telemetry::tracing::warn;
|
||||
use common_telemetry::{debug, info};
|
||||
use common_time::Timestamp;
|
||||
use datafusion::optimizer::analyzer::count_wildcard_rule::CountWildcardRule;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::sql::unparser::expr_to_sql;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode};
|
||||
use datafusion_common::DFSchemaRef;
|
||||
@@ -40,6 +39,7 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::parser::{ParseOptions, ParserContext};
|
||||
use sql::statements::statement::Statement;
|
||||
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::sync::oneshot::error::TryRecvError;
|
||||
use tokio::time::Instant;
|
||||
@@ -252,7 +252,11 @@ impl BatchingTask {
|
||||
.await?;
|
||||
|
||||
let new_query = self
|
||||
.gen_query_with_time_window(engine.clone(), &table.meta.schema, max_window_cnt)
|
||||
.gen_query_with_time_window(
|
||||
engine.clone(),
|
||||
&table.table_info().meta.schema,
|
||||
max_window_cnt,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let insert_into = if let Some((new_query, _column_cnt)) = new_query {
|
||||
@@ -274,6 +278,10 @@ impl BatchingTask {
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
let table_provider = Arc::new(DfTableProviderAdapter::new(table));
|
||||
let table_source = Arc::new(DefaultTableSource::new(table_provider));
|
||||
|
||||
// `update_at` & the time index placeholder (if they exist) should have default values
|
||||
LogicalPlan::Dml(DmlStatement::new(
|
||||
datafusion_common::TableReference::Full {
|
||||
@@ -281,7 +289,7 @@ impl BatchingTask {
|
||||
schema: self.config.sink_table_name[1].clone().into(),
|
||||
table: self.config.sink_table_name[2].clone().into(),
|
||||
},
|
||||
df_schema,
|
||||
table_source,
|
||||
WriteOp::Insert(datafusion_expr::dml::InsertOp::Append),
|
||||
Arc::new(new_query),
|
||||
))
|
||||
@@ -324,7 +332,7 @@ impl BatchingTask {
|
||||
let schema = &self.config.sink_table_name[1];
|
||||
|
||||
// Fix all table references by making them fully qualified, i.e. "table_name" => "catalog_name.schema_name.table_name"
|
||||
let fixed_plan = plan
|
||||
let plan = plan
|
||||
.clone()
|
||||
.transform_down_with_subqueries(|p| {
|
||||
if let LogicalPlan::TableScan(mut table_scan) = p {
|
||||
@@ -340,16 +348,6 @@ impl BatchingTask {
|
||||
})?
|
||||
.data;
|
||||
|
||||
let expanded_plan = CountWildcardRule::new()
|
||||
.analyze(fixed_plan.clone(), &Default::default())
|
||||
.with_context(|_| DatafusionSnafu {
|
||||
context: format!(
|
||||
"Failed to expand wildcard in logical plan, plan={:?}",
|
||||
fixed_plan
|
||||
),
|
||||
})?;
|
||||
|
||||
let plan = expanded_plan;
|
||||
let mut peer_desc = None;
|
||||
|
||||
let res = {
|
||||
|
||||
@@ -36,7 +36,7 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::parser::{ParseOptions, ParserContext};
|
||||
use sql::statements::statement::Statement;
|
||||
use sql::statements::tql::Tql;
|
||||
use table::metadata::TableInfo;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::adapter::AUTO_CREATED_PLACEHOLDER_TS_COL;
|
||||
use crate::df_optimizer::apply_df_optimizer;
|
||||
@@ -46,7 +46,7 @@ use crate::{Error, TableName};
|
||||
pub async fn get_table_info_df_schema(
|
||||
catalog_mr: CatalogManagerRef,
|
||||
table_name: TableName,
|
||||
) -> Result<(Arc<TableInfo>, Arc<DFSchema>), Error> {
|
||||
) -> Result<(TableRef, Arc<DFSchema>), Error> {
|
||||
let full_table_name = table_name.clone().join(".");
|
||||
let table = catalog_mr
|
||||
.table(&table_name[0], &table_name[1], &table_name[2], None)
|
||||
@@ -56,7 +56,7 @@ pub async fn get_table_info_df_schema(
|
||||
.context(TableNotFoundSnafu {
|
||||
name: &full_table_name,
|
||||
})?;
|
||||
let table_info = table.table_info().clone();
|
||||
let table_info = table.table_info();
|
||||
|
||||
let schema = table_info.meta.schema.clone();
|
||||
|
||||
@@ -72,7 +72,7 @@ pub async fn get_table_info_df_schema(
|
||||
),
|
||||
})?,
|
||||
);
|
||||
Ok((table_info, df_schema))
|
||||
Ok((table, df_schema))
|
||||
}
|
||||
|
||||
/// Convert sql to datafusion logical plan
|
||||
@@ -426,7 +426,7 @@ impl TreeNodeRewriter for AddFilterRewriter {
|
||||
return Ok(Transformed::no(node));
|
||||
}
|
||||
match node {
|
||||
LogicalPlan::Filter(mut filter) if !filter.having => {
|
||||
LogicalPlan::Filter(mut filter) => {
|
||||
filter.predicate = filter.predicate.and(self.extra_filter.clone());
|
||||
self.is_rewritten = true;
|
||||
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
|
||||
|
||||
@@ -29,7 +29,6 @@ use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
|
||||
use datafusion::optimizer::common_subexpr_eliminate::CommonSubexprEliminate;
|
||||
use datafusion::optimizer::optimize_projections::OptimizeProjections;
|
||||
use datafusion::optimizer::simplify_expressions::SimplifyExpressions;
|
||||
use datafusion::optimizer::unwrap_cast_in_comparison::UnwrapCastInComparison;
|
||||
use datafusion::optimizer::utils::NamePreserver;
|
||||
use datafusion::optimizer::{Analyzer, AnalyzerRule, Optimizer, OptimizerContext};
|
||||
use datafusion_common::tree_node::{
|
||||
@@ -38,8 +37,8 @@ use datafusion_common::tree_node::{
|
||||
use datafusion_common::{Column, DFSchema, ScalarValue};
|
||||
use datafusion_expr::utils::merge_schema;
|
||||
use datafusion_expr::{
|
||||
BinaryExpr, ColumnarValue, Expr, Operator, Projection, ScalarFunctionArgs, ScalarUDFImpl,
|
||||
Signature, TypeSignature, Volatility,
|
||||
BinaryExpr, ColumnarValue, Expr, Literal, Operator, Projection, ScalarFunctionArgs,
|
||||
ScalarUDFImpl, Signature, TypeSignature, Volatility,
|
||||
};
|
||||
use query::optimizer::count_wildcard::CountWildcardToTimeIndexRule;
|
||||
use query::parser::QueryLanguageParser;
|
||||
@@ -80,7 +79,6 @@ pub async fn apply_df_optimizer(
|
||||
Arc::new(OptimizeProjections::new()),
|
||||
Arc::new(CommonSubexprEliminate::new()),
|
||||
Arc::new(SimplifyExpressions::new()),
|
||||
Arc::new(UnwrapCastInComparison::new()),
|
||||
]);
|
||||
let plan = optimizer
|
||||
.optimize(plan, &ctx, |_, _| {})
|
||||
@@ -305,11 +303,11 @@ impl TreeNodeRewriter for ExpandAvgRewriter<'_> {
|
||||
BinaryExpr::new(Box::new(sum_cast), Operator::Divide, Box::new(count_expr));
|
||||
let div_expr = Box::new(Expr::BinaryExpr(div));
|
||||
|
||||
let zero = Box::new(Expr::Literal(ScalarValue::Int64(Some(0))));
|
||||
let zero = Box::new(0.lit());
|
||||
let not_zero =
|
||||
BinaryExpr::new(Box::new(count_expr_ref), Operator::NotEq, zero.clone());
|
||||
let not_zero = Box::new(Expr::BinaryExpr(not_zero));
|
||||
let null = Box::new(Expr::Literal(ScalarValue::Null));
|
||||
let null = Box::new(Expr::Literal(ScalarValue::Null, None));
|
||||
|
||||
let case_when =
|
||||
datafusion_expr::Case::new(None, vec![(not_zero, div_expr)], Some(null));
|
||||
|
||||
Reference in New Issue
Block a user