mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-18 05:50:41 +00:00
chore: update datafusion family (#6675)
* chore: update datafusion family

  Signed-off-by: luofucong <luofc@foxmail.com>

* fix ci

  Signed-off-by: luofucong <luofc@foxmail.com>

* use official otel-arrow-rust

  Signed-off-by: luofucong <luofc@foxmail.com>

* rebase

  Signed-off-by: luofucong <luofc@foxmail.com>

* use the official orc-rust

  Signed-off-by: luofucong <luofc@foxmail.com>

* resolve PR comments

  Signed-off-by: luofucong <luofc@foxmail.com>

* remove the empty lines

  Signed-off-by: luofucong <luofc@foxmail.com>

* try following PR comments

  Signed-off-by: luofucong <luofc@foxmail.com>

---------

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
@@ -25,14 +25,14 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::StructArray;
|
||||
use arrow_schema::Fields;
|
||||
use arrow_schema::{FieldRef, Fields};
|
||||
use common_telemetry::debug;
|
||||
use datafusion::functions_aggregate::all_default_aggregate_functions;
|
||||
use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
use datafusion::physical_planner::create_aggregate_expr_and_maybe_filter;
|
||||
use datafusion_common::{Column, ScalarValue};
|
||||
use datafusion_expr::expr::AggregateFunction;
|
||||
use datafusion_expr::expr::{AggregateFunction, AggregateFunctionParams};
|
||||
use datafusion_expr::function::StateFieldsArgs;
|
||||
use datafusion_expr::{
|
||||
Accumulator, Aggregate, AggregateUDF, AggregateUDFImpl, Expr, ExprSchemable, LogicalPlan,
|
||||
@@ -146,6 +146,7 @@ impl StateMergeHelper {
|
||||
};
|
||||
|
||||
let original_input_types = aggr_func
|
||||
.params
|
||||
.args
|
||||
.iter()
|
||||
.map(|e| e.get_type(&aggr.input.schema()))
|
||||
@@ -156,11 +157,7 @@ impl StateMergeHelper {
|
||||
|
||||
let expr = AggregateFunction {
|
||||
func: Arc::new(state_func.into()),
|
||||
args: aggr_func.args.clone(),
|
||||
distinct: aggr_func.distinct,
|
||||
filter: aggr_func.filter.clone(),
|
||||
order_by: aggr_func.order_by.clone(),
|
||||
null_treatment: aggr_func.null_treatment,
|
||||
params: aggr_func.params.clone(),
|
||||
};
|
||||
let expr = Expr::AggregateFunction(expr);
|
||||
let lower_state_output_col_name = expr.schema_name().to_string();
|
||||
@@ -182,11 +179,10 @@ impl StateMergeHelper {
|
||||
let arg = Expr::Column(Column::new_unqualified(lower_state_output_col_name));
|
||||
let expr = AggregateFunction {
|
||||
func: Arc::new(merge_func.into()),
|
||||
args: vec![arg],
|
||||
distinct: aggr_func.distinct,
|
||||
filter: aggr_func.filter.clone(),
|
||||
order_by: aggr_func.order_by.clone(),
|
||||
null_treatment: aggr_func.null_treatment,
|
||||
params: AggregateFunctionParams {
|
||||
args: vec![arg],
|
||||
..aggr_func.params.clone()
|
||||
},
|
||||
};
|
||||
|
||||
// alias to the original aggregate expr's schema name, so parent plan can refer to it
|
||||
@@ -247,15 +243,8 @@ impl StateWrapper {
|
||||
pub fn deduce_aggr_return_type(
|
||||
&self,
|
||||
acc_args: &datafusion_expr::function::AccumulatorArgs,
|
||||
) -> datafusion_common::Result<DataType> {
|
||||
let input_exprs = acc_args.exprs;
|
||||
let input_schema = acc_args.schema;
|
||||
let input_types = input_exprs
|
||||
.iter()
|
||||
.map(|e| e.data_type(input_schema))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let return_type = self.inner.return_type(&input_types)?;
|
||||
Ok(return_type)
|
||||
) -> datafusion_common::Result<FieldRef> {
|
||||
self.inner.return_field(acc_args.schema.fields())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -265,14 +254,13 @@ impl AggregateUDFImpl for StateWrapper {
|
||||
acc_args: datafusion_expr::function::AccumulatorArgs<'b>,
|
||||
) -> datafusion_common::Result<Box<dyn Accumulator>> {
|
||||
// fix and recover proper acc args for the original aggregate function.
|
||||
let state_type = acc_args.return_type.clone();
|
||||
let state_type = acc_args.return_type().clone();
|
||||
let inner = {
|
||||
let old_return_type = self.deduce_aggr_return_type(&acc_args)?;
|
||||
let acc_args = datafusion_expr::function::AccumulatorArgs {
|
||||
return_type: &old_return_type,
|
||||
return_field: self.deduce_aggr_return_type(&acc_args)?,
|
||||
schema: acc_args.schema,
|
||||
ignore_nulls: acc_args.ignore_nulls,
|
||||
ordering_req: acc_args.ordering_req,
|
||||
order_bys: acc_args.order_bys,
|
||||
is_reversed: acc_args.is_reversed,
|
||||
name: acc_args.name,
|
||||
is_distinct: acc_args.is_distinct,
|
||||
@@ -297,11 +285,15 @@ impl AggregateUDFImpl for StateWrapper {
|
||||
/// Return state_fields as the output struct type.
|
||||
///
|
||||
fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
|
||||
let old_return_type = self.inner.return_type(arg_types)?;
|
||||
let input_fields = &arg_types
|
||||
.iter()
|
||||
.map(|x| Arc::new(Field::new("x", x.clone(), false)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let state_fields_args = StateFieldsArgs {
|
||||
name: self.inner().name(),
|
||||
input_types: arg_types,
|
||||
return_type: &old_return_type,
|
||||
input_fields,
|
||||
return_field: self.inner.return_field(input_fields)?,
|
||||
// TODO(discord9): how to get this?, probably ok?
|
||||
ordering_fields: &[],
|
||||
is_distinct: false,
|
||||
@@ -315,12 +307,11 @@ impl AggregateUDFImpl for StateWrapper {
|
||||
fn state_fields(
|
||||
&self,
|
||||
args: datafusion_expr::function::StateFieldsArgs,
|
||||
) -> datafusion_common::Result<Vec<Field>> {
|
||||
let old_return_type = self.inner.return_type(args.input_types)?;
|
||||
) -> datafusion_common::Result<Vec<FieldRef>> {
|
||||
let state_fields_args = StateFieldsArgs {
|
||||
name: args.name,
|
||||
input_types: args.input_types,
|
||||
return_type: &old_return_type,
|
||||
input_fields: args.input_fields,
|
||||
return_field: self.inner.return_field(args.input_fields)?,
|
||||
ordering_fields: args.ordering_fields,
|
||||
is_distinct: args.is_distinct,
|
||||
};
|
||||
@@ -502,7 +493,7 @@ impl AggregateUDFImpl for MergeWrapper {
|
||||
fn state_fields(
|
||||
&self,
|
||||
_args: datafusion_expr::function::StateFieldsArgs,
|
||||
) -> datafusion_common::Result<Vec<Field>> {
|
||||
) -> datafusion_common::Result<Vec<FieldRef>> {
|
||||
self.original_phy_expr.state_fields()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ use datafusion::prelude::SessionContext;
|
||||
use datafusion_common::{Column, TableReference};
|
||||
use datafusion_expr::expr::AggregateFunction;
|
||||
use datafusion_expr::sqlparser::ast::NullTreatment;
|
||||
use datafusion_expr::{Aggregate, Expr, LogicalPlan, SortExpr, TableScan};
|
||||
use datafusion_expr::{lit, Aggregate, Expr, LogicalPlan, SortExpr, TableScan};
|
||||
use datafusion_physical_expr::aggregate::AggregateExprBuilder;
|
||||
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
|
||||
use datatypes::arrow_array::StringArray;
|
||||
@@ -234,7 +234,7 @@ async fn test_sum_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
@@ -250,7 +250,7 @@ async fn test_sum_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
@@ -290,7 +290,7 @@ async fn test_sum_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("__sum_state(number)"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))
|
||||
.alias("sum(number)")],
|
||||
@@ -378,7 +378,7 @@ async fn test_avg_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
@@ -395,7 +395,7 @@ async fn test_avg_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
@@ -449,7 +449,7 @@ async fn test_avg_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("__avg_state(number)"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))
|
||||
.alias("avg(number)")],
|
||||
@@ -551,7 +551,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
expected_fn: Option<ExpectedFn>,
|
||||
distinct: bool,
|
||||
filter: Option<Box<Expr>>,
|
||||
order_by: Option<Vec<SortExpr>>,
|
||||
order_by: Vec<SortExpr>,
|
||||
null_treatment: Option<NullTreatment>,
|
||||
}
|
||||
type ExpectedFn = fn(ArrayRef) -> bool;
|
||||
@@ -575,7 +575,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
@@ -596,7 +596,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
@@ -619,7 +619,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
@@ -630,8 +630,8 @@ async fn test_udaf_correct_eval_result() {
|
||||
true,
|
||||
)])),
|
||||
args: vec![
|
||||
Expr::Literal(ScalarValue::Int64(Some(128))),
|
||||
Expr::Literal(ScalarValue::Float64(Some(0.05))),
|
||||
lit(128i64),
|
||||
lit(0.05f64),
|
||||
Expr::Column(Column::new_unqualified("number")),
|
||||
],
|
||||
input: vec![Arc::new(Float64Array::from(vec![
|
||||
@@ -659,7 +659,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
}),
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
@@ -690,7 +690,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
}),
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
// TODO(discord9): udd_merge/hll_merge/geo_path/quantile_aggr tests
|
||||
|
||||
@@ -41,7 +41,7 @@ use datatypes::arrow::array::{
|
||||
Array, ArrayRef, AsArray, BooleanArray, Int64Array, ListArray, UInt64Array,
|
||||
};
|
||||
use datatypes::arrow::buffer::{OffsetBuffer, ScalarBuffer};
|
||||
use datatypes::arrow::datatypes::{DataType, Field};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, FieldRef};
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -94,14 +94,14 @@ impl AggregateUDFImpl for CountHash {
|
||||
false
|
||||
}
|
||||
|
||||
fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
|
||||
Ok(vec![Field::new_list(
|
||||
fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<FieldRef>> {
|
||||
Ok(vec![Arc::new(Field::new_list(
|
||||
format_state_name(args.name, "count_hash"),
|
||||
Field::new_list_field(DataType::UInt64, true),
|
||||
// For count_hash accumulator, null list item stands for an
|
||||
// empty value set (i.e., all NULL value so far for that group).
|
||||
true,
|
||||
)])
|
||||
))])
|
||||
}
|
||||
|
||||
fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
|
||||
|
||||
@@ -23,6 +23,7 @@ use datafusion::logical_expr::Volatility;
|
||||
use datatypes::value::{Value, ValueRef};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use sql::ast::ObjectNamePartExt;
|
||||
use sql::parser::ParserContext;
|
||||
use store_api::storage::ConcreteDataType;
|
||||
|
||||
@@ -85,9 +86,9 @@ fn parse_flush_flow(
|
||||
let (catalog_name, flow_name) = match &obj_name.0[..] {
|
||||
[flow_name] => (
|
||||
query_ctx.current_catalog().to_string(),
|
||||
flow_name.value.clone(),
|
||||
flow_name.to_string_unquoted(),
|
||||
),
|
||||
[catalog, flow_name] => (catalog.value.clone(), flow_name.value.clone()),
|
||||
[catalog, flow_name] => (catalog.to_string_unquoted(), flow_name.to_string_unquoted()),
|
||||
_ => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
|
||||
@@ -113,6 +113,8 @@ mod tests {
|
||||
|
||||
use common_query::prelude::ScalarValue;
|
||||
use datafusion::arrow::array::BooleanArray;
|
||||
use datafusion_common::config::ConfigOptions;
|
||||
use datatypes::arrow::datatypes::Field;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::vectors::{BooleanVector, ConstantVector};
|
||||
@@ -162,10 +164,21 @@ mod tests {
|
||||
]))),
|
||||
];
|
||||
|
||||
let arg_fields = vec![
|
||||
Arc::new(Field::new("a", args[0].data_type(), false)),
|
||||
Arc::new(Field::new("b", args[1].data_type(), false)),
|
||||
];
|
||||
let return_field = Arc::new(Field::new(
|
||||
"x",
|
||||
ConcreteDataType::boolean_datatype().as_arrow_type(),
|
||||
false,
|
||||
));
|
||||
let args = ScalarFunctionArgs {
|
||||
args,
|
||||
arg_fields,
|
||||
number_rows: 4,
|
||||
return_type: &ConcreteDataType::boolean_datatype().as_arrow_type(),
|
||||
return_field,
|
||||
config_options: Arc::new(ConfigOptions::default()),
|
||||
};
|
||||
match udf.invoke_with_args(args).unwrap() {
|
||||
datafusion_expr::ColumnarValue::Array(x) => {
|
||||
|
||||
Reference in New Issue
Block a user