mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-21 07:20:41 +00:00
chore: bump datafusion version to fix last_value regression (#4169)
* chore: bump datafusion version to fix `last_value` regression
* fix: resolve PR comments
* fix ci
This commit is contained in:
@@ -19,7 +19,6 @@ use std::vec;
|
||||
|
||||
use common_test_util::find_workspace_path;
|
||||
use datafusion::assert_batches_eq;
|
||||
use datafusion::config::TableParquetOptions;
|
||||
use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
|
||||
use datafusion::execution::context::TaskContext;
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
@@ -167,8 +166,9 @@ async fn test_parquet_exec() {
|
||||
.to_string();
|
||||
let base_config = scan_config(schema.clone(), None, path);
|
||||
|
||||
let exec = ParquetExec::new(base_config, None, None, TableParquetOptions::default())
|
||||
.with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(store)));
|
||||
let exec = ParquetExec::builder(base_config)
|
||||
.with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(store)))
|
||||
.build();
|
||||
|
||||
let ctx = SessionContext::new();
|
||||
|
||||
|
||||
@@ -882,7 +882,8 @@ impl TryFrom<ScalarValue> for Value {
|
||||
| ScalarValue::FixedSizeList(_)
|
||||
| ScalarValue::LargeList(_)
|
||||
| ScalarValue::Dictionary(_, _)
|
||||
| ScalarValue::Union(_, _, _) => {
|
||||
| ScalarValue::Union(_, _, _)
|
||||
| ScalarValue::Float16(_) => {
|
||||
return error::UnsupportedArrowTypeSnafu {
|
||||
arrow_type: v.data_type(),
|
||||
}
|
||||
|
||||
@@ -239,7 +239,8 @@ impl Helper {
|
||||
| ScalarValue::FixedSizeList(_)
|
||||
| ScalarValue::LargeList(_)
|
||||
| ScalarValue::Dictionary(_, _)
|
||||
| ScalarValue::Union(_, _, _) => {
|
||||
| ScalarValue::Union(_, _, _)
|
||||
| ScalarValue::Float16(_) => {
|
||||
return error::ConversionSnafu {
|
||||
from: format!("Unsupported scalar value: {value}"),
|
||||
}
|
||||
|
||||
@@ -22,7 +22,6 @@ use common_datasource::file_format::Format;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use datafusion::common::{Statistics, ToDFSchema};
|
||||
use datafusion::config::TableParquetOptions;
|
||||
use datafusion::datasource::listing::PartitionedFile;
|
||||
use datafusion::datasource::object_store::ObjectStoreUrl;
|
||||
use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
|
||||
@@ -198,10 +197,15 @@ fn new_parquet_stream_with_exec_plan(
|
||||
|
||||
// TODO(ruihang): get this from upper layer
|
||||
let task_ctx = SessionContext::default().task_ctx();
|
||||
let parquet_exec = ParquetExec::new(scan_config, filters, None, TableParquetOptions::default())
|
||||
let mut builder = ParquetExec::builder(scan_config);
|
||||
if let Some(filters) = filters {
|
||||
builder = builder.with_predicate(filters);
|
||||
}
|
||||
let parquet_exec = builder
|
||||
.with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(
|
||||
store.clone(),
|
||||
)));
|
||||
)))
|
||||
.build();
|
||||
let stream = parquet_exec
|
||||
.execute(0, task_ctx)
|
||||
.context(error::ParquetScanPlanSnafu)?;
|
||||
|
||||
@@ -24,9 +24,10 @@ use hydroflow::futures::future::Map;
|
||||
use itertools::Itertools;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use substrait::variation_const::{
|
||||
DATE_32_TYPE_REF, DATE_64_TYPE_REF, DEFAULT_TYPE_REF, TIMESTAMP_MICRO_TYPE_REF,
|
||||
TIMESTAMP_MILLI_TYPE_REF, TIMESTAMP_NANO_TYPE_REF, TIMESTAMP_SECOND_TYPE_REF,
|
||||
UNSIGNED_INTEGER_TYPE_REF,
|
||||
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
|
||||
TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
|
||||
TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF,
|
||||
};
|
||||
use substrait_proto::proto::aggregate_function::AggregationInvocation;
|
||||
use substrait_proto::proto::aggregate_rel::{Grouping, Measure};
|
||||
|
||||
@@ -17,9 +17,10 @@ use common_time::{Date, Timestamp};
|
||||
use datatypes::data_type::ConcreteDataType as CDT;
|
||||
use datatypes::value::Value;
|
||||
use substrait::variation_const::{
|
||||
DATE_32_TYPE_REF, DATE_64_TYPE_REF, DEFAULT_TYPE_REF, TIMESTAMP_MICRO_TYPE_REF,
|
||||
TIMESTAMP_MILLI_TYPE_REF, TIMESTAMP_NANO_TYPE_REF, TIMESTAMP_SECOND_TYPE_REF,
|
||||
UNSIGNED_INTEGER_TYPE_REF,
|
||||
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
|
||||
TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
|
||||
TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF,
|
||||
};
|
||||
use substrait_proto::proto::expression::literal::LiteralType;
|
||||
use substrait_proto::proto::expression::Literal;
|
||||
@@ -33,41 +34,41 @@ pub(crate) fn from_substrait_literal(lit: &Literal) -> Result<(Value, CDT), Erro
|
||||
let scalar_value = match &lit.literal_type {
|
||||
Some(LiteralType::Boolean(b)) => (Value::from(*b), CDT::boolean_datatype()),
|
||||
Some(LiteralType::I8(n)) => match lit.type_variation_reference {
|
||||
DEFAULT_TYPE_REF => (Value::from(*n as i8), CDT::int8_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_REF => (Value::from(*n as u8), CDT::uint8_datatype()),
|
||||
DEFAULT_TYPE_VARIATION_REF => (Value::from(*n as i8), CDT::int8_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF => (Value::from(*n as u8), CDT::uint8_datatype()),
|
||||
others => not_impl_err!("Unknown type variation reference {others}",)?,
|
||||
},
|
||||
Some(LiteralType::I16(n)) => match lit.type_variation_reference {
|
||||
DEFAULT_TYPE_REF => (Value::from(*n as i16), CDT::int16_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_REF => (Value::from(*n as u16), CDT::uint16_datatype()),
|
||||
DEFAULT_TYPE_VARIATION_REF => (Value::from(*n as i16), CDT::int16_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF => (Value::from(*n as u16), CDT::uint16_datatype()),
|
||||
others => not_impl_err!("Unknown type variation reference {others}",)?,
|
||||
},
|
||||
Some(LiteralType::I32(n)) => match lit.type_variation_reference {
|
||||
DEFAULT_TYPE_REF => (Value::from(*n), CDT::int32_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_REF => (Value::from(*n as u32), CDT::uint32_datatype()),
|
||||
DEFAULT_TYPE_VARIATION_REF => (Value::from(*n), CDT::int32_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF => (Value::from(*n as u32), CDT::uint32_datatype()),
|
||||
others => not_impl_err!("Unknown type variation reference {others}",)?,
|
||||
},
|
||||
Some(LiteralType::I64(n)) => match lit.type_variation_reference {
|
||||
DEFAULT_TYPE_REF => (Value::from(*n), CDT::int64_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_REF => (Value::from(*n as u64), CDT::uint64_datatype()),
|
||||
DEFAULT_TYPE_VARIATION_REF => (Value::from(*n), CDT::int64_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF => (Value::from(*n as u64), CDT::uint64_datatype()),
|
||||
others => not_impl_err!("Unknown type variation reference {others}",)?,
|
||||
},
|
||||
Some(LiteralType::Fp32(f)) => (Value::from(*f), CDT::float32_datatype()),
|
||||
Some(LiteralType::Fp64(f)) => (Value::from(*f), CDT::float64_datatype()),
|
||||
Some(LiteralType::Timestamp(t)) => match lit.type_variation_reference {
|
||||
TIMESTAMP_SECOND_TYPE_REF => (
|
||||
TIMESTAMP_SECOND_TYPE_VARIATION_REF => (
|
||||
Value::from(Timestamp::new_second(*t)),
|
||||
CDT::timestamp_second_datatype(),
|
||||
),
|
||||
TIMESTAMP_MILLI_TYPE_REF => (
|
||||
TIMESTAMP_MILLI_TYPE_VARIATION_REF => (
|
||||
Value::from(Timestamp::new_millisecond(*t)),
|
||||
CDT::timestamp_millisecond_datatype(),
|
||||
),
|
||||
TIMESTAMP_MICRO_TYPE_REF => (
|
||||
TIMESTAMP_MICRO_TYPE_VARIATION_REF => (
|
||||
Value::from(Timestamp::new_microsecond(*t)),
|
||||
CDT::timestamp_microsecond_datatype(),
|
||||
),
|
||||
TIMESTAMP_NANO_TYPE_REF => (
|
||||
TIMESTAMP_NANO_TYPE_VARIATION_REF => (
|
||||
Value::from(Timestamp::new_nanosecond(*t)),
|
||||
CDT::timestamp_nanosecond_datatype(),
|
||||
),
|
||||
@@ -115,37 +116,36 @@ pub fn from_substrait_type(null_type: &substrait_proto::proto::Type) -> Result<C
|
||||
match kind {
|
||||
Kind::Bool(_) => Ok(CDT::boolean_datatype()),
|
||||
Kind::I8(integer) => match integer.type_variation_reference {
|
||||
DEFAULT_TYPE_REF => Ok(CDT::int8_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_REF => Ok(CDT::uint8_datatype()),
|
||||
DEFAULT_TYPE_VARIATION_REF => Ok(CDT::int8_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(CDT::uint8_datatype()),
|
||||
v => not_impl_err!("Unsupported Substrait type variation {v} of type {kind:?}"),
|
||||
},
|
||||
Kind::I16(integer) => match integer.type_variation_reference {
|
||||
DEFAULT_TYPE_REF => Ok(CDT::int16_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_REF => Ok(CDT::uint16_datatype()),
|
||||
DEFAULT_TYPE_VARIATION_REF => Ok(CDT::int16_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(CDT::uint16_datatype()),
|
||||
v => not_impl_err!("Unsupported Substrait type variation {v} of type {kind:?}"),
|
||||
},
|
||||
Kind::I32(integer) => match integer.type_variation_reference {
|
||||
DEFAULT_TYPE_REF => Ok(CDT::int32_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_REF => Ok(CDT::uint32_datatype()),
|
||||
DEFAULT_TYPE_VARIATION_REF => Ok(CDT::int32_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(CDT::uint32_datatype()),
|
||||
v => not_impl_err!("Unsupported Substrait type variation {v} of type {kind:?}"),
|
||||
},
|
||||
Kind::I64(integer) => match integer.type_variation_reference {
|
||||
DEFAULT_TYPE_REF => Ok(CDT::int64_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_REF => Ok(CDT::uint64_datatype()),
|
||||
DEFAULT_TYPE_VARIATION_REF => Ok(CDT::int64_datatype()),
|
||||
UNSIGNED_INTEGER_TYPE_VARIATION_REF => Ok(CDT::uint64_datatype()),
|
||||
v => not_impl_err!("Unsupported Substrait type variation {v} of type {kind:?}"),
|
||||
},
|
||||
Kind::Fp32(_) => Ok(CDT::float32_datatype()),
|
||||
Kind::Fp64(_) => Ok(CDT::float64_datatype()),
|
||||
Kind::Timestamp(ts) => match ts.type_variation_reference {
|
||||
TIMESTAMP_SECOND_TYPE_REF => Ok(CDT::timestamp_second_datatype()),
|
||||
TIMESTAMP_MILLI_TYPE_REF => Ok(CDT::timestamp_millisecond_datatype()),
|
||||
TIMESTAMP_MICRO_TYPE_REF => Ok(CDT::timestamp_microsecond_datatype()),
|
||||
TIMESTAMP_NANO_TYPE_REF => Ok(CDT::timestamp_nanosecond_datatype()),
|
||||
TIMESTAMP_SECOND_TYPE_VARIATION_REF => Ok(CDT::timestamp_second_datatype()),
|
||||
TIMESTAMP_MILLI_TYPE_VARIATION_REF => Ok(CDT::timestamp_millisecond_datatype()),
|
||||
TIMESTAMP_MICRO_TYPE_VARIATION_REF => Ok(CDT::timestamp_microsecond_datatype()),
|
||||
TIMESTAMP_NANO_TYPE_VARIATION_REF => Ok(CDT::timestamp_nanosecond_datatype()),
|
||||
v => not_impl_err!("Unsupported Substrait type variation {v} of type {kind:?}"),
|
||||
},
|
||||
Kind::Date(date) => match date.type_variation_reference {
|
||||
DATE_32_TYPE_REF => Ok(CDT::date_datatype()),
|
||||
DATE_64_TYPE_REF => Ok(CDT::date_datatype()),
|
||||
DATE_32_TYPE_VARIATION_REF | DATE_64_TYPE_VARIATION_REF => Ok(CDT::date_datatype()),
|
||||
v => not_impl_err!("Unsupported Substrait type variation {v} of type {kind:?}"),
|
||||
},
|
||||
Kind::Binary(_) => Ok(CDT::binary_datatype()),
|
||||
|
||||
@@ -22,6 +22,7 @@ use common_query::prelude::GREPTIME_VALUE;
|
||||
use datafusion::common::{DFSchemaRef, Result as DfResult};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::execution::context::SessionState;
|
||||
use datafusion::functions_aggregate::sum;
|
||||
use datafusion::logical_expr::expr::{
|
||||
AggregateFunction, AggregateFunctionDefinition, Alias, ScalarFunction,
|
||||
};
|
||||
@@ -1379,14 +1380,18 @@ impl PromPlanner {
|
||||
input_plan: &LogicalPlan,
|
||||
) -> Result<Vec<DfExpr>> {
|
||||
let aggr = match op.id() {
|
||||
token::T_SUM => AggregateFunctionEnum::Sum,
|
||||
token::T_AVG => AggregateFunctionEnum::Avg,
|
||||
token::T_COUNT => AggregateFunctionEnum::Count,
|
||||
token::T_MIN => AggregateFunctionEnum::Min,
|
||||
token::T_MAX => AggregateFunctionEnum::Max,
|
||||
token::T_GROUP => AggregateFunctionEnum::Grouping,
|
||||
token::T_STDDEV => AggregateFunctionEnum::StddevPop,
|
||||
token::T_STDVAR => AggregateFunctionEnum::VariancePop,
|
||||
token::T_SUM => AggregateFunctionDefinition::UDF(sum::sum_udaf()),
|
||||
token::T_AVG => AggregateFunctionDefinition::BuiltIn(AggregateFunctionEnum::Avg),
|
||||
token::T_COUNT => AggregateFunctionDefinition::BuiltIn(AggregateFunctionEnum::Count),
|
||||
token::T_MIN => AggregateFunctionDefinition::BuiltIn(AggregateFunctionEnum::Min),
|
||||
token::T_MAX => AggregateFunctionDefinition::BuiltIn(AggregateFunctionEnum::Max),
|
||||
token::T_GROUP => AggregateFunctionDefinition::BuiltIn(AggregateFunctionEnum::Grouping),
|
||||
token::T_STDDEV => {
|
||||
AggregateFunctionDefinition::BuiltIn(AggregateFunctionEnum::StddevPop)
|
||||
}
|
||||
token::T_STDVAR => {
|
||||
AggregateFunctionDefinition::BuiltIn(AggregateFunctionEnum::VariancePop)
|
||||
}
|
||||
token::T_TOPK | token::T_BOTTOMK | token::T_COUNT_VALUES | token::T_QUANTILE => {
|
||||
UnsupportedExprSnafu {
|
||||
name: format!("{op:?}"),
|
||||
@@ -1403,7 +1408,7 @@ impl PromPlanner {
|
||||
.iter()
|
||||
.map(|col| {
|
||||
DfExpr::AggregateFunction(AggregateFunction {
|
||||
func_def: AggregateFunctionDefinition::BuiltIn(aggr.clone()),
|
||||
func_def: aggr.clone(),
|
||||
args: vec![DfExpr::Column(Column::from_name(col))],
|
||||
distinct: false,
|
||||
filter: None,
|
||||
|
||||
Reference in New Issue
Block a user