chore: update datafusion family (#6675)

* chore: update datafusion family

Signed-off-by: luofucong <luofc@foxmail.com>

* fix ci

Signed-off-by: luofucong <luofc@foxmail.com>

* use official otel-arrow-rust

Signed-off-by: luofucong <luofc@foxmail.com>

* rebase

Signed-off-by: luofucong <luofc@foxmail.com>

* use the official orc-rust

Signed-off-by: luofucong <luofc@foxmail.com>

* resolve PR comments

Signed-off-by: luofucong <luofc@foxmail.com>

* remove the empty lines

Signed-off-by: luofucong <luofc@foxmail.com>

* try following PR comments

Signed-off-by: luofucong <luofc@foxmail.com>

---------

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2025-08-15 20:41:49 +08:00
committed by GitHub
parent dfc29eb3b3
commit f9d2a89a0c
198 changed files with 4932 additions and 4196 deletions

View File

@@ -89,7 +89,9 @@ impl DistAnalyzeExec {
impl DisplayAs for DistAnalyzeExec {
fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match t {
DisplayFormatType::Default | DisplayFormatType::Verbose => {
DisplayFormatType::Default
| DisplayFormatType::Verbose
| DisplayFormatType::TreeRender => {
write!(f, "DistAnalyzeExec",)
}
}

View File

@@ -206,6 +206,7 @@ impl DistPlannerAnalyzer {
Ok(Subquery {
subquery: Arc::new(rewrote_subquery),
outer_ref_columns: subquery.outer_ref_columns,
spans: Default::default(),
})
}
}

View File

@@ -646,7 +646,8 @@ fn expand_part_col_aggr_part_col_aggr() {
.unwrap();
let expected_original = [
"Aggregate: groupBy=[[t.pk1, t.pk2, max(t.number)]], aggr=[[min(max(t.number))]]", // notice here `max(t.number)` is added to groupBy due to aggr exprs depend on this column
// See DataFusion #14860 for change details.
"Aggregate: groupBy=[[t.pk1, t.pk2]], aggr=[[min(max(t.number))]]",
" Aggregate: groupBy=[[t.pk1, t.pk2]], aggr=[[max(t.number)]]",
" TableScan: t",
]
@@ -657,9 +658,9 @@ fn expand_part_col_aggr_part_col_aggr() {
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
let expected = [
"Projection: t.pk1, t.pk2, max(t.number), min(max(t.number))",
"Projection: t.pk1, t.pk2, min(max(t.number))",
" MergeScan [is_placeholder=false, remote_input=[",
"Aggregate: groupBy=[[t.pk1, t.pk2, max(t.number)]], aggr=[[min(max(t.number))]]",
"Aggregate: groupBy=[[t.pk1, t.pk2]], aggr=[[min(max(t.number))]]",
" Aggregate: groupBy=[[t.pk1, t.pk2]], aggr=[[max(t.number)]]",
" TableScan: t",
"]]",

View File

@@ -78,7 +78,7 @@ pub fn step_aggr_to_upper_aggr(
pub fn is_all_aggr_exprs_steppable(aggr_exprs: &[Expr]) -> bool {
aggr_exprs.iter().all(|expr| {
if let Some(aggr_func) = get_aggr_func(expr) {
if aggr_func.distinct {
if aggr_func.params.distinct {
// Distinct aggregate functions are not steppable(yet).
return false;
}
@@ -259,10 +259,11 @@ impl Categorizer {
}
pub fn check_expr(expr: &Expr) -> Commutativity {
#[allow(deprecated)]
match expr {
Expr::Column(_)
| Expr::ScalarVariable(_, _)
| Expr::Literal(_)
| Expr::Literal(_, _)
| Expr::BinaryExpr(_)
| Expr::Not(_)
| Expr::IsNotNull(_)

View File

@@ -40,9 +40,7 @@ use datafusion::physical_plan::{
use datafusion_common::{Column as ColumnExpr, Result};
use datafusion_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
use datafusion_physical_expr::expressions::Column;
use datafusion_physical_expr::{
Distribution, EquivalenceProperties, LexOrdering, PhysicalSortExpr,
};
use datafusion_physical_expr::{Distribution, EquivalenceProperties, PhysicalSortExpr};
use datatypes::schema::{Schema, SchemaRef};
use futures_util::StreamExt;
use greptime_proto::v1::region::RegionRequestHeader;
@@ -209,10 +207,7 @@ impl MergeScanExec {
))
})
.collect::<Result<Vec<_>>>()?;
EquivalenceProperties::new_with_orderings(
arrow_schema.clone(),
&[LexOrdering::new(lex_ordering)],
)
EquivalenceProperties::new_with_orderings(arrow_schema.clone(), vec![lex_ordering])
} else {
EquivalenceProperties::new(arrow_schema.clone())
};

View File

@@ -398,7 +398,7 @@ impl DataFusionExprConverter {
};
Ok(Operand::Column(column_name))
}
Expr::Literal(scalar_value) => {
Expr::Literal(scalar_value, _) => {
let value = Value::try_from(scalar_value.clone()).unwrap();
Ok(Operand::Value(value))
}

View File

@@ -17,6 +17,7 @@
#![feature(try_blocks)]
#![feature(stmt_expr_attributes)]
#![feature(iterator_try_collect)]
#![feature(box_patterns)]
mod analyze;
pub mod dataframe;

View File

@@ -1546,7 +1546,7 @@ mod tests {
.unwrap();
// Verify the nested structure is properly created
let expected_expr_debug = "BinaryExpr(BinaryExpr { left: BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"age\" }), op: Plus, right: Literal(Int32(5)) }), op: Gt, right: Literal(Int32(30)) })";
let expected_expr_debug = r#"BinaryExpr(BinaryExpr { left: BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: "age" }), op: Plus, right: Literal(Int32(5), None) }), op: Gt, right: Literal(Int32(30), None) })"#;
assert_eq!(format!("{:?}", expr), expected_expr_debug);
}
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::fmt;
use std::fmt::Formatter;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
@@ -129,6 +130,10 @@ impl PhysicalExpr for PreCompiledMatchesTermExpr {
probes: self.probes.clone(),
}))
}
/// Implements `fmt_sql` by writing this expression's `Display` output to `f` unchanged.
fn fmt_sql(&self, f: &mut Formatter<'_>) -> fmt::Result {
// Delegates to the `Display` impl of `Self`; nothing SQL-specific is added here.
write!(f, "{}", self)
}
}
/// Optimizer rule that pre-compiles constant term in `matches_term` function.
@@ -236,13 +241,14 @@ mod tests {
use common_function::scalars::matches_term::MatchesTermFunction;
use common_function::scalars::udf::create_udf;
use common_function::state::FunctionState;
use datafusion::datasource::memory::MemorySourceConfig;
use datafusion::datasource::source::DataSourceExec;
use datafusion::physical_optimizer::PhysicalOptimizerRule;
use datafusion::physical_plan::filter::FilterExec;
use datafusion::physical_plan::get_plan_string;
use datafusion::physical_plan::memory::MemoryExec;
use datafusion_common::{Column, DFSchema, ScalarValue};
use datafusion_common::{Column, DFSchema};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{Expr, ScalarUDF};
use datafusion_expr::{Expr, Literal, ScalarUDF};
use datafusion_physical_expr::{create_physical_expr, ScalarFunctionExpr};
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
@@ -335,18 +341,16 @@ mod tests {
let predicate = create_physical_expr(
&Expr::ScalarFunction(ScalarFunction::new_udf(
matches_term_udf(),
vec![
Expr::Column(Column::from_name("text")),
Expr::Literal(ScalarValue::Utf8(Some("hello".to_string()))),
],
vec![Expr::Column(Column::from_name("text")), "hello".lit()],
)),
&DFSchema::try_from(batch.schema().clone()).unwrap(),
&Default::default(),
)
.unwrap();
let input =
Arc::new(MemoryExec::try_new(&[vec![batch.clone()]], batch.schema(), None).unwrap());
let input = DataSourceExec::from_data_source(
MemorySourceConfig::try_new(&[vec![batch.clone()]], batch.schema(), None).unwrap(),
);
let filter = FilterExec::try_new(predicate, input).unwrap();
// Apply the optimizer
@@ -385,8 +389,9 @@ mod tests {
)
.unwrap();
let input =
Arc::new(MemoryExec::try_new(&[vec![batch.clone()]], batch.schema(), None).unwrap());
let input = DataSourceExec::from_data_source(
MemorySourceConfig::try_new(&[vec![batch.clone()]], batch.schema(), None).unwrap(),
);
let filter = FilterExec::try_new(predicate, input).unwrap();
let optimizer = MatchesConstantTermOptimizer;

View File

@@ -16,7 +16,7 @@ use datafusion::datasource::DefaultTableSource;
use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeVisitor,
};
use datafusion_common::{Column, Result as DataFusionResult};
use datafusion_common::{Column, Result as DataFusionResult, ScalarValue};
use datafusion_expr::expr::{AggregateFunction, WindowFunction};
use datafusion_expr::utils::COUNT_STAR_EXPANSION;
use datafusion_expr::{col, lit, Expr, LogicalPlan, WindowFunctionDefinition};
@@ -62,13 +62,13 @@ impl CountWildcardToTimeIndexRule {
Expr::WindowFunction(mut window_function)
if Self::is_count_star_window_aggregate(&window_function) =>
{
window_function.args.clone_from(&new_arg);
window_function.params.args.clone_from(&new_arg);
Ok(Transformed::yes(Expr::WindowFunction(window_function)))
}
Expr::AggregateFunction(mut aggregate_function)
if Self::is_count_star_aggregate(&aggregate_function) =>
{
aggregate_function.args.clone_from(&new_arg);
aggregate_function.params.args.clone_from(&new_arg);
Ok(Transformed::yes(Expr::AggregateFunction(
aggregate_function,
)))
@@ -105,24 +105,30 @@ impl CountWildcardToTimeIndexRule {
/// Utility functions from the original rule.
impl CountWildcardToTimeIndexRule {
fn is_wildcard(expr: &Expr) -> bool {
matches!(expr, Expr::Wildcard { .. })
/// Returns `true` when `args` is one of: an empty list, exactly one `Int64` literal
/// equal to 1, or exactly one `Expr::Wildcard`. Every other argument list gives `false`.
// NOTE(review): the `expect(deprecated)` below presumably covers the `Expr::Wildcard`
// pattern — confirm against the DataFusion version in use.
#[expect(deprecated)]
fn args_at_most_wildcard_or_literal_one(args: &[Expr]) -> bool {
match args {
[] => true,
// The literal's second field is ignored; only the `Int64` value is compared.
[Expr::Literal(ScalarValue::Int64(Some(v)), _)] => *v == 1,
[Expr::Wildcard { .. }] => true,
_ => false,
}
}
fn is_count_star_aggregate(aggregate_function: &AggregateFunction) -> bool {
let args = &aggregate_function.params.args;
matches!(aggregate_function,
AggregateFunction {
func,
args,
..
} if func.name() == "count" && (args.len() == 1 && Self::is_wildcard(&args[0]) || args.is_empty()))
} if func.name() == "count" && Self::args_at_most_wildcard_or_literal_one(args))
}
fn is_count_star_window_aggregate(window_function: &WindowFunction) -> bool {
let args = &window_function.args;
let args = &window_function.params.args;
matches!(window_function.fun,
WindowFunctionDefinition::AggregateUDF(ref udaf)
if udaf.name() == "count" && (args.len() == 1 && Self::is_wildcard(&args[0]) || args.is_empty()))
if udaf.name() == "count" && Self::args_at_most_wildcard_or_literal_one(args))
}
}
@@ -184,8 +190,8 @@ impl TimeIndexFinder {
mod test {
use std::sync::Arc;
use datafusion::functions_aggregate::count::count;
use datafusion_expr::{wildcard, LogicalPlanBuilder};
use datafusion::functions_aggregate::count::count_all;
use datafusion_expr::LogicalPlanBuilder;
use table::table::numbers::NumbersTable;
use super::*;
@@ -199,7 +205,7 @@ mod test {
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
.unwrap()
.aggregate(Vec::<Expr>::new(), vec![count(wildcard())])
.aggregate(Vec::<Expr>::new(), vec![count_all()])
.unwrap()
.alias(r#""FgHiJ""#)
.unwrap()

View File

@@ -57,7 +57,7 @@ impl ParallelizeScan {
.transform_down(|plan| {
if let Some(sort_exec) = plan.as_any().downcast_ref::<SortExec>() {
// save the first order expr
first_order_expr = sort_exec.expr().first().cloned();
first_order_expr = Some(sort_exec.expr().first()).cloned();
} else if let Some(region_scan_exec) =
plan.as_any().downcast_ref::<RegionScanExec>()
{

View File

@@ -217,13 +217,16 @@ impl TreeNodeVisitor<'_> for ScanHintVisitor {
is_all_last_value = false;
break;
};
if func.func.name() != "last_value" || func.filter.is_some() || func.distinct {
if func.func.name() != "last_value"
|| func.params.filter.is_some()
|| func.params.distinct
{
is_all_last_value = false;
break;
}
// check order by requirement
if let Some(order_by) = &func.order_by
&& let Some(first_order_by) = order_by.first()
let order_by = &func.params.order_by;
if let Some(first_order_by) = order_by.first()
&& order_by.len() == 1
{
if let Some(existing_order_by) = &order_by_expr {
@@ -298,7 +301,7 @@ mod test {
use std::sync::Arc;
use datafusion::functions_aggregate::first_last::last_value_udaf;
use datafusion_expr::expr::AggregateFunction;
use datafusion_expr::expr::{AggregateFunction, AggregateFunctionParams};
use datafusion_expr::{col, LogicalPlanBuilder};
use datafusion_optimizer::OptimizerContext;
use store_api::storage::RegionId;
@@ -320,7 +323,6 @@ mod test {
.unwrap();
let context = OptimizerContext::default();
assert!(ScanHintRule.supports_rewrite());
ScanHintRule.rewrite(plan, &context).unwrap();
// should read the first (with `.sort(true, false)`) sort option
@@ -347,15 +349,17 @@ mod test {
vec![col("k0")],
vec![Expr::AggregateFunction(AggregateFunction {
func: last_value_udaf(),
args: vec![col("v0")],
distinct: false,
filter: None,
order_by: Some(vec![Sort {
expr: col("ts"),
asc: true,
nulls_first: true,
}]),
null_treatment: None,
params: AggregateFunctionParams {
args: vec![col("v0")],
distinct: false,
filter: None,
order_by: vec![Sort {
expr: col("ts"),
asc: true,
nulls_first: true,
}],
null_treatment: None,
},
})],
)
.unwrap()
@@ -363,7 +367,6 @@ mod test {
.unwrap();
let context = OptimizerContext::default();
assert!(ScanHintRule.supports_rewrite());
ScanHintRule.rewrite(plan, &context).unwrap();
let scan_req = provider.scan_request();

View File

@@ -90,9 +90,9 @@ impl TreeNodeRewriter for StringNormalizationConverter {
Expr::Cast(Cast { expr, data_type }) => {
let expr = match data_type {
DataType::Timestamp(_, _) => match *expr {
Expr::Literal(value) => match value {
Expr::Literal(value, _) => match value {
ScalarValue::Utf8(Some(s)) => trim_utf_expr(s),
_ => Expr::Literal(value),
_ => Expr::Literal(value, None),
},
expr => expr,
},
@@ -112,7 +112,7 @@ impl TreeNodeRewriter for StringNormalizationConverter {
fn trim_utf_expr(s: String) -> Expr {
let parts: Vec<_> = s.split_whitespace().collect();
let trimmed = parts.join(" ");
Expr::Literal(ScalarValue::Utf8(Some(trimmed)))
Expr::Literal(ScalarValue::Utf8(Some(trimmed)), None)
}
#[cfg(test)]
@@ -195,7 +195,7 @@ mod tests {
fn prepare_test_plan_builder() -> LogicalPlanBuilder {
let schema = Schema::new(vec![Field::new("f", DataType::Float64, false)]);
let table = MemTable::try_new(SchemaRef::from(schema), vec![]).unwrap();
let table = MemTable::try_new(SchemaRef::from(schema), vec![vec![]]).unwrap();
LogicalPlanBuilder::scan("t", provider_as_source(Arc::new(table)), None).unwrap()
}
}

View File

@@ -125,7 +125,7 @@ mod tests {
Field::new("a", DataType::Utf8, false),
Field::new("b", DataType::Utf8, false),
]);
let table = MemTable::try_new(SchemaRef::from(schema), vec![]).unwrap();
let table = MemTable::try_new(SchemaRef::from(schema), vec![vec![]]).unwrap();
LogicalPlanBuilder::scan("t", provider_as_source(Arc::new(table)), None).unwrap()
}

View File

@@ -87,7 +87,6 @@ impl ExtensionAnalyzerRule for TypeConversionRule {
| LogicalPlan::Extension { .. }
| LogicalPlan::Sort { .. }
| LogicalPlan::Union { .. }
| LogicalPlan::Join { .. }
| LogicalPlan::Values { .. }
| LogicalPlan::Analyze { .. } => {
let mut converter = TypeConverter {
@@ -116,7 +115,8 @@ impl ExtensionAnalyzerRule for TypeConversionRule {
| LogicalPlan::Statement(_)
| LogicalPlan::Ddl(_)
| LogicalPlan::Copy(_)
| LogicalPlan::RecursiveQuery(_) => Ok(Transformed::no(plan)),
| LogicalPlan::RecursiveQuery(_)
| LogicalPlan::Join { .. } => Ok(Transformed::no(plan)),
})
.map(|x| x.data)
}
@@ -154,7 +154,7 @@ impl TypeConverter {
(target_type, value) => {
let value_arr = value.to_array()?;
let arr = compute::cast(&value_arr, target_type)
.map_err(|e| DataFusionError::ArrowError(e, None))?;
.map_err(|e| DataFusionError::ArrowError(Box::new(e), None))?;
ScalarValue::try_from_array(
&arr,
@@ -180,23 +180,23 @@ impl TypeConverter {
}
match (left, right) {
(Expr::Column(col), Expr::Literal(value)) => {
(Expr::Column(col), Expr::Literal(value, _)) => {
let casted_right = self.cast_scalar_value(value, target_type)?;
if casted_right.is_null() {
return Err(DataFusionError::Plan(format!(
"column:{col:?}. Casting value:{value:?} to {target_type:?} is invalid",
)));
}
Ok((left.clone(), Expr::Literal(casted_right)))
Ok((left.clone(), Expr::Literal(casted_right, None)))
}
(Expr::Literal(value), Expr::Column(col)) => {
(Expr::Literal(value, _), Expr::Column(col)) => {
let casted_left = self.cast_scalar_value(value, target_type)?;
if casted_left.is_null() {
return Err(DataFusionError::Plan(format!(
"column:{col:?}. Casting value:{value:?} to {target_type:?} is invalid",
)));
}
Ok((Expr::Literal(casted_left), right.clone()))
Ok((Expr::Literal(casted_left, None), right.clone()))
}
_ => Ok((left.clone(), right.clone())),
}
@@ -255,7 +255,7 @@ impl TreeNodeRewriter for TypeConverter {
negated,
})
}
Expr::Literal(value) => match value {
Expr::Literal(value, _) => match value {
ScalarValue::TimestampSecond(Some(i), _) => {
timestamp_to_timestamp_ms_expr(i, TimeUnit::Second)
}
@@ -268,7 +268,7 @@ impl TreeNodeRewriter for TypeConverter {
ScalarValue::TimestampNanosecond(Some(i), _) => {
timestamp_to_timestamp_ms_expr(i, TimeUnit::Nanosecond)
}
_ => Expr::Literal(value),
_ => Expr::Literal(value, None),
},
expr => expr,
};
@@ -284,7 +284,10 @@ fn timestamp_to_timestamp_ms_expr(val: i64, unit: TimeUnit) -> Expr {
TimeUnit::Nanosecond => val / 1_000 / 1_000,
};
Expr::Literal(ScalarValue::TimestampMillisecond(Some(timestamp), None))
Expr::Literal(
ScalarValue::TimestampMillisecond(Some(timestamp), None),
None,
)
}
fn string_to_timestamp_ms(string: &str, timezone: Option<&Timezone>) -> Result<ScalarValue> {
@@ -308,7 +311,7 @@ mod tests {
use datafusion_common::arrow::datatypes::Field;
use datafusion_common::{Column, DFSchema};
use datafusion_expr::LogicalPlanBuilder;
use datafusion_expr::{Literal, LogicalPlanBuilder};
use datafusion_sql::TableReference;
use session::context::QueryContext;
@@ -348,36 +351,36 @@ mod tests {
fn test_timestamp_to_timestamp_ms_expr() {
assert_eq!(
timestamp_to_timestamp_ms_expr(123, TimeUnit::Second),
Expr::Literal(ScalarValue::TimestampMillisecond(Some(123000), None))
ScalarValue::TimestampMillisecond(Some(123000), None).lit()
);
assert_eq!(
timestamp_to_timestamp_ms_expr(123, TimeUnit::Millisecond),
Expr::Literal(ScalarValue::TimestampMillisecond(Some(123), None))
ScalarValue::TimestampMillisecond(Some(123), None).lit()
);
assert_eq!(
timestamp_to_timestamp_ms_expr(123, TimeUnit::Microsecond),
Expr::Literal(ScalarValue::TimestampMillisecond(Some(0), None))
ScalarValue::TimestampMillisecond(Some(0), None).lit()
);
assert_eq!(
timestamp_to_timestamp_ms_expr(1230, TimeUnit::Microsecond),
Expr::Literal(ScalarValue::TimestampMillisecond(Some(1), None))
ScalarValue::TimestampMillisecond(Some(1), None).lit()
);
assert_eq!(
timestamp_to_timestamp_ms_expr(123000, TimeUnit::Microsecond),
Expr::Literal(ScalarValue::TimestampMillisecond(Some(123), None))
ScalarValue::TimestampMillisecond(Some(123), None).lit()
);
assert_eq!(
timestamp_to_timestamp_ms_expr(1230, TimeUnit::Nanosecond),
Expr::Literal(ScalarValue::TimestampMillisecond(Some(0), None))
ScalarValue::TimestampMillisecond(Some(0), None).lit()
);
assert_eq!(
timestamp_to_timestamp_ms_expr(123_000_000, TimeUnit::Nanosecond),
Expr::Literal(ScalarValue::TimestampMillisecond(Some(123), None))
ScalarValue::TimestampMillisecond(Some(123), None).lit()
);
}
@@ -405,16 +408,13 @@ mod tests {
};
assert_eq!(
Expr::Column(Column::from_name("ts")).gt(Expr::Literal(ScalarValue::TimestampSecond(
Expr::Column(Column::from_name("ts")).gt(ScalarValue::TimestampSecond(
Some(1599514949),
None
))),
)
.lit()),
converter
.f_up(
Expr::Column(Column::from_name("ts")).gt(Expr::Literal(ScalarValue::Utf8(
Some("2020-09-08T05:42:29+08:00".to_string()),
)))
)
.f_up(Expr::Column(Column::from_name("ts")).gt("2020-09-08T05:42:29+08:00".lit()))
.unwrap()
.data
);
@@ -439,13 +439,9 @@ mod tests {
};
assert_eq!(
Expr::Column(Column::from_name(col_name))
.eq(Expr::Literal(ScalarValue::Boolean(Some(true)))),
Expr::Column(Column::from_name(col_name)).eq(true.lit()),
converter
.f_up(
Expr::Column(Column::from_name(col_name))
.eq(Expr::Literal(ScalarValue::Utf8(Some("true".to_string()))))
)
.f_up(Expr::Column(Column::from_name(col_name)).eq("true".lit()))
.unwrap()
.data
);
@@ -453,40 +449,36 @@ mod tests {
#[test]
fn test_retrieve_type_from_aggr_plan() {
let plan =
LogicalPlanBuilder::values(vec![vec![
Expr::Literal(ScalarValue::Int64(Some(1))),
Expr::Literal(ScalarValue::Float64(Some(1.0))),
Expr::Literal(ScalarValue::TimestampMillisecond(Some(1), None)),
]])
.unwrap()
.filter(Expr::Column(Column::from_name("column3")).gt(Expr::Literal(
ScalarValue::Utf8(Some("1970-01-01 00:00:00+08:00".to_string())),
)))
.unwrap()
.filter(
Expr::Literal(ScalarValue::Utf8(Some(
"1970-01-01 00:00:00+08:00".to_string(),
)))
let plan = LogicalPlanBuilder::values(vec![vec![
ScalarValue::Int64(Some(1)).lit(),
ScalarValue::Float64(Some(1.0)).lit(),
ScalarValue::TimestampMillisecond(Some(1), None).lit(),
]])
.unwrap()
.filter(Expr::Column(Column::from_name("column3")).gt("1970-01-01 00:00:00+08:00".lit()))
.unwrap()
.filter(
"1970-01-01 00:00:00+08:00"
.lit()
.lt_eq(Expr::Column(Column::from_name("column3"))),
)
.unwrap()
.aggregate(
Vec::<Expr>::new(),
vec![Expr::AggregateFunction(
datafusion_expr::expr::AggregateFunction::new_udf(
datafusion::functions_aggregate::count::count_udaf(),
vec![Expr::Column(Column::from_name("column1"))],
false,
None,
None,
None,
),
)],
)
.unwrap()
.build()
.unwrap();
)
.unwrap()
.aggregate(
Vec::<Expr>::new(),
vec![Expr::AggregateFunction(
datafusion_expr::expr::AggregateFunction::new_udf(
datafusion::functions_aggregate::count::count_udaf(),
vec![Expr::Column(Column::from_name("column1"))],
false,
None,
vec![],
None,
),
)],
)
.unwrap()
.build()
.unwrap();
let context = QueryEngineContext::mock();
let transformed_plan = TypeConversionRule
@@ -505,21 +497,18 @@ mod tests {
fn test_reverse_non_ts_type() {
let context = QueryEngineContext::mock();
let plan =
LogicalPlanBuilder::values(vec![vec![Expr::Literal(ScalarValue::Float64(Some(1.0)))]])
.unwrap()
.filter(
Expr::Column(Column::from_name("column1"))
.gt_eq(Expr::Literal(ScalarValue::Utf8(Some("1.2345".to_string())))),
)
.unwrap()
.filter(
Expr::Literal(ScalarValue::Utf8(Some("1.2345".to_string())))
.lt(Expr::Column(Column::from_name("column1"))),
)
.unwrap()
.build()
.unwrap();
let plan = LogicalPlanBuilder::values(vec![vec![1.0f64.lit()]])
.unwrap()
.filter(Expr::Column(Column::from_name("column1")).gt_eq("1.2345".lit()))
.unwrap()
.filter(
"1.2345"
.lit()
.lt(Expr::Column(Column::from_name("column1"))),
)
.unwrap()
.build()
.unwrap();
let transformed_plan = TypeConversionRule
.analyze(plan, &context, &ConfigOptions::default())
.unwrap();

View File

@@ -18,6 +18,7 @@ use std::sync::Arc;
use datafusion::physical_optimizer::PhysicalOptimizerRule;
use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec;
use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
use datafusion::physical_plan::coop::CooperativeExec;
use datafusion::physical_plan::filter::FilterExec;
use datafusion::physical_plan::projection::ProjectionExec;
use datafusion::physical_plan::repartition::RepartitionExec;
@@ -27,7 +28,6 @@ use datafusion::physical_plan::ExecutionPlan;
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::Result as DataFusionResult;
use datafusion_physical_expr::expressions::Column as PhysicalColumn;
use datafusion_physical_expr::LexOrdering;
use store_api::region_engine::PartitionRange;
use table::table::scan::RegionScanExec;
@@ -89,11 +89,11 @@ impl WindowedSortPhysicalRule {
};
let input_schema = sort_input.schema();
if let Some(first_sort_expr) = sort_exec.expr().first()
&& let Some(column_expr) = first_sort_expr
.expr
.as_any()
.downcast_ref::<PhysicalColumn>()
let first_sort_expr = sort_exec.expr().first();
if let Some(column_expr) = first_sort_expr
.expr
.as_any()
.downcast_ref::<PhysicalColumn>()
&& scanner_info
.time_index
.contains(input_schema.field(column_expr.index()).name())
@@ -101,7 +101,6 @@ impl WindowedSortPhysicalRule {
} else {
return Ok(Transformed::no(plan));
}
let first_sort_expr = sort_exec.expr().first().unwrap().clone();
// PartSortExec is unnecessary if:
// - there is no tag column, and
@@ -120,7 +119,7 @@ impl WindowedSortPhysicalRule {
};
let windowed_sort_exec = WindowedSortExec::try_new(
first_sort_expr,
first_sort_expr.clone(),
sort_exec.fetch(),
scanner_info.partition_ranges,
new_input,
@@ -128,7 +127,7 @@ impl WindowedSortPhysicalRule {
if !preserve_partitioning {
let order_preserving_merge = SortPreservingMergeExec::new(
LexOrdering::new(sort_exec.expr().to_vec()),
sort_exec.expr().clone(),
Arc::new(windowed_sort_exec),
);
return Ok(Transformed {
@@ -168,6 +167,10 @@ fn fetch_partition_range(input: Arc<dyn ExecutionPlan>) -> DataFusionResult<Opti
let mut is_batch_coalesced = false;
input.transform_up(|plan| {
if plan.as_any().is::<CooperativeExec>() {
return Ok(Transformed::no(plan));
}
// Unappliable case, reset the state.
if plan.as_any().is::<RepartitionExec>()
|| plan.as_any().is::<CoalescePartitionsExec>()

View File

@@ -35,7 +35,7 @@ use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, TopK,
};
use datafusion_common::{internal_err, DataFusionError};
use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};
use datafusion_physical_expr::PhysicalSortExpr;
use futures::{Stream, StreamExt};
use itertools::Itertools;
use snafu::location;
@@ -243,11 +243,13 @@ impl PartSortStream {
TopK::try_new(
partition,
sort.schema().clone(),
LexOrdering::new(vec![sort.expression.clone()]),
vec![],
[sort.expression.clone()].into(),
limit,
context.session_config().batch_size(),
context.runtime_env(),
&sort.metrics,
None,
)?,
0,
)
@@ -429,14 +431,14 @@ impl PartSortStream {
let sort_column =
concat(&sort_columns.iter().map(|a| a.as_ref()).collect_vec()).map_err(|e| {
DataFusionError::ArrowError(
e,
Box::new(e),
Some(format!("Fail to concat sort columns at {}", location!())),
)
})?;
let indices = sort_to_indices(&sort_column, opt, self.limit).map_err(|e| {
DataFusionError::ArrowError(
e,
Box::new(e),
Some(format!("Fail to sort to indices at {}", location!())),
)
})?;
@@ -468,7 +470,7 @@ impl PartSortStream {
let full_input = concat_batches(&self.schema, &buffer).map_err(|e| {
DataFusionError::ArrowError(
e,
Box::new(e),
Some(format!(
"Fail to concat input batches when sorting at {}",
location!()
@@ -478,7 +480,7 @@ impl PartSortStream {
let sorted = take_record_batch(&full_input, &indices).map_err(|e| {
DataFusionError::ArrowError(
e,
Box::new(e),
Some(format!(
"Fail to take result record batch when sorting at {}",
location!()
@@ -498,11 +500,13 @@ impl PartSortStream {
let new_top_buffer = TopK::try_new(
self.partition,
self.schema().clone(),
LexOrdering::new(vec![self.expression.clone()]),
vec![],
[self.expression.clone()].into(),
self.limit.unwrap(),
self.context.session_config().batch_size(),
self.context.runtime_env(),
&self.root_metrics,
None,
)?;
let PartSortBuffer::Top(top_k, _) =
std::mem::replace(&mut self.buffer, PartSortBuffer::Top(new_top_buffer, 0))
@@ -532,7 +536,7 @@ impl PartSortStream {
let concat_batch = concat_batches(&self.schema, &results).map_err(|e| {
DataFusionError::ArrowError(
e,
Box::new(e),
Some(format!(
"Fail to concat top k result record batch when sorting at {}",
location!()

View File

@@ -17,7 +17,7 @@ use std::collections::HashSet;
use datafusion::datasource::DefaultTableSource;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_common::TableReference;
use datafusion_expr::{BinaryExpr, Expr, Join, LogicalPlan, Operator};
use datafusion_expr::{Expr, LogicalPlan};
use session::context::QueryContextRef;
pub use table::metadata::TableType;
use table::table::adapter::DfTableProviderAdapter;
@@ -132,24 +132,7 @@ pub trait ExtractExpr {
impl ExtractExpr for LogicalPlan {
fn expressions_consider_join(&self) -> Vec<Expr> {
match self {
LogicalPlan::Join(Join { on, filter, .. }) => {
// The first part of expr is equi-exprs,
// and the struct of each equi-expr is like `left-expr = right-expr`.
// We only normalize the filter_expr (non equality predicate from ON clause).
on.iter()
.map(|(left, right)| {
Expr::BinaryExpr(BinaryExpr {
left: Box::new(left.clone()),
op: Operator::Eq,
right: Box::new(right.clone()),
})
})
.chain(filter.clone())
.collect()
}
_ => self.expressions(),
}
self.expressions()
}
}

View File

@@ -97,7 +97,15 @@ impl DfLogicalPlanner {
is_tql_cte = true;
}
let df_stmt = stmt.as_ref().try_into().context(SqlSnafu)?;
let mut df_stmt = stmt.as_ref().try_into().context(SqlSnafu)?;
// TODO(LFC): Remove this when Datafusion supports **both** the syntax and implementation of "explain with format".
if let datafusion::sql::parser::Statement::Statement(
box datafusion::sql::sqlparser::ast::Statement::Explain { format, .. },
) = &mut df_stmt
{
format.take();
}
let table_provider = DfTableSourceProvider::new(
self.engine_state.catalog_manager().clone(),
@@ -124,11 +132,8 @@ impl DfLogicalPlanner {
let config_options = self.session_state.config().options();
let parser_options = &config_options.sql_parser;
let parser_options = ParserOptions {
enable_ident_normalization: parser_options.enable_ident_normalization,
parse_float_as_decimal: parser_options.parse_float_as_decimal,
support_varchar_with_length: parser_options.support_varchar_with_length,
enable_options_value_normalization: parser_options.enable_options_value_normalization,
collect_spans: parser_options.collect_spans,
map_string_types_to_utf8view: false,
..parser_options.into()
};
let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);
@@ -183,12 +188,10 @@ impl DfLogicalPlanner {
let config_options = self.session_state.config().options();
let parser_options = &config_options.sql_parser;
let parser_options = ParserOptions {
let parser_options: ParserOptions = ParserOptions {
map_string_types_to_utf8view: false,
enable_ident_normalization: normalize_ident,
parse_float_as_decimal: parser_options.parse_float_as_decimal,
support_varchar_with_length: parser_options.support_varchar_with_length,
enable_options_value_normalization: parser_options.enable_options_value_normalization,
collect_spans: parser_options.collect_spans,
..parser_options.into()
};
let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);

View File

@@ -29,8 +29,8 @@ use crate::promql::error::{
fn build_time_filter(time_index_expr: Expr, start: Timestamp, end: Timestamp) -> Expr {
time_index_expr
.clone()
.gt_eq(Expr::Literal(timestamp_to_scalar_value(start)))
.and(time_index_expr.lt_eq(Expr::Literal(timestamp_to_scalar_value(end))))
.gt_eq(Expr::Literal(timestamp_to_scalar_value(start), None))
.and(time_index_expr.lt_eq(Expr::Literal(timestamp_to_scalar_value(end), None)))
}
fn timestamp_to_scalar_value(timestamp: Timestamp) -> ScalarValue {

View File

@@ -44,9 +44,10 @@ use datafusion::prelude as df_prelude;
use datafusion::prelude::{Column, Expr as DfExpr, JoinType};
use datafusion::scalar::ScalarValue;
use datafusion::sql::TableReference;
use datafusion_common::DFSchema;
use datafusion_common::{DFSchema, NullEquality};
use datafusion_expr::expr::WindowFunctionParams;
use datafusion_expr::utils::conjunction;
use datafusion_expr::{col, lit, ExprSchemable, SortExpr};
use datafusion_expr::{col, lit, ExprSchemable, Literal, SortExpr};
use datatypes::arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
use datatypes::data_type::ConcreteDataType;
use itertools::Itertools;
@@ -1299,11 +1300,11 @@ impl PromPlanner {
.field_with_unqualified_name(&matcher.name)
.is_err()
{
DfExpr::Literal(ScalarValue::Utf8(Some(String::new()))).alias(matcher.name)
DfExpr::Literal(ScalarValue::Utf8(Some(String::new())), None).alias(matcher.name)
} else {
DfExpr::Column(Column::from_name(matcher.name))
};
let lit = DfExpr::Literal(ScalarValue::Utf8(Some(matcher.value)));
let lit = DfExpr::Literal(ScalarValue::Utf8(Some(matcher.value)), None);
let expr = match matcher.op {
MatchOp::Equal => col.eq(lit),
MatchOp::NotEqual => col.not_eq(lit),
@@ -1315,17 +1316,13 @@ impl PromPlanner {
DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(col),
op: Operator::RegexMatch,
right: Box::new(DfExpr::Literal(ScalarValue::Utf8(Some(
re.as_str().to_string(),
)))),
right: Box::new(re.as_str().lit()),
})
}
MatchOp::NotRe(re) => DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(col),
op: Operator::RegexNotMatch,
right: Box::new(DfExpr::Literal(ScalarValue::Utf8(Some(
re.as_str().to_string(),
)))),
right: Box::new(re.as_str().lit()),
}),
};
exprs.push(expr);
@@ -1367,16 +1364,20 @@ impl PromPlanner {
if (end - start) / interval > MAX_SCATTER_POINTS || interval <= INTERVAL_1H {
let single_time_range = time_index_expr
.clone()
.gt_eq(DfExpr::Literal(ScalarValue::TimestampMillisecond(
Some(self.ctx.start + offset_duration - self.ctx.lookback_delta - range),
.gt_eq(DfExpr::Literal(
ScalarValue::TimestampMillisecond(
Some(self.ctx.start + offset_duration - self.ctx.lookback_delta - range),
None,
),
None,
)))
.and(
time_index_expr.lt_eq(DfExpr::Literal(ScalarValue::TimestampMillisecond(
))
.and(time_index_expr.lt_eq(DfExpr::Literal(
ScalarValue::TimestampMillisecond(
Some(self.ctx.end + offset_duration + self.ctx.lookback_delta),
None,
))),
);
),
None,
)));
return Ok(Some(single_time_range));
}
@@ -1386,15 +1387,19 @@ impl PromPlanner {
filters.push(
time_index_expr
.clone()
.gt_eq(DfExpr::Literal(ScalarValue::TimestampMillisecond(
Some(timestamp + offset_duration - lookback_delta - range),
.gt_eq(DfExpr::Literal(
ScalarValue::TimestampMillisecond(
Some(timestamp + offset_duration - lookback_delta - range),
None,
),
None,
)))
))
.and(time_index_expr.clone().lt_eq(DfExpr::Literal(
ScalarValue::TimestampMillisecond(
Some(timestamp + offset_duration + lookback_delta),
None,
),
None,
))),
)
}
@@ -1457,6 +1462,7 @@ impl PromPlanner {
table: table_ref.to_quoted_string(),
})?
.clone(),
metadata: None,
})))
.collect::<Vec<_>>();
scan_plan = LogicalPlanBuilder::from(scan_plan)
@@ -1547,7 +1553,7 @@ impl PromPlanner {
self.ctx.interval,
SPECIAL_TIME_FUNCTION.to_string(),
DEFAULT_FIELD_COLUMN.to_string(),
Some(DfExpr::Literal(ScalarValue::Float64(Some(0.0)))),
Some(lit(0.0f64)),
)
.context(DataFusionPlanningSnafu)?,
),
@@ -1700,13 +1706,14 @@ impl PromPlanner {
// 'days',
// (date_trunc('month', <TIME INDEX>::date) + interval '1 month - 1 day')
// );
let day_lit_expr = DfExpr::Literal(ScalarValue::Utf8(Some("day".to_string())));
let month_lit_expr = DfExpr::Literal(ScalarValue::Utf8(Some("month".to_string())));
let day_lit_expr = "day".lit();
let month_lit_expr = "month".lit();
let interval_1month_lit_expr =
DfExpr::Literal(ScalarValue::IntervalYearMonth(Some(1)));
let interval_1day_lit_expr = DfExpr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(1, 0),
)));
DfExpr::Literal(ScalarValue::IntervalYearMonth(Some(1)), None);
let interval_1day_lit_expr = DfExpr::Literal(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(1, 0))),
None,
);
let the_1month_minus_1day_expr = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(interval_1month_lit_expr),
op: Operator::Minus,
@@ -1794,7 +1801,7 @@ impl PromPlanner {
}
"round" => {
if other_input_exprs.is_empty() {
other_input_exprs.push_front(DfExpr::Literal(ScalarValue::Float64(Some(0.0))));
other_input_exprs.push_front(0.0f64.lit());
}
ScalarFunc::DataFusionUdf(Arc::new(Round::scalar_udf()))
}
@@ -1949,7 +1956,7 @@ impl PromPlanner {
) -> Result<Option<(DfExpr, String)>> {
// label_replace(vector, dst_label, replacement, src_label, regex)
let dst_label = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d,
Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)), _)) => d,
other => UnexpectedPlanExprSnafu {
desc: format!("expected dst_label string literal, but found {:?}", other),
}
@@ -1959,14 +1966,14 @@ impl PromPlanner {
// Validate the destination label name
Self::validate_label_name(&dst_label)?;
let replacement = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r,
Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)), _)) => r,
other => UnexpectedPlanExprSnafu {
desc: format!("expected replacement string literal, but found {:?}", other),
}
.fail()?,
};
let src_label = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Utf8(Some(s)))) => s,
Some(DfExpr::Literal(ScalarValue::Utf8(Some(s)), None)) => s,
other => UnexpectedPlanExprSnafu {
desc: format!("expected src_label string literal, but found {:?}", other),
}
@@ -1974,7 +1981,7 @@ impl PromPlanner {
};
let regex = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r,
Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)), None)) => r,
other => UnexpectedPlanExprSnafu {
desc: format!("expected regex string literal, but found {:?}", other),
}
@@ -2004,7 +2011,7 @@ impl PromPlanner {
// the replacement is not empty, always adds dst_label with replacement value.
return Ok(Some((
// alias literal `replacement` as dst_label
DfExpr::Literal(ScalarValue::Utf8(Some(replacement))).alias(&dst_label),
lit(replacement).alias(&dst_label),
dst_label,
)));
}
@@ -2025,12 +2032,12 @@ impl PromPlanner {
// regexp_replace(src_label, regex, replacement)
let args = vec![
if src_label.is_empty() {
DfExpr::Literal(ScalarValue::Utf8(Some(String::new())))
DfExpr::Literal(ScalarValue::Utf8(Some(String::new())), None)
} else {
DfExpr::Column(Column::from_name(src_label))
},
DfExpr::Literal(ScalarValue::Utf8(Some(regex))),
DfExpr::Literal(ScalarValue::Utf8(Some(replacement))),
DfExpr::Literal(ScalarValue::Utf8(Some(regex)), None),
DfExpr::Literal(ScalarValue::Utf8(Some(replacement)), None),
];
Ok(Some((
@@ -2052,14 +2059,14 @@ impl PromPlanner {
// label_join(vector, dst_label, separator, src_label_1, src_label_2, ...)
let dst_label = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d,
Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)), _)) => d,
other => UnexpectedPlanExprSnafu {
desc: format!("expected dst_label string literal, but found {:?}", other),
}
.fail()?,
};
let separator = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d,
Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)), _)) => d,
other => UnexpectedPlanExprSnafu {
desc: format!("expected separator string literal, but found {:?}", other),
}
@@ -2080,15 +2087,15 @@ impl PromPlanner {
.map(|expr| {
// Cast source label into column or null literal
match expr {
DfExpr::Literal(ScalarValue::Utf8(Some(label))) => {
DfExpr::Literal(ScalarValue::Utf8(Some(label)), None) => {
if label.is_empty() {
Ok(DfExpr::Literal(ScalarValue::Null))
Ok(DfExpr::Literal(ScalarValue::Null, None))
} else if available_columns.contains(label.as_str()) {
// Label exists in the table schema
Ok(DfExpr::Column(Column::from_name(label)))
} else {
// Label doesn't exist, treat as empty string (null)
Ok(DfExpr::Literal(ScalarValue::Null))
Ok(DfExpr::Literal(ScalarValue::Null, None))
}
}
other => UnexpectedPlanExprSnafu {
@@ -2116,7 +2123,7 @@ impl PromPlanner {
// concat_ws(separator, src_label_1, src_label_2, ...) as dst_label
let mut args = Vec::with_capacity(1 + src_labels.len());
args.push(DfExpr::Literal(ScalarValue::Utf8(Some(separator))));
args.push(DfExpr::Literal(ScalarValue::Utf8(Some(separator)), None));
args.extend(src_labels);
Ok((
@@ -2187,7 +2194,7 @@ impl PromPlanner {
tags.iter()
.map(|col| match col {
DfExpr::Literal(ScalarValue::Utf8(Some(label))) => {
DfExpr::Literal(ScalarValue::Utf8(Some(label)), _) => {
Ok(DfExpr::Column(Column::from_name(label)).sort(asc, false))
}
other => UnexpectedPlanExprSnafu {
@@ -2400,14 +2407,17 @@ impl PromPlanner {
// Try to ensure the relative stability of the output results.
sort_exprs.extend(tag_sort_exprs.clone());
DfExpr::WindowFunction(WindowFunction {
DfExpr::WindowFunction(Box::new(WindowFunction {
fun: WindowFunctionDefinition::WindowUDF(Arc::new(RowNumber::new().into())),
args: vec![],
partition_by: group_exprs.clone(),
order_by: sort_exprs,
window_frame: WindowFrame::new(Some(true)),
null_treatment: None,
})
params: WindowFunctionParams {
args: vec![],
partition_by: group_exprs.clone(),
order_by: sort_exprs,
window_frame: WindowFrame::new(Some(true)),
null_treatment: None,
distinct: false,
},
}))
})
.collect();
@@ -2601,7 +2611,7 @@ impl PromPlanner {
.with_context(|| ValueNotFoundSnafu {
table: self.ctx.table_name.clone().unwrap_or_default(),
})?;
let first_value_expr = first_value(first_field_expr, None);
let first_value_expr = first_value(first_field_expr, vec![]);
let ordered_aggregated_input = LogicalPlanBuilder::from(input)
.aggregate(
@@ -2647,14 +2657,8 @@ impl PromPlanner {
/// `None` if the input is not a literal expression.
fn try_build_literal_expr(expr: &PromExpr) -> Option<DfExpr> {
match expr {
PromExpr::NumberLiteral(NumberLiteral { val }) => {
let scalar_value = ScalarValue::Float64(Some(*val));
Some(DfExpr::Literal(scalar_value))
}
PromExpr::StringLiteral(StringLiteral { val }) => {
let scalar_value = ScalarValue::Utf8(Some(val.to_string()));
Some(DfExpr::Literal(scalar_value))
}
PromExpr::NumberLiteral(NumberLiteral { val }) => Some(val.lit()),
PromExpr::StringLiteral(StringLiteral { val }) => Some(val.lit()),
PromExpr::VectorSelector(_)
| PromExpr::MatrixSelector(_)
| PromExpr::Extension(_)
@@ -2850,7 +2854,7 @@ impl PromPlanner {
.collect::<Vec<_>>(),
),
None,
true,
NullEquality::NullEqualsNull,
)
.context(DataFusionPlanningSnafu)?
.build()
@@ -2986,7 +2990,7 @@ impl PromPlanner {
JoinType::LeftSemi,
(join_keys.clone(), join_keys),
None,
true,
NullEquality::NullEqualsNull,
)
.context(DataFusionPlanningSnafu)?
.build()
@@ -2999,7 +3003,7 @@ impl PromPlanner {
JoinType::LeftAnti,
(join_keys.clone(), join_keys),
None,
true,
NullEquality::NullEqualsNull,
)
.context(DataFusionPlanningSnafu)?
.build()
@@ -3105,7 +3109,7 @@ impl PromPlanner {
// step 1: align the schemas with a projection, filling non-existent columns with null
let left_proj_exprs = all_columns.iter().map(|col| {
if tags_not_in_left.contains(col) {
DfExpr::Literal(ScalarValue::Utf8(None)).alias(col.to_string())
DfExpr::Literal(ScalarValue::Utf8(None), None).alias(col.to_string())
} else {
DfExpr::Column(Column::new(None::<String>, col))
}
@@ -3137,7 +3141,7 @@ impl PromPlanner {
right_field_col,
))
} else if tags_not_in_right.contains(col) {
DfExpr::Literal(ScalarValue::Utf8(None)).alias(col.to_string())
DfExpr::Literal(ScalarValue::Utf8(None), None).alias(col.to_string())
} else {
DfExpr::Column(Column::new(None::<String>, col))
}
@@ -3286,7 +3290,6 @@ impl PromPlanner {
/// Generate an expr like `date_part("hour", <TIME_INDEX>)`. Caller should ensure the
/// time index column in context is set
fn date_part_on_time_index(&self, date_part: &str) -> Result<DfExpr> {
let lit_expr = DfExpr::Literal(ScalarValue::Utf8(Some(date_part.to_string())));
let input_expr = datafusion::logical_expr::col(
self.ctx
.time_index_column
@@ -3298,7 +3301,7 @@ impl PromPlanner {
);
let fn_expr = DfExpr::ScalarFunction(ScalarFunction {
func: datafusion_functions::datetime::date_part(),
args: vec![lit_expr, input_expr],
args: vec![date_part.lit(), input_expr],
});
Ok(fn_expr)
}
@@ -3900,7 +3903,7 @@ mod test {
assert_eq!(plan.display_indent_schema().to_string(), expected);
}
async fn indie_query_plan_compare(query: &str, expected: String) {
async fn indie_query_plan_compare<T: AsRef<str>>(query: &str, expected: T) {
let prom_expr = parser::parse(query).unwrap();
let eval_stmt = EvalStmt {
expr: prom_expr,
@@ -3929,7 +3932,7 @@ mod test {
.await
.unwrap();
assert_eq!(plan.display_indent_schema().to_string(), expected);
assert_eq!(plan.display_indent_schema().to_string(), expected.as_ref());
}
#[tokio::test]
@@ -3950,8 +3953,8 @@ mod test {
#[tokio::test]
async fn binary_op_literal_literal() {
let query = r#"1 + 1"#;
let expected = String::from("EmptyMetric: range=[0..100000000], interval=[5000] [time:Timestamp(Millisecond, None), value:Float64;N]");
let expected = r#"EmptyMetric: range=[0..100000000], interval=[5000] [time:Timestamp(Millisecond, None), value:Float64;N]
TableScan: dummy [time:Timestamp(Millisecond, None), value:Float64;N]"#;
indie_query_plan_compare(query, expected).await;
}
@@ -4950,21 +4953,22 @@ Filter: up.field_0 IS NOT NULL [timestamp:Timestamp(Millisecond, None), field_0:
PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_query_engine_state())
.await
.unwrap();
let expected = "UnionDistinctOn: on col=[[\"job\"]], ts_col=[greptime_timestamp] [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8, sum(metric_exists.greptime_value):Float64;N]\
\n SubqueryAlias: metric_exists [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8, sum(metric_exists.greptime_value):Float64;N]\
\n Projection: metric_exists.greptime_timestamp, metric_exists.job, sum(metric_exists.greptime_value) [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8, sum(metric_exists.greptime_value):Float64;N]\
\n Sort: metric_exists.job ASC NULLS LAST, metric_exists.greptime_timestamp ASC NULLS LAST [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), sum(metric_exists.greptime_value):Float64;N]\
\n Aggregate: groupBy=[[metric_exists.job, metric_exists.greptime_timestamp]], aggr=[[sum(metric_exists.greptime_value)]] [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), sum(metric_exists.greptime_value):Float64;N]\
\n PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[greptime_timestamp] [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]\
\n PromSeriesDivide: tags=[\"job\"] [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]\
\n Sort: metric_exists.job ASC NULLS FIRST, metric_exists.greptime_timestamp ASC NULLS FIRST [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]\
\n Filter: metric_exists.greptime_timestamp >= TimestampMillisecond(-1000, None) AND metric_exists.greptime_timestamp <= TimestampMillisecond(100001000, None) [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]\
\n TableScan: metric_exists [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]\
\n SubqueryAlias: [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8;N, sum(.value):Float64;N]\
\n Projection: .time AS greptime_timestamp, Utf8(NULL) AS job, sum(.value) [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8;N, sum(.value):Float64;N]\
\n Sort: .time ASC NULLS LAST [time:Timestamp(Millisecond, None), sum(.value):Float64;N]\
\n Aggregate: groupBy=[[.time]], aggr=[[sum(.value)]] [time:Timestamp(Millisecond, None), sum(.value):Float64;N]\
\n EmptyMetric: range=[0..-1], interval=[5000] [time:Timestamp(Millisecond, None), value:Float64;N]";
let expected = r#"UnionDistinctOn: on col=[["job"]], ts_col=[greptime_timestamp] [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8, sum(metric_exists.greptime_value):Float64;N]
SubqueryAlias: metric_exists [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8, sum(metric_exists.greptime_value):Float64;N]
Projection: metric_exists.greptime_timestamp, metric_exists.job, sum(metric_exists.greptime_value) [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8, sum(metric_exists.greptime_value):Float64;N]
Sort: metric_exists.job ASC NULLS LAST, metric_exists.greptime_timestamp ASC NULLS LAST [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), sum(metric_exists.greptime_value):Float64;N]
Aggregate: groupBy=[[metric_exists.job, metric_exists.greptime_timestamp]], aggr=[[sum(metric_exists.greptime_value)]] [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), sum(metric_exists.greptime_value):Float64;N]
PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[greptime_timestamp] [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]
PromSeriesDivide: tags=["job"] [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]
Sort: metric_exists.job ASC NULLS FIRST, metric_exists.greptime_timestamp ASC NULLS FIRST [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]
Filter: metric_exists.greptime_timestamp >= TimestampMillisecond(-1000, None) AND metric_exists.greptime_timestamp <= TimestampMillisecond(100001000, None) [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]
TableScan: metric_exists [job:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]
SubqueryAlias: [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8;N, sum(.value):Float64;N]
Projection: .time AS greptime_timestamp, Utf8(NULL) AS job, sum(.value) [greptime_timestamp:Timestamp(Millisecond, None), job:Utf8;N, sum(.value):Float64;N]
Sort: .time ASC NULLS LAST [time:Timestamp(Millisecond, None), sum(.value):Float64;N]
Aggregate: groupBy=[[.time]], aggr=[[sum(.value)]] [time:Timestamp(Millisecond, None), sum(.value):Float64;N]
EmptyMetric: range=[0..-1], interval=[5000] [time:Timestamp(Millisecond, None), value:Float64;N]
TableScan: dummy [time:Timestamp(Millisecond, None), value:Float64;N]"#;
assert_eq!(plan.display_indent_schema().to_string(), expected);
}

View File

@@ -30,15 +30,13 @@ use datafusion::error::Result as DfResult;
use datafusion::execution::context::{QueryPlanner, SessionConfig, SessionContext, SessionState};
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::execution::SessionStateBuilder;
use datafusion::physical_optimizer::enforce_sorting::EnforceSorting;
use datafusion::physical_optimizer::optimizer::PhysicalOptimizer;
use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan;
use datafusion::physical_optimizer::PhysicalOptimizerRule;
use datafusion::physical_plan::ExecutionPlan;
use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner};
use datafusion_expr::{AggregateUDF, LogicalPlan as DfLogicalPlan};
use datafusion_optimizer::analyzer::count_wildcard_rule::CountWildcardRule;
use datafusion_optimizer::analyzer::{Analyzer, AnalyzerRule};
use datafusion_optimizer::analyzer::Analyzer;
use datafusion_optimizer::optimizer::Optimizer;
use promql::extension_plan::PromExtensionPlanner;
use table::table::adapter::DfTableProviderAdapter;
@@ -128,9 +126,6 @@ impl QueryEngineState {
let mut analyzer = Analyzer::new();
analyzer.rules.insert(0, Arc::new(TranscribeAtatRule));
analyzer.rules.insert(0, Arc::new(StringNormalizationRule));
// Use our custom rule instead to optimize the count(*) query
Self::remove_analyzer_rule(&mut analyzer.rules, CountWildcardRule {}.name());
analyzer
.rules
.insert(0, Arc::new(CountWildcardToTimeIndexRule));
@@ -152,9 +147,6 @@ impl QueryEngineState {
physical_optimizer
.rules
.insert(1, Arc::new(PassDistribution));
physical_optimizer
.rules
.insert(2, Arc::new(EnforceSorting {}));
// Add rule for windowed sort
physical_optimizer
.rules
@@ -202,10 +194,6 @@ impl QueryEngineState {
}
}
/// Drops every analyzer rule whose `name()` equals `name`, keeping the
/// remaining rules in their original order.
fn remove_analyzer_rule(rules: &mut Vec<Arc<dyn AnalyzerRule + Send + Sync>>, name: &str) {
    rules.retain(|candidate| {
        let keep = candidate.name() != name;
        keep
    });
}
fn remove_physical_optimizer_rule(
rules: &mut Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
name: &str,

View File

@@ -25,7 +25,7 @@ use ahash::RandomState;
use arrow::compute::{self, cast_with_options, take_arrays, CastOptions};
use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions, TimeUnit};
use common_recordbatch::DfSendableRecordBatchStream;
use datafusion::common::{Result as DataFusionResult, Statistics};
use datafusion::common::Result as DataFusionResult;
use datafusion::error::Result as DfResult;
use datafusion::execution::context::SessionState;
use datafusion::execution::TaskContext;
@@ -35,7 +35,6 @@ use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, RecordBatchStream,
SendableRecordBatchStream,
};
use datafusion::physical_planner::create_physical_sort_expr;
use datafusion_common::hash_utils::create_hashes;
use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, ScalarValue};
use datafusion_expr::utils::{exprlist_to_fields, COUNT_STAR_EXPANSION};
@@ -44,8 +43,8 @@ use datafusion_expr::{
};
use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr};
use datafusion_physical_expr::{
create_physical_expr, Distribution, EquivalenceProperties, LexOrdering, Partitioning,
PhysicalExpr, PhysicalSortExpr,
create_physical_expr, create_physical_sort_expr, Distribution, EquivalenceProperties,
Partitioning, PhysicalExpr, PhysicalSortExpr,
};
use datatypes::arrow::array::{
Array, ArrayRef, TimestampMillisecondArray, TimestampMillisecondBuilder, UInt32Builder,
@@ -545,6 +544,7 @@ impl RangeSelect {
// At this time, aggregate plan has been replaced by a custom range plan,
// so `CountWildcardRule` has not been applied.
// We manually modify it when creating the physical plan.
#[expect(deprecated)]
Expr::Wildcard { .. } if is_count_aggr => create_physical_expr(
&lit(COUNT_STAR_EXPANSION),
df_schema.as_ref(),
@@ -590,8 +590,9 @@ impl RangeSelect {
if (aggr.func.name() == "last_value"
|| aggr.func.name() == "first_value") =>
{
let order_by = if let Some(exprs) = &aggr.order_by {
exprs
let order_by = if !aggr.params.order_by.is_empty() {
aggr.params
.order_by
.iter()
.map(|x| {
create_physical_sort_expr(
@@ -618,7 +619,7 @@ impl RangeSelect {
};
let arg = self.create_physical_expr_list(
false,
&aggr.args,
&aggr.params.args,
input_dfschema,
session_state,
)?;
@@ -627,13 +628,14 @@ impl RangeSelect {
// We can safely assume that there is only one element here.
AggregateExprBuilder::new(aggr.func.clone(), arg)
.schema(input_schema.clone())
.order_by(LexOrdering::new(order_by))
.order_by(order_by)
.alias(name)
.build()
}
Expr::AggregateFunction(aggr) => {
let order_by = if let Some(exprs) = &aggr.order_by {
exprs
let order_by = if !aggr.params.order_by.is_empty() {
aggr.params
.order_by
.iter()
.map(|x| {
create_physical_sort_expr(
@@ -646,18 +648,18 @@ impl RangeSelect {
} else {
vec![]
};
let distinct = aggr.distinct;
let distinct = aggr.params.distinct;
// TODO(discord9): add default null treatment?
let input_phy_exprs = self.create_physical_expr_list(
aggr.func.name() == "count",
&aggr.args,
&aggr.params.args,
input_dfschema,
session_state,
)?;
AggregateExprBuilder::new(aggr.func.clone(), input_phy_exprs)
.schema(input_schema.clone())
.order_by(LexOrdering::new(order_by))
.order_by(order_by)
.with_distinct(distinct)
.alias(name)
.build()
@@ -724,9 +726,7 @@ impl RangeFnExec {
/// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)`, require this.
fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
let mut exprs = self.expr.expressions();
if let Some(ordering) = self.expr.order_bys() {
exprs.extend(ordering.iter().map(|sort| sort.expr.clone()));
}
exprs.extend(self.expr.order_bys().iter().map(|sort| sort.expr.clone()));
exprs
}
}
@@ -766,7 +766,9 @@ pub struct RangeSelectExec {
impl DisplayAs for RangeSelectExec {
fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match t {
DisplayFormatType::Default | DisplayFormatType::Verbose => {
DisplayFormatType::Default
| DisplayFormatType::Verbose
| DisplayFormatType::TreeRender => {
write!(f, "RangeSelectExec: ")?;
let range_expr_strs: Vec<String> =
self.range_exec.iter().map(RangeFnExec::to_string).collect();
@@ -873,10 +875,6 @@ impl ExecutionPlan for RangeSelectExec {
Some(self.metric.clone_inner())
}
/// Reports statistics for this plan node: always the "unknown" statistics
/// built from the node's own schema.
fn statistics(&self) -> DataFusionResult<Statistics> {
    let unknown = Statistics::new_unknown(self.schema.as_ref());
    Ok(unknown)
}
/// Returns the operator name reported for this plan node.
///
/// The string is spelled like the `RangeSelectExec` struct and the
/// `RangeSelectExec: ` prefix written by `DisplayAs::fmt_as`, so the plan
/// name and the rendered plan agree. The previous value was misspelled
/// as `RanegSelectExec`.
fn name(&self) -> &str {
    "RangeSelectExec"
}
@@ -1244,8 +1242,9 @@ mod test {
use datafusion::arrow::datatypes::{
ArrowPrimitiveType, DataType, Field, Schema, TimestampMillisecondType,
};
use datafusion::datasource::memory::MemorySourceConfig;
use datafusion::datasource::source::DataSourceExec;
use datafusion::functions_aggregate::min_max;
use datafusion::physical_plan::memory::MemoryExec;
use datafusion::physical_plan::sorts::sort::SortExec;
use datafusion::prelude::SessionContext;
use datafusion_physical_expr::expressions::Column;
@@ -1257,7 +1256,7 @@ mod test {
const TIME_INDEX_COLUMN: &str = "timestamp";
fn prepare_test_data(is_float: bool, is_gap: bool) -> MemoryExec {
fn prepare_test_data(is_float: bool, is_gap: bool) -> DataSourceExec {
let schema = Arc::new(Schema::new(vec![
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
Field::new(
@@ -1307,7 +1306,9 @@ mod test {
)
.unwrap();
MemoryExec::try_new(&[vec![data]], schema, None).unwrap()
DataSourceExec::new(Arc::new(
MemorySourceConfig::try_new(&[vec![data]], schema, None).unwrap(),
))
}
async fn do_range_select_test(
@@ -1390,7 +1391,7 @@ mod test {
cache,
});
let sort_exec = SortExec::new(
LexOrdering::new(vec![
[
PhysicalSortExpr {
expr: Arc::new(Column::new("host", 3)),
options: SortOptions {
@@ -1405,7 +1406,8 @@ mod test {
nulls_first: true,
},
},
]),
]
.into(),
range_select_exec,
);
let session_context = SessionContext::default();

View File

@@ -33,7 +33,7 @@ use datafusion_expr::expr::WildcardOptions;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_expr::{
Aggregate, Analyze, Cast, Distinct, DistinctOn, Explain, Expr, ExprSchemable, Extension,
LogicalPlan, LogicalPlanBuilder, Projection,
Literal, LogicalPlan, LogicalPlanBuilder, Projection,
};
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datatypes::prelude::ConcreteDataType;
@@ -76,6 +76,9 @@ impl RangeExprRewriter<'_> {
"Range expr not found in underlying Aggregate Plan".into(),
))
}
Some(Expr::Alias(alias)) => {
self.get_range_expr(std::slice::from_ref(alias.expr.as_ref()), 0)
}
other => Err(dispose_parse_error(other)),
}
}
@@ -96,14 +99,14 @@ fn dispose_parse_error(expr: Option<&Expr>) -> DataFusionError {
fn parse_str_expr(args: &[Expr], i: usize) -> DFResult<&str> {
match args.get(i) {
Some(Expr::Literal(ScalarValue::Utf8(Some(str)))) => Ok(str.as_str()),
Some(Expr::Literal(ScalarValue::Utf8(Some(str)), _)) => Ok(str.as_str()),
other => Err(dispose_parse_error(other)),
}
}
fn parse_expr_to_string(args: &[Expr], i: usize) -> DFResult<String> {
match args.get(i) {
Some(Expr::Literal(ScalarValue::Utf8(Some(str)))) => Ok(str.to_string()),
Some(Expr::Literal(ScalarValue::Utf8(Some(str)), _)) => Ok(str.to_string()),
Some(expr) => Ok(expr.schema_name().to_string()),
None => Err(dispose_parse_error(None)),
}
@@ -115,7 +118,7 @@ fn parse_expr_to_string(args: &[Expr], i: usize) -> DFResult<String> {
/// 3. An interval expr can be evaluated at the logical plan stage (e.g. `INTERVAL '2' day - INTERVAL '1' day`)
fn parse_duration_expr(args: &[Expr], i: usize) -> DFResult<Duration> {
match args.get(i) {
Some(Expr::Literal(ScalarValue::Utf8(Some(str)))) => {
Some(Expr::Literal(ScalarValue::Utf8(Some(str)), _)) => {
parse_duration(str).map_err(DataFusionError::Plan)
}
Some(expr) => {
@@ -147,20 +150,20 @@ fn evaluate_expr_to_millisecond(args: &[Expr], i: usize, interval_only: bool) ->
let info = SimplifyContext::new(&execution_props).with_schema(Arc::new(DFSchema::empty()));
let simplify_expr = ExprSimplifier::new(info).simplify(expr.clone())?;
match simplify_expr {
Expr::Literal(ScalarValue::TimestampNanosecond(ts_nanos, _))
| Expr::Literal(ScalarValue::DurationNanosecond(ts_nanos)) => {
Expr::Literal(ScalarValue::TimestampNanosecond(ts_nanos, _), _)
| Expr::Literal(ScalarValue::DurationNanosecond(ts_nanos), _) => {
ts_nanos.map(|v| v / 1_000_000)
}
Expr::Literal(ScalarValue::TimestampMicrosecond(ts_micros, _))
| Expr::Literal(ScalarValue::DurationMicrosecond(ts_micros)) => {
Expr::Literal(ScalarValue::TimestampMicrosecond(ts_micros, _), _)
| Expr::Literal(ScalarValue::DurationMicrosecond(ts_micros), _) => {
ts_micros.map(|v| v / 1_000)
}
Expr::Literal(ScalarValue::TimestampMillisecond(ts_millis, _))
| Expr::Literal(ScalarValue::DurationMillisecond(ts_millis)) => ts_millis,
Expr::Literal(ScalarValue::TimestampSecond(ts_secs, _))
| Expr::Literal(ScalarValue::DurationSecond(ts_secs)) => ts_secs.map(|v| v * 1_000),
Expr::Literal(ScalarValue::TimestampMillisecond(ts_millis, _), _)
| Expr::Literal(ScalarValue::DurationMillisecond(ts_millis), _) => ts_millis,
Expr::Literal(ScalarValue::TimestampSecond(ts_secs, _), _)
| Expr::Literal(ScalarValue::DurationSecond(ts_secs), _) => ts_secs.map(|v| v * 1_000),
// We don't support intervals with months, as the number of days in a month is unclear.
Expr::Literal(ScalarValue::IntervalYearMonth(interval)) => interval
Expr::Literal(ScalarValue::IntervalYearMonth(interval), _) => interval
.map(|v| {
let interval = IntervalYearMonth::from_i32(v);
if interval.months != 0 {
@@ -173,11 +176,11 @@ fn evaluate_expr_to_millisecond(args: &[Expr], i: usize, interval_only: bool) ->
Ok(0)
})
.transpose()?,
Expr::Literal(ScalarValue::IntervalDayTime(interval)) => interval.map(|v| {
Expr::Literal(ScalarValue::IntervalDayTime(interval), _) => interval.map(|v| {
let interval = IntervalDayTime::from(v);
interval.as_millis()
}),
Expr::Literal(ScalarValue::IntervalMonthDayNano(interval)) => interval
Expr::Literal(ScalarValue::IntervalMonthDayNano(interval), _) => interval
.map(|v| {
let interval = IntervalMonthDayNano::from(v);
if interval.months != 0 {
@@ -236,7 +239,10 @@ fn parse_expr_list(args: &[Expr], start: usize, len: usize) -> DFResult<Vec<Expr
for i in start..start + len {
outs.push(match &args.get(i) {
Some(
Expr::Column(_) | Expr::Literal(_) | Expr::BinaryExpr(_) | Expr::ScalarFunction(_),
Expr::Column(_)
| Expr::Literal(_, _)
| Expr::BinaryExpr(_)
| Expr::ScalarFunction(_),
) => args[i].clone(),
other => {
return Err(dispose_parse_error(*other));
@@ -482,6 +488,7 @@ impl RangePlanRewriter {
/// If the user does not explicitly use the `by` keyword to indicate time series,
/// `[row_columns]` will be used as the default time series
async fn get_index_by(&mut self, schema: &Arc<DFSchema>) -> Result<(Expr, Vec<Expr>)> {
#[allow(deprecated)]
let mut time_index_expr = Expr::Wildcard {
qualifier: None,
options: Box::new(WildcardOptions::default()),
@@ -522,7 +529,7 @@ impl RangePlanRewriter {
// then by default all data will be aggregated into one time series,
// which is equivalent to using `by(1)` in SQL
if default_by.is_empty() {
default_by = vec![Expr::Literal(ScalarValue::Int64(Some(1)))];
default_by = vec![1.lit()];
}
time_index_expr = Expr::Column(Column::new(
Some(table_ref.clone()),
@@ -531,6 +538,7 @@ impl RangePlanRewriter {
}
}
}
#[allow(deprecated)]
if matches!(time_index_expr, Expr::Wildcard { .. }) {
TimeIndexNotFoundSnafu {
table: schema.to_string(),
@@ -567,7 +575,7 @@ fn interval_only_in_expr(expr: &Expr) -> bool {
Expr::Cast(Cast{
expr,
data_type: DataType::Interval(_)
}) if matches!(&**expr, Expr::Literal(ScalarValue::Utf8(_)))
}) if matches!(&**expr, Expr::Literal(ScalarValue::Utf8(_), _))
) {
// Stop checking the sub `expr`,
// which is a `Utf8` type and has already been tested above.
@@ -576,9 +584,9 @@ fn interval_only_in_expr(expr: &Expr) -> bool {
if !matches!(
expr,
Expr::Literal(ScalarValue::IntervalDayTime(_))
| Expr::Literal(ScalarValue::IntervalMonthDayNano(_))
| Expr::Literal(ScalarValue::IntervalYearMonth(_))
Expr::Literal(ScalarValue::IntervalDayTime(_), _)
| Expr::Literal(ScalarValue::IntervalMonthDayNano(_), _)
| Expr::Literal(ScalarValue::IntervalYearMonth(_), _)
| Expr::BinaryExpr(_)
| Expr::Cast(Cast {
data_type: DataType::Interval(_),
@@ -603,7 +611,7 @@ mod test {
use catalog::RegisterTableRequest;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_time::IntervalYearMonth;
use datafusion_expr::{BinaryExpr, Operator};
use datafusion_expr::{BinaryExpr, Literal, Operator};
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use session::context::QueryContext;
@@ -846,37 +854,31 @@ mod test {
fn test_parse_duration_expr() {
// test IntervalYearMonth
let interval = IntervalYearMonth::new(10);
let args = vec![Expr::Literal(ScalarValue::IntervalYearMonth(Some(
interval.to_i32(),
)))];
let args = vec![ScalarValue::IntervalYearMonth(Some(interval.to_i32())).lit()];
assert!(parse_duration_expr(&args, 0).is_err(),);
// test IntervalDayTime
let interval = IntervalDayTime::new(10, 10);
let args = vec![Expr::Literal(ScalarValue::IntervalDayTime(Some(
interval.into(),
)))];
let args = vec![ScalarValue::IntervalDayTime(Some(interval.into())).lit()];
assert_eq!(
parse_duration_expr(&args, 0).unwrap().as_millis() as i64,
interval.as_millis()
);
// test IntervalMonthDayNano
let interval = IntervalMonthDayNano::new(0, 10, 10);
let args = vec![Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(
interval.into(),
)))];
let args = vec![ScalarValue::IntervalMonthDayNano(Some(interval.into())).lit()];
assert_eq!(
parse_duration_expr(&args, 0).unwrap().as_millis() as i64,
interval.days as i64 * MS_PER_DAY + interval.nanoseconds / NANOS_PER_MILLI,
);
// test Duration
let args = vec![Expr::Literal(ScalarValue::Utf8(Some("1y4w".into())))];
let args = vec!["1y4w".lit()];
assert_eq!(
parse_duration_expr(&args, 0).unwrap(),
parse_duration("1y4w").unwrap()
);
// test cast expression
let args = vec![Expr::Cast(Cast {
expr: Box::new(Expr::Literal(ScalarValue::Utf8(Some("15 minutes".into())))),
expr: Box::new("15 minutes".lit()),
data_type: DataType::Interval(IntervalUnit::MonthDayNano),
})];
assert_eq!(
@@ -887,36 +889,36 @@ mod test {
assert!(parse_duration_expr(&args, 10).is_err());
// test evaluate expr
let args = vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(0, 10).into(),
)))),
left: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(0, 10).into())).lit(),
),
op: Operator::Plus,
right: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(0, 10).into(),
)))),
right: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(0, 10).into())).lit(),
),
})];
assert_eq!(
parse_duration_expr(&args, 0).unwrap(),
Duration::from_millis(20)
);
let args = vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(0, 10).into(),
)))),
left: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(0, 10).into())).lit(),
),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(0, 10).into(),
)))),
right: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(0, 10).into())).lit(),
),
})];
// test zero interval error
assert!(parse_duration_expr(&args, 0).is_err());
// test must all be interval
let args = vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalYearMonth(Some(
IntervalYearMonth::new(10).to_i32(),
)))),
left: Box::new(
ScalarValue::IntervalYearMonth(Some(IntervalYearMonth::new(10).to_i32())).lit(),
),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::Time64Microsecond(Some(0)))),
right: Box::new(ScalarValue::Time64Microsecond(Some(0)).lit()),
})];
assert!(parse_duration_expr(&args, 0).is_err());
}
@@ -924,14 +926,14 @@ mod test {
#[test]
fn test_parse_align_to() {
// test NOW
let args = vec![Expr::Literal(ScalarValue::Utf8(Some("NOW".into())))];
let args = vec!["NOW".lit()];
let epsinon = parse_align_to(&args, 0, None).unwrap() - Timestamp::current_millis().value();
assert!(epsinon.abs() < 100);
// test default
let args = vec![Expr::Literal(ScalarValue::Utf8(Some("".into())))];
let args = vec!["".lit()];
assert_eq!(0, parse_align_to(&args, 0, None).unwrap());
// test default with timezone
let args = vec![Expr::Literal(ScalarValue::Utf8(Some("".into())))];
let args = vec!["".lit()];
assert_eq!(
-36000 * 1000,
parse_align_to(&args, 0, Some(&Timezone::from_tz_string("HST").unwrap())).unwrap()
@@ -947,14 +949,10 @@ mod test {
);
// test Timestamp
let args = vec![Expr::Literal(ScalarValue::Utf8(Some(
"1970-01-01T00:00:00+08:00".into(),
)))];
let args = vec!["1970-01-01T00:00:00+08:00".lit()];
assert_eq!(parse_align_to(&args, 0, None).unwrap(), -8 * 60 * 60 * 1000);
// timezone
let args = vec![Expr::Literal(ScalarValue::Utf8(Some(
"1970-01-01T00:00:00".into(),
)))];
let args = vec!["1970-01-01T00:00:00".lit()];
assert_eq!(
parse_align_to(
&args,
@@ -966,13 +964,13 @@ mod test {
);
// test evaluate expr
let args = vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(0, 10).into(),
)))),
left: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(0, 10).into())).lit(),
),
op: Operator::Plus,
right: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(0, 10).into(),
)))),
right: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(0, 10).into())).lit(),
),
})];
assert_eq!(parse_align_to(&args, 0, None).unwrap(), 20);
}
@@ -980,50 +978,46 @@ mod test {
#[test]
fn test_interval_only() {
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::DurationMillisecond(Some(20)))),
left: Box::new(ScalarValue::DurationMillisecond(Some(20)).lit()),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(10, 0).into(),
)))),
right: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(10, 0).into())).lit(),
),
});
assert!(!interval_only_in_expr(&expr));
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(10, 0).into(),
)))),
left: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(10, 0).into())).lit(),
),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(10, 0).into(),
)))),
right: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(10, 0).into())).lit(),
),
});
assert!(interval_only_in_expr(&expr));
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Cast(Cast {
expr: Box::new(Expr::Literal(ScalarValue::Utf8(Some(
"15 minute".to_string(),
)))),
expr: Box::new("15 minute".lit()),
data_type: DataType::Interval(IntervalUnit::MonthDayNano),
})),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(10, 0).into(),
)))),
right: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(10, 0).into())).lit(),
),
});
assert!(interval_only_in_expr(&expr));
let expr = Expr::Cast(Cast {
expr: Box::new(Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Cast(Cast {
expr: Box::new(Expr::Literal(ScalarValue::Utf8(Some(
"15 minute".to_string(),
)))),
expr: Box::new("15 minute".lit()),
data_type: DataType::Interval(IntervalUnit::MonthDayNano),
})),
op: Operator::Minus,
right: Box::new(Expr::Literal(ScalarValue::IntervalDayTime(Some(
IntervalDayTime::new(10, 0).into(),
)))),
right: Box::new(
ScalarValue::IntervalDayTime(Some(IntervalDayTime::new(10, 0).into())).lit(),
),
})),
data_type: DataType::Interval(IntervalUnit::MonthDayNano),
});

View File

@@ -41,7 +41,6 @@ use common_time::timezone::get_timezone;
use common_time::Timestamp;
use datafusion::common::ScalarValue;
use datafusion::prelude::SessionContext;
use datafusion_expr::expr::WildcardOptions;
use datafusion_expr::{case, col, lit, Expr, SortExpr};
use datatypes::prelude::*;
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, RawSchema, Schema};
@@ -250,13 +249,6 @@ async fn query_from_information_schema_table(
let DataFrame::DataFusion(dataframe) = query_engine.read_table(table)?;
// Apply select
let dataframe = if select.is_empty() {
dataframe
} else {
dataframe.select(select).context(error::PlanSqlSnafu)?
};
// Apply filters
let dataframe = filters.into_iter().try_fold(dataframe, |df, expr| {
df.filter(expr).context(error::PlanSqlSnafu)
@@ -272,11 +264,26 @@ async fn query_from_information_schema_table(
};
// Apply sorting
let dataframe = dataframe
.sort(sort)
.context(error::PlanSqlSnafu)?
.select_columns(&projects.iter().map(|(c, _)| *c).collect::<Vec<_>>())
.context(error::PlanSqlSnafu)?;
// Only add a sort step when sort expressions were supplied; with an empty
// `sort` list the dataframe is passed through unchanged.
let dataframe = if sort.is_empty() {
dataframe
} else {
dataframe.sort(sort).context(error::PlanSqlSnafu)?
};
// Apply select
// An explicit `select` list takes priority. Without one, every entry of
// `projects` is selected as the column named by its first element, aliased
// to its second element. When both lists are empty the dataframe is left
// as-is. Failures from `select` are wrapped with `PlanSqlSnafu` context.
let dataframe = if select.is_empty() {
if projects.is_empty() {
dataframe
} else {
// Build `col(<first element>).alias(<second element>)` for each pair.
let projection = projects
.iter()
.map(|x| col(x.0).alias(x.1))
.collect::<Vec<_>>();
dataframe.select(projection).context(error::PlanSqlSnafu)?
}
} else {
dataframe.select(select).context(error::PlanSqlSnafu)?
};
// Apply projection
let dataframe = projects
@@ -400,8 +407,12 @@ pub async fn show_index(
};
let select = vec![
col(key_column_usage::TABLE_NAME).alias(INDEX_TABLE_COLUMN),
// 1 as `Non_unique`: contain duplicates
lit(1).alias(INDEX_NONT_UNIQUE_COLUMN),
col(key_column_usage::CONSTRAINT_NAME).alias(INDEX_KEY_NAME_COLUMN),
col(key_column_usage::ORDINAL_POSITION).alias(INDEX_SEQ_IN_INDEX_COLUMN),
col(key_column_usage::COLUMN_NAME).alias(INDEX_COLUMN_NAME_COLUMN),
// How the column is sorted in the index: A (ascending).
lit("A").alias(COLUMN_COLLATION_COLUMN),
null().alias(INDEX_CARDINALITY_COLUMN),
@@ -416,14 +427,11 @@ pub async fn show_index(
.otherwise(lit(YES_STR))
.context(error::PlanSqlSnafu)?
.alias(COLUMN_NULLABLE_COLUMN),
col(key_column_usage::GREPTIME_INDEX_TYPE).alias(INDEX_INDEX_TYPE_COLUMN),
lit("").alias(COLUMN_COMMENT_COLUMN),
lit("").alias(INDEX_COMMENT_COLUMN),
lit(YES_STR).alias(INDEX_VISIBLE_COLUMN),
null().alias(INDEX_EXPRESSION_COLUMN),
Expr::Wildcard {
qualifier: None,
options: Box::new(WildcardOptions::default()),
},
];
let projects = vec![
@@ -765,7 +773,7 @@ pub async fn show_search_path(_query_ctx: QueryContextRef) -> Result<Output> {
pub fn show_create_database(database_name: &str, options: OptionMap) -> Result<Output> {
let stmt = CreateDatabase {
name: ObjectName(vec![Ident::new(database_name)]),
name: ObjectName::from(vec![Ident::new(database_name)]),
if_not_exists: true,
options,
};
@@ -1005,7 +1013,7 @@ pub fn show_create_flow(
let stmt = CreateFlow {
flow_name,
sink_table_name: ObjectName(vec![Ident::new(&flow_val.sink_table_name().table_name)]),
sink_table_name: ObjectName::from(vec![Ident::new(&flow_val.sink_table_name().table_name)]),
// notice we don't want `OR REPLACE` and `IF NOT EXISTS` in same sql since it's unclear what to do
// so we set `or_replace` to false.
or_replace: false,
@@ -1422,7 +1430,7 @@ mod test {
fn exec_show_variable(variable: &str, tz: &str) -> Result<String> {
let stmt = ShowVariables {
variable: ObjectName(vec![Ident::new(variable)]),
variable: ObjectName::from(vec![Ident::new(variable)]),
};
let ctx = Arc::new(
QueryContextBuilder::default()

View File

@@ -87,7 +87,8 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
let expr = match c {
ColumnDefaultConstraint::Value(v) => Expr::Value(
statements::value_to_sql_value(v)
.with_context(|_| ConvertSqlValueSnafu { value: v.clone() })?,
.with_context(|_| ConvertSqlValueSnafu { value: v.clone() })?
.into(),
),
ColumnDefaultConstraint::Function(expr) => {
ParserContext::parse_function(expr, &GreptimeDbDialect {}).context(SqlSnafu)?
@@ -165,7 +166,6 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
.with_context(|_| ConvertSqlTypeSnafu {
datatype: column_schema.data_type.clone(),
})?,
collation: None,
options,
},
extensions,
@@ -241,7 +241,7 @@ pub fn create_table_stmt(
Ok(CreateTable {
if_not_exists: true,
table_id: table_info.ident.table_id,
name: ObjectName(vec![Ident::with_quote(quote_style, table_name)]),
name: ObjectName::from(vec![Ident::with_quote(quote_style, table_name)]),
columns,
engine: table_meta.engine.clone(),
constraints,

View File

@@ -40,7 +40,7 @@ use datafusion::physical_plan::{
};
use datafusion_common::utils::bisect;
use datafusion_common::{internal_err, DataFusionError};
use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};
use datafusion_physical_expr::PhysicalSortExpr;
use datatypes::value::Value;
use futures::Stream;
use itertools::Itertools;
@@ -118,12 +118,12 @@ impl WindowedSortExec {
) -> Result<Self> {
check_partition_range_monotonicity(&ranges, expression.options.descending)?;
let mut eq_properties = input.equivalence_properties().clone();
eq_properties.reorder(vec![expression.clone()])?;
let properties = input.properties();
let properties = PlanProperties::new(
input
.equivalence_properties()
.clone()
.with_reorder(LexOrdering::new(vec![expression.clone()])),
eq_properties,
input.output_partitioning().clone(),
properties.emission_type,
properties.boundedness,
@@ -238,7 +238,7 @@ impl ExecutionPlan for WindowedSortExec {
/// and is expected to run directly on storage engine's output
/// distribution / partition.
fn benefits_from_input_partitioning(&self) -> Vec<bool> {
// NOTE(review): the two expressions below appear to be the two sides of a
// one-line change in this diff view (removed line first, added line
// second); only one of them exists in any given revision — confirm.
// Variant yielding `self.ranges.len()` copies of `false`.
vec![false; self.ranges.len()]
// Variant yielding exactly one `false`.
// NOTE(review): presumably the returned vector carries one flag per child
// input and this node has a single input — confirm against the
// `ExecutionPlan` trait documentation.
vec![false]
}
fn name(&self) -> &str {
@@ -645,12 +645,11 @@ impl WindowedSortStream {
let reservation = MemoryConsumer::new(format!("WindowedSortStream[{}]", self.merge_count))
.register(&self.memory_pool);
self.merge_count += 1;
let lex_ordering = LexOrdering::new(vec![self.expression.clone()]);
let resulting_stream = StreamingMergeBuilder::new()
.with_streams(streams)
.with_schema(self.schema())
.with_expressions(&lex_ordering)
.with_expressions(&[self.expression.clone()].into())
.with_metrics(self.metrics.clone())
.with_batch_size(self.batch_size)
.with_fetch(fetch)