chore: update datafusion family (#6675)

* chore: update datafusion family

Signed-off-by: luofucong <luofc@foxmail.com>

* fix ci

Signed-off-by: luofucong <luofc@foxmail.com>

* use official otel-arrow-rust

Signed-off-by: luofucong <luofc@foxmail.com>

* rebase

Signed-off-by: luofucong <luofc@foxmail.com>

* use the official orc-rust

Signed-off-by: luofucong <luofc@foxmail.com>

* resolve PR comments

Signed-off-by: luofucong <luofc@foxmail.com>

* remove the empty lines

Signed-off-by: luofucong <luofc@foxmail.com>

* try following PR comments

Signed-off-by: luofucong <luofc@foxmail.com>

---------

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2025-08-15 20:41:49 +08:00
committed by GitHub
parent dfc29eb3b3
commit f9d2a89a0c
198 changed files with 4932 additions and 4196 deletions

View File

@@ -45,7 +45,7 @@ use query::sql::{
use session::context::QueryContextRef;
use session::table_name::table_idents_to_full_name;
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::{ColumnOption, ObjectName};
use sql::ast::{ColumnOption, ObjectName, ObjectNamePartExt};
use sql::statements::alter::{
AlterDatabase, AlterDatabaseOperation, AlterTable, AlterTableOperation,
};
@@ -819,7 +819,7 @@ fn sanitize_flow_name(mut flow_name: ObjectName) -> Result<String> {
}
);
// safety: we've checked flow_name.0 has exactly one element.
Ok(flow_name.0.swap_remove(0).value)
Ok(flow_name.0.swap_remove(0).to_string_unquoted())
}
#[cfg(test)]

View File

@@ -5,7 +5,7 @@ use api::v1::{
};
use session::context::QueryContextRef;
use snafu::ensure;
use sql::ast::ObjectName;
use sql::ast::{ObjectName, ObjectNamePartExt};
use sql::statements::create::trigger::{ChannelType, CreateTrigger};
use crate::error::Result;
@@ -67,7 +67,7 @@ fn sanitize_trigger_name(mut trigger_name: ObjectName) -> Result<String> {
}
);
// safety: we've checked trigger_name.0 has exactly one element.
Ok(trigger_name.0.swap_remove(0).value)
Ok(trigger_name.0.swap_remove(0).to_string_unquoted())
}
#[cfg(test)]
@@ -81,15 +81,15 @@ mod tests {
#[test]
fn test_sanitize_trigger_name() {
let name = ObjectName(vec![sql::ast::Ident::new("my_trigger")]);
let name = vec![sql::ast::Ident::new("my_trigger")].into();
let sanitized = sanitize_trigger_name(name).unwrap();
assert_eq!(sanitized, "my_trigger");
let name = ObjectName(vec![sql::ast::Ident::with_quote('`', "my_trigger")]);
let name = vec![sql::ast::Ident::with_quote('`', "my_trigger")].into();
let sanitized = sanitize_trigger_name(name).unwrap();
assert_eq!(sanitized, "my_trigger");
let name = ObjectName(vec![sql::ast::Ident::with_quote('\'', "trigger")]);
let name = vec![sql::ast::Ident::with_quote('\'', "trigger")].into();
let sanitized = sanitize_trigger_name(name).unwrap();
assert_eq!(sanitized, "trigger");
}

View File

@@ -22,6 +22,7 @@ use datatypes::schema::{ColumnSchema, SchemaRef};
use partition::manager::PartitionRuleManager;
use session::context::{QueryContext, QueryContextRef};
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::ObjectNamePartExt;
use sql::statements::insert::Insert;
use sqlparser::ast::{ObjectName, Value as SqlValue};
use table::metadata::TableInfoRef;
@@ -172,17 +173,17 @@ impl<'a> StatementToRegion<'a> {
[table] => Ok((
self.ctx.current_catalog().to_owned(),
self.ctx.current_schema(),
table.value.clone(),
table.to_string_unquoted(),
)),
[schema, table] => Ok((
self.ctx.current_catalog().to_owned(),
schema.value.clone(),
table.value.clone(),
schema.to_string_unquoted(),
table.to_string_unquoted(),
)),
[catalog, schema, table] => Ok((
catalog.value.clone(),
schema.value.clone(),
table.value.clone(),
catalog.to_string_unquoted(),
schema.to_string_unquoted(),
table.to_string_unquoted(),
)),
_ => InvalidSqlSnafu {
err_msg: format!(

View File

@@ -55,6 +55,7 @@ use session::context::{Channel, QueryContextRef};
use session::table_name::table_idents_to_full_name;
use set::{set_query_timeout, set_read_preference};
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::ObjectNamePartExt;
use sql::statements::copy::{
CopyDatabase, CopyDatabaseArgument, CopyQueryToArgument, CopyTable, CopyTableArgument,
};
@@ -736,9 +737,9 @@ fn idents_to_full_database_name(
match &obj_name.0[..] {
[database] => Ok((
query_ctx.current_catalog().to_owned(),
database.value.clone(),
database.to_string_unquoted(),
)),
[catalog, database] => Ok((catalog.value.clone(), database.value.clone())),
[catalog, database] => Ok((catalog.to_string_unquoted(), database.to_string_unquoted())),
_ => InvalidSqlSnafu {
err_msg: format!(
"expect database name to be <catalog>.<database>, <database>, found: {obj_name}",

View File

@@ -69,7 +69,7 @@ impl StatementExecutor {
}
.fail();
};
Ok(value)
Ok(&value.value)
})
.collect::<Result<Vec<_>>>()?;

View File

@@ -20,7 +20,6 @@ use std::sync::Arc;
use client::{Output, OutputData, OutputMeta};
use common_base::readable_size::ReadableSize;
use common_datasource::file_format::csv::CsvFormat;
use common_datasource::file_format::json::JsonFormat;
use common_datasource::file_format::orc::{infer_orc_schema, new_orc_stream_reader, ReaderAdapter};
use common_datasource::file_format::{FileFormat, Format};
use common_datasource::lister::{Lister, Source};
@@ -33,12 +32,11 @@ use common_telemetry::{debug, tracing};
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::{
CsvConfig, CsvOpener, FileOpener, FileScanConfig, FileStream, JsonOpener,
CsvSource, FileGroup, FileScanConfigBuilder, FileSource, FileStream, JsonSource,
};
use datafusion::parquet::arrow::arrow_reader::ArrowReaderMetadata;
use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder;
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion_common::{Constraints, Statistics};
use datafusion_expr::Expr;
use datatypes::arrow::compute::can_cast_types;
use datatypes::arrow::datatypes::{DataType as ArrowDataType, Schema, SchemaRef};
@@ -69,7 +67,6 @@ enum FileMetadata {
},
Json {
schema: SchemaRef,
format: JsonFormat,
path: String,
},
Csv {
@@ -150,7 +147,6 @@ impl StatementExecutor {
.await
.context(error::InferSchemaSnafu { path: &path })?,
),
format,
path,
}),
Format::Parquet(_) => {
@@ -199,30 +195,29 @@ impl StatementExecutor {
}
}
async fn build_file_stream<F: FileOpener + Send + 'static>(
async fn build_file_stream(
&self,
opener: F,
store: &ObjectStore,
filename: &str,
file_schema: SchemaRef,
file_source: Arc<dyn FileSource>,
projection: Option<Vec<usize>>,
) -> Result<DfSendableRecordBatchStream> {
let statistics = Statistics::new_unknown(file_schema.as_ref());
let stream = FileStream::new(
&FileScanConfig {
object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used
file_schema,
file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]],
statistics,
projection: None,
limit: None,
table_partition_cols: vec![],
output_ordering: vec![],
constraints: Constraints::empty(),
},
0,
opener,
&ExecutionPlanMetricsSet::new(),
let config = FileScanConfigBuilder::new(
ObjectStoreUrl::local_filesystem(),
file_schema,
file_source.clone(),
)
.context(error::BuildFileStreamSnafu)?;
.with_file_group(FileGroup::new(vec![PartitionedFile::new(filename, 0)]))
.with_projection(projection)
.build();
let store = Arc::new(object_store_opendal::OpendalStore::new(store.clone()));
let file_opener = file_source
.with_projection(&config)
.create_file_opener(store, &config, 0);
let stream = FileStream::new(&config, 0, file_opener, &ExecutionPlanMetricsSet::new())
.context(error::BuildFileStreamSnafu)?;
Ok(Box::pin(stream))
}
@@ -246,29 +241,18 @@ impl StatementExecutor {
.project(&projection)
.context(error::ProjectSchemaSnafu)?,
);
let csv_config = Arc::new(CsvConfig::new(
DEFAULT_BATCH_SIZE,
schema.clone(),
Some(projection.clone()),
format.has_header,
format.delimiter,
b'"',
None,
Arc::new(object_store_opendal::OpendalStore::new(
object_store.clone(),
)),
None,
));
let projected_file_schema = Arc::new(
schema
.project(&projection)
.context(error::ProjectSchemaSnafu)?,
);
let csv_source = CsvSource::new(format.has_header, format.delimiter, b'"')
.with_schema(schema.clone())
.with_batch_size(DEFAULT_BATCH_SIZE);
let stream = self
.build_file_stream(
CsvOpener::new(csv_config, format.compression_type.into()),
object_store,
path,
projected_file_schema,
schema.clone(),
csv_source,
Some(projection),
)
.await?;
@@ -280,32 +264,24 @@ impl StatementExecutor {
.context(error::PhysicalExprSnafu)?,
))
}
FileMetadata::Json {
format,
path,
schema,
} => {
let projected_file_schema = Arc::new(
schema
.project(&projection)
.context(error::ProjectSchemaSnafu)?,
);
FileMetadata::Json { path, schema } => {
let output_schema = Arc::new(
compat_schema
.project(&projection)
.context(error::ProjectSchemaSnafu)?,
);
let store = object_store_opendal::OpendalStore::new(object_store.clone());
let json_source = JsonSource::new()
.with_schema(schema.clone())
.with_batch_size(DEFAULT_BATCH_SIZE);
let stream = self
.build_file_stream(
JsonOpener::new(
DEFAULT_BATCH_SIZE,
projected_file_schema.clone(),
format.compression_type.into(),
Arc::new(store),
),
object_store,
path,
projected_file_schema,
schema.clone(),
json_source,
Some(projection),
)
.await?;

View File

@@ -1817,27 +1817,27 @@ fn convert_one_expr(
// col, val
(Expr::Identifier(ident), Expr::Value(value)) => {
let (column_name, data_type) = convert_identifier(ident, column_name_and_type)?;
let value = convert_value(value, data_type, timezone, None)?;
let value = convert_value(&value.value, data_type, timezone, None)?;
(Operand::Column(column_name), op, Operand::Value(value))
}
(Expr::Identifier(ident), Expr::UnaryOp { op: unary_op, expr })
if let Expr::Value(v) = &**expr =>
{
let (column_name, data_type) = convert_identifier(ident, column_name_and_type)?;
let value = convert_value(v, data_type, timezone, Some(*unary_op))?;
let value = convert_value(&v.value, data_type, timezone, Some(*unary_op))?;
(Operand::Column(column_name), op, Operand::Value(value))
}
// val, col
(Expr::Value(value), Expr::Identifier(ident)) => {
let (column_name, data_type) = convert_identifier(ident, column_name_and_type)?;
let value = convert_value(value, data_type, timezone, None)?;
let value = convert_value(&value.value, data_type, timezone, None)?;
(Operand::Value(value), op, Operand::Column(column_name))
}
(Expr::UnaryOp { op: unary_op, expr }, Expr::Identifier(ident))
if let Expr::Value(v) = &**expr =>
{
let (column_name, data_type) = convert_identifier(ident, column_name_and_type)?;
let value = convert_value(v, data_type, timezone, Some(*unary_op))?;
let value = convert_value(&v.value, data_type, timezone, Some(*unary_op))?;
(Operand::Value(value), op, Operand::Column(column_name))
}
(Expr::BinaryOp { .. }, Expr::BinaryOp { .. }) => {

View File

@@ -25,6 +25,7 @@ use session::ReadPreference;
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::{Expr, Ident, Value};
use sql::statements::set_variables::SetVariables;
use sqlparser::ast::ValueWithSpan;
use crate::error::{InvalidConfigValueSnafu, InvalidSqlSnafu, NotSupportedSnafu, Result};
@@ -43,8 +44,14 @@ pub fn set_read_preference(exprs: Vec<Expr>, ctx: QueryContextRef) -> Result<()>
})?;
match read_preference_expr {
Expr::Value(Value::SingleQuotedString(expr))
| Expr::Value(Value::DoubleQuotedString(expr)) => {
Expr::Value(ValueWithSpan {
value: Value::SingleQuotedString(expr),
..
})
| Expr::Value(ValueWithSpan {
value: Value::DoubleQuotedString(expr),
..
}) => {
match ReadPreference::from_str(expr.as_str().to_lowercase().as_str()) {
Ok(read_preference) => ctx.set_read_preference(read_preference),
Err(_) => {
@@ -74,7 +81,14 @@ pub fn set_timezone(exprs: Vec<Expr>, ctx: QueryContextRef) -> Result<()> {
feat: "No timezone find in set variable statement",
})?;
match tz_expr {
Expr::Value(Value::SingleQuotedString(tz)) | Expr::Value(Value::DoubleQuotedString(tz)) => {
Expr::Value(ValueWithSpan {
value: Value::SingleQuotedString(tz),
..
})
| Expr::Value(ValueWithSpan {
value: Value::DoubleQuotedString(tz),
..
}) => {
match Timezone::from_tz_string(tz.as_str()) {
Ok(timezone) => ctx.set_timezone(timezone),
Err(_) => {
@@ -110,7 +124,7 @@ pub fn set_bytea_output(exprs: Vec<Expr>, ctx: QueryContextRef) -> Result<()> {
.fail();
};
ctx.configuration_parameter().set_postgres_bytea_output(
PGByteaOutputValue::try_from(value.clone()).context(InvalidConfigValueSnafu)?,
PGByteaOutputValue::try_from(value.value.clone()).context(InvalidConfigValueSnafu)?,
);
Ok(())
}
@@ -120,8 +134,14 @@ pub fn set_search_path(exprs: Vec<Expr>, ctx: QueryContextRef) -> Result<()> {
feat: "No search path find in set variable statement",
})?;
match search_expr {
Expr::Value(Value::SingleQuotedString(search_path))
| Expr::Value(Value::DoubleQuotedString(search_path)) => {
Expr::Value(ValueWithSpan {
value: Value::SingleQuotedString(search_path),
..
})
| Expr::Value(ValueWithSpan {
value: Value::DoubleQuotedString(search_path),
..
}) => {
ctx.set_current_schema(search_path);
Ok(())
}
@@ -147,7 +167,10 @@ pub fn validate_client_encoding(set: SetVariables) -> Result<()> {
.fail();
};
let encoding = match encoding {
Expr::Value(Value::SingleQuotedString(x))
Expr::Value(ValueWithSpan {
value: Value::SingleQuotedString(x),
..
})
| Expr::Identifier(Ident {
value: x,
quote_style: _,
@@ -210,19 +233,20 @@ fn try_parse_datestyle(expr: &Expr) -> Result<(Option<PGDateTimeStyle>, Option<P
quote_style: _,
span: _,
})
| Expr::Value(Value::SingleQuotedString(s))
| Expr::Value(Value::DoubleQuotedString(s)) => {
s.split(',')
.map(|s| s.trim())
.try_fold((None, None), |(style, order), s| match try_parse_str(s)? {
ParsedDateStyle::Order(o) => {
Ok((style, merge_datestyle_value(order, Some(o))?))
}
ParsedDateStyle::Style(s) => {
Ok((merge_datestyle_value(style, Some(s))?, order))
}
})
}
| Expr::Value(ValueWithSpan {
value: Value::SingleQuotedString(s),
..
})
| Expr::Value(ValueWithSpan {
value: Value::DoubleQuotedString(s),
..
}) => s
.split(',')
.map(|s| s.trim())
.try_fold((None, None), |(style, order), s| match try_parse_str(s)? {
ParsedDateStyle::Order(o) => Ok((style, merge_datestyle_value(order, Some(o))?)),
ParsedDateStyle::Style(s) => Ok((merge_datestyle_value(style, Some(s))?, order)),
}),
_ => NotSupportedSnafu {
feat: "Not supported expression for datestyle",
}
@@ -237,7 +261,10 @@ pub fn set_allow_query_fallback(exprs: Vec<Expr>, ctx: QueryContextRef) -> Resul
feat: "No allow query fallback value find in set variable statement",
})?;
match allow_fallback_expr {
Expr::Value(Value::Boolean(allow)) => {
Expr::Value(ValueWithSpan {
value: Value::Boolean(allow),
span: _,
}) => {
ctx.configuration_parameter()
.set_allow_query_fallback(*allow);
Ok(())
@@ -279,7 +306,10 @@ pub fn set_query_timeout(exprs: Vec<Expr>, ctx: QueryContextRef) -> Result<()> {
feat: "No timeout value find in set query timeout statement",
})?;
match timeout_expr {
Expr::Value(Value::Number(timeout, _)) => {
Expr::Value(ValueWithSpan {
value: Value::Number(timeout, _),
..
}) => {
match timeout.parse::<u64>() {
Ok(timeout) => ctx.set_query_timeout(Duration::from_millis(timeout)),
Err(_) => {
@@ -292,8 +322,14 @@ pub fn set_query_timeout(exprs: Vec<Expr>, ctx: QueryContextRef) -> Result<()> {
Ok(())
}
// Postgres supports time units, e.g. SET STATEMENT_TIMEOUT = '50ms';
Expr::Value(Value::SingleQuotedString(timeout))
| Expr::Value(Value::DoubleQuotedString(timeout)) => {
Expr::Value(ValueWithSpan {
value: Value::SingleQuotedString(timeout),
..
})
| Expr::Value(ValueWithSpan {
value: Value::DoubleQuotedString(timeout),
..
}) => {
if ctx.channel() != Postgres {
return NotSupportedSnafu {
feat: format!("Invalid timeout expr {} in set variable statement", timeout),

View File

@@ -20,6 +20,7 @@ use partition::manager::PartitionInfo;
use session::context::QueryContextRef;
use session::table_name::table_idents_to_full_name;
use snafu::{OptionExt, ResultExt};
use sql::ast::ObjectNamePartExt;
use sql::statements::create::Partitions;
use sql::statements::show::{
ShowColumns, ShowCreateFlow, ShowCreateView, ShowDatabases, ShowFlows, ShowIndex, ShowKind,
@@ -245,8 +246,8 @@ impl StatementExecutor {
) -> Result<Output> {
let obj_name = &show.flow_name;
let (catalog_name, flow_name) = match &obj_name.0[..] {
[table] => (query_ctx.current_catalog().to_string(), table.value.clone()),
[catalog, table] => (catalog.value.clone(), table.value.clone()),
[table] => (query_ctx.current_catalog().to_string(), table.to_string_unquoted()),
[catalog, table] => (catalog.to_string_unquoted(), table.to_string_unquoted()),
_ => {
return InvalidSqlSnafu {
err_msg: format!(