chore: update datafusion family (#5814)

Author: LFC
Date: 2025-04-09 10:20:55 +08:00
Committed by: GitHub
Parent: 7e3cad8a55
Commit: 311727939d
83 changed files with 902 additions and 1027 deletions

Cargo.lock (generated, 817 changes): file diff suppressed because it is too large

View File

@@ -90,11 +90,11 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
# See for more details: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.6"
arrow = { version = "53.0.0", features = ["prettyprint"] }
arrow-array = { version = "53.0.0", default-features = false, features = ["chrono-tz"] }
arrow-flight = "53.0"
arrow-ipc = { version = "53.0.0", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "53.0", features = ["serde"] }
arrow = { version = "54.2", features = ["prettyprint"] }
arrow-array = { version = "54.2", default-features = false, features = ["chrono-tz"] }
arrow-flight = "54.2"
arrow-ipc = { version = "54.2", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "54.2", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
# Remember to update axum-extra, axum-macros when updating axum
@@ -113,15 +113,15 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
dashmap = "6.1"
datafusion = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-optimizer = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-optimizer = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
deadpool = "0.12"
deadpool-postgres = "0.14"
derive_builder = "0.20"
@@ -148,6 +148,7 @@ moka = "0.12"
nalgebra = "0.33"
notify = "8.0"
num_cpus = "1.16"
object_store_opendal = "0.49.0"
once_cell = "1.18"
opentelemetry-proto = { version = "0.27", features = [
"gen-tonic",
@@ -157,7 +158,7 @@ opentelemetry-proto = { version = "0.27", features = [
"logs",
] }
parking_lot = "0.12"
parquet = { version = "53.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
@@ -191,19 +192,18 @@ simd-json = "0.15"
similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "e98e6b322426a9d397a71efef17075966223c089", features = [
"visitor",
"serde",
] } # branch = "v0.54.x"
sqlx = { version = "0.8", features = [
"runtime-tokio-rustls",
"mysql",
"postgres",
"chrono",
] }
sysinfo = "0.33"
# on branch v0.52.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
"visitor",
"serde",
] } # on branch v0.44.x
strum = { version = "0.27", features = ["derive"] }
sysinfo = "0.33"
tempfile = "3"
tokio = { version = "1.40", features = ["full"] }
tokio-postgres = "0.7"

View File

@@ -437,10 +437,7 @@ mod tests {
}
fn column(name: &str) -> Expr {
Expr::Column(Column {
relation: None,
name: name.to_string(),
})
Expr::Column(Column::from_name(name))
}
fn string_literal(v: &str) -> Expr {

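This is the first of many hunks in the commit that collapse a hand-written `Column` struct literal into `Column::from_name`. A minimal sketch of the helper these test modules converge on, not part of the commit (the suggestion that newer DataFusion adds fields to `Column`, which makes the literal form more awkward, is an inference rather than something the diff states):

use datafusion_common::Column;
use datafusion_expr::Expr;

// `from_name` builds an unqualified column reference (relation = None),
// which is exactly what the removed struct literals spelled out by hand.
fn column(name: &str) -> Expr {
    Expr::Column(Column::from_name(name))
}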
View File

@@ -31,7 +31,8 @@ derive_builder.workspace = true
futures.workspace = true
lazy_static.workspace = true
object-store.workspace = true
orc-rust = { version = "0.5", default-features = false, features = [
object_store_opendal.workspace = true
orc-rust = { git = "https://github.com/datafusion-contrib/orc-rust", rev = "3134cab581a8e91b942d6a23aca2916ea965f6bb", default-features = false, features = [
"async",
] }
parquet.workspace = true

View File

@@ -19,6 +19,7 @@ use std::str::FromStr;
use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder, ZstdDecoder};
use async_compression::tokio::write;
use bytes::Bytes;
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use futures::Stream;
use serde::{Deserialize, Serialize};
use strum::EnumIter;
@@ -192,3 +193,15 @@ macro_rules! impl_compression_type {
}
impl_compression_type!((Gzip, Gzip), (Bzip2, Bz), (Xz, Xz), (Zstd, Zstd));
impl From<CompressionType> for FileCompressionType {
fn from(t: CompressionType) -> Self {
match t {
CompressionType::Gzip => FileCompressionType::GZIP,
CompressionType::Bzip2 => FileCompressionType::BZIP2,
CompressionType::Xz => FileCompressionType::XZ,
CompressionType::Zstd => FileCompressionType::ZSTD,
CompressionType::Uncompressed => FileCompressionType::UNCOMPRESSED,
}
}
}
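This `From` impl is what lets later hunks hand DataFusion's openers `format.compression_type.into()` directly. A minimal usage sketch, not part of the commit (paths follow the imports used elsewhere in this crate; the helper name is illustrative):

use datafusion::datasource::file_format::file_compression_type::FileCompressionType;

use crate::compression::CompressionType;

// With the impl above in scope, conversion is a plain `.into()`.
fn to_df_compression(t: CompressionType) -> FileCompressionType {
    t.into()
}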

View File

@@ -14,18 +14,14 @@
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use arrow::csv;
use arrow::csv::reader::Format;
use arrow::record_batch::RecordBatch;
use arrow_schema::{Schema, SchemaRef};
use arrow_schema::Schema;
use async_trait::async_trait;
use common_runtime;
use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
use datafusion::error::Result as DataFusionResult;
use datafusion::physical_plan::SendableRecordBatchStream;
use derive_builder::Builder;
use object_store::ObjectStore;
use snafu::ResultExt;
use tokio_util::compat::FuturesAsyncReadCompatExt;
@@ -34,7 +30,7 @@ use tokio_util::io::SyncIoBridge;
use crate::buffered_writer::DfRecordBatchEncoder;
use crate::compression::CompressionType;
use crate::error::{self, Result};
use crate::file_format::{self, open_with_decoder, stream_to_file, FileFormat};
use crate::file_format::{self, stream_to_file, FileFormat};
use crate::share_buffer::SharedBuffer;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -99,66 +95,6 @@ impl Default for CsvFormat {
}
}
#[derive(Debug, Clone, Builder)]
pub struct CsvConfig {
batch_size: usize,
file_schema: SchemaRef,
#[builder(default = "None")]
file_projection: Option<Vec<usize>>,
#[builder(default = "true")]
has_header: bool,
#[builder(default = "b','")]
delimiter: u8,
}
impl CsvConfig {
fn builder(&self) -> csv::ReaderBuilder {
let mut builder = csv::ReaderBuilder::new(self.file_schema.clone())
.with_delimiter(self.delimiter)
.with_batch_size(self.batch_size)
.with_header(self.has_header);
if let Some(proj) = &self.file_projection {
builder = builder.with_projection(proj.clone());
}
builder
}
}
#[derive(Debug, Clone)]
pub struct CsvOpener {
config: Arc<CsvConfig>,
object_store: Arc<ObjectStore>,
compression_type: CompressionType,
}
impl CsvOpener {
/// Return a new [`CsvOpener`]. The caller must ensure that [`CsvConfig`]'s `file_schema` corresponds to the file being opened.
pub fn new(
config: CsvConfig,
object_store: ObjectStore,
compression_type: CompressionType,
) -> Self {
CsvOpener {
config: Arc::new(config),
object_store: Arc::new(object_store),
compression_type,
}
}
}
impl FileOpener for CsvOpener {
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
open_with_decoder(
self.object_store.clone(),
meta.location().to_string(),
self.compression_type,
|| Ok(self.config.builder().build_decoder()),
)
}
}
#[async_trait]
impl FileFormat for CsvFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {

View File

@@ -15,18 +15,14 @@
use std::collections::HashMap;
use std::io::BufReader;
use std::str::FromStr;
use std::sync::Arc;
use arrow::datatypes::SchemaRef;
use arrow::json;
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
use arrow::json::writer::LineDelimited;
use arrow::json::{self, ReaderBuilder};
use arrow::record_batch::RecordBatch;
use arrow_schema::Schema;
use async_trait::async_trait;
use common_runtime;
use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::physical_plan::SendableRecordBatchStream;
use object_store::ObjectStore;
use snafu::ResultExt;
@@ -36,7 +32,7 @@ use tokio_util::io::SyncIoBridge;
use crate::buffered_writer::DfRecordBatchEncoder;
use crate::compression::CompressionType;
use crate::error::{self, Result};
use crate::file_format::{self, open_with_decoder, stream_to_file, FileFormat};
use crate::file_format::{self, stream_to_file, FileFormat};
use crate::share_buffer::SharedBuffer;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -113,47 +109,6 @@ impl FileFormat for JsonFormat {
}
}
#[derive(Debug, Clone)]
pub struct JsonOpener {
batch_size: usize,
projected_schema: SchemaRef,
object_store: Arc<ObjectStore>,
compression_type: CompressionType,
}
impl JsonOpener {
/// Return a new [`JsonOpener`]. Any fields not present in `projected_schema` will be ignored.
pub fn new(
batch_size: usize,
projected_schema: SchemaRef,
object_store: ObjectStore,
compression_type: CompressionType,
) -> Self {
Self {
batch_size,
projected_schema,
object_store: Arc::new(object_store),
compression_type,
}
}
}
impl FileOpener for JsonOpener {
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
open_with_decoder(
self.object_store.clone(),
meta.location().to_string(),
self.compression_type,
|| {
ReaderBuilder::new(self.projected_schema.clone())
.with_batch_size(self.batch_size)
.build_decoder()
.map_err(DataFusionError::from)
},
)
}
}
pub async fn stream_to_json(
stream: SendableRecordBatchStream,
store: ObjectStore,

View File

@@ -19,7 +19,10 @@ use std::vec;
use common_test_util::find_workspace_path;
use datafusion::assert_batches_eq;
use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use datafusion::datasource::physical_plan::{
CsvConfig, CsvOpener, FileOpener, FileScanConfig, FileStream, JsonOpener, ParquetExec,
};
use datafusion::execution::context::TaskContext;
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion::physical_plan::ExecutionPlan;
@@ -27,14 +30,11 @@ use datafusion::prelude::SessionContext;
use futures::StreamExt;
use super::FORMAT_TYPE;
use crate::compression::CompressionType;
use crate::error;
use crate::file_format::csv::{CsvConfigBuilder, CsvOpener};
use crate::file_format::json::JsonOpener;
use crate::file_format::orc::{OrcFormat, OrcOpener};
use crate::file_format::parquet::DefaultParquetFileReaderFactory;
use crate::file_format::{FileFormat, Format};
use crate::test_util::{self, scan_config, test_basic_schema, test_store};
use crate::test_util::{scan_config, test_basic_schema, test_store};
use crate::{error, test_util};
struct Test<'a, T: FileOpener> {
config: FileScanConfig,
@@ -62,15 +62,18 @@ impl<T: FileOpener> Test<'_, T> {
#[tokio::test]
async fn test_json_opener() {
let store = test_store("/");
let store = Arc::new(object_store_opendal::OpendalStore::new(store));
let schema = test_basic_schema();
let json_opener = JsonOpener::new(
100,
schema.clone(),
store.clone(),
CompressionType::Uncompressed,
);
let json_opener = || {
JsonOpener::new(
test_util::TEST_BATCH_SIZE,
schema.clone(),
FileCompressionType::UNCOMPRESSED,
store.clone(),
)
};
let path = &find_workspace_path("/src/common/datasource/tests/json/basic.json")
.display()
@@ -78,7 +81,7 @@ async fn test_json_opener() {
let tests = [
Test {
config: scan_config(schema.clone(), None, path),
opener: json_opener.clone(),
opener: json_opener(),
expected: vec![
"+-----+-------+",
"| num | str |",
@@ -91,7 +94,7 @@ async fn test_json_opener() {
},
Test {
config: scan_config(schema.clone(), Some(1), path),
opener: json_opener.clone(),
opener: json_opener(),
expected: vec![
"+-----+------+",
"| num | str |",
@@ -110,23 +113,30 @@ async fn test_json_opener() {
#[tokio::test]
async fn test_csv_opener() {
let store = test_store("/");
let store = Arc::new(object_store_opendal::OpendalStore::new(store));
let schema = test_basic_schema();
let path = &find_workspace_path("/src/common/datasource/tests/csv/basic.csv")
.display()
.to_string();
let csv_conf = CsvConfigBuilder::default()
.batch_size(test_util::TEST_BATCH_SIZE)
.file_schema(schema.clone())
.build()
.unwrap();
let csv_config = Arc::new(CsvConfig::new(
test_util::TEST_BATCH_SIZE,
schema.clone(),
None,
true,
b',',
b'"',
None,
store,
None,
));
let csv_opener = CsvOpener::new(csv_conf, store, CompressionType::Uncompressed);
let csv_opener = || CsvOpener::new(csv_config.clone(), FileCompressionType::UNCOMPRESSED);
let tests = [
Test {
config: scan_config(schema.clone(), None, path),
opener: csv_opener.clone(),
opener: csv_opener(),
expected: vec![
"+-----+-------+",
"| num | str |",
@@ -139,7 +149,7 @@ async fn test_csv_opener() {
},
Test {
config: scan_config(schema.clone(), Some(1), path),
opener: csv_opener.clone(),
opener: csv_opener(),
expected: vec![
"+-----+------+",
"| num | str |",

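The test changes above capture the wider migration in this commit: the crate's hand-rolled `CsvConfig`/`CsvOpener`/`JsonOpener` are dropped in favor of DataFusion's own openers, fed an `object_store_opendal::OpendalStore` wrapped around the repo's OpenDAL operator. A condensed sketch of that construction path, not part of the commit; the batch size, delimiter, and quote values are placeholders, and the two bare `None` arguments are carried over from the call sites without interpreting them:

use std::sync::Arc;

use arrow_schema::SchemaRef;
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use datafusion::datasource::physical_plan::{CsvConfig, CsvOpener, JsonOpener};
use object_store::ObjectStore; // the repo's OpenDAL-backed operator type

// Wrap the operator so DataFusion's openers can read through it, then build
// both openers the way the updated tests and production code do.
fn build_openers(store: ObjectStore, schema: SchemaRef) -> (CsvOpener, JsonOpener) {
    let store = Arc::new(object_store_opendal::OpendalStore::new(store));
    let csv_config = Arc::new(CsvConfig::new(
        8192,           // batch size (placeholder)
        schema.clone(), // file schema
        None,           // projection
        true,           // has_header
        b',',           // delimiter
        b'"',           // quote
        None,
        store.clone(),
        None,
    ));
    let csv = CsvOpener::new(csv_config, FileCompressionType::UNCOMPRESSED);
    let json = JsonOpener::new(8192, schema, FileCompressionType::UNCOMPRESSED, store);
    (csv, json)
}

The same shape shows up again in the later hunks that rebuild the CSV and JSON scan streams outside the tests.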
View File

@@ -16,17 +16,19 @@ use std::sync::Arc;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datafusion::common::Statistics;
use datafusion::common::{Constraints, Statistics};
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::{FileScanConfig, FileStream};
use datafusion::datasource::physical_plan::{
CsvConfig, CsvOpener, FileScanConfig, FileStream, JsonOpener,
};
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use object_store::services::Fs;
use object_store::ObjectStore;
use crate::compression::CompressionType;
use crate::file_format::csv::{stream_to_csv, CsvConfigBuilder, CsvOpener};
use crate::file_format::json::{stream_to_json, JsonOpener};
use crate::file_format::csv::stream_to_csv;
use crate::file_format::json::stream_to_json;
use crate::test_util;
pub const TEST_BATCH_SIZE: usize = 100;
@@ -74,6 +76,7 @@ pub fn scan_config(file_schema: SchemaRef, limit: Option<usize>, filename: &str)
object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used
file_schema,
file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]],
constraints: Constraints::empty(),
statistics,
projection: None,
limit,
@@ -90,8 +93,8 @@ pub async fn setup_stream_to_json_test(origin_path: &str, threshold: impl Fn(usi
let json_opener = JsonOpener::new(
test_util::TEST_BATCH_SIZE,
schema.clone(),
store.clone(),
CompressionType::Uncompressed,
FileCompressionType::UNCOMPRESSED,
Arc::new(object_store_opendal::OpendalStore::new(store.clone())),
);
let size = store.read(origin_path).await.unwrap().len();
@@ -124,13 +127,19 @@ pub async fn setup_stream_to_csv_test(origin_path: &str, threshold: impl Fn(usiz
let schema = test_basic_schema();
let csv_conf = CsvConfigBuilder::default()
.batch_size(test_util::TEST_BATCH_SIZE)
.file_schema(schema.clone())
.build()
.unwrap();
let csv_config = Arc::new(CsvConfig::new(
TEST_BATCH_SIZE,
schema.clone(),
None,
true,
b',',
b'"',
None,
Arc::new(object_store_opendal::OpendalStore::new(store.clone())),
None,
));
let csv_opener = CsvOpener::new(csv_conf, store.clone(), CompressionType::Uncompressed);
let csv_opener = CsvOpener::new(csv_config, FileCompressionType::UNCOMPRESSED);
let size = store.read(origin_path).await.unwrap().len();

View File

@@ -163,7 +163,7 @@ mod tests {
];
let args = ScalarFunctionArgs {
args: &args,
args,
number_rows: 4,
return_type: &ConcreteDataType::boolean_datatype().as_arrow_type(),
};

View File

@@ -295,6 +295,7 @@ pub fn datafusion_status_code<T: ErrorExt + 'static>(
default_status.unwrap_or(StatusCode::EngineExecuteQuery)
}
}
DataFusionError::Diagnostic(_, e) => datafusion_status_code::<T>(e, default_status),
_ => default_status.unwrap_or(StatusCode::EngineExecuteQuery),
}
}

View File

@@ -54,10 +54,7 @@ pub fn build_filter_from_timestamp(
time_range: Option<&TimestampRange>,
) -> Option<Expr> {
let time_range = time_range?;
let ts_col_expr = Expr::Column(Column {
relation: None,
name: ts_col_name.to_string(),
});
let ts_col_expr = Expr::Column(Column::from_name(ts_col_name));
match (time_range.start(), time_range.end()) {
(None, None) => None,

View File

@@ -134,7 +134,7 @@ impl From<TypeSignature> for DfTypeSignature {
}
TypeSignature::Uniform(n, types) => {
if n == 0 {
return DfTypeSignature::NullAry;
return DfTypeSignature::Nullary;
}
DfTypeSignature::Uniform(n, concrete_types_to_arrow_types(types))
}
@@ -143,7 +143,7 @@ impl From<TypeSignature> for DfTypeSignature {
}
TypeSignature::Any(n) => {
if n == 0 {
return DfTypeSignature::NullAry;
return DfTypeSignature::Nullary;
}
DfTypeSignature::Any(n)
}
@@ -151,7 +151,7 @@ impl From<TypeSignature> for DfTypeSignature {
DfTypeSignature::OneOf(ts.into_iter().map(Into::into).collect())
}
TypeSignature::VariadicAny => DfTypeSignature::VariadicAny,
TypeSignature::NullAry => DfTypeSignature::NullAry,
TypeSignature::NullAry => DfTypeSignature::Nullary,
}
}
}

View File

@@ -21,9 +21,8 @@ use common_recordbatch::SendableRecordBatchStream;
use datafusion::execution::context::TaskContext;
use datafusion::execution::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datafusion::physical_expr::{EquivalenceProperties, Partitioning, PhysicalSortExpr};
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties,
};
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
use datafusion_common::DataFusionError;
use datatypes::arrow::datatypes::SchemaRef as ArrowSchemaRef;
use datatypes::schema::SchemaRef;
@@ -53,7 +52,8 @@ impl StreamScanAdapter {
let properties = PlanProperties::new(
EquivalenceProperties::new(arrow_schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
);
Self {

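This is the first of the `PlanProperties` updates that run through the rest of the commit: the old third argument, an `ExecutionMode`, is split into an emission type plus a boundedness flag, and plans that wrap a child now forward the child's `emission_type` and `boundedness` instead of its `execution_mode`. A minimal sketch of the leaf-plan case, not part of the commit (function name and schema type are illustrative):

use datafusion::arrow::datatypes::SchemaRef;
use datafusion::physical_expr::{EquivalenceProperties, Partitioning};
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::PlanProperties;

// Single-partition, bounded, incrementally emitting: the combination this
// commit uses wherever it previously wrote `ExecutionMode::Bounded`.
fn leaf_plan_properties(schema: SchemaRef) -> PlanProperties {
    PlanProperties::new(
        EquivalenceProperties::new(schema),
        Partitioning::UnknownPartitioning(1),
        EmissionType::Incremental,
        Boundedness::Bounded,
    )
}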
View File

@@ -271,10 +271,7 @@ mod test {
fn unsupported_filter_op() {
// `+` is not supported
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "foo".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("foo"))),
op: Operator::Plus,
right: Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))),
});
@@ -290,25 +287,16 @@ mod test {
// two column is not supported
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "foo".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("foo"))),
op: Operator::Eq,
right: Box::new(Expr::Column(Column {
relation: None,
name: "bar".to_string(),
})),
right: Box::new(Expr::Column(Column::from_name("bar"))),
});
assert!(SimpleFilterEvaluator::try_new(&expr).is_none());
// compound expr is not supported
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "foo".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("foo"))),
op: Operator::Eq,
right: Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))),
})),
@@ -322,10 +310,7 @@ mod test {
fn supported_filter_op() {
// equal
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "foo".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("foo"))),
op: Operator::Eq,
right: Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))),
});
@@ -335,10 +320,7 @@ mod test {
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))),
op: Operator::Lt,
right: Box::new(Expr::Column(Column {
relation: None,
name: "foo".to_string(),
})),
right: Box::new(Expr::Column(Column::from_name("foo"))),
});
let evaluator = SimpleFilterEvaluator::try_new(&expr).unwrap();
assert_eq!(evaluator.op, Operator::Gt);
@@ -348,10 +330,7 @@ mod test {
#[test]
fn run_on_array() {
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "foo".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("foo"))),
op: Operator::Eq,
right: Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))),
});
@@ -373,10 +352,7 @@ mod test {
#[test]
fn run_on_scalar() {
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "foo".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("foo"))),
op: Operator::Lt,
right: Box::new(Expr::Literal(ScalarValue::Int64(Some(1)))),
});

View File

@@ -29,6 +29,7 @@ datafusion-expr.workspace = true
datatypes.workspace = true
futures.workspace = true
object-store.workspace = true
object_store_opendal.workspace = true
serde = { version = "1.0", features = ["derive"] }
serde_json.workspace = true
snafu.workspace = true

View File

@@ -128,14 +128,6 @@ pub enum Error {
source: common_datasource::error::Error,
},
#[snafu(display("Failed to build csv config"))]
BuildCsvConfig {
#[snafu(source)]
error: common_datasource::file_format::csv::CsvConfigBuilderError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to build stream"))]
BuildStream {
#[snafu(source)]
@@ -224,8 +216,7 @@ impl ErrorExt for Error {
use Error::*;
match self {
BuildCsvConfig { .. }
| ProjectArrowSchema { .. }
ProjectArrowSchema { .. }
| ProjectSchema { .. }
| MissingRequiredField { .. }
| Unsupported { .. }

View File

@@ -14,17 +14,19 @@
use std::sync::Arc;
use common_datasource::file_format::csv::{CsvConfigBuilder, CsvFormat, CsvOpener};
use common_datasource::file_format::json::{JsonFormat, JsonOpener};
use common_datasource::file_format::csv::CsvFormat;
use common_datasource::file_format::json::JsonFormat;
use common_datasource::file_format::orc::{OrcFormat, OrcOpener};
use common_datasource::file_format::parquet::{DefaultParquetFileReaderFactory, ParquetFormat};
use common_datasource::file_format::Format;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::SendableRecordBatchStream;
use datafusion::common::{Statistics, ToDFSchema};
use datafusion::common::{Constraints, Statistics, ToDFSchema};
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
use datafusion::datasource::physical_plan::{
CsvConfig, CsvOpener, FileOpener, FileScanConfig, FileStream, JsonOpener, ParquetExec,
};
use datafusion::physical_expr::create_physical_expr;
use datafusion::physical_expr::execution_props::ExecutionProps;
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
@@ -48,20 +50,21 @@ fn build_csv_opener(
file_schema: Arc<ArrowSchema>,
config: &ScanPlanConfig,
format: &CsvFormat,
) -> Result<CsvOpener> {
let csv_config = CsvConfigBuilder::default()
.batch_size(DEFAULT_BATCH_SIZE)
.file_schema(file_schema)
.file_projection(config.projection.cloned())
.delimiter(format.delimiter)
.has_header(format.has_header)
.build()
.context(error::BuildCsvConfigSnafu)?;
Ok(CsvOpener::new(
csv_config,
config.store.clone(),
format.compression_type,
))
) -> CsvOpener {
let csv_config = Arc::new(CsvConfig::new(
DEFAULT_BATCH_SIZE,
file_schema,
config.projection.cloned(),
format.has_header,
format.delimiter,
b'"',
None,
Arc::new(object_store_opendal::OpendalStore::new(
config.store.clone(),
)),
None,
));
CsvOpener::new(csv_config, format.compression_type.into())
}
fn build_json_opener(
@@ -78,11 +81,12 @@ fn build_json_opener(
} else {
file_schema
};
let store = object_store_opendal::OpendalStore::new(config.store.clone());
Ok(JsonOpener::new(
DEFAULT_BATCH_SIZE,
projected_schema,
config.store.clone(),
format.compression_type,
format.compression_type.into(),
Arc::new(store),
))
}
@@ -115,6 +119,7 @@ fn build_record_batch_stream<T: FileOpener + Send + 'static>(
limit,
table_partition_cols: vec![],
output_ordering: vec![],
constraints: Constraints::empty(),
},
0, // partition: hard-code
opener,
@@ -132,7 +137,7 @@ fn new_csv_stream(
format: &CsvFormat,
) -> Result<SendableRecordBatchStream> {
let file_schema = config.file_schema.arrow_schema().clone();
let opener = build_csv_opener(file_schema.clone(), config, format)?;
let opener = build_csv_opener(file_schema.clone(), config, format);
// push down limit only if there is no filter
let limit = config.filters.is_empty().then_some(config.limit).flatten();
build_record_batch_stream(opener, file_schema, config.files, config.projection, limit)
@@ -173,6 +178,7 @@ fn new_parquet_stream_with_exec_plan(
.iter()
.map(|filename| PartitionedFile::new(filename.to_string(), 0))
.collect::<Vec<_>>()],
constraints: Constraints::empty(),
statistics: Statistics::new_unknown(file_schema.as_ref()),
projection: projection.cloned(),
limit: *limit,

View File

@@ -39,7 +39,8 @@ use datafusion_common::tree_node::{
use datafusion_common::{Column, DFSchema, ScalarValue};
use datafusion_expr::utils::merge_schema;
use datafusion_expr::{
BinaryExpr, Expr, Operator, Projection, ScalarUDFImpl, Signature, TypeSignature, Volatility,
BinaryExpr, ColumnarValue, Expr, Operator, Projection, ScalarFunctionArgs, ScalarUDFImpl,
Signature, TypeSignature, Volatility,
};
use query::parser::QueryLanguageParser;
use query::query_engine::DefaultSerializer;
@@ -518,10 +519,10 @@ impl ScalarUDFImpl for TumbleExpand {
})
}
fn invoke(
fn invoke_with_args(
&self,
_args: &[datafusion_expr::ColumnarValue],
) -> Result<datafusion_expr::ColumnarValue, DataFusionError> {
_args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
Err(DataFusionError::Plan(
"This function should not be executed by datafusion".to_string(),
))

View File

@@ -21,6 +21,7 @@ use common_telemetry::debug;
use datafusion::execution::SessionStateBuilder;
use datafusion::functions::all_default_functions;
use datafusion_physical_expr::PhysicalExpr;
use datafusion_substrait::logical_plan::consumer::DefaultSubstraitConsumer;
use datatypes::data_type::ConcreteDataType as CDT;
use snafu::{ensure, OptionExt, ResultExt};
use substrait_proto::proto::expression::field_reference::ReferenceType::DirectReference;
@@ -88,15 +89,13 @@ pub(crate) async fn from_scalar_fn_to_df_fn_impl(
};
let schema = input_schema.to_df_schema()?;
let df_expr = substrait::df_logical_plan::consumer::from_substrait_rex(
&SessionStateBuilder::new()
.with_scalar_functions(all_default_functions())
.build(),
&e,
&schema,
&extensions.to_extensions(),
)
.await;
let extensions = extensions.to_extensions();
let session_state = SessionStateBuilder::new()
.with_scalar_functions(all_default_functions())
.build();
let consumer = DefaultSubstraitConsumer::new(&extensions, &session_state);
let df_expr =
substrait::df_logical_plan::consumer::from_substrait_rex(&consumer, &e, &schema).await;
let expr = df_expr.context({
DatafusionSnafu {
context: "Failed to convert substrait scalar function to datafusion scalar function",

View File

@@ -500,7 +500,8 @@ pub fn check_permission(
Statement::ShowCharset(_) | Statement::ShowCollation(_) => {}
Statement::Insert(insert) => {
validate_param(insert.table_name(), query_ctx)?;
let name = insert.table_name().context(ParseSqlSnafu)?;
validate_param(name, query_ctx)?;
}
Statement::CreateTable(stmt) => {
validate_param(&stmt.name, query_ctx)?;

View File

@@ -606,10 +606,7 @@ mod tests {
for i in 0..100 {
let timestamps: Vec<_> = (0..10).map(|v| i as i64 * 1000 + v).collect();
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "k1".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("k1"))),
op: Operator::Eq,
right: Box::new(Expr::Literal(ScalarValue::UInt32(Some(i)))),
});

View File

@@ -220,10 +220,7 @@ mod tests {
fn create_filter(column_name: &str, value: &str) -> SimpleFilterEvaluator {
let expr = Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: column_name.to_string(),
})),
left: Box::new(Expr::Column(Column::from_name(column_name))),
op: Operator::Eq,
right: Box::new(Expr::Literal(ScalarValue::Utf8(Some(value.to_string())))),
});

View File

@@ -287,10 +287,7 @@ mod tests {
}
fn column(name: &str) -> Expr {
Expr::Column(Column {
relation: None,
name: name.to_string(),
})
Expr::Column(Column::from_name(name))
}
fn string_lit(s: impl Into<String>) -> Expr {

View File

@@ -300,10 +300,7 @@ mod tests {
let func = ScalarFunction {
args: vec![
Expr::Column(Column {
name: "text".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("text")),
Expr::Literal(ScalarValue::Utf8(Some("foo".to_string()))),
],
func: matches_func(),
@@ -320,10 +317,7 @@ mod tests {
let metadata = mock_metadata();
let func = ScalarFunction {
args: vec![Expr::Column(Column {
name: "text".to_string(),
relation: None,
})],
args: vec![Expr::Column(Column::from_name("text"))],
func: matches_func(),
};
@@ -336,10 +330,7 @@ mod tests {
let func = ScalarFunction {
args: vec![
Expr::Column(Column {
name: "not_found".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("not_found")),
Expr::Literal(ScalarValue::Utf8(Some("foo".to_string()))),
],
func: matches_func(),
@@ -354,10 +345,7 @@ mod tests {
let func = ScalarFunction {
args: vec![
Expr::Column(Column {
name: "ts".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("ts")),
Expr::Literal(ScalarValue::Utf8(Some("foo".to_string()))),
],
func: matches_func(),
@@ -372,10 +360,7 @@ mod tests {
let func = ScalarFunction {
args: vec![
Expr::Column(Column {
name: "text".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("text")),
Expr::Literal(ScalarValue::Int64(Some(42))),
],
func: matches_func(),
@@ -390,10 +375,7 @@ mod tests {
let func = ScalarFunction {
args: vec![
Expr::Column(Column {
name: "text".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("text")),
Expr::Literal(ScalarValue::Utf8(Some("foo".to_string()))),
],
func: matches_term_func(),
@@ -416,10 +398,7 @@ mod tests {
let metadata = mock_metadata();
let lower_func_expr = ScalarFunction {
args: vec![Expr::Column(Column {
name: "text".to_string(),
relation: None,
})],
args: vec![Expr::Column(Column::from_name("text"))],
func: lower(),
};
@@ -448,10 +427,7 @@ mod tests {
let metadata = mock_metadata();
let func = ScalarFunction {
args: vec![Expr::Column(Column {
name: "text".to_string(),
relation: None,
})],
args: vec![Expr::Column(Column::from_name("text"))],
func: matches_term_func(),
};
@@ -464,10 +440,7 @@ mod tests {
let func = ScalarFunction {
args: vec![
Expr::Column(Column {
name: "text".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("text")),
Expr::Literal(ScalarValue::Utf8(Some("foo".to_string()))),
],
func: matches_func(), // Using 'matches' instead of 'matches_term'
@@ -479,10 +452,7 @@ mod tests {
#[test]
fn test_extract_lower_arg() {
let func = ScalarFunction {
args: vec![Expr::Column(Column {
name: "text".to_string(),
relation: None,
})],
args: vec![Expr::Column(Column::from_name("text"))],
func: lower(),
};
@@ -498,10 +468,7 @@ mod tests {
#[test]
fn test_extract_lower_arg_wrong_function() {
let func = ScalarFunction {
args: vec![Expr::Column(Column {
name: "text".to_string(),
relation: None,
})],
args: vec![Expr::Column(Column::from_name("text"))],
func: matches_func(), // Not 'lower'
};
@@ -515,10 +482,7 @@ mod tests {
// Create a matches expression
let matches_expr = Expr::ScalarFunction(ScalarFunction {
args: vec![
Expr::Column(Column {
name: "text".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("text")),
Expr::Literal(ScalarValue::Utf8(Some("foo".to_string()))),
],
func: matches_func(),
@@ -541,10 +505,7 @@ mod tests {
// Create a matches expression
let matches_expr = Expr::ScalarFunction(ScalarFunction {
args: vec![
Expr::Column(Column {
name: "text".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("text")),
Expr::Literal(ScalarValue::Utf8(Some("foo".to_string()))),
],
func: matches_func(),
@@ -553,10 +514,7 @@ mod tests {
// Create a matches_term expression
let matches_term_expr = Expr::ScalarFunction(ScalarFunction {
args: vec![
Expr::Column(Column {
name: "text".to_string(),
relation: None,
}),
Expr::Column(Column::from_name("text")),
Expr::Literal(ScalarValue::Utf8(Some("bar".to_string()))),
],
func: matches_term_func(),

View File

@@ -287,31 +287,19 @@ mod tests {
}
pub(crate) fn tag_column() -> Expr {
Expr::Column(Column {
relation: None,
name: "a".to_string(),
})
Expr::Column(Column::from_name("a"))
}
pub(crate) fn tag_column2() -> Expr {
Expr::Column(Column {
relation: None,
name: "b".to_string(),
})
Expr::Column(Column::from_name("b"))
}
pub(crate) fn field_column() -> Expr {
Expr::Column(Column {
relation: None,
name: "c".to_string(),
})
Expr::Column(Column::from_name("c"))
}
pub(crate) fn nonexistent_column() -> Expr {
Expr::Column(Column {
relation: None,
name: "nonexistent".to_string(),
})
Expr::Column(Column::from_name("nonexistent"))
}
pub(crate) fn string_lit(s: impl Into<String>) -> Expr {

View File

@@ -342,10 +342,7 @@ mod tests {
// Predicate
let predicate = Some(Predicate::new(vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "tag_0".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("tag_0"))),
op: Operator::Eq,
right: Box::new(Expr::Literal(ScalarValue::Utf8(Some("a".to_string())))),
})]));
@@ -435,10 +432,7 @@ mod tests {
// Predicate
let predicate = Some(Predicate::new(vec![Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::Column(Column {
relation: None,
name: "field_0".to_string(),
})),
left: Box::new(Expr::Column(Column::from_name("field_0"))),
op: Operator::GtEq,
right: Box::new(Expr::Literal(ScalarValue::UInt64(Some(150)))),
})]));

View File

@@ -45,6 +45,7 @@ meter-core.workspace = true
meter-macros.workspace = true
moka.workspace = true
object-store.workspace = true
object_store_opendal.workspace = true
partition.workspace = true
prometheus.workspace = true
query.workspace = true

View File

@@ -514,14 +514,6 @@ pub enum Error {
source: common_datasource::error::Error,
},
#[snafu(display("Failed to build csv config"))]
BuildCsvConfig {
#[snafu(source)]
error: common_datasource::file_format::csv::CsvConfigBuilderError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to write stream to path: {}", path))]
WriteStreamToFile {
path: String,
@@ -825,7 +817,6 @@ impl ErrorExt for Error {
| Error::ColumnNotFound { .. }
| Error::BuildRegex { .. }
| Error::InvalidSchema { .. }
| Error::BuildCsvConfig { .. }
| Error::ProjectSchema { .. }
| Error::UnsupportedFormat { .. }
| Error::ColumnNoneDefaultValue { .. }

View File

@@ -63,7 +63,8 @@ impl<'a> StatementToRegion<'a> {
stmt: &Insert,
query_ctx: &QueryContextRef,
) -> Result<(InstantAndNormalInsertRequests, TableInfoRef)> {
let (catalog, schema, table_name) = self.get_full_name(stmt.table_name())?;
let name = stmt.table_name().context(ParseSqlSnafu)?;
let (catalog, schema, table_name) = self.get_full_name(name)?;
let table = self.get_table(&catalog, &schema, &table_name).await?;
let table_schema = table.schema();
let table_info = table.table_info();

View File

@@ -18,8 +18,8 @@ use std::sync::Arc;
use client::{Output, OutputData, OutputMeta};
use common_base::readable_size::ReadableSize;
use common_datasource::file_format::csv::{CsvConfigBuilder, CsvFormat, CsvOpener};
use common_datasource::file_format::json::{JsonFormat, JsonOpener};
use common_datasource::file_format::csv::CsvFormat;
use common_datasource::file_format::json::JsonFormat;
use common_datasource::file_format::orc::{infer_orc_schema, new_orc_stream_reader, ReaderAdapter};
use common_datasource::file_format::{FileFormat, Format};
use common_datasource::lister::{Lister, Source};
@@ -31,11 +31,13 @@ use common_recordbatch::DfSendableRecordBatchStream;
use common_telemetry::{debug, tracing};
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream};
use datafusion::datasource::physical_plan::{
CsvConfig, CsvOpener, FileOpener, FileScanConfig, FileStream, JsonOpener,
};
use datafusion::parquet::arrow::arrow_reader::ArrowReaderMetadata;
use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder;
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion_common::Statistics;
use datafusion_common::{Constraints, Statistics};
use datafusion_expr::Expr;
use datatypes::arrow::compute::can_cast_types;
use datatypes::arrow::datatypes::{Schema, SchemaRef};
@@ -210,6 +212,7 @@ impl StatementExecutor {
limit: None,
table_partition_cols: vec![],
output_ordering: vec![],
constraints: Constraints::empty(),
},
0,
opener,
@@ -239,16 +242,23 @@ impl StatementExecutor {
.project(&projection)
.context(error::ProjectSchemaSnafu)?,
);
let csv_conf = CsvConfigBuilder::default()
.batch_size(DEFAULT_BATCH_SIZE)
.file_schema(schema.clone())
.file_projection(Some(projection.clone()))
.build()
.context(error::BuildCsvConfigSnafu)?;
let csv_config = Arc::new(CsvConfig::new(
DEFAULT_BATCH_SIZE,
schema.clone(),
Some(projection.clone()),
format.has_header,
format.delimiter,
b'"',
None,
Arc::new(object_store_opendal::OpendalStore::new(
object_store.clone(),
)),
None,
));
let stream = self
.build_file_stream(
CsvOpener::new(csv_conf, object_store.clone(), format.compression_type),
CsvOpener::new(csv_config, format.compression_type.into()),
path,
schema.clone(),
)
@@ -275,13 +285,14 @@ impl StatementExecutor {
.project(&projection)
.context(error::ProjectSchemaSnafu)?,
);
let store = object_store_opendal::OpendalStore::new(object_store.clone());
let stream = self
.build_file_stream(
JsonOpener::new(
DEFAULT_BATCH_SIZE,
projected_file_schema,
object_store.clone(),
format.compression_type,
format.compression_type.into(),
Arc::new(store),
),
path,
schema.clone(),

View File

@@ -147,6 +147,7 @@ pub fn validate_client_encoding(set: SetVariables) -> Result<()> {
| Expr::Identifier(Ident {
value: x,
quote_style: _,
span: _,
}) => x.to_uppercase(),
_ => {
return InvalidSqlSnafu {
@@ -203,6 +204,7 @@ fn try_parse_datestyle(expr: &Expr) -> Result<(Option<PGDateTimeStyle>, Option<P
Expr::Identifier(Ident {
value: s,
quote_style: _,
span: _,
})
| Expr::Value(Value::SingleQuotedString(s))
| Expr::Value(Value::DoubleQuotedString(s)) => {

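The `span: _` bindings above come from the sqlparser upgrade: `Ident` now carries a source span alongside `value` and `quote_style`. A small sketch of both sides of that change as they appear in this commit, not part of it (function names are illustrative):

use sqlparser::ast::Ident;

// Construction: `Ident::new` fills in an unquoted style and a default span,
// which is why later hunks replace `Ident { value, quote_style: None }` literals.
fn plain_ident(name: &str) -> Ident {
    Ident::new(name)
}

// Matching: either name the extra field, as this commit does, or use `..`.
fn ident_value(ident: &Ident) -> &str {
    let Ident { value, quote_style: _, span: _ } = ident;
    value
}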
View File

@@ -30,10 +30,11 @@ use datafusion::error::DataFusionError;
use datafusion::execution::context::{SessionState, TaskContext};
use datafusion::logical_expr::{ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
use datafusion::physical_expr::{EquivalenceProperties, PhysicalExprRef};
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties,
RecordBatchStream, SendableRecordBatchStream,
DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, RecordBatchStream,
SendableRecordBatchStream,
};
use datafusion::physical_planner::PhysicalPlanner;
use datafusion::prelude::{col, lit, Expr};
@@ -112,7 +113,8 @@ impl EmptyMetric {
let properties = Arc::new(PlanProperties::new(
EquivalenceProperties::new(result_schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
));
Ok(Arc::new(EmptyMetricExec {
start: self.start,

View File

@@ -30,6 +30,7 @@ use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::execution::TaskContext;
use datafusion::logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore};
use datafusion::physical_expr::{EquivalenceProperties, LexRequirement, PhysicalSortRequirement};
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::expressions::{CastExpr as PhyCast, Column as PhyColumn};
use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
use datafusion::physical_plan::{
@@ -182,7 +183,8 @@ impl HistogramFold {
let properties = PlanProperties::new(
EquivalenceProperties::new(output_schema.clone()),
Partitioning::UnknownPartitioning(1),
exec_input.properties().execution_mode(),
EmissionType::Incremental,
Boundedness::Bounded,
);
Arc::new(HistogramFoldExec {
le_column_index,
@@ -728,7 +730,6 @@ mod test {
use datafusion::arrow::datatypes::{Field, Schema};
use datafusion::common::ToDFSchema;
use datafusion::physical_plan::memory::MemoryExec;
use datafusion::physical_plan::ExecutionMode;
use datafusion::prelude::SessionContext;
use datatypes::arrow_array::StringArray;
@@ -806,7 +807,8 @@ mod test {
let properties = PlanProperties::new(
EquivalenceProperties::new(output_schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
);
let fold_exec = Arc::new(HistogramFoldExec {
le_column_index: 1,

View File

@@ -158,10 +158,12 @@ impl RangeManipulate {
pub fn to_execution_plan(&self, exec_input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
let output_schema: SchemaRef = SchemaRef::new(self.output_schema.as_ref().into());
let properties = exec_input.properties();
let properties = PlanProperties::new(
EquivalenceProperties::new(output_schema.clone()),
exec_input.properties().partitioning.clone(),
exec_input.properties().execution_mode,
properties.partitioning.clone(),
properties.emission_type,
properties.boundedness,
);
Arc::new(RangeManipulateExec {
start: self.start,
@@ -336,10 +338,12 @@ impl ExecutionPlan for RangeManipulateExec {
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
assert!(!children.is_empty());
let exec_input = children[0].clone();
let properties = exec_input.properties();
let properties = PlanProperties::new(
EquivalenceProperties::new(self.output_schema.clone()),
exec_input.properties().partitioning.clone(),
exec_input.properties().execution_mode,
properties.partitioning.clone(),
properties.emission_type,
properties.boundedness,
);
Ok(Arc::new(Self {
start: self.start,
@@ -625,8 +629,8 @@ mod test {
};
use datafusion::common::ToDFSchema;
use datafusion::physical_expr::Partitioning;
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::memory::MemoryExec;
use datafusion::physical_plan::ExecutionMode;
use datafusion::prelude::SessionContext;
use datatypes::arrow::array::TimestampMillisecondArray;
@@ -685,7 +689,8 @@ mod test {
let properties = PlanProperties::new(
EquivalenceProperties::new(manipulate_output_schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
);
let normalize_exec = Arc::new(RangeManipulateExec {
start,

View File

@@ -128,10 +128,12 @@ impl ScalarCalculate {
.index_of(&self.field_column)
.map_err(|e| DataFusionError::ArrowError(e, None))?;
let schema = Arc::new(Schema::new(fields));
let properties = exec_input.properties();
let properties = PlanProperties::new(
EquivalenceProperties::new(schema.clone()),
Partitioning::UnknownPartitioning(1),
exec_input.properties().execution_mode,
properties.emission_type,
properties.boundedness,
);
Ok(Arc::new(ScalarCalculateExec {
start: self.start,
@@ -533,8 +535,8 @@ impl Stream for ScalarCalculateStream {
#[cfg(test)]
mod test {
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::memory::MemoryExec;
use datafusion::physical_plan::ExecutionMode;
use datafusion::prelude::SessionContext;
use datatypes::arrow::array::{Float64Array, TimestampMillisecondArray};
use datatypes::arrow::datatypes::TimeUnit;
@@ -560,7 +562,8 @@ mod test {
let properties = PlanProperties::new(
EquivalenceProperties::new(schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
);
let scalar_exec = Arc::new(ScalarCalculateExec {
start: 0,

View File

@@ -26,10 +26,11 @@ use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::execution::context::TaskContext;
use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
use datafusion::physical_expr::EquivalenceProperties;
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
use datafusion::physical_plan::{
hash_utils, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan,
Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream,
hash_utils, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
PlanProperties, RecordBatchStream, SendableRecordBatchStream,
};
use datatypes::arrow::compute;
use futures::future::BoxFuture;
@@ -95,7 +96,8 @@ impl UnionDistinctOn {
let properties = Arc::new(PlanProperties::new(
EquivalenceProperties::new(output_schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
));
Arc::new(UnionDistinctOnExec {
left: left_exec,

View File

@@ -74,6 +74,7 @@ impl Round {
#[cfg(test)]
mod tests {
use datafusion_expr::ScalarFunctionArgs;
use datatypes::arrow::array::Float64Array;
use super::*;
@@ -81,7 +82,12 @@ mod tests {
fn test_round_f64(value: Vec<f64>, nearest: f64, expected: Vec<f64>) {
let round_udf = Round::scalar_udf(nearest);
let input = vec![ColumnarValue::Array(Arc::new(Float64Array::from(value)))];
let result = round_udf.invoke_batch(&input, 1).unwrap();
let args = ScalarFunctionArgs {
args: input,
number_rows: 1,
return_type: &DataType::Float64,
};
let result = round_udf.invoke_with_args(args).unwrap();
let result_array = extract_array(&result).unwrap();
assert_eq!(result_array.len(), 1);
assert_eq!(

View File

@@ -17,6 +17,8 @@ use std::sync::Arc;
use datafusion::arrow::array::Float64Array;
use datafusion::logical_expr::ScalarUDF;
use datafusion::physical_plan::ColumnarValue;
use datafusion_expr::ScalarFunctionArgs;
use datatypes::arrow::datatypes::DataType;
use crate::functions::extract_array;
use crate::range_array::RangeArray;
@@ -33,14 +35,19 @@ pub fn simple_range_udf_runner(
ColumnarValue::Array(Arc::new(input_ts.into_dict())),
ColumnarValue::Array(Arc::new(input_value.into_dict())),
];
let eval_result: Vec<Option<f64>> =
extract_array(&range_fn.invoke_batch(&input, num_rows).unwrap())
.unwrap()
.as_any()
.downcast_ref::<Float64Array>()
.unwrap()
.iter()
.collect();
let args = ScalarFunctionArgs {
args: input,
number_rows: num_rows,
return_type: &DataType::Float64,
};
let value = range_fn.invoke_with_args(args).unwrap();
let eval_result: Vec<Option<f64>> = extract_array(&value)
.unwrap()
.as_any()
.downcast_ref::<Float64Array>()
.unwrap()
.iter()
.collect();
assert_eq!(eval_result.len(), expected.len());
assert!(eval_result
.iter()

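The two test hunks above show the caller side of the UDF migration: instead of the removed `invoke_batch(&input, n)`, callers now build an owned `ScalarFunctionArgs` (arguments, row count, and the expected return type) and go through `invoke_with_args`. A condensed sketch of that calling pattern, not part of the commit (the `Float64` return type and the helper name are placeholders):

use datafusion::arrow::datatypes::DataType;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF};

// Mirrors how the updated tests drive a scalar UDF.
fn call_udf(udf: &ScalarUDF, input: Vec<ColumnarValue>, rows: usize) -> ColumnarValue {
    let args = ScalarFunctionArgs {
        args: input,
        number_rows: rows,
        return_type: &DataType::Float64, // whatever type the caller expects back
    };
    udf.invoke_with_args(args).expect("udf evaluation failed")
}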
View File

@@ -30,7 +30,7 @@ use datafusion::execution::TaskContext;
use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
use datafusion::physical_plan::{
accept, DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties,
accept, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
};
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
use datafusion_common::{internal_err, DataFusionError};
@@ -76,8 +76,13 @@ impl DistAnalyzeExec {
fn compute_properties(input: &Arc<dyn ExecutionPlan>, schema: SchemaRef) -> PlanProperties {
let eq_properties = EquivalenceProperties::new(schema);
let output_partitioning = Partitioning::UnknownPartitioning(1);
let exec_mode = input.execution_mode();
PlanProperties::new(eq_properties, output_partitioning, exec_mode)
let properties = input.properties();
PlanProperties::new(
eq_properties,
output_partitioning,
properties.emission_type,
properties.boundedness,
)
}
}

View File

@@ -50,9 +50,9 @@ use crate::dataframe::DataFrame;
pub use crate::datafusion::planner::DfContextProviderAdapter;
use crate::dist_plan::MergeScanLogicalPlan;
use crate::error::{
CatalogSnafu, ConvertSchemaSnafu, CreateRecordBatchSnafu, MissingTableMutationHandlerSnafu,
MissingTimestampColumnSnafu, QueryExecutionSnafu, Result, TableMutationSnafu,
TableNotFoundSnafu, TableReadOnlySnafu, UnsupportedExprSnafu,
CatalogSnafu, ConvertSchemaSnafu, CreateRecordBatchSnafu, DataFusionSnafu,
MissingTableMutationHandlerSnafu, MissingTimestampColumnSnafu, QueryExecutionSnafu, Result,
TableMutationSnafu, TableNotFoundSnafu, TableReadOnlySnafu, UnsupportedExprSnafu,
};
use crate::executor::QueryExecutor;
use crate::metrics::{OnDone, QUERY_STAGE_ELAPSED};
@@ -309,9 +309,7 @@ impl DatafusionQueryEngine {
.query_planner()
.create_physical_plan(&optimized_plan, state)
.await
.context(error::DatafusionSnafu)
.map_err(BoxedError::new)
.context(QueryExecutionSnafu)?;
.context(DataFusionSnafu)?;
Ok(physical_plan)
}

View File

@@ -30,11 +30,12 @@ use common_recordbatch::{
};
use common_telemetry::tracing_context::TracingContext;
use datafusion::execution::{SessionState, TaskContext};
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::metrics::{
Count, ExecutionPlanMetricsSet, Gauge, MetricBuilder, MetricsSet, Time,
};
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties,
DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties,
};
use datafusion_common::{Column as ColumnExpr, Result};
use datafusion_expr::{Expr, Extension, LogicalPlan, UserDefinedLogicalNodeCore};
@@ -222,7 +223,12 @@ impl MergeScanExec {
.collect();
let partitioning = Partitioning::Hash(partition_exprs, target_partition);
let properties = PlanProperties::new(eq_properties, partitioning, ExecutionMode::Bounded);
let properties = PlanProperties::new(
eq_properties,
partitioning,
EmissionType::Incremental,
Boundedness::Bounded,
);
let schema = Self::arrow_schema_to_schema(arrow_schema.clone())?;
Ok(Self {
table,
@@ -387,7 +393,8 @@ impl MergeScanExec {
properties: PlanProperties::new(
self.properties.eq_properties.clone(),
Partitioning::Hash(hash_exprs, self.target_partition),
self.properties.execution_mode,
self.properties.emission_type,
self.properties.boundedness,
),
sub_stage_metrics: self.sub_stage_metrics.clone(),
query_ctx: self.query_ctx.clone(),

View File

@@ -126,7 +126,7 @@ pub enum Error {
location: Location,
},
#[snafu(display("DataFusion error"))]
#[snafu(display(""))]
DataFusion {
#[snafu(source)]
error: DataFusionError,

View File

@@ -68,10 +68,12 @@ impl PartSortExec {
input: Arc<dyn ExecutionPlan>,
) -> Self {
let metrics = ExecutionPlanMetricsSet::new();
let properties = input.properties();
let properties = PlanProperties::new(
input.equivalence_properties().clone(),
input.output_partitioning().clone(),
input.execution_mode(),
properties.emission_type,
properties.boundedness,
);
Self {

View File

@@ -95,13 +95,13 @@ impl DfLogicalPlanner {
.await?;
let config_options = self.session_state.config().options();
let parser_options = &config_options.sql_parser;
let parser_options = ParserOptions {
enable_ident_normalization: config_options.sql_parser.enable_ident_normalization,
parse_float_as_decimal: config_options.sql_parser.parse_float_as_decimal,
support_varchar_with_length: config_options.sql_parser.support_varchar_with_length,
enable_options_value_normalization: config_options
.sql_parser
.enable_options_value_normalization,
enable_ident_normalization: parser_options.enable_ident_normalization,
parse_float_as_decimal: parser_options.parse_float_as_decimal,
support_varchar_with_length: parser_options.support_varchar_with_length,
enable_options_value_normalization: parser_options.enable_options_value_normalization,
collect_spans: parser_options.collect_spans,
};
let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);
@@ -143,13 +143,13 @@ impl DfLogicalPlanner {
.await?;
let config_options = self.session_state.config().options();
let parser_options = &config_options.sql_parser;
let parser_options = ParserOptions {
enable_ident_normalization: normalize_ident,
parse_float_as_decimal: config_options.sql_parser.parse_float_as_decimal,
support_varchar_with_length: config_options.sql_parser.support_varchar_with_length,
enable_options_value_normalization: config_options
.sql_parser
.enable_options_value_normalization,
parse_float_as_decimal: parser_options.parse_float_as_decimal,
support_varchar_with_length: parser_options.support_varchar_with_length,
enable_options_value_normalization: parser_options.enable_options_value_normalization,
collect_spans: parser_options.collect_spans,
};
let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);

View File

@@ -29,9 +29,10 @@ use datafusion::common::{Result as DataFusionResult, Statistics};
use datafusion::error::Result as DfResult;
use datafusion::execution::context::SessionState;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, RecordBatchStream,
DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, RecordBatchStream,
SendableRecordBatchStream,
};
use datafusion::physical_planner::create_physical_sort_expr;
@@ -691,7 +692,8 @@ impl RangeSelect {
let cache = PlanProperties::new(
EquivalenceProperties::new(schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
);
Ok(Arc::new(RangeSelectExec {
input: exec_input,
@@ -1341,7 +1343,8 @@ mod test {
let cache = PlanProperties::new(
EquivalenceProperties::new(schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
);
let input_schema = memory_exec.schema().clone();
let range_select_exec = Arc::new(RangeSelectExec {

View File

@@ -492,7 +492,7 @@ impl RangePlanRewriter {
async fn get_index_by(&mut self, schema: &Arc<DFSchema>) -> Result<(Expr, Vec<Expr>)> {
let mut time_index_expr = Expr::Wildcard {
qualifier: None,
options: WildcardOptions::default(),
options: Box::new(WildcardOptions::default()),
};
let mut default_by = vec![];
for i in 0..schema.fields().len() {

View File

@@ -453,7 +453,7 @@ pub async fn show_index(
null().alias(INDEX_EXPRESSION_COLUMN),
Expr::Wildcard {
qualifier: None,
options: WildcardOptions::default(),
options: Box::new(WildcardOptions::default()),
},
];
@@ -793,10 +793,7 @@ pub async fn show_search_path(_query_ctx: QueryContextRef) -> Result<Output> {
pub fn show_create_database(database_name: &str, options: OptionMap) -> Result<Output> {
let stmt = CreateDatabase {
name: ObjectName(vec![Ident {
value: database_name.to_string(),
quote_style: None,
}]),
name: ObjectName(vec![Ident::new(database_name)]),
if_not_exists: true,
options,
};
@@ -999,10 +996,7 @@ pub fn show_create_flow(
let stmt = CreateFlow {
flow_name,
sink_table_name: ObjectName(vec![Ident {
value: flow_val.sink_table_name().table_name.clone(),
quote_style: None,
}]),
sink_table_name: ObjectName(vec![Ident::new(&flow_val.sink_table_name().table_name)]),
// Notice: we don't want `OR REPLACE` and `IF NOT EXISTS` in the same SQL since it's unclear what to do,
// so we set `or_replace` to false.
or_replace: false,
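Note: the `Ident { value, quote_style }` literals above stop compiling once sqlparser adds a `span` field to `Ident`; `Ident::new` builds an unquoted identifier with the new field defaulted. A minimal sketch (the helper name is illustrative):

    use sqlparser::ast::{Ident, ObjectName};

    // Equivalent of the old `Ident { value, quote_style: None }` literal.
    fn unquoted_name(name: &str) -> ObjectName {
        ObjectName(vec![Ident::new(name)])
    }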

View File

@@ -24,9 +24,8 @@ use arrow::array::{
use arrow_schema::{SchemaRef, TimeUnit};
use common_recordbatch::{DfRecordBatch, DfSendableRecordBatchStream};
use datafusion::execution::{RecordBatchStream, TaskContext};
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties,
};
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
use futures::Stream;
@@ -58,7 +57,8 @@ impl MockInputExec {
properties: PlanProperties::new(
EquivalenceProperties::new(schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
),
input,
schema,

View File

@@ -117,13 +117,15 @@ impl WindowedSortExec {
) -> Result<Self> {
check_partition_range_monotonicity(&ranges, expression.options.descending)?;
let properties = input.properties();
let properties = PlanProperties::new(
input
.equivalence_properties()
.clone()
.with_reorder(LexOrdering::new(vec![expression.clone()])),
input.output_partitioning().clone(),
input.execution_mode(),
properties.emission_type,
properties.boundedness,
);
let mut all_avail_working_range = Vec::with_capacity(ranges.len());

View File

@@ -66,7 +66,7 @@ http-body = "1"
humantime.workspace = true
humantime-serde.workspace = true
hyper = { workspace = true, features = ["full"] }
indexmap = "2.7"
indexmap = "2.8"
influxdb_line_protocol = { git = "https://github.com/evenyag/influxdb_iox", branch = "feat/line-protocol" }
itertools.workspace = true
jsonb.workspace = true

View File

@@ -17,7 +17,7 @@ use sqlparser::ast::{Ident, Query};
use sqlparser::dialect::Dialect;
use sqlparser::keywords::Keyword;
use sqlparser::parser::{Parser, ParserError, ParserOptions};
use sqlparser::tokenizer::{Token, TokenWithLocation};
use sqlparser::tokenizer::{Token, TokenWithSpan};
use crate::ast::{Expr, ObjectName};
use crate::error::{self, Result, SyntaxSnafu};
@@ -112,7 +112,7 @@ impl ParserContext<'_> {
.try_with_sql(sql)
.context(SyntaxSnafu)?;
let function_name = parser.parse_identifier(false).context(SyntaxSnafu)?;
let function_name = parser.parse_identifier().context(SyntaxSnafu)?;
parser
.parse_function(ObjectName(vec![function_name]))
.context(SyntaxSnafu)
@@ -178,12 +178,12 @@ impl ParserContext<'_> {
Keyword::USE => {
let _ = self.parser.next_token();
let database_name = self.parser.parse_identifier(false).context(
let database_name = self.parser.parse_identifier().with_context(|_| {
error::UnexpectedSnafu {
expected: "a database name",
actual: self.peek_token_as_string(),
},
)?;
}
})?;
Ok(Statement::Use(
Self::canonicalize_identifier(database_name).value,
))
@@ -222,7 +222,7 @@ impl ParserContext<'_> {
}
// Report unexpected token
pub(crate) fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T> {
pub(crate) fn expected<T>(&self, expected: &str, found: TokenWithSpan) -> Result<T> {
Err(ParserError::ParserError(format!(
"Expected {expected}, found: {found}",
)))
@@ -255,10 +255,7 @@ impl ParserContext<'_> {
if ident.quote_style.is_some() {
ident
} else {
Ident {
value: ident.value.to_lowercase(),
quote_style: None,
}
Ident::new(ident.value.to_lowercase())
}
}
@@ -280,14 +277,6 @@ impl ParserContext<'_> {
pub(crate) fn parse_object_name(&mut self) -> std::result::Result<ObjectName, ParserError> {
self.parser.parse_object_name(false)
}
/// Simply a shortcut for sqlparser's same name method `parse_identifier`,
/// but with constant argument "false".
/// Because the argument is always "false" for us (it's introduced by BigQuery),
/// we don't want to write it again and again.
pub(crate) fn parse_identifier(parser: &mut Parser) -> std::result::Result<Ident, ParserError> {
parser.parse_identifier(false)
}
}
#[cfg(test)]
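Note: two sqlparser API changes drive this hunk: `TokenWithLocation` is renamed to `TokenWithSpan`, and `parse_identifier` drops the BigQuery-specific `bool` (which is why the local `parse_identifier` shortcut is deleted). A standalone sketch of the new calls, using `GenericDialect` purely for illustration:

    use sqlparser::ast::Ident;
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::{Parser, ParserError};
    use sqlparser::tokenizer::TokenWithSpan;

    // Parse one identifier and peek at the following token under the new API.
    fn identifier_and_next(sql: &str) -> Result<(Ident, TokenWithSpan), ParserError> {
        let dialect = GenericDialect {};
        let mut parser = Parser::new(&dialect).try_with_sql(sql)?;
        let ident = parser.parse_identifier()?; // no `false` argument any more
        let next = parser.peek_token(); // tokens now carry a Span instead of a Location
        Ok((ident, next))
    }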

View File

@@ -20,7 +20,7 @@ use snafu::{ensure, ResultExt};
use sqlparser::ast::Ident;
use sqlparser::keywords::Keyword;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::tokenizer::{Token, TokenWithLocation};
use sqlparser::tokenizer::{Token, TokenWithSpan};
use crate::error::{self, InvalidColumnOptionSnafu, Result, SetFulltextOptionSnafu};
use crate::parser::ParserContext;
@@ -124,8 +124,7 @@ impl ParserContext<'_> {
.expect_keyword(Keyword::COLUMN)
.context(error::SyntaxSnafu)?;
let name = Self::canonicalize_identifier(
Self::parse_identifier(&mut self.parser)
.context(error::SyntaxSnafu)?,
self.parser.parse_identifier().context(error::SyntaxSnafu)?,
);
AlterTableOperation::DropColumn { name }
}
@@ -205,9 +204,7 @@ impl ParserContext<'_> {
.expect_keyword(Keyword::COLUMN)
.context(error::SyntaxSnafu)?;
let column_name = Self::canonicalize_identifier(
self.parser
.parse_identifier(false)
.context(error::SyntaxSnafu)?,
self.parser.parse_identifier().context(error::SyntaxSnafu)?,
);
match self.parser.peek_token().token {
@@ -240,7 +237,7 @@ impl ParserContext<'_> {
column_name: Ident,
) -> Result<AlterTableOperation> {
match self.parser.next_token() {
TokenWithLocation {
TokenWithSpan {
token: Token::Word(w),
..
} if w.keyword == Keyword::FULLTEXT => {
@@ -252,7 +249,7 @@ impl ParserContext<'_> {
})
}
TokenWithLocation {
TokenWithSpan {
token: Token::Word(w),
..
} if w.value.eq_ignore_ascii_case(INVERTED) => {
@@ -264,7 +261,7 @@ impl ParserContext<'_> {
})
}
TokenWithLocation {
TokenWithSpan {
token: Token::Word(w),
..
} if w.value.eq_ignore_ascii_case("SKIPPING") => {
@@ -288,7 +285,7 @@ impl ParserContext<'_> {
fn parse_alter_column_set_index(&mut self, column_name: Ident) -> Result<AlterTableOperation> {
match self.parser.next_token() {
TokenWithLocation {
TokenWithSpan {
token: Token::Word(w),
..
} if w.keyword == Keyword::FULLTEXT => {
@@ -298,7 +295,7 @@ impl ParserContext<'_> {
self.parse_alter_column_fulltext(column_name)
}
TokenWithLocation {
TokenWithSpan {
token: Token::Word(w),
..
} if w.value.eq_ignore_ascii_case(INVERTED) => {
@@ -310,7 +307,7 @@ impl ParserContext<'_> {
})
}
TokenWithLocation {
TokenWithSpan {
token: Token::Word(w),
..
} if w.value.eq_ignore_ascii_case("SKIPPING") => {
@@ -416,8 +413,7 @@ fn parse_add_columns(parser: &mut Parser) -> std::result::Result<AddColumn, Pars
} else if let Token::Word(word) = parser.peek_token().token {
if word.value.eq_ignore_ascii_case("AFTER") {
let _ = parser.next_token();
let name =
ParserContext::canonicalize_identifier(ParserContext::parse_identifier(parser)?);
let name = ParserContext::canonicalize_identifier(parser.parse_identifier()?);
Some(AddColumnLocation::After {
column_name: name.value,
})
@@ -810,7 +806,7 @@ mod tests {
target_type,
} => {
assert_eq!("a", column_name.value);
assert_eq!(DataType::Text, *target_type);
assert_eq!(DataType::MediumText, *target_type);
}
_ => unreachable!(),
}
@@ -1012,10 +1008,7 @@ mod tests {
alter_operation,
&AlterTableOperation::UnsetIndex {
options: UnsetIndexOperation::Fulltext {
column_name: Ident {
value: "a".to_string(),
quote_style: None
}
column_name: Ident::new("a"),
}
}
);
@@ -1079,10 +1072,7 @@ mod tests {
alter_operation,
&AlterTableOperation::UnsetIndex {
options: UnsetIndexOperation::Inverted {
column_name: Ident {
value: "a".to_string(),
quote_style: None
}
column_name: Ident::new("a"),
}
}
);
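Note: the pattern matches above only need the type name swapped, since `TokenWithSpan` keeps the `token` field and everything else can be ignored with `..`. A reduced sketch of that shape (function name illustrative):

    use sqlparser::keywords::Keyword;
    use sqlparser::parser::Parser;
    use sqlparser::tokenizer::{Token, TokenWithSpan};

    // True if the upcoming token is the FULLTEXT keyword, without consuming it.
    fn next_is_fulltext(parser: &mut Parser<'_>) -> bool {
        matches!(
            parser.peek_token(),
            TokenWithSpan { token: Token::Word(w), .. } if w.keyword == Keyword::FULLTEXT
        )
    }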

View File

@@ -25,7 +25,7 @@ use sqlparser::dialect::keywords::Keyword;
use sqlparser::keywords::ALL_KEYWORDS;
use sqlparser::parser::IsOptional::Mandatory;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::tokenizer::{Token, TokenWithLocation, Word};
use sqlparser::tokenizer::{Token, TokenWithSpan, Word};
use table::requests::validate_table_option;
use crate::ast::{ColumnDef, Ident};
@@ -299,7 +299,7 @@ impl<'a> ParserContext<'a> {
let comment = if self.parser.parse_keyword(Keyword::COMMENT) {
match self.parser.next_token() {
TokenWithLocation {
TokenWithSpan {
token: Token::SingleQuotedString(value, ..),
..
} => Some(value),
@@ -496,10 +496,7 @@ impl<'a> ParserContext<'a> {
time_index_opt_idx = Some(index);
let constraint = TableConstraint::TimeIndex {
column: Ident {
value: column.name().value.clone(),
quote_style: None,
},
column: Ident::new(column.name().value.clone()),
};
constraints.push(constraint);
}
@@ -547,7 +544,7 @@ impl<'a> ParserContext<'a> {
/// Parse the column name and check if it's valid.
fn parse_column_name(&mut self) -> std::result::Result<Ident, ParserError> {
let name = self.parser.parse_identifier(false)?;
let name = self.parser.parse_identifier()?;
if name.quote_style.is_none() &&
// "ALL_KEYWORDS" are sorted.
ALL_KEYWORDS.binary_search(&name.value.to_uppercase().as_str()).is_ok()
@@ -587,7 +584,7 @@ impl<'a> ParserContext<'a> {
let mut extensions = ColumnExtensions::default();
loop {
if parser.parse_keyword(Keyword::CONSTRAINT) {
let name = Some(parser.parse_identifier(false).context(SyntaxSnafu)?);
let name = Some(parser.parse_identifier().context(SyntaxSnafu)?);
if let Some(option) = Self::parse_optional_column_option(parser)? {
options.push(ColumnOptionDef { name, option });
} else {
@@ -625,7 +622,7 @@ impl<'a> ParserContext<'a> {
Ok(Some(ColumnOption::NotNull))
} else if parser.parse_keywords(&[Keyword::COMMENT]) {
match parser.next_token() {
TokenWithLocation {
TokenWithSpan {
token: Token::SingleQuotedString(value, ..),
..
} => Ok(Some(ColumnOption::Comment(value))),
@@ -844,7 +841,7 @@ impl<'a> ParserContext<'a> {
fn parse_optional_table_constraint(&mut self) -> Result<Option<TableConstraint>> {
match self.parser.next_token() {
TokenWithLocation {
TokenWithSpan {
token: Token::Word(w),
..
} if w.keyword == Keyword::PRIMARY => {
@@ -864,7 +861,7 @@ impl<'a> ParserContext<'a> {
.collect();
Ok(Some(TableConstraint::PrimaryKey { columns }))
}
TokenWithLocation {
TokenWithSpan {
token: Token::Word(w),
..
} if w.keyword == Keyword::TIME => {
@@ -1313,20 +1310,8 @@ SELECT max(c1), min(c2) FROM schema_2.table_2;";
};
let expected = CreateFlow {
flow_name: ObjectName(vec![Ident {
value: "task_1".to_string(),
quote_style: None,
}]),
sink_table_name: ObjectName(vec![
Ident {
value: "schema_1".to_string(),
quote_style: None,
},
Ident {
value: "table_1".to_string(),
quote_style: None,
},
]),
flow_name: ObjectName(vec![Ident::new("task_1")]),
sink_table_name: ObjectName(vec![Ident::new("schema_1"), Ident::new("table_1")]),
or_replace: true,
if_not_exists: true,
expire_after: Some(300),
@@ -1825,7 +1810,7 @@ ENGINE=mito";
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default());
assert_eq!(
result.unwrap_err().output_msg(),
"Invalid SQL, error: Partition rule expr Identifier(Ident { value: \"b\", quote_style: None }) is not a binary expr"
r#"Invalid SQL, error: Partition rule expr Identifier(Ident { value: "b", quote_style: None, span: Span(Location(4,5)..Location(4,6)) }) is not a binary expr"#
);
}

View File

@@ -24,7 +24,7 @@ impl ParserContext<'_> {
self.parser
.expect_keyword(Keyword::DEALLOCATE)
.context(SyntaxSnafu)?;
let stmt_name = self.parser.parse_identifier(false).context(SyntaxSnafu)?;
let stmt_name = self.parser.parse_identifier().context(SyntaxSnafu)?;
Ok(stmt_name.value)
}
}

View File

@@ -27,7 +27,7 @@ impl ParserContext<'_> {
self.parser
.expect_keyword(Keyword::EXECUTE)
.context(SyntaxSnafu)?;
let stmt_name = self.parser.parse_identifier(false).context(SyntaxSnafu)?;
let stmt_name = self.parser.parse_identifier().context(SyntaxSnafu)?;
if self.parser.parse_keyword(Keyword::USING) {
let param_list = self
.parser

View File

@@ -37,6 +37,7 @@ impl ParserContext<'_> {
#[cfg(test)]
mod tests {
use sqlparser::ast::helpers::attached_token::AttachedToken;
use sqlparser::ast::{
GroupByExpr, Query as SpQuery, Statement as SpStatement, WildcardAdditionalOptions,
};
@@ -69,6 +70,8 @@ mod tests {
partitions: vec![],
version: None,
with_ordinality: false,
json_path: None,
sample: None,
},
joins: vec![],
}],
@@ -86,6 +89,7 @@ mod tests {
prewhere: None,
window_before_qualify: false,
connect_by: None,
select_token: AttachedToken::empty(),
};
let sp_statement = SpStatement::Query(Box::new(SpQuery {
@@ -110,6 +114,7 @@ mod tests {
format: None,
query_plan: false,
options: None,
estimate: false,
})
.unwrap();

View File

@@ -26,7 +26,7 @@ impl ParserContext<'_> {
self.parser
.expect_keyword(Keyword::PREPARE)
.context(SyntaxSnafu)?;
let stmt_name = self.parser.parse_identifier(false).context(SyntaxSnafu)?;
let stmt_name = self.parser.parse_identifier().context(SyntaxSnafu)?;
self.parser
.expect_keyword(Keyword::FROM)
.context(SyntaxSnafu)?;

View File

@@ -38,10 +38,7 @@ impl ParserContext<'_> {
})),
SpStatement::SetTimeZone { value, .. } => Ok(Statement::SetVariables(SetVariables {
variable: ObjectName(vec![Ident {
value: "TIMEZONE".to_string(),
quote_style: None,
}]),
variable: ObjectName(vec![Ident::new("TIMEZONE")]),
value: vec![value],
})),

View File

@@ -295,7 +295,7 @@ impl ParserContext<'_> {
Keyword::LIKE => {
self.parser.next_token();
Ok(ShowKind::Like(
Self::parse_identifier(&mut self.parser).with_context(|_| {
self.parser.parse_identifier().with_context(|_| {
error::UnexpectedSnafu {
expected: "LIKE",
actual: self.peek_token_as_string(),
@@ -498,12 +498,12 @@ impl ParserContext<'_> {
))),
Token::Word(w) => match w.keyword {
Keyword::LIKE => Ok(Statement::ShowDatabases(ShowDatabases::new(
ShowKind::Like(Self::parse_identifier(&mut self.parser).with_context(
|_| error::UnexpectedSnafu {
ShowKind::Like(self.parser.parse_identifier().with_context(|_| {
error::UnexpectedSnafu {
expected: "LIKE",
actual: tok.to_string(),
},
)?),
}
})?),
full,
))),
Keyword::WHERE => Ok(Statement::ShowDatabases(ShowDatabases::new(
@@ -639,6 +639,7 @@ mod tests {
kind: ShowKind::Like(sqlparser::ast::Ident {
value: _,
quote_style: None,
span: _,
}),
..
})
@@ -698,6 +699,7 @@ mod tests {
kind: ShowKind::Like(sqlparser::ast::Ident {
value: _,
quote_style: None,
span: _,
}),
database: None,
full: false
@@ -716,6 +718,7 @@ mod tests {
kind: ShowKind::Like(sqlparser::ast::Ident {
value: _,
quote_style: None,
span: _,
}),
database: Some(_),
full: false
@@ -806,6 +809,7 @@ mod tests {
kind: ShowKind::Like(sqlparser::ast::Ident {
value: _,
quote_style: None,
span: _,
}),
database: None,
full: true
@@ -824,6 +828,7 @@ mod tests {
kind: ShowKind::Like(sqlparser::ast::Ident {
value: _,
quote_style: None,
span: _,
}),
database: Some(_),
full: true
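Note: the test patterns gain `span: _` because `Ident` now has a third field; a `..` rest pattern is an equivalent way to ignore it. A tiny sketch:

    use sqlparser::ast::Ident;

    // Matches any unquoted identifier regardless of its source span.
    fn is_unquoted(ident: &Ident) -> bool {
        matches!(ident, Ident { quote_style: None, .. })
    }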

View File

@@ -217,7 +217,7 @@ impl ParserContext<'_> {
while matches!(parser.peek_token().token, Token::Comma) {
let _skip_token = parser.next_token();
}
let index = parser.next_token().location.column as usize;
let index = parser.next_token().span.start.column as usize;
if index == 0 {
return Err(ParserError::ParserError("empty TQL query".to_string()));
}
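Note: token positions moved from a single `location` to a `span` holding start and end `Location`s, hence `span.start.column` above. A sketch, assuming `GenericDialect` for illustration (the helper name is not from the codebase):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::{Parser, ParserError};

    // Column of the next token's start location; the TQL parser above treats 0 as an empty query.
    fn first_token_column(sql: &str) -> Result<u64, ParserError> {
        let dialect = GenericDialect {};
        let mut parser = Parser::new(&dialect).try_with_sql(sql)?;
        Ok(parser.next_token().span.start.column)
    }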

View File

@@ -281,12 +281,7 @@ pub fn sql_value_to_value(
if let Some(unary_op) = unary_op {
match unary_op {
UnaryOperator::Plus | UnaryOperator::Minus | UnaryOperator::Not => {}
UnaryOperator::PGBitwiseNot
| UnaryOperator::PGSquareRoot
| UnaryOperator::PGCubeRoot
| UnaryOperator::PGPostfixFactorial
| UnaryOperator::PGPrefixFactorial
| UnaryOperator::PGAbs => {
_ => {
return UnsupportedUnaryOpSnafu { unary_op }.fail();
}
}
@@ -570,9 +565,12 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result<Co
SqlDataType::Char(_)
| SqlDataType::Varchar(_)
| SqlDataType::Text
| SqlDataType::TinyText
| SqlDataType::MediumText
| SqlDataType::LongText
| SqlDataType::String(_) => Ok(ConcreteDataType::string_datatype()),
SqlDataType::Float(_) => Ok(ConcreteDataType::float32_datatype()),
SqlDataType::Double | SqlDataType::Float64 => Ok(ConcreteDataType::float64_datatype()),
SqlDataType::Double(_) | SqlDataType::Float64 => Ok(ConcreteDataType::float64_datatype()),
SqlDataType::Boolean => Ok(ConcreteDataType::boolean_datatype()),
SqlDataType::Date => Ok(ConcreteDataType::date_datatype()),
SqlDataType::Binary(_)
@@ -636,7 +634,7 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu
ConcreteDataType::UInt8(_) => Ok(SqlDataType::UnsignedTinyInt(None)),
ConcreteDataType::String(_) => Ok(SqlDataType::String(None)),
ConcreteDataType::Float32(_) => Ok(SqlDataType::Float(None)),
ConcreteDataType::Float64(_) => Ok(SqlDataType::Double),
ConcreteDataType::Float64(_) => Ok(SqlDataType::Double(ExactNumberInfo::None)),
ConcreteDataType::Boolean(_) => Ok(SqlDataType::Boolean),
ConcreteDataType::Date(_) => Ok(SqlDataType::Date),
ConcreteDataType::Timestamp(ts_type) => Ok(SqlDataType::Timestamp(
@@ -721,7 +719,10 @@ mod tests {
SqlDataType::Float(None),
ConcreteDataType::float32_datatype(),
);
check_type(SqlDataType::Double, ConcreteDataType::float64_datatype());
check_type(
SqlDataType::Double(ExactNumberInfo::None),
ConcreteDataType::float64_datatype(),
);
check_type(SqlDataType::Boolean, ConcreteDataType::boolean_datatype());
check_type(SqlDataType::Date, ConcreteDataType::date_datatype());
check_type(
@@ -1187,7 +1188,7 @@ mod tests {
// test basic
let column_def = ColumnDef {
name: "col".into(),
data_type: SqlDataType::Double,
data_type: SqlDataType::Double(ExactNumberInfo::None),
collation: None,
options: vec![],
};
@@ -1203,7 +1204,7 @@ mod tests {
// test not null
let column_def = ColumnDef {
name: "col".into(),
data_type: SqlDataType::Double,
data_type: SqlDataType::Double(ExactNumberInfo::None),
collation: None,
options: vec![ColumnOptionDef {
name: None,
@@ -1217,7 +1218,7 @@ mod tests {
// test primary key
let column_def = ColumnDef {
name: "col".into(),
data_type: SqlDataType::Double,
data_type: SqlDataType::Double(ExactNumberInfo::None),
collation: None,
options: vec![ColumnOptionDef {
name: None,
@@ -1290,7 +1291,7 @@ mod tests {
pub fn test_has_primary_key_option() {
let column_def = ColumnDef {
name: "col".into(),
data_type: SqlDataType::Double,
data_type: SqlDataType::Double(ExactNumberInfo::None),
collation: None,
options: vec![],
};
@@ -1298,7 +1299,7 @@ mod tests {
let column_def = ColumnDef {
name: "col".into(),
data_type: SqlDataType::Double,
data_type: SqlDataType::Double(ExactNumberInfo::None),
collation: None,
options: vec![ColumnOptionDef {
name: None,
@@ -1316,7 +1317,7 @@ mod tests {
let column_def = Column {
column_def: ColumnDef {
name: "col".into(),
data_type: SqlDataType::Double,
data_type: SqlDataType::Double(ExactNumberInfo::None),
collation: None,
options: vec![],
},
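Note: two sqlparser type changes run through this file: `DataType::Double` now carries an `ExactNumberInfo`, and TINYTEXT/MEDIUMTEXT/LONGTEXT parse into their own variants instead of folding into `Text`. A reduced sketch of both (function names are illustrative):

    use sqlparser::ast::{DataType as SqlDataType, ExactNumberInfo};

    // The former unit variant `SqlDataType::Double`.
    fn plain_double() -> SqlDataType {
        SqlDataType::Double(ExactNumberInfo::None)
    }

    // Mirrors the string-type arms added above.
    fn maps_to_string(dt: &SqlDataType) -> bool {
        matches!(
            dt,
            SqlDataType::Char(_)
                | SqlDataType::Varchar(_)
                | SqlDataType::Text
                | SqlDataType::TinyText
                | SqlDataType::MediumText
                | SqlDataType::LongText
                | SqlDataType::String(_)
        )
    }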

View File

@@ -14,13 +14,13 @@
use serde::Serialize;
use sqlparser::ast::{
Insert as SpInsert, ObjectName, Query, SetExpr, Statement, UnaryOperator, Values,
Insert as SpInsert, ObjectName, Query, SetExpr, Statement, TableObject, UnaryOperator, Values,
};
use sqlparser::parser::ParserError;
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{Expr, Value};
use crate::error::Result;
use crate::error::{Result, UnsupportedSnafu};
use crate::statements::query::Query as GtQuery;
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
@@ -39,9 +39,17 @@ macro_rules! parse_fail {
}
impl Insert {
pub fn table_name(&self) -> &ObjectName {
pub fn table_name(&self) -> Result<&ObjectName> {
match &self.inner {
Statement::Insert(insert) => &insert.table_name,
Statement::Insert(insert) => {
let TableObject::TableName(name) = &insert.table else {
return UnsupportedSnafu {
keyword: "TABLE FUNCTION".to_string(),
}
.fail();
};
Ok(name)
}
_ => unreachable!(),
}
}
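Note: `Statement::Insert` now targets a `TableObject`, which may be a table function rather than a plain name, so `table_name` becomes fallible. A sketch of the extraction on its own (helper name illustrative):

    use sqlparser::ast::{ObjectName, TableObject};

    // Return the plain table name, or None for e.g. `INSERT INTO TABLE FUNCTION ...`.
    fn plain_table_name(table: &TableObject) -> Option<&ObjectName> {
        match table {
            TableObject::TableName(name) => Some(name),
            _ => None,
        }
    }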

View File

@@ -334,13 +334,7 @@ mod tests {
assert_eq!("", format!("{}", ShowKind::All));
assert_eq!(
"LIKE test",
format!(
"{}",
ShowKind::Like(Ident {
value: "test".to_string(),
quote_style: None,
})
)
format!("{}", ShowKind::Like(Ident::new("test")),)
);
assert_eq!(
"WHERE NOT a",
@@ -348,10 +342,7 @@ mod tests {
"{}",
ShowKind::Where(Expr::UnaryOp {
op: UnaryOperator::Not,
expr: Box::new(Expr::Identifier(Ident {
value: "a".to_string(),
quote_style: None,
})),
expr: Box::new(Expr::Identifier(Ident::new("a"))),
})
)
);

View File

@@ -16,8 +16,8 @@ use std::ops::ControlFlow;
use datatypes::data_type::DataType as GreptimeDataType;
use sqlparser::ast::{
DataType, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, Ident,
ObjectName, Value,
DataType, ExactNumberInfo, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList,
Ident, ObjectName, Value,
};
use crate::error::Result;
@@ -91,6 +91,7 @@ impl TransformRule for TypeAliasTransformRule {
over: None,
parameters: sqlparser::ast::FunctionArguments::None,
within_group: vec![],
uses_odbc_syntax: false,
}
}
@@ -166,7 +167,7 @@ pub(crate) fn get_type_by_alias(data_type: &DataType) -> Option<DataType> {
DataType::UInt32 => Some(DataType::UnsignedInt(None)),
DataType::UInt64 => Some(DataType::UnsignedBigInt(None)),
DataType::Float32 => Some(DataType::Float(None)),
DataType::Float64 => Some(DataType::Double),
DataType::Float64 => Some(DataType::Double(ExactNumberInfo::None)),
DataType::Bool => Some(DataType::Boolean),
DataType::Datetime(_) => Some(DataType::Timestamp(Some(6), TimezoneInfo::None)),
_ => None,
@@ -207,7 +208,7 @@ pub(crate) fn get_data_type_by_alias_name(name: &str) -> Option<DataType> {
"UINT32" => Some(DataType::UnsignedInt(None)),
"UINT64" => Some(DataType::UnsignedBigInt(None)),
"FLOAT32" => Some(DataType::Float(None)),
"FLOAT64" => Some(DataType::Double),
"FLOAT64" => Some(DataType::Double(ExactNumberInfo::None)),
// String type alias
"TINYTEXT" | "MEDIUMTEXT" | "LONGTEXT" => Some(DataType::Text),
_ => None,
@@ -226,15 +227,15 @@ mod tests {
fn test_get_data_type_by_alias_name() {
assert_eq!(
get_data_type_by_alias_name("float64"),
Some(DataType::Double)
Some(DataType::Double(ExactNumberInfo::None))
);
assert_eq!(
get_data_type_by_alias_name("Float64"),
Some(DataType::Double)
Some(DataType::Double(ExactNumberInfo::None))
);
assert_eq!(
get_data_type_by_alias_name("FLOAT64"),
Some(DataType::Double)
Some(DataType::Double(ExactNumberInfo::None))
);
assert_eq!(
@@ -410,9 +411,9 @@ CREATE TABLE data_types (
Statement::CreateTable(c) => {
let expected = r#"CREATE TABLE data_types (
s STRING,
tt TEXT,
mt TEXT,
lt TEXT,
tt TINYTEXT,
mt MEDIUMTEXT,
lt LONGTEXT,
tint TINYINT,
sint SMALLINT,
i INT,

View File

@@ -254,30 +254,7 @@ fn extract_from_binary_expr(
let right = extract_time_range_from_expr(ts_col_name, ts_col_unit, right)?;
Some(left.or(&right))
}
Operator::NotEq
| Operator::Plus
| Operator::Minus
| Operator::Multiply
| Operator::Divide
| Operator::Modulo
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom
| Operator::RegexMatch
| Operator::RegexIMatch
| Operator::RegexNotMatch
| Operator::RegexNotIMatch
| Operator::BitwiseAnd
| Operator::BitwiseOr
| Operator::BitwiseXor
| Operator::BitwiseShiftRight
| Operator::BitwiseShiftLeft
| Operator::StringConcat
| Operator::ArrowAt
| Operator::AtArrow
| Operator::LikeMatch
| Operator::ILikeMatch
| Operator::NotLikeMatch
| Operator::NotILikeMatch => None,
_ => None,
}
}

View File

@@ -25,9 +25,10 @@ use common_telemetry::tracing_context::TracingContext;
use common_telemetry::warn;
use datafusion::error::Result as DfResult;
use datafusion::execution::context::TaskContext;
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet};
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties,
DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
RecordBatchStream as DfRecordBatchStream,
};
use datafusion_common::stats::Precision;
@@ -134,7 +135,8 @@ impl RegionScanExec {
let properties = PlanProperties::new(
eq_props,
Partitioning::UnknownPartitioning(num_output_partition),
ExecutionMode::Bounded,
EmissionType::Incremental,
Boundedness::Bounded,
);
let append_mode = scanner_props.append_mode();
let total_rows = scanner_props.total_rows();

View File

@@ -19,7 +19,7 @@ pub mod create_expr;
pub fn sql_data_type_to_postgres_data_type(data_type: SqlDataType) -> String {
match data_type {
SqlDataType::Double => "DOUBLE PRECISION".to_string(),
SqlDataType::Double(_) => "DOUBLE PRECISION".to_string(),
_ => data_type.to_string(),
}
}

View File

@@ -438,7 +438,7 @@ pub async fn test_sql_api(store_type: StorageType) {
assert_eq!(res.status(), StatusCode::OK);
assert_eq!(
res.text().await,
"[{\"DescribeTable\":{\"name\":[{\"value\":\"t\",\"quote_style\":null}]}}]"
r#"[{"DescribeTable":{"name":[{"value":"t","quote_style":null,"span":{"start":{"line":0,"column":0},"end":{"line":0,"column":0}}}]}}]"#,
);
// test timezone header

View File

@@ -54,7 +54,7 @@ show create table t3;
create table t4 (ts timestamp time index default now);
Error: 1001(Unsupported), Unsupported expr in default constraint: Identifier(Ident { value: "now", quote_style: None }) for column: ts
Error: 1001(Unsupported), Unsupported expr in default constraint: Identifier(Ident { value: "now", quote_style: None, span: Span(Location(1,50)..Location(1,53)) }) for column: ts
drop table t1;

View File

@@ -335,11 +335,11 @@ FROM cell_cte;
SELECT UNNEST(geo_path(37.76938, -122.3889, 1728083375::TimestampSecond));
+--------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+
| unnest_placeholder(geo_path(Float64(37.76938),Float64(-122.3889),arrow_cast(Int64(1728083375),Utf8("Timestamp(Second, None)")))).lat | unnest_placeholder(geo_path(Float64(37.76938),Float64(-122.3889),arrow_cast(Int64(1728083375),Utf8("Timestamp(Second, None)")))).lng |
+--------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+
| [37.76938] | [-122.3889] |
+--------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+
+----------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
| __unnest_placeholder(geo_path(Float64(37.76938),Float64(-122.3889),arrow_cast(Int64(1728083375),Utf8("Timestamp(Second, None)")))).lat | __unnest_placeholder(geo_path(Float64(37.76938),Float64(-122.3889),arrow_cast(Int64(1728083375),Utf8("Timestamp(Second, None)")))).lng |
+----------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
| [37.76938] | [-122.3889] |
+----------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+
SELECT UNNEST(geo_path(lat, lon, ts))
FROM(
@@ -352,11 +352,11 @@ FROM(
SELECT 37.77001 AS lat, -122.3888 AS lon, 1728083372::TimestampSecond AS ts
);
+----------------------------------------------+----------------------------------------------+
| unnest_placeholder(geo_path(lat,lon,ts)).lat | unnest_placeholder(geo_path(lat,lon,ts)).lng |
+----------------------------------------------+----------------------------------------------+
| [37.77001, 37.76928, 37.76938, 37.7693] | [-122.3888, -122.3839, -122.3889, -122.382] |
+----------------------------------------------+----------------------------------------------+
+------------------------------------------------+------------------------------------------------+
| __unnest_placeholder(geo_path(lat,lon,ts)).lat | __unnest_placeholder(geo_path(lat,lon,ts)).lng |
+------------------------------------------------+------------------------------------------------+
| [37.77001, 37.76928, 37.76938, 37.7693] | [-122.3888, -122.3839, -122.3889, -122.382] |
+------------------------------------------------+------------------------------------------------+
SELECT wkt_point_from_latlng(37.76938, -122.3889) AS point;

View File

@@ -23,7 +23,7 @@ select * from data;
insert into data values (4, 'infinityyyy'::double);
Error: 3001(EngineExecuteQuery), DataFusion error: Cast error: Cannot cast string 'infinityyyy' to value of Float64 type
Error: 3001(EngineExecuteQuery), Cast error: Cannot cast string 'infinityyyy' to value of Float64 type
drop table data;

View File

@@ -45,11 +45,11 @@ Error: 3000(PlanQuery), Failed to plan SQL: No field named a.
SELECT a FROM test LIMIT SUM(42);
Error: 3001(EngineExecuteQuery), DataFusion error: This feature is not implemented: Unsupported LIMIT expression: Some(AggregateFunction(AggregateFunction { func: AggregateUDF { inner: Sum { signature: Signature { type_signature: UserDefined, volatility: Immutable } } }, args: [Literal(Int64(42))], distinct: false, filter: None, order_by: None, null_treatment: None }))
Error: 1001(Unsupported), This feature is not implemented: Unsupported LIMIT expression: Some(AggregateFunction(AggregateFunction { func: AggregateUDF { inner: Sum { signature: Signature { type_signature: UserDefined, volatility: Immutable } } }, args: [Literal(Int64(42))], distinct: false, filter: None, order_by: None, null_treatment: None }))
SELECT a FROM test LIMIT row_number() OVER ();
Error: 3001(EngineExecuteQuery), DataFusion error: This feature is not implemented: Unsupported LIMIT expression: Some(Cast(Cast { expr: WindowFunction(WindowFunction { fun: WindowUDF(WindowUDF { inner: RowNumber { signature: Signature { type_signature: NullAry, volatility: Immutable } } }), args: [], partition_by: [], order_by: [], window_frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }, null_treatment: None }), data_type: Int64 }))
Error: 3001(EngineExecuteQuery), This feature is not implemented: Unsupported LIMIT expression: Some(Cast(Cast { expr: WindowFunction(WindowFunction { fun: WindowUDF(WindowUDF { inner: RowNumber { signature: Signature { type_signature: Nullary, volatility: Immutable } } }), args: [], partition_by: [], order_by: [], window_frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }, null_treatment: None }), data_type: Int64 }))
CREATE TABLE test2 (a STRING, ts TIMESTAMP TIME INDEX);
@@ -122,11 +122,11 @@ Error: 3001(EngineExecuteQuery), DataFusion error: Error during planning: Cannot
SELECT * FROM integers as int LIMIT (SELECT NULL);
Error: 3001(EngineExecuteQuery), DataFusion error: This feature is not implemented: Unsupported LIMIT expression: Some(ScalarSubquery(<subquery>))
Error: 1001(Unsupported), This feature is not implemented: Unsupported LIMIT expression: Some(ScalarSubquery(<subquery>))
SELECT * FROM integers as int LIMIT (SELECT -1);
Error: 3001(EngineExecuteQuery), DataFusion error: This feature is not implemented: Unsupported LIMIT expression: Some(ScalarSubquery(<subquery>))
Error: 1001(Unsupported), This feature is not implemented: Unsupported LIMIT expression: Some(ScalarSubquery(<subquery>))
SELECT * FROM integers as int LIMIT (SELECT 'ab');

View File

@@ -82,7 +82,7 @@ EXPLAIN SELECT a % 2, b FROM test UNION SELECT a % 2 AS k, b FROM test ORDER BY
SELECT a % 2, b FROM test UNION SELECT a % 2 AS k FROM test ORDER BY -1;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: UNION queries have different number of columns: left has 2 columns whereas right has 1 columns
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: UNION queries have different number of columns
DROP TABLE test;

View File

@@ -3,7 +3,7 @@
-- SQLNESS PROTOCOL MYSQL
SELECT ?;
Failed to execute query, err: MySqlError { ERROR 1815 (HY000): (EngineExecuteQuery): DataFusion error: Execution error: Placeholder '?' was not provided a value for execution. }
Failed to execute query, err: MySqlError { ERROR 1815 (HY000): (EngineExecuteQuery): Execution error: Placeholder '?' was not provided a value for execution. }
-- SQLNESS PROTOCOL MYSQL
PREPARE stmt FROM 'SELECT ?::int;';

View File

@@ -63,7 +63,7 @@ SELECT ts, length(host)::INT64 + 2, max(val) RANGE '5s' FROM host ALIGN '20s' BY
-- project non-aggregation key
SELECT ts, host, max(val) RANGE '5s' FROM host ALIGN '20s' BY () ORDER BY ts;
Error: 3001(EngineExecuteQuery), DataFusion error: No field named host.host. Valid fields are "max(host.val) RANGE 5s", host.ts, "Int64(1)".
Error: 3001(EngineExecuteQuery), No field named host.host. Valid fields are "max(host.val) RANGE 5s", host.ts, "Int64(1)".
DROP TABLE host;

View File

@@ -41,7 +41,7 @@ Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Illegal Range
SELECT min(val) RANGE '10s', max(val) FROM host ALIGN '5s';
Error: 3001(EngineExecuteQuery), DataFusion error: No field named "max(host.val)". Valid fields are "min(host.val) RANGE 10s", host.ts, host.host.
Error: 3001(EngineExecuteQuery), No field named "max(host.val)". Valid fields are "min(host.val) RANGE 10s", host.ts, host.host.
SELECT min(val) * 2 RANGE '10s' FROM host ALIGN '5s';
@@ -54,7 +54,7 @@ Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Can't use the
-- 2.2 no align param
SELECT min(val) RANGE '5s' FROM host;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Missing argument in range select query
Error: 3000(PlanQuery), Error during planning: Missing argument in range select query
-- 2.3 type mismatch
SELECT covar(ceil(val), floor(val)) RANGE '20s' FROM host ALIGN '10s';
@@ -85,24 +85,24 @@ Error: 2000(InvalidSyntax), Range Query: Window functions is not allowed in Rang
-- 2.6 invalid fill
SELECT min(val) RANGE '5s' FROM host ALIGN '5s' FILL 3.0;
Error: 3000(PlanQuery), DataFusion error: Error during planning: 3.0 is not a valid fill option, fail to convert to a const value. { Arrow error: Cast error: Cannot cast string '3.0' to value of Int64 type }
Error: 3000(PlanQuery), Error during planning: 3.0 is not a valid fill option, fail to convert to a const value. { Arrow error: Cast error: Cannot cast string '3.0' to value of Int64 type }
-- 2.7 zero align/range
SELECT min(val) RANGE '5s' FROM host ALIGN '0s';
Error: 3000(PlanQuery), DataFusion error: Error during planning: duration must be greater than 0
Error: 3000(PlanQuery), Error during planning: duration must be greater than 0
SELECT min(val) RANGE '0s' FROM host ALIGN '5s';
Error: 3000(PlanQuery), DataFusion error: Error during planning: duration must be greater than 0
Error: 3000(PlanQuery), Error during planning: duration must be greater than 0
SELECT min(val) RANGE '5s' FROM host ALIGN (INTERVAL '0' day);
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 0 }")` in range select query
Error: 3000(PlanQuery), Error during planning: Illegal argument `IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 0 }")` in range select query
SELECT min(val) RANGE (INTERVAL '0' day) FROM host ALIGN '5s';
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 0 }")` in range select query
Error: 3000(PlanQuery), Error during planning: Illegal argument `IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 0 }")` in range select query
DROP TABLE host;

View File

@@ -20,7 +20,7 @@ Affected Rows: 8
SELECT ts, host, min(val) RANGE (INTERVAL '1 year') FROM host ALIGN (INTERVAL '1 year') ORDER BY host, ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Year or month interval is not allowed in range query: IntervalMonthDayNano("IntervalMonthDayNano { months: 12, days: 0, nanoseconds: 0 }")
Error: 3000(PlanQuery), Error during planning: Year or month interval is not allowed in range query: IntervalMonthDayNano("IntervalMonthDayNano { months: 12, days: 0, nanoseconds: 0 }")
SELECT ts, host, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) ORDER BY host, ts;

View File

@@ -64,12 +64,12 @@ SELECT ts, host, first_value(addon ORDER BY val DESC) RANGE '5s', last_value(add
| ts | host | first_value(host.addon) ORDER BY [host.val DESC NULLS FIRST] RANGE 5s | last_value(host.addon) ORDER BY [host.val DESC NULLS FIRST] RANGE 5s |
+---------------------+-------+-----------------------------------------------------------------------+----------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 | 1 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:05 | host1 | 4 | 6 |
| 1970-01-01T00:00:10 | host1 | 7 | 8 |
| 1970-01-01T00:00:15 | host1 | 11 | 10 |
| 1970-01-01T00:00:20 | host1 | 15 | 13 |
| 1970-01-01T00:00:00 | host2 | 18 | 16 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:05 | host2 | 19 | 21 |
| 1970-01-01T00:00:10 | host2 | 22 | 23 |
| 1970-01-01T00:00:15 | host2 | 26 | 25 |
| 1970-01-01T00:00:20 | host2 | 30 | 28 |
@@ -81,12 +81,12 @@ SELECT ts, host, first_value(addon ORDER BY val DESC NULLS LAST) RANGE '5s', las
| ts | host | first_value(host.addon) ORDER BY [host.val DESC NULLS LAST] RANGE 5s | last_value(host.addon) ORDER BY [host.val DESC NULLS LAST] RANGE 5s |
+---------------------+-------+----------------------------------------------------------------------+---------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 | 1 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:05 | host1 | 4 | 6 |
| 1970-01-01T00:00:10 | host1 | 9 | 7 |
| 1970-01-01T00:00:15 | host1 | 12 | 11 |
| 1970-01-01T00:00:20 | host1 | 14 | 15 |
| 1970-01-01T00:00:00 | host2 | 18 | 16 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:05 | host2 | 19 | 21 |
| 1970-01-01T00:00:10 | host2 | 24 | 22 |
| 1970-01-01T00:00:15 | host2 | 27 | 26 |
| 1970-01-01T00:00:20 | host2 | 29 | 30 |
@@ -98,12 +98,12 @@ SELECT ts, host, first_value(addon ORDER BY val ASC) RANGE '5s', last_value(addo
| ts | host | first_value(host.addon) ORDER BY [host.val ASC NULLS LAST] RANGE 5s | last_value(host.addon) ORDER BY [host.val ASC NULLS LAST] RANGE 5s |
+---------------------+-------+---------------------------------------------------------------------+--------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1 | 3 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:05 | host1 | 4 | 6 |
| 1970-01-01T00:00:10 | host1 | 8 | 7 |
| 1970-01-01T00:00:15 | host1 | 10 | 11 |
| 1970-01-01T00:00:20 | host1 | 13 | 15 |
| 1970-01-01T00:00:00 | host2 | 16 | 18 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:05 | host2 | 19 | 21 |
| 1970-01-01T00:00:10 | host2 | 23 | 22 |
| 1970-01-01T00:00:15 | host2 | 25 | 26 |
| 1970-01-01T00:00:20 | host2 | 28 | 30 |
@@ -115,12 +115,12 @@ SELECT ts, host, first_value(addon ORDER BY val ASC NULLS FIRST) RANGE '5s', las
| ts | host | first_value(host.addon) ORDER BY [host.val ASC NULLS FIRST] RANGE 5s | last_value(host.addon) ORDER BY [host.val ASC NULLS FIRST] RANGE 5s |
+---------------------+-------+----------------------------------------------------------------------+---------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1 | 3 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:05 | host1 | 4 | 6 |
| 1970-01-01T00:00:10 | host1 | 7 | 9 |
| 1970-01-01T00:00:15 | host1 | 11 | 12 |
| 1970-01-01T00:00:20 | host1 | 15 | 14 |
| 1970-01-01T00:00:00 | host2 | 16 | 18 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:05 | host2 | 19 | 21 |
| 1970-01-01T00:00:10 | host2 | 22 | 24 |
| 1970-01-01T00:00:15 | host2 | 26 | 27 |
| 1970-01-01T00:00:20 | host2 | 30 | 29 |
@@ -231,7 +231,7 @@ SELECT ts, host, count(distinct *) RANGE '5s' FROM host ALIGN '5s' ORDER BY host
-- Test error first_value/last_value
SELECT ts, host, first_value(val, val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Error during planning: The function expected 1 arguments but received 2 No function matches the given name and argument types 'first_value(Int64, Int64)'. You might need to add explicit type casts.
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: The function 'first_value' expected 1 arguments but received 2 No function matches the given name and argument types 'first_value(Int64, Int64)'. You might need to add explicit type casts.
Candidate functions:
first_value(Any)

View File

@@ -31,7 +31,7 @@ SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO UNKNOWN ORDER BY host, ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal `align to` argument `UNKNOWN` in range select query, can't be parse as NOW/CALENDAR/Timestamp, error: Failed to parse a string into Timestamp, raw string: UNKNOWN
Error: 3000(PlanQuery), Error during planning: Illegal `align to` argument `UNKNOWN` in range select query, can't be parse as NOW/CALENDAR/Timestamp, error: Failed to parse a string into Timestamp, raw string: UNKNOWN
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1900-01-01T00:00:00+01:00' ORDER BY host, ts;
@@ -95,16 +95,16 @@ SELECT ts, min(val) RANGE (INTERVAL '2' day - INTERVAL '1' day) FROM host ALIGN
-- non-positive duration
SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '2' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }") - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 2, nanoseconds: 0 }")` in range select query
Error: 3000(PlanQuery), Error during planning: Illegal argument `IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }") - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 2, nanoseconds: 0 }")` in range select query
SELECT ts, min(val) RANGE (INTERVAL '1' day - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }") - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }")` in range select query
Error: 3000(PlanQuery), Error during planning: Illegal argument `IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }") - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }")` in range select query
-- duration not all interval
SELECT ts, min(val) RANGE (now() - INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal argument `now() - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }")` in range select query
Error: 3000(PlanQuery), Error during planning: Illegal argument `now() - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }")` in range select query
--- ALIGN TO with time zone ---
set time_zone='Asia/Shanghai';

View File

@@ -29,11 +29,11 @@ SELECT unnest([1,2,3]);
SELECT unnest(struct(1,2,3));
+-----------------------------------------------------------+-----------------------------------------------------------+-----------------------------------------------------------+
| unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c0 | unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c1 | unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c2 |
+-----------------------------------------------------------+-----------------------------------------------------------+-----------------------------------------------------------+
| 1 | 2 | 3 |
+-----------------------------------------------------------+-----------------------------------------------------------+-----------------------------------------------------------+
+-------------------------------------------------------------+-------------------------------------------------------------+-------------------------------------------------------------+
| __unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c0 | __unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c1 | __unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c2 |
+-------------------------------------------------------------+-------------------------------------------------------------+-------------------------------------------------------------+
| 1 | 2 | 3 |
+-------------------------------------------------------------+-------------------------------------------------------------+-------------------------------------------------------------+
-- Table function is not supported for now
-- SELECT * FROM unnest([1,2,3]);

View File

@@ -119,11 +119,11 @@ Affected Rows: 25
INSERT INTO jsons VALUES(parse_json('{"a":1, "b":2, "c":3'), 4);
Error: 3001(EngineExecuteQuery), DataFusion error: Invalid function args: Cannot convert the string to json, have: {"a":1, "b":2, "c":3
Error: 3001(EngineExecuteQuery), Invalid function args: Cannot convert the string to json, have: {"a":1, "b":2, "c":3
INSERT INTO jsons VALUES(parse_json('Morning my friends, have a nice day :)'), 5);
Error: 3001(EngineExecuteQuery), DataFusion error: Invalid function args: Cannot convert the string to json, have: Morning my friends, have a nice day :)
Error: 3001(EngineExecuteQuery), Invalid function args: Cannot convert the string to json, have: Morning my friends, have a nice day :)
SELECT json_to_string(j), t FROM jsons;

View File

@@ -75,13 +75,13 @@ SELECT MAX(t) FROM timestamp;
SELECT SUM(t) FROM timestamp;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution error: User-defined coercion failed with Execution("Sum not supported for Timestamp(Millisecond, None)") No function matches the given name and argument types 'sum(Timestamp(Millisecond, None))'. You might need to add explicit type casts.
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution error: Function 'sum' user-defined coercion failed with Execution("Sum not supported for Timestamp(Millisecond, None)") No function matches the given name and argument types 'sum(Timestamp(Millisecond, None))'. You might need to add explicit type casts.
Candidate functions:
sum(UserDefined)
SELECT AVG(t) FROM timestamp;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution error: User-defined coercion failed with Plan("The function \"avg\" does not support inputs of type Timestamp(Millisecond, None).") No function matches the given name and argument types 'avg(Timestamp(Millisecond, None))'. You might need to add explicit type casts.
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution error: Function 'avg' user-defined coercion failed with Plan("The function \"avg\" does not support inputs of type Timestamp(Millisecond, None).") No function matches the given name and argument types 'avg(Timestamp(Millisecond, None))'. You might need to add explicit type casts.
Candidate functions:
avg(UserDefined)