mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-27 10:20:38 +00:00
chore(deps): bump arrow/parquet to 40.0, datafuson to the latest HEAD (#1677)
* fix compile error Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * remove deprecated substrait Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix clippy Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * update deps Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * downgrade opendal to 0.33.1 Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * change finish's impl Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * update test results Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * ignore failing cases Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
1241
Cargo.lock
generated
1241
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
26
Cargo.toml
26
Cargo.toml
@@ -54,31 +54,31 @@ edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
arrow = { version = "37.0" }
|
||||
arrow-array = "37.0"
|
||||
arrow-flight = "37.0"
|
||||
arrow-schema = { version = "37.0", features = ["serde"] }
|
||||
arrow = { version = "40.0" }
|
||||
arrow-array = "40.0"
|
||||
arrow-flight = "40.0"
|
||||
arrow-schema = { version = "40.0", features = ["serde"] }
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
# TODO(ruihang): use arrow-datafusion when it contains https://github.com/apache/arrow-datafusion/pull/6032
|
||||
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5337c86120de8193406b59be7612484796a46294" }
|
||||
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5337c86120de8193406b59be7612484796a46294" }
|
||||
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5337c86120de8193406b59be7612484796a46294" }
|
||||
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5337c86120de8193406b59be7612484796a46294" }
|
||||
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5337c86120de8193406b59be7612484796a46294" }
|
||||
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5337c86120de8193406b59be7612484796a46294" }
|
||||
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5337c86120de8193406b59be7612484796a46294" }
|
||||
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
|
||||
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
|
||||
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
|
||||
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
|
||||
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
|
||||
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
|
||||
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
parquet = "37.0"
|
||||
parquet = "40.0"
|
||||
paste = "1.0"
|
||||
prost = "0.11"
|
||||
rand = "0.8"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
sqlparser = "0.33"
|
||||
sqlparser = "0.34"
|
||||
tempfile = "3"
|
||||
tokio = { version = "1.28", features = ["full"] }
|
||||
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
|
||||
|
||||
@@ -110,6 +110,7 @@ impl ArrowDecoder for arrow::csv::reader::Decoder {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
impl ArrowDecoder for arrow::json::RawDecoder {
|
||||
fn decode(&mut self, buf: &[u8]) -> result::Result<usize, ArrowError> {
|
||||
self.decode(buf)
|
||||
|
||||
@@ -17,6 +17,7 @@ use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::csv;
|
||||
#[allow(deprecated)]
|
||||
use arrow::csv::reader::infer_reader_schema as infer_csv_schema;
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_schema::{Schema, SchemaRef};
|
||||
@@ -113,8 +114,7 @@ pub struct CsvConfig {
|
||||
|
||||
impl CsvConfig {
|
||||
fn builder(&self) -> csv::ReaderBuilder {
|
||||
let mut builder = csv::ReaderBuilder::new()
|
||||
.with_schema(self.file_schema.clone())
|
||||
let mut builder = csv::ReaderBuilder::new(self.file_schema.clone())
|
||||
.with_delimiter(self.delimiter)
|
||||
.with_batch_size(self.batch_size)
|
||||
.has_header(self.has_header);
|
||||
@@ -160,6 +160,7 @@ impl FileOpener for CsvOpener {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
#[async_trait]
|
||||
impl FileFormat for CsvFormat {
|
||||
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
|
||||
|
||||
@@ -20,6 +20,7 @@ use std::sync::Arc;
|
||||
use arrow::datatypes::SchemaRef;
|
||||
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
|
||||
use arrow::json::writer::LineDelimited;
|
||||
#[allow(deprecated)]
|
||||
use arrow::json::{self, RawReaderBuilder};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_schema::Schema;
|
||||
@@ -129,6 +130,7 @@ impl JsonOpener {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
impl FileOpener for JsonOpener {
|
||||
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
|
||||
open_with_decoder(
|
||||
@@ -159,8 +161,7 @@ pub async fn stream_to_json(
|
||||
|
||||
impl DfRecordBatchEncoder for json::Writer<SharedBuffer, LineDelimited> {
|
||||
fn write(&mut self, batch: &RecordBatch) -> Result<()> {
|
||||
self.write(batch.clone())
|
||||
.context(error::WriteRecordBatchSnafu)
|
||||
self.write(batch).context(error::WriteRecordBatchSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ query = { path = "../../query" }
|
||||
|
||||
[dependencies.substrait_proto]
|
||||
package = "substrait"
|
||||
version = "0.7"
|
||||
version = "0.10"
|
||||
|
||||
[dev-dependencies]
|
||||
datatypes = { path = "../../datatypes" }
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use datafusion::common::DFSchemaRef;
|
||||
use substrait_proto::proto::extensions::simple_extension_declaration::{
|
||||
ExtensionFunction, MappingType,
|
||||
};
|
||||
use substrait_proto::proto::extensions::SimpleExtensionDeclaration;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ConvertorContext {
|
||||
scalar_fn_names: HashMap<String, u32>,
|
||||
scalar_fn_map: HashMap<u32, String>,
|
||||
df_schema: Option<DFSchemaRef>,
|
||||
}
|
||||
|
||||
impl ConvertorContext {
|
||||
pub fn register_scalar_fn<S: AsRef<str>>(&mut self, name: S) -> u32 {
|
||||
if let Some(anchor) = self.scalar_fn_names.get(name.as_ref()) {
|
||||
return *anchor;
|
||||
}
|
||||
|
||||
let next_anchor = self.scalar_fn_map.len() as _;
|
||||
self.scalar_fn_map
|
||||
.insert(next_anchor, name.as_ref().to_string());
|
||||
self.scalar_fn_names
|
||||
.insert(name.as_ref().to_string(), next_anchor);
|
||||
next_anchor
|
||||
}
|
||||
|
||||
pub fn register_scalar_with_anchor<S: AsRef<str>>(&mut self, name: S, anchor: u32) {
|
||||
self.scalar_fn_map.insert(anchor, name.as_ref().to_string());
|
||||
self.scalar_fn_names
|
||||
.insert(name.as_ref().to_string(), anchor);
|
||||
}
|
||||
|
||||
pub fn find_scalar_fn(&self, anchor: u32) -> Option<&str> {
|
||||
self.scalar_fn_map.get(&anchor).map(|s| s.as_str())
|
||||
}
|
||||
|
||||
pub fn generate_function_extension(&self) -> Vec<SimpleExtensionDeclaration> {
|
||||
let mut result = Vec::with_capacity(self.scalar_fn_map.len());
|
||||
for (anchor, name) in &self.scalar_fn_map {
|
||||
let declaration = SimpleExtensionDeclaration {
|
||||
mapping_type: Some(MappingType::ExtensionFunction(ExtensionFunction {
|
||||
extension_uri_reference: 0,
|
||||
function_anchor: *anchor,
|
||||
name: name.clone(),
|
||||
})),
|
||||
};
|
||||
result.push(declaration);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn set_df_schema(&mut self, schema: DFSchemaRef) {
|
||||
debug_assert!(self.df_schema.is_none());
|
||||
self.df_schema.get_or_insert(schema);
|
||||
}
|
||||
|
||||
pub(crate) fn df_schema(&self) -> Option<&DFSchemaRef> {
|
||||
self.df_schema.as_ref()
|
||||
}
|
||||
}
|
||||
@@ -1,799 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::str::FromStr;
|
||||
|
||||
use datafusion::common::Column;
|
||||
use datafusion_expr::expr::Sort;
|
||||
use datafusion_expr::{expr_fn, lit, Between, BinaryExpr, BuiltinScalarFunction, Expr, Operator};
|
||||
use datatypes::schema::Schema;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use substrait_proto::proto::expression::field_reference::ReferenceType as FieldReferenceType;
|
||||
use substrait_proto::proto::expression::reference_segment::{
|
||||
ReferenceType as SegReferenceType, StructField,
|
||||
};
|
||||
use substrait_proto::proto::expression::{
|
||||
FieldReference, Literal, ReferenceSegment, RexType, ScalarFunction,
|
||||
};
|
||||
use substrait_proto::proto::function_argument::ArgType;
|
||||
use substrait_proto::proto::Expression;
|
||||
|
||||
use crate::context::ConvertorContext;
|
||||
use crate::error::{
|
||||
EmptyExprSnafu, InvalidParametersSnafu, MissingFieldSnafu, Result, UnsupportedExprSnafu,
|
||||
};
|
||||
use crate::types::{literal_type_to_scalar_value, scalar_value_as_literal_type};
|
||||
|
||||
/// Convert substrait's `Expression` to DataFusion's `Expr`.
|
||||
pub(crate) fn to_df_expr(
|
||||
ctx: &ConvertorContext,
|
||||
expression: Expression,
|
||||
schema: &Schema,
|
||||
) -> Result<Expr> {
|
||||
let expr_rex_type = expression.rex_type.context(EmptyExprSnafu)?;
|
||||
match expr_rex_type {
|
||||
RexType::Literal(l) => {
|
||||
let t = l.literal_type.context(MissingFieldSnafu {
|
||||
field: "LiteralType",
|
||||
plan: "Literal",
|
||||
})?;
|
||||
let v = literal_type_to_scalar_value(t)?;
|
||||
Ok(lit(v))
|
||||
}
|
||||
RexType::Selection(selection) => convert_selection_rex(*selection, schema),
|
||||
RexType::ScalarFunction(scalar_fn) => convert_scalar_function(ctx, scalar_fn, schema),
|
||||
RexType::WindowFunction(_)
|
||||
| RexType::IfThen(_)
|
||||
| RexType::SwitchExpression(_)
|
||||
| RexType::SingularOrList(_)
|
||||
| RexType::MultiOrList(_)
|
||||
| RexType::Cast(_)
|
||||
| RexType::Subquery(_)
|
||||
| RexType::Nested(_)
|
||||
| RexType::Enum(_) => UnsupportedExprSnafu {
|
||||
name: format!("substrait expression {expr_rex_type:?}"),
|
||||
}
|
||||
.fail()?,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert Substrait's `FieldReference` - `DirectReference` - `StructField` to Datafusion's
|
||||
/// `Column` expr.
|
||||
pub fn convert_selection_rex(selection: FieldReference, schema: &Schema) -> Result<Expr> {
|
||||
if let Some(FieldReferenceType::DirectReference(direct_ref)) = selection.reference_type
|
||||
&& let Some(SegReferenceType::StructField(field)) = direct_ref.reference_type {
|
||||
let column_name = schema.column_name_by_index(field.field as _).to_string();
|
||||
Ok(Expr::Column(Column {
|
||||
relation: None,
|
||||
name: column_name,
|
||||
}))
|
||||
} else {
|
||||
InvalidParametersSnafu {
|
||||
reason: "Only support direct struct reference in Selection Rex",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn convert_scalar_function(
|
||||
ctx: &ConvertorContext,
|
||||
scalar_fn: ScalarFunction,
|
||||
schema: &Schema,
|
||||
) -> Result<Expr> {
|
||||
// convert argument
|
||||
let mut inputs = VecDeque::with_capacity(scalar_fn.arguments.len());
|
||||
for arg in scalar_fn.arguments {
|
||||
if let Some(ArgType::Value(sub_expr)) = arg.arg_type {
|
||||
inputs.push_back(to_df_expr(ctx, sub_expr, schema)?);
|
||||
} else {
|
||||
InvalidParametersSnafu {
|
||||
reason: "Only value expression arg is supported to be function argument",
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
}
|
||||
|
||||
// convert this scalar function
|
||||
// map function name
|
||||
let anchor = scalar_fn.function_reference;
|
||||
let fn_name = ctx
|
||||
.find_scalar_fn(anchor)
|
||||
.with_context(|| InvalidParametersSnafu {
|
||||
reason: format!("Unregistered scalar function reference: {anchor}"),
|
||||
})?;
|
||||
|
||||
// convenient util
|
||||
let ensure_arg_len = |expected: usize| -> Result<()> {
|
||||
ensure!(
|
||||
inputs.len() == expected,
|
||||
InvalidParametersSnafu {
|
||||
reason: format!(
|
||||
"Invalid number of scalar function {}, expected {} but found {}",
|
||||
fn_name,
|
||||
expected,
|
||||
inputs.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// construct DataFusion expr
|
||||
let expr = match fn_name {
|
||||
// begin binary exprs, with the same order of DF `Operator`'s definition.
|
||||
"eq" | "equal" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs.pop_front().unwrap().eq(inputs.pop_front().unwrap())
|
||||
}
|
||||
"not_eq" | "not_equal" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.not_eq(inputs.pop_front().unwrap())
|
||||
}
|
||||
"lt" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs.pop_front().unwrap().lt(inputs.pop_front().unwrap())
|
||||
}
|
||||
"lt_eq" | "lte" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.lt_eq(inputs.pop_front().unwrap())
|
||||
}
|
||||
"gt" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs.pop_front().unwrap().gt(inputs.pop_front().unwrap())
|
||||
}
|
||||
"gt_eq" | "gte" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.gt_eq(inputs.pop_front().unwrap())
|
||||
}
|
||||
"plus" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Plus,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"minus" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Minus,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"multiply" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Multiply,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"divide" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Divide,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"modulo" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::Modulo,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"and" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::and(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
|
||||
}
|
||||
"or" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::or(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
|
||||
}
|
||||
"like" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.like(inputs.pop_front().unwrap())
|
||||
}
|
||||
"not_like" => {
|
||||
ensure_arg_len(2)?;
|
||||
inputs
|
||||
.pop_front()
|
||||
.unwrap()
|
||||
.not_like(inputs.pop_front().unwrap())
|
||||
}
|
||||
"is_distinct_from" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::IsDistinctFrom,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"is_not_distinct_from" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::IsNotDistinctFrom,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"regex_match" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::RegexMatch,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"regex_i_match" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::RegexIMatch,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"regex_not_match" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::RegexNotMatch,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"regex_not_i_match" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::RegexNotIMatch,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"bitwise_and" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::BitwiseAnd,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
"bitwise_or" => {
|
||||
ensure_arg_len(2)?;
|
||||
expr_fn::binary_expr(
|
||||
inputs.pop_front().unwrap(),
|
||||
Operator::BitwiseOr,
|
||||
inputs.pop_front().unwrap(),
|
||||
)
|
||||
}
|
||||
// end binary exprs
|
||||
// start other direct expr, with the same order of DF `Expr`'s definition.
|
||||
"not" => {
|
||||
ensure_arg_len(1)?;
|
||||
inputs.pop_front().unwrap().not()
|
||||
}
|
||||
"is_not_null" => {
|
||||
ensure_arg_len(1)?;
|
||||
inputs.pop_front().unwrap().is_not_null()
|
||||
}
|
||||
"is_null" => {
|
||||
ensure_arg_len(1)?;
|
||||
inputs.pop_front().unwrap().is_null()
|
||||
}
|
||||
"negative" => {
|
||||
ensure_arg_len(1)?;
|
||||
Expr::Negative(Box::new(inputs.pop_front().unwrap()))
|
||||
}
|
||||
// skip GetIndexedField, unimplemented.
|
||||
"between" => {
|
||||
ensure_arg_len(3)?;
|
||||
Expr::Between(Between {
|
||||
expr: Box::new(inputs.pop_front().unwrap()),
|
||||
negated: false,
|
||||
low: Box::new(inputs.pop_front().unwrap()),
|
||||
high: Box::new(inputs.pop_front().unwrap()),
|
||||
})
|
||||
}
|
||||
"not_between" => {
|
||||
ensure_arg_len(3)?;
|
||||
Expr::Between(Between {
|
||||
expr: Box::new(inputs.pop_front().unwrap()),
|
||||
negated: true,
|
||||
low: Box::new(inputs.pop_front().unwrap()),
|
||||
high: Box::new(inputs.pop_front().unwrap()),
|
||||
})
|
||||
}
|
||||
// skip Case, is covered in substrait::SwitchExpression.
|
||||
// skip Cast and TryCast, is covered in substrait::Cast.
|
||||
"sort" | "sort_des" => {
|
||||
ensure_arg_len(1)?;
|
||||
Expr::Sort(Sort {
|
||||
expr: Box::new(inputs.pop_front().unwrap()),
|
||||
asc: false,
|
||||
nulls_first: false,
|
||||
})
|
||||
}
|
||||
"sort_asc" => {
|
||||
ensure_arg_len(1)?;
|
||||
Expr::Sort(Sort {
|
||||
expr: Box::new(inputs.pop_front().unwrap()),
|
||||
asc: true,
|
||||
nulls_first: false,
|
||||
})
|
||||
}
|
||||
// those are datafusion built-in "scalar functions".
|
||||
"abs"
|
||||
| "acos"
|
||||
| "asin"
|
||||
| "atan"
|
||||
| "atan2"
|
||||
| "ceil"
|
||||
| "cos"
|
||||
| "exp"
|
||||
| "floor"
|
||||
| "ln"
|
||||
| "log"
|
||||
| "log10"
|
||||
| "log2"
|
||||
| "power"
|
||||
| "pow"
|
||||
| "round"
|
||||
| "signum"
|
||||
| "sin"
|
||||
| "sqrt"
|
||||
| "tan"
|
||||
| "trunc"
|
||||
| "coalesce"
|
||||
| "make_array"
|
||||
| "ascii"
|
||||
| "bit_length"
|
||||
| "btrim"
|
||||
| "char_length"
|
||||
| "character_length"
|
||||
| "concat"
|
||||
| "concat_ws"
|
||||
| "chr"
|
||||
| "current_date"
|
||||
| "current_time"
|
||||
| "date_part"
|
||||
| "datepart"
|
||||
| "date_trunc"
|
||||
| "datetrunc"
|
||||
| "date_bin"
|
||||
| "initcap"
|
||||
| "left"
|
||||
| "length"
|
||||
| "lower"
|
||||
| "lpad"
|
||||
| "ltrim"
|
||||
| "md5"
|
||||
| "nullif"
|
||||
| "octet_length"
|
||||
| "random"
|
||||
| "regexp_replace"
|
||||
| "repeat"
|
||||
| "replace"
|
||||
| "reverse"
|
||||
| "right"
|
||||
| "rpad"
|
||||
| "rtrim"
|
||||
| "sha224"
|
||||
| "sha256"
|
||||
| "sha384"
|
||||
| "sha512"
|
||||
| "digest"
|
||||
| "split_part"
|
||||
| "starts_with"
|
||||
| "strpos"
|
||||
| "substr"
|
||||
| "to_hex"
|
||||
| "to_timestamp"
|
||||
| "to_timestamp_millis"
|
||||
| "to_timestamp_micros"
|
||||
| "to_timestamp_seconds"
|
||||
| "now"
|
||||
| "translate"
|
||||
| "trim"
|
||||
| "upper"
|
||||
| "uuid"
|
||||
| "regexp_match"
|
||||
| "struct"
|
||||
| "from_unixtime"
|
||||
| "arrow_typeof" => Expr::ScalarFunction {
|
||||
fun: BuiltinScalarFunction::from_str(fn_name).unwrap(),
|
||||
args: inputs.into(),
|
||||
},
|
||||
// skip ScalarUDF, unimplemented.
|
||||
// skip AggregateFunction, is covered in substrait::AggregateRel
|
||||
// skip WindowFunction, is covered in substrait WindowFunction
|
||||
// skip AggregateUDF, unimplemented.
|
||||
// skip InList, unimplemented
|
||||
// skip Wildcard, unimplemented.
|
||||
// end other direct expr
|
||||
_ => UnsupportedExprSnafu {
|
||||
name: format!("scalar function {fn_name}"),
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
/// Convert DataFusion's `Expr` to substrait's `Expression`
|
||||
pub fn expression_from_df_expr(
|
||||
ctx: &mut ConvertorContext,
|
||||
expr: &Expr,
|
||||
schema: &Schema,
|
||||
) -> Result<Expression> {
|
||||
let expression = match expr {
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::Alias(..) => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Column(column) => {
|
||||
let field_reference = convert_column(column, schema)?;
|
||||
Expression {
|
||||
rex_type: Some(RexType::Selection(Box::new(field_reference))),
|
||||
}
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::ScalarVariable(..) => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Literal(v) => {
|
||||
let t = scalar_value_as_literal_type(v)?;
|
||||
let l = Literal {
|
||||
nullable: true,
|
||||
type_variation_reference: 0,
|
||||
literal_type: Some(t),
|
||||
};
|
||||
Expression {
|
||||
rex_type: Some(RexType::Literal(l)),
|
||||
}
|
||||
}
|
||||
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
|
||||
let left = expression_from_df_expr(ctx, left, schema)?;
|
||||
let right = expression_from_df_expr(ctx, right, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![left, right]);
|
||||
let op_name = utils::name_df_operator(op);
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::Not(e) => {
|
||||
let arg = expression_from_df_expr(ctx, e, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![arg]);
|
||||
let op_name = "not";
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::IsNotNull(e) => {
|
||||
let arg = expression_from_df_expr(ctx, e, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![arg]);
|
||||
let op_name = "is_not_null";
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::IsNull(e) => {
|
||||
let arg = expression_from_df_expr(ctx, e, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![arg]);
|
||||
let op_name = "is_null";
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::Negative(e) => {
|
||||
let arg = expression_from_df_expr(ctx, e, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![arg]);
|
||||
let op_name = "negative";
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::GetIndexedField { .. } => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Between(Between {
|
||||
expr,
|
||||
negated,
|
||||
low,
|
||||
high,
|
||||
}) => {
|
||||
let expr = expression_from_df_expr(ctx, expr, schema)?;
|
||||
let low = expression_from_df_expr(ctx, low, schema)?;
|
||||
let high = expression_from_df_expr(ctx, high, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![expr, low, high]);
|
||||
let op_name = if *negated { "not_between" } else { "between" };
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. } => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Sort(Sort {
|
||||
expr,
|
||||
asc,
|
||||
nulls_first: _,
|
||||
}) => {
|
||||
let expr = expression_from_df_expr(ctx, expr, schema)?;
|
||||
let arguments = utils::expression_to_argument(vec![expr]);
|
||||
let op_name = if *asc { "sort_asc" } else { "sort_des" };
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
Expr::ScalarFunction { fun, args } => {
|
||||
let arguments = utils::expression_to_argument(
|
||||
args.iter()
|
||||
.map(|e| expression_from_df_expr(ctx, e, schema))
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
);
|
||||
let op_name = utils::name_builtin_scalar_function(fun);
|
||||
let function_reference = ctx.register_scalar_fn(op_name);
|
||||
utils::build_scalar_function_expression(function_reference, arguments)
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::ScalarUDF { .. }
|
||||
| Expr::AggregateFunction { .. }
|
||||
| Expr::WindowFunction { .. }
|
||||
| Expr::AggregateUDF { .. }
|
||||
| Expr::InList { .. }
|
||||
| Expr::Wildcard
|
||||
| Expr::Like(_)
|
||||
| Expr::ILike(_)
|
||||
| Expr::SimilarTo(_)
|
||||
| Expr::IsTrue(_)
|
||||
| Expr::IsFalse(_)
|
||||
| Expr::IsUnknown(_)
|
||||
| Expr::IsNotTrue(_)
|
||||
| Expr::IsNotFalse(_)
|
||||
| Expr::IsNotUnknown(_)
|
||||
| Expr::Exists { .. }
|
||||
| Expr::InSubquery { .. }
|
||||
| Expr::ScalarSubquery(..)
|
||||
| Expr::Placeholder { .. }
|
||||
| Expr::QualifiedWildcard { .. } => todo!(),
|
||||
Expr::GroupingSet(_) | Expr::OuterReferenceColumn(_, _) => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
|
||||
Ok(expression)
|
||||
}
|
||||
|
||||
/// Convert DataFusion's `Column` expr into substrait's `FieldReference` -
|
||||
/// `DirectReference` - `StructField`.
|
||||
pub fn convert_column(column: &Column, schema: &Schema) -> Result<FieldReference> {
|
||||
let column_name = &column.name;
|
||||
let field_index =
|
||||
schema
|
||||
.column_index_by_name(column_name)
|
||||
.with_context(|| MissingFieldSnafu {
|
||||
field: format!("{column:?}"),
|
||||
plan: format!("schema: {schema:?}"),
|
||||
})?;
|
||||
|
||||
Ok(FieldReference {
|
||||
reference_type: Some(FieldReferenceType::DirectReference(ReferenceSegment {
|
||||
reference_type: Some(SegReferenceType::StructField(Box::new(StructField {
|
||||
field: field_index as _,
|
||||
child: None,
|
||||
}))),
|
||||
})),
|
||||
root_type: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Some utils special for this `DataFusion::Expr` and `Substrait::Expression` conversion.
|
||||
mod utils {
|
||||
use datafusion_expr::{BuiltinScalarFunction, Operator};
|
||||
use substrait_proto::proto::expression::{RexType, ScalarFunction};
|
||||
use substrait_proto::proto::function_argument::ArgType;
|
||||
use substrait_proto::proto::{Expression, FunctionArgument};
|
||||
|
||||
pub(crate) fn name_df_operator(op: &Operator) -> &str {
|
||||
match op {
|
||||
Operator::Eq => "equal",
|
||||
Operator::NotEq => "not_equal",
|
||||
Operator::Lt => "lt",
|
||||
Operator::LtEq => "lte",
|
||||
Operator::Gt => "gt",
|
||||
Operator::GtEq => "gte",
|
||||
Operator::Plus => "plus",
|
||||
Operator::Minus => "minus",
|
||||
Operator::Multiply => "multiply",
|
||||
Operator::Divide => "divide",
|
||||
Operator::Modulo => "modulo",
|
||||
Operator::And => "and",
|
||||
Operator::Or => "or",
|
||||
Operator::IsDistinctFrom => "is_distinct_from",
|
||||
Operator::IsNotDistinctFrom => "is_not_distinct_from",
|
||||
Operator::RegexMatch => "regex_match",
|
||||
Operator::RegexIMatch => "regex_i_match",
|
||||
Operator::RegexNotMatch => "regex_not_match",
|
||||
Operator::RegexNotIMatch => "regex_not_i_match",
|
||||
Operator::BitwiseAnd => "bitwise_and",
|
||||
Operator::BitwiseOr => "bitwise_or",
|
||||
Operator::BitwiseXor => "bitwise_xor",
|
||||
Operator::BitwiseShiftRight => "bitwise_shift_right",
|
||||
Operator::BitwiseShiftLeft => "bitwise_shift_left",
|
||||
Operator::StringConcat => "string_concat",
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert list of [Expression] to [FunctionArgument] vector.
|
||||
pub(crate) fn expression_to_argument<I: IntoIterator<Item = Expression>>(
|
||||
expressions: I,
|
||||
) -> Vec<FunctionArgument> {
|
||||
expressions
|
||||
.into_iter()
|
||||
.map(|expr| FunctionArgument {
|
||||
arg_type: Some(ArgType::Value(expr)),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Convenient builder for [Expression]
|
||||
pub(crate) fn build_scalar_function_expression(
|
||||
function_reference: u32,
|
||||
arguments: Vec<FunctionArgument>,
|
||||
) -> Expression {
|
||||
Expression {
|
||||
rex_type: Some(RexType::ScalarFunction(ScalarFunction {
|
||||
function_reference,
|
||||
arguments,
|
||||
output_type: None,
|
||||
..Default::default()
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn name_builtin_scalar_function(fun: &BuiltinScalarFunction) -> &str {
|
||||
match fun {
|
||||
BuiltinScalarFunction::Abs => "abs",
|
||||
BuiltinScalarFunction::Acos => "acos",
|
||||
BuiltinScalarFunction::Asin => "asin",
|
||||
BuiltinScalarFunction::Atan => "atan",
|
||||
BuiltinScalarFunction::Ceil => "ceil",
|
||||
BuiltinScalarFunction::Cos => "cos",
|
||||
BuiltinScalarFunction::Digest => "digest",
|
||||
BuiltinScalarFunction::Exp => "exp",
|
||||
BuiltinScalarFunction::Floor => "floor",
|
||||
BuiltinScalarFunction::Ln => "ln",
|
||||
BuiltinScalarFunction::Log => "log",
|
||||
BuiltinScalarFunction::Log10 => "log10",
|
||||
BuiltinScalarFunction::Log2 => "log2",
|
||||
BuiltinScalarFunction::Round => "round",
|
||||
BuiltinScalarFunction::Signum => "signum",
|
||||
BuiltinScalarFunction::Sin => "sin",
|
||||
BuiltinScalarFunction::Sqrt => "sqrt",
|
||||
BuiltinScalarFunction::Tan => "tan",
|
||||
BuiltinScalarFunction::Trunc => "trunc",
|
||||
BuiltinScalarFunction::Ascii => "ascii",
|
||||
BuiltinScalarFunction::BitLength => "bit_length",
|
||||
BuiltinScalarFunction::Btrim => "btrim",
|
||||
BuiltinScalarFunction::CharacterLength => "character_length",
|
||||
BuiltinScalarFunction::Chr => "chr",
|
||||
BuiltinScalarFunction::Concat => "concat",
|
||||
BuiltinScalarFunction::ConcatWithSeparator => "concat_ws",
|
||||
BuiltinScalarFunction::DatePart => "date_part",
|
||||
BuiltinScalarFunction::DateTrunc => "date_trunc",
|
||||
BuiltinScalarFunction::InitCap => "initcap",
|
||||
BuiltinScalarFunction::Left => "left",
|
||||
BuiltinScalarFunction::Lpad => "lpad",
|
||||
BuiltinScalarFunction::Lower => "lower",
|
||||
BuiltinScalarFunction::Ltrim => "ltrim",
|
||||
BuiltinScalarFunction::MD5 => "md5",
|
||||
BuiltinScalarFunction::NullIf => "nullif",
|
||||
BuiltinScalarFunction::OctetLength => "octet_length",
|
||||
BuiltinScalarFunction::Random => "random",
|
||||
BuiltinScalarFunction::RegexpReplace => "regexp_replace",
|
||||
BuiltinScalarFunction::Repeat => "repeat",
|
||||
BuiltinScalarFunction::Replace => "replace",
|
||||
BuiltinScalarFunction::Reverse => "reverse",
|
||||
BuiltinScalarFunction::Right => "right",
|
||||
BuiltinScalarFunction::Rpad => "rpad",
|
||||
BuiltinScalarFunction::Rtrim => "rtrim",
|
||||
BuiltinScalarFunction::SHA224 => "sha224",
|
||||
BuiltinScalarFunction::SHA256 => "sha256",
|
||||
BuiltinScalarFunction::SHA384 => "sha384",
|
||||
BuiltinScalarFunction::SHA512 => "sha512",
|
||||
BuiltinScalarFunction::SplitPart => "split_part",
|
||||
BuiltinScalarFunction::StartsWith => "starts_with",
|
||||
BuiltinScalarFunction::Strpos => "strpos",
|
||||
BuiltinScalarFunction::Substr => "substr",
|
||||
BuiltinScalarFunction::ToHex => "to_hex",
|
||||
BuiltinScalarFunction::ToTimestamp => "to_timestamp",
|
||||
BuiltinScalarFunction::ToTimestampMillis => "to_timestamp_millis",
|
||||
BuiltinScalarFunction::ToTimestampMicros => "to_timestamp_macros",
|
||||
BuiltinScalarFunction::ToTimestampSeconds => "to_timestamp_seconds",
|
||||
BuiltinScalarFunction::Now => "now",
|
||||
BuiltinScalarFunction::Translate => "translate",
|
||||
BuiltinScalarFunction::Trim => "trim",
|
||||
BuiltinScalarFunction::Upper => "upper",
|
||||
BuiltinScalarFunction::RegexpMatch => "regexp_match",
|
||||
BuiltinScalarFunction::Atan2 => "atan2",
|
||||
BuiltinScalarFunction::Coalesce => "coalesce",
|
||||
BuiltinScalarFunction::Power => "power",
|
||||
BuiltinScalarFunction::MakeArray => "make_array",
|
||||
BuiltinScalarFunction::DateBin => "date_bin",
|
||||
BuiltinScalarFunction::FromUnixtime => "from_unixtime",
|
||||
BuiltinScalarFunction::CurrentDate => "current_date",
|
||||
BuiltinScalarFunction::CurrentTime => "current_time",
|
||||
BuiltinScalarFunction::Uuid => "uuid",
|
||||
BuiltinScalarFunction::Struct => "struct",
|
||||
BuiltinScalarFunction::ArrowTypeof => "arrow_type_of",
|
||||
BuiltinScalarFunction::Acosh => "acosh",
|
||||
BuiltinScalarFunction::Asinh => "asinh",
|
||||
BuiltinScalarFunction::Atanh => "atanh",
|
||||
BuiltinScalarFunction::Cbrt => "cbrt",
|
||||
BuiltinScalarFunction::Cosh => "cosh",
|
||||
BuiltinScalarFunction::Pi => "pi",
|
||||
BuiltinScalarFunction::Sinh => "sinh",
|
||||
BuiltinScalarFunction::Tanh => "tanh",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::schema::ColumnSchema;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn expr_round_trip() {
|
||||
let expr = expr_fn::and(
|
||||
expr_fn::col("column_a").lt_eq(expr_fn::col("column_b")),
|
||||
expr_fn::col("column_a").gt(expr_fn::col("column_b")),
|
||||
);
|
||||
|
||||
let schema = Schema::new(vec![
|
||||
ColumnSchema::new(
|
||||
"column_a",
|
||||
datatypes::data_type::ConcreteDataType::int64_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"column_b",
|
||||
datatypes::data_type::ConcreteDataType::float64_datatype(),
|
||||
true,
|
||||
),
|
||||
]);
|
||||
|
||||
let mut ctx = ConvertorContext::default();
|
||||
let substrait_expr = expression_from_df_expr(&mut ctx, &expr, &schema).unwrap();
|
||||
let converted_expr = to_df_expr(&ctx, substrait_expr, &schema).unwrap();
|
||||
|
||||
assert_eq!(expr, converted_expr);
|
||||
}
|
||||
}
|
||||
@@ -1,534 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_recursion::async_recursion;
|
||||
use async_trait::async_trait;
|
||||
use bytes::{Buf, Bytes};
|
||||
use catalog::table_source::DfTableSourceProvider;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_telemetry::debug;
|
||||
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
|
||||
use datafusion::catalog::catalog::CatalogList;
|
||||
use datafusion::common::{DFField, DFSchema};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::physical_plan::project_schema;
|
||||
use datafusion::sql::TableReference;
|
||||
use datafusion_expr::{Filter, LogicalPlan, TableScan};
|
||||
use session::context::QueryContext;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use substrait_proto::proto::expression::mask_expression::{StructItem, StructSelect};
|
||||
use substrait_proto::proto::expression::MaskExpression;
|
||||
use substrait_proto::proto::extensions::simple_extension_declaration::MappingType;
|
||||
use substrait_proto::proto::plan_rel::RelType as PlanRelType;
|
||||
use substrait_proto::proto::read_rel::{NamedTable, ReadType};
|
||||
use substrait_proto::proto::rel::RelType;
|
||||
use substrait_proto::proto::{FilterRel, Plan, PlanRel, ReadRel, Rel};
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::context::ConvertorContext;
|
||||
use crate::df_expr::{expression_from_df_expr, to_df_expr};
|
||||
use crate::error::{
|
||||
self, DFInternalSnafu, EmptyPlanSnafu, Error, InvalidParametersSnafu, MissingFieldSnafu,
|
||||
ResolveTableSnafu, SchemaNotMatchSnafu, UnknownPlanSnafu, UnsupportedExprSnafu,
|
||||
UnsupportedPlanSnafu,
|
||||
};
|
||||
use crate::schema::{from_schema, to_schema};
|
||||
use crate::SubstraitPlan;
|
||||
|
||||
pub struct DFLogicalSubstraitConvertorDeprecated;
|
||||
|
||||
#[async_trait]
|
||||
impl SubstraitPlan for DFLogicalSubstraitConvertorDeprecated {
|
||||
type Error = Error;
|
||||
|
||||
type Plan = LogicalPlan;
|
||||
|
||||
async fn decode<B: Buf + Send>(
|
||||
&self,
|
||||
_message: B,
|
||||
_catalog_list: Arc<dyn CatalogList>,
|
||||
) -> Result<Self::Plan, Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertorDeprecated {
|
||||
async fn convert_plan(
|
||||
&self,
|
||||
mut plan: Plan,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// prepare convertor context
|
||||
let mut ctx = ConvertorContext::default();
|
||||
for simple_ext in plan.extensions {
|
||||
if let Some(MappingType::ExtensionFunction(function_extension)) =
|
||||
simple_ext.mapping_type
|
||||
{
|
||||
ctx.register_scalar_with_anchor(
|
||||
function_extension.name,
|
||||
function_extension.function_anchor,
|
||||
);
|
||||
} else {
|
||||
debug!("Encounter unsupported substrait extension {:?}", simple_ext);
|
||||
}
|
||||
}
|
||||
|
||||
// extract rel
|
||||
let rel = if let Some(PlanRel { rel_type }) = plan.relations.pop()
|
||||
&& let Some(PlanRelType::Rel(rel)) = rel_type {
|
||||
rel
|
||||
} else {
|
||||
UnsupportedPlanSnafu {
|
||||
name: "Emply or non-Rel relation",
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
|
||||
// TODO(LFC): Create table provider from outside, respect "disallow_cross_schema_query" option in query engine state.
|
||||
let mut table_provider =
|
||||
DfTableSourceProvider::new(catalog_manager, false, &QueryContext::new());
|
||||
self.rel_to_logical_plan(&mut ctx, Box::new(rel), &mut table_provider)
|
||||
.await
|
||||
}
|
||||
|
||||
#[async_recursion]
|
||||
async fn rel_to_logical_plan(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
rel: Box<Rel>,
|
||||
table_provider: &mut DfTableSourceProvider,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
|
||||
|
||||
// build logical plan
|
||||
let logical_plan = match rel_type {
|
||||
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, table_provider).await?,
|
||||
RelType::Filter(filter) => {
|
||||
let FilterRel {
|
||||
common: _,
|
||||
input,
|
||||
condition,
|
||||
advanced_extension: _,
|
||||
} = *filter;
|
||||
|
||||
let input = input.context(MissingFieldSnafu {
|
||||
field: "input",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
let input = Arc::new(self.rel_to_logical_plan(ctx, input, table_provider).await?);
|
||||
|
||||
let condition = condition.context(MissingFieldSnafu {
|
||||
field: "condition",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
|
||||
let schema = ctx.df_schema().context(InvalidParametersSnafu {
|
||||
reason: "the underlying TableScan plan should have included a table schema",
|
||||
})?;
|
||||
let schema = schema
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(error::ConvertDfSchemaSnafu)?;
|
||||
let predicate = to_df_expr(ctx, *condition, &schema)?;
|
||||
|
||||
LogicalPlan::Filter(Filter::try_new(predicate, input).context(DFInternalSnafu)?)
|
||||
}
|
||||
RelType::Fetch(_fetch_rel) => UnsupportedPlanSnafu {
|
||||
name: "Fetch Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Aggregate(_aggr_rel) => UnsupportedPlanSnafu {
|
||||
name: "Fetch Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Sort(_sort_rel) => UnsupportedPlanSnafu {
|
||||
name: "Sort Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Join(_join_rel) => UnsupportedPlanSnafu {
|
||||
name: "Join Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Project(_project_rel) => UnsupportedPlanSnafu {
|
||||
name: "Project Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Set(_set_rel) => UnsupportedPlanSnafu {
|
||||
name: "Set Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::ExtensionSingle(_ext_single_rel) => UnsupportedPlanSnafu {
|
||||
name: "Extension Single Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::ExtensionMulti(_ext_multi_rel) => UnsupportedPlanSnafu {
|
||||
name: "Extension Multi Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::ExtensionLeaf(_ext_leaf_rel) => UnsupportedPlanSnafu {
|
||||
name: "Extension Leaf Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Cross(_cross_rel) => UnsupportedPlanSnafu {
|
||||
name: "Cross Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::HashJoin(_) => UnsupportedPlanSnafu {
|
||||
name: "Cross Relation",
|
||||
}
|
||||
.fail()?,
|
||||
RelType::MergeJoin(_) => UnsupportedPlanSnafu {
|
||||
name: "Cross Relation",
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
|
||||
Ok(logical_plan)
|
||||
}
|
||||
|
||||
async fn convert_read_rel(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
read_rel: Box<ReadRel>,
|
||||
table_provider: &mut DfTableSourceProvider,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// Extract the catalog, schema and table name from NamedTable. Assume the first three are those names.
|
||||
let read_type = read_rel.read_type.context(MissingFieldSnafu {
|
||||
field: "read_type",
|
||||
plan: "Read",
|
||||
})?;
|
||||
let (table_name, schema_name, catalog_name) = match read_type {
|
||||
ReadType::NamedTable(mut named_table) => {
|
||||
ensure!(
|
||||
named_table.names.len() == 3,
|
||||
InvalidParametersSnafu {
|
||||
reason:
|
||||
"NamedTable should contains three names for catalog, schema and table",
|
||||
}
|
||||
);
|
||||
(
|
||||
named_table.names.pop().unwrap(),
|
||||
named_table.names.pop().unwrap(),
|
||||
named_table.names.pop().unwrap(),
|
||||
)
|
||||
}
|
||||
ReadType::VirtualTable(_) | ReadType::LocalFiles(_) | ReadType::ExtensionTable(_) => {
|
||||
UnsupportedExprSnafu {
|
||||
name: "Non-NamedTable Read",
|
||||
}
|
||||
.fail()?
|
||||
}
|
||||
};
|
||||
|
||||
// Get projection indices
|
||||
let projection = read_rel
|
||||
.projection
|
||||
.map(|mask_expr| self.convert_mask_expression(mask_expr));
|
||||
|
||||
let table_ref = TableReference::full(
|
||||
catalog_name.clone(),
|
||||
schema_name.clone(),
|
||||
table_name.clone(),
|
||||
);
|
||||
let adapter = table_provider
|
||||
.resolve_table(table_ref.clone())
|
||||
.await
|
||||
.with_context(|_| ResolveTableSnafu {
|
||||
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
|
||||
})?;
|
||||
|
||||
// Get schema directly from the table, and compare it with the schema retrieved from substrait proto.
|
||||
let stored_schema = adapter.schema();
|
||||
let retrieved_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
|
||||
let retrieved_arrow_schema = retrieved_schema.arrow_schema();
|
||||
ensure!(
|
||||
same_schema_without_metadata(&stored_schema, retrieved_arrow_schema),
|
||||
SchemaNotMatchSnafu {
|
||||
substrait_schema: retrieved_arrow_schema.clone(),
|
||||
storage_schema: stored_schema
|
||||
}
|
||||
);
|
||||
|
||||
// Convert filter
|
||||
let filters = if let Some(filter) = read_rel.filter {
|
||||
vec![to_df_expr(ctx, *filter, &retrieved_schema)?]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
// Calculate the projected schema
|
||||
let projected_schema = Arc::new(
|
||||
project_schema(&stored_schema, projection.as_ref())
|
||||
.and_then(|x| {
|
||||
DFSchema::new_with_metadata(
|
||||
x.fields()
|
||||
.iter()
|
||||
.map(|f| DFField::from_qualified(table_ref.clone(), f.clone()))
|
||||
.collect(),
|
||||
x.metadata().clone(),
|
||||
)
|
||||
})
|
||||
.context(DFInternalSnafu)?,
|
||||
);
|
||||
|
||||
ctx.set_df_schema(projected_schema.clone());
|
||||
|
||||
// TODO(ruihang): Support limit(fetch)
|
||||
Ok(LogicalPlan::TableScan(TableScan {
|
||||
table_name: table_ref,
|
||||
source: adapter,
|
||||
projection,
|
||||
projected_schema,
|
||||
filters,
|
||||
fetch: None,
|
||||
}))
|
||||
}
|
||||
|
||||
fn convert_mask_expression(&self, mask_expression: MaskExpression) -> Vec<usize> {
|
||||
mask_expression
|
||||
.select
|
||||
.unwrap_or_default()
|
||||
.struct_items
|
||||
.into_iter()
|
||||
.map(|select| select.field as _)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertorDeprecated {
|
||||
fn logical_plan_to_rel(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
plan: Arc<LogicalPlan>,
|
||||
) -> Result<Rel, Error> {
|
||||
Ok(match &*plan {
|
||||
LogicalPlan::Projection(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Projection",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Filter(filter) => {
|
||||
let input = Some(Box::new(
|
||||
self.logical_plan_to_rel(ctx, filter.input.clone())?,
|
||||
));
|
||||
|
||||
let schema = plan
|
||||
.schema()
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(error::ConvertDfSchemaSnafu)?;
|
||||
let condition = Some(Box::new(expression_from_df_expr(
|
||||
ctx,
|
||||
&filter.predicate,
|
||||
&schema,
|
||||
)?));
|
||||
|
||||
let rel = FilterRel {
|
||||
common: None,
|
||||
input,
|
||||
condition,
|
||||
advanced_extension: None,
|
||||
};
|
||||
Rel {
|
||||
rel_type: Some(RelType::Filter(Box::new(rel))),
|
||||
}
|
||||
}
|
||||
LogicalPlan::Window(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Window",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Aggregate(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Aggregate",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Sort(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Sort",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Join(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Join",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::CrossJoin(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical CrossJoin",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Repartition(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Repartition",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Union(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Union",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::TableScan(table_scan) => {
|
||||
let read_rel = self.convert_table_scan_plan(ctx, table_scan)?;
|
||||
Rel {
|
||||
rel_type: Some(RelType::Read(Box::new(read_rel))),
|
||||
}
|
||||
}
|
||||
LogicalPlan::EmptyRelation(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical EmptyRelation",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Limit(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Limit",
|
||||
}
|
||||
.fail()?,
|
||||
|
||||
LogicalPlan::Subquery(_)
|
||||
| LogicalPlan::SubqueryAlias(_)
|
||||
| LogicalPlan::CreateView(_)
|
||||
| LogicalPlan::CreateCatalogSchema(_)
|
||||
| LogicalPlan::CreateCatalog(_)
|
||||
| LogicalPlan::DropView(_)
|
||||
| LogicalPlan::Distinct(_)
|
||||
| LogicalPlan::CreateExternalTable(_)
|
||||
| LogicalPlan::CreateMemoryTable(_)
|
||||
| LogicalPlan::DropTable(_)
|
||||
| LogicalPlan::Values(_)
|
||||
| LogicalPlan::Explain(_)
|
||||
| LogicalPlan::Analyze(_)
|
||||
| LogicalPlan::Extension(_)
|
||||
| LogicalPlan::Prepare(_)
|
||||
| LogicalPlan::Dml(_)
|
||||
| LogicalPlan::DescribeTable(_)
|
||||
| LogicalPlan::Unnest(_)
|
||||
| LogicalPlan::Statement(_) => InvalidParametersSnafu {
|
||||
reason: format!(
|
||||
"Trying to convert DDL/DML plan to substrait proto, plan: {plan:?}",
|
||||
),
|
||||
}
|
||||
.fail()?,
|
||||
})
|
||||
}
|
||||
|
||||
fn convert_df_plan(&self, plan: LogicalPlan) -> Result<Plan, Error> {
|
||||
let mut ctx = ConvertorContext::default();
|
||||
|
||||
let rel = self.logical_plan_to_rel(&mut ctx, Arc::new(plan))?;
|
||||
|
||||
// convert extension
|
||||
let extensions = ctx.generate_function_extension();
|
||||
|
||||
// assemble PlanRel
|
||||
let plan_rel = PlanRel {
|
||||
rel_type: Some(PlanRelType::Rel(rel)),
|
||||
};
|
||||
|
||||
Ok(Plan {
|
||||
extension_uris: vec![],
|
||||
extensions,
|
||||
relations: vec![plan_rel],
|
||||
advanced_extensions: None,
|
||||
expected_type_urls: vec![],
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn convert_table_scan_plan(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
table_scan: &TableScan,
|
||||
) -> Result<ReadRel, Error> {
|
||||
let provider = table_scan
|
||||
.source
|
||||
.as_any()
|
||||
.downcast_ref::<DefaultTableSource>()
|
||||
.context(UnknownPlanSnafu)?
|
||||
.table_provider
|
||||
.as_any()
|
||||
.downcast_ref::<DfTableProviderAdapter>()
|
||||
.context(UnknownPlanSnafu)?;
|
||||
let table_info = provider.table().table_info();
|
||||
|
||||
// assemble NamedTable and ReadType
|
||||
let catalog_name = table_info.catalog_name.clone();
|
||||
let schema_name = table_info.schema_name.clone();
|
||||
let table_name = table_info.name.clone();
|
||||
let named_table = NamedTable {
|
||||
names: vec![catalog_name, schema_name, table_name],
|
||||
advanced_extension: None,
|
||||
};
|
||||
let read_type = ReadType::NamedTable(named_table);
|
||||
|
||||
// assemble projection
|
||||
let projection = table_scan
|
||||
.projection
|
||||
.as_ref()
|
||||
.map(|x| self.convert_schema_projection(x));
|
||||
|
||||
// assemble base (unprojected) schema using Table's schema.
|
||||
let base_schema = from_schema(&provider.table().schema())?;
|
||||
|
||||
// make conjunction over a list of filters and convert the result to substrait
|
||||
let filter = if let Some(conjunction) = table_scan
|
||||
.filters
|
||||
.iter()
|
||||
.cloned()
|
||||
.reduce(|accum, expr| accum.and(expr))
|
||||
{
|
||||
Some(Box::new(expression_from_df_expr(
|
||||
ctx,
|
||||
&conjunction,
|
||||
&provider.table().schema(),
|
||||
)?))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let read_rel = ReadRel {
|
||||
common: None,
|
||||
base_schema: Some(base_schema),
|
||||
filter,
|
||||
projection,
|
||||
advanced_extension: None,
|
||||
read_type: Some(read_type),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
Ok(read_rel)
|
||||
}
|
||||
|
||||
/// Convert a index-based schema projection to substrait's [MaskExpression].
|
||||
fn convert_schema_projection(&self, projections: &[usize]) -> MaskExpression {
|
||||
let struct_items = projections
|
||||
.iter()
|
||||
.map(|index| StructItem {
|
||||
field: *index as i32,
|
||||
child: None,
|
||||
})
|
||||
.collect();
|
||||
MaskExpression {
|
||||
select: Some(StructSelect { struct_items }),
|
||||
// TODO(ruihang): this field is unspecified
|
||||
maintain_singular_struct: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn same_schema_without_metadata(lhs: &ArrowSchemaRef, rhs: &ArrowSchemaRef) -> bool {
|
||||
lhs.fields.len() == rhs.fields.len()
|
||||
&& lhs.fields.iter().zip(rhs.fields.iter()).all(|(x, y)| {
|
||||
x.name() == y.name()
|
||||
&& x.data_type() == y.data_type()
|
||||
&& x.is_nullable() == y.is_nullable()
|
||||
})
|
||||
}
|
||||
@@ -52,8 +52,9 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
|
||||
|
||||
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
|
||||
let mut buf = BytesMut::new();
|
||||
let context = SessionContext::new();
|
||||
|
||||
let substrait_plan = to_substrait_plan(&plan).context(EncodeDfPlanSnafu)?;
|
||||
let substrait_plan = to_substrait_plan(&plan, &context).context(EncodeDfPlanSnafu)?;
|
||||
substrait_plan.encode(&mut buf).context(EncodeRelSnafu)?;
|
||||
|
||||
Ok(buf.freeze())
|
||||
|
||||
@@ -15,14 +15,8 @@
|
||||
#![feature(let_chains)]
|
||||
#![feature(trait_upcasting)]
|
||||
|
||||
mod context;
|
||||
mod df_expr;
|
||||
#[allow(unused)]
|
||||
mod df_logical;
|
||||
mod df_substrait;
|
||||
pub mod error;
|
||||
mod schema;
|
||||
mod types;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use substrait_proto::proto::r#type::{Nullability, Struct as SubstraitStruct};
|
||||
use substrait_proto::proto::NamedStruct;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::types::{from_concrete_type, to_concrete_type};
|
||||
|
||||
pub fn to_schema(named_struct: NamedStruct) -> Result<Schema> {
|
||||
if named_struct.r#struct.is_none() {
|
||||
return Ok(Schema::new(vec![]));
|
||||
}
|
||||
|
||||
let column_schemas = named_struct
|
||||
.r#struct
|
||||
.unwrap()
|
||||
.types
|
||||
.into_iter()
|
||||
.zip(named_struct.names.into_iter())
|
||||
.map(|(ty, name)| {
|
||||
let (concrete_type, is_nullable) = to_concrete_type(&ty)?;
|
||||
let column_schema = ColumnSchema::new(name, concrete_type, is_nullable);
|
||||
Ok(column_schema)
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
Ok(Schema::new(column_schemas))
|
||||
}
|
||||
|
||||
pub fn from_schema(schema: &Schema) -> Result<NamedStruct> {
|
||||
let mut names = Vec::with_capacity(schema.num_columns());
|
||||
let mut types = Vec::with_capacity(schema.num_columns());
|
||||
|
||||
for column_schema in schema.column_schemas() {
|
||||
names.push(column_schema.name.clone());
|
||||
let substrait_type = from_concrete_type(
|
||||
column_schema.data_type.clone(),
|
||||
Some(column_schema.is_nullable()),
|
||||
)?;
|
||||
types.push(substrait_type);
|
||||
}
|
||||
|
||||
// TODO(ruihang): `type_variation_reference` and `nullability` are unspecified.
|
||||
let substrait_struct = SubstraitStruct {
|
||||
types,
|
||||
type_variation_reference: 0,
|
||||
nullability: Nullability::Unspecified as _,
|
||||
};
|
||||
|
||||
Ok(NamedStruct {
|
||||
names,
|
||||
r#struct: Some(substrait_struct),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
use datatypes::prelude::{ConcreteDataType, DataType};
|
||||
|
||||
use super::*;
|
||||
|
||||
pub(crate) fn supported_types() -> Vec<ColumnSchema> {
|
||||
[
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::boolean_datatype(),
|
||||
ConcreteDataType::int8_datatype(),
|
||||
ConcreteDataType::int16_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::uint8_datatype(),
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
ConcreteDataType::binary_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::timestamp_datatype(Default::default()),
|
||||
// TODO(ruihang): DateTime and List type are not supported now
|
||||
]
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(ordinal, ty)| ColumnSchema::new(ty.name().to_string(), ty, ordinal % 2 == 0))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn supported_types_round_trip() {
|
||||
let column_schemas = supported_types();
|
||||
let schema = Schema::new(column_schemas);
|
||||
|
||||
let named_struct = from_schema(&schema).unwrap();
|
||||
let converted_schema = to_schema(named_struct).unwrap();
|
||||
|
||||
assert_eq!(schema, converted_schema);
|
||||
}
|
||||
}
|
||||
@@ -1,225 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Methods that perform conversion between Substrait's type ([Type](SType)) and GreptimeDB's type ([ConcreteDataType]).
|
||||
//!
|
||||
//! Substrait use [type variation](https://substrait.io/types/type_variations/) to express different "logical types".
|
||||
//! Current we only have variations on integer types. Variation 0 (system preferred) are the same with base types, which
|
||||
//! are signed integer (i.e. I8 -> [i8]), and Variation 1 stands for unsigned integer (i.e. I8 -> [u8]).
|
||||
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::types::TimestampType;
|
||||
use substrait_proto::proto::expression::literal::LiteralType;
|
||||
use substrait_proto::proto::r#type::{self as s_type, Kind, Nullability};
|
||||
use substrait_proto::proto::{Type as SType, Type};
|
||||
|
||||
use crate::error::{self, Result, UnsupportedConcreteTypeSnafu, UnsupportedSubstraitTypeSnafu};
|
||||
|
||||
macro_rules! substrait_kind {
|
||||
($desc:ident, $concrete_ty:ident) => {{
|
||||
let nullable = $desc.nullability() == Nullability::Nullable;
|
||||
let ty = ConcreteDataType::$concrete_ty();
|
||||
Ok((ty, nullable))
|
||||
}};
|
||||
|
||||
($desc:ident, $concrete_ty:expr) => {{
|
||||
let nullable = $desc.nullability() == Nullability::Nullable;
|
||||
Ok(($concrete_ty, nullable))
|
||||
}};
|
||||
|
||||
($desc:ident, $concrete_ty_0:ident, $concrete_ty_1:ident) => {{
|
||||
let nullable = $desc.nullability() == Nullability::Nullable;
|
||||
let ty = match $desc.type_variation_reference {
|
||||
0 => ConcreteDataType::$concrete_ty_0(),
|
||||
1 => ConcreteDataType::$concrete_ty_1(),
|
||||
_ => UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{:?}", $desc),
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
Ok((ty, nullable))
|
||||
}};
|
||||
}
|
||||
|
||||
/// Convert Substrait [Type](SType) to GreptimeDB's [ConcreteDataType]. The bool in return
|
||||
/// tuple is the nullability identifier.
|
||||
pub fn to_concrete_type(ty: &SType) -> Result<(ConcreteDataType, bool)> {
|
||||
if ty.kind.is_none() {
|
||||
return Ok((ConcreteDataType::null_datatype(), true));
|
||||
}
|
||||
let kind = ty.kind.as_ref().unwrap();
|
||||
match kind {
|
||||
Kind::Bool(desc) => substrait_kind!(desc, boolean_datatype),
|
||||
Kind::I8(desc) => substrait_kind!(desc, int8_datatype, uint8_datatype),
|
||||
Kind::I16(desc) => substrait_kind!(desc, int16_datatype, uint16_datatype),
|
||||
Kind::I32(desc) => substrait_kind!(desc, int32_datatype, uint32_datatype),
|
||||
Kind::I64(desc) => substrait_kind!(desc, int64_datatype, uint64_datatype),
|
||||
Kind::Fp32(desc) => substrait_kind!(desc, float32_datatype),
|
||||
Kind::Fp64(desc) => substrait_kind!(desc, float64_datatype),
|
||||
Kind::String(desc) => substrait_kind!(desc, string_datatype),
|
||||
Kind::Binary(desc) => substrait_kind!(desc, binary_datatype),
|
||||
Kind::Timestamp(desc) => substrait_kind!(
|
||||
desc,
|
||||
ConcreteDataType::timestamp_datatype(
|
||||
TimestampType::try_from(desc.type_variation_reference as u64)
|
||||
.map_err(|_| UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{kind:?}")
|
||||
}
|
||||
.build())?
|
||||
.unit()
|
||||
)
|
||||
),
|
||||
Kind::Date(desc) => substrait_kind!(desc, date_datatype),
|
||||
Kind::Time(_)
|
||||
| Kind::IntervalYear(_)
|
||||
| Kind::IntervalDay(_)
|
||||
| Kind::TimestampTz(_)
|
||||
| Kind::Uuid(_)
|
||||
| Kind::FixedChar(_)
|
||||
| Kind::Varchar(_)
|
||||
| Kind::FixedBinary(_)
|
||||
| Kind::Decimal(_)
|
||||
| Kind::Struct(_)
|
||||
| Kind::List(_)
|
||||
| Kind::Map(_)
|
||||
| Kind::UserDefined(_)
|
||||
| Kind::UserDefinedTypeReference(_) => UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{kind:?}"),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! build_substrait_kind {
|
||||
($kind:ident,$s_type:ident,$nullable:ident,$variation:expr) => {{
|
||||
let nullability = match $nullable {
|
||||
Some(true) => Nullability::Nullable,
|
||||
Some(false) => Nullability::Required,
|
||||
None => Nullability::Unspecified,
|
||||
} as _;
|
||||
Some(Kind::$kind(s_type::$s_type {
|
||||
type_variation_reference: $variation,
|
||||
nullability,
|
||||
}))
|
||||
}};
|
||||
}
|
||||
|
||||
/// Convert GreptimeDB's [ConcreteDataType] to Substrait [Type](SType).
|
||||
///
|
||||
/// Refer to [mod level documentation](super::types) for more information about type variation.
|
||||
pub fn from_concrete_type(ty: ConcreteDataType, nullability: Option<bool>) -> Result<SType> {
|
||||
let kind = match ty {
|
||||
ConcreteDataType::Null(_) => None,
|
||||
ConcreteDataType::Boolean(_) => build_substrait_kind!(Bool, Boolean, nullability, 0),
|
||||
ConcreteDataType::Int8(_) => build_substrait_kind!(I8, I8, nullability, 0),
|
||||
ConcreteDataType::Int16(_) => build_substrait_kind!(I16, I16, nullability, 0),
|
||||
ConcreteDataType::Int32(_) => build_substrait_kind!(I32, I32, nullability, 0),
|
||||
ConcreteDataType::Int64(_) => build_substrait_kind!(I64, I64, nullability, 0),
|
||||
ConcreteDataType::UInt8(_) => build_substrait_kind!(I8, I8, nullability, 1),
|
||||
ConcreteDataType::UInt16(_) => build_substrait_kind!(I16, I16, nullability, 1),
|
||||
ConcreteDataType::UInt32(_) => build_substrait_kind!(I32, I32, nullability, 1),
|
||||
ConcreteDataType::UInt64(_) => build_substrait_kind!(I64, I64, nullability, 1),
|
||||
ConcreteDataType::Float32(_) => build_substrait_kind!(Fp32, Fp32, nullability, 0),
|
||||
ConcreteDataType::Float64(_) => build_substrait_kind!(Fp64, Fp64, nullability, 0),
|
||||
ConcreteDataType::Binary(_) => build_substrait_kind!(Binary, Binary, nullability, 0),
|
||||
ConcreteDataType::String(_) => build_substrait_kind!(String, String, nullability, 0),
|
||||
ConcreteDataType::Date(_) => build_substrait_kind!(Date, Date, nullability, 0),
|
||||
ConcreteDataType::DateTime(_) => UnsupportedConcreteTypeSnafu { ty }.fail()?,
|
||||
ConcreteDataType::Timestamp(ty) => {
|
||||
build_substrait_kind!(Timestamp, Timestamp, nullability, ty.precision() as u32)
|
||||
}
|
||||
ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
|
||||
UnsupportedConcreteTypeSnafu { ty }.fail()?
|
||||
}
|
||||
};
|
||||
|
||||
Ok(SType { kind })
|
||||
}
|
||||
|
||||
pub(crate) fn scalar_value_as_literal_type(v: &ScalarValue) -> Result<LiteralType> {
|
||||
Ok(if v.is_null() {
|
||||
LiteralType::Null(Type { kind: None })
|
||||
} else {
|
||||
match v {
|
||||
ScalarValue::Boolean(Some(v)) => LiteralType::Boolean(*v),
|
||||
ScalarValue::Float32(Some(v)) => LiteralType::Fp32(*v),
|
||||
ScalarValue::Float64(Some(v)) => LiteralType::Fp64(*v),
|
||||
ScalarValue::Int8(Some(v)) => LiteralType::I8(*v as i32),
|
||||
ScalarValue::Int16(Some(v)) => LiteralType::I16(*v as i32),
|
||||
ScalarValue::Int32(Some(v)) => LiteralType::I32(*v),
|
||||
ScalarValue::Int64(Some(v)) => LiteralType::I64(*v),
|
||||
ScalarValue::LargeUtf8(Some(v)) => LiteralType::String(v.clone()),
|
||||
ScalarValue::LargeBinary(Some(v)) => LiteralType::Binary(v.clone()),
|
||||
ScalarValue::TimestampSecond(Some(seconds), _) => {
|
||||
LiteralType::Timestamp(*seconds * 1_000_000)
|
||||
}
|
||||
ScalarValue::TimestampMillisecond(Some(millis), _) => {
|
||||
LiteralType::Timestamp(*millis * 1000)
|
||||
}
|
||||
ScalarValue::TimestampMicrosecond(Some(micros), _) => LiteralType::Timestamp(*micros),
|
||||
ScalarValue::TimestampNanosecond(Some(nanos), _) => {
|
||||
LiteralType::Timestamp(*nanos / 1000)
|
||||
}
|
||||
ScalarValue::Utf8(Some(s)) => LiteralType::String(s.clone()),
|
||||
// TODO(LFC): Implement other conversions: ScalarValue => LiteralType
|
||||
_ => {
|
||||
return error::UnsupportedExprSnafu {
|
||||
name: format!("ScalarValue: {v:?}"),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn literal_type_to_scalar_value(t: LiteralType) -> Result<ScalarValue> {
|
||||
Ok(match t {
|
||||
LiteralType::Null(Type { kind: Some(kind) }) => match kind {
|
||||
Kind::Bool(_) => ScalarValue::Boolean(None),
|
||||
Kind::I8(_) => ScalarValue::Int8(None),
|
||||
Kind::I16(_) => ScalarValue::Int16(None),
|
||||
Kind::I32(_) => ScalarValue::Int32(None),
|
||||
Kind::I64(_) => ScalarValue::Int64(None),
|
||||
Kind::Fp32(_) => ScalarValue::Float32(None),
|
||||
Kind::Fp64(_) => ScalarValue::Float64(None),
|
||||
Kind::String(_) => ScalarValue::LargeUtf8(None),
|
||||
Kind::Binary(_) => ScalarValue::LargeBinary(None),
|
||||
// TODO(LFC): Implement other conversions: Kind => ScalarValue
|
||||
_ => {
|
||||
return error::UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{kind:?}"),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
LiteralType::Boolean(v) => ScalarValue::Boolean(Some(v)),
|
||||
LiteralType::I8(v) => ScalarValue::Int8(Some(v as i8)),
|
||||
LiteralType::I16(v) => ScalarValue::Int16(Some(v as i16)),
|
||||
LiteralType::I32(v) => ScalarValue::Int32(Some(v)),
|
||||
LiteralType::I64(v) => ScalarValue::Int64(Some(v)),
|
||||
LiteralType::Fp32(v) => ScalarValue::Float32(Some(v)),
|
||||
LiteralType::Fp64(v) => ScalarValue::Float64(Some(v)),
|
||||
LiteralType::String(v) => ScalarValue::LargeUtf8(Some(v)),
|
||||
LiteralType::Binary(v) => ScalarValue::LargeBinary(Some(v)),
|
||||
LiteralType::Timestamp(v) => ScalarValue::TimestampMicrosecond(Some(v), None),
|
||||
// TODO(LFC): Implement other conversions: LiteralType => ScalarValue
|
||||
_ => {
|
||||
return error::UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{t:?}"),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -433,8 +433,7 @@ impl NullBufferBuilder {
|
||||
/// Builds the null buffer and resets the builder.
|
||||
/// Returns `None` if the builder only contains `true`s.
|
||||
fn finish(&mut self) -> Option<Buffer> {
|
||||
let buf = self.bitmap_builder.as_mut().map(|b| b.finish());
|
||||
self.bitmap_builder = None;
|
||||
let buf = self.bitmap_builder.take().map(Into::into);
|
||||
self.len = 0;
|
||||
buf
|
||||
}
|
||||
|
||||
@@ -21,11 +21,11 @@ use async_recursion::async_recursion;
|
||||
use catalog::table_source::DfTableSourceProvider;
|
||||
use datafusion::common::{DFSchemaRef, OwnedTableReference, Result as DfResult};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::logical_expr::expr::AggregateFunction;
|
||||
use datafusion::logical_expr::expr::{AggregateFunction, ScalarFunction, ScalarUDF};
|
||||
use datafusion::logical_expr::expr_rewriter::normalize_cols;
|
||||
use datafusion::logical_expr::{
|
||||
AggregateFunction as AggregateFunctionEnum, BinaryExpr, BuiltinScalarFunction, Cast, Extension,
|
||||
LogicalPlan, LogicalPlanBuilder, Operator, ScalarUDF,
|
||||
LogicalPlan, LogicalPlanBuilder, Operator, ScalarUDF as ScalarUdfDef,
|
||||
};
|
||||
use datafusion::optimizer::utils;
|
||||
use datafusion::prelude as df_prelude;
|
||||
@@ -927,10 +927,10 @@ impl PromPlanner {
|
||||
match scalar_func.clone() {
|
||||
ScalarFunc::DataFusionBuiltin(fun) => {
|
||||
other_input_exprs.insert(field_column_pos, col_expr);
|
||||
let fn_expr = DfExpr::ScalarFunction {
|
||||
let fn_expr = DfExpr::ScalarFunction(ScalarFunction {
|
||||
fun,
|
||||
args: other_input_exprs.clone(),
|
||||
};
|
||||
});
|
||||
exprs.push(fn_expr);
|
||||
other_input_exprs.remove(field_column_pos);
|
||||
}
|
||||
@@ -942,10 +942,10 @@ impl PromPlanner {
|
||||
));
|
||||
other_input_exprs.insert(field_column_pos, ts_range_expr);
|
||||
other_input_exprs.insert(field_column_pos + 1, col_expr);
|
||||
let fn_expr = DfExpr::ScalarUDF {
|
||||
let fn_expr = DfExpr::ScalarUDF(ScalarUDF {
|
||||
fun: Arc::new(fun),
|
||||
args: other_input_exprs.clone(),
|
||||
};
|
||||
});
|
||||
exprs.push(fn_expr);
|
||||
other_input_exprs.remove(field_column_pos + 1);
|
||||
other_input_exprs.remove(field_column_pos);
|
||||
@@ -960,10 +960,10 @@ impl PromPlanner {
|
||||
other_input_exprs.insert(field_column_pos + 1, col_expr);
|
||||
other_input_exprs
|
||||
.insert(field_column_pos + 2, self.create_time_index_column_expr()?);
|
||||
let fn_expr = DfExpr::ScalarUDF {
|
||||
let fn_expr = DfExpr::ScalarUDF(ScalarUDF {
|
||||
fun: Arc::new(fun),
|
||||
args: other_input_exprs.clone(),
|
||||
};
|
||||
});
|
||||
exprs.push(fn_expr);
|
||||
other_input_exprs.remove(field_column_pos + 2);
|
||||
other_input_exprs.remove(field_column_pos + 1);
|
||||
@@ -1069,6 +1069,7 @@ impl PromPlanner {
|
||||
args: vec![DfExpr::Column(Column::from_name(col))],
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
@@ -1281,10 +1282,10 @@ struct FunctionArgs {
|
||||
#[derive(Debug, Clone)]
|
||||
enum ScalarFunc {
|
||||
DataFusionBuiltin(BuiltinScalarFunction),
|
||||
Udf(ScalarUDF),
|
||||
Udf(ScalarUdfDef),
|
||||
// todo(ruihang): maybe merge with Udf later
|
||||
/// UDF that require extra information like range length to be evaluated.
|
||||
ExtrapolateUdf(ScalarUDF),
|
||||
ExtrapolateUdf(ScalarUdfDef),
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -1668,12 +1669,12 @@ mod test {
|
||||
|
||||
#[tokio::test]
|
||||
async fn aggregate_stddev() {
|
||||
do_aggregate_expr_plan("stddev", "STDDEVPOP").await;
|
||||
do_aggregate_expr_plan("stddev", "STDDEV_POP").await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn aggregate_stdvar() {
|
||||
do_aggregate_expr_plan("stdvar", "VARIANCEPOP").await;
|
||||
do_aggregate_expr_plan("stdvar", "VARIANCE_POP").await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -219,11 +219,6 @@ impl Array for RangeArray {
|
||||
self
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
fn data(&self) -> &ArrayData {
|
||||
self.array.data()
|
||||
}
|
||||
|
||||
fn into_data(self) -> ArrayData {
|
||||
self.array.into_data()
|
||||
}
|
||||
@@ -239,6 +234,30 @@ impl Array for RangeArray {
|
||||
fn nulls(&self) -> Option<&NullBuffer> {
|
||||
self.array.nulls()
|
||||
}
|
||||
|
||||
fn data_type(&self) -> &DataType {
|
||||
self.array.data_type()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.is_empty()
|
||||
}
|
||||
|
||||
fn offset(&self) -> usize {
|
||||
self.array.offset()
|
||||
}
|
||||
|
||||
fn get_buffer_memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn get_array_memory_size(&self) -> usize {
|
||||
self.array.get_array_memory_size()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for RangeArray {
|
||||
|
||||
@@ -59,19 +59,13 @@ impl Categorizer {
|
||||
LogicalPlan::Distinct(_) => Commutativity::PartialCommutative,
|
||||
LogicalPlan::Unnest(_) => Commutativity::Commutative,
|
||||
LogicalPlan::Statement(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::CreateExternalTable(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::CreateMemoryTable(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::CreateView(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::CreateCatalogSchema(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::CreateCatalog(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::DropTable(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::DropView(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::Values(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::Explain(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::Analyze(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::Prepare(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::DescribeTable(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::Dml(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::Ddl(_) => Commutativity::Unsupported,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ use common_time::timestamp::{TimeUnit, Timestamp};
|
||||
use datafusion::config::ConfigOptions;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
|
||||
use datafusion_common::{DFSchemaRef, DataFusionError, Result, ScalarValue};
|
||||
use datafusion_expr::expr::InList;
|
||||
use datafusion_expr::{
|
||||
Between, BinaryExpr, Expr, ExprSchemable, Filter, LogicalPlan, Operator, TableScan,
|
||||
};
|
||||
@@ -76,7 +77,6 @@ impl AnalyzerRule for TypeConversionRule {
|
||||
| LogicalPlan::Window { .. }
|
||||
| LogicalPlan::Aggregate { .. }
|
||||
| LogicalPlan::Repartition { .. }
|
||||
| LogicalPlan::CreateExternalTable { .. }
|
||||
| LogicalPlan::Extension { .. }
|
||||
| LogicalPlan::Sort { .. }
|
||||
| LogicalPlan::Explain { .. }
|
||||
@@ -84,9 +84,6 @@ impl AnalyzerRule for TypeConversionRule {
|
||||
| LogicalPlan::Union { .. }
|
||||
| LogicalPlan::Join { .. }
|
||||
| LogicalPlan::CrossJoin { .. }
|
||||
| LogicalPlan::CreateMemoryTable { .. }
|
||||
| LogicalPlan::DropTable { .. }
|
||||
| LogicalPlan::DropView { .. }
|
||||
| LogicalPlan::Distinct { .. }
|
||||
| LogicalPlan::Values { .. }
|
||||
| LogicalPlan::Analyze { .. } => {
|
||||
@@ -105,15 +102,13 @@ impl AnalyzerRule for TypeConversionRule {
|
||||
|
||||
LogicalPlan::Subquery { .. }
|
||||
| LogicalPlan::SubqueryAlias { .. }
|
||||
| LogicalPlan::CreateView { .. }
|
||||
| LogicalPlan::CreateCatalogSchema { .. }
|
||||
| LogicalPlan::CreateCatalog { .. }
|
||||
| LogicalPlan::EmptyRelation(_)
|
||||
| LogicalPlan::Prepare(_)
|
||||
| LogicalPlan::Dml(_)
|
||||
| LogicalPlan::DescribeTable(_)
|
||||
| LogicalPlan::Unnest(_)
|
||||
| LogicalPlan::Statement(_) => Ok(Transformed::No(plan)),
|
||||
| LogicalPlan::Statement(_)
|
||||
| LogicalPlan::Ddl(_) => Ok(Transformed::No(plan)),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -229,21 +224,21 @@ impl TreeNodeRewriter for TypeConverter {
|
||||
high: Box::new(high),
|
||||
})
|
||||
}
|
||||
Expr::InList {
|
||||
Expr::InList(InList {
|
||||
expr,
|
||||
list,
|
||||
negated,
|
||||
} => {
|
||||
}) => {
|
||||
let mut list_expr = Vec::with_capacity(list.len());
|
||||
for e in list {
|
||||
let (_, expr_conversion) = self.convert_type(&expr, &e)?;
|
||||
list_expr.push(expr_conversion);
|
||||
}
|
||||
Expr::InList {
|
||||
Expr::InList(InList {
|
||||
expr,
|
||||
list: list_expr,
|
||||
negated,
|
||||
}
|
||||
})
|
||||
}
|
||||
Expr::Literal(value) => match value {
|
||||
ScalarValue::TimestampSecond(Some(i), _) => {
|
||||
|
||||
@@ -255,7 +255,7 @@ mod test {
|
||||
let expected = String::from("Sql(Query(Query { \
|
||||
inner: Query { \
|
||||
with: None, body: Select(Select { \
|
||||
distinct: false, \
|
||||
distinct: None, \
|
||||
top: None, \
|
||||
projection: \
|
||||
[Wildcard(WildcardAdditionalOptions { opt_exclude: None, opt_except: None, opt_rename: None, opt_replace: None })], \
|
||||
@@ -274,6 +274,7 @@ mod test {
|
||||
distribute_by: [], \
|
||||
sort_by: [], \
|
||||
having: None, \
|
||||
named_window: [], \
|
||||
qualify: None \
|
||||
}), order_by: [], limit: None, offset: None, fetch: None, locks: [] }, param_types: [] }))");
|
||||
|
||||
|
||||
@@ -539,7 +539,7 @@ mod tests {
|
||||
assert_eq!(1, stmts.len());
|
||||
|
||||
let select = sqlparser::ast::Select {
|
||||
distinct: false,
|
||||
distinct: None,
|
||||
top: None,
|
||||
projection: vec![sqlparser::ast::SelectItem::Wildcard(
|
||||
WildcardAdditionalOptions::default(),
|
||||
@@ -562,6 +562,7 @@ mod tests {
|
||||
sort_by: vec![],
|
||||
having: None,
|
||||
qualify: None,
|
||||
named_window: vec![],
|
||||
};
|
||||
|
||||
let sp_statement = SpStatement::Query(Box::new(SpQuery {
|
||||
|
||||
@@ -19,6 +19,7 @@ use common_time::Timestamp;
|
||||
use datafusion::parquet::file::metadata::RowGroupMetaData;
|
||||
use datafusion::physical_optimizer::pruning::PruningPredicate;
|
||||
use datafusion_common::ToDFSchema;
|
||||
use datafusion_expr::expr::InList;
|
||||
use datafusion_expr::{Between, BinaryExpr, Operator};
|
||||
use datafusion_physical_expr::create_physical_expr;
|
||||
use datafusion_physical_expr::execution_props::ExecutionProps;
|
||||
@@ -130,11 +131,11 @@ impl<'a> TimeRangePredicateBuilder<'a> {
|
||||
low,
|
||||
high,
|
||||
}) => self.extract_from_between_expr(expr, negated, low, high),
|
||||
DfExpr::InList {
|
||||
DfExpr::InList(InList {
|
||||
expr,
|
||||
list,
|
||||
negated,
|
||||
} => self.extract_from_in_list_expr(expr, *negated, list),
|
||||
}) => self.extract_from_in_list_expr(expr, *negated, list),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ mod tests {
|
||||
test_exec(instance).await;
|
||||
}
|
||||
|
||||
#[ignore = "https://github.com/GreptimeTeam/greptimedb/issues/1681"]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_distributed_exec() {
|
||||
let distributed = tests::create_distributed_instance("test_distributed_exec").await;
|
||||
|
||||
@@ -869,6 +869,7 @@ async fn test_create_table_after_rename_table(instance: Arc<dyn MockInstance>) {
|
||||
check_output_stream(output, expect).await;
|
||||
}
|
||||
|
||||
#[ignore = "https://github.com/GreptimeTeam/greptimedb/issues/1681"]
|
||||
#[apply(both_instances_cases)]
|
||||
async fn test_alter_table(instance: Arc<dyn MockInstance>) {
|
||||
let instance = instance.frontend();
|
||||
|
||||
@@ -21,7 +21,7 @@ Error: 3000(PlanQuery), Error during planning: For SELECT DISTINCT, ORDER BY exp
|
||||
|
||||
SELECT DISTINCT ON (1) i % 2, i FROM integers WHERE i<3 ORDER BY i;
|
||||
|
||||
Error: 1001(Unsupported), SQL statement is not supported: SELECT DISTINCT ON (1) i % 2, i FROM integers WHERE i<3 ORDER BY i;, keyword: %
|
||||
Error: 3000(PlanQuery), This feature is not implemented: DISTINCT ON Exprs not supported
|
||||
|
||||
SELECT DISTINCT integers.i FROM integers ORDER BY i DESC;
|
||||
|
||||
|
||||
@@ -149,7 +149,7 @@ SELECT * FROM integers i1 WHERE NOT EXISTS(SELECT i FROM integers WHERE i=i1.i)
|
||||
|
||||
SELECT i1.i,i2.i FROM integers i1, integers i2 WHERE i1.i=(SELECT i FROM integers WHERE i1.i=i) AND i1.i=i2.i ORDER BY i1.i;
|
||||
|
||||
Error: 3001(EngineExecuteQuery), This feature is not implemented: Physical plan does not support logical expression (<subquery>)
|
||||
Error: 3001(EngineExecuteQuery), Error during planning: Correlated scalar subquery must be aggregated to return at most one row
|
||||
|
||||
SELECT * FROM (SELECT i1.i AS a, i2.i AS b FROM integers i1, integers i2) a1 WHERE a=b ORDER BY 1;
|
||||
|
||||
@@ -194,8 +194,7 @@ SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond O
|
||||
|
||||
SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1;
|
||||
|
||||
++
|
||||
++
|
||||
Error: 3001(EngineExecuteQuery), Error during planning: Attempted to create Filter predicate with expression `Boolean(false)` aliased as 'Int64(0) = Int64(1)'. Filter predicates should not be aliased.
|
||||
|
||||
DROP TABLE integers;
|
||||
|
||||
|
||||
@@ -337,14 +337,14 @@ Affected Rows: 4
|
||||
|
||||
select i, split_part(s, 'b', 1) from test8 order by i;
|
||||
|
||||
+---+---------------------------------------+
|
||||
| i | splitpart(test8.s,Utf8("b"),Int64(1)) |
|
||||
+---+---------------------------------------+
|
||||
| 1 | cc |
|
||||
| 2 | |
|
||||
| 3 | a |
|
||||
| | d |
|
||||
+---+---------------------------------------+
|
||||
+---+----------------------------------------+
|
||||
| i | split_part(test8.s,Utf8("b"),Int64(1)) |
|
||||
+---+----------------------------------------+
|
||||
| 1 | cc |
|
||||
| 2 | |
|
||||
| 3 | a |
|
||||
| | d |
|
||||
+---+----------------------------------------+
|
||||
|
||||
CREATE TABLE DirectReports
|
||||
(
|
||||
|
||||
Reference in New Issue
Block a user