mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-26 09:50:40 +00:00
feat: Substrait logical plan (#704)
* feat: use Substrait logical plan to query data from Datanode in Frontend in distributed mode * fix: resolve PR comments * fix: resolve PR comments * fix: resolve PR comments Co-authored-by: luofucong <luofucong@greptime.com>
This commit is contained in:
@@ -14,7 +14,6 @@ regex = "1.6"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
table = { path = "../../table" }
|
||||
|
||||
[dev-dependencies]
|
||||
chrono = "0.4"
|
||||
|
||||
@@ -25,9 +25,3 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
|
||||
pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
|
||||
/// scripts table id
|
||||
pub const SCRIPTS_TABLE_ID: u32 = 1;
|
||||
|
||||
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
|
||||
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
|
||||
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
|
||||
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
|
||||
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";
|
||||
|
||||
@@ -1,383 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::{RawTableInfo, TableId, TableVersion};
|
||||
|
||||
use crate::consts::{
|
||||
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
|
||||
};
|
||||
use crate::error::{
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
|
||||
};
|
||||
|
||||
const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
|
||||
|
||||
lazy_static! {
|
||||
static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-({})$",
|
||||
CATALOG_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-({})-({})$",
|
||||
SCHEMA_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN, ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-({})-({})-({})$",
|
||||
TABLE_GLOBAL_KEY_PREFIX,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-({})-({})-({})-([0-9]+)$",
|
||||
TABLE_REGIONAL_KEY_PREFIX,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub fn build_catalog_prefix() -> String {
|
||||
format!("{}-", CATALOG_KEY_PREFIX)
|
||||
}
|
||||
|
||||
pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
|
||||
format!("{}-{}-", SCHEMA_KEY_PREFIX, catalog_name.as_ref())
|
||||
}
|
||||
|
||||
pub fn build_table_global_prefix(
|
||||
catalog_name: impl AsRef<str>,
|
||||
schema_name: impl AsRef<str>,
|
||||
) -> String {
|
||||
format!(
|
||||
"{}-{}-{}-",
|
||||
TABLE_GLOBAL_KEY_PREFIX,
|
||||
catalog_name.as_ref(),
|
||||
schema_name.as_ref()
|
||||
)
|
||||
}
|
||||
|
||||
pub fn build_table_regional_prefix(
|
||||
catalog_name: impl AsRef<str>,
|
||||
schema_name: impl AsRef<str>,
|
||||
) -> String {
|
||||
format!(
|
||||
"{}-{}-{}-",
|
||||
TABLE_REGIONAL_KEY_PREFIX,
|
||||
catalog_name.as_ref(),
|
||||
schema_name.as_ref()
|
||||
)
|
||||
}
|
||||
|
||||
/// Table global info has only one key across all datanodes so it does not have `node_id` field.
|
||||
pub struct TableGlobalKey {
|
||||
pub catalog_name: String,
|
||||
pub schema_name: String,
|
||||
pub table_name: String,
|
||||
}
|
||||
|
||||
impl Display for TableGlobalKey {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(TABLE_GLOBAL_KEY_PREFIX)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.catalog_name)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.schema_name)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.table_name)
|
||||
}
|
||||
}
|
||||
|
||||
impl TableGlobalKey {
|
||||
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
|
||||
let key = s.as_ref();
|
||||
let captures = TABLE_GLOBAL_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidCatalogSnafu { key })?;
|
||||
ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
|
||||
|
||||
Ok(Self {
|
||||
catalog_name: captures[1].to_string(),
|
||||
schema_name: captures[2].to_string(),
|
||||
table_name: captures[3].to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Table global info contains necessary info for a datanode to create table regions, including
|
||||
/// table id, table meta(schema...), region id allocation across datanodes.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct TableGlobalValue {
|
||||
/// Id of datanode that created the global table info kv. only for debugging.
|
||||
pub node_id: u64,
|
||||
// TODO(LFC): Maybe remove it?
|
||||
/// Allocation of region ids across all datanodes.
|
||||
pub regions_id_map: HashMap<u64, Vec<u32>>,
|
||||
pub table_info: RawTableInfo,
|
||||
}
|
||||
|
||||
impl TableGlobalValue {
|
||||
pub fn table_id(&self) -> TableId {
|
||||
self.table_info.ident.table_id
|
||||
}
|
||||
}
|
||||
|
||||
/// Table regional info that varies between datanode, so it contains a `node_id` field.
|
||||
pub struct TableRegionalKey {
|
||||
pub catalog_name: String,
|
||||
pub schema_name: String,
|
||||
pub table_name: String,
|
||||
pub node_id: u64,
|
||||
}
|
||||
|
||||
impl Display for TableRegionalKey {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(TABLE_REGIONAL_KEY_PREFIX)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.catalog_name)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.schema_name)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.table_name)?;
|
||||
f.write_str("-")?;
|
||||
f.serialize_u64(self.node_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl TableRegionalKey {
|
||||
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
|
||||
let key = s.as_ref();
|
||||
let captures = TABLE_REGIONAL_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidCatalogSnafu { key })?;
|
||||
ensure!(captures.len() == 5, InvalidCatalogSnafu { key });
|
||||
let node_id = captures[4]
|
||||
.to_string()
|
||||
.parse()
|
||||
.map_err(|_| InvalidCatalogSnafu { key }.build())?;
|
||||
Ok(Self {
|
||||
catalog_name: captures[1].to_string(),
|
||||
schema_name: captures[2].to_string(),
|
||||
table_name: captures[3].to_string(),
|
||||
node_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Regional table info of specific datanode, including table version on that datanode and
|
||||
/// region ids allocated by metasrv.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TableRegionalValue {
|
||||
pub version: TableVersion,
|
||||
pub regions_ids: Vec<u32>,
|
||||
}
|
||||
|
||||
pub struct CatalogKey {
|
||||
pub catalog_name: String,
|
||||
}
|
||||
|
||||
impl Display for CatalogKey {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(CATALOG_KEY_PREFIX)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.catalog_name)
|
||||
}
|
||||
}
|
||||
|
||||
impl CatalogKey {
|
||||
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
|
||||
let key = s.as_ref();
|
||||
let captures = CATALOG_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidCatalogSnafu { key })?;
|
||||
ensure!(captures.len() == 2, InvalidCatalogSnafu { key });
|
||||
Ok(Self {
|
||||
catalog_name: captures[1].to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct CatalogValue;
|
||||
|
||||
pub struct SchemaKey {
|
||||
pub catalog_name: String,
|
||||
pub schema_name: String,
|
||||
}
|
||||
|
||||
impl Display for SchemaKey {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(SCHEMA_KEY_PREFIX)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.catalog_name)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.schema_name)
|
||||
}
|
||||
}
|
||||
|
||||
impl SchemaKey {
|
||||
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
|
||||
let key = s.as_ref();
|
||||
let captures = SCHEMA_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidCatalogSnafu { key })?;
|
||||
ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
|
||||
Ok(Self {
|
||||
catalog_name: captures[1].to_string(),
|
||||
schema_name: captures[2].to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SchemaValue;
|
||||
|
||||
macro_rules! define_catalog_value {
|
||||
( $($val_ty: ty), *) => {
|
||||
$(
|
||||
impl $val_ty {
|
||||
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
|
||||
serde_json::from_str(s.as_ref())
|
||||
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
|
||||
}
|
||||
|
||||
pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self, Error> {
|
||||
Self::parse(&String::from_utf8_lossy(bytes.as_ref()))
|
||||
}
|
||||
|
||||
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
|
||||
Ok(serde_json::to_string(self)
|
||||
.context(SerializeCatalogEntryValueSnafu)?
|
||||
.into_bytes())
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
define_catalog_value!(
|
||||
TableRegionalValue,
|
||||
TableGlobalValue,
|
||||
CatalogValue,
|
||||
SchemaValue
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
|
||||
use table::metadata::{RawTableMeta, TableIdent, TableType};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parse_catalog_key() {
|
||||
let key = "__c-C";
|
||||
let catalog_key = CatalogKey::parse(key).unwrap();
|
||||
assert_eq!("C", catalog_key.catalog_name);
|
||||
assert_eq!(key, catalog_key.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_schema_key() {
|
||||
let key = "__s-C-S";
|
||||
let schema_key = SchemaKey::parse(key).unwrap();
|
||||
assert_eq!("C", schema_key.catalog_name);
|
||||
assert_eq!("S", schema_key.schema_name);
|
||||
assert_eq!(key, schema_key.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_table_key() {
|
||||
let key = "__tg-C-S-T";
|
||||
let entry = TableGlobalKey::parse(key).unwrap();
|
||||
assert_eq!("C", entry.catalog_name);
|
||||
assert_eq!("S", entry.schema_name);
|
||||
assert_eq!("T", entry.table_name);
|
||||
assert_eq!(key, &entry.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_prefix() {
|
||||
assert_eq!("__c-", build_catalog_prefix());
|
||||
assert_eq!("__s-CATALOG-", build_schema_prefix("CATALOG"));
|
||||
assert_eq!(
|
||||
"__tg-CATALOG-SCHEMA-",
|
||||
build_table_global_prefix("CATALOG", "SCHEMA")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_schema() {
|
||||
let schema = Schema::new(vec![ColumnSchema::new(
|
||||
"name",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)]);
|
||||
|
||||
let meta = RawTableMeta {
|
||||
schema: RawSchema::from(&schema),
|
||||
engine: "mito".to_string(),
|
||||
created_on: chrono::DateTime::default(),
|
||||
primary_key_indices: vec![0, 1],
|
||||
next_column_id: 3,
|
||||
engine_options: Default::default(),
|
||||
value_indices: vec![2, 3],
|
||||
options: Default::default(),
|
||||
region_numbers: vec![1],
|
||||
};
|
||||
|
||||
let table_info = RawTableInfo {
|
||||
ident: TableIdent {
|
||||
table_id: 42,
|
||||
version: 1,
|
||||
},
|
||||
name: "table_1".to_string(),
|
||||
desc: Some("blah".to_string()),
|
||||
catalog_name: "catalog_1".to_string(),
|
||||
schema_name: "schema_1".to_string(),
|
||||
meta,
|
||||
table_type: TableType::Base,
|
||||
};
|
||||
|
||||
let value = TableGlobalValue {
|
||||
node_id: 0,
|
||||
regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
|
||||
table_info,
|
||||
};
|
||||
let serialized = serde_json::to_string(&value).unwrap();
|
||||
let deserialized = TableGlobalValue::parse(&serialized).unwrap();
|
||||
assert_eq!(value, deserialized);
|
||||
}
|
||||
}
|
||||
@@ -14,10 +14,3 @@
|
||||
|
||||
pub mod consts;
|
||||
pub mod error;
|
||||
mod helper;
|
||||
|
||||
pub use helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
|
||||
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
|
||||
TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use datafusion::logical_plan::DFSchemaRef;
|
||||
use substrait_proto::protobuf::extensions::simple_extension_declaration::{
|
||||
ExtensionFunction, MappingType,
|
||||
};
|
||||
@@ -23,6 +24,7 @@ use substrait_proto::protobuf::extensions::SimpleExtensionDeclaration;
|
||||
pub struct ConvertorContext {
|
||||
scalar_fn_names: HashMap<String, u32>,
|
||||
scalar_fn_map: HashMap<u32, String>,
|
||||
df_schema: Option<DFSchemaRef>,
|
||||
}
|
||||
|
||||
impl ConvertorContext {
|
||||
@@ -63,4 +65,13 @@ impl ConvertorContext {
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn set_df_schema(&mut self, schema: DFSchemaRef) {
|
||||
debug_assert!(self.df_schema.is_none());
|
||||
self.df_schema.get_or_insert(schema);
|
||||
}
|
||||
|
||||
pub(crate) fn df_schema(&self) -> Option<&DFSchemaRef> {
|
||||
self.df_schema.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::collections::VecDeque;
|
||||
use std::str::FromStr;
|
||||
|
||||
use datafusion::logical_plan::{Column, Expr};
|
||||
use datafusion_expr::{expr_fn, BuiltinScalarFunction, Operator};
|
||||
use datafusion_expr::{expr_fn, lit, BuiltinScalarFunction, Operator};
|
||||
use datatypes::schema::Schema;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use substrait_proto::protobuf::expression::field_reference::ReferenceType as FieldReferenceType;
|
||||
@@ -24,7 +24,7 @@ use substrait_proto::protobuf::expression::reference_segment::{
|
||||
ReferenceType as SegReferenceType, StructField,
|
||||
};
|
||||
use substrait_proto::protobuf::expression::{
|
||||
FieldReference, ReferenceSegment, RexType, ScalarFunction,
|
||||
FieldReference, Literal, ReferenceSegment, RexType, ScalarFunction,
|
||||
};
|
||||
use substrait_proto::protobuf::function_argument::ArgType;
|
||||
use substrait_proto::protobuf::Expression;
|
||||
@@ -33,15 +33,24 @@ use crate::context::ConvertorContext;
|
||||
use crate::error::{
|
||||
EmptyExprSnafu, InvalidParametersSnafu, MissingFieldSnafu, Result, UnsupportedExprSnafu,
|
||||
};
|
||||
use crate::types::{literal_type_to_scalar_value, scalar_value_as_literal_type};
|
||||
|
||||
/// Convert substrait's `Expression` to DataFusion's `Expr`.
|
||||
pub fn to_df_expr(ctx: &ConvertorContext, expression: Expression, schema: &Schema) -> Result<Expr> {
|
||||
pub(crate) fn to_df_expr(
|
||||
ctx: &ConvertorContext,
|
||||
expression: Expression,
|
||||
schema: &Schema,
|
||||
) -> Result<Expr> {
|
||||
let expr_rex_type = expression.rex_type.context(EmptyExprSnafu)?;
|
||||
match expr_rex_type {
|
||||
RexType::Literal(_) => UnsupportedExprSnafu {
|
||||
name: "substrait Literal expression",
|
||||
RexType::Literal(l) => {
|
||||
let t = l.literal_type.context(MissingFieldSnafu {
|
||||
field: "LiteralType",
|
||||
plan: "Literal",
|
||||
})?;
|
||||
let v = literal_type_to_scalar_value(t)?;
|
||||
Ok(lit(v))
|
||||
}
|
||||
.fail()?,
|
||||
RexType::Selection(selection) => convert_selection_rex(*selection, schema),
|
||||
RexType::ScalarFunction(scalar_fn) => convert_scalar_function(ctx, scalar_fn, schema),
|
||||
RexType::WindowFunction(_)
|
||||
@@ -453,10 +462,21 @@ pub fn expression_from_df_expr(
|
||||
}
|
||||
}
|
||||
// Don't merge them with other unsupported expr arms to preserve the ordering.
|
||||
Expr::ScalarVariable(..) | Expr::Literal(..) => UnsupportedExprSnafu {
|
||||
Expr::ScalarVariable(..) => UnsupportedExprSnafu {
|
||||
name: expr.to_string(),
|
||||
}
|
||||
.fail()?,
|
||||
Expr::Literal(v) => {
|
||||
let t = scalar_value_as_literal_type(v)?;
|
||||
let l = Literal {
|
||||
nullable: true,
|
||||
type_variation_reference: 0,
|
||||
literal_type: Some(t),
|
||||
};
|
||||
Expression {
|
||||
rex_type: Some(RexType::Literal(l)),
|
||||
}
|
||||
}
|
||||
Expr::BinaryExpr { left, op, right } => {
|
||||
let left = expression_from_df_expr(ctx, left, schema)?;
|
||||
let right = expression_from_df_expr(ctx, right, schema)?;
|
||||
|
||||
@@ -18,7 +18,9 @@ use bytes::{Buf, Bytes, BytesMut};
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_error::prelude::BoxedError;
|
||||
use common_telemetry::debug;
|
||||
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
|
||||
use datafusion::datasource::TableProvider;
|
||||
use datafusion::logical_plan::plan::Filter;
|
||||
use datafusion::logical_plan::{LogicalPlan, TableScan, ToDFSchema};
|
||||
use datafusion::physical_plan::project_schema;
|
||||
use prost::Message;
|
||||
@@ -29,31 +31,33 @@ use substrait_proto::protobuf::extensions::simple_extension_declaration::Mapping
|
||||
use substrait_proto::protobuf::plan_rel::RelType as PlanRelType;
|
||||
use substrait_proto::protobuf::read_rel::{NamedTable, ReadType};
|
||||
use substrait_proto::protobuf::rel::RelType;
|
||||
use substrait_proto::protobuf::{Plan, PlanRel, ReadRel, Rel};
|
||||
use substrait_proto::protobuf::{FilterRel, Plan, PlanRel, ReadRel, Rel};
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
use crate::context::ConvertorContext;
|
||||
use crate::df_expr::{expression_from_df_expr, to_df_expr};
|
||||
use crate::error::{
|
||||
DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
|
||||
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
|
||||
InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
|
||||
UnknownPlanSnafu, UnsupportedExprSnafu, UnsupportedPlanSnafu,
|
||||
};
|
||||
use crate::schema::{from_schema, to_schema};
|
||||
use crate::SubstraitPlan;
|
||||
|
||||
pub struct DFLogicalSubstraitConvertor {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
}
|
||||
pub struct DFLogicalSubstraitConvertor;
|
||||
|
||||
impl SubstraitPlan for DFLogicalSubstraitConvertor {
|
||||
type Error = Error;
|
||||
|
||||
type Plan = LogicalPlan;
|
||||
|
||||
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error> {
|
||||
fn decode<B: Buf + Send>(
|
||||
&self,
|
||||
message: B,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<Self::Plan, Self::Error> {
|
||||
let plan = Plan::decode(message).context(DecodeRelSnafu)?;
|
||||
self.convert_plan(plan)
|
||||
self.convert_plan(plan, catalog_manager)
|
||||
}
|
||||
|
||||
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
|
||||
@@ -67,13 +71,11 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertor {
|
||||
pub fn new(catalog_manager: CatalogManagerRef) -> Self {
|
||||
Self { catalog_manager }
|
||||
}
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertor {
|
||||
pub fn convert_plan(&self, mut plan: Plan) -> Result<LogicalPlan, Error> {
|
||||
fn convert_plan(
|
||||
&self,
|
||||
mut plan: Plan,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// prepare convertor context
|
||||
let mut ctx = ConvertorContext::default();
|
||||
for simple_ext in plan.extensions {
|
||||
@@ -99,15 +101,51 @@ impl DFLogicalSubstraitConvertor {
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
|
||||
self.rel_to_logical_plan(&mut ctx, Box::new(rel), catalog_manager)
|
||||
}
|
||||
|
||||
fn rel_to_logical_plan(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
rel: Box<Rel>,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
|
||||
|
||||
// build logical plan
|
||||
let logical_plan = match rel_type {
|
||||
RelType::Read(read_rel) => self.convert_read_rel(&mut ctx, read_rel),
|
||||
RelType::Filter(_filter_rel) => UnsupportedPlanSnafu {
|
||||
name: "Filter Relation",
|
||||
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, catalog_manager)?,
|
||||
RelType::Filter(filter) => {
|
||||
let FilterRel {
|
||||
common: _,
|
||||
input,
|
||||
condition,
|
||||
advanced_extension: _,
|
||||
} = *filter;
|
||||
|
||||
let input = input.context(MissingFieldSnafu {
|
||||
field: "input",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
let input = Arc::new(self.rel_to_logical_plan(ctx, input, catalog_manager)?);
|
||||
|
||||
let condition = condition.context(MissingFieldSnafu {
|
||||
field: "condition",
|
||||
plan: "Filter",
|
||||
})?;
|
||||
|
||||
let schema = ctx.df_schema().context(InvalidParametersSnafu {
|
||||
reason: "the underlying TableScan plan should have included a table schema",
|
||||
})?;
|
||||
let schema = schema
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(error::ConvertDfSchemaSnafu)?;
|
||||
let predicate = to_df_expr(ctx, *condition, &schema)?;
|
||||
|
||||
LogicalPlan::Filter(Filter { predicate, input })
|
||||
}
|
||||
.fail()?,
|
||||
RelType::Fetch(_fetch_rel) => UnsupportedPlanSnafu {
|
||||
name: "Fetch Relation",
|
||||
}
|
||||
@@ -148,7 +186,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
name: "Cross Relation",
|
||||
}
|
||||
.fail()?,
|
||||
}?;
|
||||
};
|
||||
|
||||
Ok(logical_plan)
|
||||
}
|
||||
@@ -157,6 +195,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
read_rel: Box<ReadRel>,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<LogicalPlan, Error> {
|
||||
// Extract the catalog, schema and table name from NamedTable. Assume the first three are those names.
|
||||
let read_type = read_rel.read_type.context(MissingFieldSnafu {
|
||||
@@ -192,8 +231,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
.map(|mask_expr| self.convert_mask_expression(mask_expr));
|
||||
|
||||
// Get table handle from catalog manager
|
||||
let table_ref = self
|
||||
.catalog_manager
|
||||
let table_ref = catalog_manager
|
||||
.table(&catalog_name, &schema_name, &table_name)
|
||||
.map_err(BoxedError::new)
|
||||
.context(InternalSnafu)?
|
||||
@@ -207,7 +245,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
let retrieved_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
|
||||
let retrieved_arrow_schema = retrieved_schema.arrow_schema();
|
||||
ensure!(
|
||||
stored_schema.fields == retrieved_arrow_schema.fields,
|
||||
same_schema_without_metadata(&stored_schema, retrieved_arrow_schema),
|
||||
SchemaNotMatchSnafu {
|
||||
substrait_schema: retrieved_arrow_schema.clone(),
|
||||
storage_schema: stored_schema
|
||||
@@ -227,9 +265,11 @@ impl DFLogicalSubstraitConvertor {
|
||||
.to_dfschema_ref()
|
||||
.context(DFInternalSnafu)?;
|
||||
|
||||
// TODO(ruihang): Support filters and limit
|
||||
ctx.set_df_schema(projected_schema.clone());
|
||||
|
||||
// TODO(ruihang): Support limit
|
||||
Ok(LogicalPlan::TableScan(TableScan {
|
||||
table_name,
|
||||
table_name: format!("{}.{}.{}", catalog_name, schema_name, table_name),
|
||||
source: adapter,
|
||||
projection,
|
||||
projected_schema,
|
||||
@@ -250,20 +290,42 @@ impl DFLogicalSubstraitConvertor {
|
||||
}
|
||||
|
||||
impl DFLogicalSubstraitConvertor {
|
||||
pub fn convert_df_plan(&self, plan: LogicalPlan) -> Result<Plan, Error> {
|
||||
let mut ctx = ConvertorContext::default();
|
||||
|
||||
// TODO(ruihang): extract this translation logic into a separated function
|
||||
// convert PlanRel
|
||||
let rel = match plan {
|
||||
fn logical_plan_to_rel(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
plan: Arc<LogicalPlan>,
|
||||
) -> Result<Rel, Error> {
|
||||
Ok(match &*plan {
|
||||
LogicalPlan::Projection(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Projection",
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Filter(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Filter",
|
||||
LogicalPlan::Filter(filter) => {
|
||||
let input = Some(Box::new(
|
||||
self.logical_plan_to_rel(ctx, filter.input.clone())?,
|
||||
));
|
||||
|
||||
let schema = plan
|
||||
.schema()
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(error::ConvertDfSchemaSnafu)?;
|
||||
let condition = Some(Box::new(expression_from_df_expr(
|
||||
ctx,
|
||||
&filter.predicate,
|
||||
&schema,
|
||||
)?));
|
||||
|
||||
let rel = FilterRel {
|
||||
common: None,
|
||||
input,
|
||||
condition,
|
||||
advanced_extension: None,
|
||||
};
|
||||
Rel {
|
||||
rel_type: Some(RelType::Filter(Box::new(rel))),
|
||||
}
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::Window(_) => UnsupportedPlanSnafu {
|
||||
name: "DataFusion Logical Window",
|
||||
}
|
||||
@@ -293,7 +355,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
}
|
||||
.fail()?,
|
||||
LogicalPlan::TableScan(table_scan) => {
|
||||
let read_rel = self.convert_table_scan_plan(&mut ctx, table_scan)?;
|
||||
let read_rel = self.convert_table_scan_plan(ctx, table_scan)?;
|
||||
Rel {
|
||||
rel_type: Some(RelType::Read(Box::new(read_rel))),
|
||||
}
|
||||
@@ -319,7 +381,13 @@ impl DFLogicalSubstraitConvertor {
|
||||
),
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
})
|
||||
}
|
||||
|
||||
fn convert_df_plan(&self, plan: LogicalPlan) -> Result<Plan, Error> {
|
||||
let mut ctx = ConvertorContext::default();
|
||||
|
||||
let rel = self.logical_plan_to_rel(&mut ctx, Arc::new(plan))?;
|
||||
|
||||
// convert extension
|
||||
let extensions = ctx.generate_function_extension();
|
||||
@@ -341,7 +409,7 @@ impl DFLogicalSubstraitConvertor {
|
||||
pub fn convert_table_scan_plan(
|
||||
&self,
|
||||
ctx: &mut ConvertorContext,
|
||||
table_scan: TableScan,
|
||||
table_scan: &TableScan,
|
||||
) -> Result<ReadRel, Error> {
|
||||
let provider = table_scan
|
||||
.source
|
||||
@@ -363,7 +431,8 @@ impl DFLogicalSubstraitConvertor {
|
||||
// assemble projection
|
||||
let projection = table_scan
|
||||
.projection
|
||||
.map(|proj| self.convert_schema_projection(&proj));
|
||||
.as_ref()
|
||||
.map(|x| self.convert_schema_projection(x));
|
||||
|
||||
// assemble base (unprojected) schema using Table's schema.
|
||||
let base_schema = from_schema(&provider.table().schema())?;
|
||||
@@ -371,7 +440,8 @@ impl DFLogicalSubstraitConvertor {
|
||||
// make conjunction over a list of filters and convert the result to substrait
|
||||
let filter = if let Some(conjunction) = table_scan
|
||||
.filters
|
||||
.into_iter()
|
||||
.iter()
|
||||
.cloned()
|
||||
.reduce(|accum, expr| accum.and(expr))
|
||||
{
|
||||
Some(Box::new(expression_from_df_expr(
|
||||
@@ -412,6 +482,13 @@ impl DFLogicalSubstraitConvertor {
|
||||
}
|
||||
}
|
||||
|
||||
fn same_schema_without_metadata(lhs: &ArrowSchemaRef, rhs: &ArrowSchemaRef) -> bool {
|
||||
lhs.fields.len() == rhs.fields.len()
|
||||
&& lhs.fields.iter().zip(rhs.fields.iter()).all(|(x, y)| {
|
||||
x.name == y.name && x.data_type == y.data_type && x.is_nullable == y.is_nullable
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use catalog::local::{LocalCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
|
||||
@@ -463,10 +540,10 @@ mod test {
|
||||
}
|
||||
|
||||
async fn logical_plan_round_trip(plan: LogicalPlan, catalog: CatalogManagerRef) {
|
||||
let convertor = DFLogicalSubstraitConvertor::new(catalog);
|
||||
let convertor = DFLogicalSubstraitConvertor;
|
||||
|
||||
let proto = convertor.encode(plan.clone()).unwrap();
|
||||
let tripped_plan = convertor.decode(proto).unwrap();
|
||||
let tripped_plan = convertor.decode(proto, catalog).unwrap();
|
||||
|
||||
assert_eq!(format!("{:?}", plan), format!("{:?}", tripped_plan));
|
||||
}
|
||||
@@ -488,6 +565,7 @@ mod test {
|
||||
.await
|
||||
.unwrap();
|
||||
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
|
||||
|
||||
let projection = vec![1, 3, 5];
|
||||
let df_schema = adapter.schema().to_dfschema().unwrap();
|
||||
let projected_fields = projection
|
||||
@@ -498,7 +576,10 @@ mod test {
|
||||
Arc::new(DFSchema::new_with_metadata(projected_fields, Default::default()).unwrap());
|
||||
|
||||
let table_scan_plan = LogicalPlan::TableScan(TableScan {
|
||||
table_name: DEFAULT_TABLE_NAME.to_string(),
|
||||
table_name: format!(
|
||||
"{}.{}.{}",
|
||||
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, DEFAULT_TABLE_NAME
|
||||
),
|
||||
source: adapter,
|
||||
projection: Some(projection),
|
||||
projected_schema,
|
||||
|
||||
@@ -99,6 +99,12 @@ pub enum Error {
|
||||
storage_schema: datafusion::arrow::datatypes::SchemaRef,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert DataFusion schema, source: {}", source))]
|
||||
ConvertDfSchema {
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -120,6 +126,7 @@ impl ErrorExt for Error {
|
||||
| Error::TableNotFound { .. }
|
||||
| Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::DFInternal { .. } | Error::Internal { .. } => StatusCode::Internal,
|
||||
Error::ConvertDfSchema { source } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ mod schema;
|
||||
mod types;
|
||||
|
||||
use bytes::{Buf, Bytes};
|
||||
use catalog::CatalogManagerRef;
|
||||
|
||||
pub use crate::df_logical::DFLogicalSubstraitConvertor;
|
||||
|
||||
@@ -30,7 +31,11 @@ pub trait SubstraitPlan {
|
||||
|
||||
type Plan;
|
||||
|
||||
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error>;
|
||||
fn decode<B: Buf + Send>(
|
||||
&self,
|
||||
message: B,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
) -> Result<Self::Plan, Self::Error>;
|
||||
|
||||
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error>;
|
||||
}
|
||||
|
||||
@@ -18,11 +18,13 @@
|
||||
//! Current we only have variations on integer types. Variation 0 (system preferred) are the same with base types, which
|
||||
//! are signed integer (i.e. I8 -> [i8]), and Variation 1 stands for unsigned integer (i.e. I8 -> [u8]).
|
||||
|
||||
use datafusion::scalar::ScalarValue;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use substrait_proto::protobuf::expression::literal::LiteralType;
|
||||
use substrait_proto::protobuf::r#type::{self as s_type, Kind, Nullability};
|
||||
use substrait_proto::protobuf::Type as SType;
|
||||
use substrait_proto::protobuf::{Type as SType, Type};
|
||||
|
||||
use crate::error::{Result, UnsupportedConcreteTypeSnafu, UnsupportedSubstraitTypeSnafu};
|
||||
use crate::error::{self, Result, UnsupportedConcreteTypeSnafu, UnsupportedSubstraitTypeSnafu};
|
||||
|
||||
macro_rules! substrait_kind {
|
||||
($desc:ident, $concrete_ty:ident) => {{
|
||||
@@ -134,3 +136,67 @@ pub fn from_concrete_type(ty: ConcreteDataType, nullability: Option<bool>) -> Re
|
||||
|
||||
Ok(SType { kind })
|
||||
}
|
||||
|
||||
pub(crate) fn scalar_value_as_literal_type(v: &ScalarValue) -> Result<LiteralType> {
|
||||
Ok(if v.is_null() {
|
||||
LiteralType::Null(Type { kind: None })
|
||||
} else {
|
||||
match v {
|
||||
ScalarValue::Boolean(Some(v)) => LiteralType::Boolean(*v),
|
||||
ScalarValue::Float32(Some(v)) => LiteralType::Fp32(*v),
|
||||
ScalarValue::Float64(Some(v)) => LiteralType::Fp64(*v),
|
||||
ScalarValue::Int8(Some(v)) => LiteralType::I8(*v as i32),
|
||||
ScalarValue::Int16(Some(v)) => LiteralType::I16(*v as i32),
|
||||
ScalarValue::Int32(Some(v)) => LiteralType::I32(*v),
|
||||
ScalarValue::Int64(Some(v)) => LiteralType::I64(*v),
|
||||
ScalarValue::LargeUtf8(Some(v)) => LiteralType::String(v.clone()),
|
||||
ScalarValue::LargeBinary(Some(v)) => LiteralType::Binary(v.clone()),
|
||||
// TODO(LFC): Implement other conversions: ScalarValue => LiteralType
|
||||
_ => {
|
||||
return error::UnsupportedExprSnafu {
|
||||
name: format!("{:?}", v),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn literal_type_to_scalar_value(t: LiteralType) -> Result<ScalarValue> {
|
||||
Ok(match t {
|
||||
LiteralType::Null(Type { kind: Some(kind) }) => match kind {
|
||||
Kind::Bool(_) => ScalarValue::Boolean(None),
|
||||
Kind::I8(_) => ScalarValue::Int8(None),
|
||||
Kind::I16(_) => ScalarValue::Int16(None),
|
||||
Kind::I32(_) => ScalarValue::Int32(None),
|
||||
Kind::I64(_) => ScalarValue::Int64(None),
|
||||
Kind::Fp32(_) => ScalarValue::Float32(None),
|
||||
Kind::Fp64(_) => ScalarValue::Float64(None),
|
||||
Kind::String(_) => ScalarValue::LargeUtf8(None),
|
||||
Kind::Binary(_) => ScalarValue::LargeBinary(None),
|
||||
// TODO(LFC): Implement other conversions: Kind => ScalarValue
|
||||
_ => {
|
||||
return error::UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{:?}", kind),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
},
|
||||
LiteralType::Boolean(v) => ScalarValue::Boolean(Some(v)),
|
||||
LiteralType::I8(v) => ScalarValue::Int8(Some(v as i8)),
|
||||
LiteralType::I16(v) => ScalarValue::Int16(Some(v as i16)),
|
||||
LiteralType::I32(v) => ScalarValue::Int32(Some(v)),
|
||||
LiteralType::I64(v) => ScalarValue::Int64(Some(v)),
|
||||
LiteralType::Fp32(v) => ScalarValue::Float32(Some(v)),
|
||||
LiteralType::Fp64(v) => ScalarValue::Float64(Some(v)),
|
||||
LiteralType::String(v) => ScalarValue::LargeUtf8(Some(v)),
|
||||
LiteralType::Binary(v) => ScalarValue::LargeBinary(Some(v)),
|
||||
// TODO(LFC): Implement other conversions: LiteralType => ScalarValue
|
||||
_ => {
|
||||
return error::UnsupportedSubstraitTypeSnafu {
|
||||
ty: format!("{:?}", t),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user