mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-18 05:50:41 +00:00
feat: upgraded pg_catalog support (#6918)
* refactor: add datafusion-postgres dependency * refactor: move and include pg_catalog udfs * chore: update upstream * feat: register table function pg_get_keywords * feat: bridge CatalogInfo for our CatalogManager Signed-off-by: Ning Sun <sunning@greptime.com> * feat: convert pg_catalog table to our system table * feat: bridge system catalog with datafusion-postgres Signed-off-by: Ning Sun <sunning@greptime.com> * feat: add more udfs * feat: add compatibility rewriter to postgres handler * fix: various fix * fmt: fix * fix: use functions from pg_catalog library * fmt * fix: sqlness runner Signed-off-by: Ning Sun <sunning@greptime.com> * test: adopt arrow 56.0 to 56.1 memory size change * fix: add additional udfs * chore: format * refactor: return None when creating system table failed Signed-off-by: Ning Sun <sunning@greptime.com> * chore: provide safety comments about expect usage --------- Signed-off-by: Ning Sun <sunning@greptime.com>
This commit is contained in:
@@ -37,6 +37,7 @@ datafusion-common.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
datafusion-functions-aggregate-common.workspace = true
|
||||
datafusion-physical-expr.workspace = true
|
||||
datafusion-postgres.workspace = true
|
||||
datatypes.workspace = true
|
||||
derive_more = { version = "1", default-features = false, features = ["display"] }
|
||||
geo = { version = "0.29", optional = true }
|
||||
|
||||
@@ -21,8 +21,7 @@ mod version;
|
||||
|
||||
use build::BuildFunction;
|
||||
use database::{
|
||||
ConnectionIdFunction, CurrentSchemaFunction, DatabaseFunction, PgBackendPidFunction,
|
||||
ReadPreferenceFunction, SessionUserFunction,
|
||||
ConnectionIdFunction, DatabaseFunction, PgBackendPidFunction, ReadPreferenceFunction,
|
||||
};
|
||||
use pg_catalog::PGCatalogFunction;
|
||||
use procedure_state::ProcedureStateFunction;
|
||||
@@ -37,9 +36,7 @@ impl SystemFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register_scalar(BuildFunction);
|
||||
registry.register_scalar(VersionFunction);
|
||||
registry.register_scalar(CurrentSchemaFunction);
|
||||
registry.register_scalar(DatabaseFunction);
|
||||
registry.register_scalar(SessionUserFunction);
|
||||
registry.register_scalar(ReadPreferenceFunction);
|
||||
registry.register_scalar(PgBackendPidFunction);
|
||||
registry.register_scalar(ConnectionIdFunction);
|
||||
|
||||
@@ -26,10 +26,6 @@ use crate::function::{Function, find_function_context};
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct DatabaseFunction;
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct CurrentSchemaFunction;
|
||||
pub struct SessionUserFunction;
|
||||
|
||||
pub struct ReadPreferenceFunction;
|
||||
|
||||
#[derive(Display)]
|
||||
@@ -41,8 +37,6 @@ pub struct PgBackendPidFunction;
|
||||
pub struct ConnectionIdFunction;
|
||||
|
||||
const DATABASE_FUNCTION_NAME: &str = "database";
|
||||
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
|
||||
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
|
||||
const READ_PREFERENCE_FUNCTION_NAME: &str = "read_preference";
|
||||
const PG_BACKEND_PID: &str = "pg_backend_pid";
|
||||
const CONNECTION_ID: &str = "connection_id";
|
||||
@@ -71,58 +65,6 @@ impl Function for DatabaseFunction {
|
||||
}
|
||||
}
|
||||
|
||||
// Although "current_schema" could simply be an alias of "database", we keep it as a separate
|
||||
// function to avoid breaking changes until https://github.com/apache/datafusion/issues/17469 is resolved.
|
||||
impl Function for CurrentSchemaFunction {
|
||||
fn name(&self) -> &str {
|
||||
CURRENT_SCHEMA_FUNCTION_NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||
Ok(DataType::Utf8View)
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::nullary(Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn invoke_with_args(
|
||||
&self,
|
||||
args: ScalarFunctionArgs,
|
||||
) -> datafusion_common::Result<ColumnarValue> {
|
||||
let func_ctx = find_function_context(&args)?;
|
||||
let db = func_ctx.query_ctx.current_schema();
|
||||
|
||||
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(db))))
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for SessionUserFunction {
|
||||
fn name(&self) -> &str {
|
||||
SESSION_USER_FUNCTION_NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||
Ok(DataType::Utf8View)
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::nullary(Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn invoke_with_args(
|
||||
&self,
|
||||
args: ScalarFunctionArgs,
|
||||
) -> datafusion_common::Result<ColumnarValue> {
|
||||
let func_ctx = find_function_context(&args)?;
|
||||
let user = func_ctx.query_ctx.current_user();
|
||||
|
||||
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
|
||||
user.username().to_string(),
|
||||
))))
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for ReadPreferenceFunction {
|
||||
fn name(&self) -> &str {
|
||||
READ_PREFERENCE_FUNCTION_NAME
|
||||
@@ -203,18 +145,6 @@ impl fmt::Display for DatabaseFunction {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for CurrentSchemaFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "CURRENT_SCHEMA")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SessionUserFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "SESSION_USER")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ReadPreferenceFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "READ_PREFERENCE")
|
||||
|
||||
@@ -12,29 +12,168 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod pg_get_userbyid;
|
||||
mod table_is_visible;
|
||||
mod version;
|
||||
|
||||
use pg_get_userbyid::PGGetUserByIdFunction;
|
||||
use table_is_visible::PGTableIsVisibleFunction;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Result;
|
||||
use datafusion::arrow::array::{ArrayRef, StringArray, as_boolean_array};
|
||||
use datafusion::catalog::TableFunction;
|
||||
use datafusion::common::ScalarValue;
|
||||
use datafusion::common::utils::SingleRowListArrayBuilder;
|
||||
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
|
||||
use datafusion_postgres::pg_catalog::{self, PgCatalogStaticTables};
|
||||
use datatypes::arrow::datatypes::{DataType, Field};
|
||||
use derive_more::Display;
|
||||
use version::PGVersionFunction;
|
||||
|
||||
use crate::function::{Function, find_function_context};
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! pg_catalog_func_fullname {
|
||||
($name:literal) => {
|
||||
concat!("pg_catalog.", $name)
|
||||
};
|
||||
// SQL-visible names returned by the `name()` methods of the function types below.
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
|
||||
const CURRENT_SCHEMAS_FUNCTION_NAME: &str = "current_schemas";
|
||||
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
|
||||
|
||||
/// Unit type for the `current_schema` function.
///
/// The derived `Display` prints whatever `self.name()` returns.
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct CurrentSchemaFunction;
|
||||
|
||||
/// Unit type for the `current_schemas` function.
///
/// The derived `Display` prints whatever `self.name()` returns.
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct CurrentSchemasFunction;
|
||||
|
||||
/// Unit type for the `session_user` function.
///
/// The derived `Display` prints whatever `self.name()` returns.
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct SessionUserFunction;
|
||||
|
||||
// Although "current_schema" could simply be an alias of "database", we keep it as a separate
|
||||
// function to avoid breaking changes until https://github.com/apache/datafusion/issues/17469 is resolved.
|
||||
impl Function for CurrentSchemaFunction {
|
||||
fn name(&self) -> &str {
|
||||
CURRENT_SCHEMA_FUNCTION_NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||
Ok(DataType::Utf8View)
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::nullary(Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn invoke_with_args(
|
||||
&self,
|
||||
args: ScalarFunctionArgs,
|
||||
) -> datafusion_common::Result<ColumnarValue> {
|
||||
let func_ctx = find_function_context(&args)?;
|
||||
let db = func_ctx.query_ctx.current_schema();
|
||||
|
||||
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(db))))
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for SessionUserFunction {
|
||||
fn name(&self) -> &str {
|
||||
SESSION_USER_FUNCTION_NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||
Ok(DataType::Utf8View)
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::nullary(Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn invoke_with_args(
|
||||
&self,
|
||||
args: ScalarFunctionArgs,
|
||||
) -> datafusion_common::Result<ColumnarValue> {
|
||||
let func_ctx = find_function_context(&args)?;
|
||||
let user = func_ctx.query_ctx.current_user();
|
||||
|
||||
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
|
||||
user.username().to_string(),
|
||||
))))
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for CurrentSchemasFunction {
|
||||
fn name(&self) -> &str {
|
||||
CURRENT_SCHEMAS_FUNCTION_NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||
Ok(DataType::List(Arc::new(Field::new(
|
||||
"x",
|
||||
DataType::Utf8View,
|
||||
false,
|
||||
))))
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(vec![DataType::Boolean], Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn invoke_with_args(
|
||||
&self,
|
||||
args: ScalarFunctionArgs,
|
||||
) -> datafusion_common::Result<ColumnarValue> {
|
||||
let args = ColumnarValue::values_to_arrays(&args.args)?;
|
||||
let input = as_boolean_array(&args[0]);
|
||||
|
||||
// Create a UTF8 array with a single value
|
||||
let mut values = vec!["public"];
|
||||
// include implicit schemas
|
||||
if input.value(0) {
|
||||
values.push("information_schema");
|
||||
values.push("pg_catalog");
|
||||
values.push("greptime_private");
|
||||
}
|
||||
|
||||
let list_array = SingleRowListArrayBuilder::new(Arc::new(StringArray::from(values)));
|
||||
|
||||
let array: ArrayRef = Arc::new(list_array.build_list_array());
|
||||
|
||||
Ok(ColumnarValue::Array(array))
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct PGCatalogFunction;
|
||||
|
||||
impl PGCatalogFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register_scalar(PGTableIsVisibleFunction);
|
||||
registry.register_scalar(PGGetUserByIdFunction);
|
||||
let static_tables =
|
||||
Arc::new(PgCatalogStaticTables::try_new().expect("load postgres static tables"));
|
||||
|
||||
registry.register_scalar(PGVersionFunction);
|
||||
registry.register_scalar(CurrentSchemaFunction);
|
||||
registry.register_scalar(CurrentSchemasFunction);
|
||||
registry.register_scalar(SessionUserFunction);
|
||||
registry.register(pg_catalog::format_type::create_format_type_udf());
|
||||
registry.register(pg_catalog::create_pg_get_partkeydef_udf());
|
||||
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(
|
||||
"has_table_privilege",
|
||||
));
|
||||
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(
|
||||
"has_schema_privilege",
|
||||
));
|
||||
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(
|
||||
"has_database_privilege",
|
||||
));
|
||||
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(
|
||||
"has_any_column_privilege",
|
||||
));
|
||||
registry.register_table_function(TableFunction::new(
|
||||
"pg_get_keywords".to_string(),
|
||||
static_tables.pg_get_keywords.clone(),
|
||||
));
|
||||
registry.register(pg_catalog::create_pg_relation_is_publishable_udf());
|
||||
registry.register(pg_catalog::create_pg_get_statisticsobjdef_columns_udf());
|
||||
registry.register(pg_catalog::create_pg_get_userbyid_udf());
|
||||
registry.register(pg_catalog::create_pg_table_is_visible());
|
||||
registry.register(pg_catalog::pg_get_expr_udf::create_pg_get_expr_udf());
|
||||
// TODO(sunng87): upgrade datafusion to add
|
||||
//registry.register(pg_catalog::create_pg_encoding_to_char_udf());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Result;
|
||||
use datafusion::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datafusion_expr::{Signature, Volatility};
|
||||
use datatypes::prelude::{DataType, VectorRef};
|
||||
use datatypes::types::LogicalPrimitiveType;
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::scalars::expression::{EvalContext, scalar_unary_op};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct PGGetUserByIdFunction;
|
||||
|
||||
const NAME: &str = crate::pg_catalog_func_fullname!("pg_get_userbyid");
|
||||
|
||||
impl fmt::Display for PGGetUserByIdFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, crate::pg_catalog_func_fullname!("PG_GET_USERBYID"))
|
||||
}
|
||||
}
|
||||
|
||||
// `Function` implementation for `pg_catalog.pg_get_userbyid`.
impl Function for PGGetUserByIdFunction {
|
||||
/// Returns the fully qualified function name stored in `NAME`.
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
/// The result type is always `Utf8`; the argument types are ignored.
fn return_type(&self, _: &[ArrowDataType]) -> Result<ArrowDataType> {
|
||||
Ok(ArrowDataType::Utf8)
|
||||
}
|
||||
|
||||
/// Declares a uniform signature: one argument of type `UInt32`.
fn signature(&self) -> Signature {
|
||||
Signature::uniform(
|
||||
1,
|
||||
vec![arrow::datatypes::DataType::UInt32],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
/// Applies `pg_get_user_by_id` to every element of the first column.
///
/// The native element type is selected from the column's logical type id by
/// `with_match_primitive_type_id!`. Indexing `columns[0]` panics when
/// `columns` is empty.
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$T| {
|
||||
let col = scalar_unary_op::<<$T as LogicalPrimitiveType>::Native, String, _>(&columns[0], pg_get_user_by_id, &mut EvalContext::default())?;
|
||||
Ok(Arc::new(col))
|
||||
}, {
|
||||
// Fallback arm of the macro; presumably reached only for non-primitive
// column types — TODO confirm against the macro definition.
unreachable!()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn pg_get_user_by_id<I>(table_oid: Option<I>, _ctx: &mut EvalContext) -> Option<String>
|
||||
where
|
||||
I: AsPrimitive<u32>,
|
||||
{
|
||||
// TODO(J0HN50N133): We lack way to get the user_info by a numeric value. Once we have it, we can implement this function.
|
||||
table_oid.map(|_| "".to_string())
|
||||
}
|
||||
@@ -1,73 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Result;
|
||||
use datafusion::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datafusion_expr::{Signature, Volatility};
|
||||
use datatypes::prelude::{DataType, VectorRef};
|
||||
use datatypes::types::LogicalPrimitiveType;
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::scalars::expression::{EvalContext, scalar_unary_op};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct PGTableIsVisibleFunction;
|
||||
|
||||
const NAME: &str = crate::pg_catalog_func_fullname!("pg_table_is_visible");
|
||||
|
||||
impl fmt::Display for PGTableIsVisibleFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, crate::pg_catalog_func_fullname!("PG_TABLE_IS_VISIBLE"))
|
||||
}
|
||||
}
|
||||
|
||||
// `Function` implementation for `pg_catalog.pg_table_is_visible`.
impl Function for PGTableIsVisibleFunction {
|
||||
/// Returns the fully qualified function name stored in `NAME`.
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
/// The result type is always `Boolean`; the argument types are ignored.
fn return_type(&self, _: &[ArrowDataType]) -> Result<ArrowDataType> {
|
||||
Ok(ArrowDataType::Boolean)
|
||||
}
|
||||
|
||||
/// Declares a uniform signature: one argument of type `UInt32`.
fn signature(&self) -> Signature {
|
||||
Signature::uniform(
|
||||
1,
|
||||
vec![arrow::datatypes::DataType::UInt32],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
/// Applies `pg_table_is_visible` to every element of the first column.
///
/// The native element type is selected from the column's logical type id by
/// `with_match_primitive_type_id!`. Indexing `columns[0]` panics when
/// `columns` is empty.
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$T| {
|
||||
let col = scalar_unary_op::<<$T as LogicalPrimitiveType>::Native, bool, _>(&columns[0], pg_table_is_visible, &mut EvalContext::default())?;
|
||||
Ok(Arc::new(col))
|
||||
}, {
|
||||
// Fallback arm of the macro; presumably reached only for non-primitive
// column types — TODO confirm against the macro definition.
unreachable!()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn pg_table_is_visible<I>(table_oid: Option<I>, _ctx: &mut EvalContext) -> Option<bool>
|
||||
where
|
||||
I: AsPrimitive<u32>,
|
||||
{
|
||||
// There is no table visibility in greptime, so we always return true
|
||||
table_oid.map(|_| true)
|
||||
}
|
||||
@@ -27,13 +27,13 @@ pub(crate) struct PGVersionFunction;
|
||||
|
||||
impl fmt::Display for PGVersionFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, crate::pg_catalog_func_fullname!("VERSION"))
|
||||
write!(f, "pg_catalog.VERSION")
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for PGVersionFunction {
|
||||
fn name(&self) -> &str {
|
||||
crate::pg_catalog_func_fullname!("version")
|
||||
"pg_catalog.version"
|
||||
}
|
||||
|
||||
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
|
||||
|
||||
Reference in New Issue
Block a user