feat: upgraded pg_catalog support (#6918)

* refactor: add datafusion-postgres dependency

* refactor: move and include pg_catalog udfs

* chore: update upstream

* feat: register table function pg_get_keywords

* feat: bridge CatalogInfo for our CatalogManager

Signed-off-by: Ning Sun <sunning@greptime.com>

* feat: convert pg_catalog table to our system table

* feat: bridge system catalog with datafusion-postgres

Signed-off-by: Ning Sun <sunning@greptime.com>

* feat: add more udfs

* feat: add compatibility rewriter to postgres handler

* fix: various fix

* fmt: fix

* fix: use functions from pg_catalog library

* fmt

* fix: sqlness runner

Signed-off-by: Ning Sun <sunning@greptime.com>

* test: adopt arrow 56.0 to 56.1 memory size change

* fix: add additional udfs

* chore: format

* refactor: return None when creating system table failed

Signed-off-by: Ning Sun <sunning@greptime.com>

* chore: provide safety comments about expect usage

---------

Signed-off-by: Ning Sun <sunning@greptime.com>
This commit is contained in:
Ning Sun
2025-09-25 12:05:34 +08:00
committed by GitHub
parent 91a727790d
commit 964dc254aa
31 changed files with 1294 additions and 1412 deletions

View File

@@ -37,6 +37,7 @@ datafusion-common.workspace = true
datafusion-expr.workspace = true
datafusion-functions-aggregate-common.workspace = true
datafusion-physical-expr.workspace = true
datafusion-postgres.workspace = true
datatypes.workspace = true
derive_more = { version = "1", default-features = false, features = ["display"] }
geo = { version = "0.29", optional = true }

View File

@@ -21,8 +21,7 @@ mod version;
use build::BuildFunction;
use database::{
ConnectionIdFunction, CurrentSchemaFunction, DatabaseFunction, PgBackendPidFunction,
ReadPreferenceFunction, SessionUserFunction,
ConnectionIdFunction, DatabaseFunction, PgBackendPidFunction, ReadPreferenceFunction,
};
use pg_catalog::PGCatalogFunction;
use procedure_state::ProcedureStateFunction;
@@ -37,9 +36,7 @@ impl SystemFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(BuildFunction);
registry.register_scalar(VersionFunction);
registry.register_scalar(CurrentSchemaFunction);
registry.register_scalar(DatabaseFunction);
registry.register_scalar(SessionUserFunction);
registry.register_scalar(ReadPreferenceFunction);
registry.register_scalar(PgBackendPidFunction);
registry.register_scalar(ConnectionIdFunction);

View File

@@ -26,10 +26,6 @@ use crate::function::{Function, find_function_context};
#[derive(Clone, Debug, Default)]
pub struct DatabaseFunction;
#[derive(Clone, Debug, Default)]
pub struct CurrentSchemaFunction;
pub struct SessionUserFunction;
pub struct ReadPreferenceFunction;
#[derive(Display)]
@@ -41,8 +37,6 @@ pub struct PgBackendPidFunction;
pub struct ConnectionIdFunction;
const DATABASE_FUNCTION_NAME: &str = "database";
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
const READ_PREFERENCE_FUNCTION_NAME: &str = "read_preference";
const PG_BACKEND_PID: &str = "pg_backend_pid";
const CONNECTION_ID: &str = "connection_id";
@@ -71,58 +65,6 @@ impl Function for DatabaseFunction {
}
}
// Though "current_schema" can be aliased to "database", to not cause any breaking changes,
// we are not doing it: not until https://github.com/apache/datafusion/issues/17469 is resolved.
impl Function for CurrentSchemaFunction {
fn name(&self) -> &str {
CURRENT_SCHEMA_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let func_ctx = find_function_context(&args)?;
let db = func_ctx.query_ctx.current_schema();
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(db))))
}
}
impl Function for SessionUserFunction {
fn name(&self) -> &str {
SESSION_USER_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let func_ctx = find_function_context(&args)?;
let user = func_ctx.query_ctx.current_user();
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
user.username().to_string(),
))))
}
}
impl Function for ReadPreferenceFunction {
fn name(&self) -> &str {
READ_PREFERENCE_FUNCTION_NAME
@@ -203,18 +145,6 @@ impl fmt::Display for DatabaseFunction {
}
}
impl fmt::Display for CurrentSchemaFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "CURRENT_SCHEMA")
}
}
impl fmt::Display for SessionUserFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "SESSION_USER")
}
}
impl fmt::Display for ReadPreferenceFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "READ_PREFERENCE")

View File

@@ -12,29 +12,168 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod pg_get_userbyid;
mod table_is_visible;
mod version;
use pg_get_userbyid::PGGetUserByIdFunction;
use table_is_visible::PGTableIsVisibleFunction;
use std::sync::Arc;
use common_query::error::Result;
use datafusion::arrow::array::{ArrayRef, StringArray, as_boolean_array};
use datafusion::catalog::TableFunction;
use datafusion::common::ScalarValue;
use datafusion::common::utils::SingleRowListArrayBuilder;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility};
use datafusion_postgres::pg_catalog::{self, PgCatalogStaticTables};
use datatypes::arrow::datatypes::{DataType, Field};
use derive_more::Display;
use version::PGVersionFunction;
use crate::function::{Function, find_function_context};
use crate::function_registry::FunctionRegistry;
#[macro_export]
macro_rules! pg_catalog_func_fullname {
($name:literal) => {
concat!("pg_catalog.", $name)
};
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
const CURRENT_SCHEMAS_FUNCTION_NAME: &str = "current_schemas";
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct CurrentSchemaFunction;
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct CurrentSchemasFunction;
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct SessionUserFunction;
// Though "current_schema" can be aliased to "database", to not cause any breaking changes,
// we are not doing it: not until https://github.com/apache/datafusion/issues/17469 is resolved.
impl Function for CurrentSchemaFunction {
fn name(&self) -> &str {
CURRENT_SCHEMA_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let func_ctx = find_function_context(&args)?;
let db = func_ctx.query_ctx.current_schema();
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(db))))
}
}
impl Function for SessionUserFunction {
fn name(&self) -> &str {
SESSION_USER_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::Utf8View)
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let func_ctx = find_function_context(&args)?;
let user = func_ctx.query_ctx.current_user();
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
user.username().to_string(),
))))
}
}
impl Function for CurrentSchemasFunction {
fn name(&self) -> &str {
CURRENT_SCHEMAS_FUNCTION_NAME
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {
Ok(DataType::List(Arc::new(Field::new(
"x",
DataType::Utf8View,
false,
))))
}
fn signature(&self) -> Signature {
Signature::exact(vec![DataType::Boolean], Volatility::Immutable)
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(&args.args)?;
let input = as_boolean_array(&args[0]);
// Create a UTF8 array with a single value
let mut values = vec!["public"];
// include implicit schemas
if input.value(0) {
values.push("information_schema");
values.push("pg_catalog");
values.push("greptime_private");
}
let list_array = SingleRowListArrayBuilder::new(Arc::new(StringArray::from(values)));
let array: ArrayRef = Arc::new(list_array.build_list_array());
Ok(ColumnarValue::Array(array))
}
}
pub(super) struct PGCatalogFunction;
impl PGCatalogFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register_scalar(PGTableIsVisibleFunction);
registry.register_scalar(PGGetUserByIdFunction);
let static_tables =
Arc::new(PgCatalogStaticTables::try_new().expect("load postgres static tables"));
registry.register_scalar(PGVersionFunction);
registry.register_scalar(CurrentSchemaFunction);
registry.register_scalar(CurrentSchemasFunction);
registry.register_scalar(SessionUserFunction);
registry.register(pg_catalog::format_type::create_format_type_udf());
registry.register(pg_catalog::create_pg_get_partkeydef_udf());
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(
"has_table_privilege",
));
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(
"has_schema_privilege",
));
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(
"has_database_privilege",
));
registry.register(pg_catalog::has_privilege_udf::create_has_privilege_udf(
"has_any_column_privilege",
));
registry.register_table_function(TableFunction::new(
"pg_get_keywords".to_string(),
static_tables.pg_get_keywords.clone(),
));
registry.register(pg_catalog::create_pg_relation_is_publishable_udf());
registry.register(pg_catalog::create_pg_get_statisticsobjdef_columns_udf());
registry.register(pg_catalog::create_pg_get_userbyid_udf());
registry.register(pg_catalog::create_pg_table_is_visible());
registry.register(pg_catalog::pg_get_expr_udf::create_pg_get_expr_udf());
// TODO(sunng87): upgrade datafusion to add
//registry.register(pg_catalog::create_pg_encoding_to_char_udf());
}
}

View File

@@ -1,73 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self};
use std::sync::Arc;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType as ArrowDataType;
use datafusion_expr::{Signature, Volatility};
use datatypes::prelude::{DataType, VectorRef};
use datatypes::types::LogicalPrimitiveType;
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use crate::function::{Function, FunctionContext};
use crate::scalars::expression::{EvalContext, scalar_unary_op};
#[derive(Clone, Debug, Default)]
pub struct PGGetUserByIdFunction;
const NAME: &str = crate::pg_catalog_func_fullname!("pg_get_userbyid");
impl fmt::Display for PGGetUserByIdFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, crate::pg_catalog_func_fullname!("PG_GET_USERBYID"))
}
}
impl Function for PGGetUserByIdFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[ArrowDataType]) -> Result<ArrowDataType> {
Ok(ArrowDataType::Utf8)
}
fn signature(&self) -> Signature {
Signature::uniform(
1,
vec![arrow::datatypes::DataType::UInt32],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$T| {
let col = scalar_unary_op::<<$T as LogicalPrimitiveType>::Native, String, _>(&columns[0], pg_get_user_by_id, &mut EvalContext::default())?;
Ok(Arc::new(col))
}, {
unreachable!()
})
}
}
fn pg_get_user_by_id<I>(table_oid: Option<I>, _ctx: &mut EvalContext) -> Option<String>
where
I: AsPrimitive<u32>,
{
// TODO(J0HN50N133): We lack way to get the user_info by a numeric value. Once we have it, we can implement this function.
table_oid.map(|_| "".to_string())
}

View File

@@ -1,73 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self};
use std::sync::Arc;
use common_query::error::Result;
use datafusion::arrow::datatypes::DataType as ArrowDataType;
use datafusion_expr::{Signature, Volatility};
use datatypes::prelude::{DataType, VectorRef};
use datatypes::types::LogicalPrimitiveType;
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use crate::function::{Function, FunctionContext};
use crate::scalars::expression::{EvalContext, scalar_unary_op};
#[derive(Clone, Debug, Default)]
pub struct PGTableIsVisibleFunction;
const NAME: &str = crate::pg_catalog_func_fullname!("pg_table_is_visible");
impl fmt::Display for PGTableIsVisibleFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, crate::pg_catalog_func_fullname!("PG_TABLE_IS_VISIBLE"))
}
}
impl Function for PGTableIsVisibleFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _: &[ArrowDataType]) -> Result<ArrowDataType> {
Ok(ArrowDataType::Boolean)
}
fn signature(&self) -> Signature {
Signature::uniform(
1,
vec![arrow::datatypes::DataType::UInt32],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$T| {
let col = scalar_unary_op::<<$T as LogicalPrimitiveType>::Native, bool, _>(&columns[0], pg_table_is_visible, &mut EvalContext::default())?;
Ok(Arc::new(col))
}, {
unreachable!()
})
}
}
fn pg_table_is_visible<I>(table_oid: Option<I>, _ctx: &mut EvalContext) -> Option<bool>
where
I: AsPrimitive<u32>,
{
// There is no table visibility in greptime, so we always return true
table_oid.map(|_| true)
}

View File

@@ -27,13 +27,13 @@ pub(crate) struct PGVersionFunction;
impl fmt::Display for PGVersionFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, crate::pg_catalog_func_fullname!("VERSION"))
write!(f, "pg_catalog.VERSION")
}
}
impl Function for PGVersionFunction {
fn name(&self) -> &str {
crate::pg_catalog_func_fullname!("version")
"pg_catalog.version"
}
fn return_type(&self, _: &[DataType]) -> Result<DataType> {