feat: support function alias (#6917)

* feat: udf alias

Signed-off-by: luofucong <luofc@foxmail.com>

* trying to fix sqlness

Signed-off-by: luofucong <luofc@foxmail.com>

* x

Signed-off-by: luofucong <luofc@foxmail.com>

---------

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
LFC
2025-09-08 16:57:24 +08:00
committed by GitHub
parent c9377e7c5a
commit 47384c7701
8 changed files with 108 additions and 40 deletions

View File

@@ -70,6 +70,10 @@ pub trait Function: fmt::Display + Sync + Send {
/// Evaluate the function, e.g. run/execute the function.
fn eval(&self, ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef>;
/// Alternative names for this function.
///
/// The default implementation returns an empty slice, i.e. the function has no aliases.
fn aliases(&self) -> &[String] {
&[]
}
}
pub type FunctionRef = Arc<dyn Function>;

View File

@@ -64,7 +64,18 @@ impl FunctionRegistry {
/// Register a scalar function in the registry.
///
/// Besides the function itself, one extra entry is registered for every name returned by
/// the function's `aliases()`.
pub fn register_scalar(&self, func: impl Function + 'static) {
self.register(Arc::new(func) as FunctionRef);
let func = Arc::new(func) as FunctionRef;
for alias in func.aliases() {
// Convert the function into a factory, then override only `name` with the alias;
// the struct update syntax (`..func`) keeps every other field of the factory as is.
let func: ScalarFunctionFactory = func.clone().into();
let alias = ScalarFunctionFactory {
name: alias.to_string(),
..func
};
self.register(alias);
}
// Finally register the function itself.
self.register(func)
}
/// Register an aggregate function in the registry.

View File

@@ -29,7 +29,7 @@ pub(crate) struct IpFunctions;
impl IpFunctions {
pub fn register(registry: &FunctionRegistry) {
// Register IPv4 functions
registry.register_scalar(Ipv4NumToString);
registry.register_scalar(Ipv4NumToString::default());
registry.register_scalar(Ipv4StringToNum);
registry.register_scalar(Ipv4ToCidr);
registry.register_scalar(Ipv4InRange);

View File

@@ -34,9 +34,19 @@ use crate::function::{Function, FunctionContext};
/// For example:
/// - 167772160 (0x0A000000) returns "10.0.0.0"
/// - 3232235521 (0xC0A80001) returns "192.168.0.1"
#[derive(Clone, Debug, Default, Display)]
#[derive(Clone, Debug, Display)]
#[display("{}", self.name())]
pub struct Ipv4NumToString;
pub struct Ipv4NumToString {
/// Alias names of this function; the array type fixes the count at exactly one.
aliases: [String; 1],
}
impl Default for Ipv4NumToString {
/// Builds the function with "inet_ntoa" as its only alias.
fn default() -> Self {
Self {
aliases: ["inet_ntoa".to_string()],
}
}
}
impl Function for Ipv4NumToString {
fn name(&self) -> &str {
@@ -85,6 +95,10 @@ impl Function for Ipv4NumToString {
Ok(results.to_vector())
}
/// Returns the alias names stored on this instance.
fn aliases(&self) -> &[String] {
&self.aliases
}
}
/// Function that converts a string representation of an IPv4 address to a UInt32 number.
@@ -156,7 +170,7 @@ mod tests {
#[test]
fn test_ipv4_num_to_string() {
let func = Ipv4NumToString;
let func = Ipv4NumToString::default();
let ctx = FunctionContext::default();
// Test data
@@ -193,7 +207,7 @@ mod tests {
#[test]
fn test_ipv4_conversions_roundtrip() {
let to_num = Ipv4StringToNum;
let to_string = Ipv4NumToString;
let to_string = Ipv4NumToString::default();
let ctx = FunctionContext::default();
// Test data for string to num to string

View File

@@ -50,6 +50,10 @@ impl ScalarUDFImpl for ScalarUdf {
self.function.name()
}
/// Forwards to the `aliases()` of the wrapped function.
fn aliases(&self) -> &[String] {
self.function.aliases()
}
fn signature(&self) -> &datafusion_expr::Signature {
&self.signature
}

View File

@@ -68,6 +68,8 @@ impl Function for DatabaseFunction {
}
}
// "current_schema" could be made an alias of "database", but we deliberately do not do so yet,
// to avoid breaking changes, until https://github.com/apache/datafusion/issues/17469 is resolved.
impl Function for CurrentSchemaFunction {
fn name(&self) -> &str {
CURRENT_SCHEMA_FUNCTION_NAME

View File

@@ -32,7 +32,7 @@ CREATE TABLE network_traffic (
`time` TIMESTAMP DEFAULT 0,
source_ip STRING,
dest_ip STRING,
bytes_sent UINT64,
bytes_sent UINT64,
PRIMARY KEY(`id`),
TIME INDEX(`time`)
);
@@ -78,9 +78,9 @@ Affected Rows: 8
-- Test IPv4 string/number conversion functions
-- SQLNESS SORT_RESULT 3 1
SELECT
`id`,
ip_addr,
SELECT
`id`,
ip_addr,
ip_numeric,
ipv4_string_to_num(ip_addr) AS computed_numeric,
ipv4_num_to_string(ip_numeric) AS computed_addr
@@ -99,9 +99,32 @@ FROM ip_v4_data;
| 8 | 0.0.0.0 | 0 | 0 | 0.0.0.0 |
+----+-----------------+------------+------------------+-----------------+
-- Test IPv4 string/number conversion functions, by the function alias
-- SQLNESS SORT_RESULT 3 1
SELECT
`id`,
ip_addr,
ip_numeric,
ipv4_string_to_num(ip_addr) AS computed_numeric,
inet_ntoa(ip_numeric) AS computed_addr
FROM ip_v4_data;
+----+-----------------+------------+------------------+-----------------+
| id | ip_addr | ip_numeric | computed_numeric | computed_addr |
+----+-----------------+------------+------------------+-----------------+
| 1 | 192.168.1.1 | 3232235777 | 3232235777 | 192.168.1.1 |
| 2 | 10.0.0.1 | 167772161 | 167772161 | 10.0.0.1 |
| 3 | 172.16.0.1 | 2886729729 | 2886729729 | 172.16.0.1 |
| 4 | 127.0.0.1 | 2130706433 | 2130706433 | 127.0.0.1 |
| 5 | 8.8.8.8 | 134744072 | 134744072 | 8.8.8.8 |
| 6 | 192.168.0.1 | 3232235521 | 3232235521 | 192.168.0.1 |
| 7 | 255.255.255.255 | 4294967295 | 4294967295 | 255.255.255.255 |
| 8 | 0.0.0.0 | 0 | 0 | 0.0.0.0 |
+----+-----------------+------------+------------------+-----------------+
-- Test IPv4 CIDR functions
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
`id`,
ip_addr,
subnet_mask,
@@ -126,7 +149,7 @@ FROM ip_v4_data;
-- Test IPv4 range checks
-- SQLNESS SORT_RESULT 3 1
-- Only get IPv4 records
SELECT
SELECT
t.`id`,
t.source_ip,
t.dest_ip,
@@ -159,9 +182,9 @@ WHERE t.source_ip NOT LIKE '%:%';
-- Test IPv6 string/hex conversion functions
-- SQLNESS SORT_RESULT 3 1
SELECT
`id`,
ip_addr,
SELECT
`id`,
ip_addr,
ip_hex,
ipv6_num_to_string(ip_hex) AS computed_addr
FROM ip_v6_data;
@@ -179,7 +202,7 @@ FROM ip_v6_data;
-- Test IPv6 CIDR functions
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
`id`,
ip_addr,
subnet_mask,
@@ -202,7 +225,7 @@ FROM ip_v6_data;
-- Test IPv6 range checks
-- SQLNESS SORT_RESULT 3 1
-- Only get IPv6 records
SELECT
SELECT
t.`id`,
t.source_ip,
t.dest_ip,
@@ -227,14 +250,14 @@ WHERE t.source_ip LIKE '%:%';
-- Combined IPv4/IPv6 example - Security analysis
-- Find all traffic from the same network to specific IPs
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
source_ip,
dest_ip,
bytes_sent,
CASE
WHEN source_ip LIKE '%:%' THEN
CASE
WHEN source_ip LIKE '%:%' THEN
ipv6_to_cidr(source_ip, arrow_cast(64, 'UInt8'))
ELSE
ELSE
ipv4_to_cidr(source_ip, arrow_cast(24, 'UInt8'))
END AS source_network,
CASE
@@ -261,7 +284,7 @@ ORDER BY bytes_sent DESC;
-- Subnet analysis - IPv4
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
ipv4_to_cidr(source_ip, arrow_cast(24,'UInt8')) AS subnet,
COUNT(*) AS device_count,
SUM(bytes_sent) AS total_bytes
@@ -280,7 +303,7 @@ ORDER BY total_bytes DESC;
-- Subnet analysis - IPv6
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
ipv6_to_cidr(source_ip, arrow_cast(48,'UInt8')) AS subnet,
COUNT(*) AS device_count,
SUM(bytes_sent) AS total_bytes

View File

@@ -28,7 +28,7 @@ CREATE TABLE network_traffic (
`time` TIMESTAMP DEFAULT 0,
source_ip STRING,
dest_ip STRING,
bytes_sent UINT64,
bytes_sent UINT64,
PRIMARY KEY(`id`),
TIME INDEX(`time`)
);
@@ -66,17 +66,27 @@ INSERT INTO network_traffic (`id`, source_ip, dest_ip, bytes_sent) VALUES
-- Test IPv4 string/number conversion functions
-- SQLNESS SORT_RESULT 3 1
SELECT
`id`,
ip_addr,
SELECT
`id`,
ip_addr,
ip_numeric,
ipv4_string_to_num(ip_addr) AS computed_numeric,
ipv4_num_to_string(ip_numeric) AS computed_addr
FROM ip_v4_data;
-- Test IPv4 string/number conversion functions, by the function alias
-- SQLNESS SORT_RESULT 3 1
SELECT
`id`,
ip_addr,
ip_numeric,
ipv4_string_to_num(ip_addr) AS computed_numeric,
inet_ntoa(ip_numeric) AS computed_addr
FROM ip_v4_data;
-- Test IPv4 CIDR functions
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
`id`,
ip_addr,
subnet_mask,
@@ -87,7 +97,7 @@ FROM ip_v4_data;
-- Test IPv4 range checks
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
t.`id`,
t.source_ip,
t.dest_ip,
@@ -102,16 +112,16 @@ WHERE t.source_ip NOT LIKE '%:%';
-- Test IPv6 string/hex conversion functions
-- SQLNESS SORT_RESULT 3 1
SELECT
`id`,
ip_addr,
SELECT
`id`,
ip_addr,
ip_hex,
ipv6_num_to_string(ip_hex) AS computed_addr
FROM ip_v6_data;
-- Test IPv6 CIDR functions
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
`id`,
ip_addr,
subnet_mask,
@@ -122,7 +132,7 @@ FROM ip_v6_data;
-- Test IPv6 range checks
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
t.`id`,
t.source_ip,
t.dest_ip,
@@ -138,14 +148,14 @@ WHERE t.source_ip LIKE '%:%';
-- Combined IPv4/IPv6 example - Security analysis
-- Find all traffic from the same network to specific IPs
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
source_ip,
dest_ip,
bytes_sent,
CASE
WHEN source_ip LIKE '%:%' THEN
CASE
WHEN source_ip LIKE '%:%' THEN
ipv6_to_cidr(source_ip, arrow_cast(64, 'UInt8'))
ELSE
ELSE
ipv4_to_cidr(source_ip, arrow_cast(24, 'UInt8'))
END AS source_network,
CASE
@@ -159,7 +169,7 @@ ORDER BY bytes_sent DESC;
-- Subnet analysis - IPv4
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
ipv4_to_cidr(source_ip, arrow_cast(24,'UInt8')) AS subnet,
COUNT(*) AS device_count,
SUM(bytes_sent) AS total_bytes
@@ -170,7 +180,7 @@ ORDER BY total_bytes DESC;
-- Subnet analysis - IPv6
-- SQLNESS SORT_RESULT 3 1
SELECT
SELECT
ipv6_to_cidr(source_ip, arrow_cast(48,'UInt8')) AS subnet,
COUNT(*) AS device_count,
SUM(bytes_sent) AS total_bytes