From 2dfcf35fee21b1b3bf96d937d5bcff3e50325708 Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Tue, 16 Dec 2025 14:56:23 +0800 Subject: [PATCH] feat: support function aliases and add MySQL-compatible aliases (#7410) * feat: support function aliases and add MySQL-compatible aliases Signed-off-by: Dennis Zhuang * fix: get_table_function_source Signed-off-by: Dennis Zhuang * refactor: add function_alias mod Signed-off-by: Dennis Zhuang * fix: license Signed-off-by: Dennis Zhuang --------- Signed-off-by: Dennis Zhuang --- src/query/src/datafusion/planner.rs | 39 +++++++-- .../src/datafusion/planner/function_alias.rs | 86 +++++++++++++++++++ .../common/function/function_alias.result | 72 ++++++++++++++++ .../common/function/function_alias.sql | 36 ++++++++ 4 files changed, 228 insertions(+), 5 deletions(-) create mode 100644 src/query/src/datafusion/planner/function_alias.rs create mode 100644 tests/cases/standalone/common/function/function_alias.result create mode 100644 tests/cases/standalone/common/function/function_alias.sql diff --git a/src/query/src/datafusion/planner.rs b/src/query/src/datafusion/planner.rs index d9c74b9d5a..43e7a04db1 100644 --- a/src/query/src/datafusion/planner.rs +++ b/src/query/src/datafusion/planner.rs @@ -41,6 +41,8 @@ use snafu::{Location, ResultExt}; use crate::error::{CatalogSnafu, Result}; use crate::query_engine::{DefaultPlanDecoder, QueryEngineState}; +mod function_alias; + pub struct DfContextProviderAdapter { engine_state: Arc, session_state: SessionState, @@ -147,7 +149,17 @@ impl ContextProvider for DfContextProviderAdapter { fn get_function_meta(&self, name: &str) -> Option> { self.engine_state.scalar_function(name).map_or_else( - || self.session_state.scalar_functions().get(name).cloned(), + || { + self.session_state + .scalar_functions() + .get(name) + .cloned() + .or_else(|| { + function_alias::resolve_scalar(name).and_then(|name| { + self.session_state.scalar_functions().get(name).cloned() + }) + }) + }, |func| { Some(Arc::new(func.provide(FunctionContext { query_ctx: self.query_ctx.clone(), @@ -159,7 +171,17 @@ impl ContextProvider for DfContextProviderAdapter { fn get_aggregate_meta(&self, name: &str) -> Option> { self.engine_state.aggr_function(name).map_or_else( - || self.session_state.aggregate_functions().get(name).cloned(), + || { + self.session_state + .aggregate_functions() + .get(name) + .cloned() + .or_else(|| { + function_alias::resolve_aggregate(name).and_then(|name| { + self.session_state.aggregate_functions().get(name).cloned() + }) + }) + }, |func| Some(Arc::new(func)), ) } @@ -193,12 +215,14 @@ impl ContextProvider for DfContextProviderAdapter { fn udf_names(&self) -> Vec { let mut names = self.engine_state.scalar_names(); names.extend(self.session_state.scalar_functions().keys().cloned()); + names.extend(function_alias::scalar_alias_names().map(|name| name.to_string())); names } fn udaf_names(&self) -> Vec { let mut names = self.engine_state.aggr_names(); names.extend(self.session_state.aggregate_functions().keys().cloned()); + names.extend(function_alias::aggregate_alias_names().map(|name| name.to_string())); names } @@ -233,9 +257,14 @@ impl ContextProvider for DfContextProviderAdapter { .table_functions() .get(name) .cloned() - .ok_or_else(|| { - DataFusionError::Plan(format!("table function '{name}' not found")) - })?; + .or_else(|| { + function_alias::resolve_scalar(name) + .and_then(|alias| self.session_state.table_functions().get(alias).cloned()) + }); + + let tbl_func = tbl_func.ok_or_else(|| { + DataFusionError::Plan(format!("table function '{name}' not found")) + })?; let provider = tbl_func.create_table_provider(&args)?; Ok(provider_as_source(provider)) diff --git a/src/query/src/datafusion/planner/function_alias.rs b/src/query/src/datafusion/planner/function_alias.rs new file mode 100644 index 0000000000..898ef81e93 --- /dev/null +++ b/src/query/src/datafusion/planner/function_alias.rs @@ -0,0 +1,86 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use once_cell::sync::Lazy; + +const SCALAR_ALIASES: &[(&str, &str)] = &[ + // SQL compat aliases. + ("ucase", "upper"), + ("lcase", "lower"), + ("ceiling", "ceil"), + ("mid", "substr"), + // MySQL's RAND([seed]) accepts an optional seed argument, while DataFusion's `random()` + // does not. We alias the name for `rand()` compatibility, and `rand(seed)` will error + // due to mismatched arity. + ("rand", "random"), +]; + +const AGGREGATE_ALIASES: &[(&str, &str)] = &[ + // MySQL compat aliases that don't override existing DataFusion aggregate names. + // + // NOTE: We intentionally do NOT alias `stddev` here, because DataFusion defines `stddev` + // as sample standard deviation while MySQL's `STDDEV` is population standard deviation. + ("std", "stddev_pop"), + ("variance", "var_pop"), +]; + +static SCALAR_FUNCTION_ALIAS: Lazy> = + Lazy::new(|| SCALAR_ALIASES.iter().copied().collect()); + +static AGGREGATE_FUNCTION_ALIAS: Lazy> = + Lazy::new(|| AGGREGATE_ALIASES.iter().copied().collect()); + +pub fn resolve_scalar(name: &str) -> Option<&'static str> { + let name = name.to_ascii_lowercase(); + SCALAR_FUNCTION_ALIAS.get(name.as_str()).copied() +} + +pub fn resolve_aggregate(name: &str) -> Option<&'static str> { + let name = name.to_ascii_lowercase(); + AGGREGATE_FUNCTION_ALIAS.get(name.as_str()).copied() +} + +pub fn scalar_alias_names() -> impl Iterator { + SCALAR_ALIASES.iter().map(|(name, _)| *name) +} + +pub fn aggregate_alias_names() -> impl Iterator { + AGGREGATE_ALIASES.iter().map(|(name, _)| *name) +} + +#[cfg(test)] +mod tests { + use super::{resolve_aggregate, resolve_scalar}; + + #[test] + fn resolves_scalar_aliases_case_insensitive() { + assert_eq!(resolve_scalar("ucase"), Some("upper")); + assert_eq!(resolve_scalar("UCASE"), Some("upper")); + assert_eq!(resolve_scalar("lcase"), Some("lower")); + assert_eq!(resolve_scalar("ceiling"), Some("ceil")); + assert_eq!(resolve_scalar("MID"), Some("substr")); + assert_eq!(resolve_scalar("RAND"), Some("random")); + assert_eq!(resolve_scalar("not_a_real_alias"), None); + } + + #[test] + fn resolves_aggregate_aliases_case_insensitive() { + assert_eq!(resolve_aggregate("std"), Some("stddev_pop")); + assert_eq!(resolve_aggregate("variance"), Some("var_pop")); + assert_eq!(resolve_aggregate("STDDEV"), None); + assert_eq!(resolve_aggregate("not_a_real_alias"), None); + } +} diff --git a/tests/cases/standalone/common/function/function_alias.result b/tests/cases/standalone/common/function/function_alias.result new file mode 100644 index 0000000000..fe41c83ccb --- /dev/null +++ b/tests/cases/standalone/common/function/function_alias.result @@ -0,0 +1,72 @@ +-- MySQL-compatible function alias tests +-- ucase -> upper +SELECT + ucase('dataFusion') AS ucase_value, + upper('dataFusion') AS upper_value; + ++-------------+-------------+ +| ucase_value | upper_value | ++-------------+-------------+ +| DATAFUSION | DATAFUSION | ++-------------+-------------+ + +-- lcase -> lower +SELECT + lcase('DataFusion') AS lcase_value, + lower('DataFusion') AS lower_value; + ++-------------+-------------+ +| lcase_value | lower_value | ++-------------+-------------+ +| datafusion | datafusion | ++-------------+-------------+ + +-- ceiling -> ceil +SELECT + ceiling(1.2) AS ceiling_pos, + ceil(1.2) AS ceil_pos, + ceiling(-1.2) AS ceiling_neg, + ceil(-1.2) AS ceil_neg; + ++-------------+----------+-------------+----------+ +| ceiling_pos | ceil_pos | ceiling_neg | ceil_neg | ++-------------+----------+-------------+----------+ +| 2.0 | 2.0 | -1.0 | -1.0 | ++-------------+----------+-------------+----------+ + +-- mid -> substr +SELECT + mid('datafusion', 5, 3) AS mid_value, + substr('datafusion', 5, 3) AS substr_value; + ++-----------+--------------+ +| mid_value | substr_value | ++-----------+--------------+ +| fus | fus | ++-----------+--------------+ + +-- rand -> random +-- NOTE: RAND([seed]) is supported by MySQL, but seed is not supported here. +-- This test only validates that rand() exists and returns values in [0, 1). +SELECT rand() >= 0.0 AND rand() < 1.0 AS rand_in_range; + ++---------------+ +| rand_in_range | ++---------------+ +| true | ++---------------+ + +-- std -> stddev_pop, variance -> var_pop +SELECT + round(std(x), 6) AS std_value, + round(stddev_pop(x), 6) AS stddev_pop_value, + round(variance(x), 6) AS variance_value, + round(var_pop(x), 6) AS var_pop_value +FROM (VALUES (1.0), (2.0), (3.0)) AS t(x); + ++-----------+------------------+----------------+---------------+ +| std_value | stddev_pop_value | variance_value | var_pop_value | ++-----------+------------------+----------------+---------------+ +| 0.816497 | 0.816497 | 0.666667 | 0.666667 | ++-----------+------------------+----------------+---------------+ + diff --git a/tests/cases/standalone/common/function/function_alias.sql b/tests/cases/standalone/common/function/function_alias.sql new file mode 100644 index 0000000000..3582bfe565 --- /dev/null +++ b/tests/cases/standalone/common/function/function_alias.sql @@ -0,0 +1,36 @@ +-- MySQL-compatible function alias tests + +-- ucase -> upper +SELECT + ucase('dataFusion') AS ucase_value, + upper('dataFusion') AS upper_value; + +-- lcase -> lower +SELECT + lcase('DataFusion') AS lcase_value, + lower('DataFusion') AS lower_value; + +-- ceiling -> ceil +SELECT + ceiling(1.2) AS ceiling_pos, + ceil(1.2) AS ceil_pos, + ceiling(-1.2) AS ceiling_neg, + ceil(-1.2) AS ceil_neg; + +-- mid -> substr +SELECT + mid('datafusion', 5, 3) AS mid_value, + substr('datafusion', 5, 3) AS substr_value; + +-- rand -> random +-- NOTE: RAND([seed]) is supported by MySQL, but seed is not supported here. +-- This test only validates that rand() exists and returns values in [0, 1). +SELECT rand() >= 0.0 AND rand() < 1.0 AS rand_in_range; + +-- std -> stddev_pop, variance -> var_pop +SELECT + round(std(x), 6) AS std_value, + round(stddev_pop(x), 6) AS stddev_pop_value, + round(variance(x), 6) AS variance_value, + round(var_pop(x), 6) AS var_pop_value +FROM (VALUES (1.0), (2.0), (3.0)) AS t(x);