mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-22 22:20:02 +00:00
feat: support function aliases and add MySQL-compatible aliases (#7410)
* feat: support function aliases and add MySQL-compatible aliases
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* fix: get_table_function_source
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* refactor: add function_alias mod
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
* fix: license
  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
---------
Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
This commit is contained in:
@@ -41,6 +41,8 @@ use snafu::{Location, ResultExt};
|
||||
use crate::error::{CatalogSnafu, Result};
|
||||
use crate::query_engine::{DefaultPlanDecoder, QueryEngineState};
|
||||
|
||||
mod function_alias;
|
||||
|
||||
pub struct DfContextProviderAdapter {
|
||||
engine_state: Arc<QueryEngineState>,
|
||||
session_state: SessionState,
|
||||
@@ -147,7 +149,17 @@ impl ContextProvider for DfContextProviderAdapter {
|
||||
|
||||
fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
|
||||
self.engine_state.scalar_function(name).map_or_else(
|
||||
|| self.session_state.scalar_functions().get(name).cloned(),
|
||||
|| {
|
||||
self.session_state
|
||||
.scalar_functions()
|
||||
.get(name)
|
||||
.cloned()
|
||||
.or_else(|| {
|
||||
function_alias::resolve_scalar(name).and_then(|name| {
|
||||
self.session_state.scalar_functions().get(name).cloned()
|
||||
})
|
||||
})
|
||||
},
|
||||
|func| {
|
||||
Some(Arc::new(func.provide(FunctionContext {
|
||||
query_ctx: self.query_ctx.clone(),
|
||||
@@ -159,7 +171,17 @@ impl ContextProvider for DfContextProviderAdapter {
|
||||
|
||||
fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
|
||||
self.engine_state.aggr_function(name).map_or_else(
|
||||
|| self.session_state.aggregate_functions().get(name).cloned(),
|
||||
|| {
|
||||
self.session_state
|
||||
.aggregate_functions()
|
||||
.get(name)
|
||||
.cloned()
|
||||
.or_else(|| {
|
||||
function_alias::resolve_aggregate(name).and_then(|name| {
|
||||
self.session_state.aggregate_functions().get(name).cloned()
|
||||
})
|
||||
})
|
||||
},
|
||||
|func| Some(Arc::new(func)),
|
||||
)
|
||||
}
|
||||
@@ -193,12 +215,14 @@ impl ContextProvider for DfContextProviderAdapter {
|
||||
fn udf_names(&self) -> Vec<String> {
|
||||
let mut names = self.engine_state.scalar_names();
|
||||
names.extend(self.session_state.scalar_functions().keys().cloned());
|
||||
names.extend(function_alias::scalar_alias_names().map(|name| name.to_string()));
|
||||
names
|
||||
}
|
||||
|
||||
fn udaf_names(&self) -> Vec<String> {
|
||||
let mut names = self.engine_state.aggr_names();
|
||||
names.extend(self.session_state.aggregate_functions().keys().cloned());
|
||||
names.extend(function_alias::aggregate_alias_names().map(|name| name.to_string()));
|
||||
names
|
||||
}
|
||||
|
||||
@@ -233,9 +257,14 @@ impl ContextProvider for DfContextProviderAdapter {
|
||||
.table_functions()
|
||||
.get(name)
|
||||
.cloned()
|
||||
.ok_or_else(|| {
|
||||
DataFusionError::Plan(format!("table function '{name}' not found"))
|
||||
})?;
|
||||
.or_else(|| {
|
||||
function_alias::resolve_scalar(name)
|
||||
.and_then(|alias| self.session_state.table_functions().get(alias).cloned())
|
||||
});
|
||||
|
||||
let tbl_func = tbl_func.ok_or_else(|| {
|
||||
DataFusionError::Plan(format!("table function '{name}' not found"))
|
||||
})?;
|
||||
let provider = tbl_func.create_table_provider(&args)?;
|
||||
|
||||
Ok(provider_as_source(provider))
|
||||
|
||||
86
src/query/src/datafusion/planner/function_alias.rs
Normal file
86
src/query/src/datafusion/planner/function_alias.rs
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
/// Scalar function aliases, stored as `(alias, target)` pairs.
///
/// The first element is the alternative name accepted in queries; the second is the
/// function name it resolves to.
const SCALAR_ALIASES: &[(&str, &str)] = &[
    // SQL compatibility aliases.
    ("ucase", "upper"),
    ("lcase", "lower"),
    ("ceiling", "ceil"),
    ("mid", "substr"),
    // MySQL's `RAND([seed])` takes an optional seed, whereas DataFusion's `random()` does
    // not. Only the name is aliased: `rand()` works, while `rand(seed)` is rejected
    // because of the arity mismatch.
    ("rand", "random"),
];
|
||||
|
||||
/// Aggregate function aliases, stored as `(alias, target)` pairs.
///
/// Only MySQL-compatible names that do not shadow an existing DataFusion aggregate are
/// listed here.
const AGGREGATE_ALIASES: &[(&str, &str)] = &[
    // NOTE: `stddev` is intentionally absent. DataFusion defines `stddev` as the *sample*
    // standard deviation, whereas MySQL's `STDDEV` is the *population* standard deviation,
    // so aliasing it would override an existing name with different semantics.
    ("std", "stddev_pop"),
    ("variance", "var_pop"),
];
|
||||
|
||||
static SCALAR_FUNCTION_ALIAS: Lazy<HashMap<&'static str, &'static str>> =
|
||||
Lazy::new(|| SCALAR_ALIASES.iter().copied().collect());
|
||||
|
||||
static AGGREGATE_FUNCTION_ALIAS: Lazy<HashMap<&'static str, &'static str>> =
|
||||
Lazy::new(|| AGGREGATE_ALIASES.iter().copied().collect());
|
||||
|
||||
pub fn resolve_scalar(name: &str) -> Option<&'static str> {
|
||||
let name = name.to_ascii_lowercase();
|
||||
SCALAR_FUNCTION_ALIAS.get(name.as_str()).copied()
|
||||
}
|
||||
|
||||
pub fn resolve_aggregate(name: &str) -> Option<&'static str> {
|
||||
let name = name.to_ascii_lowercase();
|
||||
AGGREGATE_FUNCTION_ALIAS.get(name.as_str()).copied()
|
||||
}
|
||||
|
||||
pub fn scalar_alias_names() -> impl Iterator<Item = &'static str> {
|
||||
SCALAR_ALIASES.iter().map(|(name, _)| *name)
|
||||
}
|
||||
|
||||
pub fn aggregate_alias_names() -> impl Iterator<Item = &'static str> {
|
||||
AGGREGATE_ALIASES.iter().map(|(name, _)| *name)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{resolve_aggregate, resolve_scalar};

    /// Scalar aliases resolve to their targets regardless of ASCII case; unknown names
    /// resolve to `None`.
    #[test]
    fn resolves_scalar_aliases_case_insensitive() {
        assert_eq!(resolve_scalar("ucase"), Some("upper"));
        assert_eq!(resolve_scalar("UCASE"), Some("upper"));
        assert_eq!(resolve_scalar("lcase"), Some("lower"));
        assert_eq!(resolve_scalar("ceiling"), Some("ceil"));
        assert_eq!(resolve_scalar("Ceiling"), Some("ceil"));
        assert_eq!(resolve_scalar("MID"), Some("substr"));
        assert_eq!(resolve_scalar("RAND"), Some("random"));
        assert_eq!(resolve_scalar("not_a_real_alias"), None);
    }

    /// Aggregate aliases resolve to their targets regardless of ASCII case; unknown names
    /// resolve to `None`.
    #[test]
    fn resolves_aggregate_aliases_case_insensitive() {
        assert_eq!(resolve_aggregate("std"), Some("stddev_pop"));
        assert_eq!(resolve_aggregate("variance"), Some("var_pop"));
        // Upper- and mixed-case spellings must hit the same entries; without these the
        // case-insensitive path is never exercised with a name that should resolve.
        assert_eq!(resolve_aggregate("STD"), Some("stddev_pop"));
        assert_eq!(resolve_aggregate("Variance"), Some("var_pop"));
        // `stddev` is deliberately not registered as an alias, in any case.
        assert_eq!(resolve_aggregate("STDDEV"), None);
        assert_eq!(resolve_aggregate("not_a_real_alias"), None);
    }
}
|
||||
72
tests/cases/standalone/common/function/function_alias.result
Normal file
72
tests/cases/standalone/common/function/function_alias.result
Normal file
@@ -0,0 +1,72 @@
|
||||
-- MySQL-compatible function alias tests
|
||||
-- ucase -> upper
|
||||
SELECT
|
||||
ucase('dataFusion') AS ucase_value,
|
||||
upper('dataFusion') AS upper_value;
|
||||
|
||||
+-------------+-------------+
|
||||
| ucase_value | upper_value |
|
||||
+-------------+-------------+
|
||||
| DATAFUSION | DATAFUSION |
|
||||
+-------------+-------------+
|
||||
|
||||
-- lcase -> lower
|
||||
SELECT
|
||||
lcase('DataFusion') AS lcase_value,
|
||||
lower('DataFusion') AS lower_value;
|
||||
|
||||
+-------------+-------------+
|
||||
| lcase_value | lower_value |
|
||||
+-------------+-------------+
|
||||
| datafusion | datafusion |
|
||||
+-------------+-------------+
|
||||
|
||||
-- ceiling -> ceil
|
||||
SELECT
|
||||
ceiling(1.2) AS ceiling_pos,
|
||||
ceil(1.2) AS ceil_pos,
|
||||
ceiling(-1.2) AS ceiling_neg,
|
||||
ceil(-1.2) AS ceil_neg;
|
||||
|
||||
+-------------+----------+-------------+----------+
|
||||
| ceiling_pos | ceil_pos | ceiling_neg | ceil_neg |
|
||||
+-------------+----------+-------------+----------+
|
||||
| 2.0 | 2.0 | -1.0 | -1.0 |
|
||||
+-------------+----------+-------------+----------+
|
||||
|
||||
-- mid -> substr
|
||||
SELECT
|
||||
mid('datafusion', 5, 3) AS mid_value,
|
||||
substr('datafusion', 5, 3) AS substr_value;
|
||||
|
||||
+-----------+--------------+
|
||||
| mid_value | substr_value |
|
||||
+-----------+--------------+
|
||||
| fus | fus |
|
||||
+-----------+--------------+
|
||||
|
||||
-- rand -> random
|
||||
-- NOTE: RAND([seed]) is supported by MySQL, but seed is not supported here.
|
||||
-- This test only validates that rand() exists and returns values in [0, 1).
|
||||
SELECT rand() >= 0.0 AND rand() < 1.0 AS rand_in_range;
|
||||
|
||||
+---------------+
|
||||
| rand_in_range |
|
||||
+---------------+
|
||||
| true |
|
||||
+---------------+
|
||||
|
||||
-- std -> stddev_pop, variance -> var_pop
|
||||
SELECT
|
||||
round(std(x), 6) AS std_value,
|
||||
round(stddev_pop(x), 6) AS stddev_pop_value,
|
||||
round(variance(x), 6) AS variance_value,
|
||||
round(var_pop(x), 6) AS var_pop_value
|
||||
FROM (VALUES (1.0), (2.0), (3.0)) AS t(x);
|
||||
|
||||
+-----------+------------------+----------------+---------------+
|
||||
| std_value | stddev_pop_value | variance_value | var_pop_value |
|
||||
+-----------+------------------+----------------+---------------+
|
||||
| 0.816497 | 0.816497 | 0.666667 | 0.666667 |
|
||||
+-----------+------------------+----------------+---------------+
|
||||
|
||||
36
tests/cases/standalone/common/function/function_alias.sql
Normal file
36
tests/cases/standalone/common/function/function_alias.sql
Normal file
@@ -0,0 +1,36 @@
|
||||
-- MySQL-compatible function alias tests
|
||||
|
||||
-- ucase -> upper
|
||||
SELECT
|
||||
ucase('dataFusion') AS ucase_value,
|
||||
upper('dataFusion') AS upper_value;
|
||||
|
||||
-- lcase -> lower
|
||||
SELECT
|
||||
lcase('DataFusion') AS lcase_value,
|
||||
lower('DataFusion') AS lower_value;
|
||||
|
||||
-- ceiling -> ceil
|
||||
SELECT
|
||||
ceiling(1.2) AS ceiling_pos,
|
||||
ceil(1.2) AS ceil_pos,
|
||||
ceiling(-1.2) AS ceiling_neg,
|
||||
ceil(-1.2) AS ceil_neg;
|
||||
|
||||
-- mid -> substr
|
||||
SELECT
|
||||
mid('datafusion', 5, 3) AS mid_value,
|
||||
substr('datafusion', 5, 3) AS substr_value;
|
||||
|
||||
-- rand -> random
|
||||
-- NOTE: RAND([seed]) is supported by MySQL, but seed is not supported here.
|
||||
-- This test only validates that rand() exists and returns values in [0, 1).
|
||||
SELECT rand() >= 0.0 AND rand() < 1.0 AS rand_in_range;
|
||||
|
||||
-- std -> stddev_pop, variance -> var_pop
|
||||
SELECT
|
||||
round(std(x), 6) AS std_value,
|
||||
round(stddev_pop(x), 6) AS stddev_pop_value,
|
||||
round(variance(x), 6) AS variance_value,
|
||||
round(var_pop(x), 6) AS var_pop_value
|
||||
FROM (VALUES (1.0), (2.0), (3.0)) AS t(x);
|
||||
Reference in New Issue
Block a user