From 1255c1fc9e6f2875ac6b4c1c043db5d4c6479906 Mon Sep 17 00:00:00 2001 From: tison Date: Tue, 12 Mar 2024 09:46:19 +0800 Subject: [PATCH] feat: to_timezone function (#3470) * feat: to_timezone function Signed-off-by: tison * impl Function for ToTimezoneFunction Signed-off-by: tison * add test Signed-off-by: tison * Add original authors Co-authored-by: parkma99 Co-authored-by: Yingwen * fixup Signed-off-by: tison * address comments Signed-off-by: tison * add issue link Signed-off-by: tison * code refactor Signed-off-by: tison * further tidy Signed-off-by: tison --------- Signed-off-by: tison Co-authored-by: parkma99 Co-authored-by: Yingwen --- .editorconfig | 10 + src/common/function/src/scalars/timestamp.rs | 3 + .../src/scalars/timestamp/to_timezone.rs | 260 ++++++++++++++++++ .../standalone/common/function/time.result | 36 +++ .../cases/standalone/common/function/time.sql | 8 +- 5 files changed, 316 insertions(+), 1 deletion(-) create mode 100644 .editorconfig create mode 100644 src/common/function/src/scalars/timestamp/to_timezone.rs diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000..4de4965659 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,10 @@ +root = true + +[*] +end_of_line = lf +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true + +[{Makefile,**.mk}] +indent_style = tab diff --git a/src/common/function/src/scalars/timestamp.rs b/src/common/function/src/scalars/timestamp.rs index fecf884ce0..fbee427921 100644 --- a/src/common/function/src/scalars/timestamp.rs +++ b/src/common/function/src/scalars/timestamp.rs @@ -14,9 +14,11 @@ use std::sync::Arc; mod greatest; +mod to_timezone; mod to_unixtime; use greatest::GreatestFunction; +use to_timezone::ToTimezoneFunction; use to_unixtime::ToUnixtimeFunction; use crate::function_registry::FunctionRegistry; @@ -25,6 +27,7 @@ pub(crate) struct TimestampFunction; impl TimestampFunction { pub fn register(registry: &FunctionRegistry) { + registry.register(Arc::new(ToTimezoneFunction)); registry.register(Arc::new(ToUnixtimeFunction)); registry.register(Arc::new(GreatestFunction)); } diff --git a/src/common/function/src/scalars/timestamp/to_timezone.rs b/src/common/function/src/scalars/timestamp/to_timezone.rs new file mode 100644 index 0000000000..1160267dbd --- /dev/null +++ b/src/common/function/src/scalars/timestamp/to_timezone.rs @@ -0,0 +1,260 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt; +use std::sync::Arc; + +use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu}; +use common_query::prelude::Signature; +use common_time::{Timestamp, Timezone}; +use datatypes::data_type::ConcreteDataType; +use datatypes::prelude::VectorRef; +use datatypes::types::TimestampType; +use datatypes::value::Value; +use datatypes::vectors::{ + StringVector, TimestampMicrosecondVector, TimestampMillisecondVector, + TimestampNanosecondVector, TimestampSecondVector, Vector, +}; +use snafu::{ensure, OptionExt}; + +use crate::function::{Function, FunctionContext}; +use crate::helper; + +#[derive(Clone, Debug, Default)] +pub struct ToTimezoneFunction; + +const NAME: &str = "to_timezone"; + +fn convert_to_timezone(arg: &str) -> Option { + Timezone::from_tz_string(arg).ok() +} + +fn convert_to_timestamp(arg: &Value) -> Option { + match arg { + Value::Timestamp(ts) => Some(*ts), + _ => None, + } +} + +impl fmt::Display for ToTimezoneFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "TO_TIMEZONE") + } +} + +impl Function for ToTimezoneFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, input_types: &[ConcreteDataType]) -> Result { + // type checked by signature - MUST BE timestamp + Ok(input_types[0].clone()) + } + + fn signature(&self) -> Signature { + helper::one_of_sigs2( + vec![ + ConcreteDataType::timestamp_second_datatype(), + ConcreteDataType::timestamp_millisecond_datatype(), + ConcreteDataType::timestamp_microsecond_datatype(), + ConcreteDataType::timestamp_nanosecond_datatype(), + ], + vec![ConcreteDataType::string_datatype()], + ) + } + + fn eval(&self, _ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly 2, have: {}", + columns.len() + ), + } + ); + + // TODO: maybe support epoch timestamp? https://github.com/GreptimeTeam/greptimedb/issues/3477 + let ts = columns[0].data_type().as_timestamp().with_context(|| { + UnsupportedInputDataTypeSnafu { + function: NAME, + datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + } + })?; + let array = columns[0].to_arrow_array(); + let times = match ts { + TimestampType::Second(_) => { + let vector = TimestampSecondVector::try_from_arrow_array(array).unwrap(); + (0..vector.len()) + .map(|i| convert_to_timestamp(&vector.get(i))) + .collect::>() + } + TimestampType::Millisecond(_) => { + let vector = TimestampMillisecondVector::try_from_arrow_array(array).unwrap(); + (0..vector.len()) + .map(|i| convert_to_timestamp(&vector.get(i))) + .collect::>() + } + TimestampType::Microsecond(_) => { + let vector = TimestampMicrosecondVector::try_from_arrow_array(array).unwrap(); + (0..vector.len()) + .map(|i| convert_to_timestamp(&vector.get(i))) + .collect::>() + } + TimestampType::Nanosecond(_) => { + let vector = TimestampNanosecondVector::try_from_arrow_array(array).unwrap(); + (0..vector.len()) + .map(|i| convert_to_timestamp(&vector.get(i))) + .collect::>() + } + }; + + let tzs = { + let array = columns[1].to_arrow_array(); + let vector = StringVector::try_from_arrow_array(&array) + .ok() + .with_context(|| UnsupportedInputDataTypeSnafu { + function: NAME, + datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + })?; + (0..vector.len()) + .map(|i| convert_to_timezone(&vector.get(i).to_string())) + .collect::>() + }; + + let result = times + .iter() + .zip(tzs.iter()) + .map(|(time, tz)| match (time, tz) { + (Some(time), _) => Some(time.to_timezone_aware_string(tz.as_ref())), + _ => None, + }) + .collect::>>(); + Ok(Arc::new(StringVector::from(result))) + } +} + +#[cfg(test)] +mod tests { + + use datatypes::scalars::ScalarVector; + use datatypes::timestamp::{ + TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond, + }; + use datatypes::vectors::StringVector; + + use super::*; + + #[test] + fn test_timestamp_to_timezone() { + let f = ToTimezoneFunction; + assert_eq!("to_timezone", f.name()); + + let results = vec![ + Some("1969-12-31 19:00:01"), + None, + Some("1970-01-01 03:00:01"), + None, + ]; + let times: Vec> = vec![ + Some(TimestampSecond::new(1)), + None, + Some(TimestampSecond::new(1)), + None, + ]; + let ts_vector: TimestampSecondVector = + TimestampSecondVector::from_owned_iterator(times.into_iter()); + let tzs = vec![Some("America/New_York"), None, Some("Europe/Moscow"), None]; + let args: Vec = vec![ + Arc::new(ts_vector), + Arc::new(StringVector::from(tzs.clone())), + ]; + let vector = f.eval(FunctionContext::default(), &args).unwrap(); + assert_eq!(4, vector.len()); + let expect_times: VectorRef = Arc::new(StringVector::from(results)); + assert_eq!(expect_times, vector); + + let results = vec![ + Some("1969-12-31 19:00:00.001"), + None, + Some("1970-01-01 03:00:00.001"), + None, + ]; + let times: Vec> = vec![ + Some(TimestampMillisecond::new(1)), + None, + Some(TimestampMillisecond::new(1)), + None, + ]; + let ts_vector: TimestampMillisecondVector = + TimestampMillisecondVector::from_owned_iterator(times.into_iter()); + let args: Vec = vec![ + Arc::new(ts_vector), + Arc::new(StringVector::from(tzs.clone())), + ]; + let vector = f.eval(FunctionContext::default(), &args).unwrap(); + assert_eq!(4, vector.len()); + let expect_times: VectorRef = Arc::new(StringVector::from(results)); + assert_eq!(expect_times, vector); + + let results = vec![ + Some("1969-12-31 19:00:00.000001"), + None, + Some("1970-01-01 03:00:00.000001"), + None, + ]; + let times: Vec> = vec![ + Some(TimestampMicrosecond::new(1)), + None, + Some(TimestampMicrosecond::new(1)), + None, + ]; + let ts_vector: TimestampMicrosecondVector = + TimestampMicrosecondVector::from_owned_iterator(times.into_iter()); + + let args: Vec = vec![ + Arc::new(ts_vector), + Arc::new(StringVector::from(tzs.clone())), + ]; + let vector = f.eval(FunctionContext::default(), &args).unwrap(); + assert_eq!(4, vector.len()); + let expect_times: VectorRef = Arc::new(StringVector::from(results)); + assert_eq!(expect_times, vector); + + let results = vec![ + Some("1969-12-31 19:00:00.000000001"), + None, + Some("1970-01-01 03:00:00.000000001"), + None, + ]; + let times: Vec> = vec![ + Some(TimestampNanosecond::new(1)), + None, + Some(TimestampNanosecond::new(1)), + None, + ]; + let ts_vector: TimestampNanosecondVector = + TimestampNanosecondVector::from_owned_iterator(times.into_iter()); + + let args: Vec = vec![ + Arc::new(ts_vector), + Arc::new(StringVector::from(tzs.clone())), + ]; + let vector = f.eval(FunctionContext::default(), &args).unwrap(); + assert_eq!(4, vector.len()); + let expect_times: VectorRef = Arc::new(StringVector::from(results)); + assert_eq!(expect_times, vector); + } +} diff --git a/tests/cases/standalone/common/function/time.result b/tests/cases/standalone/common/function/time.result index 123b6a3f2f..7c6815682c 100644 --- a/tests/cases/standalone/common/function/time.result +++ b/tests/cases/standalone/common/function/time.result @@ -20,3 +20,39 @@ select GREATEST('2000-02-11'::Date, '2020-12-30'::Date); | 2020-12-30 | +-------------------------------------------------+ +select to_timezone('2022-09-20T14:16:43.012345+08:00', 'Europe/Berlin'); + ++-----------------------------------------------------------------------------+ +| to_timezone(Utf8("2022-09-20T14:16:43.012345+08:00"),Utf8("Europe/Berlin")) | ++-----------------------------------------------------------------------------+ +| 2022-09-20 08:16:43.012345 | ++-----------------------------------------------------------------------------+ + +select to_timezone('2022-09-20T14:16:43.012345+08:00'::Timestamp, 'Europe/Berlin'); + ++-----------------------------------------------------------------------------+ +| to_timezone(Utf8("2022-09-20T14:16:43.012345+08:00"),Utf8("Europe/Berlin")) | ++-----------------------------------------------------------------------------+ +| 2022-09-20 08:16:43.012 | ++-----------------------------------------------------------------------------+ + +select to_timezone('2024-03-29T14:16:43.012345Z', 'Asia/Shanghai'); + ++------------------------------------------------------------------------+ +| to_timezone(Utf8("2024-03-29T14:16:43.012345Z"),Utf8("Asia/Shanghai")) | ++------------------------------------------------------------------------+ +| 2024-03-29 22:16:43.012345 | ++------------------------------------------------------------------------+ + +select to_timezone('2024-03-29T14:16:43.012345Z'::Timestamp, 'Asia/Shanghai'); + ++------------------------------------------------------------------------+ +| to_timezone(Utf8("2024-03-29T14:16:43.012345Z"),Utf8("Asia/Shanghai")) | ++------------------------------------------------------------------------+ +| 2024-03-29 22:16:43.012 | ++------------------------------------------------------------------------+ + +select to_timezone(1709992225, 'Asia/Shanghai'); + +Error: 3001(EngineExecuteQuery), DataFusion error: Error during planning: Coercion from [Int64, Utf8] to the signature OneOf([Exact([Timestamp(Second, None), Utf8]), Exact([Timestamp(Millisecond, None), Utf8]), Exact([Timestamp(Microsecond, None), Utf8]), Exact([Timestamp(Nanosecond, None), Utf8])]) failed. + diff --git a/tests/cases/standalone/common/function/time.sql b/tests/cases/standalone/common/function/time.sql index 46d5c2347f..a8e59132a5 100644 --- a/tests/cases/standalone/common/function/time.sql +++ b/tests/cases/standalone/common/function/time.sql @@ -3,5 +3,11 @@ select current_time(); select GREATEST('1999-01-30', '2023-03-01'); - select GREATEST('2000-02-11'::Date, '2020-12-30'::Date); + +select to_timezone('2022-09-20T14:16:43.012345+08:00', 'Europe/Berlin'); +select to_timezone('2022-09-20T14:16:43.012345+08:00'::Timestamp, 'Europe/Berlin'); +select to_timezone('2024-03-29T14:16:43.012345Z', 'Asia/Shanghai'); +select to_timezone('2024-03-29T14:16:43.012345Z'::Timestamp, 'Asia/Shanghai'); + +select to_timezone(1709992225, 'Asia/Shanghai');