From c58217ccec7831ae777dce7ccaa2c1c187f7e3ef Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Thu, 17 Apr 2025 11:58:36 +0800 Subject: [PATCH] fix: support duration to interval conversion in PostgreSQL protocol (#5913) * fix/pg-timestamp-diff: ### Add Support for `Duration` Type in PostgreSQL Encoding - **Enhanced `encode_value` Functionality**: Updated `src/servers/src/postgres/types.rs` to support encoding of `Value::Duration` using `PgInterval`. - **Implemented `Duration` Conversion**: Added conversion logic from `Duration` to `PgInterval` in `src/servers/src/postgres/types/interval.rs`. - **Added Unit Tests**: Introduced tests for `Duration` to `PgInterval` conversion in `src/servers/src/postgres/types/interval.rs`. - **Updated SQL Test Cases**: Modified `tests/cases/standalone/common/types/timestamp/timestamp.sql` and `timestamp.result` to include tests for timestamp subtraction using PostgreSQL protocol. * fix: overflow * fix/pg-timestamp-diff: Update `timestamp.sql` to ensure newline consistency - Modified `timestamp.sql` to add a newline at the end of the file for consistency. * fix/pg-timestamp-diff: ### Add Documentation for Month Approximation in Interval Calculation - **File Modified**: `src/servers/src/postgres/types/interval.rs` - **Key Change**: Added a comment explaining the approximation of one month as 30.44 days in the interval calculations. --- src/servers/src/error.rs | 5 + src/servers/src/postgres/types.rs | 27 ++-- src/servers/src/postgres/types/interval.rs | 147 +++++++++++++++++- .../common/types/timestamp/timestamp.result | 9 ++ .../common/types/timestamp/timestamp.sql | 3 + 5 files changed, 176 insertions(+), 15 deletions(-) diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index dd3516d890..bfb36c32ca 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -25,6 +25,7 @@ use common_error::ext::{BoxedError, ErrorExt}; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; use common_telemetry::{error, warn}; +use common_time::Duration; use datafusion::error::DataFusionError; use datatypes::prelude::ConcreteDataType; use headers::ContentType; @@ -615,6 +616,9 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Overflow while casting `{:?}` to Interval", val))] + DurationOverflow { val: Duration }, } pub type Result = std::result::Result; @@ -734,6 +738,7 @@ impl ErrorExt for Error { ConvertSqlValue { source, .. } => source.status_code(), InFlightWriteBytesExceeded { .. } => StatusCode::RateLimited, + DurationOverflow { .. } => StatusCode::InvalidArguments, } } diff --git a/src/servers/src/postgres/types.rs b/src/servers/src/postgres/types.rs index a8daf880c6..68bdb9d2e9 100644 --- a/src/servers/src/postgres/types.rs +++ b/src/servers/src/postgres/types.rs @@ -421,13 +421,13 @@ pub(super) fn encode_value( Value::IntervalDayTime(v) => builder.encode_field(&PgInterval::from(*v)), Value::IntervalMonthDayNano(v) => builder.encode_field(&PgInterval::from(*v)), Value::Decimal128(v) => builder.encode_field(&v.to_string()), + Value::Duration(d) => match PgInterval::try_from(*d) { + Ok(i) => builder.encode_field(&i), + Err(e) => Err(PgWireError::ApiError(Box::new(Error::Internal { + err_msg: e.to_string(), + }))), + }, Value::List(values) => encode_array(query_ctx, values, builder), - Value::Duration(_) => Err(PgWireError::ApiError(Box::new(Error::Internal { - err_msg: format!( - "cannot write value {:?} in postgres protocol: unimplemented", - &value - ), - }))), } } @@ -466,8 +466,8 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result { &ConcreteDataType::Interval(_) => Ok(Type::INTERVAL_ARRAY), &ConcreteDataType::Decimal128(_) => Ok(Type::NUMERIC_ARRAY), &ConcreteDataType::Json(_) => Ok(Type::JSON_ARRAY), - &ConcreteDataType::Duration(_) - | &ConcreteDataType::Dictionary(_) + &ConcreteDataType::Duration(_) => Ok(Type::INTERVAL_ARRAY), + &ConcreteDataType::Dictionary(_) | &ConcreteDataType::Vector(_) | &ConcreteDataType::List(_) => server_error::UnsupportedDataTypeSnafu { data_type: origin, @@ -475,13 +475,12 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result { } .fail(), }, - &ConcreteDataType::Duration(_) | &ConcreteDataType::Dictionary(_) => { - server_error::UnsupportedDataTypeSnafu { - data_type: origin, - reason: "not implemented", - } - .fail() + &ConcreteDataType::Dictionary(_) => server_error::UnsupportedDataTypeSnafu { + data_type: origin, + reason: "not implemented", } + .fail(), + &ConcreteDataType::Duration(_) => Ok(Type::INTERVAL), } } diff --git a/src/servers/src/postgres/types/interval.rs b/src/servers/src/postgres/types/interval.rs index c15833f86c..ec9bfe912b 100644 --- a/src/servers/src/postgres/types/interval.rs +++ b/src/servers/src/postgres/types/interval.rs @@ -16,10 +16,19 @@ use std::fmt::Display; use bytes::{Buf, BufMut}; use common_time::interval::IntervalFormat; -use common_time::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth}; +use common_time::timestamp::TimeUnit; +use common_time::{Duration, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth}; use pgwire::types::ToSqlText; use postgres_types::{to_sql_checked, FromSql, IsNull, ToSql, Type}; +use crate::error; + +/// On average one month has 30.44 day, which is a common approximation. +const SECONDS_PER_MONTH: i64 = 24 * 6 * 6 * 3044; +const SECONDS_PER_DAY: i64 = 24 * 60 * 60; +const MILLISECONDS_PER_MONTH: i64 = SECONDS_PER_MONTH * 1000; +const MILLISECONDS_PER_DAY: i64 = SECONDS_PER_DAY * 1000; + #[derive(Debug, Clone, Copy, Default)] pub struct PgInterval { pub(crate) months: i32, @@ -57,6 +66,62 @@ impl From for PgInterval { } } +impl TryFrom for PgInterval { + type Error = error::Error; + + fn try_from(duration: Duration) -> error::Result { + let value = duration.value(); + let unit = duration.unit(); + + // Convert the duration to microseconds + match unit { + TimeUnit::Second => { + let months = i32::try_from(value / SECONDS_PER_MONTH) + .map_err(|_| error::DurationOverflowSnafu { val: duration }.build())?; + let days = + i32::try_from((value - (months as i64) * SECONDS_PER_MONTH) / SECONDS_PER_DAY) + .map_err(|_| error::DurationOverflowSnafu { val: duration }.build())?; + let microseconds = + (value - (months as i64) * SECONDS_PER_MONTH - (days as i64) * SECONDS_PER_DAY) + .checked_mul(1_000_000) + .ok_or(error::DurationOverflowSnafu { val: duration }.build())?; + + Ok(Self { + months, + days, + microseconds, + }) + } + TimeUnit::Millisecond => { + let months = i32::try_from(value / MILLISECONDS_PER_MONTH) + .map_err(|_| error::DurationOverflowSnafu { val: duration }.build())?; + let days = i32::try_from( + (value - (months as i64) * MILLISECONDS_PER_MONTH) / MILLISECONDS_PER_DAY, + ) + .map_err(|_| error::DurationOverflowSnafu { val: duration }.build())?; + let microseconds = ((value - (months as i64) * MILLISECONDS_PER_MONTH) + - (days as i64) * MILLISECONDS_PER_DAY) + * 1_000; + Ok(Self { + months, + days, + microseconds, + }) + } + TimeUnit::Microsecond => Ok(Self { + months: 0, + days: 0, + microseconds: value, + }), + TimeUnit::Nanosecond => Ok(Self { + months: 0, + days: 0, + microseconds: value / 1000, + }), + } + } +} + impl From for IntervalMonthDayNano { fn from(interval: PgInterval) -> Self { IntervalMonthDayNano::new( @@ -149,3 +214,83 @@ impl ToSqlText for PgInterval { Ok(IsNull::No) } } + +#[cfg(test)] +mod tests { + use common_time::timestamp::TimeUnit; + use common_time::Duration; + + use super::*; + + #[test] + fn test_duration_to_pg_interval() { + // Test with seconds + let duration = Duration::new(86400, TimeUnit::Second); // 1 day + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, 1); + assert_eq!(interval.microseconds, 0); + + // Test with milliseconds + let duration = Duration::new(86400000, TimeUnit::Millisecond); // 1 day + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, 1); + assert_eq!(interval.microseconds, 0); + + // Test with microseconds + let duration = Duration::new(86400000000, TimeUnit::Microsecond); // 1 day + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, 0); + assert_eq!(interval.microseconds, 86400000000); + + // Test with nanoseconds + let duration = Duration::new(86400000000000, TimeUnit::Nanosecond); // 1 day + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, 0); + assert_eq!(interval.microseconds, 86400000000); + + // Test with partial day + let duration = Duration::new(43200, TimeUnit::Second); // 12 hours + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, 0); + assert_eq!(interval.microseconds, 43_200_000_000); // 12 hours in microseconds + + // Test with negative duration + let duration = Duration::new(-86400, TimeUnit::Second); // -1 day + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, -1); + assert_eq!(interval.microseconds, 0); + + // Test with multiple days + let duration = Duration::new(259200, TimeUnit::Second); // 3 days + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, 3); + assert_eq!(interval.microseconds, 0); + + // Test with small duration (less than a day) + let duration = Duration::new(3600, TimeUnit::Second); // 1 hour + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, 0); + assert_eq!(interval.microseconds, 3600000000); // 1 hour in microseconds + + // Test with very small duration + let duration = Duration::new(1, TimeUnit::Microsecond); // 1 microsecond + let interval = PgInterval::try_from(duration).unwrap(); + assert_eq!(interval.months, 0); + assert_eq!(interval.days, 0); + assert_eq!(interval.microseconds, 1); + + let duration = Duration::new(i64::MAX, TimeUnit::Second); + assert!(PgInterval::try_from(duration).is_err()); + + let duration = Duration::new(i64::MAX, TimeUnit::Millisecond); + assert!(PgInterval::try_from(duration).is_err()); + } +} diff --git a/tests/cases/standalone/common/types/timestamp/timestamp.result b/tests/cases/standalone/common/types/timestamp/timestamp.result index 51bc812814..c0d1c79250 100644 --- a/tests/cases/standalone/common/types/timestamp/timestamp.result +++ b/tests/cases/standalone/common/types/timestamp/timestamp.result @@ -192,3 +192,12 @@ DROP TABLE timestamp; Affected Rows: 0 +-- SQLNESS PROTOCOL POSTGRES +SELECT '2025-04-14 20:42:19.021000'::TIMESTAMP - '2025-04-14 20:42:18.103000'::TIMESTAMP; + ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| arrow_cast(Utf8("2025-04-14 20:42:19.021000"),Utf8("Timestamp(Millisecond, None)")) - arrow_cast(Utf8("2025-04-14 20:42:18.103000"),Utf8("Timestamp(Millisecond, None)")) | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 00:00:00.918000 | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/types/timestamp/timestamp.sql b/tests/cases/standalone/common/types/timestamp/timestamp.sql index e2924bc2e0..070032810b 100644 --- a/tests/cases/standalone/common/types/timestamp/timestamp.sql +++ b/tests/cases/standalone/common/types/timestamp/timestamp.sql @@ -55,3 +55,6 @@ SELECT TIMESTAMP '-8-01-01 00:00:01.5'::VARCHAR; SELECT TIMESTAMP '100000-01-01 00:00:01.5'::VARCHAR; DROP TABLE timestamp; + +-- SQLNESS PROTOCOL POSTGRES +SELECT '2025-04-14 20:42:19.021000'::TIMESTAMP - '2025-04-14 20:42:18.103000'::TIMESTAMP;