feat: make Range Query’s default alignment timezone-aware (#3219)

* feat: change Range Query’s default align behavior to 1970-01-01 00:00:00 aware of timezone

* test: test with +23:00 timezone
This commit is contained in:
dennis zhuang
2024-01-24 16:17:57 +08:00
committed by GitHub
parent f82ddc9491
commit 7323e9b36f
4 changed files with 157 additions and 49 deletions

View File

@@ -15,8 +15,8 @@
use std::fmt::Display;
use std::str::FromStr;
use chrono::FixedOffset;
use chrono_tz::Tz;
use chrono::{FixedOffset, NaiveDateTime, TimeZone};
use chrono_tz::{OffsetComponents, Tz};
use once_cell::sync::OnceCell;
use snafu::{OptionExt, ResultExt};
@@ -108,6 +108,21 @@ impl Timezone {
ParseTimezoneNameSnafu { raw: tz_string }.fail()
}
}
/// Returns this timezone's offset from UTC in seconds, i.e. the amount to
/// add to a UTC time to obtain the corresponding local time.
///
/// A fixed offset is returned as is. For a named timezone the offset is
/// evaluated at the Unix epoch (timestamp 0) and is the sum of the zone's
/// base UTC offset and its DST offset at that instant.
pub fn local_minus_utc(&self) -> i64 {
    match self {
        Self::Offset(fixed) => i64::from(fixed.local_minus_utc()),
        Self::Named(zone) => {
            // Timestamp 0 lies within chrono's representable range, so this
            // unwrap is not expected to fail.
            let epoch = NaiveDateTime::from_timestamp_opt(0, 0).unwrap();
            let at_epoch = zone.from_utc_datetime(&epoch);
            let zone_offset = at_epoch.offset();
            (zone_offset.base_utc_offset() + zone_offset.dst_offset()).num_seconds()
        }
    }
}
}
impl Display for Timezone {
@@ -129,6 +144,31 @@ pub fn system_timezone_name() -> String {
mod tests {
use super::*;
/// Checks `local_minus_utc` for a fixed offset and for several named zones.
#[test]
fn test_local_minus_utc() {
    // Parses a timezone string and yields its offset from UTC in seconds.
    let offset_of = |raw: &str| Timezone::from_tz_string(raw).unwrap().local_minus_utc();

    // Fixed offset.
    assert_eq!(28800, offset_of("+8:00"));
    // Named timezones.
    assert_eq!(28800, offset_of("Asia/Shanghai"));
    assert_eq!(-14400, offset_of("America/Aruba"));
    assert_eq!(-36000, offset_of("HST"));
}
#[test]
fn test_from_tz_string() {
assert_eq!(

View File

@@ -134,14 +134,14 @@ fn parse_duration_expr(args: &[Expr], i: usize) -> DFResult<Duration> {
/// which is used as the basis for dividing time slot during the align operation.
/// 1. NOW: align to current execute time
/// 2. Timestamp string: align to specific timestamp
/// 3. leave empty (as Default Option): align to unix epoch 0
/// 3. leave empty (as Default Option): align to unix epoch 0 shifted by the given timezone's UTC offset (plain unix epoch 0 when no timezone is given)
fn parse_align_to(args: &[Expr], i: usize, timezone: Option<&Timezone>) -> DFResult<i64> {
let s = parse_str_expr(args, i)?;
let upper = s.to_uppercase();
match upper.as_str() {
"NOW" => return Ok(Timestamp::current_millis().value()),
// default align to unix epoch 0
"" => return Ok(0),
// default align to unix epoch 0 (timezone aware)
"" => return Ok(timezone.map(|tz| tz.local_minus_utc() * 1000).unwrap_or(0)),
_ => (),
}
@@ -762,7 +762,23 @@ mod test {
assert!(epsinon.abs() < 100);
// test default
let args = vec![Expr::Literal(ScalarValue::Utf8(Some("".into())))];
assert!(parse_align_to(&args, 0, None).unwrap() == 0);
assert_eq!(0, parse_align_to(&args, 0, None).unwrap());
// test default with timezone
let args = vec![Expr::Literal(ScalarValue::Utf8(Some("".into())))];
assert_eq!(
-36000 * 1000,
parse_align_to(&args, 0, Some(&Timezone::from_tz_string("HST").unwrap())).unwrap()
);
assert_eq!(
28800 * 1000,
parse_align_to(
&args,
0,
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap())
)
.unwrap()
);
// test Timestamp
let args = vec![Expr::Literal(ScalarValue::Utf8(Some(
"1970-01-01T00:00:00+08:00".into(),

View File

@@ -7,14 +7,14 @@ CREATE TABLE host (
Affected Rows: 0
INSERT INTO TABLE host VALUES
("1970-01-01T22:30:00+00:00", 'host1', 0),
("1970-01-01T23:30:00+00:00", 'host1', 1),
("1970-01-02T22:30:00+00:00", 'host1', 2),
("1970-01-02T23:30:00+00:00", 'host1', 3),
("1970-01-01T22:30:00+00:00", 'host2', 4),
("1970-01-01T23:30:00+00:00", 'host2', 5),
("1970-01-02T22:30:00+00:00", 'host2', 6),
("1970-01-02T23:30:00+00:00", 'host2', 7);
("2024-01-23T22:30:00+00:00", 'host1', 0),
("2024-01-23T23:30:00+00:00", 'host1', 1),
("2024-01-24T22:30:00+00:00", 'host1', 2),
("2024-01-24T23:30:00+00:00", 'host1', 3),
("2024-01-23T22:30:00+00:00", 'host2', 4),
("2024-01-23T23:30:00+00:00", 'host2', 5),
("2024-01-24T22:30:00+00:00", 'host2', 6),
("2024-01-24T23:30:00+00:00", 'host2', 7);
Affected Rows: 8
@@ -23,10 +23,10 @@ SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-02T00:00:00 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 4 |
| 1970-01-02T00:00:00 | host2 | 6 |
| 2024-01-23T00:00:00 | host1 | 0 |
| 2024-01-24T00:00:00 | host1 | 2 |
| 2024-01-23T00:00:00 | host2 | 4 |
| 2024-01-24T00:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO UNKNOWN ORDER BY host, ts;
@@ -38,25 +38,25 @@ SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1900-01-01T00:00:0
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1969-12-31T23:00:00 | host1 | 0 |
| 1970-01-01T23:00:00 | host1 | 1 |
| 1970-01-02T23:00:00 | host1 | 3 |
| 1969-12-31T23:00:00 | host2 | 4 |
| 1970-01-01T23:00:00 | host2 | 5 |
| 1970-01-02T23:00:00 | host2 | 7 |
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1970-01-01T00:00:00+01:00' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2024-01-23T00:00:00+01:00' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1969-12-31T23:00:00 | host1 | 0 |
| 1970-01-01T23:00:00 | host1 | 1 |
| 1970-01-02T23:00:00 | host1 | 3 |
| 1969-12-31T23:00:00 | host2 | 4 |
| 1970-01-01T23:00:00 | host2 | 5 |
| 1970-01-02T23:00:00 | host2 | 7 |
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2023-01-01T00:00:00+01:00' ORDER BY host, ts;
@@ -64,12 +64,12 @@ SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2023-01-01T00:00:0
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1969-12-31T23:00:00 | host1 | 0 |
| 1970-01-01T23:00:00 | host1 | 1 |
| 1970-01-02T23:00:00 | host1 | 3 |
| 1969-12-31T23:00:00 | host2 | 4 |
| 1970-01-01T23:00:00 | host2 | 5 |
| 1970-01-02T23:00:00 | host2 | 7 |
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+----------------------------------+
SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
@@ -77,11 +77,50 @@ SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day)
+---------------------+----------------------------------------------------------------------------+
| ts | MIN(host.val) RANGE IntervalMonthDayNano("18446744073709551616") FILL NULL |
+---------------------+----------------------------------------------------------------------------+
| 1969-12-31T23:00:00 | 0 |
| 1970-01-01T23:00:00 | 1 |
| 1970-01-02T23:00:00 | 3 |
| 2024-01-22T23:00:00 | 0 |
| 2024-01-23T23:00:00 | 1 |
| 2024-01-24T23:00:00 | 3 |
+---------------------+----------------------------------------------------------------------------+
--- ALIGN TO with time zone ---
set time_zone='Asia/Shanghai';
Affected Rows: 0
---- align to 'Asia/Shanghai' unix epoch 0 ----
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 2024-01-23T08:00:00 | host1 | 0 |
| 2024-01-24T08:00:00 | host1 | 2 |
| 2024-01-23T08:00:00 | host2 | 4 |
| 2024-01-24T08:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
set time_zone='+23:00';
Affected Rows: 0
---- align to '+23:00' unix epoch 0 ----
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+----------------------------------+
set time_zone='UTC';
Affected Rows: 0
DROP TABLE host;
Affected Rows: 0

View File

@@ -5,14 +5,14 @@ CREATE TABLE host (
);
INSERT INTO TABLE host VALUES
("1970-01-01T22:30:00+00:00", 'host1', 0),
("1970-01-01T23:30:00+00:00", 'host1', 1),
("1970-01-02T22:30:00+00:00", 'host1', 2),
("1970-01-02T23:30:00+00:00", 'host1', 3),
("1970-01-01T22:30:00+00:00", 'host2', 4),
("1970-01-01T23:30:00+00:00", 'host2', 5),
("1970-01-02T22:30:00+00:00", 'host2', 6),
("1970-01-02T23:30:00+00:00", 'host2', 7);
("2024-01-23T22:30:00+00:00", 'host1', 0),
("2024-01-23T23:30:00+00:00", 'host1', 1),
("2024-01-24T22:30:00+00:00", 'host1', 2),
("2024-01-24T23:30:00+00:00", 'host1', 3),
("2024-01-23T22:30:00+00:00", 'host2', 4),
("2024-01-23T23:30:00+00:00", 'host2', 5),
("2024-01-24T22:30:00+00:00", 'host2', 6),
("2024-01-24T23:30:00+00:00", 'host2', 7);
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
@@ -20,10 +20,23 @@ SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO UNKNOWN ORDER BY ho
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1900-01-01T00:00:00+01:00' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1970-01-01T00:00:00+01:00' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2024-01-23T00:00:00+01:00' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2023-01-01T00:00:00+01:00' ORDER BY host, ts;
SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
--- ALIGN TO with time zone ---
set time_zone='Asia/Shanghai';
---- align to 'Asia/Shanghai' unix epoch 0 ----
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
set time_zone='+23:00';
---- align to '+23:00' unix epoch 0 ----
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
set time_zone='UTC';
DROP TABLE host;