feat: add align to / interval support in range query (#2842)

* feat: add align to / interval support in range query

* chore: fix ci

* chore: simplify `parse_duration_expr`

* chore: change s to ms
This commit is contained in:
WU Jingdi
2023-12-04 16:00:41 +08:00
committed by GitHub
parent f78dab078c
commit 806400caff
12 changed files with 472 additions and 67 deletions

View File

@@ -81,6 +81,23 @@ SELECT min(val) RANGE '5s' FROM host ALIGN '5s' FILL 3.0;
Error: 3000(PlanQuery), DataFusion error: Error during planning: 3.0 is not a valid fill option, fail to convert to a const value. { Arrow error: Cast error: Cannot cast string '3.0' to value of Int64 type }
-- 2.7 zero align/range
SELECT min(val) RANGE '5s' FROM host ALIGN '0s';
Error: 3000(PlanQuery), DataFusion error: Error during planning: duration must be greater than 0
SELECT min(val) RANGE '0s' FROM host ALIGN '5s';
Error: 3000(PlanQuery), DataFusion error: Error during planning: duration must be greater than 0
SELECT min(val) RANGE '5s' FROM host ALIGN (INTERVAL '0' day);
Error: 2000(InvalidSyntax), Range Query: Can't use 0 as align in Range Query
SELECT min(val) RANGE (INTERVAL '0' day) FROM host ALIGN '5s';
Error: 2000(InvalidSyntax), Range Query: Invalid Range expr `MIN(host.val) RANGE IntervalMonthDayNano("0") FILL NULL`, Can't use 0 as range in Range Query
DROP TABLE host;
Affected Rows: 0

View File

@@ -58,4 +58,14 @@ SELECT min(val) RANGE '5s', min(val) RANGE '5s' FILL NULL FROM host ALIGN '5s';
SELECT min(val) RANGE '5s' FROM host ALIGN '5s' FILL 3.0;
-- 2.7 zero align/range
SELECT min(val) RANGE '5s' FROM host ALIGN '0s';
SELECT min(val) RANGE '0s' FROM host ALIGN '5s';
SELECT min(val) RANGE '5s' FROM host ALIGN (INTERVAL '0' day);
SELECT min(val) RANGE (INTERVAL '0' day) FROM host ALIGN '5s';
DROP TABLE host;

View File

@@ -0,0 +1,46 @@
CREATE TABLE host (
ts timestamp(3) time index,
host STRING PRIMARY KEY,
val BIGINT,
);
Affected Rows: 0
INSERT INTO TABLE host VALUES
("1970-01-01T01:00:00+08:00", 'host1', 0),
("1970-01-01T02:00:00+08:00", 'host1', 1),
("1971-01-02T03:00:00+08:00", 'host1', 2),
("1971-01-02T04:00:00+08:00", 'host1', 3),
("1970-01-01T01:00:00+08:00", 'host2', 4),
("1970-01-01T02:00:00+08:00", 'host2', 5),
("1971-01-02T03:00:00+08:00", 'host2', 6),
("1971-01-02T04:00:00+08:00", 'host2', 7);
Affected Rows: 8
SELECT ts, host, min(val) RANGE (INTERVAL '1 year') FROM host ALIGN (INTERVAL '1 year') ORDER BY host, ts;
+---------------------+-------+--------------------------------------------------------------------------------------+
| ts | host | MIN(host.val) RANGE IntervalMonthDayNano("950737950171172051122527404032") FILL NULL |
+---------------------+-------+--------------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1971-12-22T00:00:00 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 4 |
| 1971-12-22T00:00:00 | host2 | 6 |
+---------------------+-------+--------------------------------------------------------------------------------------+
SELECT ts, host, min(val) RANGE (INTERVAL '1' year) FROM host ALIGN (INTERVAL '1' year) ORDER BY host, ts;
+---------------------+-------+--------------------------------------------------------------------------------------+
| ts | host | MIN(host.val) RANGE IntervalMonthDayNano("950737950171172051122527404032") FILL NULL |
+---------------------+-------+--------------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1971-12-22T00:00:00 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 4 |
| 1971-12-22T00:00:00 | host2 | 6 |
+---------------------+-------+--------------------------------------------------------------------------------------+
DROP TABLE host;
Affected Rows: 0

View File

@@ -0,0 +1,21 @@
-- Range-query test: RANGE and ALIGN given as INTERVAL expressions rather than duration strings.
-- Fixture table: time index `ts` (timestamp(3)), primary key `host`, value column `val`.
CREATE TABLE host (
ts timestamp(3) time index,
host STRING PRIMARY KEY,
val BIGINT,
);
-- Two hosts, each with two points on 1970-01-01 and two points on 1971-01-02
-- (all timestamps are written with a +08:00 offset).
INSERT INTO TABLE host VALUES
("1970-01-01T01:00:00+08:00", 'host1', 0),
("1970-01-01T02:00:00+08:00", 'host1', 1),
("1971-01-02T03:00:00+08:00", 'host1', 2),
("1971-01-02T04:00:00+08:00", 'host1', 3),
("1970-01-01T01:00:00+08:00", 'host2', 4),
("1970-01-01T02:00:00+08:00", 'host2', 5),
("1971-01-02T03:00:00+08:00", 'host2', 6),
("1971-01-02T04:00:00+08:00", 'host2', 7);
-- Interval written entirely inside the quoted string ('1 year').
SELECT ts, host, min(val) RANGE (INTERVAL '1 year') FROM host ALIGN (INTERVAL '1 year') ORDER BY host, ts;
-- Same query with the unit outside the quotes ('1' year); the recorded result is the same for both spellings.
SELECT ts, host, min(val) RANGE (INTERVAL '1' year) FROM host ALIGN (INTERVAL '1' year) ORDER BY host, ts;
-- Remove the fixture table.
DROP TABLE host;

View File

@@ -55,9 +55,9 @@ EXPLAIN SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s';
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| RangeSelect: range_exprs=[MIN(host.val) RANGE 5s FILL NULL], align=5s time_index=ts_|
| logical_plan_| RangeSelect: range_exprs=[MIN(host.val) RANGE 5s FILL NULL], align=5000ms, align_to=0ms, align_by=[host.host], time_index=ts |
|_|_MergeScan [is_placeholder=false]_|
| physical_plan | RangeSelectExec: range_expr=[RangeFnExec{ MIN(host.val), range: 5000}], align=5000, time_index=ts, by=[host@1] |
| physical_plan | RangeSelectExec: range_expr=[MIN(host.val) RANGE 5s FILL NULL], align=5000ms, align_to=0ms, align_by=[host@1], time_index=ts |
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
@@ -71,7 +71,7 @@ EXPLAIN ANALYZE SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s';
+-+-+
| plan_type_| plan_|
+-+-+
| Plan with Metrics | RangeSelectExec: range_expr=[RangeFnExec{ MIN(host.val), range: 5000}], align=5000, time_index=ts, by=[host@1], REDACTED
| Plan with Metrics | RangeSelectExec: range_expr=[MIN(host.val) RANGE 5s FILL NULL], align=5000ms, align_to=0ms, align_by=[host@1], time_index=ts, REDACTED
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+

View File

@@ -0,0 +1,99 @@
CREATE TABLE host (
ts timestamp(3) time index,
host STRING PRIMARY KEY,
val BIGINT,
);
Affected Rows: 0
INSERT INTO TABLE host VALUES
("1970-01-01T23:30:00+00:00", 'host1', 0),
("1970-01-01T22:30:00+00:00", 'host1', 1),
("1970-01-02T23:30:00+00:00", 'host1', 2),
("1970-01-02T22:30:00+00:00", 'host1', 3),
("1970-01-01T23:30:00+00:00", 'host2', 4),
("1970-01-01T22:30:00+00:00", 'host2', 5),
("1970-01-02T23:30:00+00:00", 'host2', 6),
("1970-01-02T22:30:00+00:00", 'host2', 7);
Affected Rows: 8
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-02T00:00:00 | host1 | 0 |
| 1970-01-03T00:00:00 | host1 | 2 |
| 1970-01-02T00:00:00 | host2 | 4 |
| 1970-01-03T00:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO CALENDAR ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-02T00:00:00 | host1 | 0 |
| 1970-01-03T00:00:00 | host1 | 2 |
| 1970-01-02T00:00:00 | host2 | 4 |
| 1970-01-03T00:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO UNKNOWN ORDER BY host, ts;
Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal `align to` argument `UNKNOWN` in range select query, can't be parse as NOW/CALENDAR/Timestamp, error: Failed to parse a string into Timestamp, raw string: UNKNOWN
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1900-01-01T00:00:00+01:00' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-01T23:00:00 | host1 | 1 |
| 1970-01-02T23:00:00 | host1 | 0 |
| 1970-01-03T23:00:00 | host1 | 2 |
| 1970-01-01T23:00:00 | host2 | 5 |
| 1970-01-02T23:00:00 | host2 | 4 |
| 1970-01-03T23:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1970-01-01T00:00:00+01:00' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-01T23:00:00 | host1 | 1 |
| 1970-01-02T23:00:00 | host1 | 0 |
| 1970-01-03T23:00:00 | host1 | 2 |
| 1970-01-01T23:00:00 | host2 | 5 |
| 1970-01-02T23:00:00 | host2 | 4 |
| 1970-01-03T23:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2023-01-01T00:00:00+01:00' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-01T23:00:00 | host1 | 1 |
| 1970-01-02T23:00:00 | host1 | 0 |
| 1970-01-03T23:00:00 | host1 | 2 |
| 1970-01-01T23:00:00 | host2 | 5 |
| 1970-01-02T23:00:00 | host2 | 4 |
| 1970-01-03T23:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
+---------------------+----------------------------------------------------------------------------+
| ts | MIN(host.val) RANGE IntervalMonthDayNano("18446744073709551616") FILL NULL |
+---------------------+----------------------------------------------------------------------------+
| 1970-01-01T23:00:00 | 1 |
| 1970-01-02T23:00:00 | 0 |
| 1970-01-03T23:00:00 | 2 |
+---------------------+----------------------------------------------------------------------------+
DROP TABLE host;
Affected Rows: 0

View File

@@ -0,0 +1,31 @@
-- Range-query test: the `ALIGN ... TO <origin>` clause (CALENDAR, invalid keyword, explicit timestamps).
-- Fixture table: time index `ts` (timestamp(3)), primary key `host`, value column `val`.
CREATE TABLE host (
ts timestamp(3) time index,
host STRING PRIMARY KEY,
val BIGINT,
);
-- Two hosts, each with points at 22:30 and 23:30 (+00:00) on 1970-01-01 and 1970-01-02.
INSERT INTO TABLE host VALUES
("1970-01-01T23:30:00+00:00", 'host1', 0),
("1970-01-01T22:30:00+00:00", 'host1', 1),
("1970-01-02T23:30:00+00:00", 'host1', 2),
("1970-01-02T22:30:00+00:00", 'host1', 3),
("1970-01-01T23:30:00+00:00", 'host2', 4),
("1970-01-01T22:30:00+00:00", 'host2', 5),
("1970-01-02T23:30:00+00:00", 'host2', 6),
("1970-01-02T22:30:00+00:00", 'host2', 7);
-- No TO clause.
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
-- Explicit TO CALENDAR; the recorded result is the same as for the query without a TO clause.
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO CALENDAR ORDER BY host, ts;
-- Negative case: UNKNOWN is not NOW, CALENDAR or a parsable timestamp, so a planning error is expected.
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO UNKNOWN ORDER BY host, ts;
-- Explicit timestamp origins before, at and after the data's era. All three use a +01:00 offset
-- and the recorded result is identical for each of them.
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1900-01-01T00:00:00+01:00' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1970-01-01T00:00:00+01:00' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2023-01-01T00:00:00+01:00' ORDER BY host, ts;
-- INTERVAL-typed RANGE/ALIGN combined with TO and `by (1)`; `host` is not selected and the
-- recorded result has a single row per aligned `ts`.
SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
-- Remove the fixture table.
DROP TABLE host;