Files
greptimedb/tests/cases/standalone/common/flow/flow_call_df_func.result
discord9 9f9c1dab60 feat(flow): use DataFusion's optimizer (#4489)
* feat: use datafusion optimization

refactor: mv `sql_to_flow_plan` elsewhere

feat(WIP): use df optimization

WIP analyzer rule

feat(WIP): avg expander

fix: transform avg expander

fix: avg expand

feat: names from substrait

fix: avg rewrite

test: update `test_avg`&`test_avg_group_by`

test: fix `test_sum`

test: fix some tests

chore: remove unused flow plan transform

feat: tumble expander

test: update tests

* chore: clippy

* fix: tumble lose `group expr`

* test: sqlness test update

* test: rm unused cast

* test: simplify sqlness

* refactor: per review

* chore: after rebase

* fix: remove a outdated test

* test: add comment

* fix: report error when not literal

* chore: update sqlness test after rebase

* refactor: per review
2024-08-29 02:52:00 +00:00

373 lines
11 KiB
Plaintext

-- Scenario: a DataFusion scalar function (`abs`) nested inside a flow aggregate (`sum`).
-- Source table read by the flow; `ts` is the time index that tumble() groups on.
CREATE TABLE numbers_input_df_func (
number INT,
ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(number),
TIME INDEX(ts)
);
Affected Rows: 0
-- call `sum(abs(number))` where `abs` is a DataFusion function and `sum` is a flow function
CREATE FLOW test_numbers_df_func
SINK TO out_num_cnt_df_func
AS
SELECT sum(abs(number)) FROM numbers_input_df_func GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00');
Affected Rows: 0
-- flush before any row has been inserted; the expected result is 0
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 0 |
+------------------------------------------+
-- both rows belong to the window starting at 2021-07-01 00:00:00: abs(-20) + abs(22) = 42
INSERT INTO numbers_input_df_func
VALUES
(-20, "2021-07-01 00:00:00.200"),
(22, "2021-07-01 00:00:00.600");
Affected Rows: 2
-- flush flow to make sure that table is created and data is inserted
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 1 |
+------------------------------------------+
-- note that the double-quoted name is a column name generated by datafusion, **not** an aggregation expr
-- ORDER BY keeps the expected row order deterministic
SELECT "SUM(abs(numbers_input_df_func.number))", window_start, window_end FROM out_num_cnt_df_func ORDER BY window_start;
+----------------------------------------+---------------------+---------------------+
| SUM(abs(numbers_input_df_func.number)) | window_start | window_end |
+----------------------------------------+---------------------+---------------------+
| 42 | 2021-07-01T00:00:00 | 2021-07-01T00:00:01 |
+----------------------------------------+---------------------+---------------------+
-- nothing new was inserted since the previous flush; the expected result is 0
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 0 |
+------------------------------------------+
-- both rows belong to the next window, starting at 2021-07-01 00:00:01: abs(23) + abs(-24) = 47
INSERT INTO numbers_input_df_func
VALUES
(23,"2021-07-01 00:00:01.000"),
(-24,"2021-07-01 00:00:01.500");
Affected Rows: 2
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 1 |
+------------------------------------------+
-- note that the double-quoted name is a column name generated by datafusion, **not** an aggregation expr
-- ORDER BY keeps the expected two-row result in a deterministic order
SELECT "SUM(abs(numbers_input_df_func.number))", window_start, window_end FROM out_num_cnt_df_func ORDER BY window_start;
+----------------------------------------+---------------------+---------------------+
| SUM(abs(numbers_input_df_func.number)) | window_start | window_end |
+----------------------------------------+---------------------+---------------------+
| 42 | 2021-07-01T00:00:00 | 2021-07-01T00:00:01 |
| 47 | 2021-07-01T00:00:01 | 2021-07-01T00:00:02 |
+----------------------------------------+---------------------+---------------------+
-- clean up so the following scenario can reuse the same flow and table names
DROP FLOW test_numbers_df_func;
Affected Rows: 0
DROP TABLE numbers_input_df_func;
Affected Rows: 0
DROP TABLE out_num_cnt_df_func;
Affected Rows: 0
-- Scenario: a DataFusion scalar function (`abs`) applied on top of a flow aggregate (`sum`).
-- Source table read by the flow; `ts` is the time index that tumble() groups on.
CREATE TABLE numbers_input_df_func (
number INT,
ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(number),
TIME INDEX(ts)
);
Affected Rows: 0
-- call `abs(sum(number))` to make sure that calling the `abs` function (impl by datafusion) on the `sum` function (impl by flow) is working
CREATE FLOW test_numbers_df_func
SINK TO out_num_cnt_df_func
AS
SELECT abs(sum(number)) FROM numbers_input_df_func GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00');
Affected Rows: 0
-- flush before any row has been inserted; the expected result is 0
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 0 |
+------------------------------------------+
-- both rows belong to the window starting at 2021-07-01 00:00:00: abs(-20 + 22) = 2
INSERT INTO numbers_input_df_func
VALUES
(-20, "2021-07-01 00:00:00.200"),
(22, "2021-07-01 00:00:00.600");
Affected Rows: 2
-- flush flow to make sure that table is created and data is inserted
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 1 |
+------------------------------------------+
-- the double-quoted name is the sink table's column name, not an expression
-- ORDER BY keeps the expected row order deterministic
SELECT "abs(SUM(numbers_input_df_func.number))", window_start, window_end FROM out_num_cnt_df_func ORDER BY window_start;
+----------------------------------------+---------------------+---------------------+
| abs(SUM(numbers_input_df_func.number)) | window_start | window_end |
+----------------------------------------+---------------------+---------------------+
| 2 | 2021-07-01T00:00:00 | 2021-07-01T00:00:01 |
+----------------------------------------+---------------------+---------------------+
-- nothing new was inserted since the previous flush; the expected result is 0
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 0 |
+------------------------------------------+
-- both rows belong to the next window, starting at 2021-07-01 00:00:01: abs(23 - 24) = 1
INSERT INTO numbers_input_df_func
VALUES
(23,"2021-07-01 00:00:01.000"),
(-24,"2021-07-01 00:00:01.500");
Affected Rows: 2
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 1 |
+------------------------------------------+
-- ORDER BY keeps the expected two-row result in a deterministic order
SELECT "abs(SUM(numbers_input_df_func.number))", window_start, window_end FROM out_num_cnt_df_func ORDER BY window_start;
+----------------------------------------+---------------------+---------------------+
| abs(SUM(numbers_input_df_func.number)) | window_start | window_end |
+----------------------------------------+---------------------+---------------------+
| 2 | 2021-07-01T00:00:00 | 2021-07-01T00:00:01 |
| 1 | 2021-07-01T00:00:01 | 2021-07-01T00:00:02 |
+----------------------------------------+---------------------+---------------------+
-- clean up so the following scenario can reuse the same flow and table names
DROP FLOW test_numbers_df_func;
Affected Rows: 0
DROP TABLE numbers_input_df_func;
Affected Rows: 0
DROP TABLE out_num_cnt_df_func;
Affected Rows: 0
-- test date_bin
-- Scenario: group by an aliased `date_bin` expression instead of tumble(), and
-- combine two aggregates (`max` and `min`) in one output column.
CREATE TABLE numbers_input_df_func (
number INT,
ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(number),
TIME INDEX(ts)
);
Affected Rows: 0
-- `time_window` is the alias of the 1-second date_bin bucket and is also the GROUP BY key
CREATE FLOW test_numbers_df_func
SINK TO out_num_cnt_df_func
AS
SELECT max(number) - min(number) as maxmin, date_bin(INTERVAL '1 second', ts, '2021-07-01 00:00:00'::Timestamp) as time_window FROM numbers_input_df_func GROUP BY time_window;
Affected Rows: 0
-- flush before any row has been inserted; the expected result is 0
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 0 |
+------------------------------------------+
-- both rows belong to the bucket starting at 2021-07-01 00:00:00: 22 - 20 = 2
INSERT INTO numbers_input_df_func
VALUES
(20, "2021-07-01 00:00:00.200"),
(22, "2021-07-01 00:00:00.600");
Affected Rows: 2
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 1 |
+------------------------------------------+
-- ORDER BY keeps the expected row order deterministic
SELECT maxmin, time_window FROM out_num_cnt_df_func ORDER BY time_window;
+--------+---------------------+
| maxmin | time_window |
+--------+---------------------+
| 2 | 2021-07-01T00:00:00 |
+--------+---------------------+
-- nothing new was inserted since the previous flush; the expected result is 0
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 0 |
+------------------------------------------+
-- both rows belong to the next bucket, starting at 2021-07-01 00:00:01: 24 - 23 = 1
INSERT INTO numbers_input_df_func
VALUES
(23,"2021-07-01 00:00:01.000"),
(24,"2021-07-01 00:00:01.500");
Affected Rows: 2
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 1 |
+------------------------------------------+
-- ORDER BY keeps the expected two-row result in a deterministic order
SELECT maxmin, time_window FROM out_num_cnt_df_func ORDER BY time_window;
+--------+---------------------+
| maxmin | time_window |
+--------+---------------------+
| 2 | 2021-07-01T00:00:00 |
| 1 | 2021-07-01T00:00:01 |
+--------+---------------------+
-- clean up so the following scenario can reuse the same flow and table names
DROP FLOW test_numbers_df_func;
Affected Rows: 0
DROP TABLE numbers_input_df_func;
Affected Rows: 0
DROP TABLE out_num_cnt_df_func;
Affected Rows: 0
-- test date_trunc
-- Scenario: group by a `date_trunc('second', ts)` expression that is repeated in
-- both the SELECT list and the GROUP BY clause.
CREATE TABLE numbers_input_df_func (
number INT,
ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(number),
TIME INDEX(ts)
);
Affected Rows: 0
-- this flow writes to `out_num_cnt`, a different sink table name from the `out_num_cnt_df_func` used by other scenarios
CREATE FLOW test_numbers_df_func
SINK TO out_num_cnt
AS
SELECT date_trunc('second', ts) as time_window, sum(number) as sum_num FROM numbers_input_df_func GROUP BY date_trunc('second', ts);
Affected Rows: 0
-- flush before any row has been inserted; the expected result is 0
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 0 |
+------------------------------------------+
-- both rows truncate to 2021-07-01 00:00:00: 20 + 22 = 42
INSERT INTO numbers_input_df_func
VALUES
(20, "2021-07-01 00:00:00.200"),
(22, "2021-07-01 00:00:00.600");
Affected Rows: 2
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 1 |
+------------------------------------------+
-- ORDER BY keeps the expected row order deterministic
SELECT time_window, sum_num FROM out_num_cnt ORDER BY time_window;
+---------------------+---------+
| time_window | sum_num |
+---------------------+---------+
| 2021-07-01T00:00:00 | 42 |
+---------------------+---------+
-- nothing new was inserted since the previous flush; the expected result is 0
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 0 |
+------------------------------------------+
-- both rows truncate to 2021-07-01 00:00:01: 23 + 24 = 47
INSERT INTO numbers_input_df_func
VALUES
(23,"2021-07-01 00:00:01.000"),
(24,"2021-07-01 00:00:01.500");
Affected Rows: 2
admin flush_flow('test_numbers_df_func');
+------------------------------------------+
| ADMIN flush_flow('test_numbers_df_func') |
+------------------------------------------+
| 1 |
+------------------------------------------+
-- ORDER BY keeps the expected two-row result in a deterministic order
SELECT time_window, sum_num FROM out_num_cnt ORDER BY time_window;
+---------------------+---------+
| time_window | sum_num |
+---------------------+---------+
| 2021-07-01T00:00:00 | 42 |
| 2021-07-01T00:00:01 | 47 |
+---------------------+---------+
-- clean up the flow, the source table and the sink table
DROP FLOW test_numbers_df_func;
Affected Rows: 0
DROP TABLE numbers_input_df_func;
Affected Rows: 0
DROP TABLE out_num_cnt;
Affected Rows: 0