Files
greptimedb/tests/cases/standalone/common/flow/flow_call_df_func.sql
discord9 9f9c1dab60 feat(flow): use DataFusion's optimizer (#4489)
* feat: use datafusion optimization

refactor: mv `sql_to_flow_plan` elsewhere

feat(WIP): use df optimization

WIP analyzer rule

feat(WIP): avg expander

fix: transform avg expander

fix: avg expand

feat: names from substrait

fix: avg rewrite

test: update `test_avg`&`test_avg_group_by`

test: fix `test_sum`

test: fix some tests

chore: remove unused flow plan transform

feat: tumble expander

test: update tests

* chore: clippy

* fix: tumble lose `group expr`

* test: sqlness test update

* test: rm unused cast

* test: simplify sqlness

* refactor: per review

* chore: after rebase

* fix: remove a outdated test

* test: add comment

* fix: report error when not literal

* chore: update sqlness test after rebase

* refactor: per review
2024-08-29 02:52:00 +00:00

161 lines
4.5 KiB
SQL

CREATE TABLE numbers_input_df_func (
number INT,
ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(number),
TIME INDEX(ts)
);
-- call `sum(abs(number))` where `abs` is DataFusion Function and `sum` is flow function
CREATE FLOW test_numbers_df_func
SINK TO out_num_cnt_df_func
AS
SELECT sum(abs(number)) FROM numbers_input_df_func GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00');
admin flush_flow('test_numbers_df_func');
INSERT INTO numbers_input_df_func
VALUES
(-20, "2021-07-01 00:00:00.200"),
(22, "2021-07-01 00:00:00.600");
-- flush flow to make sure that table is created and data is inserted
admin flush_flow('test_numbers_df_func');
-- note that this quote-unquote column is a column-name, **not** a aggregation expr, generated by datafusion
SELECT "SUM(abs(numbers_input_df_func.number))", window_start, window_end FROM out_num_cnt_df_func;
admin flush_flow('test_numbers_df_func');
INSERT INTO numbers_input_df_func
VALUES
(23,"2021-07-01 00:00:01.000"),
(-24,"2021-07-01 00:00:01.500");
admin flush_flow('test_numbers_df_func');
-- note that this quote-unquote column is a column-name, **not** a aggregation expr, generated by datafusion
SELECT "SUM(abs(numbers_input_df_func.number))", window_start, window_end FROM out_num_cnt_df_func;
DROP FLOW test_numbers_df_func;
DROP TABLE numbers_input_df_func;
DROP TABLE out_num_cnt_df_func;
CREATE TABLE numbers_input_df_func (
number INT,
ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(number),
TIME INDEX(ts)
);
-- call `abs(sum(number))`to make sure that calling `abs` function(impl by datafusion) on `sum` function(impl by flow) is working
CREATE FLOW test_numbers_df_func
SINK TO out_num_cnt_df_func
AS
SELECT abs(sum(number)) FROM numbers_input_df_func GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00');
admin flush_flow('test_numbers_df_func');
INSERT INTO numbers_input_df_func
VALUES
(-20, "2021-07-01 00:00:00.200"),
(22, "2021-07-01 00:00:00.600");
-- flush flow to make sure that table is created and data is inserted
admin flush_flow('test_numbers_df_func');
SELECT "abs(SUM(numbers_input_df_func.number))", window_start, window_end FROM out_num_cnt_df_func;
admin flush_flow('test_numbers_df_func');
INSERT INTO numbers_input_df_func
VALUES
(23,"2021-07-01 00:00:01.000"),
(-24,"2021-07-01 00:00:01.500");
admin flush_flow('test_numbers_df_func');
SELECT "abs(SUM(numbers_input_df_func.number))", window_start, window_end FROM out_num_cnt_df_func;
DROP FLOW test_numbers_df_func;
DROP TABLE numbers_input_df_func;
DROP TABLE out_num_cnt_df_func;
-- test date_bin
CREATE TABLE numbers_input_df_func (
number INT,
ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(number),
TIME INDEX(ts)
);
CREATE FLOW test_numbers_df_func
SINK TO out_num_cnt_df_func
AS
SELECT max(number) - min(number) as maxmin, date_bin(INTERVAL '1 second', ts, '2021-07-01 00:00:00'::Timestamp) as time_window FROM numbers_input_df_func GROUP BY time_window;
admin flush_flow('test_numbers_df_func');
INSERT INTO numbers_input_df_func
VALUES
(20, "2021-07-01 00:00:00.200"),
(22, "2021-07-01 00:00:00.600");
admin flush_flow('test_numbers_df_func');
SELECT maxmin, time_window FROM out_num_cnt_df_func;
admin flush_flow('test_numbers_df_func');
INSERT INTO numbers_input_df_func
VALUES
(23,"2021-07-01 00:00:01.000"),
(24,"2021-07-01 00:00:01.500");
admin flush_flow('test_numbers_df_func');
SELECT maxmin, time_window FROM out_num_cnt_df_func;
DROP FLOW test_numbers_df_func;
DROP TABLE numbers_input_df_func;
DROP TABLE out_num_cnt_df_func;
-- test date_trunc
CREATE TABLE numbers_input_df_func (
number INT,
ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(number),
TIME INDEX(ts)
);
CREATE FLOW test_numbers_df_func
SINK TO out_num_cnt
AS
SELECT date_trunc('second', ts) as time_window, sum(number) as sum_num FROM numbers_input_df_func GROUP BY date_trunc('second', ts);
admin flush_flow('test_numbers_df_func');
INSERT INTO numbers_input_df_func
VALUES
(20, "2021-07-01 00:00:00.200"),
(22, "2021-07-01 00:00:00.600");
admin flush_flow('test_numbers_df_func');
SELECT time_window, sum_num FROM out_num_cnt;
admin flush_flow('test_numbers_df_func');
INSERT INTO numbers_input_df_func
VALUES
(23,"2021-07-01 00:00:01.000"),
(24,"2021-07-01 00:00:01.500");
admin flush_flow('test_numbers_df_func');
SELECT time_window, sum_num FROM out_num_cnt;
DROP FLOW test_numbers_df_func;
DROP TABLE numbers_input_df_func;
DROP TABLE out_num_cnt;