Files
greptimedb/tests/cases/standalone/common/flow/flow_step_aggr.sql
Zhenchi e2df38d0d1 chore: bump version to 0.14.1 (#6006)
* feat: remove own greatest fn (#5994)

* fix: prune primary key with multiple columns may use default value as statistics (#5996)

* test: incorrect test result when filtering pk with multiple columns

* fix: prune non first tag correctly

Distinguish between a missing column and missing stats, and only use the
default value when the column is missing

* test: update test result

* refactor: rename test file

* test: add test for null filter

* fix: use StatValues for null counts

* test: drop table

* test: fix unstable flow test

* fix: check if memtable is empty by stats (#5989)

fix/checking-memtable-empty-and-stats:
 - **Refactor timestamp updates**: Simplified timestamp range updates in `PartitionTreeMemtable` and `TimeSeriesMemtable` by replacing `update_timestamp_range` with `fetch_max` and `fetch_min` methods for `max_timestamp` and `min_timestamp`.
   - Affected files: `partition_tree.rs`, `time_series.rs`

 - **Remove unused code**: Deleted the `update_timestamp_range` method from `WriteMetrics` and removed unnecessary imports.
   - Affected file: `stats.rs`

 - **Optimize memtable filtering**: Streamlined the check for empty memtables in `ScanRegion` by directly using `time_range`.
   - Affected file: `scan_region.rs`

* chore: bump version to 0.14.1

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2025-04-28 07:39:49 +00:00

165 lines
3.7 KiB
SQL

-- Scenario: approximate distinct-user counting with HyperLogLog.
-- A flow folds raw access_log rows into one HLL state per ("url", 10-second window);
-- the queries below read that state back with hll_count and hll_merge.
CREATE TABLE access_log (
    "url" STRING,
    user_id BIGINT,
    ts TIMESTAMP TIME INDEX,
    PRIMARY KEY ("url", user_id)
);

-- sink table of the flow: one binary HLL state per ("url", time_window)
CREATE TABLE access_log_10s (
    "url" STRING,
    time_window timestamp time INDEX,
    state BINARY,
    PRIMARY KEY ("url")
);

CREATE FLOW calc_access_log_10s SINK TO access_log_10s
AS
SELECT
    "url",
    date_bin('10s'::INTERVAL, ts) AS time_window,
    hll(user_id) AS state
FROM
    access_log
GROUP BY
    "url",
    time_window;

-- insert 5 rows of data: three "/dashboard" hits (user 1 twice, user 2 once) in the
-- first 10-second window, then one "/not_found" and one "/dashboard" hit in the second.
-- Literals are single-quoted because double quotes denote identifiers in this file.
INSERT INTO access_log ("url", user_id, ts) VALUES
    ('/dashboard', 1, '2025-03-04 00:00:00'),
    ('/dashboard', 1, '2025-03-04 00:00:01'),
    ('/dashboard', 2, '2025-03-04 00:00:05'),
    ('/not_found', 3, '2025-03-04 00:00:11'),
    ('/dashboard', 4, '2025-03-04 00:00:15');

-- force the flow to process the inserted rows before querying the sink table
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_access_log_10s');

-- query should return 3 rows
-- two of them share the second time window, so "url" is added as a tie-breaker
-- SQLNESS SORT_RESULT 3 1
SELECT
    "url",
    time_window
FROM
    access_log_10s
ORDER BY
    time_window,
    "url";

-- use hll_count to query the approximate data in access_log_10s
-- SQLNESS SORT_RESULT 3 1
SELECT
    "url",
    time_window,
    hll_count(state)
FROM
    access_log_10s
ORDER BY
    time_window,
    "url";

-- further, we can aggregate 10 seconds of data to every minute, by using hll_merge to merge 10 seconds of hyperloglog state
-- both urls fall into the same minute, so "url" is added as a tie-breaker
-- SQLNESS SORT_RESULT 3 1
SELECT
    "url",
    date_bin('1 minute'::INTERVAL, time_window) AS time_window_1m,
    hll_count(hll_merge(state)) as uv_per_min
FROM
    access_log_10s
GROUP BY
    "url",
    time_window_1m
ORDER BY
    time_window_1m,
    "url";

-- clean up so later scenarios start from an empty catalog
DROP FLOW calc_access_log_10s;
DROP TABLE access_log_10s;
DROP TABLE access_log;
-- Scenario: percentile sketch maintained by a flow, with out-of-range values
-- removed by a WHERE clause before aggregation.
CREATE TABLE percentile_base (
    "id" INT PRIMARY KEY,
    "value" DOUBLE,
    ts timestamp(0) time index
);

-- sink table of the flow: one binary sketch state per 5-second window
CREATE TABLE percentile_5s (
    "percentile_state" BINARY,
    time_window timestamp(0) time index
);

-- NOTE(review): the sketch is aliased "value" although it is stored in
-- percentile_state -- presumably sink columns are matched by position; confirm.
CREATE FLOW calc_percentile_5s SINK TO percentile_5s
AS
SELECT
    uddsketch_state(128, 0.01, "value") AS "value",
    date_bin('5 seconds'::INTERVAL, ts) AS time_window
FROM
    percentile_base
WHERE
    "value" > 0 AND "value" < 70
GROUP BY
    time_window;

-- ten rows with values 10.0 .. 100.0; only 10.0 .. 60.0 satisfy the flow's
-- WHERE clause, so rows 7-10 never reach the aggregate.
-- Assumes the integer ts literals are read as seconds for timestamp(0) -- confirm.
INSERT INTO percentile_base ("id", "value", ts) VALUES
    (1, 10.0, 1),
    (2, 20.0, 2),
    (3, 30.0, 3),
    (4, 40.0, 4),
    (5, 50.0, 5),
    (6, 60.0, 6),
    (7, 70.0, 7),
    (8, 80.0, 8),
    (9, 90.0, 9),
    (10, 100.0, 10);

-- force the flow to process the inserted rows before querying the sink table
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_percentile_5s');

-- read the 99th percentile back from each window's sketch state;
-- percentile_state is referenced without backticks, matching the other
-- percentile query in this file
SELECT
    time_window,
    uddsketch_calc(0.99, percentile_state) AS p99
FROM
    percentile_5s
ORDER BY
    time_window;

-- clean up; the next scenario recreates the same table and flow names
DROP FLOW calc_percentile_5s;
DROP TABLE percentile_5s;
DROP TABLE percentile_base;
-- Scenario: same percentile flow, but the range filter lives inside the
-- aggregate argument (CASE ... ELSE NULL) instead of a WHERE clause, so every
-- source row still takes part in the GROUP BY.
CREATE TABLE percentile_base (
    "id" INT PRIMARY KEY,
    "value" DOUBLE,
    ts TIMESTAMP(0) TIME INDEX
);

-- sink table of the flow: one binary sketch state per 5-second window
CREATE TABLE percentile_5s (
    "percentile_state" BINARY,
    time_window TIMESTAMP(0) TIME INDEX
);

-- values outside (0, 70) are turned into NULL before reaching uddsketch_state
CREATE FLOW calc_percentile_5s SINK TO percentile_5s
AS
SELECT
    uddsketch_state(
        128,
        0.01,
        CASE
            WHEN "value" > 0 AND "value" < 70 THEN "value"
            ELSE NULL
        END
    ) AS "value",
    date_bin('5 seconds'::INTERVAL, ts) AS time_window
FROM
    percentile_base
GROUP BY
    time_window;

-- ten rows with values 10.0 .. 100.0; rows 7-10 fall outside the CASE range
-- and are fed to the sketch as NULL rather than being dropped
INSERT INTO percentile_base ("id", "value", ts) VALUES
    (1, 10.0, 1),
    (2, 20.0, 2),
    (3, 30.0, 3),
    (4, 40.0, 4),
    (5, 50.0, 5),
    (6, 60.0, 6),
    (7, 70.0, 7),
    (8, 80.0, 8),
    (9, 90.0, 9),
    (10, 100.0, 10);

-- force the flow to process the inserted rows before querying the sink table
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_percentile_5s');

-- read the 99th percentile back from each window's sketch state
SELECT
    time_window,
    uddsketch_calc(0.99, percentile_state) AS p99
FROM
    percentile_5s
ORDER BY
    time_window;

-- clean up everything this scenario created
DROP FLOW calc_percentile_5s;
DROP TABLE percentile_5s;
DROP TABLE percentile_base;