mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-06-01 21:00:38 +00:00
* fix(flow): harden incremental read correctness Signed-off-by: discord9 <discord9@163.com> * fix(flow): propagate dirty window options Signed-off-by: discord9 <discord9@163.com> * test: more Signed-off-by: discord9 <discord9@163.com> * chore: test config api Signed-off-by: discord9 <discord9@163.com> * refactor: split gen Signed-off-by: discord9 <discord9@163.com> * chore: per review Signed-off-by: discord9 <discord9@163.com> * fix: allowlist key Signed-off-by: discord9 <discord9@163.com> --------- Signed-off-by: discord9 <discord9@163.com>
135 lines
4.4 KiB
Plaintext
135 lines
4.4 KiB
Plaintext
-- Validate that a flow with incremental aggregate read enabled reads only memtable
|
|
-- data and does NOT re-read source rows that have already been flushed to SST after
|
|
-- a previous checkpoint.
|
|
-- Source table read by the flow_incr_memtable flow: `n` is the column the flow
-- aggregates and `ts` (the time index) is the column it buckets into windows.
-- The table is created with the append_mode option set to 'true'.
CREATE TABLE flow_incr_memtable_input (
|
|
host_id INT,
|
|
n INT,
|
|
ts TIMESTAMP TIME INDEX,
|
|
PRIMARY KEY(host_id)
|
|
) WITH (
|
|
append_mode = 'true'
|
|
);
|
|
|
|
Affected Rows: 0
|
|
|
|
-- Flow under test: for each 1-minute bucket of `ts` (bucket origin
-- 2024-01-01 00:00:00) it writes sum/min/max of `n` into flow_incr_memtable_sink.
-- Incremental read is switched on through the experimental flow option below.
CREATE FLOW flow_incr_memtable SINK TO flow_incr_memtable_sink
|
|
WITH (experimental_enable_incremental_read = 'true')
|
|
AS
|
|
SELECT
|
|
sum(n) AS total,
|
|
min(n) AS min_n,
|
|
max(n) AS max_n,
|
|
date_bin(INTERVAL '1 minute', ts, '2024-01-01 00:00:00') AS time_window
|
|
FROM
|
|
flow_incr_memtable_input
|
|
GROUP BY
|
|
time_window;
|
|
|
|
Affected Rows: 0
|
|
|
|
-- ==== Phase 1: initial insert + checkpoint ====
|
|
-- Both rows fall into the 00:00 window, so after the first flow flush the sink
-- should report total = 30, min_n = 10, max_n = 20.
INSERT INTO flow_incr_memtable_input VALUES
|
|
(1, 10, '2024-01-01 00:00:00'),
|
|
(2, 20, '2024-01-01 00:00:30');
|
|
|
|
Affected Rows: 2
|
|
|
|
-- FLUSH_FLOW prints a number; the REPLACE directive below rewrites that number to
-- FLOW_FLUSHED so the recorded output does not depend on its value.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
|
|
ADMIN FLUSH_FLOW('flow_incr_memtable');
|
|
|
|
+----------------------------------------+
|
|
| ADMIN FLUSH_FLOW('flow_incr_memtable') |
|
|
+----------------------------------------+
|
|
| FLOW_FLUSHED |
|
|
+----------------------------------------+
|
|
|
|
-- Read back the sink after the first flush; a single 00:00 window row is expected.
SELECT total, min_n, max_n, time_window FROM flow_incr_memtable_sink ORDER BY time_window;
|
|
|
|
+-------+-------+-------+---------------------+
|
|
| total | min_n | max_n | time_window |
|
|
+-------+-------+-------+---------------------+
|
|
| 30 | 10 | 20 | 2024-01-01T00:00:00 |
|
|
+-------+-------+-------+---------------------+
|
|
|
|
-- ==== Phase 2: flush sink and source tables to SST ====
|
|
-- The next incremental run must still read the flushed sink aggregate state,
|
|
-- while skipping already-checkpointed source SST files.
|
|
-- Flush the sink table, which currently holds the aggregate row written in Phase 1.
ADMIN FLUSH_TABLE('flow_incr_memtable_sink');
|
|
|
|
+----------------------------------------------+
|
|
| ADMIN FLUSH_TABLE('flow_incr_memtable_sink') |
|
|
+----------------------------------------------+
|
|
| 0 |
|
|
+----------------------------------------------+
|
|
|
|
-- Flush the source table holding the two Phase 1 rows; later flow runs must not
-- count these rows a second time.
ADMIN FLUSH_TABLE('flow_incr_memtable_input');
|
|
|
|
+-----------------------------------------------+
|
|
| ADMIN FLUSH_TABLE('flow_incr_memtable_input') |
|
|
+-----------------------------------------------+
|
|
| 0 |
|
|
+-----------------------------------------------+
|
|
|
|
-- ==== Phase 3: empty incremental window ====
|
|
-- Flush the flow without inserting any new source rows to verify that
|
|
-- the incremental read correctly handles the case where no new memtable
|
|
-- data exists; the sink aggregate must stay unchanged.
|
|
-- As for the first flush, the numeric FLUSH_FLOW result is masked with FLOW_FLUSHED.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
|
|
ADMIN FLUSH_FLOW('flow_incr_memtable');
|
|
|
|
+----------------------------------------+
|
|
| ADMIN FLUSH_FLOW('flow_incr_memtable') |
|
|
+----------------------------------------+
|
|
| FLOW_FLUSHED |
|
|
+----------------------------------------+
|
|
|
|
-- The sink row must match the Phase 1 result: total = 30, min_n = 10, max_n = 20.
SELECT total, min_n, max_n, time_window FROM flow_incr_memtable_sink ORDER BY time_window;
|
|
|
|
+-------+-------+-------+---------------------+
|
|
| total | min_n | max_n | time_window |
|
|
+-------+-------+-------+---------------------+
|
|
| 30 | 10 | 20 | 2024-01-01T00:00:00 |
|
|
+-------+-------+-------+---------------------+
|
|
|
|
-- ==== Phase 4: insert new delta within the same time window ====
|
|
-- Two more rows (n = 30 and n = 40) whose timestamps fall into the same 00:00
-- window as the Phase 1 rows.
INSERT INTO flow_incr_memtable_input VALUES
|
|
(3, 30, '2024-01-01 00:00:15'),
|
|
(4, 40, '2024-01-01 00:00:45');
|
|
|
|
Affected Rows: 2
|
|
|
|
-- ==== Phase 5: flush flow again (incremental read) ====
|
|
-- The flow must only read the new memtable delta and merge with the existing
|
|
-- sink aggregate. If it mistakenly re-reads the SST, the result will be
|
|
-- inflated (initial data counted twice): total would be 130 instead of 100.
|
|
-- The numeric FLUSH_FLOW result is masked with FLOW_FLUSHED, as for the earlier flushes.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
|
|
ADMIN FLUSH_FLOW('flow_incr_memtable');
|
|
|
|
+----------------------------------------+
|
|
| ADMIN FLUSH_FLOW('flow_incr_memtable') |
|
|
+----------------------------------------+
|
|
| FLOW_FLUSHED |
|
|
+----------------------------------------+
|
|
|
|
-- Expected: total = 10 + 20 + 30 + 40 = 100, min_n = 10, max_n = 40, still one window row.
SELECT total, min_n, max_n, time_window FROM flow_incr_memtable_sink ORDER BY time_window;
|
|
|
|
+-------+-------+-------+---------------------+
|
|
| total | min_n | max_n | time_window |
|
|
+-------+-------+-------+---------------------+
|
|
| 100 | 10 | 40 | 2024-01-01T00:00:00 |
|
|
+-------+-------+-------+---------------------+
|
|
|
|
-- Clean up
|
|
-- Remove everything this test created: the flow, then its source table, then its sink table.
DROP FLOW flow_incr_memtable;
|
|
|
|
Affected Rows: 0
|
|
|
|
DROP TABLE flow_incr_memtable_input;
|
|
|
|
Affected Rows: 0
|
|
|
|
DROP TABLE flow_incr_memtable_sink;
|
|
|
|
Affected Rows: 0
|
|
|