mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-31 12:20:38 +00:00
* feat: flownode inc mode Signed-off-by: discord9 <discord9@163.com> * chore: rename fallback reason Signed-off-by: discord9 <discord9@163.com> * fix: harden flow incremental checkpoints Signed-off-by: discord9 <discord9@163.com> * fix: address flow watermark lint Signed-off-by: discord9 <discord9@163.com> * fix: address flow clippy Signed-off-by: discord9 <discord9@163.com> * refactor: clarify incremental plan preparation Signed-off-by: discord9 <discord9@163.com> * refactor: per review Signed-off-by: discord9 <discord9@163.com> * refactor: per review Signed-off-by: discord9 <discord9@163.com> * test: more sqlness test Signed-off-by: discord9 <discord9@163.com> * refactor: per review Signed-off-by: discord9 <discord9@163.com> --------- Signed-off-by: discord9 <discord9@163.com>
67 lines
2.5 KiB
SQL
67 lines
2.5 KiB
SQL
-- Validate that a flow running an incremental aggregate reads only the new
-- memtable data of its source table and does NOT re-read source rows that were
-- already flushed to SST after a previous checkpoint.
|
|
-- Source table read by the flow: two INT columns plus the `ts` time index,
-- keyed by `host_id`, created with the `append_mode` table option set to 'true'.
CREATE TABLE flow_incr_memtable_input (
    host_id INT,
    n INT,
    ts TIMESTAMP TIME INDEX,
    PRIMARY KEY (host_id)
) WITH (append_mode = 'true');
|
|
|
|
-- Flow under test: per one-minute window (bins aligned to 2024-01-01 00:00:00)
-- it computes sum/min/max of `n` from flow_incr_memtable_input and writes the
-- result to flow_incr_memtable_sink. The select-list order is kept as-is.
CREATE FLOW flow_incr_memtable
SINK TO flow_incr_memtable_sink
AS
SELECT
    sum(n) AS total,
    min(n) AS min_n,
    max(n) AS max_n,
    date_bin(INTERVAL '1 minute', ts, '2024-01-01 00:00:00') AS time_window
FROM flow_incr_memtable_input
GROUP BY time_window;
|
|
|
|
-- ==== Phase 1: initial insert + checkpoint ====
-- Seed two rows that both fall into the 00:00 one-minute window. The target
-- columns are named explicitly so the fixture does not silently depend on the
-- column order of the CREATE TABLE statement.
INSERT INTO flow_incr_memtable_input (host_id, n, ts) VALUES
    (1, 10, '2024-01-01 00:00:00'),
    (2, 20, '2024-01-01 00:00:30');

-- The REPLACE directive below rewrites the numeric value printed by FLUSH_FLOW
-- to the fixed token FLOW_FLUSHED, keeping the recorded output stable.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('flow_incr_memtable');

-- Expected after the first run: a single window with total = 10 + 20 = 30,
-- min_n = 10, max_n = 20.
SELECT
    total,
    min_n,
    max_n,
    time_window
FROM flow_incr_memtable_sink
ORDER BY time_window;
|
|
|
|
-- ==== Phase 2: flush sink and source tables to SST ====
-- Flush both tables. The following incremental run still has to pick up the
-- flushed sink aggregate state, but it must skip the source SST files that are
-- already covered by the checkpoint.
ADMIN FLUSH_TABLE('flow_incr_memtable_sink');
ADMIN FLUSH_TABLE('flow_incr_memtable_input');
|
|
|
|
-- ==== Phase 3: empty incremental window ====
-- Run the flow again without inserting any source rows, to check that the
-- incremental read copes with there being no new memtable data at all.
-- The REPLACE directive rewrites the numeric value printed by FLUSH_FLOW to
-- the fixed token FLOW_FLUSHED.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('flow_incr_memtable');

-- No source rows were added since the previous run, so the sink is expected to
-- show the same aggregate as before.
SELECT
    total,
    min_n,
    max_n,
    time_window
FROM flow_incr_memtable_sink
ORDER BY time_window;
|
|
|
|
-- ==== Phase 4: insert new delta within the same time window ====
-- Both timestamps fall into the 00:00 one-minute window, the same window as the
-- rows inserted in phase 1. The target columns are named explicitly so the
-- fixture does not silently depend on the table's column order.
INSERT INTO flow_incr_memtable_input (host_id, n, ts) VALUES
    (3, 30, '2024-01-01 00:00:15'),
    (4, 40, '2024-01-01 00:00:45');
|
|
|
|
-- ==== Phase 5: flush flow again (incremental read) ====
-- This run must read only the new memtable delta and merge it into the sink
-- aggregate that already exists. Re-reading the SST by mistake would count the
-- initial rows twice and inflate the result.
-- The REPLACE directive rewrites the numeric value printed by FLUSH_FLOW to
-- the fixed token FLOW_FLUSHED.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('flow_incr_memtable');

-- Expected: total = 10 + 20 + 30 + 40 = 100, min_n = 10, max_n = 40.
SELECT
    total,
    min_n,
    max_n,
    time_window
FROM flow_incr_memtable_sink
ORDER BY time_window;
|
|
|
|
-- Clean up: drop the flow first, then its source table and its sink table.
DROP FLOW flow_incr_memtable;
DROP TABLE flow_incr_memtable_input;
DROP TABLE flow_incr_memtable_sink;
|