Files
greptimedb/tests/cases/standalone/common/flow/flow_incremental_memtable.sql
discord9 eccd97b5c7 feat(flow): support incremental read checkpoints (#8179)
* feat: flownode inc mode

Signed-off-by: discord9 <discord9@163.com>

* chore: rename fallback reason

Signed-off-by: discord9 <discord9@163.com>

* fix: harden flow incremental checkpoints

Signed-off-by: discord9 <discord9@163.com>

* fix: address flow watermark lint

Signed-off-by: discord9 <discord9@163.com>

* fix: address flow clippy

Signed-off-by: discord9 <discord9@163.com>

* refactor: clarify incremental plan preparation

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

* test: more sqlness test

Signed-off-by: discord9 <discord9@163.com>

* refactor: per review

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2026-05-28 09:31:46 +00:00

67 lines
2.5 KiB
SQL

-- Validate that an incremental aggregate read performed by a flow reads only
-- memtable data and does NOT re-read source rows that were already flushed to
-- SST after a previous checkpoint.
-- Source table read by the flow under test. It is created with the
-- append_mode table option switched on.
-- Column order (host_id, n, ts) is relied on by positional INSERTs in this file.
CREATE TABLE flow_incr_memtable_input (
    host_id INT,
    n       INT,
    ts      TIMESTAMP TIME INDEX,
    PRIMARY KEY (host_id)
)
WITH (
    append_mode = 'true'
);
-- Flow under test: aggregates n (sum / min / max) per one-minute bucket of ts,
-- with buckets aligned to 2024-01-01 00:00:00, and sinks the result into
-- flow_incr_memtable_sink.
CREATE FLOW flow_incr_memtable
SINK TO flow_incr_memtable_sink
AS
SELECT
    sum(n) AS total,
    min(n) AS min_n,
    max(n) AS max_n,
    date_bin(INTERVAL '1 minute', ts, '2024-01-01 00:00:00') AS time_window
FROM flow_incr_memtable_input
GROUP BY time_window;
-- ==== Phase 1: initial insert + checkpoint ====
-- Seed two rows whose timestamps share the same one-minute bucket (00:00).
-- Target columns are named explicitly so the fixture does not depend on the
-- physical column order of flow_incr_memtable_input.
INSERT INTO flow_incr_memtable_input (host_id, n, ts) VALUES
    (1, 10, '2024-01-01 00:00:00'),
    (2, 20, '2024-01-01 00:00:30');

-- First flow run. The REPLACE directive masks the numeric cell printed for
-- FLUSH_FLOW with the fixed token FLOW_FLUSHED so the recorded output is stable.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('flow_incr_memtable');

-- Expected for the single window if the flow aggregated both seed rows:
-- total = 30, min_n = 10, max_n = 20.
SELECT
    total,
    min_n,
    max_n,
    time_window
FROM flow_incr_memtable_sink
ORDER BY time_window;
-- ==== Phase 2: flush sink and source tables to SST ====
-- The next incremental run must still read the flushed sink aggregate state,
-- while skipping already-checkpointed source SST files.
-- Both tables are flushed explicitly: first the sink that holds the aggregate
-- written in Phase 1, then the source that holds the Phase 1 input rows.
ADMIN FLUSH_TABLE('flow_incr_memtable_sink');
ADMIN FLUSH_TABLE('flow_incr_memtable_input');
-- ==== Phase 3: empty incremental window ====
-- No source rows are inserted before this flow run, so it exercises the case
-- where the incremental read finds no new memtable data.
-- The REPLACE directive masks the numeric cell printed for FLUSH_FLOW.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('flow_incr_memtable');

-- The sink is expected to be unchanged from Phase 1
-- (total = 30, min_n = 10, max_n = 20).
SELECT
    total,
    min_n,
    max_n,
    time_window
FROM flow_incr_memtable_sink
ORDER BY time_window;
-- ==== Phase 4: insert new delta within the same time window ====
-- Both timestamps land in the same one-minute bucket (00:00) as the Phase 1
-- rows, so the existing sink aggregate for that window has to be updated.
-- Target columns are named explicitly so the fixture does not depend on the
-- physical column order of flow_incr_memtable_input.
INSERT INTO flow_incr_memtable_input (host_id, n, ts) VALUES
    (3, 30, '2024-01-01 00:00:15'),
    (4, 40, '2024-01-01 00:00:45');
-- ==== Phase 5: flush flow again (incremental read) ====
-- This run should pick up only the rows inserted since the source was flushed
-- and merge them into the aggregate already stored in the sink. Re-reading
-- the flushed SST data would count the Phase 1 rows twice and inflate total.
-- The REPLACE directive masks the numeric cell printed for FLUSH_FLOW.
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('flow_incr_memtable');

-- Expected when every source row is counted exactly once:
-- total = 100 (10 + 20 + 30 + 40), min_n = 10, max_n = 40.
SELECT
    total,
    min_n,
    max_n,
    time_window
FROM flow_incr_memtable_sink
ORDER BY time_window;
-- Clean up every object used by this case: the flow first, then its source
-- table, then the sink table named in the flow's SINK TO clause.
DROP FLOW flow_incr_memtable;
DROP TABLE flow_incr_memtable_input;
DROP TABLE flow_incr_memtable_sink;