mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-19 06:20:38 +00:00
134 lines
5.6 KiB
SQL
134 lines
5.6 KiB
SQL
-- Regression tests for PromQL execution plans that return mismatched
|
|
-- `Statistics::column_statistics` length.
|
|
--
|
|
-- DataFusion expects: `statistics.column_statistics.len() == schema.fields().len()`.
|
|
-- If not, `FilterExec` may fail while building `ExprBoundaries` with:
|
|
-- "Could not create `ExprBoundaries`: ... col_index has gone out of bounds ..."
|
|
|
|
-- -----------------------------------------------------------------------------
|
|
-- Case 1: HistogramFoldExec + topk(histogram_quantile(...)) on metric engine
|
|
-- -----------------------------------------------------------------------------
|
|
|
|
-- Physical metric-engine table for Case 1. The empty "physical_metric_table"
-- option marks it as the physical table that the logical table below is
-- declared on via `on_physical_table`.
CREATE TABLE promql_stats_mismatch_physical (
    ts  TIMESTAMP(3) NOT NULL,
    val DOUBLE,
    TIME INDEX (ts)
)
ENGINE = metric
WITH ("physical_metric_table" = "");
|
|
|
|
-- Logical metric-engine table holding the histogram bucket series.
-- Tags: cluster, le, instance, operation, type (all part of the primary key).
-- Field: val. Time index: ts.
-- Declared on the physical table named by `on_physical_table`.
CREATE TABLE promql_stats_mismatch_hist_bucket (
    `cluster` STRING NULL,
    le        STRING NULL,
    instance  STRING NULL,
    operation STRING NULL,
    `type`    STRING NULL,
    ts        TIMESTAMP(3) NOT NULL,
    val       DOUBLE NULL,
    TIME INDEX (ts),
    PRIMARY KEY (`cluster`, le, instance, operation, `type`)
) ENGINE = metric WITH (on_physical_table = 'promql_stats_mismatch_physical');
|
|
|
|
-- Two counter snapshots per (instance, le) series: one at ts = 1 ms and one at
-- ts = 300000 ms (5 minutes). `rate(...[5m])` is computed from the delta
-- between the two snapshots inside that window.
INSERT INTO promql_stats_mismatch_hist_bucket
    (`cluster`, le, instance, operation, `type`, ts, val)
VALUES
    -- Snapshot 1, ts = 1 ms: every bucket starts at 0.
    -- (1 ms rather than 0 avoids the range-start boundary exclusion.)
    ('cluster', '0.5',  'inst1', 'op', 't', 1, 0),
    ('cluster', '1',    'inst1', 'op', 't', 1, 0),
    ('cluster', '2',    'inst1', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst1', 'op', 't', 1, 0),

    ('cluster', '0.5',  'inst2', 'op', 't', 1, 0),
    ('cluster', '1',    'inst2', 'op', 't', 1, 0),
    ('cluster', '2',    'inst2', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst2', 'op', 't', 1, 0),

    ('cluster', '0.5',  'inst3', 'op', 't', 1, 0),
    ('cluster', '1',    'inst3', 'op', 't', 1, 0),
    ('cluster', '2',    'inst3', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst3', 'op', 't', 1, 0),

    ('cluster', '0.5',  'inst4', 'op', 't', 1, 0),
    ('cluster', '1',    'inst4', 'op', 't', 1, 0),
    ('cluster', '2',    'inst4', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst4', 'op', 't', 1, 0),

    ('cluster', '0.5',  'inst5', 'op', 't', 1, 0),
    ('cluster', '1',    'inst5', 'op', 't', 1, 0),
    ('cluster', '2',    'inst5', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst5', 'op', 't', 1, 0),

    ('cluster', '0.5',  'inst6', 'op', 't', 1, 0),
    ('cluster', '1',    'inst6', 'op', 't', 1, 0),
    ('cluster', '2',    'inst6', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst6', 'op', 't', 1, 0),

    -- Snapshot 2, ts = 300000 ms (5m): cumulative bucket counts per instance.
    ('cluster', '0.5',  'inst1', 'op', 't', 300000, 95),
    ('cluster', '1',    'inst1', 'op', 't', 300000, 98),
    ('cluster', '2',    'inst1', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst1', 'op', 't', 300000, 100),

    ('cluster', '0.5',  'inst2', 'op', 't', 300000, 50),
    ('cluster', '1',    'inst2', 'op', 't', 300000, 95),
    ('cluster', '2',    'inst2', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst2', 'op', 't', 300000, 100),

    ('cluster', '0.5',  'inst3', 'op', 't', 300000, 50),
    ('cluster', '1',    'inst3', 'op', 't', 300000, 75),
    ('cluster', '2',    'inst3', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst3', 'op', 't', 300000, 100),

    ('cluster', '0.5',  'inst4', 'op', 't', 300000, 50),
    ('cluster', '1',    'inst4', 'op', 't', 300000, 80),
    ('cluster', '2',    'inst4', 'op', 't', 300000, 97),
    ('cluster', '+Inf', 'inst4', 'op', 't', 300000, 100),

    ('cluster', '0.5',  'inst5', 'op', 't', 300000, 10),
    ('cluster', '1',    'inst5', 'op', 't', 300000, 20),
    ('cluster', '2',    'inst5', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst5', 'op', 't', 300000, 100),

    ('cluster', '0.5',  'inst6', 'op', 't', 300000, 0),
    ('cluster', '1',    'inst6', 'op', 't', 300000, 0),
    ('cluster', '2',    'inst6', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst6', 'op', 't', 300000, 100);
|
|
|
|
-- Regression query for Case 1: evaluates a single instant at t = 300s.
-- It previously failed because HistogramFoldExec returned a
-- `column_statistics` vector whose length did not match its schema.
TQL EVAL (300, 300, '300s') topk(
    5,
    histogram_quantile(
        0.95,
        sum(
            rate(promql_stats_mismatch_hist_bucket{cluster="cluster"}[5m])
        ) by (cluster, le, instance, operation, type)
    )
) AS q95;
|
|
|
|
-- Case 1 teardown: the logical table is dropped first, then the physical
-- table it was declared on.
DROP TABLE promql_stats_mismatch_hist_bucket;

DROP TABLE promql_stats_mismatch_physical;
|
|
|
|
-- -----------------------------------------------------------------------------
|
|
-- Case 2: InstantManipulateExec stats mismatch with nested Arrow fields
|
|
-- -----------------------------------------------------------------------------
|
|
--
|
|
-- The metric engine only allows a float64 field plus string tags, so it cannot create the nested schema
|
|
-- needed to reproduce the `flattened_fields().len()` vs `fields().len()` mismatch.
|
|
-- Use a normal table with a structured JSON (Arrow Struct) field column instead.
|
|
|
|
-- Regular (non-metric) table for Case 2: `ts` is the time index, `k` is the
-- only primary-key column, and `v` is the structured JSON2 field that gives
-- the schema a nested column.
CREATE TABLE promql_instant_mismatch_nested (
    ts TIMESTAMP(3) NOT NULL,
    k  STRING,
    v  JSON2,
    TIME INDEX (ts),
    PRIMARY KEY (k)
);
|
|
|
|
-- TODO(LFC): Uncomment the following SQLs and results when JSON2 is ready.
|
|
|
|
-- INSERT INTO promql_instant_mismatch_nested VALUES
|
|
-- (0, 'a', '{"x": 1}'),
|
|
-- (1000, 'a', '{"x": 2}');
|
|
|
|
-- Affected Rows: 0
|
|
|
|
-- This used to error due to InstantManipulateExec returning `column_statistics` sized by
|
|
-- `schema.flattened_fields().len()` when the schema contains nested fields (Arrow Struct).
|
|
-- SQLNESS SORT_RESULT 3 1
|
|
-- TQL EVAL (0, 1, '1s') promql_instant_mismatch_nested == promql_instant_mismatch_nested;
|
|
|
|
-- +---------------------+--------+---+
|
|
-- | ts | v | k |
|
|
-- +---------------------+--------+---+
|
|
-- | 1970-01-01T00:00:00 | {x: 1} | a |
|
|
-- | 1970-01-01T00:00:01 | {x: 2} | a |
|
|
-- +---------------------+--------+---+
|
|
|
|
-- Case 2 teardown.
DROP TABLE promql_instant_mismatch_nested;
|