Files
greptimedb/tests/cases/standalone/common/promql/stats_schema_mismatch_regression.sql
2026-04-15 03:38:01 +00:00

134 lines
5.6 KiB
SQL

-- Regression tests for PromQL execution plans that return mismatched
-- `Statistics::column_statistics` length.
--
-- DataFusion expects: `statistics.column_statistics.len() == schema.fields().len()`.
-- If not, `FilterExec` may fail while building `ExprBoundaries` with:
-- "Could not create `ExprBoundaries`: ... col_index has gone out of bounds ..."
-- -----------------------------------------------------------------------------
-- Case 1: HistogramFoldExec + topk(histogram_quantile(...)) on metric engine
-- -----------------------------------------------------------------------------
-- Physical metric-engine table for Case 1; the logical histogram table created next
-- refers to it through `on_physical_table`.
CREATE TABLE promql_stats_mismatch_physical (
    ts  TIMESTAMP(3) TIME INDEX,
    val DOUBLE
)
ENGINE = metric
WITH ("physical_metric_table" = "");
-- Logical histogram-bucket table stored on `promql_stats_mismatch_physical`.
-- The five string label columns form the primary key; `ts` is the time index and
-- `val` holds the sample value.
CREATE TABLE promql_stats_mismatch_hist_bucket (
    `cluster`  STRING NULL,
    le         STRING NULL,
    instance   STRING NULL,
    operation  STRING NULL,
    `type`     STRING NULL,
    ts         TIMESTAMP(3) NOT NULL,
    val        DOUBLE NULL,
    TIME INDEX (ts),
    PRIMARY KEY (`cluster`, le, instance, operation, `type`)
) ENGINE = metric WITH (on_physical_table = 'promql_stats_mismatch_physical');
-- Seed two counter samples per series: one at ts = 1 ms and one at ts = 300000 ms (5 min).
-- `rate(...[5m])` works from the delta between the two samples in that window.
-- Six instances (inst1..inst6), each with buckets le = 0.5, 1, 2 and +Inf.
INSERT INTO promql_stats_mismatch_hist_bucket
    (`cluster`, le, instance, operation, `type`, ts, val)
VALUES
    -- First sample, ts = 1 ms: all counters start at 0
    -- (1 ms rather than 0 to avoid range-start boundary exclusion).
    ('cluster', '0.5',  'inst1', 'op', 't', 1, 0),
    ('cluster', '1',    'inst1', 'op', 't', 1, 0),
    ('cluster', '2',    'inst1', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst1', 'op', 't', 1, 0),
    ('cluster', '0.5',  'inst2', 'op', 't', 1, 0),
    ('cluster', '1',    'inst2', 'op', 't', 1, 0),
    ('cluster', '2',    'inst2', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst2', 'op', 't', 1, 0),
    ('cluster', '0.5',  'inst3', 'op', 't', 1, 0),
    ('cluster', '1',    'inst3', 'op', 't', 1, 0),
    ('cluster', '2',    'inst3', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst3', 'op', 't', 1, 0),
    ('cluster', '0.5',  'inst4', 'op', 't', 1, 0),
    ('cluster', '1',    'inst4', 'op', 't', 1, 0),
    ('cluster', '2',    'inst4', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst4', 'op', 't', 1, 0),
    ('cluster', '0.5',  'inst5', 'op', 't', 1, 0),
    ('cluster', '1',    'inst5', 'op', 't', 1, 0),
    ('cluster', '2',    'inst5', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst5', 'op', 't', 1, 0),
    ('cluster', '0.5',  'inst6', 'op', 't', 1, 0),
    ('cluster', '1',    'inst6', 'op', 't', 1, 0),
    ('cluster', '2',    'inst6', 'op', 't', 1, 0),
    ('cluster', '+Inf', 'inst6', 'op', 't', 1, 0),
    -- Second sample, ts = 300000 ms (5 min): cumulative bucket counts per instance.
    ('cluster', '0.5',  'inst1', 'op', 't', 300000, 95),
    ('cluster', '1',    'inst1', 'op', 't', 300000, 98),
    ('cluster', '2',    'inst1', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst1', 'op', 't', 300000, 100),
    ('cluster', '0.5',  'inst2', 'op', 't', 300000, 50),
    ('cluster', '1',    'inst2', 'op', 't', 300000, 95),
    ('cluster', '2',    'inst2', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst2', 'op', 't', 300000, 100),
    ('cluster', '0.5',  'inst3', 'op', 't', 300000, 50),
    ('cluster', '1',    'inst3', 'op', 't', 300000, 75),
    ('cluster', '2',    'inst3', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst3', 'op', 't', 300000, 100),
    ('cluster', '0.5',  'inst4', 'op', 't', 300000, 50),
    ('cluster', '1',    'inst4', 'op', 't', 300000, 80),
    ('cluster', '2',    'inst4', 'op', 't', 300000, 97),
    ('cluster', '+Inf', 'inst4', 'op', 't', 300000, 100),
    ('cluster', '0.5',  'inst5', 'op', 't', 300000, 10),
    ('cluster', '1',    'inst5', 'op', 't', 300000, 20),
    ('cluster', '2',    'inst5', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst5', 'op', 't', 300000, 100),
    ('cluster', '0.5',  'inst6', 'op', 't', 300000, 0),
    ('cluster', '1',    'inst6', 'op', 't', 300000, 0),
    ('cluster', '2',    'inst6', 'op', 't', 300000, 100),
    ('cluster', '+Inf', 'inst6', 'op', 't', 300000, 100);
-- Regression query: top-5 p95 over the per-series 5m rate of the histogram buckets,
-- evaluated once at t = 300s. It previously failed because HistogramFoldExec returned
-- `column_statistics` with the wrong length.
TQL EVAL (300, 300, '300s')
    topk(
        5,
        histogram_quantile(
            0.95,
            sum(
                rate(promql_stats_mismatch_hist_bucket{cluster="cluster"}[5m])
            ) by (cluster, le, instance, operation, type)
        )
    ) AS q95;
-- Tear down Case 1: drop the logical table first, then the physical table it was created on.
DROP TABLE promql_stats_mismatch_hist_bucket;
DROP TABLE promql_stats_mismatch_physical;
-- -----------------------------------------------------------------------------
-- Case 2: InstantManipulateExec stats mismatch with nested Arrow fields
-- -----------------------------------------------------------------------------
--
-- Metric engine enforces float64 field + string tags, so it can't create a nested schema
-- needed to reproduce the `flattened_fields().len()` vs `fields().len()` mismatch.
-- Use a normal table with a structured JSON (Arrow Struct) field column instead.
-- Case 2 fixture: a table with no explicit ENGINE clause whose value column `v` is
-- declared as JSON2. `ts` is the time index and `k` is the primary key.
CREATE TABLE promql_instant_mismatch_nested (
ts TIMESTAMP(3) TIME INDEX,
k STRING PRIMARY KEY,
v JSON2,
);
-- TODO(LFC): Uncomment the following statements and their expected results once JSON2 is ready.
-- INSERT INTO promql_instant_mismatch_nested VALUES
-- (0, 'a', '{"x": 1}'),
-- (1000, 'a', '{"x": 2}');
-- Affected Rows: 0
-- This used to error due to InstantManipulateExec returning `column_statistics` sized by
-- `schema.flattened_fields().len()` when the schema contains nested fields (Arrow Struct).
-- SQLNESS SORT_RESULT 3 1
-- TQL EVAL (0, 1, '1s') promql_instant_mismatch_nested == promql_instant_mismatch_nested;
-- +---------------------+--------+---+
-- | ts | v | k |
-- +---------------------+--------+---+
-- | 1970-01-01T00:00:00 | {x: 1} | a |
-- | 1970-01-01T00:00:01 | {x: 2} | a |
-- +---------------------+--------+---+
-- Tear down the Case 2 fixture table.
-- NOTE(review): the disabled `-- SQLNESS SORT_RESULT 3 1` line in the commented-out section
-- above still starts with the sqlness directive prefix, so the runner may attach it to this
-- statement; presumably harmless for a DROP, but TODO confirm.
DROP TABLE promql_instant_mismatch_nested;