From 2f82e7525f8c7e99e623331254ef08927f7b99c1 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Fri, 23 Jan 2026 15:46:46 +0800 Subject: [PATCH] fix: correct histogram result (#7608) correct result by removing tsid Signed-off-by: Ruihang Xia --- src/query/src/promql/planner.rs | 5 + .../tsid_histogram_quantile_regression.result | 153 ++++++++++++++++++ .../tsid_histogram_quantile_regression.sql | 81 ++++++++++ 3 files changed, 239 insertions(+) create mode 100644 tests/cases/standalone/common/promql/tsid_histogram_quantile_regression.result create mode 100644 tests/cases/standalone/common/promql/tsid_histogram_quantile_regression.sql diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 1a5e12d1ac..91deb507ce 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -2949,6 +2949,11 @@ impl PromPlanner { let input = args.args[1].as_ref().clone(); let input_plan = self.prom_expr_to_plan(&input, query_engine_state).await?; + // `histogram_quantile` folds buckets across `le`, so `__tsid` (which includes `le`) is not + // a stable series identifier anymore. Also, HistogramFold infers label columns from the + // input schema and must not treat `__tsid` as a label column. + let input_plan = self.strip_tsid_column(input_plan)?; + self.ctx.use_tsid = false; if !self.ctx.has_le_tag() { // Return empty result instead of error when 'le' column is not found diff --git a/tests/cases/standalone/common/promql/tsid_histogram_quantile_regression.result b/tests/cases/standalone/common/promql/tsid_histogram_quantile_regression.result new file mode 100644 index 0000000000..e69c23851b --- /dev/null +++ b/tests/cases/standalone/common/promql/tsid_histogram_quantile_regression.result @@ -0,0 +1,153 @@ +-- Regression test for `__tsid` optimization on non-aggregating PromQL queries. +-- The `__tsid` column must not affect operators that infer label columns from the input schema, +-- such as `histogram_quantile` (HistogramFold). +CREATE TABLE tsid_no_aggr_physical ( + ts TIMESTAMP(3) TIME INDEX, + val DOUBLE, +) ENGINE = metric WITH ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE tsid_no_aggr_metric ( + job STRING NULL, + instance STRING NULL, + ts TIMESTAMP(3) NOT NULL, + val DOUBLE NULL, + TIME INDEX (ts), + PRIMARY KEY(job, instance), +) +ENGINE = metric +WITH( + on_physical_table = 'tsid_no_aggr_physical' +); + +Affected Rows: 0 + +CREATE TABLE tsid_no_aggr_histogram_bucket ( + job STRING NULL, + le STRING NULL, + ts TIMESTAMP(3) NOT NULL, + val DOUBLE NULL, + TIME INDEX (ts), + PRIMARY KEY(job, le), +) +ENGINE = metric +WITH( + on_physical_table = 'tsid_no_aggr_physical' +); + +Affected Rows: 0 + +INSERT INTO tsid_no_aggr_metric VALUES + ('job1', 'instance1', 0, 1), + ('job1', 'instance1', 5000, 3), + ('job1', 'instance1', 10000, 5); + +Affected Rows: 3 + +INSERT INTO tsid_no_aggr_histogram_bucket VALUES + ('job1', '1', 0, 1), + ('job1', '2', 0, 2), + ('job1', '+Inf', 0, 3), + ('job1', '1', 5000, 2), + ('job1', '2', 5000, 4), + ('job1', '+Inf', 5000, 6), + ('job1', '1', 10000, 3), + ('job1', '2', 10000, 6), + ('job1', '+Inf', 10000, 9); + +Affected Rows: 9 + +-- Selector (no series merge) +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 10, '5s') tsid_no_aggr_metric; + ++-----+----------+-----------+---------------------+ +| val | instance | job | ts | ++-----+----------+-----------+---------------------+ +| 1.0 | job1 | instance1 | 1970-01-01T00:00:00 | +| 3.0 | job1 | instance1 | 1970-01-01T00:00:05 | +| 5.0 | job1 | instance1 | 1970-01-01T00:00:10 | ++-----+----------+-----------+---------------------+ + +-- Scalar function (no series merge) +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 10, '5s') abs(tsid_no_aggr_metric); + ++---------------------+----------+----------+-----------+ +| ts | abs(val) | instance | job | ++---------------------+----------+----------+-----------+ +| 1970-01-01T00:00:00 | 1.0 | job1 | instance1 | +| 1970-01-01T00:00:05 | 3.0 | job1 | instance1 | +| 1970-01-01T00:00:10 | 5.0 | job1 | instance1 | ++---------------------+----------+----------+-----------+ + +-- Range function (no series merge) +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 10, '5s') avg_over_time(tsid_no_aggr_metric[5s]); + ++---------------------+----------------------------------+----------+-----------+ +| ts | prom_avg_over_time(ts_range,val) | instance | job | ++---------------------+----------------------------------+----------+-----------+ +| 1970-01-01T00:00:00 | 1.0 | job1 | instance1 | +| 1970-01-01T00:00:05 | 2.0 | job1 | instance1 | +| 1970-01-01T00:00:10 | 4.0 | job1 | instance1 | ++---------------------+----------------------------------+----------+-----------+ + +-- Histogram quantile must fold buckets across `le` even when `__tsid` exists. +-- The physical plan must not treat `__tsid` as a label column for HistogramFold. +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +TQL ANALYZE (0, 10, '5s') histogram_quantile(0.5, tsid_no_aggr_histogram_bucket); + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_HistogramFoldExec: le=@2, field=@0, quantile=0.5 REDACTED +|_|_|_SortExec: expr=[job@1 ASC NULLS LAST, ts@3 ASC NULLS LAST, CAST(le@2 AS Float64) ASC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[val@0 as val, job@1 as job, le@2 as le, ts@4 as ts] REDACTED +|_|_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[ts] REDACTED +|_|_|_PromSeriesDivideExec: tags=["__tsid"] REDACTED +|_|_|_SortExec: expr=[__tsid@3 ASC, ts@4 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=REDACTED +|_|_|_ProjectionExec: expr=[val@1 as val, job@3 as job, le@4 as le, __tsid@2 as __tsid, ts@0 as ts] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 3_| ++-+-+-+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 10, '5s') histogram_quantile(0.5, tsid_no_aggr_histogram_bucket); + ++-----+------+---------------------+ +| val | job | ts | ++-----+------+---------------------+ +| 1.5 | job1 | 1970-01-01T00:00:00 | +| 1.5 | job1 | 1970-01-01T00:00:05 | +| 1.5 | job1 | 1970-01-01T00:00:10 | ++-----+------+---------------------+ + +DROP TABLE tsid_no_aggr_histogram_bucket; + +Affected Rows: 0 + +DROP TABLE tsid_no_aggr_metric; + +Affected Rows: 0 + +DROP TABLE tsid_no_aggr_physical; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/promql/tsid_histogram_quantile_regression.sql b/tests/cases/standalone/common/promql/tsid_histogram_quantile_regression.sql new file mode 100644 index 0000000000..2c450d3bc2 --- /dev/null +++ b/tests/cases/standalone/common/promql/tsid_histogram_quantile_regression.sql @@ -0,0 +1,81 @@ +-- Regression test for `__tsid` optimization on non-aggregating PromQL queries. +-- The `__tsid` column must not affect operators that infer label columns from the input schema, +-- such as `histogram_quantile` (HistogramFold). + +CREATE TABLE tsid_no_aggr_physical ( + ts TIMESTAMP(3) TIME INDEX, + val DOUBLE, +) ENGINE = metric WITH ("physical_metric_table" = ""); + +CREATE TABLE tsid_no_aggr_metric ( + job STRING NULL, + instance STRING NULL, + ts TIMESTAMP(3) NOT NULL, + val DOUBLE NULL, + TIME INDEX (ts), + PRIMARY KEY(job, instance), +) +ENGINE = metric +WITH( + on_physical_table = 'tsid_no_aggr_physical' +); + +CREATE TABLE tsid_no_aggr_histogram_bucket ( + job STRING NULL, + le STRING NULL, + ts TIMESTAMP(3) NOT NULL, + val DOUBLE NULL, + TIME INDEX (ts), + PRIMARY KEY(job, le), +) +ENGINE = metric +WITH( + on_physical_table = 'tsid_no_aggr_physical' +); + +INSERT INTO tsid_no_aggr_metric VALUES + ('job1', 'instance1', 0, 1), + ('job1', 'instance1', 5000, 3), + ('job1', 'instance1', 10000, 5); + +INSERT INTO tsid_no_aggr_histogram_bucket VALUES + ('job1', '1', 0, 1), + ('job1', '2', 0, 2), + ('job1', '+Inf', 0, 3), + ('job1', '1', 5000, 2), + ('job1', '2', 5000, 4), + ('job1', '+Inf', 5000, 6), + ('job1', '1', 10000, 3), + ('job1', '2', 10000, 6), + ('job1', '+Inf', 10000, 9); + +-- Selector (no series merge) +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 10, '5s') tsid_no_aggr_metric; + +-- Scalar function (no series merge) +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 10, '5s') abs(tsid_no_aggr_metric); + +-- Range function (no series merge) +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 10, '5s') avg_over_time(tsid_no_aggr_metric[5s]); + +-- Histogram quantile must fold buckets across `le` even when `__tsid` exists. +-- The physical plan must not treat `__tsid` as a label column for HistogramFold. +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +TQL ANALYZE (0, 10, '5s') histogram_quantile(0.5, tsid_no_aggr_histogram_bucket); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 10, '5s') histogram_quantile(0.5, tsid_no_aggr_histogram_bucket); + +DROP TABLE tsid_no_aggr_histogram_bucket; +DROP TABLE tsid_no_aggr_metric; +DROP TABLE tsid_no_aggr_physical; +