mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-26 01:40:36 +00:00
fix: correct histogram result (#7608)
correct result by removing tsid

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
@@ -2949,6 +2949,11 @@ impl PromPlanner {
|
||||
|
||||
let input = args.args[1].as_ref().clone();
|
||||
let input_plan = self.prom_expr_to_plan(&input, query_engine_state).await?;
|
||||
// `histogram_quantile` folds buckets across `le`, so `__tsid` (which includes `le`) is not
|
||||
// a stable series identifier anymore. Also, HistogramFold infers label columns from the
|
||||
// input schema and must not treat `__tsid` as a label column.
|
||||
let input_plan = self.strip_tsid_column(input_plan)?;
|
||||
self.ctx.use_tsid = false;
|
||||
|
||||
if !self.ctx.has_le_tag() {
|
||||
// Return empty result instead of error when 'le' column is not found
|
||||
|
||||
@@ -0,0 +1,153 @@
|
||||
-- Regression test for `__tsid` optimization on non-aggregating PromQL queries.
|
||||
-- The `__tsid` column must not affect operators that infer label columns from the input schema,
|
||||
-- such as `histogram_quantile` (HistogramFold).
|
||||
CREATE TABLE tsid_no_aggr_physical (
|
||||
ts TIMESTAMP(3) TIME INDEX,
|
||||
val DOUBLE,
|
||||
) ENGINE = metric WITH ("physical_metric_table" = "");
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE tsid_no_aggr_metric (
|
||||
job STRING NULL,
|
||||
instance STRING NULL,
|
||||
ts TIMESTAMP(3) NOT NULL,
|
||||
val DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(job, instance),
|
||||
)
|
||||
ENGINE = metric
|
||||
WITH(
|
||||
on_physical_table = 'tsid_no_aggr_physical'
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE tsid_no_aggr_histogram_bucket (
|
||||
job STRING NULL,
|
||||
le STRING NULL,
|
||||
ts TIMESTAMP(3) NOT NULL,
|
||||
val DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(job, le),
|
||||
)
|
||||
ENGINE = metric
|
||||
WITH(
|
||||
on_physical_table = 'tsid_no_aggr_physical'
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO tsid_no_aggr_metric VALUES
|
||||
('job1', 'instance1', 0, 1),
|
||||
('job1', 'instance1', 5000, 3),
|
||||
('job1', 'instance1', 10000, 5);
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
INSERT INTO tsid_no_aggr_histogram_bucket VALUES
|
||||
('job1', '1', 0, 1),
|
||||
('job1', '2', 0, 2),
|
||||
('job1', '+Inf', 0, 3),
|
||||
('job1', '1', 5000, 2),
|
||||
('job1', '2', 5000, 4),
|
||||
('job1', '+Inf', 5000, 6),
|
||||
('job1', '1', 10000, 3),
|
||||
('job1', '2', 10000, 6),
|
||||
('job1', '+Inf', 10000, 9);
|
||||
|
||||
Affected Rows: 9
|
||||
|
||||
-- Selector (no series merge)
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 10, '5s') tsid_no_aggr_metric;
|
||||
|
||||
+-----+----------+-----------+---------------------+
|
||||
| val | instance | job | ts |
|
||||
+-----+----------+-----------+---------------------+
|
||||
| 1.0 | job1 | instance1 | 1970-01-01T00:00:00 |
|
||||
| 3.0 | job1 | instance1 | 1970-01-01T00:00:05 |
|
||||
| 5.0 | job1 | instance1 | 1970-01-01T00:00:10 |
|
||||
+-----+----------+-----------+---------------------+
|
||||
|
||||
-- Scalar function (no series merge)
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 10, '5s') abs(tsid_no_aggr_metric);
|
||||
|
||||
+---------------------+----------+----------+-----------+
|
||||
| ts | abs(val) | instance | job |
|
||||
+---------------------+----------+----------+-----------+
|
||||
| 1970-01-01T00:00:00 | 1.0 | job1 | instance1 |
|
||||
| 1970-01-01T00:00:05 | 3.0 | job1 | instance1 |
|
||||
| 1970-01-01T00:00:10 | 5.0 | job1 | instance1 |
|
||||
+---------------------+----------+----------+-----------+
|
||||
|
||||
-- Range function (no series merge)
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 10, '5s') avg_over_time(tsid_no_aggr_metric[5s]);
|
||||
|
||||
+---------------------+----------------------------------+----------+-----------+
|
||||
| ts | prom_avg_over_time(ts_range,val) | instance | job |
|
||||
+---------------------+----------------------------------+----------+-----------+
|
||||
| 1970-01-01T00:00:00 | 1.0 | job1 | instance1 |
|
||||
| 1970-01-01T00:00:05 | 2.0 | job1 | instance1 |
|
||||
| 1970-01-01T00:00:10 | 4.0 | job1 | instance1 |
|
||||
+---------------------+----------------------------------+----------+-----------+
|
||||
|
||||
-- Histogram quantile must fold buckets across `le` even when `__tsid` exists.
|
||||
-- The physical plan must not treat `__tsid` as a label column for HistogramFold.
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
TQL ANALYZE (0, 10, '5s') histogram_quantile(0.5, tsid_no_aggr_histogram_bucket);
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_HistogramFoldExec: le=@2, field=@0, quantile=0.5 REDACTED
|
||||
|_|_|_SortExec: expr=[job@1 ASC NULLS LAST, ts@3 ASC NULLS LAST, CAST(le@2 AS Float64) ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[val@0 as val, job@1 as job, le@2 as le, ts@4 as ts] REDACTED
|
||||
|_|_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[ts] REDACTED
|
||||
|_|_|_PromSeriesDivideExec: tags=["__tsid"] REDACTED
|
||||
|_|_|_SortExec: expr=[__tsid@3 ASC, ts@4 ASC], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_ProjectionExec: expr=[val@1 as val, job@3 as job, le@4 as le, __tsid@2 as __tsid, ts@0 as ts] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 3_|
|
||||
+-+-+-+
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 10, '5s') histogram_quantile(0.5, tsid_no_aggr_histogram_bucket);
|
||||
|
||||
+-----+------+---------------------+
|
||||
| val | job | ts |
|
||||
+-----+------+---------------------+
|
||||
| 1.5 | job1 | 1970-01-01T00:00:00 |
|
||||
| 1.5 | job1 | 1970-01-01T00:00:05 |
|
||||
| 1.5 | job1 | 1970-01-01T00:00:10 |
|
||||
+-----+------+---------------------+
|
||||
|
||||
DROP TABLE tsid_no_aggr_histogram_bucket;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE tsid_no_aggr_metric;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE tsid_no_aggr_physical;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
-- Regression test for `__tsid` optimization on non-aggregating PromQL queries.
|
||||
-- The `__tsid` column must not affect operators that infer label columns from the input schema,
|
||||
-- such as `histogram_quantile` (HistogramFold).
|
||||
|
||||
CREATE TABLE tsid_no_aggr_physical (
|
||||
ts TIMESTAMP(3) TIME INDEX,
|
||||
val DOUBLE,
|
||||
) ENGINE = metric WITH ("physical_metric_table" = "");
|
||||
|
||||
CREATE TABLE tsid_no_aggr_metric (
|
||||
job STRING NULL,
|
||||
instance STRING NULL,
|
||||
ts TIMESTAMP(3) NOT NULL,
|
||||
val DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(job, instance),
|
||||
)
|
||||
ENGINE = metric
|
||||
WITH(
|
||||
on_physical_table = 'tsid_no_aggr_physical'
|
||||
);
|
||||
|
||||
CREATE TABLE tsid_no_aggr_histogram_bucket (
|
||||
job STRING NULL,
|
||||
le STRING NULL,
|
||||
ts TIMESTAMP(3) NOT NULL,
|
||||
val DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(job, le),
|
||||
)
|
||||
ENGINE = metric
|
||||
WITH(
|
||||
on_physical_table = 'tsid_no_aggr_physical'
|
||||
);
|
||||
|
||||
INSERT INTO tsid_no_aggr_metric VALUES
|
||||
('job1', 'instance1', 0, 1),
|
||||
('job1', 'instance1', 5000, 3),
|
||||
('job1', 'instance1', 10000, 5);
|
||||
|
||||
INSERT INTO tsid_no_aggr_histogram_bucket VALUES
|
||||
('job1', '1', 0, 1),
|
||||
('job1', '2', 0, 2),
|
||||
('job1', '+Inf', 0, 3),
|
||||
('job1', '1', 5000, 2),
|
||||
('job1', '2', 5000, 4),
|
||||
('job1', '+Inf', 5000, 6),
|
||||
('job1', '1', 10000, 3),
|
||||
('job1', '2', 10000, 6),
|
||||
('job1', '+Inf', 10000, 9);
|
||||
|
||||
-- Selector (no series merge)
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 10, '5s') tsid_no_aggr_metric;
|
||||
|
||||
-- Scalar function (no series merge)
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 10, '5s') abs(tsid_no_aggr_metric);
|
||||
|
||||
-- Range function (no series merge)
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 10, '5s') avg_over_time(tsid_no_aggr_metric[5s]);
|
||||
|
||||
-- Histogram quantile must fold buckets across `le` even when `__tsid` exists.
|
||||
-- The physical plan must not treat `__tsid` as a label column for HistogramFold.
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
TQL ANALYZE (0, 10, '5s') histogram_quantile(0.5, tsid_no_aggr_histogram_bucket);
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 10, '5s') histogram_quantile(0.5, tsid_no_aggr_histogram_bucket);
|
||||
|
||||
DROP TABLE tsid_no_aggr_histogram_bucket;
|
||||
DROP TABLE tsid_no_aggr_metric;
|
||||
DROP TABLE tsid_no_aggr_physical;
|
||||
|
||||
Reference in New Issue
Block a user