fix: sort condition in HistogramFold (#2674)

* sort ts before le

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2023-11-01 16:28:53 +08:00
committed by GitHub
parent 5f3bbdca4f
commit a9137b77f0
3 changed files with 78 additions and 9 deletions

View File

@@ -52,7 +52,7 @@ use futures::{ready, Stream, StreamExt};
/// Due to the folding or sampling, the output rows number will become `input_rows` / `bucket_num`.
///
/// # Requirement
/// - Input should be sorted on `<tag list>, le ASC, ts`.
/// - Input should be sorted on `<tag list>, ts, le ASC`.
/// - The value set of `le` should be same. I.e., buckets of every series should be same.
///
/// [1]: https://prometheus.io/docs/concepts/metric_types/#histogram
@@ -248,6 +248,14 @@ impl ExecutionPlan for HistogramFoldExec {
options: None,
})
.collect::<Vec<PhysicalSortRequirement>>();
// add ts
cols.push(PhysicalSortRequirement {
expr: Arc::new(PhyColumn::new(
self.input.schema().field(self.ts_column_index).name(),
self.ts_column_index,
)),
options: None,
});
// add le ASC
cols.push(PhysicalSortRequirement {
expr: Arc::new(PhyCast::new(
@@ -263,14 +271,6 @@ impl ExecutionPlan for HistogramFoldExec {
nulls_first: false, // not nullable
}),
});
// add ts
cols.push(PhysicalSortRequirement {
expr: Arc::new(PhyColumn::new(
self.input.schema().field(self.ts_column_index).name(),
self.ts_column_index,
)),
options: None,
});
vec![Some(cols)]
}

View File

@@ -244,3 +244,44 @@ drop table histogram2_bucket;
Affected Rows: 0
-- not from Prometheus
-- makesure the sort expr works as expected
create table histogram3_bucket (
ts timestamp time index,
le string,
s string,
val double,
primary key (s, le),
);
Affected Rows: 0
insert into histogram3_bucket values
(2900000, "0.1", "a", 0),
(2900000, "1", "a", 0),
(2900000, "5", "a", 0),
(2900000, "+Inf", "a", 0),
(3000000, "0.1", "a", 50),
(3000000, "1", "a", 70),
(3000000, "5", "a", 110),
(3000000, "+Inf", "a", 120),
(3005000, "0.1", "a", 10),
(3005000, "1", "a", 20),
(3005000, "5", "a", 20),
(3005000, "+Inf", "a", 30);
Affected Rows: 12
tql eval (3000, 3005, '3s') histogram_quantile(0.5, sum by(le, s) (rate(histogram3_bucket[5m])));
+---+---------------------+---------------------------------+
| s | ts | SUM(prom_rate(ts_range,val,ts)) |
+---+---------------------+---------------------------------+
| a | 1970-01-01T00:50:00 | 0.55 |
| a | 1970-01-01T00:50:03 | 0.5500000000000002 |
+---+---------------------+---------------------------------+
drop table histogram3_bucket;
Affected Rows: 0

View File

@@ -132,3 +132,31 @@ tql eval (2820, 2820, '1s') histogram_quantile(0.5, rate(histogram2_bucket[15m])
tql eval (2820, 2820, '1s') histogram_quantile(0.833, rate(histogram2_bucket[15m]));
drop table histogram2_bucket;
-- not from Prometheus
-- makesure the sort expr works as expected
create table histogram3_bucket (
ts timestamp time index,
le string,
s string,
val double,
primary key (s, le),
);
insert into histogram3_bucket values
(2900000, "0.1", "a", 0),
(2900000, "1", "a", 0),
(2900000, "5", "a", 0),
(2900000, "+Inf", "a", 0),
(3000000, "0.1", "a", 50),
(3000000, "1", "a", 70),
(3000000, "5", "a", 110),
(3000000, "+Inf", "a", 120),
(3005000, "0.1", "a", 10),
(3005000, "1", "a", 20),
(3005000, "5", "a", 20),
(3005000, "+Inf", "a", 30);
tql eval (3000, 3005, '3s') histogram_quantile(0.5, sum by(le, s) (rate(histogram3_bucket[5m])));
drop table histogram3_bucket;