fix: qualify HistogramFold schema (#8157)

* test: add regression test for binary op on histogram_quantile (#8144)

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: preserve column qualifiers in HistogramFold output schema (#8144)

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
This commit is contained in:
Yingwen
2026-05-25 15:40:48 +08:00
committed by GitHub
parent 8f7951c5bd
commit a25152664b
4 changed files with 187 additions and 9 deletions

View File

@@ -322,16 +322,18 @@ impl HistogramFold {
/// Transform the schema
///
/// - `le` will be removed
///
/// Column qualifiers are preserved so downstream plan nodes can keep
/// referencing the columns by their original qualified names.
fn convert_schema(
input_schema: &DFSchemaRef,
le_column: &str,
) -> DataFusionResult<DFSchemaRef> {
let fields = input_schema.fields();
// safety: those fields are checked in `check_schema()`
let mut new_fields = Vec::with_capacity(fields.len() - 1);
for f in fields {
if f.name() != le_column {
new_fields.push((None, f.clone()));
let mut new_fields = Vec::with_capacity(input_schema.fields().len() - 1);
for (qualifier, field) in input_schema.iter() {
if field.name() != le_column {
new_fields.push((qualifier.cloned(), field.clone()));
}
}
Ok(Arc::new(DFSchema::new_with_metadata(

View File

@@ -4023,12 +4023,15 @@ impl PromPlanner {
return Ok(plan);
}
// Preserve column qualifiers so downstream plan nodes can keep referencing
// the columns by their original qualified names.
let project_exprs = schema
.fields()
.iter()
.filter(|field| field.name() != DATA_SCHEMA_TSID_COLUMN_NAME)
.map(|field| Ok(DfExpr::Column(Column::from_name(field.name().clone()))))
.collect::<Result<Vec<_>>>()?;
.filter(|(_, field)| field.name() != DATA_SCHEMA_TSID_COLUMN_NAME)
.map(|(qualifier, field)| {
DfExpr::Column(Column::new(qualifier.cloned(), field.name().clone()))
})
.collect::<Vec<_>>();
LogicalPlanBuilder::from(plan)
.project(project_exprs)
@@ -6005,6 +6008,39 @@ mod test {
.unwrap();
}
#[tokio::test]
async fn test_histogram_quantile_binary_op() {
    // Regression test for #8144: arithmetic applied on top of a
    // histogram_quantile() result. HistogramFold used to drop the input
    // column qualifiers, so the binary-op projection could not resolve the
    // qualified tag column.
    let query = r#"histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) + 0"#;

    // Build the statement directly from the parsed query.
    let eval_stmt = EvalStmt {
        expr: parser::parse(query).unwrap(),
        start: UNIX_EPOCH,
        end: UNIX_EPOCH
            .checked_add(Duration::from_secs(100_000))
            .unwrap(),
        interval: Duration::from_secs(5),
        lookback_delta: Duration::from_secs(1),
    };

    // Single bucket table with the `pod` and `le` columns the query refers to.
    let tables = [(
        DEFAULT_SCHEMA_NAME.to_string(),
        "http_request_duration_seconds_bucket".to_string(),
    )];
    let table_provider = build_test_table_provider_with_fields(&tables, &["pod", "le"]).await;

    // Planning must succeed instead of failing with a "No field named ..." error.
    let planned =
        PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_query_engine_state()).await;
    let _ = planned.unwrap();
}
#[tokio::test]
async fn test_parse_and_operator() {
let mut eval_stmt = EvalStmt {

View File

@@ -0,0 +1,90 @@
-- Reproduce https://github.com/GreptimeTeam/greptimedb/issues/8144
-- Binary comparison/arithmetic applied to a histogram_quantile() result.
create table http_request_duration_seconds_bucket (
ts timestamp time index,
le string,
pod string,
val double,
primary key (pod, le),
);
Affected Rows: 0
insert into http_request_duration_seconds_bucket values
(2900000, "0.01", "pod-a", 10),
(2900000, "0.05", "pod-a", 20),
(2900000, "0.1", "pod-a", 30),
(2900000, "+Inf", "pod-a", 40),
(3000000, "0.01", "pod-a", 20),
(3000000, "0.05", "pod-a", 50),
(3000000, "0.1", "pod-a", 80),
(3000000, "+Inf", "pod-a", 100),
(2900000, "0.01", "pod-b", 5),
(2900000, "0.05", "pod-b", 8),
(2900000, "0.1", "pod-b", 12),
(2900000, "+Inf", "pod-b", 15),
(3000000, "0.01", "pod-b", 10),
(3000000, "0.05", "pod-b", 25),
(3000000, "0.1", "pod-b", 45),
(3000000, "+Inf", "pod-b", 60);
Affected Rows: 16
-- histogram_quantile alone
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m])));
+-------+---------------------+-----------------------------------------------+
| pod | ts | sum(prom_rate(ts_range,val,ts,Int64(300000))) |
+-------+---------------------+-----------------------------------------------+
| pod-a | 1970-01-01T00:50:00 | 0.05 |
| pod-b | 1970-01-01T00:50:00 | 0.062499999999999986 |
+-------+---------------------+-----------------------------------------------+
-- comparison filter
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) >= 0.02;
+-------+---------------------+-----------------------------------------------+
| pod | ts | sum(prom_rate(ts_range,val,ts,Int64(300000))) |
+-------+---------------------+-----------------------------------------------+
| pod-a | 1970-01-01T00:50:00 | 0.05 |
| pod-b | 1970-01-01T00:50:00 | 0.062499999999999986 |
+-------+---------------------+-----------------------------------------------+
-- arithmetic
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) + 0;
+-------+---------------------+------------------------------------------------------------+
| pod | ts | sum(prom_rate(ts_range,val,ts,Int64(300000))) + Float64(0) |
+-------+---------------------+------------------------------------------------------------+
| pod-a | 1970-01-01T00:50:00 | 0.05 |
| pod-b | 1970-01-01T00:50:00 | 0.062499999999999986 |
+-------+---------------------+------------------------------------------------------------+
-- bool modifier
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) >= bool 0.02;
+-------+---------------------+----------------------------------------------------------------+
| pod | ts | sum(prom_rate(ts_range,val,ts,Int64(300000))) >= Float64(0.02) |
+-------+---------------------+----------------------------------------------------------------+
| pod-a | 1970-01-01T00:50:00 | 1.0 |
| pod-b | 1970-01-01T00:50:00 | 1.0 |
+-------+---------------------+----------------------------------------------------------------+
-- subquery
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') count_over_time((histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) >= 0.02)[10m:1m]);
+---------------------+------------------------------------------------------------------------------+-------+
| ts | prom_count_over_time(ts_range,sum(prom_rate(ts_range,val,ts,Int64(300000)))) | pod |
+---------------------+------------------------------------------------------------------------------+-------+
| 1970-01-01T00:50:00 | 2.0 | pod-a |
+---------------------+------------------------------------------------------------------------------+-------+
drop table http_request_duration_seconds_bucket;
Affected Rows: 0

View File

@@ -0,0 +1,50 @@
-- Reproduce https://github.com/GreptimeTeam/greptimedb/issues/8144
-- Binary comparison/arithmetic applied to a histogram_quantile() result.
create table http_request_duration_seconds_bucket (
ts timestamp time index,
le string,
pod string,
val double,
primary key (pod, le),
);
insert into http_request_duration_seconds_bucket values
(2900000, "0.01", "pod-a", 10),
(2900000, "0.05", "pod-a", 20),
(2900000, "0.1", "pod-a", 30),
(2900000, "+Inf", "pod-a", 40),
(3000000, "0.01", "pod-a", 20),
(3000000, "0.05", "pod-a", 50),
(3000000, "0.1", "pod-a", 80),
(3000000, "+Inf", "pod-a", 100),
(2900000, "0.01", "pod-b", 5),
(2900000, "0.05", "pod-b", 8),
(2900000, "0.1", "pod-b", 12),
(2900000, "+Inf", "pod-b", 15),
(3000000, "0.01", "pod-b", 10),
(3000000, "0.05", "pod-b", 25),
(3000000, "0.1", "pod-b", 45),
(3000000, "+Inf", "pod-b", 60);
-- histogram_quantile alone
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m])));
-- comparison filter
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) >= 0.02;
-- arithmetic
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) + 0;
-- bool modifier
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) >= bool 0.02;
-- subquery
-- SQLNESS SORT_RESULT 3 1
tql eval (3000, 3000, '1s') count_over_time((histogram_quantile(0.5, sum by (le, pod) (rate(http_request_duration_seconds_bucket[5m]))) >= 0.02)[10m:1m]);
drop table http_request_duration_seconds_bucket;