mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-24 17:00:37 +00:00
fix: drop rhs columns on promql filter join (#7665)
* fix: drop rhs columns on promql filter join Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * choose instant vector to operate Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
@@ -66,6 +66,7 @@ use promql::functions::{
|
||||
};
|
||||
use promql_parser::label::{METRIC_NAME, MatchOp, Matcher, Matchers};
|
||||
use promql_parser::parser::token::TokenType;
|
||||
use promql_parser::parser::value::ValueType;
|
||||
use promql_parser::parser::{
|
||||
AggregateExpr, BinModifier, BinaryExpr as PromBinaryExpr, Call, EvalStmt, Expr as PromExpr,
|
||||
Function, FunctionArgs as PromFunctionArgs, LabelModifier, MatrixSelector, NumberLiteral,
|
||||
@@ -761,7 +762,21 @@ impl PromPlanner {
|
||||
Ok(binary_expr)
|
||||
};
|
||||
if is_comparison_op && !should_return_bool {
|
||||
self.filter_on_field_column(join_plan, bin_expr_builder)
|
||||
// PromQL comparison operators without `bool` are filters:
|
||||
// - keep the instant-vector side sample values
|
||||
// - drop samples where the comparison is false
|
||||
//
|
||||
// So we filter on the join result and then project only the side that should
|
||||
// be preserved according to PromQL semantics.
|
||||
let filtered = self.filter_on_field_column(join_plan, bin_expr_builder)?;
|
||||
let (project_table_ref, project_context) =
|
||||
match (lhs.value_type(), rhs.value_type()) {
|
||||
(ValueType::Scalar, ValueType::Vector) => {
|
||||
(&right_table_ref, &right_context)
|
||||
}
|
||||
_ => (&left_table_ref, &left_context),
|
||||
};
|
||||
self.project_binary_join_side(filtered, project_table_ref, project_context)
|
||||
} else {
|
||||
self.projection_for_each_field_column(join_plan, bin_expr_builder)
|
||||
}
|
||||
@@ -769,6 +784,68 @@ impl PromPlanner {
|
||||
}
|
||||
}
|
||||
|
||||
fn project_binary_join_side(
|
||||
&mut self,
|
||||
input: LogicalPlan,
|
||||
table_ref: &TableReference,
|
||||
context: &PromPlannerContext,
|
||||
) -> Result<LogicalPlan> {
|
||||
let schema = input.schema();
|
||||
|
||||
let mut project_exprs =
|
||||
Vec::with_capacity(context.tag_columns.len() + context.field_columns.len() + 2);
|
||||
|
||||
// Project time index from the chosen side.
|
||||
if let Some(time_index_column) = &context.time_index_column {
|
||||
let time_index_col = schema
|
||||
.qualified_field_with_name(Some(table_ref), time_index_column)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.into();
|
||||
project_exprs.push(DfExpr::Column(time_index_col));
|
||||
}
|
||||
|
||||
// Project field columns from the chosen side.
|
||||
for field_column in &context.field_columns {
|
||||
let field_col = schema
|
||||
.qualified_field_with_name(Some(table_ref), field_column)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.into();
|
||||
project_exprs.push(DfExpr::Column(field_col));
|
||||
}
|
||||
|
||||
// Project tag columns from the chosen side.
|
||||
for tag_column in &context.tag_columns {
|
||||
let tag_col = schema
|
||||
.qualified_field_with_name(Some(table_ref), tag_column)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.into();
|
||||
project_exprs.push(DfExpr::Column(tag_col));
|
||||
}
|
||||
|
||||
// Preserve `__tsid` if present, so it can still be used internally downstream. It's
|
||||
// stripped from the final output anyway.
|
||||
if context.use_tsid
|
||||
&& let Ok(tsid_col) =
|
||||
schema.qualified_field_with_name(Some(table_ref), DATA_SCHEMA_TSID_COLUMN_NAME)
|
||||
{
|
||||
project_exprs.push(DfExpr::Column(tsid_col.into()));
|
||||
}
|
||||
|
||||
let plan = LogicalPlanBuilder::from(input)
|
||||
.project(project_exprs)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
|
||||
// Update context to reflect the projected schema. Don't keep a table qualifier since
|
||||
// the result is a derived expression.
|
||||
self.ctx = context.clone();
|
||||
self.ctx.table_name = None;
|
||||
self.ctx.schema_name = None;
|
||||
|
||||
Ok(plan)
|
||||
}
|
||||
|
||||
fn prom_number_lit_to_plan(&mut self, number_literal: &NumberLiteral) -> Result<LogicalPlan> {
|
||||
let NumberLiteral { val } = number_literal;
|
||||
self.ctx.time_index_column = Some(DEFAULT_TIME_INDEX_COLUMN.to_string());
|
||||
@@ -2694,8 +2771,15 @@ impl PromPlanner {
|
||||
exprs.push(expr);
|
||||
}
|
||||
|
||||
conjunction(exprs).context(ValueNotFoundSnafu {
|
||||
table: self.table_ref()?.to_quoted_string(),
|
||||
// This error context should be computed lazily: the planner may set `ctx.table_name` to
|
||||
// `None` for derived expressions (e.g. after projecting the LHS of a vector-vector
|
||||
// comparison filter). Eagerly calling `table_ref()?` here can turn a valid plan into
|
||||
// a `TableNameNotFound` error even when `conjunction(exprs)` succeeds.
|
||||
conjunction(exprs).with_context(|| ValueNotFoundSnafu {
|
||||
table: self
|
||||
.table_ref()
|
||||
.map(|t| t.to_quoted_string())
|
||||
.unwrap_or_else(|_| "unknown".to_string()),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
119
tests/cases/standalone/common/promql/comparison_filter_or.result
Normal file
119
tests/cases/standalone/common/promql/comparison_filter_or.result
Normal file
@@ -0,0 +1,119 @@
|
||||
-- Use metric engine to match real PromQL usage.
|
||||
CREATE TABLE comparison_filter_or_physical (
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
greptime_value DOUBLE
|
||||
) ENGINE=metric WITH ("physical_metric_table" = "");
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE a (
|
||||
k STRING NULL,
|
||||
ts TIMESTAMP NOT NULL,
|
||||
greptime_value DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY (k)
|
||||
) ENGINE=metric WITH (on_physical_table = 'comparison_filter_or_physical');
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
CREATE TABLE b (
|
||||
k STRING NULL,
|
||||
ts TIMESTAMP NOT NULL,
|
||||
greptime_value DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY (k)
|
||||
) ENGINE=metric WITH (on_physical_table = 'comparison_filter_or_physical');
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO a (ts, k, greptime_value)
|
||||
VALUES
|
||||
(0, 'x', 3),
|
||||
(0, 'y', 1),
|
||||
(0, 'z', 5);
|
||||
|
||||
Affected Rows: 3
|
||||
|
||||
INSERT INTO b (ts, k, greptime_value)
|
||||
VALUES
|
||||
(0, 'x', 1),
|
||||
(0, 'y', 2);
|
||||
|
||||
Affected Rows: 2
|
||||
|
||||
-- Regression: vector-vector comparison without `bool` is a filter that keeps LHS values.
|
||||
-- It must not leak RHS columns (e.g. duplicated `ts`), otherwise set operators like `or`
|
||||
-- can fail with ambiguous column references.
|
||||
--
|
||||
-- Also cover the common PromQL pattern `(A > B) or B or A`, which uses `or` to fill missing
|
||||
-- series from RHS (e.g. B missing for some labels, then fallback to A).
|
||||
-- SQLNESS SORT_RESULT 2 1
|
||||
tql eval (0, 0, '1s') (a > b) or b or a;
|
||||
|
||||
+---------------------+----------------+---+
|
||||
| ts | greptime_value | k |
|
||||
+---------------------+----------------+---+
|
||||
| 1970-01-01T00:00:00 | 2.0 | y |
|
||||
| 1970-01-01T00:00:00 | 3.0 | x |
|
||||
| 1970-01-01T00:00:00 | 5.0 | z |
|
||||
+---------------------+----------------+---+
|
||||
|
||||
-- Regression: derived vector expressions may not have a concrete `ctx.table_name`.
|
||||
-- Function planning (which filters empty values) must not require a table name when the
|
||||
-- input plan is valid.
|
||||
-- SQLNESS SORT_RESULT 2 1
|
||||
tql eval (0, 0, '1s') abs(a > b);
|
||||
|
||||
+---------------------+---------------------+---+
|
||||
| ts | abs(greptime_value) | k |
|
||||
+---------------------+---------------------+---+
|
||||
| 1970-01-01T00:00:00 | 3.0 | x |
|
||||
+---------------------+---------------------+---+
|
||||
|
||||
-- Regression: scalar-vector comparison (scalar on LHS) without `bool` is a filter that keeps
|
||||
-- the vector side's samples/labels (not the scalar side).
|
||||
CREATE TABLE m (
|
||||
k STRING NULL,
|
||||
ts TIMESTAMP NOT NULL,
|
||||
greptime_value DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY (k)
|
||||
) ENGINE=metric WITH (on_physical_table = 'comparison_filter_or_physical');
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO m (ts, k, greptime_value)
|
||||
VALUES
|
||||
(1000, 'x', 3),
|
||||
(1000, 'y', 0),
|
||||
(2000, 'x', 4),
|
||||
(2000, 'y', 0);
|
||||
|
||||
Affected Rows: 4
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
tql eval (1, 2, '1s') time() < m;
|
||||
|
||||
+---------------------+----------------+---+
|
||||
| ts | greptime_value | k |
|
||||
+---------------------+----------------+---+
|
||||
| 1970-01-01T00:00:01 | 3.0 | x |
|
||||
| 1970-01-01T00:00:02 | 4.0 | x |
|
||||
+---------------------+----------------+---+
|
||||
|
||||
drop table m;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
drop table a;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
drop table b;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
drop table comparison_filter_or_physical;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
-- Use metric engine to match real PromQL usage.
|
||||
CREATE TABLE comparison_filter_or_physical (
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
greptime_value DOUBLE
|
||||
) ENGINE=metric WITH ("physical_metric_table" = "");
|
||||
|
||||
CREATE TABLE a (
|
||||
k STRING NULL,
|
||||
ts TIMESTAMP NOT NULL,
|
||||
greptime_value DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY (k)
|
||||
) ENGINE=metric WITH (on_physical_table = 'comparison_filter_or_physical');
|
||||
|
||||
CREATE TABLE b (
|
||||
k STRING NULL,
|
||||
ts TIMESTAMP NOT NULL,
|
||||
greptime_value DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY (k)
|
||||
) ENGINE=metric WITH (on_physical_table = 'comparison_filter_or_physical');
|
||||
|
||||
INSERT INTO a (ts, k, greptime_value)
|
||||
VALUES
|
||||
(0, 'x', 3),
|
||||
(0, 'y', 1),
|
||||
(0, 'z', 5);
|
||||
|
||||
INSERT INTO b (ts, k, greptime_value)
|
||||
VALUES
|
||||
(0, 'x', 1),
|
||||
(0, 'y', 2);
|
||||
|
||||
-- Regression: vector-vector comparison without `bool` is a filter that keeps LHS values.
|
||||
-- It must not leak RHS columns (e.g. duplicated `ts`), otherwise set operators like `or`
|
||||
-- can fail with ambiguous column references.
|
||||
--
|
||||
-- Also cover the common PromQL pattern `(A > B) or B or A`, which uses `or` to fill missing
|
||||
-- series from RHS (e.g. B missing for some labels, then fallback to A).
|
||||
-- SQLNESS SORT_RESULT 2 1
|
||||
tql eval (0, 0, '1s') (a > b) or b or a;
|
||||
|
||||
-- Regression: derived vector expressions may not have a concrete `ctx.table_name`.
|
||||
-- Function planning (which filters empty values) must not require a table name when the
|
||||
-- input plan is valid.
|
||||
-- SQLNESS SORT_RESULT 2 1
|
||||
tql eval (0, 0, '1s') abs(a > b);
|
||||
|
||||
-- Regression: scalar-vector comparison (scalar on LHS) without `bool` is a filter that keeps
|
||||
-- the vector side's samples/labels (not the scalar side).
|
||||
CREATE TABLE m (
|
||||
k STRING NULL,
|
||||
ts TIMESTAMP NOT NULL,
|
||||
greptime_value DOUBLE NULL,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY (k)
|
||||
) ENGINE=metric WITH (on_physical_table = 'comparison_filter_or_physical');
|
||||
|
||||
INSERT INTO m (ts, k, greptime_value)
|
||||
VALUES
|
||||
(1000, 'x', 3),
|
||||
(1000, 'y', 0),
|
||||
(2000, 'x', 4),
|
||||
(2000, 'y', 0);
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
tql eval (1, 2, '1s') time() < m;
|
||||
|
||||
drop table m;
|
||||
|
||||
drop table a;
|
||||
drop table b;
|
||||
drop table comparison_filter_or_physical;
|
||||
@@ -141,12 +141,12 @@ Affected Rows: 2
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
TQL EVAL (0, 1, '1s') promql_instant_mismatch_nested == promql_instant_mismatch_nested;
|
||||
|
||||
+---------------------+---+--------+---------------------+---+--------+
|
||||
| ts | k | v | ts | k | v |
|
||||
+---------------------+---+--------+---------------------+---+--------+
|
||||
| 1970-01-01T00:00:00 | a | {x: 1} | 1970-01-01T00:00:00 | a | {x: 1} |
|
||||
| 1970-01-01T00:00:01 | a | {x: 2} | 1970-01-01T00:00:01 | a | {x: 2} |
|
||||
+---------------------+---+--------+---------------------+---+--------+
|
||||
+---------------------+--------+---+
|
||||
| ts | v | k |
|
||||
+---------------------+--------+---+
|
||||
| 1970-01-01T00:00:00 | {x: 1} | a |
|
||||
| 1970-01-01T00:00:01 | {x: 2} | a |
|
||||
+---------------------+--------+---+
|
||||
|
||||
DROP TABLE promql_instant_mismatch_nested;
|
||||
|
||||
|
||||
@@ -129,12 +129,12 @@ tql eval (1, 2, '1s') time() + metrics;
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
tql eval (1, 2, '1s') time() == metrics;
|
||||
|
||||
+---------------------+----------------------+---------------------+-----+
|
||||
| time | time / Float64(1000) | ts | val |
|
||||
+---------------------+----------------------+---------------------+-----+
|
||||
| 1970-01-01T00:00:01 | 1.0 | 1970-01-01T00:00:01 | 1.0 |
|
||||
| 1970-01-01T00:00:02 | 2.0 | 1970-01-01T00:00:02 | 2.0 |
|
||||
+---------------------+----------------------+---------------------+-----+
|
||||
+---------------------+-----+
|
||||
| ts | val |
|
||||
+---------------------+-----+
|
||||
| 1970-01-01T00:00:01 | 1.0 |
|
||||
| 1970-01-01T00:00:02 | 2.0 |
|
||||
+---------------------+-----+
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
tql eval (1, 2, '1s') time() == bool metrics;
|
||||
@@ -159,12 +159,12 @@ tql eval (1, 2, '1s') metrics + time();
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
tql eval (1, 2, '1s') metrics == time();
|
||||
|
||||
+---------------------+-----+---------------------+----------------------+
|
||||
| ts | val | time | time / Float64(1000) |
|
||||
+---------------------+-----+---------------------+----------------------+
|
||||
| 1970-01-01T00:00:01 | 1.0 | 1970-01-01T00:00:01 | 1.0 |
|
||||
| 1970-01-01T00:00:02 | 2.0 | 1970-01-01T00:00:02 | 2.0 |
|
||||
+---------------------+-----+---------------------+----------------------+
|
||||
+---------------------+-----+
|
||||
| ts | val |
|
||||
+---------------------+-----+
|
||||
| 1970-01-01T00:00:01 | 1.0 |
|
||||
| 1970-01-01T00:00:02 | 2.0 |
|
||||
+---------------------+-----+
|
||||
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
tql eval (1, 2, '1s') metrics == bool time();
|
||||
|
||||
@@ -154,13 +154,13 @@ tql eval (0, 60, '30s') timestamp(timestamp_test) + timestamp(timestamp_test2);
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
tql eval (0, 60, '30s') timestamp(timestamp_test) == timestamp(timestamp_test2);
|
||||
|
||||
+---------------------+-------+---------------------+-------+
|
||||
| ts | value | ts | value |
|
||||
+---------------------+-------+---------------------+-------+
|
||||
| 1970-01-01T00:00:00 | 0.0 | 1970-01-01T00:00:00 | 0.0 |
|
||||
| 1970-01-01T00:00:30 | 1.0 | 1970-01-01T00:00:30 | 1.0 |
|
||||
| 1970-01-01T00:01:00 | 60.0 | 1970-01-01T00:01:00 | 60.0 |
|
||||
+---------------------+-------+---------------------+-------+
|
||||
+---------------------+-------+
|
||||
| ts | value |
|
||||
+---------------------+-------+
|
||||
| 1970-01-01T00:00:00 | 0.0 |
|
||||
| 1970-01-01T00:00:30 | 1.0 |
|
||||
| 1970-01-01T00:01:00 | 60.0 |
|
||||
+---------------------+-------+
|
||||
|
||||
drop table timestamp_test;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user