From 4ef038d0984bb317657542bb47b274b9a3830b70 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Mon, 17 Feb 2025 10:43:50 -0800 Subject: [PATCH] fix: correct promql behavior on nonexistent columns (#5547) * Revert "fix(promql): ignore filters for non-existent labels (#5519)" This reverts commit 33a2485f54a35161b0f85da0cd423a61d5739fbc. * reimplement Signed-off-by: Ruihang Xia * state safety Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- src/query/src/promql/planner.rs | 87 ++++++++----------- .../standalone/common/promql/label.result | 64 ++++++++++++++ .../cases/standalone/common/promql/label.sql | 28 ++++++ .../promql/non_existent_matchers.result | 44 ---------- .../common/promql/non_existent_matchers.sql | 17 ---- 5 files changed, 126 insertions(+), 114 deletions(-) delete mode 100644 tests/cases/standalone/common/promql/non_existent_matchers.result delete mode 100644 tests/cases/standalone/common/promql/non_existent_matchers.sql diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 09cb27287f..1104dfa8c6 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -44,7 +44,6 @@ use datafusion_expr::utils::conjunction; use datafusion_expr::SortExpr; use datatypes::arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit}; use datatypes::data_type::ConcreteDataType; -use datatypes::schema::Schema; use itertools::Itertools; use promql::extension_plan::{ build_special_time_expr, EmptyMetric, HistogramFold, InstantManipulate, Millisecond, @@ -759,31 +758,26 @@ impl PromPlanner { label_matchers: Matchers, is_range_selector: bool, ) -> Result { + // make table scan plan + let table_ref = self.table_ref()?; + let mut table_scan = self.create_table_scan_plan(table_ref.clone()).await?; + let table_schema = table_scan.schema(); + // make filter exprs let offset_duration = match offset { Some(Offset::Pos(duration)) => duration.as_millis() as Millisecond, Some(Offset::Neg(duration)) => -(duration.as_millis() as Millisecond), None => 0, }; - - let time_index_filter = self.build_time_index_filter(offset_duration)?; - // make table scan with filter exprs - let table_ref = self.table_ref()?; - - let moved_label_matchers = label_matchers.clone(); - let mut table_scan = self - .create_table_scan_plan(table_ref.clone(), |schema| { - let mut scan_filters = - PromPlanner::matchers_to_expr(moved_label_matchers, |name| { - schema.column_index_by_name(name).is_some() - })?; - if let Some(time_index_filter) = time_index_filter { - scan_filters.push(time_index_filter); - } - - Ok(scan_filters) - }) - .await?; + let mut scan_filters = self.matchers_to_expr(label_matchers.clone(), table_schema)?; + if let Some(time_index_filter) = self.build_time_index_filter(offset_duration)? { + scan_filters.push(time_index_filter); + } + table_scan = LogicalPlanBuilder::from(table_scan) + .filter(conjunction(scan_filters).unwrap()) // Safety: `scan_filters` is not empty. + .context(DataFusionPlanningSnafu)? + .build() + .context(DataFusionPlanningSnafu)?; // make a projection plan if there is any `__field__` matcher if let Some(field_matchers) = &self.ctx.field_column_matcher { @@ -963,20 +957,22 @@ impl PromPlanner { } } - /// Convert [`Matchers`] to [`DfExpr`]s. - /// - /// This method will filter out the matchers that don't match the filter function. - fn matchers_to_expr(label_matchers: Matchers, filter: F) -> Result> - where - F: Fn(&str) -> bool, - { + // TODO(ruihang): ignore `MetricNameLabel` (`__name__`) matcher + fn matchers_to_expr( + &self, + label_matchers: Matchers, + table_schema: &DFSchemaRef, + ) -> Result> { let mut exprs = Vec::with_capacity(label_matchers.matchers.len()); for matcher in label_matchers.matchers { - // ignores the matchers that don't match the filter function - if !filter(&matcher.name) { - continue; - } - let col = DfExpr::Column(Column::from_name(matcher.name)); + let col = if table_schema + .field_with_unqualified_name(&matcher.name) + .is_err() + { + DfExpr::Literal(ScalarValue::Utf8(Some(String::new()))).alias(matcher.name) + } else { + DfExpr::Column(Column::from_name(matcher.name)) + }; let lit = DfExpr::Literal(ScalarValue::Utf8(Some(matcher.value))); let expr = match matcher.op { MatchOp::Equal => col.eq(lit), @@ -1076,21 +1072,14 @@ impl PromPlanner { /// /// # Panic /// If the filter is empty - async fn create_table_scan_plan( - &mut self, - table_ref: TableReference, - filter_builder: F, - ) -> Result - where - F: FnOnce(&Schema) -> Result>, - { + async fn create_table_scan_plan(&mut self, table_ref: TableReference) -> Result { let provider = self .table_provider .resolve_table(table_ref.clone()) .await .context(CatalogSnafu)?; - let schema = provider + let is_time_index_ms = provider .as_any() .downcast_ref::() .context(UnknownTableSnafu)? @@ -1099,9 +1088,7 @@ impl PromPlanner { .downcast_ref::() .context(UnknownTableSnafu)? .table() - .schema(); - - let is_time_index_ms = schema + .schema() .timestamp_column() .with_context(|| TimeIndexNotFoundSnafu { table: table_ref.to_quoted_string(), @@ -1109,7 +1096,6 @@ impl PromPlanner { .data_type == ConcreteDataType::timestamp_millisecond_datatype(); - let filter = filter_builder(schema.as_ref())?; let mut scan_plan = LogicalPlanBuilder::scan(table_ref.clone(), provider, None) .context(DataFusionPlanningSnafu)? .build() @@ -1146,14 +1132,9 @@ impl PromPlanner { .context(DataFusionPlanningSnafu)?; } - let mut builder = LogicalPlanBuilder::from(scan_plan); - if !filter.is_empty() { - // Safety: filter is not empty, checked above - builder = builder - .filter(conjunction(filter).unwrap()) - .context(DataFusionPlanningSnafu)?; - } - let result = builder.build().context(DataFusionPlanningSnafu)?; + let result = LogicalPlanBuilder::from(scan_plan) + .build() + .context(DataFusionPlanningSnafu)?; Ok(result) } diff --git a/tests/cases/standalone/common/promql/label.result b/tests/cases/standalone/common/promql/label.result index 42ba33ca92..535545b4f5 100644 --- a/tests/cases/standalone/common/promql/label.result +++ b/tests/cases/standalone/common/promql/label.result @@ -197,3 +197,67 @@ DROP TABLE test; Affected Rows: 0 +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + val BIGINT, + PRIMARY KEY(host), + ); + +Affected Rows: 0 + +INSERT INTO TABLE test VALUES + (0, 'host1', 1), + (0, 'host2', 2); + +Affected Rows: 2 + +SELECT * FROM test; + ++---------------------+-------+-----+ +| ts | host | val | ++---------------------+-------+-----+ +| 1970-01-01T00:00:00 | host1 | 1 | +| 1970-01-01T00:00:00 | host2 | 2 | ++---------------------+-------+-----+ + +-- test the non-existent matchers -- +TQL EVAL (0, 1, '5s') test{job=~"host1|host3"}; + +++ +++ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 1, '5s') test{job=~".*"}; + ++---------------------+-------+-----+ +| ts | host | val | ++---------------------+-------+-----+ +| 1970-01-01T00:00:00 | host1 | 1 | +| 1970-01-01T00:00:00 | host2 | 2 | ++---------------------+-------+-----+ + +TQL EVAL (0, 1, '5s') test{job=~".+"}; + +++ +++ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 1, '5s') test{job=""}; + ++---------------------+-------+-----+ +| ts | host | val | ++---------------------+-------+-----+ +| 1970-01-01T00:00:00 | host1 | 1 | +| 1970-01-01T00:00:00 | host2 | 2 | ++---------------------+-------+-----+ + +TQL EVAL (0, 1, '5s') test{job!=""}; + +++ +++ + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/promql/label.sql b/tests/cases/standalone/common/promql/label.sql index 3b9058c27e..33b31975e8 100644 --- a/tests/cases/standalone/common/promql/label.sql +++ b/tests/cases/standalone/common/promql/label.sql @@ -53,3 +53,31 @@ TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "$2", "idc", "(. TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "", "", ""); DROP TABLE test; + +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + val BIGINT, + PRIMARY KEY(host), + ); + +INSERT INTO TABLE test VALUES + (0, 'host1', 1), + (0, 'host2', 2); + +SELECT * FROM test; + +-- test the non-existent matchers -- +TQL EVAL (0, 1, '5s') test{job=~"host1|host3"}; + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 1, '5s') test{job=~".*"}; + +TQL EVAL (0, 1, '5s') test{job=~".+"}; + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 1, '5s') test{job=""}; + +TQL EVAL (0, 1, '5s') test{job!=""}; + +DROP TABLE test; diff --git a/tests/cases/standalone/common/promql/non_existent_matchers.result b/tests/cases/standalone/common/promql/non_existent_matchers.result deleted file mode 100644 index 10a6bf4704..0000000000 --- a/tests/cases/standalone/common/promql/non_existent_matchers.result +++ /dev/null @@ -1,44 +0,0 @@ -CREATE TABLE test ( - ts timestamp(3) time index, - host STRING, - val BIGINT, - PRIMARY KEY(host), - ); - -Affected Rows: 0 - -INSERT INTO TABLE test VALUES - (0, 'host1', 1), - (0, 'host2', 2); - -Affected Rows: 2 - -SELECT * FROM test; - -+---------------------+-------+-----+ -| ts | host | val | -+---------------------+-------+-----+ -| 1970-01-01T00:00:00 | host1 | 1 | -| 1970-01-01T00:00:00 | host2 | 2 | -+---------------------+-------+-----+ - --- test the non-existent matchers -- -TQL EVAL (0, 15, '5s') test{job=~"host1|host3"}; - -+---------------------+-------+-----+ -| ts | host | val | -+---------------------+-------+-----+ -| 1970-01-01T00:00:00 | host1 | 1 | -| 1970-01-01T00:00:05 | host1 | 1 | -| 1970-01-01T00:00:10 | host1 | 1 | -| 1970-01-01T00:00:15 | host1 | 1 | -| 1970-01-01T00:00:00 | host2 | 2 | -| 1970-01-01T00:00:05 | host2 | 2 | -| 1970-01-01T00:00:10 | host2 | 2 | -| 1970-01-01T00:00:15 | host2 | 2 | -+---------------------+-------+-----+ - -DROP TABLE test; - -Affected Rows: 0 - diff --git a/tests/cases/standalone/common/promql/non_existent_matchers.sql b/tests/cases/standalone/common/promql/non_existent_matchers.sql deleted file mode 100644 index ce04207e26..0000000000 --- a/tests/cases/standalone/common/promql/non_existent_matchers.sql +++ /dev/null @@ -1,17 +0,0 @@ -CREATE TABLE test ( - ts timestamp(3) time index, - host STRING, - val BIGINT, - PRIMARY KEY(host), - ); - -INSERT INTO TABLE test VALUES - (0, 'host1', 1), - (0, 'host2', 2); - -SELECT * FROM test; - --- test the non-existent matchers -- -TQL EVAL (0, 15, '5s') test{job=~"host1|host3"}; - -DROP TABLE test;