diff --git a/src/query/src/promql/error.rs b/src/query/src/promql/error.rs index 27bf9bdb15..dba55c8df8 100644 --- a/src/query/src/promql/error.rs +++ b/src/query/src/promql/error.rs @@ -207,6 +207,20 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Invalid regular expression in label_replace(): {}", regex))] + InvalidRegularExpression { + regex: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Invalid destination label name in label_replace(): {}", label_name))] + InvalidDestinationLabelName { + label_name: String, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -228,7 +242,9 @@ impl ErrorExt for Error { | UnexpectedPlanExpr { .. } | UnsupportedMatcherOp { .. } | SameLabelSet { .. } - | TimestampOutOfRange { .. } => StatusCode::InvalidArguments, + | TimestampOutOfRange { .. } + | InvalidRegularExpression { .. } + | InvalidDestinationLabelName { .. } => StatusCode::InvalidArguments, UnknownTable { .. 
} => StatusCode::Internal, diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 0149c951af..11a4b35767 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -50,6 +50,7 @@ use datafusion_expr::{col, lit, ExprSchemable, SortExpr}; use datatypes::arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit}; use datatypes::data_type::ConcreteDataType; use itertools::Itertools; +use once_cell::sync::Lazy; use promql::extension_plan::{ build_special_time_expr, Absent, EmptyMetric, HistogramFold, InstantManipulate, Millisecond, RangeManipulate, ScalarCalculate, SeriesDivide, SeriesNormalize, UnionDistinctOn, @@ -67,6 +68,7 @@ use promql_parser::parser::{ NumberLiteral, Offset, ParenExpr, StringLiteral, SubqueryExpr, UnaryExpr, VectorMatchCardinality, VectorSelector, }; +use regex::{self, Regex}; use snafu::{ensure, OptionExt, ResultExt}; use store_api::metric_engine_consts::{ DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME, @@ -75,12 +77,13 @@ use table::table::adapter::DfTableProviderAdapter; use crate::promql::error::{ CatalogSnafu, ColumnNotFoundSnafu, CombineTableColumnMismatchSnafu, DataFusionPlanningSnafu, - ExpectRangeSelectorSnafu, FunctionInvalidArgumentSnafu, InvalidTimeRangeSnafu, - MultiFieldsNotSupportedSnafu, MultipleMetricMatchersSnafu, MultipleVectorSnafu, - NoMetricMatcherSnafu, PromqlPlanNodeSnafu, Result, SameLabelSetSnafu, TableNameNotFoundSnafu, - TimeIndexNotFoundSnafu, UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu, - UnsupportedExprSnafu, UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu, - ValueNotFoundSnafu, ZeroRangeSelectorSnafu, + ExpectRangeSelectorSnafu, FunctionInvalidArgumentSnafu, InvalidDestinationLabelNameSnafu, + InvalidRegularExpressionSnafu, InvalidTimeRangeSnafu, MultiFieldsNotSupportedSnafu, + MultipleMetricMatchersSnafu, MultipleVectorSnafu, NoMetricMatcherSnafu, PromqlPlanNodeSnafu, + Result, SameLabelSetSnafu, 
TableNameNotFoundSnafu, TimeIndexNotFoundSnafu, + UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu, UnsupportedExprSnafu, + UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu, ValueNotFoundSnafu, + ZeroRangeSelectorSnafu, }; use crate::query_engine::QueryEngineState; @@ -97,6 +100,11 @@ const SPECIAL_VECTOR_FUNCTION: &str = "vector"; /// `le` column for conventional histogram. const LE_COLUMN_NAME: &str = "le"; +/// Static regex for validating label names according to Prometheus specification. +/// Label names must match the regex: [a-zA-Z_][a-zA-Z0-9_]* +static LABEL_NAME_REGEX: Lazy<Regex> = + Lazy::new(|| Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap()); + const DEFAULT_TIME_INDEX_COLUMN: &str = "time"; /// default value column name for empty metric @@ -1910,6 +1918,22 @@ impl PromPlanner { Ok((exprs, new_tags)) } + /// Validate label name according to Prometheus specification. + /// Label names must match the regex: [a-zA-Z_][a-zA-Z0-9_]* + /// Additionally, label names starting with double underscores are reserved for internal use. 
+ fn validate_label_name(label_name: &str) -> Result<()> { + // Check if label name starts with double underscores (reserved) + if label_name.starts_with("__") { + return InvalidDestinationLabelNameSnafu { label_name }.fail(); + } + // Check if label name matches the required pattern + if !LABEL_NAME_REGEX.is_match(label_name) { + return InvalidDestinationLabelNameSnafu { label_name }.fail(); + } + + Ok(()) + } + /// Build expr for `label_replace` function fn build_regexp_replace_label_expr( &self, @@ -1924,6 +1948,9 @@ impl PromPlanner { } .fail()?, }; + + // Validate the destination label name + Self::validate_label_name(&dst_label)?; let replacement = match other_input_exprs.pop_front() { Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r, other => UnexpectedPlanExprSnafu { @@ -1947,6 +1974,15 @@ impl PromPlanner { .fail()?, }; + // Validate the regex before using it + // doc: https://prometheus.io/docs/prometheus/latest/querying/functions/#label_replace + regex::Regex::new(&regex).map_err(|_| { + InvalidRegularExpressionSnafu { + regex: regex.clone(), + } + .build() + })?; + // If the src_label exists and regex is empty, keep everything unchanged. if self.ctx.tag_columns.contains(&src_label) && regex.is_empty() { return Ok(None); diff --git a/tests/cases/standalone/common/promql/label.result b/tests/cases/standalone/common/promql/label.result index 3de3d7384a..1bdc37b5aa 100644 --- a/tests/cases/standalone/common/promql/label.result +++ b/tests/cases/standalone/common/promql/label.result @@ -305,6 +305,14 @@ TQL EVAL(0, 15, '5s') {__name__="test",host="host1"} * label_replace(vector(1), Error: 3000(PlanQuery), Internal error during building DataFusion plan: No field named addr. Valid fields are test.ts, test.host, test.idc, test.val. 
+TQL EVAL label_replace(demo_num_cpus, "~invalid", "", "src", "(.*)"); + +Error: 1004(InvalidArguments), Invalid destination label name in label_replace(): ~invalid + +TQL EVAL label_replace(demo_num_cpus, "job", "value", "src", "(.*"); + +Error: 1004(InvalidArguments), Invalid regular expression in label_replace(): (.* + -- Issue 6438 -- -- SQLNESS SORT_RESULT 3 1 TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*") == 1.0; diff --git a/tests/cases/standalone/common/promql/label.sql b/tests/cases/standalone/common/promql/label.sql index 4e56936617..3fb20792e0 100644 --- a/tests/cases/standalone/common/promql/label.sql +++ b/tests/cases/standalone/common/promql/label.sql @@ -85,6 +85,10 @@ TQL EVAL(0, 15, '5s') {__name__="test",host="host1"} * label_replace(vector(1), -- SQLNESS SORT_RESULT 3 1 TQL EVAL(0, 15, '5s') {__name__="test",host="host1"} * label_replace(vector(1), "addr", "host1", "instance", ""); +TQL EVAL label_replace(demo_num_cpus, "~invalid", "", "src", "(.*)"); + +TQL EVAL label_replace(demo_num_cpus, "job", "value", "src", "(.*"); + -- Issue 6438 -- -- SQLNESS SORT_RESULT 3 1 TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*") == 1.0;