fix: two label_replace behaviors that differ from PromQL (#6720)

* fix: two label_replace behaviors that differ from PromQL

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>

* fix: another address

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
This commit is contained in:
yihong
2025-08-13 14:27:49 +08:00
committed by GitHub
parent 5eb491df12
commit f0bec4940f
4 changed files with 71 additions and 7 deletions

View File

@@ -207,6 +207,20 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid regular expression in label_replace(): {}", regex))]
InvalidRegularExpression {
regex: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid destination label name in label_replace(): {}", label_name))]
InvalidDestinationLabelName {
label_name: String,
#[snafu(implicit)]
location: Location,
},
}
impl ErrorExt for Error {
@@ -228,7 +242,9 @@ impl ErrorExt for Error {
| UnexpectedPlanExpr { .. }
| UnsupportedMatcherOp { .. }
| SameLabelSet { .. }
| TimestampOutOfRange { .. } => StatusCode::InvalidArguments,
| TimestampOutOfRange { .. }
| InvalidRegularExpression { .. }
| InvalidDestinationLabelName { .. } => StatusCode::InvalidArguments,
UnknownTable { .. } => StatusCode::Internal,

View File

@@ -50,6 +50,7 @@ use datafusion_expr::{col, lit, ExprSchemable, SortExpr};
use datatypes::arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
use datatypes::data_type::ConcreteDataType;
use itertools::Itertools;
use once_cell::sync::Lazy;
use promql::extension_plan::{
build_special_time_expr, Absent, EmptyMetric, HistogramFold, InstantManipulate, Millisecond,
RangeManipulate, ScalarCalculate, SeriesDivide, SeriesNormalize, UnionDistinctOn,
@@ -67,6 +68,7 @@ use promql_parser::parser::{
NumberLiteral, Offset, ParenExpr, StringLiteral, SubqueryExpr, UnaryExpr,
VectorMatchCardinality, VectorSelector,
};
use regex::{self, Regex};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metric_engine_consts::{
DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
@@ -75,12 +77,13 @@ use table::table::adapter::DfTableProviderAdapter;
use crate::promql::error::{
CatalogSnafu, ColumnNotFoundSnafu, CombineTableColumnMismatchSnafu, DataFusionPlanningSnafu,
ExpectRangeSelectorSnafu, FunctionInvalidArgumentSnafu, InvalidTimeRangeSnafu,
MultiFieldsNotSupportedSnafu, MultipleMetricMatchersSnafu, MultipleVectorSnafu,
NoMetricMatcherSnafu, PromqlPlanNodeSnafu, Result, SameLabelSetSnafu, TableNameNotFoundSnafu,
TimeIndexNotFoundSnafu, UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu,
UnsupportedExprSnafu, UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu,
ValueNotFoundSnafu, ZeroRangeSelectorSnafu,
ExpectRangeSelectorSnafu, FunctionInvalidArgumentSnafu, InvalidDestinationLabelNameSnafu,
InvalidRegularExpressionSnafu, InvalidTimeRangeSnafu, MultiFieldsNotSupportedSnafu,
MultipleMetricMatchersSnafu, MultipleVectorSnafu, NoMetricMatcherSnafu, PromqlPlanNodeSnafu,
Result, SameLabelSetSnafu, TableNameNotFoundSnafu, TimeIndexNotFoundSnafu,
UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu, UnsupportedExprSnafu,
UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu, ValueNotFoundSnafu,
ZeroRangeSelectorSnafu,
};
use crate::query_engine::QueryEngineState;
@@ -97,6 +100,11 @@ const SPECIAL_VECTOR_FUNCTION: &str = "vector";
/// `le` column for conventional histogram.
const LE_COLUMN_NAME: &str = "le";
/// Static regex for validating label names according to Prometheus specification.
/// Label names must match the regex: [a-zA-Z_][a-zA-Z0-9_]*
///
/// The compiled pattern is anchored with `^` and `$`, so `is_match` succeeds only
/// when the entire string is a well-formed label name (an empty string never matches).
/// The regex is built once, on first access, through `Lazy`.
///
/// The `unwrap` is on a fixed pattern literal, so a failure to compile would be a
/// programming error rather than a runtime condition.
static LABEL_NAME_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap());
const DEFAULT_TIME_INDEX_COLUMN: &str = "time";
/// default value column name for empty metric
@@ -1910,6 +1918,22 @@ impl PromPlanner {
Ok((exprs, new_tags))
}
/// Checks that `label_name` may be used as a destination label.
///
/// A name is accepted only when it matches `LABEL_NAME_REGEX`
/// (`[a-zA-Z_][a-zA-Z0-9_]*`) and does not begin with a double underscore;
/// names starting with `__` are treated as reserved for internal use.
///
/// # Errors
///
/// Returns `InvalidDestinationLabelName` carrying the offending name when
/// either rule is violated.
fn validate_label_name(label_name: &str) -> Result<()> {
    // The reserved-prefix rule is checked explicitly because the pattern alone
    // would accept names such as `__x`.
    let reserved = label_name.starts_with("__");
    if reserved || !LABEL_NAME_REGEX.is_match(label_name) {
        InvalidDestinationLabelNameSnafu { label_name }.fail()
    } else {
        Ok(())
    }
}
/// Build expr for `label_replace` function
fn build_regexp_replace_label_expr(
&self,
@@ -1924,6 +1948,9 @@ impl PromPlanner {
}
.fail()?,
};
// Validate the destination label name
Self::validate_label_name(&dst_label)?;
let replacement = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r,
other => UnexpectedPlanExprSnafu {
@@ -1947,6 +1974,15 @@ impl PromPlanner {
.fail()?,
};
// Validate the regex before using it
// doc: https://prometheus.io/docs/prometheus/latest/querying/functions/#label_replace
regex::Regex::new(&regex).map_err(|_| {
InvalidRegularExpressionSnafu {
regex: regex.clone(),
}
.build()
})?;
// If the src_label exists and regex is empty, keep everything unchanged.
if self.ctx.tag_columns.contains(&src_label) && regex.is_empty() {
return Ok(None);

View File

@@ -305,6 +305,14 @@ TQL EVAL(0, 15, '5s') {__name__="test",host="host1"} * label_replace(vector(1),
Error: 3000(PlanQuery), Internal error during building DataFusion plan: No field named addr. Valid fields are test.ts, test.host, test.idc, test.val.
TQL EVAL label_replace(demo_num_cpus, "~invalid", "", "src", "(.*)");
Error: 1004(InvalidArguments), Invalid destination label name in label_replace(): ~invalid
TQL EVAL label_replace(demo_num_cpus, "job", "value", "src", "(.*");
Error: 1004(InvalidArguments), Invalid regular expression in label_replace(): (.*
-- Issue 6438 --
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*") == 1.0;

View File

@@ -85,6 +85,10 @@ TQL EVAL(0, 15, '5s') {__name__="test",host="host1"} * label_replace(vector(1),
-- SQLNESS SORT_RESULT 3 1
TQL EVAL(0, 15, '5s') {__name__="test",host="host1"} * label_replace(vector(1), "addr", "host1", "instance", "");
TQL EVAL label_replace(demo_num_cpus, "~invalid", "", "src", "(.*)");
TQL EVAL label_replace(demo_num_cpus, "job", "value", "src", "(.*");
-- Issue 6438 --
-- SQLNESS SORT_RESULT 3 1
TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*") == 1.0;