fix(promql): unescape matcher values (#5521)

* fix(promql): unescape matcher values

* test: add sqlness tests

* chore: apply suggestions from CR

* feat: use unescaper
This commit is contained in:
Weny Xu
2025-02-13 18:42:25 +09:00
committed by GitHub
parent 198ee87675
commit 73c29bb482
6 changed files with 126 additions and 6 deletions

20
Cargo.lock generated
View File

@@ -1053,7 +1053,7 @@ dependencies = [
"bitflags 2.6.0",
"cexpr",
"clang-sys",
"itertools 0.11.0",
"itertools 0.13.0",
"proc-macro2",
"quote",
"regex",
@@ -6269,7 +6269,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
dependencies = [
"cfg-if",
"windows-targets 0.48.5",
"windows-targets 0.52.6",
]
[[package]]
@@ -8870,7 +8870,7 @@ checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
dependencies = [
"bytes",
"heck 0.5.0",
"itertools 0.11.0",
"itertools 0.13.0",
"log",
"multimap",
"once_cell",
@@ -8916,7 +8916,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5"
dependencies = [
"anyhow",
"itertools 0.11.0",
"itertools 0.13.0",
"proc-macro2",
"quote",
"syn 2.0.96",
@@ -9130,6 +9130,7 @@ dependencies = [
"table",
"tokio",
"tokio-stream",
"unescaper",
"uuid",
]
@@ -12995,6 +12996,15 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "unescaper"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c878a167baa8afd137494101a688ef8c67125089ff2249284bd2b5f9bfedb815"
dependencies = [
"thiserror 1.0.64",
]
[[package]]
name = "unicase"
version = "2.7.0"
@@ -13411,7 +13421,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]

View File

@@ -65,6 +65,7 @@ store-api.workspace = true
substrait.workspace = true
table.workspace = true
tokio.workspace = true
unescaper = "0.1"
uuid.workspace = true
[dev-dependencies]

View File

@@ -165,6 +165,14 @@ pub struct PromPlanner {
ctx: PromPlannerContext,
}
/// Unescapes the value of the matcher
pub fn normalize_matcher(mut matcher: Matcher) -> Matcher {
if let Ok(unescaped_value) = unescaper::unescape(&matcher.value) {
matcher.value = unescaped_value;
}
matcher
}
impl PromPlanner {
pub async fn stmt_to_plan(
table_provider: DfTableSourceProvider,
@@ -738,7 +746,10 @@ impl PromPlanner {
let _ = matchers.insert(matcher.clone());
}
}
Ok(Matchers::new(matchers.into_iter().collect()))
Ok(Matchers::new(
matchers.into_iter().map(normalize_matcher).collect(),
))
}
async fn selector_to_series_normalize_plan(
@@ -3555,4 +3566,42 @@ mod test {
assert_eq!(plan.display_indent_schema().to_string(), expected);
}
#[tokio::test]
async fn test_matchers_to_expr() {
let mut eval_stmt = EvalStmt {
expr: PromExpr::NumberLiteral(NumberLiteral { val: 1.0 }),
start: UNIX_EPOCH,
end: UNIX_EPOCH
.checked_add(Duration::from_secs(100_000))
.unwrap(),
interval: Duration::from_secs(5),
lookback_delta: Duration::from_secs(1),
};
let case = r#"sum(prometheus_tsdb_head_series{tag_1=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237:9090)"})"#;
let prom_expr = parser::parse(case).unwrap();
eval_stmt.expr = prom_expr;
let table_provider = build_test_table_provider(
&[(
DEFAULT_SCHEMA_NAME.to_string(),
"prometheus_tsdb_head_series".to_string(),
)],
3,
3,
)
.await;
let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state())
.await
.unwrap();
let expected = r#"Sort: prometheus_tsdb_head_series.timestamp ASC NULLS LAST [timestamp:Timestamp(Millisecond, None), sum(prometheus_tsdb_head_series.field_0):Float64;N, sum(prometheus_tsdb_head_series.field_1):Float64;N, sum(prometheus_tsdb_head_series.field_2):Float64;N]
Aggregate: groupBy=[[prometheus_tsdb_head_series.timestamp]], aggr=[[sum(prometheus_tsdb_head_series.field_0), sum(prometheus_tsdb_head_series.field_1), sum(prometheus_tsdb_head_series.field_2)]] [timestamp:Timestamp(Millisecond, None), sum(prometheus_tsdb_head_series.field_0):Float64;N, sum(prometheus_tsdb_head_series.field_1):Float64;N, sum(prometheus_tsdb_head_series.field_2):Float64;N]
PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N]
PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N]
PromSeriesDivide: tags=["tag_0", "tag_1", "tag_2"] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N]
Sort: prometheus_tsdb_head_series.tag_0 DESC NULLS LAST, prometheus_tsdb_head_series.tag_1 DESC NULLS LAST, prometheus_tsdb_head_series.tag_2 DESC NULLS LAST, prometheus_tsdb_head_series.timestamp DESC NULLS LAST [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N]
Filter: prometheus_tsdb_head_series.tag_1 ~ Utf8("(10\.0\.160\.237:8080|10\.0\.160\.237:9090)") AND prometheus_tsdb_head_series.timestamp >= TimestampMillisecond(-1000, None) AND prometheus_tsdb_head_series.timestamp <= TimestampMillisecond(100001000, None) [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N]
TableScan: prometheus_tsdb_head_series [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N]"#;
assert_eq!(plan.display_indent_schema().to_string(), expected);
}
}

View File

@@ -39,6 +39,7 @@ use promql_parser::parser::{
UnaryExpr, VectorSelector,
};
use query::parser::{PromQuery, DEFAULT_LOOKBACK_STRING};
use query::promql::planner::normalize_matcher;
use serde::de::{self, MapAccess, Visitor};
use serde::{Deserialize, Serialize};
use serde_json::Value;
@@ -857,6 +858,7 @@ fn find_metric_name_not_equal_matchers(expr: &PromqlExpr) -> Option<Vec<Matcher>
matchers
.into_iter()
.filter(|m| !matches!(m.op, MatchOp::Equal))
.map(normalize_matcher)
.collect::<Vec<_>>()
})
}

View File

@@ -0,0 +1,42 @@
CREATE TABLE test (
ts timestamp(3) time index,
host STRING,
val BIGINT,
PRIMARY KEY(host),
);
Affected Rows: 0
INSERT INTO TABLE test VALUES
(0, '10.0.160.237:8080', 1),
(0, '10.0.160.237:8081', 1);
Affected Rows: 2
SELECT * FROM test;
+---------------------+-------------------+-----+
| ts | host | val |
+---------------------+-------------------+-----+
| 1970-01-01T00:00:00 | 10.0.160.237:8080 | 1 |
| 1970-01-01T00:00:00 | 10.0.160.237:8081 | 1 |
+---------------------+-------------------+-----+
TQL EVAL (0, 100, '15s') test{host=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237:9090)"};
+---------------------+-------------------+-----+
| ts | host | val |
+---------------------+-------------------+-----+
| 1970-01-01T00:00:00 | 10.0.160.237:8080 | 1 |
| 1970-01-01T00:00:15 | 10.0.160.237:8080 | 1 |
| 1970-01-01T00:00:30 | 10.0.160.237:8080 | 1 |
| 1970-01-01T00:00:45 | 10.0.160.237:8080 | 1 |
| 1970-01-01T00:01:00 | 10.0.160.237:8080 | 1 |
| 1970-01-01T00:01:15 | 10.0.160.237:8080 | 1 |
| 1970-01-01T00:01:30 | 10.0.160.237:8080 | 1 |
+---------------------+-------------------+-----+
DROP TABLE test;
Affected Rows: 0

View File

@@ -0,0 +1,16 @@
CREATE TABLE test (
ts timestamp(3) time index,
host STRING,
val BIGINT,
PRIMARY KEY(host),
);
INSERT INTO TABLE test VALUES
(0, '10.0.160.237:8080', 1),
(0, '10.0.160.237:8081', 1);
SELECT * FROM test;
TQL EVAL (0, 100, '15s') test{host=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237:9090)"};
DROP TABLE test;