From 73c29bb4822c25e89bc7c935b7e44983a76d478a Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 13 Feb 2025 18:42:25 +0900 Subject: [PATCH] fix(promql): unescape matcher values (#5521) * fix(promql): unescape matcher values * test: add sqlness tests * chore: apply suggestions from CR * feat: use unescaper --- Cargo.lock | 20 ++++++-- src/query/Cargo.toml | 1 + src/query/src/promql/planner.rs | 51 ++++++++++++++++++- src/servers/src/http/prometheus.rs | 2 + .../standalone/common/promql/regex.result | 42 +++++++++++++++ .../cases/standalone/common/promql/regex.sql | 16 ++++++ 6 files changed, 126 insertions(+), 6 deletions(-) create mode 100644 tests/cases/standalone/common/promql/regex.result create mode 100644 tests/cases/standalone/common/promql/regex.sql diff --git a/Cargo.lock b/Cargo.lock index bdb317b202..8d1440395b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1053,7 +1053,7 @@ dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.11.0", + "itertools 0.13.0", "proc-macro2", "quote", "regex", @@ -6269,7 +6269,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -8870,7 +8870,7 @@ checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" dependencies = [ "bytes", "heck 0.5.0", - "itertools 0.11.0", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -8916,7 +8916,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.96", @@ -9130,6 +9130,7 @@ dependencies = [ "table", "tokio", "tokio-stream", + "unescaper", "uuid", ] @@ -12995,6 +12996,15 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" +[[package]] +name = "unescaper" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c878a167baa8afd137494101a688ef8c67125089ff2249284bd2b5f9bfedb815" +dependencies = [ + "thiserror 1.0.64", +] + [[package]] name = "unicase" version = "2.7.0" @@ -13411,7 +13421,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/src/query/Cargo.toml b/src/query/Cargo.toml index 286cd90b91..ec6be73a10 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -65,6 +65,7 @@ store-api.workspace = true substrait.workspace = true table.workspace = true tokio.workspace = true +unescaper = "0.1" uuid.workspace = true [dev-dependencies] diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 51c836bd54..27c9f36770 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -165,6 +165,14 @@ pub struct PromPlanner { ctx: PromPlannerContext, } +/// Unescapes the value of the matcher +pub fn normalize_matcher(mut matcher: Matcher) -> Matcher { + if let Ok(unescaped_value) = unescaper::unescape(&matcher.value) { + matcher.value = unescaped_value; + } + matcher +} + impl PromPlanner { pub async fn stmt_to_plan( table_provider: DfTableSourceProvider, @@ -738,7 +746,10 @@ impl PromPlanner { let _ = matchers.insert(matcher.clone()); } } - Ok(Matchers::new(matchers.into_iter().collect())) + + Ok(Matchers::new( + matchers.into_iter().map(normalize_matcher).collect(), + )) } async fn selector_to_series_normalize_plan( @@ -3555,4 +3566,42 @@ mod test { assert_eq!(plan.display_indent_schema().to_string(), expected); } + + #[tokio::test] + async fn test_matchers_to_expr() { + let mut eval_stmt = EvalStmt { + expr: PromExpr::NumberLiteral(NumberLiteral { val: 1.0 }), + start: UNIX_EPOCH, + end: UNIX_EPOCH + .checked_add(Duration::from_secs(100_000)) + .unwrap(), + interval: Duration::from_secs(5), + lookback_delta: Duration::from_secs(1), + }; + let case = r#"sum(prometheus_tsdb_head_series{tag_1=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237:9090)"})"#; + + let prom_expr = parser::parse(case).unwrap(); + eval_stmt.expr = prom_expr; + let table_provider = build_test_table_provider( + &[( + DEFAULT_SCHEMA_NAME.to_string(), + "prometheus_tsdb_head_series".to_string(), + )], + 3, + 3, + ) + .await; + let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()) + .await + .unwrap(); + let expected = r#"Sort: prometheus_tsdb_head_series.timestamp ASC NULLS LAST [timestamp:Timestamp(Millisecond, None), sum(prometheus_tsdb_head_series.field_0):Float64;N, sum(prometheus_tsdb_head_series.field_1):Float64;N, sum(prometheus_tsdb_head_series.field_2):Float64;N] + Aggregate: groupBy=[[prometheus_tsdb_head_series.timestamp]], aggr=[[sum(prometheus_tsdb_head_series.field_0), sum(prometheus_tsdb_head_series.field_1), sum(prometheus_tsdb_head_series.field_2)]] [timestamp:Timestamp(Millisecond, None), sum(prometheus_tsdb_head_series.field_0):Float64;N, sum(prometheus_tsdb_head_series.field_1):Float64;N, sum(prometheus_tsdb_head_series.field_2):Float64;N] + PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N] + PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N] + PromSeriesDivide: tags=["tag_0", "tag_1", "tag_2"] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N] + Sort: prometheus_tsdb_head_series.tag_0 DESC NULLS LAST, prometheus_tsdb_head_series.tag_1 DESC NULLS LAST, prometheus_tsdb_head_series.tag_2 DESC NULLS LAST, prometheus_tsdb_head_series.timestamp DESC NULLS LAST [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N] + Filter: prometheus_tsdb_head_series.tag_1 ~ Utf8("(10\.0\.160\.237:8080|10\.0\.160\.237:9090)") AND prometheus_tsdb_head_series.timestamp >= TimestampMillisecond(-1000, None) AND prometheus_tsdb_head_series.timestamp <= TimestampMillisecond(100001000, None) [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N] + TableScan: prometheus_tsdb_head_series [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N]"#; + assert_eq!(plan.display_indent_schema().to_string(), expected); + } } diff --git a/src/servers/src/http/prometheus.rs b/src/servers/src/http/prometheus.rs index 5455591f17..41dfc672ab 100644 --- a/src/servers/src/http/prometheus.rs +++ b/src/servers/src/http/prometheus.rs @@ -39,6 +39,7 @@ use promql_parser::parser::{ UnaryExpr, VectorSelector, }; use query::parser::{PromQuery, DEFAULT_LOOKBACK_STRING}; +use query::promql::planner::normalize_matcher; use serde::de::{self, MapAccess, Visitor}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -857,6 +858,7 @@ fn find_metric_name_not_equal_matchers(expr: &PromqlExpr) -> Option matchers .into_iter() .filter(|m| !matches!(m.op, MatchOp::Equal)) + .map(normalize_matcher) .collect::>() }) } diff --git a/tests/cases/standalone/common/promql/regex.result b/tests/cases/standalone/common/promql/regex.result new file mode 100644 index 0000000000..ec4c74d204 --- /dev/null +++ b/tests/cases/standalone/common/promql/regex.result @@ -0,0 +1,42 @@ +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + val BIGINT, + PRIMARY KEY(host), +); + +Affected Rows: 0 + +INSERT INTO TABLE test VALUES + (0, '10.0.160.237:8080', 1), + (0, '10.0.160.237:8081', 1); + +Affected Rows: 2 + +SELECT * FROM test; + ++---------------------+-------------------+-----+ +| ts | host | val | ++---------------------+-------------------+-----+ +| 1970-01-01T00:00:00 | 10.0.160.237:8080 | 1 | +| 1970-01-01T00:00:00 | 10.0.160.237:8081 | 1 | ++---------------------+-------------------+-----+ + +TQL EVAL (0, 100, '15s') test{host=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237:9090)"}; + ++---------------------+-------------------+-----+ +| ts | host | val | ++---------------------+-------------------+-----+ +| 1970-01-01T00:00:00 | 10.0.160.237:8080 | 1 | +| 1970-01-01T00:00:15 | 10.0.160.237:8080 | 1 | +| 1970-01-01T00:00:30 | 10.0.160.237:8080 | 1 | +| 1970-01-01T00:00:45 | 10.0.160.237:8080 | 1 | +| 1970-01-01T00:01:00 | 10.0.160.237:8080 | 1 | +| 1970-01-01T00:01:15 | 10.0.160.237:8080 | 1 | +| 1970-01-01T00:01:30 | 10.0.160.237:8080 | 1 | ++---------------------+-------------------+-----+ + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/promql/regex.sql b/tests/cases/standalone/common/promql/regex.sql new file mode 100644 index 0000000000..71b13a6eb6 --- /dev/null +++ b/tests/cases/standalone/common/promql/regex.sql @@ -0,0 +1,16 @@ +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + val BIGINT, + PRIMARY KEY(host), +); + +INSERT INTO TABLE test VALUES + (0, '10.0.160.237:8080', 1), + (0, '10.0.160.237:8081', 1); + +SELECT * FROM test; + +TQL EVAL (0, 100, '15s') test{host=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237:9090)"}; + +DROP TABLE test;