feat: simplify more regex patterns in promql (#6747)

* feat: simplify more regex patterns in promql

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add sqlness cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-08-20 11:51:10 -07:00
committed by GitHub
parent 474a689309
commit 7e573e497c
3 changed files with 184 additions and 11 deletions

View File

@@ -1304,20 +1304,48 @@ impl PromPlanner {
MatchOp::NotEqual => col.not_eq(lit),
MatchOp::Re(re) => {
// TODO(ruihang): a more programmatic way to handle this in datafusion
if re.as_str() == ".*" {
// This is a hack to handle `.+` and `.*`. It is not strictly correct because
// `.` doesn't match a newline (`\n`), but in a PromQL context that is fine
// most of the time.
if re.as_str() == "^(?:.*)$" {
continue;
}
DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(col),
op: Operator::RegexMatch,
right: Box::new(re.as_str().lit()),
})
if re.as_str() == "^(?:.+)$" {
col.not_eq(DfExpr::Literal(
ScalarValue::Utf8(Some(String::new())),
None,
))
} else {
DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(col),
op: Operator::RegexMatch,
right: Box::new(DfExpr::Literal(
ScalarValue::Utf8(Some(re.as_str().to_string())),
None,
)),
})
}
}
MatchOp::NotRe(re) => {
if re.as_str() == "^(?:.*)$" {
DfExpr::Literal(ScalarValue::Boolean(Some(false)), None)
} else if re.as_str() == "^(?:.+)$" {
col.eq(DfExpr::Literal(
ScalarValue::Utf8(Some(String::new())),
None,
))
} else {
DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(col),
op: Operator::RegexNotMatch,
right: Box::new(DfExpr::Literal(
ScalarValue::Utf8(Some(re.as_str().to_string())),
None,
)),
})
}
}
MatchOp::NotRe(re) => DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(col),
op: Operator::RegexNotMatch,
right: Box::new(re.as_str().lit()),
}),
};
exprs.push(expr);
}

View File

@@ -55,6 +55,113 @@ TQL EVAL (0, 100, '15s') test{host=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237
| 1970-01-01T00:01:30 | 10.0.160.237:8080 | 1 |
+---------------------+-------------------+-----+
-- Some radical regex optimization
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
TQL ANALYZE VERBOSE (0, 0, '1s') test{host=~".*"};
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[1000], time index=[ts] REDACTED
|_|_|_PromSeriesDivideExec: tags=["host"] REDACTED
|_|_|_SortExec: expr=[host@1 ASC, ts@0 ASC], preserve_partitioning=[true] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_CoalescePartitionsExec REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeriesScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0}, "distribution":"PerSeries", "projection": ["ts", "host", "val"], "filters": ["ts >= TimestampMillisecond(-300000, None)", "ts <= TimestampMillisecond(300000, None)"], "REDACTED
|_|_|_|
|_|_| Total rows: 2_|
+-+-+-+
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
TQL ANALYZE VERBOSE (0, 0, '1s') test{host=~".+"};
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[1000], time index=[ts] REDACTED
|_|_|_PromSeriesDivideExec: tags=["host"] REDACTED
|_|_|_SortExec: expr=[host@1 ASC, ts@0 ASC], preserve_partitioning=[true] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_CoalescePartitionsExec REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeriesScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0}, "distribution":"PerSeries", "projection": ["ts", "host", "val"], "filters": ["host != Utf8(\"\")", "ts >= TimestampMillisecond(-300000, None)", "ts <= TimestampMillisecond(300000, None)"], "REDACTED
|_|_|_|
|_|_| Total rows: 2_|
+-+-+-+
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
TQL ANALYZE VERBOSE (0, 0, '1s') test{host!~".*"};
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[1000], time index=[ts] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_PromSeriesDivideExec: tags=["host"] REDACTED
|_|_|_SortExec: expr=[host@1 ASC, ts@0 ASC], preserve_partitioning=[false] REDACTED
|_|_|_EmptyExec REDACTED
|_|_|_|
|_|_| Total rows: 0_|
+-+-+-+
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
TQL ANALYZE VERBOSE (0, 0, '1s') test{host!~".+"};
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[1000], time index=[ts] REDACTED
|_|_|_PromSeriesDivideExec: tags=["host"] REDACTED
|_|_|_SortExec: expr=[host@1 ASC, ts@0 ASC], preserve_partitioning=[true] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_CoalescePartitionsExec REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeriesScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0}, "distribution":"PerSeries", "projection": ["ts", "host", "val"], "filters": ["host = Utf8(\"\")", "ts >= TimestampMillisecond(-300000, None)", "ts <= TimestampMillisecond(300000, None)"], "REDACTED
|_|_|_|
|_|_| Total rows: 0_|
+-+-+-+
DROP TABLE test;
Affected Rows: 0

View File

@@ -17,4 +17,42 @@ TQL EVAL (0, 100, '15s') test{host=~"10\\.0\\.160\\.237:808|nonexistence"};
TQL EVAL (0, 100, '15s') test{host=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237:9090)"};
-- Some radical regex optimization
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
TQL ANALYZE VERBOSE (0, 0, '1s') test{host=~".*"};
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
TQL ANALYZE VERBOSE (0, 0, '1s') test{host=~".+"};
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
TQL ANALYZE VERBOSE (0, 0, '1s') test{host!~".*"};
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
TQL ANALYZE VERBOSE (0, 0, '1s') test{host!~".+"};
DROP TABLE test;