mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-16 13:00:40 +00:00
feat: simplify more regex patterns in promql (#6747)
* feat: simplify more regex patterns in promql
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* add sqlness cases
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* update sqlness case
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
---------
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
@@ -1304,20 +1304,48 @@ impl PromPlanner {
|
||||
MatchOp::NotEqual => col.not_eq(lit),
|
||||
MatchOp::Re(re) => {
|
||||
// TODO(ruihang): a more programmatic way to handle this in datafusion
|
||||
if re.as_str() == ".*" {
|
||||
|
||||
// This is a hack to handle `.+` and `.*`, and it is not strictly correct:
|
||||
// `.` doesn't match newline (`\n`). Given that this is in a PromQL context,
|
||||
// it's fine most of the time.
|
||||
if re.as_str() == "^(?:.*)$" {
|
||||
continue;
|
||||
}
|
||||
DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(col),
|
||||
op: Operator::RegexMatch,
|
||||
right: Box::new(re.as_str().lit()),
|
||||
})
|
||||
if re.as_str() == "^(?:.+)$" {
|
||||
col.not_eq(DfExpr::Literal(
|
||||
ScalarValue::Utf8(Some(String::new())),
|
||||
None,
|
||||
))
|
||||
} else {
|
||||
DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(col),
|
||||
op: Operator::RegexMatch,
|
||||
right: Box::new(DfExpr::Literal(
|
||||
ScalarValue::Utf8(Some(re.as_str().to_string())),
|
||||
None,
|
||||
)),
|
||||
})
|
||||
}
|
||||
}
|
||||
MatchOp::NotRe(re) => {
|
||||
if re.as_str() == "^(?:.*)$" {
|
||||
DfExpr::Literal(ScalarValue::Boolean(Some(false)), None)
|
||||
} else if re.as_str() == "^(?:.+)$" {
|
||||
col.eq(DfExpr::Literal(
|
||||
ScalarValue::Utf8(Some(String::new())),
|
||||
None,
|
||||
))
|
||||
} else {
|
||||
DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(col),
|
||||
op: Operator::RegexNotMatch,
|
||||
right: Box::new(DfExpr::Literal(
|
||||
ScalarValue::Utf8(Some(re.as_str().to_string())),
|
||||
None,
|
||||
)),
|
||||
})
|
||||
}
|
||||
}
|
||||
MatchOp::NotRe(re) => DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(col),
|
||||
op: Operator::RegexNotMatch,
|
||||
right: Box::new(re.as_str().lit()),
|
||||
}),
|
||||
};
|
||||
exprs.push(expr);
|
||||
}
|
||||
|
||||
@@ -55,6 +55,113 @@ TQL EVAL (0, 100, '15s') test{host=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237
|
||||
| 1970-01-01T00:01:30 | 10.0.160.237:8080 | 1 |
|
||||
+---------------------+-------------------+-----+
|
||||
|
||||
-- Some radical regex optimization
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
TQL ANALYZE VERBOSE (0, 0, '1s') test{host=~".*"};
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[1000], time index=[ts] REDACTED
|
||||
|_|_|_PromSeriesDivideExec: tags=["host"] REDACTED
|
||||
|_|_|_SortExec: expr=[host@1 ASC, ts@0 ASC], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeriesScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0}, "distribution":"PerSeries", "projection": ["ts", "host", "val"], "filters": ["ts >= TimestampMillisecond(-300000, None)", "ts <= TimestampMillisecond(300000, None)"], "REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 2_|
|
||||
+-+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
TQL ANALYZE VERBOSE (0, 0, '1s') test{host=~".+"};
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[1000], time index=[ts] REDACTED
|
||||
|_|_|_PromSeriesDivideExec: tags=["host"] REDACTED
|
||||
|_|_|_SortExec: expr=[host@1 ASC, ts@0 ASC], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeriesScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0}, "distribution":"PerSeries", "projection": ["ts", "host", "val"], "filters": ["host != Utf8(\"\")", "ts >= TimestampMillisecond(-300000, None)", "ts <= TimestampMillisecond(300000, None)"], "REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 2_|
|
||||
+-+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
TQL ANALYZE VERBOSE (0, 0, '1s') test{host!~".*"};
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[1000], time index=[ts] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_PromSeriesDivideExec: tags=["host"] REDACTED
|
||||
|_|_|_SortExec: expr=[host@1 ASC, ts@0 ASC], preserve_partitioning=[false] REDACTED
|
||||
|_|_|_EmptyExec REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 0_|
|
||||
+-+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
TQL ANALYZE VERBOSE (0, 0, '1s') test{host!~".+"};
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[1000], time index=[ts] REDACTED
|
||||
|_|_|_PromSeriesDivideExec: tags=["host"] REDACTED
|
||||
|_|_|_SortExec: expr=[host@1 ASC, ts@0 ASC], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_CoalescePartitionsExec REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeriesScan: region=REDACTED, {"partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0}, "distribution":"PerSeries", "projection": ["ts", "host", "val"], "filters": ["host = Utf8(\"\")", "ts >= TimestampMillisecond(-300000, None)", "ts <= TimestampMillisecond(300000, None)"], "REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 0_|
|
||||
+-+-+-+
|
||||
|
||||
DROP TABLE test;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -17,4 +17,42 @@ TQL EVAL (0, 100, '15s') test{host=~"10\\.0\\.160\\.237:808|nonexistence"};
|
||||
|
||||
TQL EVAL (0, 100, '15s') test{host=~"(10\\.0\\.160\\.237:8080|10\\.0\\.160\\.237:9090)"};
|
||||
|
||||
-- Some radical regex optimization
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
TQL ANALYZE VERBOSE (0, 0, '1s') test{host=~".*"};
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
TQL ANALYZE VERBOSE (0, 0, '1s') test{host=~".+"};
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
TQL ANALYZE VERBOSE (0, 0, '1s') test{host!~".*"};
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
TQL ANALYZE VERBOSE (0, 0, '1s') test{host!~".+"};
|
||||
|
||||
DROP TABLE test;
|
||||
|
||||
Reference in New Issue
Block a user