From 6e8b1ba004bcda10e6838dedf08ea6dcb3206035 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Fri, 14 Feb 2025 20:20:26 -0800 Subject: [PATCH] feat: drop noneffective regex filter (#5544) Signed-off-by: Ruihang Xia --- src/query/src/promql/planner.rs | 16 +++- .../common/select/tql_filter.result | 96 +++++++++++++++++++ .../standalone/common/select/tql_filter.sql | 29 ++++++ 3 files changed, 136 insertions(+), 5 deletions(-) create mode 100644 tests/cases/standalone/common/select/tql_filter.result create mode 100644 tests/cases/standalone/common/select/tql_filter.sql diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 1b0f996bc1..09cb27287f 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -981,11 +981,17 @@ impl PromPlanner { let expr = match matcher.op { MatchOp::Equal => col.eq(lit), MatchOp::NotEqual => col.not_eq(lit), - MatchOp::Re(_) => DfExpr::BinaryExpr(BinaryExpr { - left: Box::new(col), - op: Operator::RegexMatch, - right: Box::new(lit), - }), + MatchOp::Re(re) => { + // TODO(ruihang): a more programmatic way to handle this in datafusion + if re.as_str() == ".*" { + continue; + } + DfExpr::BinaryExpr(BinaryExpr { + left: Box::new(col), + op: Operator::RegexMatch, + right: Box::new(lit), + }) + } MatchOp::NotRe(_) => DfExpr::BinaryExpr(BinaryExpr { left: Box::new(col), op: Operator::RegexNotMatch, diff --git a/tests/cases/standalone/common/select/tql_filter.result b/tests/cases/standalone/common/select/tql_filter.result new file mode 100644 index 0000000000..ffd221887a --- /dev/null +++ b/tests/cases/standalone/common/select/tql_filter.result @@ -0,0 +1,96 @@ +create table t1 (a string primary key, b timestamp time index, c double); + +Affected Rows: 0 + +insert into t1 values ("a", 1000, 1.0), ("b", 2000, 2.0), ("c", 3000, 3.0); + +Affected Rows: 3 + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (1, 3, '1s') t1{ a = "a" }; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[b], filter NaN: [false] REDACTED +|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED +|_|_|_SortPreservingMergeExec: [a@0 ASC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_SortPreservingMergeExec: [a@0 DESC NULLS LAST, b@1 DESC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[a@0 DESC NULLS LAST, b@1 DESC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: b@1 >= -299000 AND b@1 <= 303000 REDACTED +|_|_|_RepartitionExec: partitioning=REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED +|_|_|_| +|_|_| Total rows: 3_| ++-+-+-+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (1, 3, '1s') t1{ a =~ ".*" }; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[b], filter NaN: [false] REDACTED +|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED +|_|_|_SortPreservingMergeExec: [a@0 ASC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_SortPreservingMergeExec: [a@0 DESC NULLS LAST, b@1 DESC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[a@0 DESC NULLS LAST, b@1 DESC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: b@1 >= -299000 AND b@1 <= 303000 REDACTED +|_|_|_RepartitionExec: partitioning=REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED +|_|_|_| +|_|_| Total rows: 6_| ++-+-+-+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (1, 3, '1s') t1{ a =~ "a.*" }; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[b], filter NaN: [false] REDACTED +|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED +|_|_|_SortPreservingMergeExec: [a@0 ASC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_SortPreservingMergeExec: [a@0 DESC NULLS LAST, b@1 DESC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[a@0 DESC NULLS LAST, b@1 DESC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: a@0 ~ a.* AND b@1 >= -299000 AND b@1 <= 303000 REDACTED +|_|_|_RepartitionExec: partitioning=REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED +|_|_|_| +|_|_| Total rows: 3_| ++-+-+-+ + +drop table t1; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/select/tql_filter.sql b/tests/cases/standalone/common/select/tql_filter.sql new file mode 100644 index 0000000000..c37512f362 --- /dev/null +++ b/tests/cases/standalone/common/select/tql_filter.sql @@ -0,0 +1,29 @@ +create table t1 (a string primary key, b timestamp time index, c double); + +insert into t1 values ("a", 1000, 1.0), ("b", 2000, 2.0), ("c", 3000, 3.0); + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (1, 3, '1s') t1{ a = "a" }; + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (1, 3, '1s') t1{ a =~ ".*" }; + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (1, 3, '1s') t1{ a =~ "a.*" }; + +drop table t1;