feat: support @@ (AtAt) operator for term matching (#5902)

* update dep and sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* implement transcribe rule

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* more tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-04-15 19:05:17 +08:00
committed by GitHub
parent 6700c0762d
commit dcf1a486f6
8 changed files with 726 additions and 33 deletions

48
Cargo.lock generated
View File

@@ -2897,7 +2897,7 @@ checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2"
[[package]]
name = "datafusion"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"arrow-array",
@@ -2948,7 +2948,7 @@ dependencies = [
[[package]]
name = "datafusion-catalog"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"async-trait",
@@ -2968,7 +2968,7 @@ dependencies = [
[[package]]
name = "datafusion-catalog-listing"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"arrow-schema",
@@ -2991,7 +2991,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -3016,7 +3016,7 @@ dependencies = [
[[package]]
name = "datafusion-common-runtime"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"log",
"tokio",
@@ -3025,12 +3025,12 @@ dependencies = [
[[package]]
name = "datafusion-doc"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
[[package]]
name = "datafusion-execution"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"dashmap",
@@ -3048,7 +3048,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"chrono",
@@ -3068,7 +3068,7 @@ dependencies = [
[[package]]
name = "datafusion-expr-common"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"datafusion-common",
@@ -3079,7 +3079,7 @@ dependencies = [
[[package]]
name = "datafusion-functions"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"arrow-buffer",
@@ -3108,7 +3108,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -3129,7 +3129,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate-common"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -3141,7 +3141,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-nested"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"arrow-array",
@@ -3163,7 +3163,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-table"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"async-trait",
@@ -3178,7 +3178,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-window"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"datafusion-common",
"datafusion-doc",
@@ -3194,7 +3194,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-window-common"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"datafusion-common",
"datafusion-physical-expr-common",
@@ -3203,7 +3203,7 @@ dependencies = [
[[package]]
name = "datafusion-macros"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"datafusion-expr",
"quote",
@@ -3213,7 +3213,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"chrono",
@@ -3231,7 +3231,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -3254,7 +3254,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr-common"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -3267,7 +3267,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-optimizer"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"arrow-schema",
@@ -3288,7 +3288,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-plan"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -3318,7 +3318,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"arrow",
"arrow-array",
@@ -3336,7 +3336,7 @@ dependencies = [
[[package]]
name = "datafusion-substrait"
version = "45.0.0"
source = "git+https://github.com/apache/datafusion.git?rev=8ebed674dd71f8a466f658626877944cd16a4375#8ebed674dd71f8a466f658626877944cd16a4375"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
dependencies = [
"async-recursion",
"async-trait",

View File

@@ -112,15 +112,15 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
dashmap = "6.1"
datafusion = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-optimizer = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "8ebed674dd71f8a466f658626877944cd16a4375" }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
deadpool = "0.12"
deadpool-postgres = "0.14"
derive_builder = "0.20"

View File

@@ -21,6 +21,7 @@ pub mod scan_hint;
pub mod string_normalization;
#[cfg(test)]
pub(crate) mod test_util;
pub mod transcribe_atat;
pub mod type_conversion;
pub mod windowed_sort;

View File

@@ -0,0 +1,230 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_function::scalars::matches_term::MatchesTermFunction;
use common_function::scalars::udf::create_udf;
use common_function::state::FunctionState;
use datafusion::config::ConfigOptions;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_common::Result;
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{Expr, LogicalPlan};
use datafusion_optimizer::analyzer::AnalyzerRule;
use session::context::QueryContext;
use crate::plan::ExtractExpr;
/// TranscribeAtatRule is an analyzer rule that transcribes `@@` operator
/// to `matches_term` function.
///
/// Example:
/// ```sql
/// SELECT matches_term('cat!', 'cat') as result;
///
/// SELECT matches_term(`log_message`, '/start') as `matches_start` FROM t;
/// ```
///
/// to
///
/// ```sql
/// SELECT 'cat!' @@ 'cat' as result;
///
/// SELECT `log_message` @@ '/start' as `matches_start` FROM t;
/// ```
#[derive(Debug)]
pub struct TranscribeAtatRule;
impl AnalyzerRule for TranscribeAtatRule {
fn analyze(&self, plan: LogicalPlan, _config: &ConfigOptions) -> Result<LogicalPlan> {
plan.transform(Self::do_analyze).map(|x| x.data)
}
fn name(&self) -> &str {
"TranscribeAtatRule"
}
}
impl TranscribeAtatRule {
fn do_analyze(plan: LogicalPlan) -> Result<Transformed<LogicalPlan>> {
let mut rewriter = TranscribeAtatRewriter::default();
let new_expr = plan
.expressions_consider_join()
.into_iter()
.map(|e| e.rewrite(&mut rewriter).map(|x| x.data))
.collect::<Result<Vec<_>>>()?;
if rewriter.transcribed {
let inputs = plan.inputs().into_iter().cloned().collect::<Vec<_>>();
plan.with_new_exprs(new_expr, inputs).map(Transformed::yes)
} else {
Ok(Transformed::no(plan))
}
}
}
#[derive(Default)]
struct TranscribeAtatRewriter {
transcribed: bool,
}
impl TreeNodeRewriter for TranscribeAtatRewriter {
type Node = Expr;
fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
if let Expr::BinaryExpr(binary_expr) = &expr
&& matches!(binary_expr.op, datafusion_expr::Operator::AtAt)
{
self.transcribed = true;
let scalar_udf = create_udf(
Arc::new(MatchesTermFunction),
QueryContext::arc(),
Arc::new(FunctionState::default()),
);
let exprs = vec![
binary_expr.left.as_ref().clone(),
binary_expr.right.as_ref().clone(),
];
Ok(Transformed::yes(Expr::ScalarFunction(
ScalarFunction::new_udf(Arc::new(scalar_udf), exprs),
)))
} else {
Ok(Transformed::no(expr))
}
}
}
#[cfg(test)]
mod tests {
use arrow_schema::SchemaRef;
use datafusion::datasource::{provider_as_source, MemTable};
use datafusion::logical_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
use datafusion_expr::{BinaryExpr, Operator};
use datatypes::arrow::datatypes::{DataType, Field, Schema};
use super::*;
fn optimize(plan: LogicalPlan) -> Result<LogicalPlan> {
TranscribeAtatRule.analyze(plan, &ConfigOptions::default())
}
fn prepare_test_plan_builder() -> LogicalPlanBuilder {
let schema = Schema::new(vec![
Field::new("a", DataType::Utf8, false),
Field::new("b", DataType::Utf8, false),
]);
let table = MemTable::try_new(SchemaRef::from(schema), vec![]).unwrap();
LogicalPlanBuilder::scan("t", provider_as_source(Arc::new(table)), None).unwrap()
}
#[test]
fn test_multiple_atat() {
let plan = prepare_test_plan_builder()
.filter(
Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("a")),
op: Operator::AtAt,
right: Box::new(lit("foo")),
})
.and(Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("b")),
op: Operator::AtAt,
right: Box::new(lit("bar")),
})),
)
.unwrap()
.project(vec![
Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("a")),
op: Operator::AtAt,
right: Box::new(col("b")),
}),
col("b"),
])
.unwrap()
.build()
.unwrap();
let expected = r#"Projection: matches_term(t.a, t.b), t.b
Filter: matches_term(t.a, Utf8("foo")) AND matches_term(t.b, Utf8("bar"))
TableScan: t"#;
let optimized_plan = optimize(plan).unwrap();
let formatted = optimized_plan.to_string();
assert_eq!(formatted, expected);
}
#[test]
fn test_nested_atat() {
let plan = prepare_test_plan_builder()
.filter(
Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("a")),
op: Operator::AtAt,
right: Box::new(lit("foo")),
})
.and(
Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("b")),
op: Operator::AtAt,
right: Box::new(lit("bar")),
})
.or(Expr::BinaryExpr(BinaryExpr {
left: Box::new(
// Nested case in function argument
Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("a")),
op: Operator::AtAt,
right: Box::new(lit("nested")),
}),
),
op: Operator::Eq,
right: Box::new(lit(true)),
})),
),
)
.unwrap()
.project(vec![
col("a"),
// Complex nested expression with multiple @@ operators
Expr::BinaryExpr(BinaryExpr {
left: Box::new(Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("a")),
op: Operator::AtAt,
right: Box::new(lit("foo")),
})),
op: Operator::And,
right: Box::new(Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("b")),
op: Operator::AtAt,
right: Box::new(lit("bar")),
})),
}),
])
.unwrap()
.build()
.unwrap();
let expected = r#"Projection: t.a, matches_term(t.a, Utf8("foo")) AND matches_term(t.b, Utf8("bar"))
Filter: matches_term(t.a, Utf8("foo")) AND (matches_term(t.b, Utf8("bar")) OR matches_term(t.a, Utf8("nested")) = Boolean(true))
TableScan: t"#;
let optimized_plan = optimize(plan).unwrap();
let formatted = optimized_plan.to_string();
assert_eq!(formatted, expected);
}
}

View File

@@ -52,6 +52,7 @@ use crate::optimizer::pass_distribution::PassDistribution;
use crate::optimizer::remove_duplicate::RemoveDuplicate;
use crate::optimizer::scan_hint::ScanHintRule;
use crate::optimizer::string_normalization::StringNormalizationRule;
use crate::optimizer::transcribe_atat::TranscribeAtatRule;
use crate::optimizer::type_conversion::TypeConversionRule;
use crate::optimizer::windowed_sort::WindowedSortPhysicalRule;
use crate::optimizer::ExtensionAnalyzerRule;
@@ -115,6 +116,7 @@ impl QueryEngineState {
// Apply the datafusion rules
let mut analyzer = Analyzer::new();
analyzer.rules.insert(0, Arc::new(TranscribeAtatRule));
analyzer.rules.insert(0, Arc::new(StringNormalizationRule));
// Use our custom rule instead to optimize the count(*) query

View File

@@ -0,0 +1,315 @@
-- Derived from matches_term cases
-- Test basic term matching
-- Expect: true
SELECT 'cat!' @@ 'cat' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Test phrase matching with spaces
-- Expect: true
SELECT 'warning:hello world!' @@ 'hello world' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Test numbers in term
SELECT 'v1.0!' @@ 'v1.0' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Test case sensitivity
-- Expect: true
SELECT 'Cat' @@ 'Cat' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Expect: false
SELECT 'cat' @@ 'Cat' as result;
+--------+
| result |
+--------+
| false |
+--------+
-- Test empty string handling
-- Expect: true
SELECT '' @@ '' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Expect: false
SELECT 'any' @@ '' as result;
+--------+
| result |
+--------+
| false |
+--------+
-- Expect: false
SELECT '' @@ 'any' as result;
+--------+
| result |
+--------+
| false |
+--------+
-- Test partial matches (should fail)
-- Expect: false
SELECT 'category' @@ 'cat' as result;
+--------+
| result |
+--------+
| false |
+--------+
-- Expect: false
SELECT 'rebooted' @@ 'boot' as result;
+--------+
| result |
+--------+
| false |
+--------+
-- Test adjacent alphanumeric characters
SELECT 'cat5' @@ 'cat' as result;
+--------+
| result |
+--------+
| false |
+--------+
SELECT 'dogcat' @@ 'dog' as result;
+--------+
| result |
+--------+
| false |
+--------+
-- Test leading non-alphanumeric
-- Expect: true
SELECT 'dog/cat' @@ '/cat' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Expect: true
SELECT 'dog/cat' @@ 'dog/' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Expect: true
SELECT 'dog/cat' @@ 'dog/cat' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Test unicode characters
-- Expect: true
SELECT 'café>' @@ 'café' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Expect: true
SELECT 'русский!' @@ 'русский' as result;
+--------+
| result |
+--------+
| true |
+--------+
-- Test complete word matching
CREATE TABLE logs (
`id` TIMESTAMP TIME INDEX,
`log_message` STRING
);
Affected Rows: 0
INSERT INTO logs VALUES
(1, 'An error occurred!'),
(2, 'Critical error: system failure'),
(3, 'error-prone'),
(4, 'errors'),
(5, 'error123'),
(6, 'errorLogs'),
(7, 'Version v1.0 released'),
(8, 'v1.0!'),
(9, 'v1.0a'),
(10, 'v1.0beta'),
(11, 'GET /app/start'),
(12, 'Command: /start-prosess'),
(13, 'Command: /start'),
(14, 'start'),
(15, 'start/stop'),
(16, 'Alert: system failure detected'),
(17, 'system failure!'),
(18, 'system-failure'),
(19, 'system failure2023'),
(20, 'critical error: system failure'),
(21, 'critical failure detected'),
(22, 'critical issue'),
(23, 'failure imminent'),
(24, 'Warning: high temperature'),
(25, 'WARNING: system overload'),
(26, 'warned'),
(27, 'warnings');
Affected Rows: 27
-- Test complete word matching for 'error'
-- Expect:
-- 1|An error occurred!|true
-- 2|Critical error: system failure|true
-- 3|error-prone|true
-- 4|errors|false
-- 5|error123|false
-- 6|errorLogs|false
SELECT `id`, `log_message`, `log_message` @@ 'error' as `matches_error` FROM logs WHERE `id` <= 6 ORDER BY `id`;
+-------------------------+--------------------------------+---------------+
| id | log_message | matches_error |
+-------------------------+--------------------------------+---------------+
| 1970-01-01T00:00:00.001 | An error occurred! | true |
| 1970-01-01T00:00:00.002 | Critical error: system failure | true |
| 1970-01-01T00:00:00.003 | error-prone | true |
| 1970-01-01T00:00:00.004 | errors | false |
| 1970-01-01T00:00:00.005 | error123 | false |
| 1970-01-01T00:00:00.006 | errorLogs | false |
+-------------------------+--------------------------------+---------------+
-- Test complete word matching for 'v1.0'
-- Expect:
-- 7|Version v1.0 released|true
-- 8|v1.0!|true
-- 9|v1.0a|false
-- 10|v1.0beta|false
SELECT `id`, `log_message`, `log_message` @@ 'v1.0' as `matches_version` FROM logs WHERE `id` BETWEEN 7 AND 10 ORDER BY `id`;
+-------------------------+-----------------------+-----------------+
| id | log_message | matches_version |
+-------------------------+-----------------------+-----------------+
| 1970-01-01T00:00:00.007 | Version v1.0 released | true |
| 1970-01-01T00:00:00.008 | v1.0! | true |
| 1970-01-01T00:00:00.009 | v1.0a | false |
| 1970-01-01T00:00:00.010 | v1.0beta | false |
+-------------------------+-----------------------+-----------------+
-- Test complete word matching for '/start'
-- Expect:
-- 11|GET /app/start|true
-- 12|Command: /start-prosess|true
-- 13|Command: /start|true
-- 14|start|false
-- 15|start/stop|false
SELECT `id`, `log_message`, `log_message` @@ '/start' as `matches_start` FROM logs WHERE `id` BETWEEN 11 AND 15 ORDER BY `id`;
+-------------------------+-------------------------+---------------+
| id | log_message | matches_start |
+-------------------------+-------------------------+---------------+
| 1970-01-01T00:00:00.011 | GET /app/start | true |
| 1970-01-01T00:00:00.012 | Command: /start-prosess | true |
| 1970-01-01T00:00:00.013 | Command: /start | true |
| 1970-01-01T00:00:00.014 | start | false |
| 1970-01-01T00:00:00.015 | start/stop | false |
+-------------------------+-------------------------+---------------+
-- Test phrase matching for 'system failure'
-- Expect:
-- 16|Alert: system failure detected|true
-- 17|system failure!|true
-- 18|system-failure|false
-- 19|system failure2023|false
SELECT `id`, `log_message`, `log_message` @@ 'system failure' as `matches_phrase` FROM logs WHERE `id` BETWEEN 16 AND 19 ORDER BY `id`;
+-------------------------+--------------------------------+----------------+
| id | log_message | matches_phrase |
+-------------------------+--------------------------------+----------------+
| 1970-01-01T00:00:00.016 | Alert: system failure detected | true |
| 1970-01-01T00:00:00.017 | system failure! | true |
| 1970-01-01T00:00:00.018 | system-failure | false |
| 1970-01-01T00:00:00.019 | system failure2023 | false |
+-------------------------+--------------------------------+----------------+
-- Test multi-word matching using AND
-- Expect:
-- 20|critical error: system failure|true|true|true
-- 21|critical failure detected|true|true|true
-- 22|critical issue|true|false|false
-- 23|failure imminent|false|true|false
SELECT `id`, `log_message`,
`log_message` @@ 'critical' as `matches_critical`,
`log_message` @@ 'failure' as `matches_failure`,
`log_message` @@ 'critical' AND `log_message` @@ 'failure' as `matches_both`
FROM logs WHERE `id` BETWEEN 20 AND 23 ORDER BY `id`;
+-------------------------+--------------------------------+------------------+-----------------+--------------+
| id | log_message | matches_critical | matches_failure | matches_both |
+-------------------------+--------------------------------+------------------+-----------------+--------------+
| 1970-01-01T00:00:00.020 | critical error: system failure | true | true | true |
| 1970-01-01T00:00:00.021 | critical failure detected | true | true | true |
| 1970-01-01T00:00:00.022 | critical issue | true | false | false |
| 1970-01-01T00:00:00.023 | failure imminent | false | true | false |
+-------------------------+--------------------------------+------------------+-----------------+--------------+
-- Test case-insensitive matching using lower()
-- Expect:
-- 24|Warning: high temperature|true
-- 25|WARNING: system overload|true
-- 26|warned|false
-- 27|warnings|false
SELECT `id`, `log_message`, lower(`log_message`) @@ 'warning' as `matches_warning` FROM logs WHERE `id` >= 24 ORDER BY `id`;
+-------------------------+---------------------------+-----------------+
| id | log_message | matches_warning |
+-------------------------+---------------------------+-----------------+
| 1970-01-01T00:00:00.024 | Warning: high temperature | true |
| 1970-01-01T00:00:00.025 | WARNING: system overload | true |
| 1970-01-01T00:00:00.026 | warned | false |
| 1970-01-01T00:00:00.027 | warnings | false |
+-------------------------+---------------------------+-----------------+
DROP TABLE logs;
Affected Rows: 0

View File

@@ -0,0 +1,144 @@
-- Derived from matches_term cases
-- Test basic term matching
-- Expect: true
SELECT 'cat!' @@ 'cat' as result;
-- Test phrase matching with spaces
-- Expect: true
SELECT 'warning:hello world!' @@ 'hello world' as result;
-- Test numbers in term
SELECT 'v1.0!' @@ 'v1.0' as result;
-- Test case sensitivity
-- Expect: true
SELECT 'Cat' @@ 'Cat' as result;
-- Expect: false
SELECT 'cat' @@ 'Cat' as result;
-- Test empty string handling
-- Expect: true
SELECT '' @@ '' as result;
-- Expect: false
SELECT 'any' @@ '' as result;
-- Expect: false
SELECT '' @@ 'any' as result;
-- Test partial matches (should fail)
-- Expect: false
SELECT 'category' @@ 'cat' as result;
-- Expect: false
SELECT 'rebooted' @@ 'boot' as result;
-- Test adjacent alphanumeric characters
SELECT 'cat5' @@ 'cat' as result;
SELECT 'dogcat' @@ 'dog' as result;
-- Test leading non-alphanumeric
-- Expect: true
SELECT 'dog/cat' @@ '/cat' as result;
-- Expect: true
SELECT 'dog/cat' @@ 'dog/' as result;
-- Expect: true
SELECT 'dog/cat' @@ 'dog/cat' as result;
-- Test unicode characters
-- Expect: true
SELECT 'café>' @@ 'café' as result;
-- Expect: true
SELECT 'русский!' @@ 'русский' as result;
-- Test complete word matching
CREATE TABLE logs (
`id` TIMESTAMP TIME INDEX,
`log_message` STRING
);
INSERT INTO logs VALUES
(1, 'An error occurred!'),
(2, 'Critical error: system failure'),
(3, 'error-prone'),
(4, 'errors'),
(5, 'error123'),
(6, 'errorLogs'),
(7, 'Version v1.0 released'),
(8, 'v1.0!'),
(9, 'v1.0a'),
(10, 'v1.0beta'),
(11, 'GET /app/start'),
(12, 'Command: /start-prosess'),
(13, 'Command: /start'),
(14, 'start'),
(15, 'start/stop'),
(16, 'Alert: system failure detected'),
(17, 'system failure!'),
(18, 'system-failure'),
(19, 'system failure2023'),
(20, 'critical error: system failure'),
(21, 'critical failure detected'),
(22, 'critical issue'),
(23, 'failure imminent'),
(24, 'Warning: high temperature'),
(25, 'WARNING: system overload'),
(26, 'warned'),
(27, 'warnings');
-- Test complete word matching for 'error'
-- Expect:
-- 1|An error occurred!|true
-- 2|Critical error: system failure|true
-- 3|error-prone|true
-- 4|errors|false
-- 5|error123|false
-- 6|errorLogs|false
SELECT `id`, `log_message`, `log_message` @@ 'error' as `matches_error` FROM logs WHERE `id` <= 6 ORDER BY `id`;
-- Test complete word matching for 'v1.0'
-- Expect:
-- 7|Version v1.0 released|true
-- 8|v1.0!|true
-- 9|v1.0a|false
-- 10|v1.0beta|false
SELECT `id`, `log_message`, `log_message` @@ 'v1.0' as `matches_version` FROM logs WHERE `id` BETWEEN 7 AND 10 ORDER BY `id`;
-- Test complete word matching for '/start'
-- Expect:
-- 11|GET /app/start|true
-- 12|Command: /start-prosess|true
-- 13|Command: /start|true
-- 14|start|false
-- 15|start/stop|false
SELECT `id`, `log_message`, `log_message` @@ '/start' as `matches_start` FROM logs WHERE `id` BETWEEN 11 AND 15 ORDER BY `id`;
-- Test phrase matching for 'system failure'
-- Expect:
-- 16|Alert: system failure detected|true
-- 17|system failure!|true
-- 18|system-failure|false
-- 19|system failure2023|false
SELECT `id`, `log_message`, `log_message` @@ 'system failure' as `matches_phrase` FROM logs WHERE `id` BETWEEN 16 AND 19 ORDER BY `id`;
-- Test multi-word matching using AND
-- Expect:
-- 20|critical error: system failure|true|true|true
-- 21|critical failure detected|true|true|true
-- 22|critical issue|true|false|false
-- 23|failure imminent|false|true|false
SELECT `id`, `log_message`,
`log_message` @@ 'critical' as `matches_critical`,
`log_message` @@ 'failure' as `matches_failure`,
`log_message` @@ 'critical' AND `log_message` @@ 'failure' as `matches_both`
FROM logs WHERE `id` BETWEEN 20 AND 23 ORDER BY `id`;
-- Test case-insensitive matching using lower()
-- Expect:
-- 24|Warning: high temperature|true
-- 25|WARNING: system overload|true
-- 26|warned|false
-- 27|warnings|false
SELECT `id`, `log_message`, lower(`log_message`) @@ 'warning' as `matches_warning` FROM logs WHERE `id` >= 24 ORDER BY `id`;
DROP TABLE logs;

View File

@@ -80,6 +80,7 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test;
|_|_TableScan: test_|
| logical_plan after count_wildcard_to_time_index_rule_| SAME TEXT AS ABOVE_|
| logical_plan after StringNormalizationRule_| SAME TEXT AS ABOVE_|
| logical_plan after TranscribeAtatRule_| SAME TEXT AS ABOVE_|
| logical_plan after inline_table_scan_| SAME TEXT AS ABOVE_|
| logical_plan after expand_wildcard_rule_| SAME TEXT AS ABOVE_|
| logical_plan after resolve_grouping_function_| SAME TEXT AS ABOVE_|