mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-04 04:12:55 +00:00
feat: add matches_term function (#5817)
* feat: add `matches_term` function Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * merge & fix Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * address comments Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix & skip char after boundary mismatch Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> --------- Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -2038,6 +2038,7 @@ dependencies = [
|
||||
"h3o",
|
||||
"hyperloglogplus",
|
||||
"jsonb",
|
||||
"memchr",
|
||||
"nalgebra 0.33.2",
|
||||
"num",
|
||||
"num-traits",
|
||||
|
||||
@@ -39,6 +39,7 @@ geohash = { version = "0.13", optional = true }
|
||||
h3o = { version = "0.6", optional = true }
|
||||
hyperloglogplus = "0.4"
|
||||
jsonb.workspace = true
|
||||
memchr = "2.7"
|
||||
nalgebra.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
|
||||
@@ -27,6 +27,7 @@ use crate::scalars::hll_count::HllCalcFunction;
|
||||
use crate::scalars::ip::IpFunctions;
|
||||
use crate::scalars::json::JsonFunction;
|
||||
use crate::scalars::matches::MatchesFunction;
|
||||
use crate::scalars::matches_term::MatchesTermFunction;
|
||||
use crate::scalars::math::MathFunction;
|
||||
use crate::scalars::timestamp::TimestampFunction;
|
||||
use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
|
||||
@@ -116,6 +117,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
|
||||
|
||||
// Full text search function
|
||||
MatchesFunction::register(&function_registry);
|
||||
MatchesTermFunction::register(&function_registry);
|
||||
|
||||
// System and administration functions
|
||||
SystemFunction::register(&function_registry);
|
||||
|
||||
@@ -19,6 +19,7 @@ pub mod expression;
|
||||
pub mod geo;
|
||||
pub mod json;
|
||||
pub mod matches;
|
||||
pub mod matches_term;
|
||||
pub mod math;
|
||||
pub mod vector;
|
||||
|
||||
|
||||
375
src/common/function/src/scalars/matches_term.rs
Normal file
375
src/common/function/src/scalars/matches_term.rs
Normal file
@@ -0,0 +1,375 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, iter};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BooleanVector, BooleanVectorBuilder, MutableVector, VectorRef};
|
||||
use memchr::memmem;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
/// Exact term/phrase matching function for text columns.
///
/// This function checks if a text column contains exact term/phrase matches
/// with non-alphanumeric boundaries. Designed for:
/// - Whole-word matching (e.g. "cat" in "cat!" but not in "category")
/// - Phrase matching (e.g. "hello world" in "note:hello world!")
///
/// # Signature
/// `matches_term(text: String, term: String) -> Boolean`
///
/// # Arguments
/// * `text` - String column to search
/// * `term` - Search term/phrase
///
/// # Returns
/// BooleanVector where each element indicates if the corresponding text
/// contains an exact match of the term. An element is NULL when either the
/// text or the term for that row is NULL. Otherwise the rules are:
/// 1. An exact substring match is found (case-sensitive).
/// 2. On each side of the match, at least one of the following holds:
///    - The match touches the start/end of the text.
///    - The neighbouring character is non-alphanumeric (spaces, hyphens,
///      punctuation, etc.).
///    - The term itself starts (left side) or ends (right side) with a
///      non-alphanumeric character, in which case that side is not checked.
/// 3. An empty term matches only an empty text.
///
/// # Examples
/// ```sql
/// -- SQL examples --
/// -- Match phrase with space --
/// SELECT matches_term(column, 'hello world') FROM table;
/// -- Text: "warning:hello world!" => true
/// -- Text: "hello-world" => false (hyphen instead of space)
/// -- Text: "hello world2023" => false (ending with numbers)
///
/// -- Match multiple words with boundaries --
/// SELECT matches_term(column, 'critical error') FROM logs;
/// -- Match in: "ERROR:critical error!"
/// -- No match: "critical_errors"
///
/// -- Term starting with a non-alphanumeric character --
/// SELECT matches_term(column, '/start') FROM logs;
/// -- Text: "GET /app/start" => true (left side is not checked)
/// -- Text: "start/stop" => false (no "/start" substring)
///
/// -- Empty string handling --
/// SELECT matches_term(column, '') FROM table;
/// -- Text: "" => true
/// -- Text: "any" => false
///
/// -- Case sensitivity --
/// SELECT matches_term(column, 'Cat') FROM table;
/// -- Text: "Cat" => true
/// -- Text: "cat" => false
/// ```
pub struct MatchesTermFunction;
|
||||
|
||||
impl MatchesTermFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(MatchesTermFunction));
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MatchesTermFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "MATCHES_TERM")
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for MatchesTermFunction {
|
||||
fn name(&self) -> &str {
|
||||
"matches_term"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::boolean_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> common_query::prelude::Signature {
|
||||
common_query::prelude::Signature::exact(
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 2,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly 2, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
let text_column = &columns[0];
|
||||
if text_column.is_empty() {
|
||||
return Ok(Arc::new(BooleanVector::from(Vec::<bool>::with_capacity(0))));
|
||||
}
|
||||
|
||||
let term_column = &columns[1];
|
||||
let compiled_finder = if term_column.is_const() {
|
||||
let term = term_column.get_ref(0).as_string().unwrap();
|
||||
match term {
|
||||
None => {
|
||||
return Ok(Arc::new(BooleanVector::from_iter(
|
||||
iter::repeat(None).take(text_column.len()),
|
||||
)));
|
||||
}
|
||||
Some(term) => Some(MatchesTermFinder::new(term)),
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let len = text_column.len();
|
||||
let mut result = BooleanVectorBuilder::with_capacity(len);
|
||||
for i in 0..len {
|
||||
let text = text_column.get_ref(i).as_string().unwrap();
|
||||
let Some(text) = text else {
|
||||
result.push_null();
|
||||
continue;
|
||||
};
|
||||
|
||||
let contains = match &compiled_finder {
|
||||
Some(finder) => finder.find(text),
|
||||
None => {
|
||||
let term = match term_column.get_ref(i).as_string().unwrap() {
|
||||
None => {
|
||||
result.push_null();
|
||||
continue;
|
||||
}
|
||||
Some(term) => term,
|
||||
};
|
||||
MatchesTermFinder::new(term).find(text)
|
||||
}
|
||||
};
|
||||
result.push(Some(contains));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// A compiled finder for `matches_term` function that holds the compiled term
|
||||
/// and its metadata for efficient matching.
|
||||
///
|
||||
/// A term is considered matched when:
|
||||
/// 1. The exact sequence appears in the text
|
||||
/// 2. It is either:
|
||||
/// - At the start/end of text with adjacent non-alphanumeric character
|
||||
/// - Surrounded by non-alphanumeric characters
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// let finder = MatchesTermFinder::new("cat");
|
||||
/// assert!(finder.find("cat!")); // Term at end with punctuation
|
||||
/// assert!(finder.find("dog,cat")); // Term preceded by comma
|
||||
/// assert!(!finder.find("category")); // Partial match rejected
|
||||
///
|
||||
/// let finder = MatchesTermFinder::new("world");
|
||||
/// assert!(finder.find("hello-world")); // Hyphen boundary
|
||||
/// ```
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MatchesTermFinder {
|
||||
finder: memmem::Finder<'static>,
|
||||
term: String,
|
||||
starts_with_non_alnum: bool,
|
||||
ends_with_non_alnum: bool,
|
||||
}
|
||||
|
||||
impl MatchesTermFinder {
|
||||
/// Create a new `MatchesTermFinder` for the given term.
|
||||
pub fn new(term: &str) -> Self {
|
||||
let starts_with_non_alnum = term.chars().next().is_some_and(|c| !c.is_alphanumeric());
|
||||
let ends_with_non_alnum = term.chars().last().is_some_and(|c| !c.is_alphanumeric());
|
||||
|
||||
Self {
|
||||
finder: memmem::Finder::new(term).into_owned(),
|
||||
term: term.to_string(),
|
||||
starts_with_non_alnum,
|
||||
ends_with_non_alnum,
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the term in the text.
|
||||
pub fn find(&self, text: &str) -> bool {
|
||||
if self.term.is_empty() {
|
||||
return text.is_empty();
|
||||
}
|
||||
|
||||
if text.len() < self.term.len() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut pos = 0;
|
||||
while let Some(found_pos) = self.finder.find(text[pos..].as_bytes()) {
|
||||
let actual_pos = pos + found_pos;
|
||||
|
||||
let prev_ok = self.starts_with_non_alnum
|
||||
|| text[..actual_pos]
|
||||
.chars()
|
||||
.last()
|
||||
.map(|c| !c.is_alphanumeric())
|
||||
.unwrap_or(true);
|
||||
|
||||
if prev_ok {
|
||||
let next_pos = actual_pos + self.finder.needle().len();
|
||||
let next_ok = self.ends_with_non_alnum
|
||||
|| text[next_pos..]
|
||||
.chars()
|
||||
.next()
|
||||
.map(|c| !c.is_alphanumeric())
|
||||
.unwrap_or(true);
|
||||
|
||||
if next_ok {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(next_char) = text[actual_pos..].chars().next() {
|
||||
pos = actual_pos + next_char.len_utf8();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Mirrors the examples given in the `matches_term` documentation.
    #[test]
    fn matches_term_example() {
        let finder = MatchesTermFinder::new("hello world");
        assert!(finder.find("warning:hello world!"));
        assert!(!finder.find("hello-world"));
        assert!(!finder.find("hello world2023"));

        let finder = MatchesTermFinder::new("critical error");
        assert!(finder.find("ERROR:critical error!"));
        assert!(!finder.find("critical_errors"));

        let finder = MatchesTermFinder::new("");
        assert!(finder.find(""));
        assert!(!finder.find("any"));

        let finder = MatchesTermFinder::new("Cat");
        assert!(finder.find("Cat"));
        assert!(!finder.find("cat"));
    }

    #[test]
    fn matches_term_with_punctuation() {
        assert!(MatchesTermFinder::new("cat").find("cat!"));
        assert!(MatchesTermFinder::new("dog").find("!dog"));
    }

    #[test]
    fn matches_phrase_with_boundaries() {
        assert!(MatchesTermFinder::new("hello-world").find("hello-world"));
        assert!(MatchesTermFinder::new("'foo bar'").find("test: 'foo bar'"));
    }

    #[test]
    fn matches_at_text_boundaries() {
        assert!(MatchesTermFinder::new("start").find("start..."));
        assert!(MatchesTermFinder::new("end").find("...end"));
    }

    // Negative cases
    #[test]
    fn rejects_partial_matches() {
        assert!(!MatchesTermFinder::new("cat").find("category"));
        assert!(!MatchesTermFinder::new("boot").find("rebooted"));
    }

    #[test]
    fn rejects_missing_term() {
        assert!(!MatchesTermFinder::new("foo").find("hello world"));
    }

    // Edge cases
    #[test]
    fn handles_empty_inputs() {
        assert!(!MatchesTermFinder::new("test").find(""));
        assert!(!MatchesTermFinder::new("").find("text"));
    }

    #[test]
    fn different_unicode_boundaries() {
        assert!(MatchesTermFinder::new("café").find("café>"));
        assert!(!MatchesTermFinder::new("café").find("口café>"));
        assert!(!MatchesTermFinder::new("café").find("café口"));
        assert!(!MatchesTermFinder::new("café").find("cafémore"));
        assert!(MatchesTermFinder::new("русский").find("русский!"));
        // Replaces a duplicated assertion: a Cyrillic letter right after the
        // term is alphanumeric, so this is a partial match and must be rejected.
        assert!(!MatchesTermFinder::new("русский").find("русскийязык"));
    }

    #[test]
    fn case_sensitive_matching() {
        assert!(!MatchesTermFinder::new("cat").find("Cat"));
        assert!(MatchesTermFinder::new("CaT").find("CaT"));
    }

    #[test]
    fn numbers_in_term() {
        assert!(MatchesTermFinder::new("v1.0").find("v1.0!"));
        assert!(!MatchesTermFinder::new("v1.0").find("v1.0a"));
    }

    #[test]
    fn adjacent_alphanumeric_fails() {
        assert!(!MatchesTermFinder::new("cat").find("cat5"));
        assert!(!MatchesTermFinder::new("dog").find("dogcat"));
    }

    #[test]
    fn empty_term_text() {
        assert!(!MatchesTermFinder::new("").find("text"));
        assert!(MatchesTermFinder::new("").find(""));
        assert!(!MatchesTermFinder::new("text").find(""));
    }

    /// A term that starts or ends with a non-alphanumeric character is not
    /// boundary-checked on that side.
    #[test]
    fn leading_non_alphanumeric() {
        assert!(MatchesTermFinder::new("/cat").find("dog/cat"));
        assert!(MatchesTermFinder::new("dog/").find("dog/cat"));
        assert!(MatchesTermFinder::new("dog/cat").find("dog/cat"));
    }

    /// A rejected occurrence must not stop the search; later occurrences
    /// are still examined.
    #[test]
    fn continues_searching_after_boundary_mismatch() {
        assert!(!MatchesTermFinder::new("log").find("bloglog!"));
        assert!(MatchesTermFinder::new("log").find("bloglog log"));
        assert!(MatchesTermFinder::new("log").find("alogblog_log!"));

        assert!(MatchesTermFinder::new("error").find("errorlog_error_case"));
        assert!(MatchesTermFinder::new("test").find("atestbtestc_test_end"));
        assert!(MatchesTermFinder::new("data").find("database_data_store"));
        assert!(!MatchesTermFinder::new("data").find("database_datastore"));
        assert!(MatchesTermFinder::new("log.txt").find("catalog.txt_log.txt!"));
        assert!(!MatchesTermFinder::new("log.txt").find("catalog.txtlog.txt!"));
        assert!(MatchesTermFinder::new("data-set").find("bigdata-set_data-set!"));

        assert!(MatchesTermFinder::new("中文").find("这是中文测试,中文!"));
        assert!(MatchesTermFinder::new("error").find("错误errorerror日志_error!"));
    }
}
|
||||
314
tests/cases/standalone/common/function/matches_term.result
Normal file
314
tests/cases/standalone/common/function/matches_term.result
Normal file
@@ -0,0 +1,314 @@
|
||||
-- Test basic term matching
|
||||
-- Expect: true
|
||||
SELECT matches_term('cat!', 'cat') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Test phrase matching with spaces
|
||||
-- Expect: true
|
||||
SELECT matches_term('warning:hello world!', 'hello world') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Test numbers in term
|
||||
SELECT matches_term('v1.0!', 'v1.0') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Test case sensitivity
|
||||
-- Expect: true
|
||||
SELECT matches_term('Cat', 'Cat') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Expect: false
|
||||
SELECT matches_term('cat', 'Cat') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| false |
|
||||
+--------+
|
||||
|
||||
-- Test empty string handling
|
||||
-- Expect: true
|
||||
SELECT matches_term('', '') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Expect: false
|
||||
SELECT matches_term('any', '') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| false |
|
||||
+--------+
|
||||
|
||||
-- Expect: false
|
||||
SELECT matches_term('', 'any') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| false |
|
||||
+--------+
|
||||
|
||||
-- Test partial matches (should fail)
|
||||
-- Expect: false
|
||||
SELECT matches_term('category', 'cat') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| false |
|
||||
+--------+
|
||||
|
||||
-- Expect: false
|
||||
SELECT matches_term('rebooted', 'boot') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| false |
|
||||
+--------+
|
||||
|
||||
-- Test adjacent alphanumeric characters
|
||||
SELECT matches_term('cat5', 'cat') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| false |
|
||||
+--------+
|
||||
|
||||
SELECT matches_term('dogcat', 'dog') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| false |
|
||||
+--------+
|
||||
|
||||
-- Test leading non-alphanumeric
|
||||
-- Expect: true
|
||||
SELECT matches_term('dog/cat', '/cat') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Expect: true
|
||||
SELECT matches_term('dog/cat', 'dog/') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Expect: true
|
||||
SELECT matches_term('dog/cat', 'dog/cat') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Test unicode characters
|
||||
-- Expect: true
|
||||
SELECT matches_term('café>', 'café') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Expect: true
|
||||
SELECT matches_term('русский!', 'русский') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Test complete word matching
|
||||
CREATE TABLE logs (
|
||||
`id` TIMESTAMP TIME INDEX,
|
||||
`log_message` STRING
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO logs VALUES
|
||||
(1, 'An error occurred!'),
|
||||
(2, 'Critical error: system failure'),
|
||||
(3, 'error-prone'),
|
||||
(4, 'errors'),
|
||||
(5, 'error123'),
|
||||
(6, 'errorLogs'),
|
||||
(7, 'Version v1.0 released'),
|
||||
(8, 'v1.0!'),
|
||||
(9, 'v1.0a'),
|
||||
(10, 'v1.0beta'),
|
||||
(11, 'GET /app/start'),
|
||||
(12, 'Command: /start-prosess'),
|
||||
(13, 'Command: /start'),
|
||||
(14, 'start'),
|
||||
(15, 'start/stop'),
|
||||
(16, 'Alert: system failure detected'),
|
||||
(17, 'system failure!'),
|
||||
(18, 'system-failure'),
|
||||
(19, 'system failure2023'),
|
||||
(20, 'critical error: system failure'),
|
||||
(21, 'critical failure detected'),
|
||||
(22, 'critical issue'),
|
||||
(23, 'failure imminent'),
|
||||
(24, 'Warning: high temperature'),
|
||||
(25, 'WARNING: system overload'),
|
||||
(26, 'warned'),
|
||||
(27, 'warnings');
|
||||
|
||||
Affected Rows: 27
|
||||
|
||||
-- Test complete word matching for 'error'
|
||||
-- Expect:
|
||||
-- 1|An error occurred!|true
|
||||
-- 2|Critical error: system failure|true
|
||||
-- 3|error-prone|true
|
||||
-- 4|errors|false
|
||||
-- 5|error123|false
|
||||
-- 6|errorLogs|false
|
||||
SELECT `id`, `log_message`, matches_term(`log_message`, 'error') as `matches_error` FROM logs WHERE `id` <= 6 ORDER BY `id`;
|
||||
|
||||
+-------------------------+--------------------------------+---------------+
|
||||
| id | log_message | matches_error |
|
||||
+-------------------------+--------------------------------+---------------+
|
||||
| 1970-01-01T00:00:00.001 | An error occurred! | true |
|
||||
| 1970-01-01T00:00:00.002 | Critical error: system failure | true |
|
||||
| 1970-01-01T00:00:00.003 | error-prone | true |
|
||||
| 1970-01-01T00:00:00.004 | errors | false |
|
||||
| 1970-01-01T00:00:00.005 | error123 | false |
|
||||
| 1970-01-01T00:00:00.006 | errorLogs | false |
|
||||
+-------------------------+--------------------------------+---------------+
|
||||
|
||||
-- Test complete word matching for 'v1.0'
|
||||
-- Expect:
|
||||
-- 7|Version v1.0 released|true
|
||||
-- 8|v1.0!|true
|
||||
-- 9|v1.0a|false
|
||||
-- 10|v1.0beta|false
|
||||
SELECT `id`, `log_message`, matches_term(`log_message`, 'v1.0') as `matches_version` FROM logs WHERE `id` BETWEEN 7 AND 10 ORDER BY `id`;
|
||||
|
||||
+-------------------------+-----------------------+-----------------+
|
||||
| id | log_message | matches_version |
|
||||
+-------------------------+-----------------------+-----------------+
|
||||
| 1970-01-01T00:00:00.007 | Version v1.0 released | true |
|
||||
| 1970-01-01T00:00:00.008 | v1.0! | true |
|
||||
| 1970-01-01T00:00:00.009 | v1.0a | false |
|
||||
| 1970-01-01T00:00:00.010 | v1.0beta | false |
|
||||
+-------------------------+-----------------------+-----------------+
|
||||
|
||||
-- Test complete word matching for '/start'
|
||||
-- Expect:
|
||||
-- 11|GET /app/start|true
|
||||
-- 12|Command: /start-prosess|true
|
||||
-- 13|Command: /start|true
|
||||
-- 14|start|false
|
||||
-- 15|start/stop|false
|
||||
SELECT `id`, `log_message`, matches_term(`log_message`, '/start') as `matches_start` FROM logs WHERE `id` BETWEEN 11 AND 15 ORDER BY `id`;
|
||||
|
||||
+-------------------------+-------------------------+---------------+
|
||||
| id | log_message | matches_start |
|
||||
+-------------------------+-------------------------+---------------+
|
||||
| 1970-01-01T00:00:00.011 | GET /app/start | true |
|
||||
| 1970-01-01T00:00:00.012 | Command: /start-prosess | true |
|
||||
| 1970-01-01T00:00:00.013 | Command: /start | true |
|
||||
| 1970-01-01T00:00:00.014 | start | false |
|
||||
| 1970-01-01T00:00:00.015 | start/stop | false |
|
||||
+-------------------------+-------------------------+---------------+
|
||||
|
||||
-- Test phrase matching for 'system failure'
|
||||
-- Expect:
|
||||
-- 16|Alert: system failure detected|true
|
||||
-- 17|system failure!|true
|
||||
-- 18|system-failure|false
|
||||
-- 19|system failure2023|false
|
||||
SELECT `id`, `log_message`, matches_term(`log_message`, 'system failure') as `matches_phrase` FROM logs WHERE `id` BETWEEN 16 AND 19 ORDER BY `id`;
|
||||
|
||||
+-------------------------+--------------------------------+----------------+
|
||||
| id | log_message | matches_phrase |
|
||||
+-------------------------+--------------------------------+----------------+
|
||||
| 1970-01-01T00:00:00.016 | Alert: system failure detected | true |
|
||||
| 1970-01-01T00:00:00.017 | system failure! | true |
|
||||
| 1970-01-01T00:00:00.018 | system-failure | false |
|
||||
| 1970-01-01T00:00:00.019 | system failure2023 | false |
|
||||
+-------------------------+--------------------------------+----------------+
|
||||
|
||||
-- Test multi-word matching using AND
|
||||
-- Expect:
|
||||
-- 20|critical error: system failure|true|true|true
|
||||
-- 21|critical failure detected|true|true|true
|
||||
-- 22|critical issue|true|false|false
|
||||
-- 23|failure imminent|false|true|false
|
||||
SELECT `id`, `log_message`,
|
||||
matches_term(`log_message`, 'critical') as `matches_critical`,
|
||||
matches_term(`log_message`, 'failure') as `matches_failure`,
|
||||
matches_term(`log_message`, 'critical') AND matches_term(`log_message`, 'failure') as `matches_both`
|
||||
FROM logs WHERE `id` BETWEEN 20 AND 23 ORDER BY `id`;
|
||||
|
||||
+-------------------------+--------------------------------+------------------+-----------------+--------------+
|
||||
| id | log_message | matches_critical | matches_failure | matches_both |
|
||||
+-------------------------+--------------------------------+------------------+-----------------+--------------+
|
||||
| 1970-01-01T00:00:00.020 | critical error: system failure | true | true | true |
|
||||
| 1970-01-01T00:00:00.021 | critical failure detected | true | true | true |
|
||||
| 1970-01-01T00:00:00.022 | critical issue | true | false | false |
|
||||
| 1970-01-01T00:00:00.023 | failure imminent | false | true | false |
|
||||
+-------------------------+--------------------------------+------------------+-----------------+--------------+
|
||||
|
||||
-- Test case-insensitive matching using lower()
|
||||
-- Expect:
|
||||
-- 24|Warning: high temperature|true
|
||||
-- 25|WARNING: system overload|true
|
||||
-- 26|warned|false
|
||||
-- 27|warnings|false
|
||||
SELECT `id`, `log_message`, matches_term(lower(`log_message`), 'warning') as `matches_warning` FROM logs WHERE `id` >= 24 ORDER BY `id`;
|
||||
|
||||
+-------------------------+---------------------------+-----------------+
|
||||
| id | log_message | matches_warning |
|
||||
+-------------------------+---------------------------+-----------------+
|
||||
| 1970-01-01T00:00:00.024 | Warning: high temperature | true |
|
||||
| 1970-01-01T00:00:00.025 | WARNING: system overload | true |
|
||||
| 1970-01-01T00:00:00.026 | warned | false |
|
||||
| 1970-01-01T00:00:00.027 | warnings | false |
|
||||
+-------------------------+---------------------------+-----------------+
|
||||
|
||||
DROP TABLE logs;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
142
tests/cases/standalone/common/function/matches_term.sql
Normal file
142
tests/cases/standalone/common/function/matches_term.sql
Normal file
@@ -0,0 +1,142 @@
|
||||
-- Test basic term matching
|
||||
-- Expect: true
|
||||
SELECT matches_term('cat!', 'cat') as result;
|
||||
|
||||
-- Test phrase matching with spaces
|
||||
-- Expect: true
|
||||
SELECT matches_term('warning:hello world!', 'hello world') as result;
|
||||
|
||||
-- Test numbers in term
|
||||
SELECT matches_term('v1.0!', 'v1.0') as result;
|
||||
|
||||
-- Test case sensitivity
|
||||
-- Expect: true
|
||||
SELECT matches_term('Cat', 'Cat') as result;
|
||||
-- Expect: false
|
||||
SELECT matches_term('cat', 'Cat') as result;
|
||||
|
||||
-- Test empty string handling
|
||||
-- Expect: true
|
||||
SELECT matches_term('', '') as result;
|
||||
-- Expect: false
|
||||
SELECT matches_term('any', '') as result;
|
||||
-- Expect: false
|
||||
SELECT matches_term('', 'any') as result;
|
||||
|
||||
-- Test partial matches (should fail)
|
||||
-- Expect: false
|
||||
SELECT matches_term('category', 'cat') as result;
|
||||
-- Expect: false
|
||||
SELECT matches_term('rebooted', 'boot') as result;
|
||||
|
||||
-- Test adjacent alphanumeric characters
|
||||
SELECT matches_term('cat5', 'cat') as result;
|
||||
SELECT matches_term('dogcat', 'dog') as result;
|
||||
|
||||
-- Test leading non-alphanumeric
|
||||
-- Expect: true
|
||||
SELECT matches_term('dog/cat', '/cat') as result;
|
||||
-- Expect: true
|
||||
SELECT matches_term('dog/cat', 'dog/') as result;
|
||||
-- Expect: true
|
||||
SELECT matches_term('dog/cat', 'dog/cat') as result;
|
||||
|
||||
-- Test unicode characters
|
||||
-- Expect: true
|
||||
SELECT matches_term('café>', 'café') as result;
|
||||
-- Expect: true
|
||||
SELECT matches_term('русский!', 'русский') as result;
|
||||
|
||||
-- Test complete word matching
|
||||
CREATE TABLE logs (
|
||||
`id` TIMESTAMP TIME INDEX,
|
||||
`log_message` STRING
|
||||
);
|
||||
|
||||
INSERT INTO logs VALUES
|
||||
(1, 'An error occurred!'),
|
||||
(2, 'Critical error: system failure'),
|
||||
(3, 'error-prone'),
|
||||
(4, 'errors'),
|
||||
(5, 'error123'),
|
||||
(6, 'errorLogs'),
|
||||
(7, 'Version v1.0 released'),
|
||||
(8, 'v1.0!'),
|
||||
(9, 'v1.0a'),
|
||||
(10, 'v1.0beta'),
|
||||
(11, 'GET /app/start'),
|
||||
(12, 'Command: /start-prosess'),
|
||||
(13, 'Command: /start'),
|
||||
(14, 'start'),
|
||||
(15, 'start/stop'),
|
||||
(16, 'Alert: system failure detected'),
|
||||
(17, 'system failure!'),
|
||||
(18, 'system-failure'),
|
||||
(19, 'system failure2023'),
|
||||
(20, 'critical error: system failure'),
|
||||
(21, 'critical failure detected'),
|
||||
(22, 'critical issue'),
|
||||
(23, 'failure imminent'),
|
||||
(24, 'Warning: high temperature'),
|
||||
(25, 'WARNING: system overload'),
|
||||
(26, 'warned'),
|
||||
(27, 'warnings');
|
||||
|
||||
-- Test complete word matching for 'error'
|
||||
-- Expect:
|
||||
-- 1|An error occurred!|true
|
||||
-- 2|Critical error: system failure|true
|
||||
-- 3|error-prone|true
|
||||
-- 4|errors|false
|
||||
-- 5|error123|false
|
||||
-- 6|errorLogs|false
|
||||
SELECT `id`, `log_message`, matches_term(`log_message`, 'error') as `matches_error` FROM logs WHERE `id` <= 6 ORDER BY `id`;
|
||||
|
||||
|
||||
-- Test complete word matching for 'v1.0'
|
||||
-- Expect:
|
||||
-- 7|Version v1.0 released|true
|
||||
-- 8|v1.0!|true
|
||||
-- 9|v1.0a|false
|
||||
-- 10|v1.0beta|false
|
||||
SELECT `id`, `log_message`, matches_term(`log_message`, 'v1.0') as `matches_version` FROM logs WHERE `id` BETWEEN 7 AND 10 ORDER BY `id`;
|
||||
|
||||
-- Test complete word matching for '/start'
|
||||
-- Expect:
|
||||
-- 11|GET /app/start|true
|
||||
-- 12|Command: /start-prosess|true
|
||||
-- 13|Command: /start|true
|
||||
-- 14|start|false
|
||||
-- 15|start/stop|false
|
||||
SELECT `id`, `log_message`, matches_term(`log_message`, '/start') as `matches_start` FROM logs WHERE `id` BETWEEN 11 AND 15 ORDER BY `id`;
|
||||
|
||||
-- Test phrase matching for 'system failure'
|
||||
-- Expect:
|
||||
-- 16|Alert: system failure detected|true
|
||||
-- 17|system failure!|true
|
||||
-- 18|system-failure|false
|
||||
-- 19|system failure2023|false
|
||||
SELECT `id`, `log_message`, matches_term(`log_message`, 'system failure') as `matches_phrase` FROM logs WHERE `id` BETWEEN 16 AND 19 ORDER BY `id`;
|
||||
|
||||
|
||||
-- Test multi-word matching using AND
|
||||
-- Expect:
|
||||
-- 20|critical error: system failure|true|true|true
|
||||
-- 21|critical failure detected|true|true|true
|
||||
-- 22|critical issue|true|false|false
|
||||
-- 23|failure imminent|false|true|false
|
||||
SELECT `id`, `log_message`,
|
||||
matches_term(`log_message`, 'critical') as `matches_critical`,
|
||||
matches_term(`log_message`, 'failure') as `matches_failure`,
|
||||
matches_term(`log_message`, 'critical') AND matches_term(`log_message`, 'failure') as `matches_both`
|
||||
FROM logs WHERE `id` BETWEEN 20 AND 23 ORDER BY `id`;
|
||||
|
||||
-- Test case-insensitive matching using lower()
|
||||
-- Expect:
|
||||
-- 24|Warning: high temperature|true
|
||||
-- 25|WARNING: system overload|true
|
||||
-- 26|warned|false
|
||||
-- 27|warnings|false
|
||||
SELECT `id`, `log_message`, matches_term(lower(`log_message`), 'warning') as `matches_warning` FROM logs WHERE `id` >= 24 ORDER BY `id`;
|
||||
|
||||
DROP TABLE logs;
|
||||
Reference in New Issue
Block a user