From 637e7eda5c7196c8f8fe91f007d91380aa7d6ae6 Mon Sep 17 00:00:00 2001 From: discord9 Date: Fri, 10 Apr 2026 15:45:48 +0800 Subject: [PATCH] fix: match term zh Signed-off-by: discord9 --- .../function/src/scalars/matches_term.rs | 171 +++++++++++++---- src/index/src/fulltext_index/tokenizer.rs | 20 ++ src/query/src/promql/planner.rs | 172 ++++++++++++------ .../common/function/matches_term.result | 65 +++++++ .../common/function/matches_term.sql | 10 + 5 files changed, 355 insertions(+), 83 deletions(-) diff --git a/src/common/function/src/scalars/matches_term.rs b/src/common/function/src/scalars/matches_term.rs index 8dfb25cbc0..fe37210885 100644 --- a/src/common/function/src/scalars/matches_term.rs +++ b/src/common/function/src/scalars/matches_term.rs @@ -27,10 +27,11 @@ use crate::function_registry::FunctionRegistry; /// Exact term/phrase matching function for text columns. /// -/// This function checks if a text column contains exact term/phrase matches -/// with non-alphanumeric boundaries. Designed for: -/// - Whole-word matching (e.g. "cat" in "cat!" but not in "category") +/// This function uses script-aware matching rules: +/// - ASCII-only terms keep whole-word boundary matching (e.g. "cat" matches in "cat!" but not in "category") /// - Phrase matching (e.g. "hello world" in "note:hello world!") +/// - Terms containing Han characters match as contiguous substrings +/// - Mixed-script identifiers and numeric terms remain searchable in Chinese text /// /// # Signature /// `matches_term(text: String, term: String) -> Boolean` @@ -43,9 +44,8 @@ use crate::function_registry::FunctionRegistry; /// BooleanVector where each element indicates if the corresponding text /// contains an exact match of the term, following these rules: /// 1. Exact substring match found (case-sensitive) -/// 2. Match boundaries are either: -/// - Start/end of text -/// - Any non-alphanumeric character (including spaces, hyphens, punctuation, etc.) +/// 2.
For ASCII-only terms, an adjacent ASCII letter or digit blocks the match (underscore is a boundary) +/// 3. For Han-containing terms, contiguous substring match is sufficient /// /// # Examples /// ``` @@ -60,6 +60,9 @@ use crate::function_registry::FunctionRegistry; /// SELECT matches_term(column, 'critical error') FROM logs; /// -- Match in: "ERROR:critical error!" /// -- No match: "critical_errors" +/// -- Chinese substring examples -- +/// SELECT matches_term(column, '手机') FROM table; +/// -- Text: "登录手机号18888888888的动态key" => true /// /// -- Empty string handling -- /// SELECT matches_term(column, '') FROM table; @@ -204,9 +207,8 @@ impl Function for MatchesTermFunction { /// /// A term is considered matched when: /// 1. The exact sequence appears in the text -/// 2. It is either: -/// - At the start/end of text with adjacent non-alphanumeric character -/// - Surrounded by non-alphanumeric characters +/// 2. ASCII-only terms are not adjacent to an ASCII letter or digit (underscore is a boundary) +/// 3. Han-containing terms match as contiguous substrings /// /// # Examples /// ``` @@ -215,28 +217,113 @@ impl Function for MatchesTermFunction { /// assert!(finder.find("dog,cat")); // Term preceded by comma /// assert!(!finder.find("category")); // Partial match rejected /// -/// let finder = MatchesTermFinder::new("world"); -/// assert!(finder.find("hello-world")); // Hyphen boundary +/// let finder = MatchesTermFinder::new("手机"); +/// assert!(finder.find("登录手机号18888888888的动态key")); /// ``` #[derive(Clone, Debug)] pub struct MatchesTermFinder { finder: memmem::Finder<'static>, term: String, - starts_with_non_alnum: bool, - ends_with_non_alnum: bool, + term_kind: TermKind, + starts_with_other: bool, + ends_with_other: bool, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum CharClass { + AsciiWord, + Han, + UnicodeWord, + Other, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum TermKind { + AsciiLike, + UnicodeWord, + HanContaining, +} + +fn classify_char(c: char) -> CharClass { + if c.is_ascii_alphanumeric() {
+ CharClass::AsciiWord + } else if is_han(c) { + CharClass::Han + } else if c.is_alphanumeric() { + CharClass::UnicodeWord + } else { + CharClass::Other + } +} + +fn is_han(c: char) -> bool { + matches!( + c as u32, + 0x3400..=0x4DBF + | 0x4E00..=0x9FFF + | 0xF900..=0xFAFF + | 0x20000..=0x2A6DF + | 0x2A700..=0x2B73F + | 0x2B740..=0x2B81F + | 0x2B820..=0x2CEAF + | 0x2CEB0..=0x2EBEF + | 0x30000..=0x3134F + ) +} + +fn classify_term(term: &str) -> TermKind { + let mut has_han = false; + let mut has_unicode_word = false; + for c in term.chars() { + match classify_char(c) { + CharClass::AsciiWord => {} + CharClass::Han => has_han = true, + CharClass::UnicodeWord => has_unicode_word = true, + CharClass::Other => {} + } + } + + if has_han { + TermKind::HanContaining + } else if has_unicode_word { + TermKind::UnicodeWord + } else { + TermKind::AsciiLike + } +} + +fn boundary_ok(term_kind: TermKind, neighbor: Option, term_has_other_boundary: bool) -> bool { + if term_has_other_boundary { + return true; + } + + match term_kind { + TermKind::AsciiLike => !matches!(neighbor.map(classify_char), Some(CharClass::AsciiWord)), + TermKind::UnicodeWord => !matches!( + neighbor.map(classify_char), + Some(CharClass::AsciiWord | CharClass::UnicodeWord | CharClass::Han) + ), + TermKind::HanContaining => true, + } } impl MatchesTermFinder { /// Create a new `MatchesTermFinder` for the given term. 
pub fn new(term: &str) -> Self { - let starts_with_non_alnum = term.chars().next().is_some_and(|c| !c.is_alphanumeric()); - let ends_with_non_alnum = term.chars().last().is_some_and(|c| !c.is_alphanumeric()); - + let starts_with_other = term + .chars() + .next() + .is_some_and(|c| classify_char(c) == CharClass::Other); + let ends_with_other = term + .chars() + .last() + .is_some_and(|c| classify_char(c) == CharClass::Other); Self { finder: memmem::Finder::new(term).into_owned(), term: term.to_string(), - starts_with_non_alnum, - ends_with_non_alnum, + term_kind: classify_term(term), + starts_with_other, + ends_with_other, } } @@ -254,23 +341,20 @@ impl MatchesTermFinder { while let Some(found_pos) = self.finder.find(&text.as_bytes()[pos..]) { let actual_pos = pos + found_pos; - let prev_ok = self.starts_with_non_alnum - || text[..actual_pos] - .chars() - .last() - .map(|c| !c.is_alphanumeric()) - .unwrap_or(true); + let prev = text[..actual_pos].chars().last(); + let prev_ok = self.starts_with_other || boundary_ok(self.term_kind, prev, false); if prev_ok { let next_pos = actual_pos + self.finder.needle().len(); - let next_ok = self.ends_with_non_alnum - || text[next_pos..] 
- .chars() - .next() - .map(|c| !c.is_alphanumeric()) - .unwrap_or(true); + let next = text[next_pos..].chars().next(); + let next_ok = self.ends_with_other || boundary_ok(self.term_kind, next, false); - if next_ok { + let match_ok = match self.term_kind { + TermKind::HanContaining => true, + _ => prev_ok && next_ok, + }; + + if match_ok { return true; } } @@ -369,6 +453,25 @@ mod tests { assert!(!MatchesTermFinder::new("v1.0").find("v1.0a")); } + #[test] + fn mixed_script_terms_match_inside_chinese_context() { + let text = "登录手机号18888888888的动态key"; + assert!(MatchesTermFinder::new("手机号").find(text)); + assert!(MatchesTermFinder::new("18888888888").find(text)); + assert!(MatchesTermFinder::new("手机").find(text)); + assert!(MatchesTermFinder::new("机号").find(text)); + assert!(MatchesTermFinder::new("机号1888").find(text)); + assert!(MatchesTermFinder::new("农业").find("中国农业银行")); + assert!(MatchesTermFinder::new("error").find("错误error日志")); + } + + #[test] + fn underscore_still_counts_as_boundary_for_ascii_terms() { + assert!(MatchesTermFinder::new("world").find("hello_world")); + assert!(MatchesTermFinder::new("id").find("trace_id=abc")); + assert!(!MatchesTermFinder::new("error").find("criticalerrors")); + } + #[test] fn adjacent_alphanumeric_fails() { assert!(!MatchesTermFinder::new("cat").find("cat5")); @@ -406,4 +509,10 @@ mod tests { assert!(MatchesTermFinder::new("中文").find("这是中文测试,中文!")); assert!(MatchesTermFinder::new("error").find("错误errorerror日志_error!")); } + + #[test] + fn han_terms_match_as_contiguous_substrings() { + assert!(MatchesTermFinder::new("行账号").find("中国农业银行账号")); + assert!(MatchesTermFinder::new("登录").find("登录手机号18888888888的动态key")); + } } diff --git a/src/index/src/fulltext_index/tokenizer.rs b/src/index/src/fulltext_index/tokenizer.rs index b5093afb33..3c55cf1009 100644 --- a/src/index/src/fulltext_index/tokenizer.rs +++ b/src/index/src/fulltext_index/tokenizer.rs @@ -167,6 +167,26 @@ mod tests { assert_eq!(tokens, vec!["我", "喜欢", "苹果"]); } + 
#[test] + fn test_chinese_tokenizer_issue_7943_sample() { + let tokenizer = ChineseTokenizer; + let text = "登录手机号18888888888的动态key:829889AC8"; + let tokens = tokenizer.tokenize(text); + assert_eq!( + tokens, + vec![ + "登录", + "手机号", + "18888888888", + "的", + "动态", + "key", + ":", + "829889AC8" + ] + ); + } + #[test] fn test_valid_ascii_token_lookup_table() { // Test all ASCII values in a single loop diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 640994dea2..3dfd75fdd4 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -23,6 +23,8 @@ use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_function::function::FunctionContext; use common_query::prelude::greptime_value; +use common_time::Timestamp; +use common_time::timestamp::TimeUnit; use datafusion::common::DFSchemaRef; use datafusion::datasource::DefaultTableSource; use datafusion::functions_aggregate::average::avg_udaf; @@ -91,9 +93,9 @@ use crate::promql::error::{ InvalidRegularExpressionSnafu, InvalidTimeRangeSnafu, MultiFieldsNotSupportedSnafu, MultipleMetricMatchersSnafu, MultipleVectorSnafu, NoMetricMatcherSnafu, PromqlPlanNodeSnafu, Result, SameLabelSetSnafu, TableNameNotFoundSnafu, TimeIndexNotFoundSnafu, - UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu, UnsupportedExprSnafu, - UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu, ValueNotFoundSnafu, - ZeroRangeSelectorSnafu, + TimestampOutOfRangeSnafu, UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu, + UnsupportedExprSnafu, UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu, + ValueNotFoundSnafu, ZeroRangeSelectorSnafu, }; use crate::query_engine::QueryEngineState; @@ -1221,26 +1223,34 @@ impl PromPlanner { label_matchers: Matchers, is_range_selector: bool, ) -> Result { - // make table scan plan - let table_ref = self.table_ref()?; - let mut table_scan = self.create_table_scan_plan(table_ref.clone()).await?; - 
let table_schema = table_scan.schema(); - - // make filter exprs let offset_duration = match offset { Some(Offset::Pos(duration)) => duration.as_millis() as Millisecond, Some(Offset::Neg(duration)) => -(duration.as_millis() as Millisecond), None => 0, }; + + // make table scan plan + let table_ref = self.table_ref()?; + let (mut table_scan, time_filter_pushed_down) = self + .create_table_scan_plan(table_ref.clone(), offset_duration) + .await?; + let table_schema = table_scan.schema(); + + // make filter exprs let mut scan_filters = Self::matchers_to_expr(label_matchers.clone(), table_schema)?; - if let Some(time_index_filter) = self.build_time_index_filter(offset_duration)? { + if !time_filter_pushed_down + && let Some(time_index_filter) = + self.build_time_index_filter(offset_duration, TimeUnit::Millisecond)? + { scan_filters.push(time_index_filter); } - table_scan = LogicalPlanBuilder::from(table_scan) - .filter(conjunction(scan_filters).unwrap()) // Safety: `scan_filters` is not empty. - .context(DataFusionPlanningSnafu)? - .build() - .context(DataFusionPlanningSnafu)?; + if let Some(scan_filter) = conjunction(scan_filters) { + table_scan = LogicalPlanBuilder::from(table_scan) + .filter(scan_filter) + .context(DataFusionPlanningSnafu)? 
+ .build() + .context(DataFusionPlanningSnafu)?; + } // make a projection plan if there is any `__field__` matcher if let Some(field_matchers) = &self.ctx.field_column_matcher { @@ -1590,7 +1600,11 @@ impl PromPlanner { Ok(table_ref) } - fn build_time_index_filter(&self, offset_duration: i64) -> Result> { + fn build_time_index_filter( + &self, + offset_duration: i64, + time_index_unit: TimeUnit, + ) -> Result> { let start = self.ctx.start; let end = self.ctx.end; if end < start { @@ -1614,56 +1628,92 @@ impl PromPlanner { // Scan a continuous time range if (end - start) / interval > MAX_SCATTER_POINTS || interval <= INTERVAL_1H { + let lower_bound = self.build_scan_time_filter_literal( + start - offset_duration - selector_window + lower_exclusive_adjustment, + time_index_unit, + )?; + let upper_bound = self.build_scan_time_filter_literal( + self.ctx + .end + .checked_sub(offset_duration) + .and_then(|ts| ts.checked_add(1)) + .with_context(|| TimestampOutOfRangeSnafu { + timestamp: self.ctx.end - offset_duration, + unit: TimeUnit::Millisecond, + })?, + time_index_unit, + )?; let single_time_range = time_index_expr .clone() - .gt_eq(DfExpr::Literal( - ScalarValue::TimestampMillisecond( - Some( - self.ctx.start - offset_duration - selector_window - + lower_exclusive_adjustment, - ), - None, - ), - None, - )) - .and(time_index_expr.lt_eq(DfExpr::Literal( - ScalarValue::TimestampMillisecond(Some(self.ctx.end - offset_duration), None), - None, - ))); + .gt_eq(lower_bound) + .and(time_index_expr.lt(upper_bound)); return Ok(Some(single_time_range)); } // Otherwise scan scatter ranges separately let mut filters = Vec::with_capacity(num_points as usize + 1); for timestamp in (start..=end).step_by(interval as usize) { + let lower_bound = self.build_scan_time_filter_literal( + timestamp - offset_duration - selector_window + lower_exclusive_adjustment, + time_index_unit, + )?; + let upper_bound = self.build_scan_time_filter_literal( + timestamp + .checked_sub(offset_duration) + 
.and_then(|ts| ts.checked_add(1)) + .with_context(|| TimestampOutOfRangeSnafu { + timestamp: timestamp - offset_duration, + unit: TimeUnit::Millisecond, + })?, + time_index_unit, + )?; filters.push( time_index_expr .clone() - .gt_eq(DfExpr::Literal( - ScalarValue::TimestampMillisecond( - Some( - timestamp - offset_duration - selector_window - + lower_exclusive_adjustment, - ), - None, - ), - None, - )) - .and(time_index_expr.clone().lt_eq(DfExpr::Literal( - ScalarValue::TimestampMillisecond(Some(timestamp - offset_duration), None), - None, - ))), + .gt_eq(lower_bound) + .and(time_index_expr.clone().lt(upper_bound)), ) } Ok(filters.into_iter().reduce(DfExpr::or)) } + fn build_scan_time_filter_literal( + &self, + timestamp_ms: i64, + time_index_unit: TimeUnit, + ) -> Result { + let timestamp = Timestamp::new(timestamp_ms, TimeUnit::Millisecond) + .convert_to_ceil(time_index_unit) + .with_context(|| TimestampOutOfRangeSnafu { + timestamp: timestamp_ms, + unit: time_index_unit, + })?; + Ok(DfExpr::Literal( + Self::timestamp_to_scalar_value(timestamp), + None, + )) + } + + fn timestamp_to_scalar_value(timestamp: Timestamp) -> ScalarValue { + let value = timestamp.value(); + match timestamp.unit() { + TimeUnit::Second => ScalarValue::TimestampSecond(Some(value), None), + TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(Some(value), None), + TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(Some(value), None), + TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(Some(value), None), + } + } + /// Create a table scan plan and a filter plan with given filter. 
/// /// # Panic /// If the filter is empty - async fn create_table_scan_plan(&mut self, table_ref: TableReference) -> Result { + async fn create_table_scan_plan( + &mut self, + table_ref: TableReference, + offset_duration: i64, + ) -> Result<(LogicalPlan, bool)> { let provider = self .table_provider .resolve_table(table_ref.clone()) @@ -1756,14 +1806,19 @@ impl PromPlanner { self.ctx.tag_columns.clone() }; - let is_time_index_ms = scan_table + let time_index_unit = scan_table .schema() .timestamp_column() .with_context(|| TimeIndexNotFoundSnafu { table: maybe_phy_table_ref.to_quoted_string(), })? .data_type - == ConcreteDataType::timestamp_millisecond_datatype(); + .as_timestamp() + .with_context(|| TimeIndexNotFoundSnafu { + table: maybe_phy_table_ref.to_quoted_string(), + })? + .unit(); + let is_time_index_ms = time_index_unit == TimeUnit::Millisecond; let scan_projection = if table_id_filter.is_some() { let mut required_columns = HashSet::new(); @@ -1816,6 +1871,17 @@ impl PromPlanner { .context(DataFusionPlanningSnafu)?; } + if !is_time_index_ms + && let Some(time_index_filter) = + self.build_time_index_filter(offset_duration, time_index_unit)? + { + scan_plan = LogicalPlanBuilder::from(scan_plan) + .filter(time_index_filter) + .context(DataFusionPlanningSnafu)? 
+ .build() + .context(DataFusionPlanningSnafu)?; + } + if !is_time_index_ms { // cast to ms if time_index not in Millisecond precision let expr: Vec<_> = self @@ -1882,7 +1948,7 @@ impl PromPlanner { let result = LogicalPlanBuilder::from(scan_plan) .build() .context(DataFusionPlanningSnafu)?; - Ok(result) + Ok((result, !is_time_index_ms)) } fn collect_row_key_tag_columns_from_plan( @@ -6085,9 +6151,10 @@ mod test { "PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ \n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ \n Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ - \n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-999, None) AND metrics.timestamp <= TimestampMillisecond(100000000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ + \n Filter: metrics.tag = Utf8(\"1\") [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ \n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(ms)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ - \n TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]" + \n Filter: metrics.timestamp >= TimestampNanosecond(-999000000, None) AND metrics.timestamp < TimestampNanosecond(100000001000000, None) [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]\ + \n TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]" ); let plan = PromPlanner::stmt_to_plan( DfTableSourceProvider::new( @@ -6118,9 +6185,10 @@ mod test { \n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ \n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ \n Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST 
[field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ - \n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-4999, None) AND metrics.timestamp <= TimestampMillisecond(100000000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ + \n Filter: metrics.tag = Utf8(\"1\") [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ \n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(ms)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\ - \n TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]" + \n Filter: metrics.timestamp >= TimestampNanosecond(-4999000000, None) AND metrics.timestamp < TimestampNanosecond(100000001000000, None) [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]\ + \n TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]" ); } diff --git a/tests/cases/standalone/common/function/matches_term.result b/tests/cases/standalone/common/function/matches_term.result index 38b9f30723..ef1e94fa8b 100644 --- a/tests/cases/standalone/common/function/matches_term.result +++ b/tests/cases/standalone/common/function/matches_term.result @@ -157,6 +157,71 @@ SELECT matches_term('русский!', 'русский') as result; | true | +--------+ +-- Phase 1 mixed Chinese and numeric behavior +SELECT matches_term('登录手机号18888888888的动态key', '手机号') as result; + ++--------+ +| result | ++--------+ +| true | ++--------+ + +SELECT matches_term('登录手机号18888888888的动态key', '18888888888') as result; + ++--------+ +| result | ++--------+ +| true | ++--------+ + +SELECT matches_term('登录手机号18888888888的动态key', '手机') as result; + ++--------+ +| result | ++--------+ +| true | ++--------+ + +SELECT matches_term('登录手机号18888888888的动态key', '机号') as result; + ++--------+ +| result | ++--------+ +| true | ++--------+ + +SELECT matches_term('登录手机号18888888888的动态key', '机号1888') as result; + ++--------+ +| result | ++--------+ +| true | ++--------+ + +SELECT matches_term('中国农业银行', '农业') as 
result; + ++--------+ +| result | ++--------+ +| true | ++--------+ + +SELECT matches_term('中国农业银行账号', '行账号') as result; + ++--------+ +| result | ++--------+ +| true | ++--------+ + +SELECT matches_term('错误error日志', 'error') as result; + ++--------+ +| result | ++--------+ +| true | ++--------+ + -- Test complete word matching CREATE TABLE logs ( `id` TIMESTAMP TIME INDEX, diff --git a/tests/cases/standalone/common/function/matches_term.sql b/tests/cases/standalone/common/function/matches_term.sql index e91b9fdf5f..e01b1451c0 100644 --- a/tests/cases/standalone/common/function/matches_term.sql +++ b/tests/cases/standalone/common/function/matches_term.sql @@ -47,6 +47,16 @@ SELECT matches_term('café>', 'café') as result; -- Expect: true SELECT matches_term('русский!', 'русский') as result; +-- Phase 1 mixed Chinese and numeric behavior +SELECT matches_term('登录手机号18888888888的动态key', '手机号') as result; +SELECT matches_term('登录手机号18888888888的动态key', '18888888888') as result; +SELECT matches_term('登录手机号18888888888的动态key', '手机') as result; +SELECT matches_term('登录手机号18888888888的动态key', '机号') as result; +SELECT matches_term('登录手机号18888888888的动态key', '机号1888') as result; +SELECT matches_term('中国农业银行', '农业') as result; +SELECT matches_term('中国农业银行账号', '行账号') as result; +SELECT matches_term('错误error日志', 'error') as result; + -- Test complete word matching CREATE TABLE logs ( `id` TIMESTAMP TIME INDEX,