fix: match term zh

Signed-off-by: discord9 <discord9@163.com>
2026-05-20 23:10:37 +00:00 · 2026-04-10 15:45:48 +08:00
parent 76cad696c6
commit 637e7eda5c
5 changed files with 355 additions and 83 deletions
--- a/src/common/function/src/scalars/matches_term.rs
+++ b/src/common/function/src/scalars/matches_term.rs
@@ -27,10 +27,11 @@ use crate::function_registry::FunctionRegistry;

 /// Exact term/phrase matching function for text columns.
 ///
-/// This function checks if a text column contains exact term/phrase matches
-/// with non-alphanumeric boundaries. Designed for:
-/// - Whole-word matching (e.g. "cat" in "cat!" but not in "category")
+/// This function uses script-aware matching rules:
+/// - ASCII-only terms keep whole-word boundary matching (e.g. "cat" in "cat!" but not in "category")
 /// - Phrase matching (e.g. "hello world" in "note:hello world!")
+/// - Terms containing Han characters match as contiguous substrings
+/// - Mixed-script identifiers and numeric terms remain searchable in Chinese text
 ///
 /// # Signature
 /// `matches_term(text: String, term: String) -> Boolean`
@@ -43,9 +44,8 @@ use crate::function_registry::FunctionRegistry;
 /// BooleanVector where each element indicates if the corresponding text
 /// contains an exact match of the term, following these rules:
 /// 1. Exact substring match found (case-sensitive)
-/// 2. Match boundaries are either:
-///    - Start/end of text
-///    - Any non-alphanumeric character (including spaces, hyphens, punctuation, etc.)
+/// 2. For ASCII-only terms, an adjacent ASCII letter or digit blocks the match (`_` counts as a boundary)
+/// 3. For Han-containing terms, contiguous substring match is sufficient
 ///
 /// # Examples
 /// ```
@@ -60,6 +60,9 @@ use crate::function_registry::FunctionRegistry;
 /// SELECT matches_term(column, 'critical error') FROM logs;
 /// -- Match in: "ERROR:critical error!"
 /// -- No match: "critical_errors"
+/// -- Chinese substring examples --
+/// SELECT matches_term(column, '手机') FROM table;
+/// -- Text: "登录手机号18888888888的动态key" => true
 ///
 /// -- Empty string handling --
 /// SELECT matches_term(column, '') FROM table;
@@ -204,9 +207,8 @@ impl Function for MatchesTermFunction {
 ///
 /// A term is considered matched when:
 /// 1. The exact sequence appears in the text
-/// 2. It is either:
-///    - At the start/end of text with adjacent non-alphanumeric character
-///    - Surrounded by non-alphanumeric characters
+/// 2. ASCII-only terms are not adjacent to an ASCII letter or digit (`_` counts as a boundary)
+/// 3. Han-containing terms match as contiguous substrings
 ///
 /// # Examples
 /// ```
@@ -215,28 +217,113 @@ impl Function for MatchesTermFunction {
 /// assert!(finder.find("dog,cat"));   // Term preceded by comma
 /// assert!(!finder.find("category")); // Partial match rejected
 ///
-/// let finder = MatchesTermFinder::new("world");
-/// assert!(finder.find("hello-world")); // Hyphen boundary
+/// let finder = MatchesTermFinder::new("手机");
+/// assert!(finder.find("登录手机号18888888888的动态key"));
 /// ```
 #[derive(Clone, Debug)]
 pub struct MatchesTermFinder {
    finder: memmem::Finder<'static>,
    term: String,
-    starts_with_non_alnum: bool,
-    ends_with_non_alnum: bool,
+    term_kind: TermKind,
+    starts_with_other: bool,
+    ends_with_other: bool,
+}
+
+/// Coarse class of a single character, as assigned by `classify_char`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum CharClass {
+    /// ASCII letter or digit (`char::is_ascii_alphanumeric`).
+    AsciiWord,
+    /// Character inside one of the code-point ranges accepted by `is_han`.
+    Han,
+    /// Any other character for which `char::is_alphanumeric` is true.
+    UnicodeWord,
+    /// Everything else, e.g. whitespace, punctuation and `_`.
+    Other,
+}
+
+/// Kind of a search term, computed by `classify_term` and consumed by `boundary_ok`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum TermKind {
+    /// No Han and no non-ASCII alphanumeric characters; in `boundary_ok` only an
+    /// adjacent ASCII letter or digit rejects the match.
+    AsciiLike,
+    /// No Han, but at least one non-ASCII alphanumeric character; in `boundary_ok`
+    /// any adjacent ASCII, Han or other alphanumeric character rejects the match.
+    UnicodeWord,
+    /// At least one Han character; `boundary_ok` accepts any neighbor.
+    HanContaining,
+}
+
+/// Buckets a character into the class used for term-boundary decisions.
+///
+/// The checks run in priority order: ASCII letters and digits first, then Han
+/// ideographs, then any remaining Unicode alphanumeric. Everything else
+/// (whitespace, punctuation, `_`, symbols) is `CharClass::Other`.
+fn classify_char(c: char) -> CharClass {
+    match c {
+        ch if ch.is_ascii_alphanumeric() => CharClass::AsciiWord,
+        ch if is_han(ch) => CharClass::Han,
+        ch if ch.is_alphanumeric() => CharClass::UnicodeWord,
+        _ => CharClass::Other,
+    }
+}
+
+/// Returns `true` when `c` falls inside one of the CJK ideograph blocks below.
+///
+/// Covers CJK Unified Ideographs, Extensions A through I, and both CJK
+/// Compatibility Ideographs blocks. Punctuation and fullwidth forms are not
+/// included.
+fn is_han(c: char) -> bool {
+    matches!(
+        c as u32,
+        0x3400..=0x4DBF // CJK Unified Ideographs Extension A
+            | 0x4E00..=0x9FFF // CJK Unified Ideographs
+            | 0xF900..=0xFAFF // CJK Compatibility Ideographs
+            | 0x20000..=0x2A6DF // Extension B
+            | 0x2A700..=0x2B73F // Extension C
+            | 0x2B740..=0x2B81F // Extension D
+            | 0x2B820..=0x2CEAF // Extension E
+            | 0x2CEB0..=0x2EBEF // Extension F
+            | 0x2EBF0..=0x2EE5F // Extension I
+            | 0x2F800..=0x2FA1F // CJK Compatibility Ideographs Supplement
+            | 0x30000..=0x3134F // Extension G
+            | 0x31350..=0x323AF // Extension H
+    )
+}
+
+/// Derives the `TermKind` of a search term from the classes of its characters.
+///
+/// A single Han character makes the term `HanContaining`. Otherwise any
+/// non-ASCII alphanumeric makes it `UnicodeWord`. Terms with neither
+/// (including the empty term) are `AsciiLike`.
+fn classify_term(term: &str) -> TermKind {
+    let mut kind = TermKind::AsciiLike;
+    for class in term.chars().map(classify_char) {
+        match class {
+            // Han wins over everything else, so the scan can stop here.
+            CharClass::Han => return TermKind::HanContaining,
+            CharClass::UnicodeWord => kind = TermKind::UnicodeWord,
+            CharClass::AsciiWord | CharClass::Other => {}
+        }
+    }
+    kind
+}
+
+/// Decides whether the character next to a candidate match lets the match stand.
+///
+/// * `term_kind` - kind of the search term.
+/// * `neighbor` - character adjacent to the match, or `None` when there is none.
+/// * `term_has_other_boundary` - when `true`, the neighbor is not inspected.
+///
+/// Returns `true` when the neighbor does not reject the match.
+fn boundary_ok(term_kind: TermKind, neighbor: Option<char>, term_has_other_boundary: bool) -> bool {
+    if term_has_other_boundary {
+        return true;
+    }
+
+    // Without a neighbor there is nothing that could reject the match.
+    let Some(neighbor_class) = neighbor.map(classify_char) else {
+        return true;
+    };
+
+    match term_kind {
+        // Han-containing terms match as plain substrings.
+        TermKind::HanContaining => true,
+        // Only an ASCII letter or digit rejects an ASCII-like term.
+        TermKind::AsciiLike => neighbor_class != CharClass::AsciiWord,
+        // ASCII, Han and other alphanumeric neighbors all reject; only `Other` passes.
+        TermKind::UnicodeWord => neighbor_class == CharClass::Other,
+    }
+}

 impl MatchesTermFinder {
    /// Create a new `MatchesTermFinder` for the given term.
    pub fn new(term: &str) -> Self {
-        let starts_with_non_alnum = term.chars().next().is_some_and(|c| !c.is_alphanumeric());
-        let ends_with_non_alnum = term.chars().last().is_some_and(|c| !c.is_alphanumeric());
-
+        let starts_with_other = term
+            .chars()
+            .next()
+            .is_some_and(|c| classify_char(c) == CharClass::Other);
+        let ends_with_other = term
+            .chars()
+            .last()
+            .is_some_and(|c| classify_char(c) == CharClass::Other);
        Self {
            finder: memmem::Finder::new(term).into_owned(),
            term: term.to_string(),
-            starts_with_non_alnum,
-            ends_with_non_alnum,
+            term_kind: classify_term(term),
+            starts_with_other,
+            ends_with_other,
        }
    }

@@ -254,23 +341,20 @@ impl MatchesTermFinder {
        while let Some(found_pos) = self.finder.find(&text.as_bytes()[pos..]) {
            let actual_pos = pos + found_pos;

-            let prev_ok = self.starts_with_non_alnum
-                || text[..actual_pos]
-                    .chars()
-                    .last()
-                    .map(|c| !c.is_alphanumeric())
-                    .unwrap_or(true);
+            let prev = text[..actual_pos].chars().last();
+            let prev_ok = self.starts_with_other || boundary_ok(self.term_kind, prev, false);

            if prev_ok {
                let next_pos = actual_pos + self.finder.needle().len();
-                let next_ok = self.ends_with_non_alnum
-                    || text[next_pos..]
-                        .chars()
-                        .next()
-                        .map(|c| !c.is_alphanumeric())
-                        .unwrap_or(true);
+                let next = text[next_pos..].chars().next();
+                let next_ok = self.ends_with_other || boundary_ok(self.term_kind, next, false);

-                if next_ok {
+                let match_ok = match self.term_kind {
+                    TermKind::HanContaining => true,
+                    _ => prev_ok && next_ok,
+                };
+
+                if match_ok {
                    return true;
                }
            }
@@ -369,6 +453,25 @@ mod tests {
        assert!(!MatchesTermFinder::new("v1.0").find("v1.0a"));
    }

+    #[test]
+    fn mixed_script_terms_match_inside_chinese_context() {
+        let text = "登录手机号18888888888的动态key";
+        assert!(MatchesTermFinder::new("手机号").find(text));
+        assert!(MatchesTermFinder::new("18888888888").find(text));
+        assert!(MatchesTermFinder::new("手机").find(text));
+        assert!(MatchesTermFinder::new("机号").find(text));
+        assert!(MatchesTermFinder::new("机号1888").find(text));
+        assert!(MatchesTermFinder::new("农业").find("中国农业银行"));
+        assert!(MatchesTermFinder::new("error").find("错误error日志"));
+    }
+
+    #[test]
+    fn underscore_still_counts_as_boundary_for_ascii_terms() {
+        assert!(MatchesTermFinder::new("world").find("hello_world"));
+        assert!(MatchesTermFinder::new("id").find("trace_id=abc"));
+        assert!(!MatchesTermFinder::new("error").find("criticalerrors"));
+    }
+
    #[test]
    fn adjacent_alphanumeric_fails() {
        assert!(!MatchesTermFinder::new("cat").find("cat5"));
@@ -406,4 +509,10 @@ mod tests {
        assert!(MatchesTermFinder::new("中文").find("这是中文测试，中文！"));
        assert!(MatchesTermFinder::new("error").find("错误errorerror日志_error!"));
    }
+
+    #[test]
+    fn han_terms_match_as_contiguous_substrings() {
+        assert!(MatchesTermFinder::new("行账号").find("中国农业银行账号"));
+        assert!(MatchesTermFinder::new("登录").find("登录手机号18888888888的动态key"));
+    }
 }
--- a/src/index/src/fulltext_index/tokenizer.rs
+++ b/src/index/src/fulltext_index/tokenizer.rs
@@ -167,6 +167,26 @@ mod tests {
        assert_eq!(tokens, vec!["我", "喜欢", "苹果"]);
    }

+    #[test]
+    fn test_chinese_tokenizer_issue_7943_sample() {
+        let tokenizer = ChineseTokenizer;
+        let text = "登录手机号18888888888的动态key：829889AC8";
+        let tokens = tokenizer.tokenize(text);
+        assert_eq!(
+            tokens,
+            vec![
+                "登录",
+                "手机号",
+                "18888888888",
+                "的",
+                "动态",
+                "key",
+                "：",
+                "829889AC8"
+            ]
+        );
+    }
+
    #[test]
    fn test_valid_ascii_token_lookup_table() {
        // Test all ASCII values in a single loop
--- a/src/query/src/promql/planner.rs
+++ b/src/query/src/promql/planner.rs
@@ -23,6 +23,8 @@ use common_error::ext::ErrorExt;
 use common_error::status_code::StatusCode;
 use common_function::function::FunctionContext;
 use common_query::prelude::greptime_value;
+use common_time::Timestamp;
+use common_time::timestamp::TimeUnit;
 use datafusion::common::DFSchemaRef;
 use datafusion::datasource::DefaultTableSource;
 use datafusion::functions_aggregate::average::avg_udaf;
@@ -91,9 +93,9 @@ use crate::promql::error::{
    InvalidRegularExpressionSnafu, InvalidTimeRangeSnafu, MultiFieldsNotSupportedSnafu,
    MultipleMetricMatchersSnafu, MultipleVectorSnafu, NoMetricMatcherSnafu, PromqlPlanNodeSnafu,
    Result, SameLabelSetSnafu, TableNameNotFoundSnafu, TimeIndexNotFoundSnafu,
-    UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu, UnsupportedExprSnafu,
-    UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu, ValueNotFoundSnafu,
-    ZeroRangeSelectorSnafu,
+    TimestampOutOfRangeSnafu, UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu,
+    UnsupportedExprSnafu, UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu,
+    ValueNotFoundSnafu, ZeroRangeSelectorSnafu,
 };
 use crate::query_engine::QueryEngineState;

@@ -1221,26 +1223,34 @@ impl PromPlanner {
        label_matchers: Matchers,
        is_range_selector: bool,
    ) -> Result<LogicalPlan> {
-        // make table scan plan
-        let table_ref = self.table_ref()?;
-        let mut table_scan = self.create_table_scan_plan(table_ref.clone()).await?;
-        let table_schema = table_scan.schema();
-
-        // make filter exprs
        let offset_duration = match offset {
            Some(Offset::Pos(duration)) => duration.as_millis() as Millisecond,
            Some(Offset::Neg(duration)) => -(duration.as_millis() as Millisecond),
            None => 0,
        };
+
+        // make table scan plan
+        let table_ref = self.table_ref()?;
+        let (mut table_scan, time_filter_pushed_down) = self
+            .create_table_scan_plan(table_ref.clone(), offset_duration)
+            .await?;
+        let table_schema = table_scan.schema();
+
+        // make filter exprs
        let mut scan_filters = Self::matchers_to_expr(label_matchers.clone(), table_schema)?;
-        if let Some(time_index_filter) = self.build_time_index_filter(offset_duration)? {
+        if !time_filter_pushed_down
+            && let Some(time_index_filter) =
+                self.build_time_index_filter(offset_duration, TimeUnit::Millisecond)?
+        {
            scan_filters.push(time_index_filter);
        }
-        table_scan = LogicalPlanBuilder::from(table_scan)
-            .filter(conjunction(scan_filters).unwrap()) // Safety: `scan_filters` is not empty.
-            .context(DataFusionPlanningSnafu)?
-            .build()
-            .context(DataFusionPlanningSnafu)?;
+        if let Some(scan_filter) = conjunction(scan_filters) {
+            table_scan = LogicalPlanBuilder::from(table_scan)
+                .filter(scan_filter)
+                .context(DataFusionPlanningSnafu)?
+                .build()
+                .context(DataFusionPlanningSnafu)?;
+        }

        // make a projection plan if there is any `__field__` matcher
        if let Some(field_matchers) = &self.ctx.field_column_matcher {
@@ -1590,7 +1600,11 @@ impl PromPlanner {
        Ok(table_ref)
    }

-    fn build_time_index_filter(&self, offset_duration: i64) -> Result<Option<DfExpr>> {
+    fn build_time_index_filter(
+        &self,
+        offset_duration: i64,
+        time_index_unit: TimeUnit,
+    ) -> Result<Option<DfExpr>> {
        let start = self.ctx.start;
        let end = self.ctx.end;
        if end < start {
@@ -1614,56 +1628,92 @@ impl PromPlanner {

        // Scan a continuous time range
        if (end - start) / interval > MAX_SCATTER_POINTS || interval <= INTERVAL_1H {
+            let lower_bound = self.build_scan_time_filter_literal(
+                start - offset_duration - selector_window + lower_exclusive_adjustment,
+                time_index_unit,
+            )?;
+            // Exclusive upper bound: `end - offset + 1`, computed with checked arithmetic.
+            // The error context uses `saturating_sub` because a plain `-` would overflow
+            // again in exactly the case where `checked_sub` failed.
+            let upper_bound = self.build_scan_time_filter_literal(
+                self.ctx
+                    .end
+                    .checked_sub(offset_duration)
+                    .and_then(|ts| ts.checked_add(1))
+                    .with_context(|| TimestampOutOfRangeSnafu {
+                        timestamp: self.ctx.end.saturating_sub(offset_duration),
+                        unit: TimeUnit::Millisecond,
+                    })?,
+                time_index_unit,
+            )?;
            let single_time_range = time_index_expr
                .clone()
-                .gt_eq(DfExpr::Literal(
-                    ScalarValue::TimestampMillisecond(
-                        Some(
-                            self.ctx.start - offset_duration - selector_window
-                                + lower_exclusive_adjustment,
-                        ),
-                        None,
-                    ),
-                    None,
-                ))
-                .and(time_index_expr.lt_eq(DfExpr::Literal(
-                    ScalarValue::TimestampMillisecond(Some(self.ctx.end - offset_duration), None),
-                    None,
-                )));
+                .gt_eq(lower_bound)
+                .and(time_index_expr.lt(upper_bound));
            return Ok(Some(single_time_range));
        }

        // Otherwise scan scatter ranges separately
        let mut filters = Vec::with_capacity(num_points as usize + 1);
        for timestamp in (start..=end).step_by(interval as usize) {
+            let lower_bound = self.build_scan_time_filter_literal(
+                timestamp - offset_duration - selector_window + lower_exclusive_adjustment,
+                time_index_unit,
+            )?;
+            // Exclusive upper bound: `timestamp - offset + 1`, computed with checked
+            // arithmetic. The error context uses `saturating_sub` because a plain `-`
+            // would overflow again in exactly the case where `checked_sub` failed.
+            let upper_bound = self.build_scan_time_filter_literal(
+                timestamp
+                    .checked_sub(offset_duration)
+                    .and_then(|ts| ts.checked_add(1))
+                    .with_context(|| TimestampOutOfRangeSnafu {
+                        timestamp: timestamp.saturating_sub(offset_duration),
+                        unit: TimeUnit::Millisecond,
+                    })?,
+                time_index_unit,
+            )?;
            filters.push(
                time_index_expr
                    .clone()
-                    .gt_eq(DfExpr::Literal(
-                        ScalarValue::TimestampMillisecond(
-                            Some(
-                                timestamp - offset_duration - selector_window
-                                    + lower_exclusive_adjustment,
-                            ),
-                            None,
-                        ),
-                        None,
-                    ))
-                    .and(time_index_expr.clone().lt_eq(DfExpr::Literal(
-                        ScalarValue::TimestampMillisecond(Some(timestamp - offset_duration), None),
-                        None,
-                    ))),
+                    .gt_eq(lower_bound)
+                    .and(time_index_expr.clone().lt(upper_bound)),
            )
        }

        Ok(filters.into_iter().reduce(DfExpr::or))
    }

+    /// Builds a timestamp literal in `time_index_unit` from a millisecond value.
+    ///
+    /// The value is converted with `Timestamp::convert_to_ceil`; when that yields
+    /// no result, a `TimestampOutOfRange` error carrying the input and the target
+    /// unit is returned.
+    // NOTE(review): `convert_to_ceil` presumably rounds up when the target unit is
+    // coarser than milliseconds — confirm against `common_time`.
+    fn build_scan_time_filter_literal(
+        &self,
+        timestamp_ms: i64,
+        time_index_unit: TimeUnit,
+    ) -> Result<DfExpr> {
+        let converted = Timestamp::new(timestamp_ms, TimeUnit::Millisecond)
+            .convert_to_ceil(time_index_unit)
+            .with_context(|| TimestampOutOfRangeSnafu {
+                timestamp: timestamp_ms,
+                unit: time_index_unit,
+            })?;
+        let scalar = Self::timestamp_to_scalar_value(converted);
+        Ok(DfExpr::Literal(scalar, None))
+    }
+
+    /// Wraps a `Timestamp` in the `ScalarValue` timestamp variant that matches its
+    /// unit, with no timezone attached.
+    fn timestamp_to_scalar_value(timestamp: Timestamp) -> ScalarValue {
+        let raw = Some(timestamp.value());
+        match timestamp.unit() {
+            TimeUnit::Second => ScalarValue::TimestampSecond(raw, None),
+            TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(raw, None),
+            TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(raw, None),
+            TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(raw, None),
+        }
+    }
+
    /// Create a table scan plan and a filter plan with given filter.
    ///
    /// # Panic
    /// If the filter is empty
-    async fn create_table_scan_plan(&mut self, table_ref: TableReference) -> Result<LogicalPlan> {
+    async fn create_table_scan_plan(
+        &mut self,
+        table_ref: TableReference,
+        offset_duration: i64,
+    ) -> Result<(LogicalPlan, bool)> {
        let provider = self
            .table_provider
            .resolve_table(table_ref.clone())
@@ -1756,14 +1806,19 @@ impl PromPlanner {
            self.ctx.tag_columns.clone()
        };

-        let is_time_index_ms = scan_table
+        let time_index_unit = scan_table
            .schema()
            .timestamp_column()
            .with_context(|| TimeIndexNotFoundSnafu {
                table: maybe_phy_table_ref.to_quoted_string(),
            })?
            .data_type
-            == ConcreteDataType::timestamp_millisecond_datatype();
+            .as_timestamp()
+            .with_context(|| TimeIndexNotFoundSnafu {
+                table: maybe_phy_table_ref.to_quoted_string(),
+            })?
+            .unit();
+        let is_time_index_ms = time_index_unit == TimeUnit::Millisecond;

        let scan_projection = if table_id_filter.is_some() {
            let mut required_columns = HashSet::new();
@@ -1816,6 +1871,17 @@ impl PromPlanner {
                .context(DataFusionPlanningSnafu)?;
        }

+        if !is_time_index_ms
+            && let Some(time_index_filter) =
+                self.build_time_index_filter(offset_duration, time_index_unit)?
+        {
+            scan_plan = LogicalPlanBuilder::from(scan_plan)
+                .filter(time_index_filter)
+                .context(DataFusionPlanningSnafu)?
+                .build()
+                .context(DataFusionPlanningSnafu)?;
+        }
+
        if !is_time_index_ms {
            // cast to ms if time_index not in Millisecond precision
            let expr: Vec<_> = self
@@ -1882,7 +1948,7 @@ impl PromPlanner {
        let result = LogicalPlanBuilder::from(scan_plan)
            .build()
            .context(DataFusionPlanningSnafu)?;
-        Ok(result)
+        Ok((result, !is_time_index_ms))
    }

    fn collect_row_key_tag_columns_from_plan(
@@ -6085,9 +6151,10 @@ mod test {
            "PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
            \n  PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
            \n    Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
-            \n      Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-999, None) AND metrics.timestamp <= TimestampMillisecond(100000000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
+            \n      Filter: metrics.tag = Utf8(\"1\") [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
            \n        Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(ms)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
-            \n          TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]"
+            \n          Filter: metrics.timestamp >= TimestampNanosecond(-999000000, None) AND metrics.timestamp < TimestampNanosecond(100000001000000, None) [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]\
+            \n            TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]"
        );
        let plan = PromPlanner::stmt_to_plan(
            DfTableSourceProvider::new(
@@ -6118,9 +6185,10 @@ mod test {
            \n      PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
            \n        PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
            \n          Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
-            \n            Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-4999, None) AND metrics.timestamp <= TimestampMillisecond(100000000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
+            \n            Filter: metrics.tag = Utf8(\"1\") [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
            \n              Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(ms)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
-            \n                TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]"
+            \n                Filter: metrics.timestamp >= TimestampNanosecond(-4999000000, None) AND metrics.timestamp < TimestampNanosecond(100000001000000, None) [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]\
+            \n                  TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]"
        );
    }

--- a/tests/cases/standalone/common/function/matches_term.result
+++ b/tests/cases/standalone/common/function/matches_term.result
@@ -157,6 +157,71 @@ SELECT matches_term('русский!', 'русский') as result;
 | true   |
 +--------+

+-- Phase 1 mixed Chinese and numeric behavior
+SELECT matches_term('登录手机号18888888888的动态key', '手机号') as result;
+
+--------+
+| result |
+--------+
+| true   |
+--------+
+
+SELECT matches_term('登录手机号18888888888的动态key', '18888888888') as result;
+
+--------+
+| result |
+--------+
+| true   |
+--------+
+
+SELECT matches_term('登录手机号18888888888的动态key', '手机') as result;
+
+--------+
+| result |
+--------+
+| true   |
+--------+
+
+SELECT matches_term('登录手机号18888888888的动态key', '机号') as result;
+
+--------+
+| result |
+--------+
+| true   |
+--------+
+
+SELECT matches_term('登录手机号18888888888的动态key', '机号1888') as result;
+
+--------+
+| result |
+--------+
+| true   |
+--------+
+
+SELECT matches_term('中国农业银行', '农业') as result;
+
+--------+
+| result |
+--------+
+| true   |
+--------+
+
+SELECT matches_term('中国农业银行账号', '行账号') as result;
+
+--------+
+| result |
+--------+
+| true   |
+--------+
+
+SELECT matches_term('错误error日志', 'error') as result;
+
+--------+
+| result |
+--------+
+| true   |
+--------+
+
 -- Test complete word matching
 CREATE TABLE logs (
    `id` TIMESTAMP TIME INDEX,
--- a/tests/cases/standalone/common/function/matches_term.sql
+++ b/tests/cases/standalone/common/function/matches_term.sql
@@ -47,6 +47,16 @@ SELECT matches_term('café>', 'café') as result;
 -- Expect: true
 SELECT matches_term('русский!', 'русский') as result;

+-- Phase 1 mixed Chinese and numeric behavior
+SELECT matches_term('登录手机号18888888888的动态key', '手机号') as result;
+SELECT matches_term('登录手机号18888888888的动态key', '18888888888') as result;
+SELECT matches_term('登录手机号18888888888的动态key', '手机') as result;
+SELECT matches_term('登录手机号18888888888的动态key', '机号') as result;
+SELECT matches_term('登录手机号18888888888的动态key', '机号1888') as result;
+SELECT matches_term('中国农业银行', '农业') as result;
+SELECT matches_term('中国农业银行账号', '行账号') as result;
+SELECT matches_term('错误error日志', 'error') as result;
+
 -- Test complete word matching
 CREATE TABLE logs (
    `id` TIMESTAMP TIME INDEX,