mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-20 23:10:37 +00:00
@@ -27,10 +27,11 @@ use crate::function_registry::FunctionRegistry;
|
||||
|
||||
/// Exact term/phrase matching function for text columns.
|
||||
///
|
||||
/// This function checks if a text column contains exact term/phrase matches
|
||||
/// with non-alphanumeric boundaries. Designed for:
|
||||
/// - Whole-word matching (e.g. "cat" in "cat!" but not in "category")
|
||||
/// This function uses script-aware matching rules:
|
||||
/// - ASCII-only terms keep whole-word boundary matching (e.g. "cat" in "cat!" but not in "category")
|
||||
/// - Phrase matching (e.g. "hello world" in "note:hello world!")
|
||||
/// - Terms containing Han characters match as contiguous substrings
|
||||
/// - Mixed-script identifiers and numeric terms remain searchable in Chinese text
|
||||
///
|
||||
/// # Signature
|
||||
/// `matches_term(text: String, term: String) -> Boolean`
|
||||
@@ -43,9 +44,8 @@ use crate::function_registry::FunctionRegistry;
|
||||
/// BooleanVector where each element indicates if the corresponding text
|
||||
/// contains an exact match of the term, following these rules:
|
||||
/// 1. Exact substring match found (case-sensitive)
|
||||
/// 2. Match boundaries are either:
|
||||
/// - Start/end of text
|
||||
/// - Any non-alphanumeric character (including spaces, hyphens, punctuation, etc.)
|
||||
/// 2. For ASCII-only terms, adjacent ASCII word characters block the match
|
||||
/// 3. For Han-containing terms, contiguous substring match is sufficient
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
@@ -60,6 +60,9 @@ use crate::function_registry::FunctionRegistry;
|
||||
/// SELECT matches_term(column, 'critical error') FROM logs;
|
||||
/// -- Match in: "ERROR:critical error!"
|
||||
/// -- No match: "critical_errors"
|
||||
/// -- Chinese substring examples --
|
||||
/// SELECT matches_term(column, '手机') FROM table;
|
||||
/// -- Text: "登录手机号18888888888的动态key" => true
|
||||
///
|
||||
/// -- Empty string handling --
|
||||
/// SELECT matches_term(column, '') FROM table;
|
||||
@@ -204,9 +207,8 @@ impl Function for MatchesTermFunction {
|
||||
///
|
||||
/// A term is considered matched when:
|
||||
/// 1. The exact sequence appears in the text
|
||||
/// 2. It is either:
|
||||
/// - At the start/end of text with adjacent non-alphanumeric character
|
||||
/// - Surrounded by non-alphanumeric characters
|
||||
/// 2. ASCII-only terms are not adjacent to ASCII word characters
|
||||
/// 3. Han-containing terms match as contiguous substrings
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
@@ -215,28 +217,113 @@ impl Function for MatchesTermFunction {
|
||||
/// assert!(finder.find("dog,cat")); // Term preceded by comma
|
||||
/// assert!(!finder.find("category")); // Partial match rejected
|
||||
///
|
||||
/// let finder = MatchesTermFinder::new("world");
|
||||
/// assert!(finder.find("hello-world")); // Hyphen boundary
|
||||
/// let finder = MatchesTermFinder::new("手机");
|
||||
/// assert!(finder.find("登录手机号18888888888的动态key"));
|
||||
/// ```
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MatchesTermFinder {
|
||||
finder: memmem::Finder<'static>,
|
||||
term: String,
|
||||
starts_with_non_alnum: bool,
|
||||
ends_with_non_alnum: bool,
|
||||
term_kind: TermKind,
|
||||
starts_with_other: bool,
|
||||
ends_with_other: bool,
|
||||
}
|
||||
|
||||
/// Coarse class of a single character, as assigned by `classify_char`.
///
/// The class of the character next to a candidate match decides whether that
/// neighbour blocks the match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CharClass {
    /// ASCII letter or digit.
    AsciiWord,
    /// CJK ideograph recognised by `is_han`.
    Han,
    /// Alphanumeric character that is neither ASCII nor Han.
    UnicodeWord,
    /// Anything that is not alphanumeric (punctuation, whitespace, `_`, ...).
    Other,
}
|
||||
|
||||
/// Matching strategy selected for a whole search term by `classify_term`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TermKind {
    /// The term has no Han and no non-ASCII alphanumeric characters.
    AsciiLike,
    /// The term has at least one non-ASCII alphanumeric character and no Han.
    UnicodeWord,
    /// The term has at least one Han character.
    HanContaining,
}
|
||||
|
||||
fn classify_char(c: char) -> CharClass {
|
||||
if c.is_ascii_alphanumeric() {
|
||||
CharClass::AsciiWord
|
||||
} else if is_han(c) {
|
||||
CharClass::Han
|
||||
} else if c.is_alphanumeric() {
|
||||
CharClass::UnicodeWord
|
||||
} else {
|
||||
CharClass::Other
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `c` lies in one of the Unicode CJK ideograph blocks.
///
/// Covered blocks: CJK Unified Ideographs, Extensions A through I, and both
/// CJK Compatibility Ideographs blocks (BMP and supplement). Kana, Hangul and
/// CJK punctuation are not ideographs and yield `false`.
fn is_han(c: char) -> bool {
    matches!(
        c as u32,
        0x3400..=0x4DBF          // Extension A
            | 0x4E00..=0x9FFF    // CJK Unified Ideographs
            | 0xF900..=0xFAFF    // CJK Compatibility Ideographs
            | 0x20000..=0x2A6DF  // Extension B
            | 0x2A700..=0x2B73F  // Extension C
            | 0x2B740..=0x2B81F  // Extension D
            | 0x2B820..=0x2CEAF  // Extension E
            | 0x2CEB0..=0x2EBEF  // Extension F
            | 0x2EBF0..=0x2EE5F  // Extension I
            | 0x2F800..=0x2FA1F  // CJK Compatibility Ideographs Supplement
            | 0x30000..=0x3134F  // Extension G
            | 0x31350..=0x323AF  // Extension H
    )
}
|
||||
|
||||
fn classify_term(term: &str) -> TermKind {
|
||||
let mut has_han = false;
|
||||
let mut has_unicode_word = false;
|
||||
for c in term.chars() {
|
||||
match classify_char(c) {
|
||||
CharClass::AsciiWord => {}
|
||||
CharClass::Han => has_han = true,
|
||||
CharClass::UnicodeWord => has_unicode_word = true,
|
||||
CharClass::Other => {}
|
||||
}
|
||||
}
|
||||
|
||||
if has_han {
|
||||
TermKind::HanContaining
|
||||
} else if has_unicode_word {
|
||||
TermKind::UnicodeWord
|
||||
} else {
|
||||
TermKind::AsciiLike
|
||||
}
|
||||
}
|
||||
|
||||
fn boundary_ok(term_kind: TermKind, neighbor: Option<char>, term_has_other_boundary: bool) -> bool {
|
||||
if term_has_other_boundary {
|
||||
return true;
|
||||
}
|
||||
|
||||
match term_kind {
|
||||
TermKind::AsciiLike => !matches!(neighbor.map(classify_char), Some(CharClass::AsciiWord)),
|
||||
TermKind::UnicodeWord => !matches!(
|
||||
neighbor.map(classify_char),
|
||||
Some(CharClass::AsciiWord | CharClass::UnicodeWord | CharClass::Han)
|
||||
),
|
||||
TermKind::HanContaining => true,
|
||||
}
|
||||
}
|
||||
|
||||
impl MatchesTermFinder {
|
||||
/// Create a new `MatchesTermFinder` for the given term.
|
||||
pub fn new(term: &str) -> Self {
|
||||
let starts_with_non_alnum = term.chars().next().is_some_and(|c| !c.is_alphanumeric());
|
||||
let ends_with_non_alnum = term.chars().last().is_some_and(|c| !c.is_alphanumeric());
|
||||
|
||||
let starts_with_other = term
|
||||
.chars()
|
||||
.next()
|
||||
.is_some_and(|c| classify_char(c) == CharClass::Other);
|
||||
let ends_with_other = term
|
||||
.chars()
|
||||
.last()
|
||||
.is_some_and(|c| classify_char(c) == CharClass::Other);
|
||||
Self {
|
||||
finder: memmem::Finder::new(term).into_owned(),
|
||||
term: term.to_string(),
|
||||
starts_with_non_alnum,
|
||||
ends_with_non_alnum,
|
||||
term_kind: classify_term(term),
|
||||
starts_with_other,
|
||||
ends_with_other,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -254,23 +341,20 @@ impl MatchesTermFinder {
|
||||
while let Some(found_pos) = self.finder.find(&text.as_bytes()[pos..]) {
|
||||
let actual_pos = pos + found_pos;
|
||||
|
||||
let prev_ok = self.starts_with_non_alnum
|
||||
|| text[..actual_pos]
|
||||
.chars()
|
||||
.last()
|
||||
.map(|c| !c.is_alphanumeric())
|
||||
.unwrap_or(true);
|
||||
let prev = text[..actual_pos].chars().last();
|
||||
let prev_ok = self.starts_with_other || boundary_ok(self.term_kind, prev, false);
|
||||
|
||||
if prev_ok {
|
||||
let next_pos = actual_pos + self.finder.needle().len();
|
||||
let next_ok = self.ends_with_non_alnum
|
||||
|| text[next_pos..]
|
||||
.chars()
|
||||
.next()
|
||||
.map(|c| !c.is_alphanumeric())
|
||||
.unwrap_or(true);
|
||||
let next = text[next_pos..].chars().next();
|
||||
let next_ok = self.ends_with_other || boundary_ok(self.term_kind, next, false);
|
||||
|
||||
if next_ok {
|
||||
let match_ok = match self.term_kind {
|
||||
TermKind::HanContaining => true,
|
||||
_ => prev_ok && next_ok,
|
||||
};
|
||||
|
||||
if match_ok {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -369,6 +453,25 @@ mod tests {
|
||||
assert!(!MatchesTermFinder::new("v1.0").find("v1.0a"));
|
||||
}
|
||||
|
||||
#[test]
fn mixed_script_terms_match_inside_chinese_context() {
    // Han, numeric, and Han+digit terms must all be found inside one
    // unsegmented Chinese log line.
    let text = "登录手机号18888888888的动态key";
    for term in ["手机号", "18888888888", "手机", "机号", "机号1888"] {
        assert!(MatchesTermFinder::new(term).find(text));
    }

    // A Han term in the middle of a Han run.
    assert!(MatchesTermFinder::new("农业").find("中国农业银行"));
    // An ASCII term whose neighbours on both sides are Han characters.
    assert!(MatchesTermFinder::new("error").find("错误error日志"));
}
|
||||
|
||||
#[test]
fn underscore_still_counts_as_boundary_for_ascii_terms() {
    // `_` is not alphanumeric, so it separates ASCII terms.
    let world = MatchesTermFinder::new("world");
    assert!(world.find("hello_world"));

    let id = MatchesTermFinder::new("id");
    assert!(id.find("trace_id=abc"));

    // ASCII letters on both sides still block the match.
    let error = MatchesTermFinder::new("error");
    assert!(!error.find("criticalerrors"));
}
|
||||
|
||||
#[test]
|
||||
fn adjacent_alphanumeric_fails() {
|
||||
assert!(!MatchesTermFinder::new("cat").find("cat5"));
|
||||
@@ -406,4 +509,10 @@ mod tests {
|
||||
assert!(MatchesTermFinder::new("中文").find("这是中文测试,中文!"));
|
||||
assert!(MatchesTermFinder::new("error").find("错误errorerror日志_error!"));
|
||||
}
|
||||
|
||||
#[test]
fn han_terms_match_as_contiguous_substrings() {
    // (term, text) pairs: each Han term is a contiguous substring of its text.
    let cases = [
        ("行账号", "中国农业银行账号"),
        ("登录", "登录手机号18888888888的动态key"),
    ];
    for (term, text) in cases {
        assert!(MatchesTermFinder::new(term).find(text));
    }
}
|
||||
}
|
||||
|
||||
@@ -167,6 +167,26 @@ mod tests {
|
||||
assert_eq!(tokens, vec!["我", "喜欢", "苹果"]);
|
||||
}
|
||||
|
||||
#[test]
fn test_chinese_tokenizer_issue_7943_sample() {
    // Expected segmentation of the sample line from issue 7943.
    let expected = vec![
        "登录",
        "手机号",
        "18888888888",
        "的",
        "动态",
        "key",
        ":",
        "829889AC8",
    ];

    let tokenizer = ChineseTokenizer;
    let tokens = tokenizer.tokenize("登录手机号18888888888的动态key:829889AC8");
    assert_eq!(tokens, expected);
}
|
||||
|
||||
#[test]
|
||||
fn test_valid_ascii_token_lookup_table() {
|
||||
// Test all ASCII values in a single loop
|
||||
|
||||
@@ -23,6 +23,8 @@ use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_function::function::FunctionContext;
|
||||
use common_query::prelude::greptime_value;
|
||||
use common_time::Timestamp;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion::common::DFSchemaRef;
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::functions_aggregate::average::avg_udaf;
|
||||
@@ -91,9 +93,9 @@ use crate::promql::error::{
|
||||
InvalidRegularExpressionSnafu, InvalidTimeRangeSnafu, MultiFieldsNotSupportedSnafu,
|
||||
MultipleMetricMatchersSnafu, MultipleVectorSnafu, NoMetricMatcherSnafu, PromqlPlanNodeSnafu,
|
||||
Result, SameLabelSetSnafu, TableNameNotFoundSnafu, TimeIndexNotFoundSnafu,
|
||||
UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu, UnsupportedExprSnafu,
|
||||
UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu, ValueNotFoundSnafu,
|
||||
ZeroRangeSelectorSnafu,
|
||||
TimestampOutOfRangeSnafu, UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, UnknownTableSnafu,
|
||||
UnsupportedExprSnafu, UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu,
|
||||
ValueNotFoundSnafu, ZeroRangeSelectorSnafu,
|
||||
};
|
||||
use crate::query_engine::QueryEngineState;
|
||||
|
||||
@@ -1221,26 +1223,34 @@ impl PromPlanner {
|
||||
label_matchers: Matchers,
|
||||
is_range_selector: bool,
|
||||
) -> Result<LogicalPlan> {
|
||||
// make table scan plan
|
||||
let table_ref = self.table_ref()?;
|
||||
let mut table_scan = self.create_table_scan_plan(table_ref.clone()).await?;
|
||||
let table_schema = table_scan.schema();
|
||||
|
||||
// make filter exprs
|
||||
let offset_duration = match offset {
|
||||
Some(Offset::Pos(duration)) => duration.as_millis() as Millisecond,
|
||||
Some(Offset::Neg(duration)) => -(duration.as_millis() as Millisecond),
|
||||
None => 0,
|
||||
};
|
||||
|
||||
// make table scan plan
|
||||
let table_ref = self.table_ref()?;
|
||||
let (mut table_scan, time_filter_pushed_down) = self
|
||||
.create_table_scan_plan(table_ref.clone(), offset_duration)
|
||||
.await?;
|
||||
let table_schema = table_scan.schema();
|
||||
|
||||
// make filter exprs
|
||||
let mut scan_filters = Self::matchers_to_expr(label_matchers.clone(), table_schema)?;
|
||||
if let Some(time_index_filter) = self.build_time_index_filter(offset_duration)? {
|
||||
if !time_filter_pushed_down
|
||||
&& let Some(time_index_filter) =
|
||||
self.build_time_index_filter(offset_duration, TimeUnit::Millisecond)?
|
||||
{
|
||||
scan_filters.push(time_index_filter);
|
||||
}
|
||||
table_scan = LogicalPlanBuilder::from(table_scan)
|
||||
.filter(conjunction(scan_filters).unwrap()) // Safety: `scan_filters` is not empty.
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
if let Some(scan_filter) = conjunction(scan_filters) {
|
||||
table_scan = LogicalPlanBuilder::from(table_scan)
|
||||
.filter(scan_filter)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
}
|
||||
|
||||
// make a projection plan if there is any `__field__` matcher
|
||||
if let Some(field_matchers) = &self.ctx.field_column_matcher {
|
||||
@@ -1590,7 +1600,11 @@ impl PromPlanner {
|
||||
Ok(table_ref)
|
||||
}
|
||||
|
||||
fn build_time_index_filter(&self, offset_duration: i64) -> Result<Option<DfExpr>> {
|
||||
fn build_time_index_filter(
|
||||
&self,
|
||||
offset_duration: i64,
|
||||
time_index_unit: TimeUnit,
|
||||
) -> Result<Option<DfExpr>> {
|
||||
let start = self.ctx.start;
|
||||
let end = self.ctx.end;
|
||||
if end < start {
|
||||
@@ -1614,56 +1628,92 @@ impl PromPlanner {
|
||||
|
||||
// Scan a continuous time range
|
||||
if (end - start) / interval > MAX_SCATTER_POINTS || interval <= INTERVAL_1H {
|
||||
let lower_bound = self.build_scan_time_filter_literal(
|
||||
start - offset_duration - selector_window + lower_exclusive_adjustment,
|
||||
time_index_unit,
|
||||
)?;
|
||||
let upper_bound = self.build_scan_time_filter_literal(
|
||||
self.ctx
|
||||
.end
|
||||
.checked_sub(offset_duration)
|
||||
.and_then(|ts| ts.checked_add(1))
|
||||
.with_context(|| TimestampOutOfRangeSnafu {
|
||||
timestamp: self.ctx.end - offset_duration,
|
||||
unit: TimeUnit::Millisecond,
|
||||
})?,
|
||||
time_index_unit,
|
||||
)?;
|
||||
let single_time_range = time_index_expr
|
||||
.clone()
|
||||
.gt_eq(DfExpr::Literal(
|
||||
ScalarValue::TimestampMillisecond(
|
||||
Some(
|
||||
self.ctx.start - offset_duration - selector_window
|
||||
+ lower_exclusive_adjustment,
|
||||
),
|
||||
None,
|
||||
),
|
||||
None,
|
||||
))
|
||||
.and(time_index_expr.lt_eq(DfExpr::Literal(
|
||||
ScalarValue::TimestampMillisecond(Some(self.ctx.end - offset_duration), None),
|
||||
None,
|
||||
)));
|
||||
.gt_eq(lower_bound)
|
||||
.and(time_index_expr.lt(upper_bound));
|
||||
return Ok(Some(single_time_range));
|
||||
}
|
||||
|
||||
// Otherwise scan scatter ranges separately
|
||||
let mut filters = Vec::with_capacity(num_points as usize + 1);
|
||||
for timestamp in (start..=end).step_by(interval as usize) {
|
||||
let lower_bound = self.build_scan_time_filter_literal(
|
||||
timestamp - offset_duration - selector_window + lower_exclusive_adjustment,
|
||||
time_index_unit,
|
||||
)?;
|
||||
let upper_bound = self.build_scan_time_filter_literal(
|
||||
timestamp
|
||||
.checked_sub(offset_duration)
|
||||
.and_then(|ts| ts.checked_add(1))
|
||||
.with_context(|| TimestampOutOfRangeSnafu {
|
||||
timestamp: timestamp - offset_duration,
|
||||
unit: TimeUnit::Millisecond,
|
||||
})?,
|
||||
time_index_unit,
|
||||
)?;
|
||||
filters.push(
|
||||
time_index_expr
|
||||
.clone()
|
||||
.gt_eq(DfExpr::Literal(
|
||||
ScalarValue::TimestampMillisecond(
|
||||
Some(
|
||||
timestamp - offset_duration - selector_window
|
||||
+ lower_exclusive_adjustment,
|
||||
),
|
||||
None,
|
||||
),
|
||||
None,
|
||||
))
|
||||
.and(time_index_expr.clone().lt_eq(DfExpr::Literal(
|
||||
ScalarValue::TimestampMillisecond(Some(timestamp - offset_duration), None),
|
||||
None,
|
||||
))),
|
||||
.gt_eq(lower_bound)
|
||||
.and(time_index_expr.clone().lt(upper_bound)),
|
||||
)
|
||||
}
|
||||
|
||||
Ok(filters.into_iter().reduce(DfExpr::or))
|
||||
}
|
||||
|
||||
fn build_scan_time_filter_literal(
|
||||
&self,
|
||||
timestamp_ms: i64,
|
||||
time_index_unit: TimeUnit,
|
||||
) -> Result<DfExpr> {
|
||||
let timestamp = Timestamp::new(timestamp_ms, TimeUnit::Millisecond)
|
||||
.convert_to_ceil(time_index_unit)
|
||||
.with_context(|| TimestampOutOfRangeSnafu {
|
||||
timestamp: timestamp_ms,
|
||||
unit: time_index_unit,
|
||||
})?;
|
||||
Ok(DfExpr::Literal(
|
||||
Self::timestamp_to_scalar_value(timestamp),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
fn timestamp_to_scalar_value(timestamp: Timestamp) -> ScalarValue {
|
||||
let value = timestamp.value();
|
||||
match timestamp.unit() {
|
||||
TimeUnit::Second => ScalarValue::TimestampSecond(Some(value), None),
|
||||
TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(Some(value), None),
|
||||
TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(Some(value), None),
|
||||
TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(Some(value), None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a table scan plan and a filter plan with given filter.
|
||||
///
|
||||
/// # Panic
|
||||
/// If the filter is empty
|
||||
async fn create_table_scan_plan(&mut self, table_ref: TableReference) -> Result<LogicalPlan> {
|
||||
async fn create_table_scan_plan(
|
||||
&mut self,
|
||||
table_ref: TableReference,
|
||||
offset_duration: i64,
|
||||
) -> Result<(LogicalPlan, bool)> {
|
||||
let provider = self
|
||||
.table_provider
|
||||
.resolve_table(table_ref.clone())
|
||||
@@ -1756,14 +1806,19 @@ impl PromPlanner {
|
||||
self.ctx.tag_columns.clone()
|
||||
};
|
||||
|
||||
let is_time_index_ms = scan_table
|
||||
let time_index_unit = scan_table
|
||||
.schema()
|
||||
.timestamp_column()
|
||||
.with_context(|| TimeIndexNotFoundSnafu {
|
||||
table: maybe_phy_table_ref.to_quoted_string(),
|
||||
})?
|
||||
.data_type
|
||||
== ConcreteDataType::timestamp_millisecond_datatype();
|
||||
.as_timestamp()
|
||||
.with_context(|| TimeIndexNotFoundSnafu {
|
||||
table: maybe_phy_table_ref.to_quoted_string(),
|
||||
})?
|
||||
.unit();
|
||||
let is_time_index_ms = time_index_unit == TimeUnit::Millisecond;
|
||||
|
||||
let scan_projection = if table_id_filter.is_some() {
|
||||
let mut required_columns = HashSet::new();
|
||||
@@ -1816,6 +1871,17 @@ impl PromPlanner {
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
}
|
||||
|
||||
if !is_time_index_ms
|
||||
&& let Some(time_index_filter) =
|
||||
self.build_time_index_filter(offset_duration, time_index_unit)?
|
||||
{
|
||||
scan_plan = LogicalPlanBuilder::from(scan_plan)
|
||||
.filter(time_index_filter)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
}
|
||||
|
||||
if !is_time_index_ms {
|
||||
// cast to ms if time_index not in Millisecond precision
|
||||
let expr: Vec<_> = self
|
||||
@@ -1882,7 +1948,7 @@ impl PromPlanner {
|
||||
let result = LogicalPlanBuilder::from(scan_plan)
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
Ok(result)
|
||||
Ok((result, !is_time_index_ms))
|
||||
}
|
||||
|
||||
fn collect_row_key_tag_columns_from_plan(
|
||||
@@ -6085,9 +6151,10 @@ mod test {
|
||||
"PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-999, None) AND metrics.timestamp <= TimestampMillisecond(100000000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(ms)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]"
|
||||
\n Filter: metrics.timestamp >= TimestampNanosecond(-999000000, None) AND metrics.timestamp < TimestampNanosecond(100000001000000, None) [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]"
|
||||
);
|
||||
let plan = PromPlanner::stmt_to_plan(
|
||||
DfTableSourceProvider::new(
|
||||
@@ -6118,9 +6185,10 @@ mod test {
|
||||
\n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n PromSeriesDivide: tags=[\"tag\"] [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n Sort: metrics.tag ASC NULLS FIRST, metrics.timestamp ASC NULLS FIRST [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") AND metrics.timestamp >= TimestampMillisecond(-4999, None) AND metrics.timestamp <= TimestampMillisecond(100000000, None) [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n Filter: metrics.tag = Utf8(\"1\") [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n Projection: metrics.field, metrics.tag, CAST(metrics.timestamp AS Timestamp(ms)) AS timestamp [field:Float64;N, tag:Utf8, timestamp:Timestamp(ms)]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]"
|
||||
\n Filter: metrics.timestamp >= TimestampNanosecond(-4999000000, None) AND metrics.timestamp < TimestampNanosecond(100000001000000, None) [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]\
|
||||
\n TableScan: metrics [tag:Utf8, timestamp:Timestamp(ns), field:Float64;N]"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -157,6 +157,71 @@ SELECT matches_term('русский!', 'русский') as result;
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Phase 1 mixed Chinese and numeric behavior
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '手机号') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '18888888888') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '手机') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '机号') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '机号1888') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
SELECT matches_term('中国农业银行', '农业') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
SELECT matches_term('中国农业银行账号', '行账号') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
SELECT matches_term('错误error日志', 'error') as result;
|
||||
|
||||
+--------+
|
||||
| result |
|
||||
+--------+
|
||||
| true |
|
||||
+--------+
|
||||
|
||||
-- Test complete word matching
|
||||
CREATE TABLE logs (
|
||||
`id` TIMESTAMP TIME INDEX,
|
||||
|
||||
@@ -47,6 +47,16 @@ SELECT matches_term('café>', 'café') as result;
|
||||
-- Expect: true
|
||||
SELECT matches_term('русский!', 'русский') as result;
|
||||
|
||||
-- Phase 1 mixed Chinese and numeric behavior
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '手机号') as result;
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '18888888888') as result;
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '手机') as result;
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '机号') as result;
|
||||
SELECT matches_term('登录手机号18888888888的动态key', '机号1888') as result;
|
||||
SELECT matches_term('中国农业银行', '农业') as result;
|
||||
SELECT matches_term('中国农业银行账号', '行账号') as result;
|
||||
SELECT matches_term('错误error日志', 'error') as result;
|
||||
|
||||
-- Test complete word matching
|
||||
CREATE TABLE logs (
|
||||
`id` TIMESTAMP TIME INDEX,
|
||||
|
||||
Reference in New Issue
Block a user