mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-17 13:30:38 +00:00
fix: matches incorrectly uses byte len as char len (#5411)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
@@ -725,7 +725,8 @@ struct Tokenizer {
|
||||
impl Tokenizer {
|
||||
pub fn tokenize(mut self, pattern: &str) -> Result<Vec<Token>> {
|
||||
let mut tokens = vec![];
|
||||
while self.cursor < pattern.len() {
|
||||
let char_len = pattern.chars().count();
|
||||
while self.cursor < char_len {
|
||||
// TODO: collect pattern into Vec<char> if this tokenizer becomes a bottleneck in the future
|
||||
let c = pattern.chars().nth(self.cursor).unwrap();
|
||||
match c {
|
||||
@@ -794,7 +795,8 @@ impl Tokenizer {
|
||||
let mut phase = String::new();
|
||||
let mut is_quote_present = false;
|
||||
|
||||
while self.cursor < pattern.len() {
|
||||
let char_len = pattern.chars().count();
|
||||
while self.cursor < char_len {
|
||||
let mut c = pattern.chars().nth(self.cursor).unwrap();
|
||||
|
||||
match c {
|
||||
@@ -899,6 +901,26 @@ mod test {
|
||||
Phase("c".to_string()),
|
||||
],
|
||||
),
|
||||
(
|
||||
r#"中文 测试"#,
|
||||
vec![Phase("中文".to_string()), Phase("测试".to_string())],
|
||||
),
|
||||
(
|
||||
r#"中文 AND 测试"#,
|
||||
vec![Phase("中文".to_string()), And, Phase("测试".to_string())],
|
||||
),
|
||||
(
|
||||
r#"中文 +测试"#,
|
||||
vec![Phase("中文".to_string()), Must, Phase("测试".to_string())],
|
||||
),
|
||||
(
|
||||
r#"中文 -测试"#,
|
||||
vec![
|
||||
Phase("中文".to_string()),
|
||||
Negative,
|
||||
Phase("测试".to_string()),
|
||||
],
|
||||
),
|
||||
];
|
||||
|
||||
for (query, expected) in cases {
|
||||
@@ -1030,6 +1052,61 @@ mod test {
|
||||
],
|
||||
},
|
||||
),
|
||||
(
|
||||
r#"中文 测试"#,
|
||||
PatternAst::Binary {
|
||||
op: BinaryOp::Or,
|
||||
children: vec![
|
||||
PatternAst::Literal {
|
||||
op: UnaryOp::Optional,
|
||||
pattern: "中文".to_string(),
|
||||
},
|
||||
PatternAst::Literal {
|
||||
op: UnaryOp::Optional,
|
||||
pattern: "测试".to_string(),
|
||||
},
|
||||
],
|
||||
},
|
||||
),
|
||||
(
|
||||
r#"中文 AND 测试"#,
|
||||
PatternAst::Binary {
|
||||
op: BinaryOp::And,
|
||||
children: vec![
|
||||
PatternAst::Literal {
|
||||
op: UnaryOp::Optional,
|
||||
pattern: "中文".to_string(),
|
||||
},
|
||||
PatternAst::Literal {
|
||||
op: UnaryOp::Optional,
|
||||
pattern: "测试".to_string(),
|
||||
},
|
||||
],
|
||||
},
|
||||
),
|
||||
(
|
||||
r#"中文 +测试"#,
|
||||
PatternAst::Literal {
|
||||
op: UnaryOp::Must,
|
||||
pattern: "测试".to_string(),
|
||||
},
|
||||
),
|
||||
(
|
||||
r#"中文 -测试"#,
|
||||
PatternAst::Binary {
|
||||
op: BinaryOp::And,
|
||||
children: vec![
|
||||
PatternAst::Literal {
|
||||
op: UnaryOp::Negative,
|
||||
pattern: "测试".to_string(),
|
||||
},
|
||||
PatternAst::Literal {
|
||||
op: UnaryOp::Optional,
|
||||
pattern: "中文".to_string(),
|
||||
},
|
||||
],
|
||||
},
|
||||
),
|
||||
];
|
||||
|
||||
for (query, expected) in cases {
|
||||
|
||||
@@ -257,3 +257,149 @@ drop table fox;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
create table fox_zh (
|
||||
ts timestamp time index,
|
||||
fox string,
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
insert into fox_zh values
|
||||
(1, '快速的棕色狐狸跳过了懒狗'),
|
||||
(2, '这只狐狸非常聪明,跳过了高高的栅栏'),
|
||||
(3, '狐狸和狗是好朋友,它们一起玩耍'),
|
||||
(4, '狐狸跳过了一条小溪,狗在后面追赶'),
|
||||
(5, '狐狸和狗都喜欢在森林里探险'),
|
||||
(6, '狐狸跳过了一个大石头,狗却没有跳过去'),
|
||||
(7, '狐狸和狗在阳光下休息,享受着温暖的时光'),
|
||||
(8, '狐狸跳过了一个小山坡,狗在后面慢慢地走'),
|
||||
(9, '狐狸和狗一起找到了一颗闪闪发光的宝石'),
|
||||
(10, '狐狸跳过了一个小水坑,狗在旁边看着');
|
||||
|
||||
Affected Rows: 10
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 跳过') order by ts;
|
||||
|
||||
+----------------------------------------+
|
||||
| fox |
|
||||
+----------------------------------------+
|
||||
| 快速的棕色狐狸跳过了懒狗 |
|
||||
| 这只狐狸非常聪明,跳过了高高的栅栏 |
|
||||
| 狐狸跳过了一条小溪,狗在后面追赶 |
|
||||
| 狐狸跳过了一个大石头,狗却没有跳过去 |
|
||||
| 狐狸跳过了一个小山坡,狗在后面慢慢地走 |
|
||||
| 狐狸跳过了一个小水坑,狗在旁边看着 |
|
||||
+----------------------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 OR 狗') order by ts;
|
||||
|
||||
+----------------------------------------+
|
||||
| fox |
|
||||
+----------------------------------------+
|
||||
| 快速的棕色狐狸跳过了懒狗 |
|
||||
| 这只狐狸非常聪明,跳过了高高的栅栏 |
|
||||
| 狐狸和狗是好朋友,它们一起玩耍 |
|
||||
| 狐狸跳过了一条小溪,狗在后面追赶 |
|
||||
| 狐狸和狗都喜欢在森林里探险 |
|
||||
| 狐狸跳过了一个大石头,狗却没有跳过去 |
|
||||
| 狐狸和狗在阳光下休息,享受着温暖的时光 |
|
||||
| 狐狸跳过了一个小山坡,狗在后面慢慢地走 |
|
||||
| 狐狸和狗一起找到了一颗闪闪发光的宝石 |
|
||||
| 狐狸跳过了一个小水坑,狗在旁边看着 |
|
||||
+----------------------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 狗') order by ts;
|
||||
|
||||
+----------------------------------------+
|
||||
| fox |
|
||||
+----------------------------------------+
|
||||
| 快速的棕色狐狸跳过了懒狗 |
|
||||
| 狐狸和狗是好朋友,它们一起玩耍 |
|
||||
| 狐狸跳过了一条小溪,狗在后面追赶 |
|
||||
| 狐狸和狗都喜欢在森林里探险 |
|
||||
| 狐狸跳过了一个大石头,狗却没有跳过去 |
|
||||
| 狐狸和狗在阳光下休息,享受着温暖的时光 |
|
||||
| 狐狸跳过了一个小山坡,狗在后面慢慢地走 |
|
||||
| 狐狸和狗一起找到了一颗闪闪发光的宝石 |
|
||||
| 狐狸跳过了一个小水坑,狗在旁边看着 |
|
||||
+----------------------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 -跳过') order by ts;
|
||||
|
||||
+----------------------------------------+
|
||||
| fox |
|
||||
+----------------------------------------+
|
||||
| 狐狸和狗是好朋友,它们一起玩耍 |
|
||||
| 狐狸和狗都喜欢在森林里探险 |
|
||||
| 狐狸和狗在阳光下休息,享受着温暖的时光 |
|
||||
| 狐狸和狗一起找到了一颗闪闪发光的宝石 |
|
||||
+----------------------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 跳过 -石头') order by ts;
|
||||
|
||||
+----------------------------------------+
|
||||
| fox |
|
||||
+----------------------------------------+
|
||||
| 快速的棕色狐狸跳过了懒狗 |
|
||||
| 这只狐狸非常聪明,跳过了高高的栅栏 |
|
||||
| 狐狸跳过了一条小溪,狗在后面追赶 |
|
||||
| 狐狸跳过了一个小山坡,狗在后面慢慢地走 |
|
||||
| 狐狸跳过了一个小水坑,狗在旁边看着 |
|
||||
+----------------------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '(狐狸 OR 狗) AND 森林') order by ts;
|
||||
|
||||
+----------------------------+
|
||||
| fox |
|
||||
+----------------------------+
|
||||
| 狐狸和狗都喜欢在森林里探险 |
|
||||
+----------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND (跳过 OR 追赶)') order by ts;
|
||||
|
||||
+----------------------------------------+
|
||||
| fox |
|
||||
+----------------------------------------+
|
||||
| 快速的棕色狐狸跳过了懒狗 |
|
||||
| 这只狐狸非常聪明,跳过了高高的栅栏 |
|
||||
| 狐狸跳过了一条小溪,狗在后面追赶 |
|
||||
| 狐狸跳过了一个大石头,狗却没有跳过去 |
|
||||
| 狐狸跳过了一个小山坡,狗在后面慢慢地走 |
|
||||
| 狐狸跳过了一个小水坑,狗在旁边看着 |
|
||||
+----------------------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND -(跳过 OR 追赶)') order by ts;
|
||||
|
||||
+----------------------------------------+
|
||||
| fox |
|
||||
+----------------------------------------+
|
||||
| 狐狸和狗是好朋友,它们一起玩耍 |
|
||||
| 狐狸和狗都喜欢在森林里探险 |
|
||||
| 狐狸和狗在阳光下休息,享受着温暖的时光 |
|
||||
| 狐狸和狗一起找到了一颗闪闪发光的宝石 |
|
||||
+----------------------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 跳过 AND (小溪 OR 石头)') order by ts;
|
||||
|
||||
+--------------------------------------+
|
||||
| fox |
|
||||
+--------------------------------------+
|
||||
| 狐狸跳过了一条小溪,狗在后面追赶 |
|
||||
| 狐狸跳过了一个大石头,狗却没有跳过去 |
|
||||
+--------------------------------------+
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 跳过 AND -(石头 OR 栅栏)') order by ts;
|
||||
|
||||
+----------------------------------------+
|
||||
| fox |
|
||||
+----------------------------------------+
|
||||
| 快速的棕色狐狸跳过了懒狗 |
|
||||
| 狐狸跳过了一条小溪,狗在后面追赶 |
|
||||
| 狐狸跳过了一个小山坡,狗在后面慢慢地走 |
|
||||
| 狐狸跳过了一个小水坑,狗在旁边看着 |
|
||||
+----------------------------------------+
|
||||
|
||||
drop table fox_zh;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
|
||||
@@ -55,3 +55,42 @@ select fox from fox where matches(fox, 'over -(fox AND jumps)') order by ts;
|
||||
select fox from fox where matches(fox, 'over AND -(-(fox OR jumps))') order by ts;
|
||||
|
||||
drop table fox;
|
||||
|
||||
create table fox_zh (
|
||||
ts timestamp time index,
|
||||
fox string,
|
||||
);
|
||||
|
||||
insert into fox_zh values
|
||||
(1, '快速的棕色狐狸跳过了懒狗'),
|
||||
(2, '这只狐狸非常聪明,跳过了高高的栅栏'),
|
||||
(3, '狐狸和狗是好朋友,它们一起玩耍'),
|
||||
(4, '狐狸跳过了一条小溪,狗在后面追赶'),
|
||||
(5, '狐狸和狗都喜欢在森林里探险'),
|
||||
(6, '狐狸跳过了一个大石头,狗却没有跳过去'),
|
||||
(7, '狐狸和狗在阳光下休息,享受着温暖的时光'),
|
||||
(8, '狐狸跳过了一个小山坡,狗在后面慢慢地走'),
|
||||
(9, '狐狸和狗一起找到了一颗闪闪发光的宝石'),
|
||||
(10, '狐狸跳过了一个小水坑,狗在旁边看着');
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 跳过') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 OR 狗') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 狗') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 -跳过') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 跳过 -石头') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '(狐狸 OR 狗) AND 森林') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND (跳过 OR 追赶)') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND -(跳过 OR 追赶)') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 跳过 AND (小溪 OR 石头)') order by ts;
|
||||
|
||||
select fox from fox_zh where matches(fox, '狐狸 AND 跳过 AND -(石头 OR 栅栏)') order by ts;
|
||||
|
||||
drop table fox_zh;
|
||||
|
||||
Reference in New Issue
Block a user