Files
greptimedb/tests/cases/standalone/common/function/matches_term.sql
discord9 73325acfe4 fix: zh same underscore behavior (#8002)
* fix: zh same underscore behavior

Signed-off-by: discord9 <discord9@163.com>

* fix: only add token with _ from en analyzer

Signed-off-by: discord9 <discord9@163.com>

* test: neg sqlness case

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
2026-04-22 03:21:36 +00:00

189 lines
6.4 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- Test basic term matching
-- Expect: true
SELECT matches_term('cat!', 'cat') as result;
-- Test phrase matching with spaces
-- Expect: true
SELECT matches_term('warning:hello world!', 'hello world') as result;
-- Test numbers in term
SELECT matches_term('v1.0!', 'v1.0') as result;
-- Test case sensitivity
-- Expect: true
SELECT matches_term('Cat', 'Cat') as result;
-- Expect: false
SELECT matches_term('cat', 'Cat') as result;
-- Test empty string handling
-- Expect: true
SELECT matches_term('', '') as result;
-- Expect: false
SELECT matches_term('any', '') as result;
-- Expect: false
SELECT matches_term('', 'any') as result;
-- Test partial matches (should fail)
-- Expect: false
SELECT matches_term('category', 'cat') as result;
-- Expect: false
SELECT matches_term('rebooted', 'boot') as result;
-- Test adjacent alphanumeric characters
SELECT matches_term('cat5', 'cat') as result;
SELECT matches_term('dogcat', 'dog') as result;
-- Test leading non-alphanumeric
-- Expect: true
SELECT matches_term('dog/cat', '/cat') as result;
-- Expect: true
SELECT matches_term('dog/cat', 'dog/') as result;
-- Expect: true
SELECT matches_term('dog/cat', 'dog/cat') as result;
-- Test unicode characters
-- Expect: true
SELECT matches_term('café>', 'café') as result;
-- Expect: true
SELECT matches_term('русский!', 'русский') as result;
-- Phase 1 mixed Chinese and numeric behavior
SELECT matches_term('登录手机号18888888888的动态key', '手机号') as result;
SELECT matches_term('登录手机号18888888888的动态key', '18888888888') as result;
SELECT matches_term('登录手机号18888888888的动态key', '手机') as result;
SELECT matches_term('登录手机号18888888888的动态key', '机号') as result;
SELECT matches_term('登录手机号18888888888的动态key', '机号1888') as result;
SELECT matches_term('中国农业银行', '农业') as result;
SELECT matches_term('中国农业银行账号', '行账号') as result;
SELECT matches_term('错误error日志', 'error') as result;
SELECT matches_term('trace_id=abc', 'trace_id') as result;
SELECT matches_term('ship__ship', 'ship__ship') as result;
SELECT matches_term('__IDENTIFIER__', '__IDENTIFIER__') as result;
SELECT matches_term('_ship', '_ship') as result;
SELECT matches_term('ship_', 'ship_') as result;
-- Test complete word matching
CREATE TABLE logs (
`id` TIMESTAMP TIME INDEX,
`log_message` STRING
);
INSERT INTO logs VALUES
(1, 'An error occurred!'),
(2, 'Critical error: system failure'),
(3, 'error-prone'),
(4, 'errors'),
(5, 'error123'),
(6, 'errorLogs'),
(7, 'Version v1.0 released'),
(8, 'v1.0!'),
(9, 'v1.0a'),
(10, 'v1.0beta'),
(11, 'GET /app/start'),
(12, 'Command: /start-prosess'),
(13, 'Command: /start'),
(14, 'start'),
(15, 'start/stop'),
(16, 'Alert: system failure detected'),
(17, 'system failure!'),
(18, 'system-failure'),
(19, 'system failure2023'),
(20, 'critical error: system failure'),
(21, 'critical failure detected'),
(22, 'critical issue'),
(23, 'failure imminent'),
(24, 'Warning: high temperature'),
(25, 'WARNING: system overload'),
(26, 'warned'),
(27, 'warnings');
-- Test complete word matching for 'error'
-- Expect:
-- 1|An error occurred!|true
-- 2|Critical error: system failure|true
-- 3|error-prone|true
-- 4|errors|false
-- 5|error123|false
-- 6|errorLogs|false
SELECT `id`, `log_message`, matches_term(`log_message`, 'error') as `matches_error` FROM logs WHERE `id` <= 6 ORDER BY `id`;
-- Test complete word matching for 'v1.0'
-- Expect:
-- 7|Version v1.0 released|true
-- 8|v1.0!|true
-- 9|v1.0a|false
-- 10|v1.0beta|false
SELECT `id`, `log_message`, matches_term(`log_message`, 'v1.0') as `matches_version` FROM logs WHERE `id` BETWEEN 7 AND 10 ORDER BY `id`;
-- Test complete word matching for '/start'
-- Expect:
-- 11|GET /app/start|true
-- 12|Command: /start-prosess|true
-- 13|Command: /start|true
-- 14|start|false
-- 15|start/stop|false
SELECT `id`, `log_message`, matches_term(`log_message`, '/start') as `matches_start` FROM logs WHERE `id` BETWEEN 11 AND 15 ORDER BY `id`;
-- Test phrase matching for 'system failure'
-- Expect:
-- 16|Alert: system failure detected|true
-- 17|system failure!|true
-- 18|system-failure|false
-- 19|system failure2023|false
SELECT `id`, `log_message`, matches_term(`log_message`, 'system failure') as `matches_phrase` FROM logs WHERE `id` BETWEEN 16 AND 19 ORDER BY `id`;
-- Test multi-word matching using AND
-- Expect:
-- 20|critical error: system failure|true|true|true
-- 21|critical failure detected|true|true|true
-- 22|critical issue|true|false|false
-- 23|failure imminent|false|true|false
SELECT `id`, `log_message`,
matches_term(`log_message`, 'critical') as `matches_critical`,
matches_term(`log_message`, 'failure') as `matches_failure`,
matches_term(`log_message`, 'critical') AND matches_term(`log_message`, 'failure') as `matches_both`
FROM logs WHERE `id` BETWEEN 20 AND 23 ORDER BY `id`;
-- Test case-insensitive matching using lower()
-- Expect:
-- 24|Warning: high temperature|true
-- 25|WARNING: system overload|true
-- 26|warned|false
-- 27|warnings|false
SELECT `id`, `log_message`, matches_term(lower(`log_message`), 'warning') as `matches_warning` FROM logs WHERE `id` >= 24 ORDER BY `id`;
DROP TABLE logs;
CREATE TABLE zh_logs (
`id` TIMESTAMP TIME INDEX,
`log_message` STRING FULLTEXT INDEX WITH(analyzer = 'Chinese', backend = 'bloom', case_sensitive = 'true', false_positive_rate = '0.01', granularity = '10240')
);
INSERT INTO zh_logs VALUES
(1, '[2026/04/09/ 13:56:11.031]2026-04-09 13:56:11.031 - [ trace_id=340a6a44b0bd8e37bb7697ss7da61ff0 span_id=085ff5ttf1e0a23b trace_flags=01] - [http-nio-8081-exec-16] INFO c.h.p.xx.web.service.impl.CCCXForwardKKKServiceImpl.pushout(188) - 登录手机号18888888888的动态key829889AC8 ship_ship ship__ship __IDENTIFIER__ _ship ship_'),
(2, '哈基米曼波');
ADMIN flush_table('zh_logs');
SELECT * FROM zh_logs where `log_message` @@ 'trace_id';
SELECT * FROM zh_logs where `log_message` @@ 'ship_ship';
SELECT * FROM zh_logs where `log_message` @@ 'ship__ship';
SELECT * FROM zh_logs where `log_message` @@ '__IDENTIFIER__';
SELECT * FROM zh_logs where `log_message` @@ '_ship';
SELECT * FROM zh_logs where `log_message` @@ 'ship_';
SELECT * FROM zh_logs where `log_message` @@ '登录_id';
SELECT * FROM zh_logs where `log_message` @@ '手机号_trace';
SELECT * FROM zh_logs where `log_message` @@ '手机';
DROP TABLE zh_logs;