Files
greptimedb/src/index/Cargo.toml
Ruihang Xia ed2dff6d27 feat: count underscore in English tokenizer and improve performance (#6660)
* feat: count underscore in English tokenizer and improve performance

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update lock file

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update test results

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* assert lookup table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* handle utf8 alphanumeric

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* finalize

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-08-06 18:53:42 +08:00

54 lines
1.2 KiB
TOML

# Manifest for the `index` crate.
# Version, edition, and license are inherited from the workspace manifest.
[package]
name = "index"
version.workspace = true
edition.workspace = true
license.workspace = true

# Use the lint configuration declared at the workspace level.
[lints]
workspace = true

# Entries marked `.workspace = true` take their version and features from the
# workspace manifest; entries with an explicit version string are pinned here.
[dependencies]
async-trait.workspace = true
asynchronous-codec = "0.7.0"
bytemuck.workspace = true
bytes.workspace = true
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
fastbloom = "0.8"
fst.workspace = true
futures.workspace = true
greptime-proto.workspace = true
itertools.workspace = true
jieba-rs = "0.8"
lazy_static.workspace = true
# NOTE(review): mockall is listed as a regular dependency, not a dev-dependency;
# presumably mocks are exposed to other crates -- confirm before moving it.
mockall.workspace = true
pin-project.workspace = true
prost.workspace = true
puffin.workspace = true
regex.workspace = true
regex-automata.workspace = true
roaring = "0.10"
serde.workspace = true
serde_json.workspace = true
snafu.workspace = true
tantivy = { version = "0.24", features = ["zstd-compression"] }
tantivy-jieba = "0.16"
tokio.workspace = true
tokio-util.workspace = true
uuid.workspace = true

# Only needed for tests and benchmarks.
# NOTE(review): tokio and tokio-util are also declared in [dependencies] with the
# same workspace spec; these entries look redundant -- confirm before removing.
[dev-dependencies]
common-test-util.workspace = true
criterion = "0.4"
rand.workspace = true
tempfile.workspace = true
tokio.workspace = true
tokio-util.workspace = true

# `harness = false` turns off the built-in libtest bench harness so the target
# supplies its own `main` (presumably via criterion -- verify against the
# benchmark source).
[[bench]]
name = "tokenizer_bench"
harness = false