From a84cf5ec670f99e0ed6dfabd9f91f7acd5e523c8 Mon Sep 17 00:00:00 2001 From: yihong Date: Mon, 4 Aug 2025 03:08:36 +0800 Subject: [PATCH] chore: update jieba tantivy-jieba and tantivy version (#6637) * chore: update jieba tantivy-jieba and tantivy version Signed-off-by: yihong0618 * fix: address comments Signed-off-by: yihong0618 --------- Signed-off-by: yihong0618 --- Cargo.lock | 293 ++++++++++++++++++++++++++++++++++--------- src/index/Cargo.toml | 6 +- 2 files changed, 236 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2871750516..20f746fce6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + [[package]] name = "aead" version = "0.5.2" @@ -1425,6 +1431,31 @@ dependencies = [ "piper", ] +[[package]] +name = "bon" +version = "3.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d9ef19ae5263a138da9a86871eca537478ab0332a7770bac7e3f08b801f89f" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "577ae008f2ca11ca7641bd44601002ee5ab49ef0af64846ce1ab6057218a5cc1" +dependencies = [ + "darling 0.21.0", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.100", +] + [[package]] name = "borsh" version = "1.5.1" @@ -1828,7 +1859,7 @@ checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" dependencies = [ "chrono", "chrono-tz-build", - "phf", + "phf 0.11.2", ] [[package]] @@ -1838,7 +1869,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" dependencies = [ "parse-zoneinfo", - "phf_codegen", + "phf_codegen 0.11.2", ] [[package]] @@ -3261,6 +3292,16 @@ dependencies = [ "darling_macro 0.20.10", ] +[[package]] +name = "darling" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a79c4acb1fd5fa3d9304be4c76e031c54d2e92d172a393e24b19a14fe8532fe9" +dependencies = [ + "darling_core 0.21.0", + "darling_macro 0.21.0", +] + [[package]] name = "darling_core" version = "0.14.4" @@ -3289,6 +3330,20 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "darling_core" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74875de90daf30eb59609910b84d4d368103aaec4c924824c6799b28f77d6a1d" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.11.1", + "syn 2.0.100", +] + [[package]] name = "darling_macro" version = "0.14.4" @@ -3311,6 +3366,23 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "darling_macro" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79f8e61677d5df9167cd85265f8e5f64b215cdea3fb55eebc3e622e44c7a146" +dependencies = [ + "darling_core 0.21.0", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" + [[package]] name = "dashmap" version = "6.1.0" @@ -4237,9 +4309,9 @@ checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" [[package]] name = "downcast-rs" -version = "1.2.1" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" +checksum = "ea8a8b81cacc08888170eef4d13b775126db426d0b348bee9d18c2c1eaf123cf" [[package]] name = "duration-str" @@ -5983,6 +6055,29 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" +[[package]] +name = "include-flate" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df49c16750695486c1f34de05da5b7438096156466e7f76c38fcdf285cf0113e" +dependencies = [ + "include-flate-codegen", + "lazy_static", + "libflate", +] + +[[package]] +name = "include-flate-codegen" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c5b246c6261be723b85c61ecf87804e8ea4a35cb68be0ff282ed84b95ffe7d7" +dependencies = [ + "libflate", + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "include_dir" version = "0.7.4" @@ -6171,9 +6266,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", ] [[package]] @@ -6303,17 +6395,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] -name = "jieba-rs" -version = "0.7.0" +name = "jieba-macros" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e2b0210dc78b49337af9e49d7ae41a39dceac6e5985613f1cf7763e2f76a25" +checksum = "6105f38f083bb1a79ad523bd32fa0d8ffcb6abd2fc4da9da203c32bca5b6ace3" +dependencies = [ + "phf_codegen 0.12.1", +] + +[[package]] +name = "jieba-rs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47982a320106da83b0c5d6aec0fb83e109f0132b69670b063adaa6fa5b4f3f4a" dependencies = [ "cedarwood", - "derive_builder 0.20.1", "fxhash", - "lazy_static", - "phf", - "phf_codegen", + "include-flate", + "jieba-macros", + "phf 0.12.1", "regex", ] @@ -6754,6 +6854,30 @@ version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +[[package]] +name = "libflate" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" +dependencies = [ + "adler32", + "core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" +dependencies = [ + "core2", + "hashbrown 0.14.5", + "rle-decode-fast", +] + [[package]] name = "libfuzzer-sys" version = "0.4.7" @@ -6784,7 +6908,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -7145,11 +7269,10 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "measure_time" -version = "0.8.3" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc" +checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" dependencies = [ - "instant", "log", ] @@ -8706,9 +8829,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" -version = "0.7.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558" +checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" dependencies = [ "stable_deref_trait", ] @@ -9065,7 +9188,17 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ - "phf_shared", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared 0.12.1", + "serde", ] [[package]] @@ -9074,8 +9207,18 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efbdcb6f01d193b17f0b9c3360fa7e0e620991b193ff08702f78b3ce365d7e61" +dependencies = [ + "phf_generator 0.12.1", + "phf_shared 0.12.1", ] [[package]] @@ -9084,10 +9227,20 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ - "phf_shared", + "phf_shared 0.11.2", "rand 0.8.5", ] +[[package]] +name = "phf_generator" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cbb1126afed61dd6368748dae63b1ee7dc480191c6262a3b4ff1e29d86a6c5b" +dependencies = [ + "fastrand", + "phf_shared 0.12.1", +] + [[package]] name = "phf_shared" version = "0.11.2" @@ -9097,6 +9250,15 @@ dependencies = [ "siphasher 0.3.11", ] +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher 1.0.1", +] + [[package]] name = "pin-project" version = "1.1.5" @@ -9685,7 +9847,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools 0.11.0", + "itertools 0.14.0", "log", "multimap", "once_cell", @@ -9731,7 +9893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.100", @@ -10309,9 +10471,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -10584,6 +10746,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + [[package]] name = "roaring" version = "0.10.9" @@ -11152,9 +11320,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -11171,9 +11339,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -11628,9 +11796,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "sketches-ddsketch" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" +checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" dependencies = [ "serde", ] @@ -12200,7 +12368,7 @@ checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" dependencies = [ "new_debug_unreachable", "parking_lot 0.12.3", - "phf_shared", + "phf_shared 0.11.2", "precomputed-hash", ] @@ -12559,14 +12727,15 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tantivy" -version = "0.22.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8d0582f186c0a6d55655d24543f15e43607299425c5ad8352c242b914b31856" +checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" dependencies = [ "aho-corasick", "arc-swap", "base64 0.22.1", "bitpacking", + "bon", "byteorder", "census", "crc32fast", @@ -12576,20 +12745,20 @@ dependencies = [ "fnv", "fs4", "htmlescape", - "itertools 0.12.1", + "hyperloglogplus", + "itertools 0.14.0", "levenshtein_automata", "log", "lru", "lz4_flex", "measure_time", "memmap2", - "num_cpus", "once_cell", "oneshot", "rayon", "regex", "rust-stemmers", - "rustc-hash 1.1.0", + "rustc-hash 2.0.0", "serde", "serde_json", "sketches-ddsketch", @@ -12602,7 +12771,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror 1.0.64", + "thiserror 2.0.12", "time", "uuid", "winapi", @@ -12611,22 +12780,22 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" -version = "0.6.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df" +checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494" dependencies = [ "bitpacking", ] [[package]] name = "tantivy-columnar" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e" +checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344" dependencies = [ "downcast-rs", "fastdivide", - "itertools 0.12.1", + "itertools 0.14.0", "serde", "tantivy-bitpacker", "tantivy-common", @@ -12636,9 +12805,9 @@ dependencies = [ [[package]] name = "tantivy-common" -version = "0.7.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4" +checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f" dependencies = [ "async-trait", "byteorder", @@ -12660,9 +12829,9 @@ dependencies = [ [[package]] name = "tantivy-jieba" -version = "0.11.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f2fe65c125f0d76d06f0f2ce9fbb9287b53f0dafb51a6270d984a840e2f16c1" +checksum = "3b08147cc130e323ecc522117927b198bec617fe1df562a0b6449905858d0363" dependencies = [ "jieba-rs", "lazy_static", @@ -12671,19 +12840,23 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" -version = "0.22.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82" +checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a" dependencies = [ "nom", + "serde", + "serde_json", ] [[package]] name = "tantivy-sstable" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e" +checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416" dependencies = [ + "futures-util", + "itertools 0.14.0", "tantivy-bitpacker", "tantivy-common", "tantivy-fst", @@ -12692,9 +12865,9 @@ dependencies = [ [[package]] name = "tantivy-stacker" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8" +checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1" dependencies = [ "murmurhash32", "rand_distr", @@ -12703,9 +12876,9 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04" +checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d" dependencies = [ "serde", ] @@ -13168,7 +13341,7 @@ dependencies = [ "log", "parking_lot 0.12.3", "percent-encoding", - "phf", + "phf 0.11.2", "pin-project-lite", "postgres-protocol", "postgres-types", @@ -14376,7 +14549,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index c4b7057895..e0732ded4d 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -22,7 +22,7 @@ fst.workspace = true futures.workspace = true greptime-proto.workspace = true itertools.workspace = true -jieba-rs = "0.7" +jieba-rs = "0.8" lazy_static.workspace = true mockall.workspace = true pin-project.workspace = true @@ -34,8 +34,8 @@ roaring = "0.10" serde.workspace = true serde_json.workspace = true snafu.workspace = true -tantivy = { version = "0.22", features = ["zstd-compression"] } -tantivy-jieba = "0.11.0" +tantivy = { version = "0.24", features = ["zstd-compression"] } +tantivy-jieba = "0.16" tokio.workspace = true tokio-util.workspace = true uuid.workspace = true