diff --git a/Cargo.lock b/Cargo.lock index 83bd615..6f67a47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,20 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.2", + "once_cell", + "serde", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -244,6 +258,15 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.21" @@ -267,6 +290,21 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "console" version = "0.15.11" @@ -405,6 +443,15 @@ dependencies = [ "syn", ] +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" +dependencies = [ + "serde", +] + [[package]] name = "der" version = "0.7.10" @@ -761,10 +808,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "wasm-bindgen", ] [[package]] @@ -774,9 +819,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi", "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ -1122,18 +1169,9 @@ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itertools" -version = "0.11.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ "either", ] @@ -1301,12 +1339,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - [[package]] name = "libc" version = "0.2.172" @@ -1635,11 +1667,11 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "onig" -version = "6.4.0" +version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.0", "libc", "once_cell", "onig_sys", @@ -1647,9 +1679,9 @@ dependencies = [ [[package]] name = "onig_sys" -version = "69.8.1" +version = "69.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" dependencies = [ "cc", "pkg-config", @@ -1835,7 +1867,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -2017,19 +2049,27 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.3", ] [[package]] name = "rand_chacha" -version = "0.3.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.3", ] [[package]] @@ -2037,8 +2077,14 @@ name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.3.2", ] [[package]] @@ -2059,12 +2105,12 @@ dependencies = [ [[package]] name = "rayon-cond" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" +checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f" dependencies = [ "either", - "itertools 0.11.0", + "itertools", "rayon", ] @@ -2521,6 +2567,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -2704,24 +2756,26 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokenizers" -version = "0.21.1" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3169b3195f925496c895caee7978a335d49218488ef22375267fba5a46a40bd7" +checksum = "4c3846d8588abed0daba25a0e47edd58ea15e450a6088b2575f5116fdb0b27ca" dependencies = [ + "ahash", "aho-corasick", + "compact_str", + "dary_heap", "derive_builder", "esaxx-rs", "fancy-regex", - "getrandom 0.2.16", + "getrandom 0.3.2", "indicatif", - "itertools 0.13.0", - "lazy_static", + "itertools", "log", "macro_rules_attribute", "monostate", "onig", "paste", - "rand", + "rand 0.9.1", "rayon", "rayon-cond", "regex", diff --git a/crates/sbv2_core/Cargo.toml b/crates/sbv2_core/Cargo.toml index 4c35941..2d2a1db 100644 --- a/crates/sbv2_core/Cargo.toml +++ b/crates/sbv2_core/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1.0.210", features = ["derive"] } serde_json = "1.0.128" tar = "0.4.41" thiserror = "2.0.11" -tokenizers = { version = "0.21.0", default-features = false } +tokenizers = { version = "0.21.2", default-features = false } zstd = "0.13.2" [features] diff --git a/crates/sbv2_core/src/jtalk.rs b/crates/sbv2_core/src/jtalk.rs index 778d98d..a4646ab 100644 --- a/crates/sbv2_core/src/jtalk.rs +++ b/crates/sbv2_core/src/jtalk.rs @@ -123,7 +123,7 @@ impl JTalkProcess { .map(|(_letter, tone)| *tone) .collect(); if tone_values.len() == 1 { - assert!(tone_values == hash_set![0], "{:?}", tone_values); + assert!(tone_values == hash_set![0], "{tone_values:?}"); Ok(phone_tone_list) } else if tone_values.len() == 2 { if tone_values == hash_set![0, 1] { @@ -226,12 +226,12 @@ impl JTalkProcess { } else if PUNCTUATIONS.contains(&phone.as_str()) { result.push((phone, 0)); } else { - println!("phones {:?}", phone_with_punct); - println!("phone_tone_list: {:?}", phone_tone_list); - println!("result: {:?}", result); - println!("tone_index: {:?}", tone_index); - println!("phone: {:?}", phone); - return Err(Error::ValueError(format!("Mismatched phoneme: {}", phone))); + println!("phones {phone_with_punct:?}"); + println!("phone_tone_list: {phone_tone_list:?}"); + println!("result: {result:?}"); + println!("tone_index: {tone_index:?}"); + println!("phone: {phone:?}"); + return Err(Error::ValueError(format!("Mismatched phoneme: {phone}"))); } } @@ -276,8 +276,7 @@ impl JTalkProcess { } if !KATAKANA_PATTERN.is_match(&text) { return Err(Error::ValueError(format!( - "Input must be katakana only: {}", - text + "Input must be katakana only: {text}" ))); } @@ -285,7 +284,7 @@ impl JTalkProcess { let mora = mora.to_string(); let (consonant, vowel) = MORA_KATA_TO_MORA_PHONEMES.get(&mora).unwrap(); if consonant.is_none() { - text = text.replace(&mora, &format!(" {}", vowel)); + text = text.replace(&mora, &format!(" {vowel}")); } else { text = text.replace( &mora, @@ -319,7 +318,7 @@ impl JTalkProcess { let (string, pron) = self.parse_to_string_and_pron(parts.clone()); let mut yomi = pron.replace('’', ""); let word = replace_punctuation(string); - assert!(!yomi.is_empty(), "Empty yomi: {}", word); + assert!(!yomi.is_empty(), "Empty yomi: {word}"); if yomi == "、" { if !word .chars() @@ -330,7 +329,7 @@ impl JTalkProcess { yomi = word.clone(); } } else if yomi == "?" { - assert!(word == "?", "yomi `?` comes from: {}", word); + assert!(word == "?", "yomi `?` comes from: {word}"); yomi = "?".to_string(); } seq_text.push(word);