From 27cb54da4fb778b5d858f694fc7f6f4c9de5e970 Mon Sep 17 00:00:00 2001 From: tuna2134 Date: Tue, 10 Sep 2024 00:20:34 +0000 Subject: [PATCH] fixed --- sbv2_core/src/main.rs | 8 ++++---- sbv2_core/src/norm.rs | 9 ++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/sbv2_core/src/main.rs b/sbv2_core/src/main.rs index 9a0f7ab..046c31f 100644 --- a/sbv2_core/src/main.rs +++ b/sbv2_core/src/main.rs @@ -1,14 +1,14 @@ -use sbv2_core::{bert, error, jtalk}; +use sbv2_core::{bert, error, jtalk, nlp, norm}; fn main() -> error::Result<()> { let text = "こんにちは,世界!"; - let normalized_text = jtalk::normalize_text(text); + let normalized_text = norm::normalize_text(text); println!("{}", normalized_text); let jtalk = jtalk::JTalk::new()?; - let (phones, tones, _) = jtalk.g2p(&normalized_text)?; - println!("{:?}", tones); + let (phones, tones, word2ph) = jtalk.g2p(&normalized_text)?; + let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones); let tokenizer = jtalk::get_tokenizer()?; println!("{:?}", tokenizer); diff --git a/sbv2_core/src/norm.rs b/sbv2_core/src/norm.rs index 5139427..7897d57 100644 --- a/sbv2_core/src/norm.rs +++ b/sbv2_core/src/norm.rs @@ -69,6 +69,13 @@ __PUNCTUATION_CLEANUP_PATTERN = re.compile( ) */ +pub const ZH_SYMBOLS: [&str; 65] = [ + "E", "En", "a", "ai", "an", "ang", "ao", "b", "c", "ch", "d", "e", "ei", "en", "eng", "er", + "f", "g", "h", "i", "i0", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "ir", "iu", + "j", "k", "l", "m", "n", "o", "ong", "ou", "p", "q", "r", "s", "sh", "t", "u", "ua", "uai", + "uan", "uang", "ui", "un", "uo", "v", "van", "ve", "vn", "w", "x", "y", "z", "zh", "AA", "EE", + "OO", +]; pub const JP_SYMBOLS: [&str; 42] = [ "N", "a", "a:", "b", "by", "ch", "d", "dy", "e", "e:", "f", "g", "gy", "h", "hy", "i", "i:", "j", "k", "ky", "m", "my", "n", "ny", "o", "o:", "p", "py", "q", "r", "ry", "s", "sh", "t", @@ -83,7 +90,7 @@ pub static PUNCTUATION_SYMBOLS: Lazy> = Lazy::new(|| { }); const PAD: &str = "_"; pub static SYMBOLS: Lazy> = Lazy::new(|| { - let mut symbols = JP_SYMBOLS.to_vec(); + let mut symbols = vec![PAD]; symbols.append(&mut JP_SYMBOLS.to_vec()); symbols.append(&mut PUNCTUATION_SYMBOLS.to_vec()); symbols