From 38f9d98d1a173f54bcaaad4b7b3d2a6128e86ef8 Mon Sep 17 00:00:00 2001 From: tuna2134 Date: Sat, 14 Sep 2024 03:25:53 +0000 Subject: [PATCH] fix bug --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- content.txt | 8 +------- sbv2_core/src/jtalk.rs | 11 +++++++++++ sbv2_core/src/norm.rs | 5 +++-- sbv2_core/src/tts.rs | 3 ++- 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f75ea99..ecfcf26 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1361,9 +1361,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "33ea5043e58958ee56f3e15a90aee535795cd7dfd319846288d93c5b57d85cbe" [[package]] name = "onig" diff --git a/Cargo.toml b/Cargo.toml index 822de4d..3785a69 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,4 +6,4 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"] anyhow = "1.0.86" dotenvy = "0.15.7" env_logger = "0.11.5" -ndarray = "0.16.1" \ No newline at end of file +ndarray = "0.16.1" diff --git a/content.txt b/content.txt index 5d7ae0a..9c95a0f 100644 --- a/content.txt +++ b/content.txt @@ -1,7 +1 @@ -日本語を母国語としない人々にとって、「日本語は非常に難しい言語である」と言われています。 -その理由として、 -・漢字、ひらがな、カタカナ、と表記が何種類もある。 -・同一の漢字でも音読みと訓読みがある -・地名の読みが難しい -・主語、述語が省略される -などが挙げられます。 \ No newline at end of file +10,000年前までコロナが流行っていました \ No newline at end of file diff --git a/sbv2_core/src/jtalk.rs b/sbv2_core/src/jtalk.rs index 16dcad5..662980c 100644 --- a/sbv2_core/src/jtalk.rs +++ b/sbv2_core/src/jtalk.rs @@ -54,6 +54,17 @@ impl JTalk { Ok(Self { jpreprocess }) } + pub fn num2word(&self, text: &str) -> Result { + let mut parsed = self.jpreprocess.text_to_njd(text)?; + parsed.preprocess(); + let texts: Vec = parsed + .nodes + .iter() + .map(|x| x.get_string().to_string()) + .collect(); + Ok(texts.join("")) + } + pub fn process_text(&self, text: &str) -> Result { let parsed = self.jpreprocess.run_frontend(text)?; let jtalk_process = JTalkProcess::new(Arc::clone(&self.jpreprocess), parsed); diff --git a/sbv2_core/src/norm.rs b/sbv2_core/src/norm.rs index a79ca05..3b8333a 100644 --- a/sbv2_core/src/norm.rs +++ b/sbv2_core/src/norm.rs @@ -120,7 +120,8 @@ pub fn replace_punctuation(mut text: String) -> String { for (k, v) in REPLACE_MAP.iter() { text = text.replace(k, v); } - PUNCTUATION_CLEANUP_PATTERN + let content = PUNCTUATION_CLEANUP_PATTERN .replace_all(&text, "") - .to_string() + .to_string(); + content } diff --git a/sbv2_core/src/tts.rs b/sbv2_core/src/tts.rs index 314795a..108ef94 100644 --- a/sbv2_core/src/tts.rs +++ b/sbv2_core/src/tts.rs @@ -123,7 +123,8 @@ impl TTSModelHolder { &self, text: &str, ) -> Result<(Array2, Array1, Array1, Array1)> { - let normalized_text = norm::normalize_text(text); + let text = self.jtalk.num2word(text)?; + let normalized_text = norm::normalize_text(&text); let process = self.jtalk.process_text(&normalized_text)?; let (phones, tones, mut word2ph) = process.g2p()?;