diff --git a/crates/sbv2_core/src/jtalk.rs b/crates/sbv2_core/src/jtalk.rs index 7ef7232..c242f65 100644 --- a/crates/sbv2_core/src/jtalk.rs +++ b/crates/sbv2_core/src/jtalk.rs @@ -196,6 +196,11 @@ impl JTalkProcess { Ok((phones, tones, new_word2ph)) } + pub fn g2kana_tone(&self) -> Result> { + let (phones, tones, _) = self.g2p()?; + Ok(phone_tone_to_kana(phones, tones)) + } + fn distribute_phone(n_phone: i32, n_word: i32) -> Vec { let mut phones_per_word = vec![0; n_word as usize]; for _ in 0..n_phone { diff --git a/crates/sbv2_core/src/tts_util.rs b/crates/sbv2_core/src/tts_util.rs index 1334128..58cfbeb 100644 --- a/crates/sbv2_core/src/tts_util.rs +++ b/crates/sbv2_core/src/tts_util.rs @@ -10,13 +10,12 @@ use tokenizers::Tokenizer; pub fn preprocess_parse_text( text: &str, jtalk: &jtalk::JTalk, -) -> Result<(Vec, Vec, Vec, String, JTalkProcess)> { +) -> Result<(String, JTalkProcess)> { let text = jtalk.num2word(text)?; let normalized_text = norm::normalize_text(&text); let process = jtalk.process_text(&normalized_text)?; - let (phones, tones, word2ph) = process.g2p()?; - Ok((phones, tones, word2ph, normalized_text, process)) + Ok((normalized_text, process)) } /// Parse text and return the input for synthesize @@ -35,8 +34,8 @@ pub async fn parse_text( Box>>>, >, ) -> Result<(Array2, Array1, Array1, Array1)> { - let (phones, tones, mut word2ph, normalized_text, process) = - preprocess_parse_text(text, jtalk)?; + let (normalized_text, process) = preprocess_parse_text(text, jtalk)?; + let (phones, tones, mut word2ph) = process.g2p()?; let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones); let phones = utils::intersperse(&phones, 0); diff --git a/crates/sbv2_voicevox/src/main.rs b/crates/sbv2_voicevox/src/main.rs index e552d6c..2e21525 100644 --- a/crates/sbv2_voicevox/src/main.rs +++ b/crates/sbv2_voicevox/src/main.rs @@ -13,9 +13,8 @@ struct RequestCreateAudioQuery { } async fn create_audio_query(Query(request): Query) -> AppResult<()> { - let (phones, tones, _, normalized_text, process) = - preprocess_parse_text(&request.text, &JTalk::new()?)?; - println!("{:?}", phones); + let (normalized_text, process) = preprocess_parse_text(&request.text, &JTalk::new()?)?; + let kana_tone_list = process.g2kana_tone()?; Ok(()) }