From 29cc209b94d858b99d9b67f83baf4b146f40cc18 Mon Sep 17 00:00:00 2001
From: tuna2134
Date: Mon, 9 Sep 2024 17:01:27 +0000
Subject: [PATCH] =?UTF-8?q?g2p=E5=AE=8C=E6=88=90=EF=BC=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Amended by hand after review: word2ph now keeps the per-token counts (it was
shadowed by a second `let` and always came out as [1, 1, 1]), the debug
println! calls are gone, distribute_phone no longer panics when n_word is 0,
and g2p checks that word2ph adds up to the number of phones. The object ids
in the "index" line still name the blobs of the original revision.

 sbv2_core/src/text.rs | 118 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 114 insertions(+), 4 deletions(-)

diff --git a/sbv2_core/src/text.rs b/sbv2_core/src/text.rs
index 748b0d6..1e0a858 100644
--- a/sbv2_core/src/text.rs
+++ b/sbv2_core/src/text.rs
@@ -110,7 +110,23 @@ impl JTalkProcess {
         }
     }
 
-    pub fn g2p(&self) -> Result<()> {
+    /// Produces the phone, tone and word-to-phone sequences for this instance.
+    ///
+    /// Returns `(phones, tones, word2ph)`. `phones` and `tones` are parallel and
+    /// padded with a `"_"` phone of tone 0 at both ends. `word2ph` holds the
+    /// number of phones assigned to each token, plus a leading and trailing 1
+    /// for the padding phones.
+    ///
+    /// # Errors
+    ///
+    /// Propagates errors from the helper steps, and returns
+    /// `Error::ValueError` when the counts in `word2ph` do not add up to the
+    /// number of phones.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `kata_to_phoneme_list` fails, because its result is unwrapped.
+    pub fn g2p(&self) -> Result<(Vec<String>, Vec<i32>, Vec<i32>)> {
         let phone_tone_list_wo_punct = self.g2phone_tone_wo_punct()?;
         let (seq_text, seq_kata) = self.text_to_seq_kata()?;
         let sep_phonemes = JTalkProcess::handle_long(
@@ -119,9 +135,103 @@ impl JTalkProcess {
                 .map(|x| JTalkProcess::kata_to_phoneme_list(x.clone()).unwrap())
                 .collect(),
         );
-        println!("{:?}", sep_phonemes);
-        println!("{:?}", seq_kata);
-        Ok(())
+        // Flatten the per-word phoneme lists into one sequence that still
+        // includes punctuation.
+        let phone_w_punct: Vec<String> = sep_phonemes.iter().flatten().cloned().collect();
+        let phone_tone_list =
+            JTalkProcess::align_tones(phone_w_punct, phone_tone_list_wo_punct)?;
+
+        // Punctuation stays a single token; any other word becomes one token
+        // per character.
+        let sep_tokenized: Vec<Vec<String>> = seq_text
+            .iter()
+            .map(|text| {
+                if PUNCTUATIONS.contains(&text.as_str()) {
+                    vec![text.clone()]
+                } else {
+                    text.chars().map(|c| c.to_string()).collect()
+                }
+            })
+            .collect();
+
+        // The leading and trailing 1 belong to the "_" padding phones added
+        // below; the entries in between come from the tokens.
+        let mut word2ph = vec![1];
+        for (token, phoneme) in sep_tokenized.iter().zip(sep_phonemes.iter()) {
+            let phone_len = phoneme.len() as i32;
+            let word_len = token.len() as i32;
+            word2ph.extend(JTalkProcess::distribute_phone(phone_len, word_len));
+        }
+        word2ph.push(1);
+
+        let mut padded = vec![("_".to_string(), 0)];
+        padded.extend(phone_tone_list);
+        padded.push(("_".to_string(), 0));
+        let (phones, tones): (Vec<String>, Vec<i32>) = padded.into_iter().unzip();
+
+        // Every phone must be counted by exactly one entry of `word2ph`.
+        let counted: i32 = word2ph.iter().sum();
+        if counted as usize != phones.len() {
+            return Err(Error::ValueError(format!(
+                "word2ph counts {} phones but {} were produced",
+                counted,
+                phones.len()
+            )));
+        }
+
+        Ok((phones, tones, word2ph))
+    }
+
+    /// Splits `n_phone` phones over `n_word` tokens as evenly as possible.
+    ///
+    /// Phones are handed out one at a time to the token that currently holds
+    /// the fewest, the earliest such token on a tie. Returns an empty vector
+    /// when `n_word` is not positive, since no token can receive a phone.
+    fn distribute_phone(n_phone: i32, n_word: i32) -> Vec<i32> {
+        if n_word <= 0 {
+            return Vec::new();
+        }
+        let mut phones_per_word = vec![0; n_word as usize];
+        for _ in 0..n_phone {
+            // `min_by_key` yields the first minimum, so ties go to the left.
+            if let Some(fewest) = phones_per_word.iter_mut().min_by_key(|count| **count) {
+                *fewest += 1;
+            }
+        }
+        phones_per_word
+    }
+
+    /// Pairs every phone in `phone_with_punct` with a tone.
+    ///
+    /// `phone_tone_list` is consumed in order: a phone equal to the next
+    /// unconsumed entry takes that entry's tone. Punctuation that does not
+    /// match, and any phone left once the list is used up, gets tone 0.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::ValueError` when a non-punctuation phone differs from
+    /// the next unconsumed entry.
+    fn align_tones(
+        phone_with_punct: Vec<String>,
+        phone_tone_list: Vec<(String, i32)>,
+    ) -> Result<Vec<(String, i32)>> {
+        let mut result: Vec<(String, i32)> = Vec::with_capacity(phone_with_punct.len());
+        let mut tone_index = 0;
+        for phone in phone_with_punct {
+            match phone_tone_list.get(tone_index) {
+                Some((expected, tone)) if *expected == phone => {
+                    result.push((phone, *tone));
+                    tone_index += 1;
+                }
+                Some(_) if !PUNCTUATIONS.contains(&phone.as_str()) => {
+                    return Err(Error::ValueError(format!("Mismatched phoneme: {}", phone)));
+                }
+                // Unmatched punctuation, or the tone list is used up.
+                _ => result.push((phone, 0)),
+            }
+        }
+
+        Ok(result)
     }
 
     fn handle_long(mut sep_phonemes: Vec<Vec<String>>) -> Vec<Vec<String>> {