From a5138fc5b61608a5c34a292652367d2c5082cd88 Mon Sep 17 00:00:00 2001 From: tuna2134 Date: Mon, 9 Sep 2024 15:31:25 +0000 Subject: [PATCH] fixed --- sbv2_core/src/lib.rs | 2 +- sbv2_core/src/mora.rs | 22 +++++++++++++-------- sbv2_core/src/text.rs | 46 +++++++++++++++++++++++++++++++++---------- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/sbv2_core/src/lib.rs b/sbv2_core/src/lib.rs index 476b053..b035dd1 100644 --- a/sbv2_core/src/lib.rs +++ b/sbv2_core/src/lib.rs @@ -1,8 +1,8 @@ pub mod bert; pub mod error; +pub mod mora; pub mod norm; pub mod text; -pub mod mora; pub fn add(left: usize, right: usize) -> usize { left + right diff --git a/sbv2_core/src/mora.rs b/sbv2_core/src/mora.rs index 6feb701..de7f54f 100644 --- a/sbv2_core/src/mora.rs +++ b/sbv2_core/src/mora.rs @@ -1,5 +1,5 @@ -use serde::{Deserialize, Serialize}; use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; use std::collections::HashMap; #[derive(Debug, Serialize, Deserialize)] @@ -25,10 +25,16 @@ static MORA_LIST_ADDITIONAL: Lazy> = Lazy::new(|| { data.additional }); -pub static MORA_KATA_TO_MORA_PHONEMES: Lazy, String)>> = Lazy::new(|| { - let mut map = HashMap::new(); - for mora in MORA_LIST_MINIMUM.iter().chain(MORA_LIST_ADDITIONAL.iter()) { - map.insert(mora.mora.clone(), (mora.consonant.clone(), mora.vowel.clone())); - } - map -}); \ No newline at end of file +pub static MORA_KATA_TO_MORA_PHONEMES: Lazy, String)>> = + Lazy::new(|| { + let mut map = HashMap::new(); + for mora in MORA_LIST_MINIMUM.iter().chain(MORA_LIST_ADDITIONAL.iter()) { + map.insert( + mora.mora.clone(), + (mora.consonant.clone(), mora.vowel.clone()), + ); + } + map + }); + +pub const VOWELS: [&str; 6] = ["a", "i", "u", "e", "o", "N"]; diff --git a/sbv2_core/src/text.rs b/sbv2_core/src/text.rs index 47fbf0b..54d0f74 100644 --- a/sbv2_core/src/text.rs +++ b/sbv2_core/src/text.rs @@ -1,6 +1,6 @@ use crate::error::{Error, Result}; +use crate::mora::{MORA_KATA_TO_MORA_PHONEMES, VOWELS}; use crate::norm::{replace_punctuation, PUNCTUATIONS}; -use crate::mora::MORA_KATA_TO_MORA_PHONEMES; use jpreprocess::*; use once_cell::sync::Lazy; use regex::Regex; @@ -113,23 +113,44 @@ impl JTalkProcess { pub fn g2p(&self) -> Result<()> { let phone_tone_list_wo_punct = self.g2phone_tone_wo_punct()?; let (seq_text, seq_kata) = self.text_to_seq_kata()?; - println!("{:?}", seq_text); + let sep_phonemes = JTalkProcess::handle_long( + seq_kata + .iter() + .map(|x| JTalkProcess::kata_to_phoneme_list(x.clone()).unwrap()) + .collect(), + ); + println!("{:?}", sep_phonemes); println!("{:?}", seq_kata); Ok(()) } - fn handle_long(sep_phonemes: Vec>) -> Vec> { - for (i, phonemes) in sep_phonemes.iter().enumerate() { - if phonemes.len() == 0 { + fn handle_long(mut sep_phonemes: Vec>) -> Vec> { + for i in 0..sep_phonemes.len() { + if sep_phonemes[i].len() == 0 { continue; } - if phonemes[0] == "ー" { + if sep_phonemes[i][0] == "ー" { if i != 0 { let prev_phoneme = sep_phonemes[i - 1].last().unwrap(); + if VOWELS.contains(&prev_phoneme.as_str()) { + sep_phonemes[i][0] = prev_phoneme.clone(); + } else { + sep_phonemes[i][0] = "ー".to_string(); + } + } else { + sep_phonemes[i][0] = "ー".to_string(); + } + } + if sep_phonemes[i].contains(&"ー".to_string()) { + for e in 0..sep_phonemes[i].len() { + if sep_phonemes[i][e] == "ー" { + sep_phonemes[i][e] = + sep_phonemes[i][e - 1].chars().last().unwrap().to_string(); + } } } } - vec![] + sep_phonemes } fn kata_to_phoneme_list(mut text: String) -> Result> { @@ -157,8 +178,11 @@ impl JTalkProcess { if PUNCTUATIONS.contains(&text.as_str()) { return Ok(text.chars().map(|x| x.to_string()).collect()); } - if KATAKANA_PATTERN.is_match(&text) { - return Err(Error::ValueError(format!("Input must be katakana only: {}", text))); + if !KATAKANA_PATTERN.is_match(&text) { + return Err(Error::ValueError(format!( + "Input must be katakana only: {}", + text + ))); } fn mora2phonemes(mora: &str) -> String { @@ -182,7 +206,9 @@ impl JTalkProcess { } result }; - text = LONG_PATTERN.replace_all(&text, long_replacement).to_string(); + text = LONG_PATTERN + .replace_all(&text, long_replacement) + .to_string(); return Ok(text.trim().split(" ").map(|x| x.to_string()).collect()); }