diff --git a/crates/sbv2_core/src/jtalk.rs b/crates/sbv2_core/src/jtalk.rs index 8359784..594c14d 100644 --- a/crates/sbv2_core/src/jtalk.rs +++ b/crates/sbv2_core/src/jtalk.rs @@ -1,5 +1,5 @@ use crate::error::{Error, Result}; -use crate::mora::{MORA_KATA_TO_MORA_PHONEMES, VOWELS}; +use crate::mora::{CONSONANTS, MORA_KATA_TO_MORA_PHONEMES, MORA_PHONEMES_TO_MORA_KATA, VOWELS}; use crate::norm::{replace_punctuation, PUNCTUATIONS}; use jpreprocess::{kind, DefaultTokenizer, JPreprocess, SystemDictionaryConfig, UserDictionary}; use once_cell::sync::Lazy; @@ -76,6 +76,30 @@ static MORA_PATTERN: Lazy> = Lazy::new(|| { }); static LONG_PATTERN: Lazy = Lazy::new(|| Regex::new(r"(\w)(ー*)").unwrap()); +fn phone_tone_to_kana(phones: Vec, tones: Vec) { + let mut results = Vec::new(); + let mut current_mora = String::new(); + for ((phone, next_phone), (tone, next_tone)) in phones + .iter() + .zip(phones.iter().skip(1)) + .zip(tones.iter().zip(tones.iter().skip(1))) + { + if PUNCTUATIONS.contains(&phone.clone().as_str()) { + results.push((phone, tone)); + continue; + } + if CONSONANTS.contains(&phone.clone()) { + assert_eq!(current_mora, ""); + assert_eq!(tone, next_tone); + current_mora = phone.to_string() + } else { + current_mora += phone; + results.push((MORA_PHONEMES_TO_MORA_KATA.get(&current_mora).unwrap(), tone)); + current_mora = String::new(); + } + } +} + pub struct JTalkProcess { jpreprocess: Arc, parsed: Vec, diff --git a/crates/sbv2_core/src/mora.rs b/crates/sbv2_core/src/mora.rs index de7f54f..4becd67 100644 --- a/crates/sbv2_core/src/mora.rs +++ b/crates/sbv2_core/src/mora.rs @@ -25,6 +25,21 @@ static MORA_LIST_ADDITIONAL: Lazy> = Lazy::new(|| { data.additional }); +pub static MORA_PHONEMES_TO_MORA_KATA: Lazy> = Lazy::new(|| { + let mut map = HashMap::new(); + for mora in MORA_LIST_MINIMUM.iter() { + map.insert( + format!( + "{}{}", + mora.consonant.clone().unwrap_or("".to_string()), + mora.vowel + ), + mora.mora.clone(), + ); + } + map +}); + pub static 
MORA_KATA_TO_MORA_PHONEMES: Lazy, String)>> = Lazy::new(|| { let mut map = HashMap::new(); @@ -37,4 +52,12 @@ pub static MORA_KATA_TO_MORA_PHONEMES: Lazy, Str map }); +pub static CONSONANTS: Lazy> = Lazy::new(|| { + let consonants = MORA_KATA_TO_MORA_PHONEMES + .values() + .filter_map(|(consonant, _)| consonant.clone()) + .collect::>(); + consonants +}); + pub const VOWELS: [&str; 6] = ["a", "i", "u", "e", "o", "N"]; diff --git a/crates/sbv2_voicevox/src/main.rs b/crates/sbv2_voicevox/src/main.rs index 3c4e757..e552d6c 100644 --- a/crates/sbv2_voicevox/src/main.rs +++ b/crates/sbv2_voicevox/src/main.rs @@ -1,7 +1,7 @@ -use axum::{routing::get, Router, extract::Query}; +use axum::{extract::Query, routing::get, Router}; use sbv2_core::{jtalk::JTalk, tts_util::preprocess_parse_text}; -use tokio::net::TcpListener; use serde::Deserialize; +use tokio::net::TcpListener; use error::AppResult; @@ -12,10 +12,9 @@ struct RequestCreateAudioQuery { text: String, } -async fn create_audio_query( - Query(request): Query, -) -> AppResult<()> { - let (phones, tones, mut word2ph, normalized_text, process) = preprocess_parse_text(&request.text, &JTalk::new()?)?; +async fn create_audio_query(Query(request): Query) -> AppResult<()> { + let (phones, tones, _, normalized_text, process) = + preprocess_parse_text(&request.text, &JTalk::new()?)?; println!("{:?}", phones); Ok(()) }