feat: phone_tone_to_kana

This commit is contained in:
Masato Kikuchi
2025-03-27 13:17:20 +09:00
parent 22ed557395
commit e915e2bc84
3 changed files with 53 additions and 7 deletions

View File

@@ -1,5 +1,5 @@
use crate::error::{Error, Result};
use crate::mora::{MORA_KATA_TO_MORA_PHONEMES, VOWELS};
use crate::mora::{CONSONANTS, MORA_KATA_TO_MORA_PHONEMES, MORA_PHONEMES_TO_MORA_KATA, VOWELS};
use crate::norm::{replace_punctuation, PUNCTUATIONS};
use jpreprocess::{kind, DefaultTokenizer, JPreprocess, SystemDictionaryConfig, UserDictionary};
use once_cell::sync::Lazy;
@@ -76,6 +76,30 @@ static MORA_PATTERN: Lazy<Vec<String>> = Lazy::new(|| {
});
/// Matches a single word character (`\w`, capture group 1) followed by zero or
/// more `ー` characters (capture group 2). Compiled once on first use; the
/// pattern is a constant, so the `unwrap` can only fire if the literal is edited
/// into an invalid regex.
static LONG_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\w)(ー*)").unwrap());
/// Groups a phoneme sequence into moras and returns `(text, tone)` pairs.
///
/// `phones` and `tones` are walked in lockstep, each element paired with its
/// successor. Because of that pairing the loop runs over all elements except
/// the last one: the final phoneme/tone is only ever seen as a "next" value and
/// is never emitted. Iteration also stops at the shorter of the two inputs.
///
/// For every visited phoneme:
/// * if it is listed in `PUNCTUATIONS`, it is emitted unchanged with its tone;
/// * if it is listed in `CONSONANTS`, it is held back as the start of a mora;
/// * otherwise it completes the pending mora, the combined phoneme string is
///   looked up in `MORA_PHONEMES_TO_MORA_KATA`, and the mapped string is emitted
///   with the tone of this (closing) phoneme.
///
/// # Returns
///
/// The emitted pairs, in input order, as owned values.
///
/// # Panics
///
/// * if a consonant is encountered while another consonant is still pending;
/// * if a consonant's tone differs from the tone of the following phoneme;
/// * if the assembled phoneme string has no entry in
///   `MORA_PHONEMES_TO_MORA_KATA`.
fn phone_tone_to_kana(phones: Vec<String>, tones: Vec<i32>) -> Vec<(String, i32)> {
    let mut results = Vec::new();
    let mut current_mora = String::new();

    let phone_pairs = phones.iter().zip(phones.iter().skip(1));
    let tone_pairs = tones.iter().zip(tones.iter().skip(1));

    for ((phone, _next_phone), (tone, next_tone)) in phone_pairs.zip(tone_pairs) {
        if PUNCTUATIONS.contains(&phone.as_str()) {
            results.push((phone.clone(), *tone));
            continue;
        }
        if CONSONANTS.contains(phone) {
            // A consonant must open a fresh mora and share its tone with the
            // phoneme that follows it.
            assert_eq!(
                current_mora, "",
                "unexpected consonant {:?} while a mora is still pending",
                phone
            );
            assert_eq!(
                tone, next_tone,
                "tone of consonant {:?} differs from the following phoneme",
                phone
            );
            current_mora.push_str(phone);
        } else {
            current_mora.push_str(phone);
            let kana = MORA_PHONEMES_TO_MORA_KATA
                .get(&current_mora)
                .unwrap_or_else(|| panic!("no mora entry for phonemes {:?}", current_mora));
            results.push((kana.clone(), *tone));
            current_mora.clear();
        }
    }

    results
}
pub struct JTalkProcess {
jpreprocess: Arc<JPreprocessType>,
parsed: Vec<String>,

View File

@@ -25,6 +25,21 @@ static MORA_LIST_ADDITIONAL: Lazy<Vec<Mora>> = Lazy::new(|| {
data.additional
});
/// Lookup table from a mora's phoneme spelling to its `mora` string
/// (katakana, going by the table's name).
///
/// Every entry of `MORA_LIST_MINIMUM` contributes one pair: the key is the
/// entry's consonant (empty when it has none) immediately followed by its
/// vowel, and the value is a copy of the entry's `mora` field. Should two
/// entries yield the same key, the later one replaces the earlier.
pub static MORA_PHONEMES_TO_MORA_KATA: Lazy<HashMap<String, String>> = Lazy::new(|| {
    MORA_LIST_MINIMUM
        .iter()
        .map(|entry| {
            let onset = entry.consonant.as_deref().unwrap_or("");
            (format!("{}{}", onset, entry.vowel), entry.mora.clone())
        })
        .collect::<HashMap<_, _>>()
});
pub static MORA_KATA_TO_MORA_PHONEMES: Lazy<HashMap<String, (Option<String>, String)>> =
Lazy::new(|| {
let mut map = HashMap::new();
@@ -37,4 +52,12 @@ pub static MORA_KATA_TO_MORA_PHONEMES: Lazy<HashMap<String, (Option<String>, Str
map
});
/// Every consonant that appears in a value of `MORA_KATA_TO_MORA_PHONEMES`.
///
/// Entries whose consonant slot is `None` are skipped. The list is built from
/// the map's values without de-duplication, so a consonant shared by several
/// moras appears several times, and the order follows the map's iteration
/// order.
pub static CONSONANTS: Lazy<Vec<String>> = Lazy::new(|| {
    MORA_KATA_TO_MORA_PHONEMES
        .values()
        .filter_map(|(onset, _)| onset.as_ref())
        .cloned()
        .collect()
});
/// The six phoneme symbols treated as vowels: `a`, `i`, `u`, `e`, `o` and `N`.
/// NOTE(review): `N` is presumably the moraic nasal (ん) — confirm against the mora table.
pub const VOWELS: [&str; 6] = ["a", "i", "u", "e", "o", "N"];