mirror of
https://github.com/neodyland/sbv2-api.git
synced 2025-12-26 17:19:58 +00:00
feat: phone_tone_to_kana
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
use crate::error::{Error, Result};
|
||||
use crate::mora::{MORA_KATA_TO_MORA_PHONEMES, VOWELS};
|
||||
use crate::mora::{CONSONANTS, MORA_KATA_TO_MORA_PHONEMES, MORA_PHONEMES_TO_MORA_KATA, VOWELS};
|
||||
use crate::norm::{replace_punctuation, PUNCTUATIONS};
|
||||
use jpreprocess::{kind, DefaultTokenizer, JPreprocess, SystemDictionaryConfig, UserDictionary};
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -76,6 +76,30 @@ static MORA_PATTERN: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
});
|
||||
/// Lazily compiled regex with two capture groups: a single word character
/// (`\w`) followed by a run of zero or more `ー` characters.
static LONG_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\w)(ー*)").unwrap());
|
||||
|
||||
fn phone_tone_to_kana(phones: Vec<String>, tones: Vec<i32>) {
|
||||
let mut results = Vec::new();
|
||||
let mut current_mora = String::new();
|
||||
for ((phone, next_phone), (tone, next_tone)) in phones
|
||||
.iter()
|
||||
.zip(phones.iter().skip(1))
|
||||
.zip(tones.iter().zip(tones.iter().skip(1)))
|
||||
{
|
||||
if PUNCTUATIONS.contains(&phone.clone().as_str()) {
|
||||
results.push((phone, tone));
|
||||
continue;
|
||||
}
|
||||
if CONSONANTS.contains(&phone.clone()) {
|
||||
assert_eq!(current_mora, "");
|
||||
assert_eq!(tone, next_tone);
|
||||
current_mora = phone.to_string()
|
||||
} else {
|
||||
current_mora += phone;
|
||||
results.push((MORA_PHONEMES_TO_MORA_KATA.get(¤t_mora).unwrap(), tone));
|
||||
current_mora = String::new();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct JTalkProcess {
|
||||
jpreprocess: Arc<JPreprocessType>,
|
||||
parsed: Vec<String>,
|
||||
|
||||
@@ -25,6 +25,21 @@ static MORA_LIST_ADDITIONAL: Lazy<Vec<Mora>> = Lazy::new(|| {
|
||||
data.additional
|
||||
});
|
||||
|
||||
pub static MORA_PHONEMES_TO_MORA_KATA: Lazy<HashMap<String, String>> = Lazy::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
for mora in MORA_LIST_MINIMUM.iter() {
|
||||
map.insert(
|
||||
format!(
|
||||
"{}{}",
|
||||
mora.consonant.clone().unwrap_or("".to_string()),
|
||||
mora.vowel
|
||||
),
|
||||
mora.mora.clone(),
|
||||
);
|
||||
}
|
||||
map
|
||||
});
|
||||
|
||||
pub static MORA_KATA_TO_MORA_PHONEMES: Lazy<HashMap<String, (Option<String>, String)>> =
|
||||
Lazy::new(|| {
|
||||
let mut map = HashMap::new();
|
||||
@@ -37,4 +52,12 @@ pub static MORA_KATA_TO_MORA_PHONEMES: Lazy<HashMap<String, (Option<String>, Str
|
||||
map
|
||||
});
|
||||
|
||||
pub static CONSONANTS: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
let consonants = MORA_KATA_TO_MORA_PHONEMES
|
||||
.values()
|
||||
.filter_map(|(consonant, _)| consonant.clone())
|
||||
.collect::<Vec<_>>();
|
||||
consonants
|
||||
});
|
||||
|
||||
/// Phoneme symbols treated as vowels: `a`, `i`, `u`, `e`, `o`, plus `N`.
// NOTE(review): `N` is presumably the moraic nasal; confirm against the mora table.
pub const VOWELS: [&str; 6] = ["a", "i", "u", "e", "o", "N"];
|
||||
|
||||
Reference in New Issue
Block a user