This commit is contained in:
Masato Kikuchi
2025-03-31 23:35:39 +09:00
parent 633dfc305e
commit a5d783bd65
2 changed files with 34 additions and 3 deletions

View File

@@ -2,6 +2,8 @@ use std::io::Cursor;
use crate::error::Result;
use crate::jtalk::JTalkProcess;
use crate::mora::MORA_KATA_TO_MORA_PHONEMES;
use crate::norm::PUNCTUATIONS;
use crate::{jtalk, nlp, norm, tokenizer, utils};
use hound::{SampleFormat, WavSpec, WavWriter};
use ndarray::{concatenate, s, Array, Array1, Array2, Array3, Axis};
@@ -111,6 +113,7 @@ pub fn parse_text_blocking(
if let Some(given_tones) = given_tones {
tones = given_tones;
}
println!("tones: {:?}", tones);
let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones);
let phones = utils::intersperse(&phones, 0);
@@ -188,3 +191,23 @@ pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> {
writer.finalize()?;
Ok(cursor.into_inner())
}
/// Expands `(mora, tone)` pairs into `(phone, tone)` pairs.
///
/// The returned sequence always starts and ends with a `("_", 0)` entry.
/// Between those, each input pair is handled as follows:
///
/// * If the mora string is listed in `PUNCTUATIONS`, it is emitted unchanged
///   with its tone forced to `0` (the incoming tone is discarded).
/// * Otherwise the mora is looked up in `MORA_KATA_TO_MORA_PHONEMES`, which
///   yields an optional leading phoneme and a mandatory trailing phoneme
///   (presumably consonant and vowel). Each phoneme that is present is
///   emitted as its own entry, carrying the mora's tone.
///
/// # Panics
///
/// Panics if a non-punctuation mora has no entry in
/// `MORA_KATA_TO_MORA_PHONEMES`.
pub fn kata_tone2phone_tone(kata_tone: Vec<(String, i32)>) -> Vec<(String, i32)> {
    let mut phone_tones = vec![("_".to_string(), 0)];

    for (kana, accent) in kata_tone {
        // Punctuation passes through as-is and never carries an accent.
        if PUNCTUATIONS.contains(&kana.as_str()) {
            phone_tones.push((kana, 0));
            continue;
        }

        let (onset, nucleus) = MORA_KATA_TO_MORA_PHONEMES.get(&kana).unwrap();
        // The leading phoneme is optional; the trailing one is always present.
        if let Some(onset) = onset {
            phone_tones.push((onset.to_string(), accent));
        }
        phone_tones.push((nucleus.to_string(), accent));
    }

    phone_tones.push(("_".to_string(), 0));
    phone_tones
}

View File

@@ -6,6 +6,7 @@ use axum::{
Json, Router,
http::header::CONTENT_TYPE,
};
use sbv2_core::tts_util::kata_tone2phone_tone;
use sbv2_core::{jtalk::JTalk, tts::{TTSModelHolder, SynthesizeOptions}, tts_util::preprocess_parse_text};
use serde::{Deserialize, Serialize};
use tokio::{fs, net::TcpListener, sync::Mutex};
@@ -68,9 +69,16 @@ async fn synthesis(
State(state): State<AppState>,
Json(request): Json<RequestSynthesis>,
) -> AppResult<impl IntoResponse> {
let mut tones: Vec<i32> = request.audio_query.iter().map(|query| query.tone).collect();
tones.insert(0, 0);
tones.push(0);
let phone_tone = request
.audio_query
.iter()
.map(|query| (query.kana.clone(), query.tone))
.collect::<Vec<_>>();
let phone_tone = kata_tone2phone_tone(phone_tone);
let tones = phone_tone
.iter()
.map(|(_, tone)| *tone)
.collect::<Vec<_>>();
let buffer = {
let mut tts_model = state.tts_model.lock().await;
tts_model.easy_synthesize_neo(