diff --git a/sbv2_core/src/main.rs b/sbv2_core/src/main.rs
index 925e735..0212b03 100644
--- a/sbv2_core/src/main.rs
+++ b/sbv2_core/src/main.rs
@@ -1,4 +1,4 @@
-use std::{fs, time::Instant};
+use std::fs;
 
 use sbv2_core::tts;
 use std::env;
@@ -6,7 +6,7 @@ use std::env;
 fn main() -> anyhow::Result<()> {
     dotenvy::dotenv_override().ok();
     env_logger::init();
-    let text = "眠たい";
+    let text = "眠たい。";
     let ident = "aaa";
     let mut tts_holder = tts::TTSModelHolder::new(
         &fs::read(env::var("BERT_MODEL_PATH")?)?,
@@ -14,44 +14,9 @@ fn main() -> anyhow::Result<()> {
     )?;
     tts_holder.load_sbv2file(ident, fs::read(env::var("MODEL_PATH")?)?)?;
 
-    let (bert_ori, phones, tones, lang_ids) = tts_holder.parse_text(text)?;
+    // One call replaces the manual parse_text -> get_style_vector -> synthesize sequence.
+    let audio = tts_holder.easy_synthesize(ident, text, 0, tts::SynthesizeOptions::default())?;
+    fs::write("output.wav", &audio)?;
 
-    let style_vector = tts_holder.get_style_vector(ident, 0, 1.0)?;
-    let data = tts_holder.synthesize(
-        ident,
-        bert_ori.to_owned(),
-        phones.clone(),
-        tones.clone(),
-        lang_ids.clone(),
-        style_vector.clone(),
-        0.0,
-        0.5,
-    )?;
-    std::fs::write("output.wav", data)?;
-    let now = Instant::now();
-    for _ in 0..10 {
-        tts_holder.parse_text(text)?;
-    }
-    println!(
-        "Time taken(parse_text): {}ms/it",
-        now.elapsed().as_millis() / 10
-    );
-    let now = Instant::now();
-    for _ in 0..10 {
-        tts_holder.synthesize(
-            ident,
-            bert_ori.to_owned(),
-            phones.clone(),
-            tones.clone(),
-            lang_ids.clone(),
-            style_vector.clone(),
-            0.0,
-            1.0,
-        )?;
-    }
-    println!(
-        "Time taken(synthesize): {}ms/it",
-        now.elapsed().as_millis() / 10
-    );
     Ok(())
 }
diff --git a/sbv2_core/src/tts.rs b/sbv2_core/src/tts.rs
index b031e6f..280d843 100644
--- a/sbv2_core/src/tts.rs
+++ b/sbv2_core/src/tts.rs
@@ -226,7 +226,7 @@ impl TTSModelHolder {
                 )?;
                 audios.push(audio);
                 if i != texts.len() - 1 {
-                    // 44100 * 0.5s 無音区間
+                    // 0.5 s silent gap: 44100 * 0.5 = 22050 (44100 presumably the sample rate).
                     audios.push(Array3::zeros((1, 22050, 1)));
                 }
             }