From 073ae6f952b2c32f10c0495f8633845878ba5975 Mon Sep 17 00:00:00 2001 From: tuna2134 Date: Fri, 13 Sep 2024 09:12:32 +0000 Subject: [PATCH] =?UTF-8?q?add=20easy=20synthesize=E3=81=AE=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sbv2_core/src/main.rs | 44 ++++--------------------------------------- sbv2_core/src/tts.rs | 1 - 2 files changed, 4 insertions(+), 41 deletions(-) diff --git a/sbv2_core/src/main.rs b/sbv2_core/src/main.rs index 925e735..0212b03 100644 --- a/sbv2_core/src/main.rs +++ b/sbv2_core/src/main.rs @@ -1,4 +1,4 @@ -use std::{fs, time::Instant}; +use std::fs; use sbv2_core::tts; use std::env; @@ -6,7 +6,7 @@ use std::env; fn main() -> anyhow::Result<()> { dotenvy::dotenv_override().ok(); env_logger::init(); - let text = "眠たい"; + let text = "眠たい。"; let ident = "aaa"; let mut tts_holder = tts::TTSModelHolder::new( &fs::read(env::var("BERT_MODEL_PATH")?)?, @@ -14,44 +14,8 @@ fn main() -> anyhow::Result<()> { )?; tts_holder.load_sbv2file(ident, fs::read(env::var("MODEL_PATH")?)?)?; - let (bert_ori, phones, tones, lang_ids) = tts_holder.parse_text(text)?; + let audio = tts_holder.easy_synthesize(ident, &text, 0, tts::SynthesizeOptions::default())?; + fs::write("output.wav", &audio)?; - let style_vector = tts_holder.get_style_vector(ident, 0, 1.0)?; - let data = tts_holder.synthesize( - ident, - bert_ori.to_owned(), - phones.clone(), - tones.clone(), - lang_ids.clone(), - style_vector.clone(), - 0.0, - 0.5, - )?; - std::fs::write("output.wav", data)?; - let now = Instant::now(); - for _ in 0..10 { - tts_holder.parse_text(text)?; - } - println!( - "Time taken(parse_text): {}ms/it", - now.elapsed().as_millis() / 10 - ); - let now = Instant::now(); - for _ in 0..10 { - tts_holder.synthesize( - ident, - bert_ori.to_owned(), - phones.clone(), - tones.clone(), - lang_ids.clone(), - style_vector.clone(), - 0.0, - 1.0, - )?; - } - println!( - "Time taken(synthesize): {}ms/it", - now.elapsed().as_millis() / 10 - ); Ok(()) } diff --git a/sbv2_core/src/tts.rs b/sbv2_core/src/tts.rs index b031e6f..280d843 100644 --- a/sbv2_core/src/tts.rs +++ b/sbv2_core/src/tts.rs @@ -226,7 +226,6 @@ impl TTSModelHolder { )?; audios.push(audio); if i != texts.len() - 1 { - // 44100 * 0.5s 無音区間 audios.push(Array3::zeros((1, 22050, 1))); } }