diff --git a/output.wav b/output.wav
index 0069376..91bad2e 100644
Binary files a/output.wav and b/output.wav differ
diff --git a/sbv2_core/src/bert.rs b/sbv2_core/src/bert.rs
index 021d2bb..50762bf 100644
--- a/sbv2_core/src/bert.rs
+++ b/sbv2_core/src/bert.rs
@@ -25,11 +25,7 @@ pub fn predict(
     let output = outputs.get("output").unwrap();
     let content = output.try_extract_tensor::<f32>()?.to_owned();
-    println!("{:?}", content);
+    let (data, _) = content.clone().into_raw_vec_and_offset();
-    Ok(Array2::from_shape_vec(
-        (content.shape()[0], content.shape()[1]),
-        content.into_raw_vec(),
-    )
-    .unwrap())
+    Ok(Array2::from_shape_vec((content.shape()[0], content.shape()[1]), data).unwrap())
 }
diff --git a/sbv2_core/src/main.rs b/sbv2_core/src/main.rs
index 291e786..b2d9a0f 100644
--- a/sbv2_core/src/main.rs
+++ b/sbv2_core/src/main.rs
@@ -5,12 +5,10 @@ fn main() -> error::Result<()> {
     let text = "隣の客はよくかき食う客だ";
     let normalized_text = norm::normalize_text(text);
-    println!("{}", normalized_text);
     let jtalk = jtalk::JTalk::new()?;
     let (phones, tones, mut word2ph) = jtalk.g2p(&normalized_text)?;
     let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones);
-    println!("{:?}", phones);
     // add black
     let phones = utils::intersperse(&phones, 0);