use once_cell::sync::Lazy; use sbv2_core::*; use wasm_bindgen::prelude::*; use wasm_bindgen_futures::JsFuture; mod array_helper; static JTALK: Lazy = Lazy::new(|| jtalk::JTalk::new().unwrap()); #[wasm_bindgen] pub struct TokenizerWrap { tokenizer: tokenizer::Tokenizer, } #[wasm_bindgen] pub fn load_tokenizer(s: js_sys::JsString) -> Result { if let Some(s) = s.as_string() { Ok(TokenizerWrap { tokenizer: tokenizer::Tokenizer::from_bytes(s.as_bytes()) .map_err(|e| JsError::new(&e.to_string()))?, }) } else { Err(JsError::new("invalid utf8")) } } #[wasm_bindgen] pub struct StyleVectorWrap { style_vector: ndarray::Array2, } #[wasm_bindgen] pub fn load_sbv2file(buf: js_sys::Uint8Array) -> Result { let (style_vectors, vits2) = sbv2file::parse_sbv2file(array_helper::array8_to_vec8(buf))?; let buf = array_helper::vec8_to_array8(vits2); Ok(array_helper::vec_to_array(vec![ StyleVectorWrap { style_vector: style::load_style(style_vectors)?, } .into(), buf.into(), ])) } #[allow(clippy::too_many_arguments)] #[wasm_bindgen] pub async fn synthesize( text: &str, tokenizer: &TokenizerWrap, bert_predict_fn: js_sys::Function, synthesize_fn: js_sys::Function, sdp_ratio: f32, length_scale: f32, style_id: i32, style_weight: f32, style_vectors: &StyleVectorWrap, ) -> Result { let synthesize_wrap = |bert_ori: ndarray::Array2, x_tst: ndarray::Array1, tones: ndarray::Array1, lang_ids: ndarray::Array1, style_vector: ndarray::Array1, sdp_ratio: f32, length_scale: f32| async move { let arr = array_helper::vec_to_array(vec![ array_helper::array2_f32_to_array(bert_ori).into(), array_helper::vec64_to_array64(x_tst.to_vec()).into(), array_helper::vec64_to_array64(tones.to_vec()).into(), array_helper::vec64_to_array64(lang_ids.to_vec()).into(), array_helper::vec_f32_to_array_f32(style_vector.to_vec()).into(), sdp_ratio.into(), length_scale.into(), ]); let res = synthesize_fn .apply(&js_sys::Object::new().into(), &arr) .map_err(|e| { error::Error::OtherError(e.as_string().unwrap_or("unknown".to_string())) })?; let res = JsFuture::from(Into::::into(res)) .await .map_err(|e| { sbv2_core::error::Error::OtherError(e.as_string().unwrap_or("unknown".to_string())) })?; array_helper::array_to_array3_f32(res) }; let (bert_ori, phones, tones, lang_ids) = tts_util::parse_text( text, &JTALK, &tokenizer.tokenizer, |token_ids: Vec, attention_masks: Vec| { Box::pin(async move { let arr = array_helper::vec_to_array(vec![ array_helper::vec64_to_array64(token_ids).into(), array_helper::vec64_to_array64(attention_masks).into(), ]); let res = bert_predict_fn .apply(&js_sys::Object::new().into(), &arr) .map_err(|e| { error::Error::OtherError(e.as_string().unwrap_or("unknown".to_string())) })?; let res = JsFuture::from(Into::::into(res)) .await .map_err(|e| { sbv2_core::error::Error::OtherError( e.as_string().unwrap_or("unknown".to_string()), ) })?; array_helper::array_to_array2_f32(res) }) }, ) .await?; let audio = synthesize_wrap( bert_ori.to_owned(), phones, tones, lang_ids, style::get_style_vector(&style_vectors.style_vector, style_id, style_weight)?, sdp_ratio, length_scale, ) .await?; Ok(array_helper::vec8_to_array8(tts_util::array_to_vec(audio)?)) }