diff --git a/Cargo.lock b/Cargo.lock index c2c4f42..2970273 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -189,6 +189,21 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -210,6 +225,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + [[package]] name = "byteorder" version = "1.5.0" @@ -556,6 +577,17 @@ dependencies = [ "cc", ] +[[package]] +name = "fancy-regex" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + [[package]] name = "filetime" version = "0.2.25" @@ -643,8 +675,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -985,6 +1019,15 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c06d7aceb8ce626a3318183096aa6dad82f046b3cec5d43e90066d1b07445a2" +[[package]] +name = "js-sys" +version = "0.3.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -1863,6 +1906,17 @@ dependencies = [ "zstd", ] +[[package]] +name = "sbv2_wasm" +version = "0.1.0" +dependencies = [ + "js-sys", + "ndarray", + "once_cell", + "sbv2_core", + "wasm-bindgen", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2118,6 +2172,7 @@ dependencies = [ "aho-corasick", "derive_builder", "esaxx-rs", + "fancy-regex", "getrandom", "indicatif", "itertools 0.12.1", @@ -2344,6 +2399,61 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" + [[package]] name = "webpki-roots" version = "0.26.5" diff --git a/Cargo.toml b/Cargo.toml index eac8581..c7ee18e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,14 +1,15 @@ [workspace] resolver = "2" -members = ["sbv2_api", "sbv2_core", "sbv2_bindings"] +members = ["sbv2_api", "sbv2_core", "sbv2_bindings", "sbv2_wasm"] [workspace.dependencies] anyhow = "1.0.86" dotenvy = "0.15.7" env_logger = "0.11.5" ndarray = "0.16.1" +once_cell = "1.19.0" [profile.release] lto = true debug = false -strip = true \ No newline at end of file +strip = true diff --git a/sbv2_core/Cargo.toml b/sbv2_core/Cargo.toml index c5f1608..70cab04 100644 --- a/sbv2_core/Cargo.toml +++ b/sbv2_core/Cargo.toml @@ -16,20 +16,23 @@ hound = "3.5.1" jpreprocess = { version = "0.10.0", features = ["naist-jdic"] } ndarray.workspace = true num_cpus = "1.16.0" -once_cell = "1.19.0" -ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.6" } +once_cell.workspace = true +ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.6", optional = true } regex = "1.10.6" serde = { version = "1.0.210", features = ["derive"] } serde_json = "1.0.128" tar = "0.4.41" thiserror = "1.0.63" -tokenizers = "0.20.0" +tokenizers = { version = "0.20.0", default-features = false } zstd = "0.13.2" [features] -cuda = ["ort/cuda"] -cuda_tf32 = [] -dynamic = ["ort/load-dynamic"] -directml = ["ort/directml"] -tensorrt = ["ort/tensorrt"] -coreml = ["ort/coreml"] \ No newline at end of file +cuda = ["ort/cuda", "std"] +cuda_tf32 = ["std", "cuda"] +std = ["dep:ort", "tokenizers/progressbar", "tokenizers/onig", "tokenizers/esaxx_fast"] +dynamic = ["ort/load-dynamic", "std"] +directml = ["ort/directml", "std"] +tensorrt = ["ort/tensorrt", "std"] +coreml = ["ort/coreml", "std"] +default = ["std"] +no_std = ["tokenizers/unstable_wasm"] \ No newline at end of file diff --git a/sbv2_core/src/error.rs b/sbv2_core/src/error.rs index 99aadd9..6bb449e 100644 --- a/sbv2_core/src/error.rs +++ b/sbv2_core/src/error.rs @@ -6,6 +6,7 @@ pub enum Error { TokenizerError(#[from] tokenizers::Error), #[error("JPreprocess error: {0}")] JPreprocessError(#[from] jpreprocess::error::JPreprocessError), + #[cfg(feature = "std")] #[error("ONNX error: {0}")] OrtError(#[from] ort::Error), #[error("NDArray error: {0}")] @@ -20,6 +21,8 @@ pub enum Error { HoundError(#[from] hound::Error), #[error("model not found error")] ModelNotFoundError(String), + #[error("other")] + OtherError(String), } pub type Result = std::result::Result; diff --git a/sbv2_core/src/lib.rs b/sbv2_core/src/lib.rs index 23e1fbd..9e7eeac 100644 --- a/sbv2_core/src/lib.rs +++ b/sbv2_core/src/lib.rs @@ -1,11 +1,16 @@ +#[cfg(feature = "std")] pub mod bert; pub mod error; pub mod jtalk; +#[cfg(feature = "std")] pub mod model; pub mod mora; pub mod nlp; pub mod norm; +pub mod sbv2file; pub mod style; pub mod tokenizer; +#[cfg(feature = "std")] pub mod tts; +pub mod tts_util; pub mod utils; diff --git a/sbv2_core/src/main.rs b/sbv2_core/src/main.rs index 5b2f077..3700cd4 100644 --- a/sbv2_core/src/main.rs +++ b/sbv2_core/src/main.rs @@ -1,9 +1,9 @@ +use std::env; use std::fs; -use sbv2_core::tts; -use std::env; - -fn main() -> anyhow::Result<()> { +#[cfg(feature = "std")] +fn main_inner() -> anyhow::Result<()> { + use sbv2_core::tts; dotenvy::dotenv_override().ok(); env_logger::init(); let text = fs::read_to_string("content.txt")?; @@ -19,3 +19,13 @@ fn main() -> anyhow::Result<()> { Ok(()) } +#[cfg(not(feature = "std"))] +fn main_inner() -> anyhow::Result<()> { + Ok(()) +} + +fn main() { + if let Err(e) = main_inner() { + println!("Error: {e}"); + } +} diff --git a/sbv2_core/src/sbv2file.rs b/sbv2_core/src/sbv2file.rs new file mode 100644 index 0000000..97eedb9 --- /dev/null +++ b/sbv2_core/src/sbv2file.rs @@ -0,0 +1,37 @@ +use std::io::{Cursor, Read}; + +use tar::Archive; +use zstd::decode_all; + +use crate::error::{Error, Result}; + +/// Parse a .sbv2 file binary +/// +/// # Examples +/// +/// ```rs +/// parse_sbv2file("tsukuyomi", std::fs::read("tsukuyomi.sbv2")?)?; +/// ``` +pub fn parse_sbv2file>(sbv2_bytes: P) -> Result<(Vec, Vec)> { + let mut arc = Archive::new(Cursor::new(decode_all(Cursor::new(sbv2_bytes.as_ref()))?)); + let mut vits2 = None; + let mut style_vectors = None; + let mut et = arc.entries()?; + while let Some(Ok(mut e)) = et.next() { + let pth = String::from_utf8_lossy(&e.path_bytes()).to_string(); + let mut b = Vec::with_capacity(e.size() as usize); + e.read_to_end(&mut b)?; + match pth.as_str() { + "model.onnx" => vits2 = Some(b), + "style_vectors.json" => style_vectors = Some(b), + _ => continue, + } + } + if style_vectors.is_none() { + return Err(Error::ModelNotFoundError("style_vectors".to_string())); + } + if vits2.is_none() { + return Err(Error::ModelNotFoundError("vits2".to_string())); + } + Ok((style_vectors.unwrap(), vits2.unwrap())) +} diff --git a/sbv2_core/src/tokenizer.rs b/sbv2_core/src/tokenizer.rs index 56997c6..a4125bc 100644 --- a/sbv2_core/src/tokenizer.rs +++ b/sbv2_core/src/tokenizer.rs @@ -1,5 +1,5 @@ use crate::error::Result; -use tokenizers::Tokenizer; +pub use tokenizers::Tokenizer; pub fn get_tokenizer>(p: P) -> Result { let tokenizer = Tokenizer::from_bytes(p)?; diff --git a/sbv2_core/src/tts.rs b/sbv2_core/src/tts.rs index e870b84..4e1eabb 100644 --- a/sbv2_core/src/tts.rs +++ b/sbv2_core/src/tts.rs @@ -1,12 +1,8 @@ use crate::error::{Error, Result}; -use crate::{bert, jtalk, model, nlp, norm, style, tokenizer, utils}; -use hound::{SampleFormat, WavSpec, WavWriter}; -use ndarray::{concatenate, s, Array, Array1, Array2, Array3, Axis}; +use crate::{jtalk, model, style, tokenizer, tts_util}; +use ndarray::{concatenate, Array1, Array2, Array3, Axis}; use ort::Session; -use std::io::{Cursor, Read}; -use tar::Archive; use tokenizers::Tokenizer; -use zstd::decode_all; #[derive(PartialEq, Eq, Clone)] pub struct TTSIdent(String); @@ -78,27 +74,8 @@ impl TTSModelHolder { ident: I, sbv2_bytes: P, ) -> Result<()> { - let mut arc = Archive::new(Cursor::new(decode_all(Cursor::new(sbv2_bytes.as_ref()))?)); - let mut vits2 = None; - let mut style_vectors = None; - let mut et = arc.entries()?; - while let Some(Ok(mut e)) = et.next() { - let pth = String::from_utf8_lossy(&e.path_bytes()).to_string(); - let mut b = Vec::with_capacity(e.size() as usize); - e.read_to_end(&mut b)?; - match pth.as_str() { - "model.onnx" => vits2 = Some(b), - "style_vectors.json" => style_vectors = Some(b), - _ => continue, - } - } - if style_vectors.is_none() { - return Err(Error::ModelNotFoundError("style_vectors".to_string())); - } - if vits2.is_none() { - return Err(Error::ModelNotFoundError("vits2".to_string())); - } - self.load(ident, style_vectors.unwrap(), vits2.unwrap())?; + let (style_vectors, vits2) = crate::sbv2file::parse_sbv2file(sbv2_bytes)?; + self.load(ident, style_vectors, vits2)?; Ok(()) } @@ -151,69 +128,14 @@ impl TTSModelHolder { &self, text: &str, ) -> Result<(Array2, Array1, Array1, Array1)> { - let text = self.jtalk.num2word(text)?; - let normalized_text = norm::normalize_text(&text); - - let process = self.jtalk.process_text(&normalized_text)?; - let (phones, tones, mut word2ph) = process.g2p()?; - let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones); - - let phones = utils::intersperse(&phones, 0); - let tones = utils::intersperse(&tones, 0); - let lang_ids = utils::intersperse(&lang_ids, 0); - for item in &mut word2ph { - *item *= 2; - } - word2ph[0] += 1; - - let text = { - let (seq_text, _) = process.text_to_seq_kata()?; - seq_text.join("") - }; - let (token_ids, attention_masks) = tokenizer::tokenize(&text, &self.tokenizer)?; - - let bert_content = bert::predict(&self.bert, token_ids, attention_masks)?; - - assert!( - word2ph.len() == text.chars().count() + 2, - "{} {}", - word2ph.len(), - normalized_text.chars().count() - ); - - let mut phone_level_feature = vec![]; - for (i, reps) in word2ph.iter().enumerate() { - let repeat_feature = { - let (reps_rows, reps_cols) = (*reps, 1); - let arr_len = bert_content.slice(s![i, ..]).len(); - - let mut results: Array2 = - Array::zeros((reps_rows as usize, arr_len * reps_cols)); - - for j in 0..reps_rows { - for k in 0..reps_cols { - let mut view = results.slice_mut(s![j, k * arr_len..(k + 1) * arr_len]); - view.assign(&bert_content.slice(s![i, ..])); - } - } - results - }; - phone_level_feature.push(repeat_feature); - } - let phone_level_feature = concatenate( - Axis(0), - &phone_level_feature - .iter() - .map(|x| x.view()) - .collect::>(), - )?; - let bert_ori = phone_level_feature.t(); - Ok(( - bert_ori.to_owned(), - phones.into(), - tones.into(), - lang_ids.into(), - )) + crate::tts_util::parse_text( + text, + &self.jtalk, + &self.tokenizer, + |token_ids, attention_masks| { + crate::bert::predict(&self.bert, token_ids, attention_masks) + }, + ) } fn find_model>(&self, ident: I) -> Result<&TTSModel> { @@ -292,26 +214,7 @@ impl TTSModelHolder { options.length_scale, )? }; - Self::array_to_vec(audio_array) - } - - fn array_to_vec(audio_array: Array3) -> Result> { - let spec = WavSpec { - channels: 1, - sample_rate: 44100, - bits_per_sample: 32, - sample_format: SampleFormat::Float, - }; - let mut cursor = Cursor::new(Vec::new()); - let mut writer = WavWriter::new(&mut cursor, spec)?; - for i in 0..audio_array.shape()[0] { - let output = audio_array.slice(s![i, 0, ..]).to_vec(); - for sample in output { - writer.write_sample(sample)?; - } - } - writer.finalize()?; - Ok(cursor.into_inner()) + tts_util::array_to_vec(audio_array) } /// Synthesize text to audio @@ -340,7 +243,7 @@ impl TTSModelHolder { sdp_ratio, length_scale, )?; - Self::array_to_vec(audio_array) + tts_util::array_to_vec(audio_array) } } diff --git a/sbv2_core/src/tts_util.rs b/sbv2_core/src/tts_util.rs new file mode 100644 index 0000000..eee5474 --- /dev/null +++ b/sbv2_core/src/tts_util.rs @@ -0,0 +1,100 @@ +use std::io::Cursor; + +use crate::error::Result; +use crate::{jtalk, nlp, norm, tokenizer, utils}; +use hound::{SampleFormat, WavSpec, WavWriter}; +use ndarray::{concatenate, s, Array, Array1, Array2, Array3, Axis}; +use tokenizers::Tokenizer; +/// Parse text and return the input for synthesize +/// +/// # Note +/// This function is for low-level usage, use `easy_synthesize` for high-level usage. +#[allow(clippy::type_complexity)] +pub fn parse_text( + text: &str, + jtalk: &jtalk::JTalk, + tokenizer: &Tokenizer, + bert_predict: impl FnOnce(Vec, Vec) -> Result>, +) -> Result<(Array2, Array1, Array1, Array1)> { + let text = jtalk.num2word(text)?; + let normalized_text = norm::normalize_text(&text); + + let process = jtalk.process_text(&normalized_text)?; + let (phones, tones, mut word2ph) = process.g2p()?; + let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones); + + let phones = utils::intersperse(&phones, 0); + let tones = utils::intersperse(&tones, 0); + let lang_ids = utils::intersperse(&lang_ids, 0); + for item in &mut word2ph { + *item *= 2; + } + word2ph[0] += 1; + + let text = { + let (seq_text, _) = process.text_to_seq_kata()?; + seq_text.join("") + }; + let (token_ids, attention_masks) = tokenizer::tokenize(&text, tokenizer)?; + + let bert_content = bert_predict(token_ids, attention_masks)?; + + assert!( + word2ph.len() == text.chars().count() + 2, + "{} {}", + word2ph.len(), + normalized_text.chars().count() + ); + + let mut phone_level_feature = vec![]; + for (i, reps) in word2ph.iter().enumerate() { + let repeat_feature = { + let (reps_rows, reps_cols) = (*reps, 1); + let arr_len = bert_content.slice(s![i, ..]).len(); + + let mut results: Array2 = Array::zeros((reps_rows as usize, arr_len * reps_cols)); + + for j in 0..reps_rows { + for k in 0..reps_cols { + let mut view = results.slice_mut(s![j, k * arr_len..(k + 1) * arr_len]); + view.assign(&bert_content.slice(s![i, ..])); + } + } + results + }; + phone_level_feature.push(repeat_feature); + } + let phone_level_feature = concatenate( + Axis(0), + &phone_level_feature + .iter() + .map(|x| x.view()) + .collect::>(), + )?; + let bert_ori = phone_level_feature.t(); + Ok(( + bert_ori.to_owned(), + phones.into(), + tones.into(), + lang_ids.into(), + )) +} + +pub fn array_to_vec(audio_array: Array3) -> Result> { + let spec = WavSpec { + channels: 1, + sample_rate: 44100, + bits_per_sample: 32, + sample_format: SampleFormat::Float, + }; + let mut cursor = Cursor::new(Vec::new()); + let mut writer = WavWriter::new(&mut cursor, spec)?; + for i in 0..audio_array.shape()[0] { + let output = audio_array.slice(s![i, 0, ..]).to_vec(); + for sample in output { + writer.write_sample(sample)?; + } + } + writer.finalize()?; + Ok(cursor.into_inner()) +} diff --git a/sbv2_wasm/Cargo.toml b/sbv2_wasm/Cargo.toml new file mode 100644 index 0000000..a44af89 --- /dev/null +++ b/sbv2_wasm/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "sbv2_wasm" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["cdylib", "rlib"] + +[dependencies] +wasm-bindgen = "0.2.93" +sbv2_core = { path = "../sbv2_core", default-features = false, features = ["no_std"] } +once_cell.workspace = true +js-sys = "0.3.70" +ndarray.workspace = true + +[profile.release] +lto = true +opt-level = "s" diff --git a/sbv2_wasm/build.sh b/sbv2_wasm/build.sh new file mode 100644 index 0000000..2240af3 --- /dev/null +++ b/sbv2_wasm/build.sh @@ -0,0 +1,2 @@ +wasm-pack build --target web sbv2_wasm +wasm-opt -O3 -o sbv2_wasm/pkg/sbv2_wasm_bg.wasm sbv2_wasm/pkg/sbv2_wasm_bg.wasm \ No newline at end of file diff --git a/sbv2_wasm/src/lib.rs b/sbv2_wasm/src/lib.rs new file mode 100644 index 0000000..69d2ac6 --- /dev/null +++ b/sbv2_wasm/src/lib.rs @@ -0,0 +1,106 @@ +use once_cell::sync::Lazy; +use sbv2_core::*; +use wasm_bindgen::prelude::*; + +static JTALK: Lazy = Lazy::new(|| jtalk::JTalk::new().unwrap()); + +#[wasm_bindgen] +pub struct TokenizerWrap { + tokenizer: tokenizer::Tokenizer, +} + +#[wasm_bindgen] +pub fn load_tokenizer(s: js_sys::JsString) -> Result { + if let Some(s) = s.as_string() { + Ok(TokenizerWrap { + tokenizer: tokenizer::Tokenizer::from_bytes(s.as_bytes()) + .map_err(|e| JsError::new(&e.to_string()))?, + }) + } else { + Err(JsError::new("invalid utf8")) + } +} + +#[wasm_bindgen] +pub struct StyleVectorWrap { + style_vector: ndarray::Array2, +} + +#[wasm_bindgen] +pub fn load_sbv2file(buf: js_sys::Uint8Array) -> Result { + let mut body = vec![0; buf.length() as usize]; + buf.copy_to(&mut body[..]); + let (style_vectors, vits2) = sbv2file::parse_sbv2file(body)?; + let buf = js_sys::Uint8Array::new_with_length(vits2.len() as u32); + buf.copy_from(&vits2); + let arr = js_sys::Array::new_with_length(2); + arr.set( + 0, + StyleVectorWrap { + style_vector: style::load_style(style_vectors)?, + } + .into(), + ); + arr.set(1, buf.into()); + Ok(arr) +} + +#[wasm_bindgen] +pub fn synthesize( + text: &str, + tokenizer: &TokenizerWrap, + bert_predict_fn: js_sys::Function, + synthesize_fn: js_sys::Function, + sdp_ratio: f32, + length_scale: f32, + style_id: i32, + style_weight: f32, + style_vectors: &StyleVectorWrap, +) -> Result { + fn synthesize_wrap( + bert_ori: ndarray::Array2, + x_tst: ndarray::Array1, + tones: ndarray::Array1, + lang_ids: ndarray::Array1, + style_vector: ndarray::Array1, + sdp_ratio: f32, + length_scale: f32, + ) -> error::Result> { + todo!() + } + let (bert_ori, phones, tones, lang_ids) = tts_util::parse_text( + text, + &JTALK, + &tokenizer.tokenizer, + |token_ids: Vec, attention_masks: Vec| { + let token_ids_ = js_sys::BigInt64Array::new_with_length(token_ids.len() as u32); + token_ids_.copy_from(&token_ids); + let attention_masks_ = + js_sys::BigInt64Array::new_with_length(attention_masks.len() as u32); + attention_masks_.copy_from(&attention_masks); + let arr = js_sys::Array::new_with_length(2); + arr.set(0, token_ids_.into()); + arr.set(1, attention_masks_.into()); + let res = bert_predict_fn + .apply(&js_sys::Object::new().into(), &arr) + .map_err(|e| { + error::Error::OtherError(e.as_string().unwrap_or("unknown".to_string())) + })?; + let res: js_sys::Array = res.into(); + Ok(todo!()) + }, + )?; + let audio = synthesize_wrap( + bert_ori.to_owned(), + phones, + tones, + lang_ids, + style::get_style_vector(&style_vectors.style_vector, style_id, style_weight)?, + sdp_ratio, + length_scale, + )?; + let vec = tts_util::array_to_vec(audio)?; + let buf = js_sys::Uint8Array::new_with_length(vec.len() as u32); + buf.copy_from(&vec); + Ok(buf) +}