feat: sbv2 wasm

This commit is contained in:
Googlefan
2024-09-30 08:04:37 +00:00
parent ee292315e1
commit 596eec654d
15 changed files with 961 additions and 48 deletions

3
.gitignore vendored
View File

@@ -4,4 +4,5 @@ models/
venv/
.env
output.wav
node_modules
node_modules
dist/

23
Cargo.lock generated
View File

@@ -1967,6 +1967,7 @@ dependencies = [
"once_cell",
"sbv2_core",
"wasm-bindgen",
"wasm-bindgen-futures",
]
[[package]]
@@ -2524,6 +2525,18 @@ dependencies = [
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed"
dependencies = [
"cfg-if",
"js-sys",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.93"
@@ -2553,6 +2566,16 @@ version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484"
[[package]]
name = "web-sys"
version = "0.3.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "0.26.5"

View File

@@ -128,7 +128,7 @@ impl TTSModelHolder {
&self,
text: &str,
) -> Result<(Array2<f32>, Array1<i64>, Array1<i64>, Array1<i64>)> {
crate::tts_util::parse_text(
crate::tts_util::parse_text_blocking(
text,
&self.jtalk,
&self.tokenizer,

View File

@@ -10,7 +10,87 @@ use tokenizers::Tokenizer;
/// # Note
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
#[allow(clippy::type_complexity)]
pub fn parse_text(
pub async fn parse_text(
text: &str,
jtalk: &jtalk::JTalk,
tokenizer: &Tokenizer,
bert_predict: impl FnOnce(
Vec<i64>,
Vec<i64>,
) -> std::pin::Pin<
Box<dyn std::future::Future<Output = Result<ndarray::Array2<f32>>>>,
>,
) -> Result<(Array2<f32>, Array1<i64>, Array1<i64>, Array1<i64>)> {
let text = jtalk.num2word(text)?;
let normalized_text = norm::normalize_text(&text);
let process = jtalk.process_text(&normalized_text)?;
let (phones, tones, mut word2ph) = process.g2p()?;
let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones);
let phones = utils::intersperse(&phones, 0);
let tones = utils::intersperse(&tones, 0);
let lang_ids = utils::intersperse(&lang_ids, 0);
for item in &mut word2ph {
*item *= 2;
}
word2ph[0] += 1;
let text = {
let (seq_text, _) = process.text_to_seq_kata()?;
seq_text.join("")
};
let (token_ids, attention_masks) = tokenizer::tokenize(&text, tokenizer)?;
let bert_content = bert_predict(token_ids, attention_masks).await?;
assert!(
word2ph.len() == text.chars().count() + 2,
"{} {}",
word2ph.len(),
normalized_text.chars().count()
);
let mut phone_level_feature = vec![];
for (i, reps) in word2ph.iter().enumerate() {
let repeat_feature = {
let (reps_rows, reps_cols) = (*reps, 1);
let arr_len = bert_content.slice(s![i, ..]).len();
let mut results: Array2<f32> = Array::zeros((reps_rows as usize, arr_len * reps_cols));
for j in 0..reps_rows {
for k in 0..reps_cols {
let mut view = results.slice_mut(s![j, k * arr_len..(k + 1) * arr_len]);
view.assign(&bert_content.slice(s![i, ..]));
}
}
results
};
phone_level_feature.push(repeat_feature);
}
let phone_level_feature = concatenate(
Axis(0),
&phone_level_feature
.iter()
.map(|x| x.view())
.collect::<Vec<_>>(),
)?;
let bert_ori = phone_level_feature.t();
Ok((
bert_ori.to_owned(),
phones.into(),
tones.into(),
lang_ids.into(),
))
}
/// Parse text and return the input for synthesize
///
/// # Note
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
#[allow(clippy::type_complexity)]
pub fn parse_text_blocking(
text: &str,
jtalk: &jtalk::JTalk,
tokenizer: &Tokenizer,

View File

@@ -12,6 +12,7 @@ sbv2_core = { path = "../sbv2_core", default-features = false, features = ["no_s
once_cell.workspace = true
js-sys = "0.3.70"
ndarray.workspace = true
wasm-bindgen-futures = "0.4.43"
[profile.release]
lto = true

2
sbv2_wasm/README.md Normal file
View File

@@ -0,0 +1,2 @@
# StyleBertVITS2 wasm
refer to https://github.com/tuna2134/sbv2-api

31
sbv2_wasm/biome.json Normal file
View File

@@ -0,0 +1,31 @@
{
"$schema": "https://biomejs.dev/schemas/1.9.2/schema.json",
"vcs": {
"enabled": false,
"clientKind": "git",
"useIgnoreFile": false
},
"files": {
"ignoreUnknown": false,
"ignore": []
},
"formatter": {
"enabled": true,
"indentStyle": "tab",
"ignore": ["dist/", "pkg/"]
},
"organizeImports": {
"enabled": true
},
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
},
"javascript": {
"formatter": {
"quoteStyle": "double"
}
}
}

4
sbv2_wasm/build.sh Normal file → Executable file
View File

@@ -1,2 +1,4 @@
wasm-pack build --target web sbv2_wasm
wasm-opt -O3 -o sbv2_wasm/pkg/sbv2_wasm_bg.wasm sbv2_wasm/pkg/sbv2_wasm_bg.wasm
wasm-opt -O3 -o sbv2_wasm/pkg/sbv2_wasm_bg.wasm sbv2_wasm/pkg/sbv2_wasm_bg.wasm
mkdir -p sbv2_wasm/dist
cp sbv2_wasm/sbv2_wasm/pkg/sbv2_wasm_bg.wasm sbv2_wasm/dist/sbv2_wasm_bg.wasm

11
sbv2_wasm/example.js Normal file
View File

@@ -0,0 +1,11 @@
import { ModelHolder } from "./dist/index.js";
import fs from "node:fs/promises";
ModelHolder.globalInit(await fs.readFile("./dist/sbv2_wasm_bg.wasm"));
const holder = await ModelHolder.create(
(await fs.readFile("../models/tokenizer.json")).toString("utf-8"),
await fs.readFile("../models/deberta.onnx"),
);
await holder.load("tsukuyomi", await fs.readFile("../models/iroha2.sbv2"));
await fs.writeFile("out.wav", await holder.synthesize("tsukuyomi", "おはよう"));
holder.unload("tsukuyomi");

29
sbv2_wasm/package.json Normal file
View File

@@ -0,0 +1,29 @@
{
"name": "sbv2",
"version": "0.1.0",
"description": "Style Bert VITS2 wasm",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"type": "module",
"scripts": {
"build": "tsc && esbuild src-js/index.ts --outfile=dist/index.js --minify --format=esm --bundle --external:onnxruntime-web",
"format": "biome format --write ."
},
"keywords": [],
"author": "tuna2134",
"license": "MIT",
"devDependencies": {
"@biomejs/biome": "^1.9.2",
"@types/node": "^22.7.4",
"esbuild": "^0.24.0",
"typescript": "^5.6.2"
},
"dependencies": {
"onnxruntime-web": "^1.19.2"
},
"files": [
"dist/*",
"package.json",
"README.md"
]
}

493
sbv2_wasm/pnpm-lock.yaml generated Normal file
View File

@@ -0,0 +1,493 @@
lockfileVersion: '9.0'
settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
importers:
.:
devDependencies:
'@biomejs/biome':
specifier: ^1.9.2
version: 1.9.2
'@types/node':
specifier: ^22.7.4
version: 22.7.4
esbuild:
specifier: ^0.24.0
version: 0.24.0
onnxruntime-web:
specifier: ^1.19.2
version: 1.19.2
typescript:
specifier: ^5.6.2
version: 5.6.2
packages:
'@biomejs/biome@1.9.2':
resolution: {integrity: sha512-4j2Gfwft8Jqp1X0qLYvK4TEy4xhTo4o6rlvJPsjPeEame8gsmbGQfOPBkw7ur+7/Z/f0HZmCZKqbMvR7vTXQYQ==}
engines: {node: '>=14.21.3'}
hasBin: true
'@biomejs/cli-darwin-arm64@1.9.2':
resolution: {integrity: sha512-rbs9uJHFmhqB3Td0Ro+1wmeZOHhAPTL3WHr8NtaVczUmDhXkRDWScaxicG9+vhSLj1iLrW47itiK6xiIJy6vaA==}
engines: {node: '>=14.21.3'}
cpu: [arm64]
os: [darwin]
'@biomejs/cli-darwin-x64@1.9.2':
resolution: {integrity: sha512-BlfULKijNaMigQ9GH9fqJVt+3JTDOSiZeWOQtG/1S1sa8Lp046JHG3wRJVOvekTPL9q/CNFW1NVG8J0JN+L1OA==}
engines: {node: '>=14.21.3'}
cpu: [x64]
os: [darwin]
'@biomejs/cli-linux-arm64-musl@1.9.2':
resolution: {integrity: sha512-ZATvbUWhNxegSALUnCKWqetTZqrK72r2RsFD19OK5jXDj/7o1hzI1KzDNG78LloZxftrwr3uI9SqCLh06shSZw==}
engines: {node: '>=14.21.3'}
cpu: [arm64]
os: [linux]
'@biomejs/cli-linux-arm64@1.9.2':
resolution: {integrity: sha512-T8TJuSxuBDeQCQzxZu2o3OU4eyLumTofhCxxFd3+aH2AEWVMnH7Z/c3QP1lHI5RRMBP9xIJeMORqDQ5j+gVZzw==}
engines: {node: '>=14.21.3'}
cpu: [arm64]
os: [linux]
'@biomejs/cli-linux-x64-musl@1.9.2':
resolution: {integrity: sha512-CjPM6jT1miV5pry9C7qv8YJk0FIZvZd86QRD3atvDgfgeh9WQU0k2Aoo0xUcPdTnoz0WNwRtDicHxwik63MmSg==}
engines: {node: '>=14.21.3'}
cpu: [x64]
os: [linux]
'@biomejs/cli-linux-x64@1.9.2':
resolution: {integrity: sha512-T0cPk3C3Jr2pVlsuQVTBqk2qPjTm8cYcTD9p/wmR9MeVqui1C/xTVfOIwd3miRODFMrJaVQ8MYSXnVIhV9jTjg==}
engines: {node: '>=14.21.3'}
cpu: [x64]
os: [linux]
'@biomejs/cli-win32-arm64@1.9.2':
resolution: {integrity: sha512-2x7gSty75bNIeD23ZRPXyox6Z/V0M71ObeJtvQBhi1fgrvPdtkEuw7/0wEHg6buNCubzOFuN9WYJm6FKoUHfhg==}
engines: {node: '>=14.21.3'}
cpu: [arm64]
os: [win32]
'@biomejs/cli-win32-x64@1.9.2':
resolution: {integrity: sha512-JC3XvdYcjmu1FmAehVwVV0SebLpeNTnO2ZaMdGCSOdS7f8O9Fq14T2P1gTG1Q29Q8Dt1S03hh0IdVpIZykOL8g==}
engines: {node: '>=14.21.3'}
cpu: [x64]
os: [win32]
'@esbuild/aix-ppc64@0.24.0':
resolution: {integrity: sha512-WtKdFM7ls47zkKHFVzMz8opM7LkcsIp9amDUBIAWirg70RM71WRSjdILPsY5Uv1D42ZpUfaPILDlfactHgsRkw==}
engines: {node: '>=18'}
cpu: [ppc64]
os: [aix]
'@esbuild/android-arm64@0.24.0':
resolution: {integrity: sha512-Vsm497xFM7tTIPYK9bNTYJyF/lsP590Qc1WxJdlB6ljCbdZKU9SY8i7+Iin4kyhV/KV5J2rOKsBQbB77Ab7L/w==}
engines: {node: '>=18'}
cpu: [arm64]
os: [android]
'@esbuild/android-arm@0.24.0':
resolution: {integrity: sha512-arAtTPo76fJ/ICkXWetLCc9EwEHKaeya4vMrReVlEIUCAUncH7M4bhMQ+M9Vf+FFOZJdTNMXNBrWwW+OXWpSew==}
engines: {node: '>=18'}
cpu: [arm]
os: [android]
'@esbuild/android-x64@0.24.0':
resolution: {integrity: sha512-t8GrvnFkiIY7pa7mMgJd7p8p8qqYIz1NYiAoKc75Zyv73L3DZW++oYMSHPRarcotTKuSs6m3hTOa5CKHaS02TQ==}
engines: {node: '>=18'}
cpu: [x64]
os: [android]
'@esbuild/darwin-arm64@0.24.0':
resolution: {integrity: sha512-CKyDpRbK1hXwv79soeTJNHb5EiG6ct3efd/FTPdzOWdbZZfGhpbcqIpiD0+vwmpu0wTIL97ZRPZu8vUt46nBSw==}
engines: {node: '>=18'}
cpu: [arm64]
os: [darwin]
'@esbuild/darwin-x64@0.24.0':
resolution: {integrity: sha512-rgtz6flkVkh58od4PwTRqxbKH9cOjaXCMZgWD905JOzjFKW+7EiUObfd/Kav+A6Gyud6WZk9w+xu6QLytdi2OA==}
engines: {node: '>=18'}
cpu: [x64]
os: [darwin]
'@esbuild/freebsd-arm64@0.24.0':
resolution: {integrity: sha512-6Mtdq5nHggwfDNLAHkPlyLBpE5L6hwsuXZX8XNmHno9JuL2+bg2BX5tRkwjyfn6sKbxZTq68suOjgWqCicvPXA==}
engines: {node: '>=18'}
cpu: [arm64]
os: [freebsd]
'@esbuild/freebsd-x64@0.24.0':
resolution: {integrity: sha512-D3H+xh3/zphoX8ck4S2RxKR6gHlHDXXzOf6f/9dbFt/NRBDIE33+cVa49Kil4WUjxMGW0ZIYBYtaGCa2+OsQwQ==}
engines: {node: '>=18'}
cpu: [x64]
os: [freebsd]
'@esbuild/linux-arm64@0.24.0':
resolution: {integrity: sha512-TDijPXTOeE3eaMkRYpcy3LarIg13dS9wWHRdwYRnzlwlA370rNdZqbcp0WTyyV/k2zSxfko52+C7jU5F9Tfj1g==}
engines: {node: '>=18'}
cpu: [arm64]
os: [linux]
'@esbuild/linux-arm@0.24.0':
resolution: {integrity: sha512-gJKIi2IjRo5G6Glxb8d3DzYXlxdEj2NlkixPsqePSZMhLudqPhtZ4BUrpIuTjJYXxvF9njql+vRjB2oaC9XpBw==}
engines: {node: '>=18'}
cpu: [arm]
os: [linux]
'@esbuild/linux-ia32@0.24.0':
resolution: {integrity: sha512-K40ip1LAcA0byL05TbCQ4yJ4swvnbzHscRmUilrmP9Am7//0UjPreh4lpYzvThT2Quw66MhjG//20mrufm40mA==}
engines: {node: '>=18'}
cpu: [ia32]
os: [linux]
'@esbuild/linux-loong64@0.24.0':
resolution: {integrity: sha512-0mswrYP/9ai+CU0BzBfPMZ8RVm3RGAN/lmOMgW4aFUSOQBjA31UP8Mr6DDhWSuMwj7jaWOT0p0WoZ6jeHhrD7g==}
engines: {node: '>=18'}
cpu: [loong64]
os: [linux]
'@esbuild/linux-mips64el@0.24.0':
resolution: {integrity: sha512-hIKvXm0/3w/5+RDtCJeXqMZGkI2s4oMUGj3/jM0QzhgIASWrGO5/RlzAzm5nNh/awHE0A19h/CvHQe6FaBNrRA==}
engines: {node: '>=18'}
cpu: [mips64el]
os: [linux]
'@esbuild/linux-ppc64@0.24.0':
resolution: {integrity: sha512-HcZh5BNq0aC52UoocJxaKORfFODWXZxtBaaZNuN3PUX3MoDsChsZqopzi5UupRhPHSEHotoiptqikjN/B77mYQ==}
engines: {node: '>=18'}
cpu: [ppc64]
os: [linux]
'@esbuild/linux-riscv64@0.24.0':
resolution: {integrity: sha512-bEh7dMn/h3QxeR2KTy1DUszQjUrIHPZKyO6aN1X4BCnhfYhuQqedHaa5MxSQA/06j3GpiIlFGSsy1c7Gf9padw==}
engines: {node: '>=18'}
cpu: [riscv64]
os: [linux]
'@esbuild/linux-s390x@0.24.0':
resolution: {integrity: sha512-ZcQ6+qRkw1UcZGPyrCiHHkmBaj9SiCD8Oqd556HldP+QlpUIe2Wgn3ehQGVoPOvZvtHm8HPx+bH20c9pvbkX3g==}
engines: {node: '>=18'}
cpu: [s390x]
os: [linux]
'@esbuild/linux-x64@0.24.0':
resolution: {integrity: sha512-vbutsFqQ+foy3wSSbmjBXXIJ6PL3scghJoM8zCL142cGaZKAdCZHyf+Bpu/MmX9zT9Q0zFBVKb36Ma5Fzfa8xA==}
engines: {node: '>=18'}
cpu: [x64]
os: [linux]
'@esbuild/netbsd-x64@0.24.0':
resolution: {integrity: sha512-hjQ0R/ulkO8fCYFsG0FZoH+pWgTTDreqpqY7UnQntnaKv95uP5iW3+dChxnx7C3trQQU40S+OgWhUVwCjVFLvg==}
engines: {node: '>=18'}
cpu: [x64]
os: [netbsd]
'@esbuild/openbsd-arm64@0.24.0':
resolution: {integrity: sha512-MD9uzzkPQbYehwcN583yx3Tu5M8EIoTD+tUgKF982WYL9Pf5rKy9ltgD0eUgs8pvKnmizxjXZyLt0z6DC3rRXg==}
engines: {node: '>=18'}
cpu: [arm64]
os: [openbsd]
'@esbuild/openbsd-x64@0.24.0':
resolution: {integrity: sha512-4ir0aY1NGUhIC1hdoCzr1+5b43mw99uNwVzhIq1OY3QcEwPDO3B7WNXBzaKY5Nsf1+N11i1eOfFcq+D/gOS15Q==}
engines: {node: '>=18'}
cpu: [x64]
os: [openbsd]
'@esbuild/sunos-x64@0.24.0':
resolution: {integrity: sha512-jVzdzsbM5xrotH+W5f1s+JtUy1UWgjU0Cf4wMvffTB8m6wP5/kx0KiaLHlbJO+dMgtxKV8RQ/JvtlFcdZ1zCPA==}
engines: {node: '>=18'}
cpu: [x64]
os: [sunos]
'@esbuild/win32-arm64@0.24.0':
resolution: {integrity: sha512-iKc8GAslzRpBytO2/aN3d2yb2z8XTVfNV0PjGlCxKo5SgWmNXx82I/Q3aG1tFfS+A2igVCY97TJ8tnYwpUWLCA==}
engines: {node: '>=18'}
cpu: [arm64]
os: [win32]
'@esbuild/win32-ia32@0.24.0':
resolution: {integrity: sha512-vQW36KZolfIudCcTnaTpmLQ24Ha1RjygBo39/aLkM2kmjkWmZGEJ5Gn9l5/7tzXA42QGIoWbICfg6KLLkIw6yw==}
engines: {node: '>=18'}
cpu: [ia32]
os: [win32]
'@esbuild/win32-x64@0.24.0':
resolution: {integrity: sha512-7IAFPrjSQIJrGsK6flwg7NFmwBoSTyF3rl7If0hNUFQU4ilTsEPL6GuMuU9BfIWVVGuRnuIidkSMC+c0Otu8IA==}
engines: {node: '>=18'}
cpu: [x64]
os: [win32]
'@protobufjs/aspromise@1.1.2':
resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==}
'@protobufjs/base64@1.1.2':
resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==}
'@protobufjs/codegen@2.0.4':
resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==}
'@protobufjs/eventemitter@1.1.0':
resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==}
'@protobufjs/fetch@1.1.0':
resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==}
'@protobufjs/float@1.0.2':
resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==}
'@protobufjs/inquire@1.1.0':
resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==}
'@protobufjs/path@1.1.2':
resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==}
'@protobufjs/pool@1.1.0':
resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==}
'@protobufjs/utf8@1.1.0':
resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==}
'@types/node@22.7.4':
resolution: {integrity: sha512-y+NPi1rFzDs1NdQHHToqeiX2TIS79SWEAw9GYhkkx8bD0ChpfqC+n2j5OXOCpzfojBEBt6DnEnnG9MY0zk1XLg==}
esbuild@0.24.0:
resolution: {integrity: sha512-FuLPevChGDshgSicjisSooU0cemp/sGXR841D5LHMB7mTVOmsEHcAxaH3irL53+8YDIeVNQEySh4DaYU/iuPqQ==}
engines: {node: '>=18'}
hasBin: true
flatbuffers@1.12.0:
resolution: {integrity: sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==}
guid-typescript@1.0.9:
resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==}
long@5.2.3:
resolution: {integrity: sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==}
onnxruntime-common@1.19.2:
resolution: {integrity: sha512-a4R7wYEVFbZBlp0BfhpbFWqe4opCor3KM+5Wm22Az3NGDcQMiU2hfG/0MfnBs+1ZrlSGmlgWeMcXQkDk1UFb8Q==}
onnxruntime-web@1.19.2:
resolution: {integrity: sha512-r0ok6KpTUXR4WA+rHvUiZn7JoH02e8iS7XE1p5bXk7q3E0UaRFfYvpMNUHqEPiTBMuIssfBxDCQjUihV8dDFPg==}
platform@1.3.6:
resolution: {integrity: sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==}
protobufjs@7.4.0:
resolution: {integrity: sha512-mRUWCc3KUU4w1jU8sGxICXH/gNS94DvI1gxqDvBzhj1JpcsimQkYiOJfwsPUykUI5ZaspFbSgmBLER8IrQ3tqw==}
engines: {node: '>=12.0.0'}
typescript@5.6.2:
resolution: {integrity: sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==}
engines: {node: '>=14.17'}
hasBin: true
undici-types@6.19.8:
resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==}
snapshots:
'@biomejs/biome@1.9.2':
optionalDependencies:
'@biomejs/cli-darwin-arm64': 1.9.2
'@biomejs/cli-darwin-x64': 1.9.2
'@biomejs/cli-linux-arm64': 1.9.2
'@biomejs/cli-linux-arm64-musl': 1.9.2
'@biomejs/cli-linux-x64': 1.9.2
'@biomejs/cli-linux-x64-musl': 1.9.2
'@biomejs/cli-win32-arm64': 1.9.2
'@biomejs/cli-win32-x64': 1.9.2
'@biomejs/cli-darwin-arm64@1.9.2':
optional: true
'@biomejs/cli-darwin-x64@1.9.2':
optional: true
'@biomejs/cli-linux-arm64-musl@1.9.2':
optional: true
'@biomejs/cli-linux-arm64@1.9.2':
optional: true
'@biomejs/cli-linux-x64-musl@1.9.2':
optional: true
'@biomejs/cli-linux-x64@1.9.2':
optional: true
'@biomejs/cli-win32-arm64@1.9.2':
optional: true
'@biomejs/cli-win32-x64@1.9.2':
optional: true
'@esbuild/aix-ppc64@0.24.0':
optional: true
'@esbuild/android-arm64@0.24.0':
optional: true
'@esbuild/android-arm@0.24.0':
optional: true
'@esbuild/android-x64@0.24.0':
optional: true
'@esbuild/darwin-arm64@0.24.0':
optional: true
'@esbuild/darwin-x64@0.24.0':
optional: true
'@esbuild/freebsd-arm64@0.24.0':
optional: true
'@esbuild/freebsd-x64@0.24.0':
optional: true
'@esbuild/linux-arm64@0.24.0':
optional: true
'@esbuild/linux-arm@0.24.0':
optional: true
'@esbuild/linux-ia32@0.24.0':
optional: true
'@esbuild/linux-loong64@0.24.0':
optional: true
'@esbuild/linux-mips64el@0.24.0':
optional: true
'@esbuild/linux-ppc64@0.24.0':
optional: true
'@esbuild/linux-riscv64@0.24.0':
optional: true
'@esbuild/linux-s390x@0.24.0':
optional: true
'@esbuild/linux-x64@0.24.0':
optional: true
'@esbuild/netbsd-x64@0.24.0':
optional: true
'@esbuild/openbsd-arm64@0.24.0':
optional: true
'@esbuild/openbsd-x64@0.24.0':
optional: true
'@esbuild/sunos-x64@0.24.0':
optional: true
'@esbuild/win32-arm64@0.24.0':
optional: true
'@esbuild/win32-ia32@0.24.0':
optional: true
'@esbuild/win32-x64@0.24.0':
optional: true
'@protobufjs/aspromise@1.1.2': {}
'@protobufjs/base64@1.1.2': {}
'@protobufjs/codegen@2.0.4': {}
'@protobufjs/eventemitter@1.1.0': {}
'@protobufjs/fetch@1.1.0':
dependencies:
'@protobufjs/aspromise': 1.1.2
'@protobufjs/inquire': 1.1.0
'@protobufjs/float@1.0.2': {}
'@protobufjs/inquire@1.1.0': {}
'@protobufjs/path@1.1.2': {}
'@protobufjs/pool@1.1.0': {}
'@protobufjs/utf8@1.1.0': {}
'@types/node@22.7.4':
dependencies:
undici-types: 6.19.8
esbuild@0.24.0:
optionalDependencies:
'@esbuild/aix-ppc64': 0.24.0
'@esbuild/android-arm': 0.24.0
'@esbuild/android-arm64': 0.24.0
'@esbuild/android-x64': 0.24.0
'@esbuild/darwin-arm64': 0.24.0
'@esbuild/darwin-x64': 0.24.0
'@esbuild/freebsd-arm64': 0.24.0
'@esbuild/freebsd-x64': 0.24.0
'@esbuild/linux-arm': 0.24.0
'@esbuild/linux-arm64': 0.24.0
'@esbuild/linux-ia32': 0.24.0
'@esbuild/linux-loong64': 0.24.0
'@esbuild/linux-mips64el': 0.24.0
'@esbuild/linux-ppc64': 0.24.0
'@esbuild/linux-riscv64': 0.24.0
'@esbuild/linux-s390x': 0.24.0
'@esbuild/linux-x64': 0.24.0
'@esbuild/netbsd-x64': 0.24.0
'@esbuild/openbsd-arm64': 0.24.0
'@esbuild/openbsd-x64': 0.24.0
'@esbuild/sunos-x64': 0.24.0
'@esbuild/win32-arm64': 0.24.0
'@esbuild/win32-ia32': 0.24.0
'@esbuild/win32-x64': 0.24.0
flatbuffers@1.12.0: {}
guid-typescript@1.0.9: {}
long@5.2.3: {}
onnxruntime-common@1.19.2: {}
onnxruntime-web@1.19.2:
dependencies:
flatbuffers: 1.12.0
guid-typescript: 1.0.9
long: 5.2.3
onnxruntime-common: 1.19.2
platform: 1.3.6
protobufjs: 7.4.0
platform@1.3.6: {}
protobufjs@7.4.0:
dependencies:
'@protobufjs/aspromise': 1.1.2
'@protobufjs/base64': 1.1.2
'@protobufjs/codegen': 2.0.4
'@protobufjs/eventemitter': 1.1.0
'@protobufjs/fetch': 1.1.0
'@protobufjs/float': 1.0.2
'@protobufjs/inquire': 1.1.0
'@protobufjs/path': 1.1.2
'@protobufjs/pool': 1.1.0
'@protobufjs/utf8': 1.1.0
'@types/node': 22.7.4
long: 5.2.3
typescript@5.6.2: {}
undici-types@6.19.8: {}

106
sbv2_wasm/src-js/index.ts Normal file
View File

@@ -0,0 +1,106 @@
import * as wasm from "../pkg/sbv2_wasm.js";
import { InferenceSession, Tensor } from "onnxruntime-web";
export class ModelHolder {
private models: Map<string, [InferenceSession, wasm.StyleVectorWrap]> =
new Map();
constructor(
private tok: wasm.TokenizerWrap,
private deberta: InferenceSession,
) {}
public static async globalInit(buf: ArrayBufferLike) {
await wasm.default(buf);
}
public static async create(tok: string, deberta: ArrayBufferLike) {
return new ModelHolder(
await wasm.load_tokenizer(tok),
await InferenceSession.create(deberta, {
executionProviders: ["webnn", "webgpu", "webgl", "wasm", "cpu"],
graphOptimizationLevel: "all",
}),
);
}
public async synthesize(
name: string,
text: string,
style_id: number = 0,
style_weight: number = 1.0,
sdp_ratio: number = 0.4,
speed: number = 1.0,
) {
const mod = this.models.get(name);
if (!mod) throw new Error(`No model named ${name}`);
const [vits2, style] = mod;
return wasm.synthesize(
text,
this.tok,
async (a: BigInt64Array, b: BigInt64Array) => {
try {
const res = (
await this.deberta.run({
input_ids: new Tensor("int64", a, [1, a.length]),
attention_mask: new Tensor("int64", b, [1, b.length]),
})
)["output"];
return [new Uint32Array(res.dims), await res.getData(true)];
} catch (e) {
console.warn(e);
throw e;
}
},
async (
[a_shape, a_array]: any,
b_d: any,
c_d: any,
d_d: any,
e_d: any,
f: number,
g: number,
) => {
try {
const a = new Tensor("float32", a_array, [1, ...a_shape]);
const b = new Tensor("int64", b_d, [1, b_d.length]);
const c = new Tensor("int64", c_d, [1, c_d.length]);
const d = new Tensor("int64", d_d, [1, d_d.length]);
const e = new Tensor("float32", e_d, [1, e_d.length]);
const res = (
await vits2.run({
x_tst: b,
x_tst_lengths: new Tensor("int64", [b_d.length]),
sid: new Tensor("int64", [0]),
tones: c,
language: d,
bert: a,
style_vec: e,
sdp_ratio: new Tensor("float32", [f]),
length_scale: new Tensor("float32", [g]),
})
).output;
return [new Uint32Array(res.dims), await res.getData(true)];
} catch (e) {
console.warn(e);
throw e;
}
},
sdp_ratio,
1.0 / speed,
style_id,
style_weight,
style,
);
}
public async load(name: string, b: Uint8Array) {
const [style, vits2_b] = wasm.load_sbv2file(b);
const vits2 = await InferenceSession.create(vits2_b as Uint8Array, {
executionProviders: ["webnn", "webgpu", "webgl", "wasm", "cpu"],
graphOptimizationLevel: "all",
});
this.models.set(name, [vits2, style]);
}
public async unload(name: string) {
return this.models.delete(name);
}
public modelList() {
return this.models.keys();
}
}

View File

@@ -0,0 +1,102 @@
pub fn vec8_to_array8(v: Vec<u8>) -> js_sys::Uint8Array {
let arr = js_sys::Uint8Array::new_with_length(v.len() as u32);
arr.copy_from(&v);
arr
}
pub fn vec_f32_to_array_f32(v: Vec<f32>) -> js_sys::Float32Array {
let arr = js_sys::Float32Array::new_with_length(v.len() as u32);
arr.copy_from(&v);
arr
}
pub fn array8_to_vec8(buf: js_sys::Uint8Array) -> Vec<u8> {
let mut body = vec![0; buf.length() as usize];
buf.copy_to(&mut body[..]);
body
}
pub fn vec64_to_array64(v: Vec<i64>) -> js_sys::BigInt64Array {
let arr = js_sys::BigInt64Array::new_with_length(v.len() as u32);
arr.copy_from(&v);
arr
}
pub fn vec_to_array(v: Vec<wasm_bindgen::JsValue>) -> js_sys::Array {
let arr = js_sys::Array::new_with_length(v.len() as u32);
for (i, v) in v.into_iter().enumerate() {
arr.set(i as u32, v);
}
arr
}
struct A {
shape: Vec<u32>,
data: Vec<f32>,
}
impl TryFrom<wasm_bindgen::JsValue> for A {
type Error = sbv2_core::error::Error;
fn try_from(value: wasm_bindgen::JsValue) -> Result<Self, Self::Error> {
let value: js_sys::Array = value.into();
let mut shape = vec![];
let mut data = vec![];
for (i, v) in value.iter().enumerate() {
match i {
0 => {
let v: js_sys::Uint32Array = v.into();
shape = vec![0; v.length() as usize];
v.copy_to(&mut shape);
}
1 => {
let v: js_sys::Float32Array = v.into();
data = vec![0.0; v.length() as usize];
v.copy_to(&mut data);
}
_ => {}
};
}
Ok(A { shape, data })
}
}
pub fn array_to_array2_f32(
a: wasm_bindgen::JsValue,
) -> sbv2_core::error::Result<ndarray::Array2<f32>> {
let a = A::try_from(a)?;
if a.shape.len() != 2 {
return Err(sbv2_core::error::Error::OtherError(
"Length mismatch".to_string(),
));
}
let shape = [a.shape[0] as usize, a.shape[1] as usize];
let arr = ndarray::Array2::from_shape_vec(shape, a.data.to_vec())
.map_err(|e| sbv2_core::error::Error::OtherError(e.to_string()))?;
Ok(arr)
}
pub fn array_to_array3_f32(
a: wasm_bindgen::JsValue,
) -> sbv2_core::error::Result<ndarray::Array3<f32>> {
let a = A::try_from(a)?;
if a.shape.len() != 3 {
return Err(sbv2_core::error::Error::OtherError(
"Length mismatch".to_string(),
));
}
let shape = [
a.shape[0] as usize,
a.shape[1] as usize,
a.shape[2] as usize,
];
let arr = ndarray::Array3::from_shape_vec(shape, a.data.to_vec())
.map_err(|e| sbv2_core::error::Error::OtherError(e.to_string()))?;
Ok(arr)
}
pub fn array2_f32_to_array(a: ndarray::Array2<f32>) -> js_sys::Array {
let shape: Vec<wasm_bindgen::JsValue> = a.shape().iter().map(|f| (*f as u32).into()).collect();
let typed_array = js_sys::Float32Array::new_with_length(a.len() as u32);
typed_array.copy_from(&a.into_flat().to_vec());
vec_to_array(vec![vec_to_array(shape).into(), typed_array.into()])
}

View File

@@ -1,6 +1,8 @@
use once_cell::sync::Lazy;
use sbv2_core::*;
use wasm_bindgen::prelude::*;
use wasm_bindgen_futures::JsFuture;
mod array_helper;
static JTALK: Lazy<jtalk::JTalk> = Lazy::new(|| jtalk::JTalk::new().unwrap());
@@ -28,25 +30,20 @@ pub struct StyleVectorWrap {
#[wasm_bindgen]
pub fn load_sbv2file(buf: js_sys::Uint8Array) -> Result<js_sys::Array, JsError> {
let mut body = vec![0; buf.length() as usize];
buf.copy_to(&mut body[..]);
let (style_vectors, vits2) = sbv2file::parse_sbv2file(body)?;
let buf = js_sys::Uint8Array::new_with_length(vits2.len() as u32);
buf.copy_from(&vits2);
let arr = js_sys::Array::new_with_length(2);
arr.set(
0,
let (style_vectors, vits2) = sbv2file::parse_sbv2file(array_helper::array8_to_vec8(buf))?;
let buf = array_helper::vec8_to_array8(vits2);
Ok(array_helper::vec_to_array(vec![
StyleVectorWrap {
style_vector: style::load_style(style_vectors)?,
}
.into(),
);
arr.set(1, buf.into());
Ok(arr)
buf.into(),
]))
}
#[allow(clippy::too_many_arguments)]
#[wasm_bindgen]
pub fn synthesize(
pub async fn synthesize(
text: &str,
tokenizer: &TokenizerWrap,
bert_predict_fn: js_sys::Function,
@@ -57,39 +54,61 @@ pub fn synthesize(
style_weight: f32,
style_vectors: &StyleVectorWrap,
) -> Result<js_sys::Uint8Array, JsError> {
fn synthesize_wrap(
bert_ori: ndarray::Array2<f32>,
x_tst: ndarray::Array1<i64>,
tones: ndarray::Array1<i64>,
lang_ids: ndarray::Array1<i64>,
style_vector: ndarray::Array1<f32>,
sdp_ratio: f32,
length_scale: f32,
) -> error::Result<ndarray::Array3<f32>> {
todo!()
}
let synthesize_wrap = |bert_ori: ndarray::Array2<f32>,
x_tst: ndarray::Array1<i64>,
tones: ndarray::Array1<i64>,
lang_ids: ndarray::Array1<i64>,
style_vector: ndarray::Array1<f32>,
sdp_ratio: f32,
length_scale: f32| async move {
let arr = array_helper::vec_to_array(vec![
array_helper::array2_f32_to_array(bert_ori).into(),
array_helper::vec64_to_array64(x_tst.to_vec()).into(),
array_helper::vec64_to_array64(tones.to_vec()).into(),
array_helper::vec64_to_array64(lang_ids.to_vec()).into(),
array_helper::vec_f32_to_array_f32(style_vector.to_vec()).into(),
sdp_ratio.into(),
length_scale.into(),
]);
let res = synthesize_fn
.apply(&js_sys::Object::new().into(), &arr)
.map_err(|e| {
error::Error::OtherError(e.as_string().unwrap_or("unknown".to_string()))
})?;
let res = JsFuture::from(Into::<js_sys::Promise>::into(res))
.await
.map_err(|e| {
sbv2_core::error::Error::OtherError(e.as_string().unwrap_or("unknown".to_string()))
})?;
array_helper::array_to_array3_f32(res)
};
let (bert_ori, phones, tones, lang_ids) = tts_util::parse_text(
text,
&JTALK,
&tokenizer.tokenizer,
|token_ids: Vec<i64>, attention_masks: Vec<i64>| {
let token_ids_ = js_sys::BigInt64Array::new_with_length(token_ids.len() as u32);
token_ids_.copy_from(&token_ids);
let attention_masks_ =
js_sys::BigInt64Array::new_with_length(attention_masks.len() as u32);
attention_masks_.copy_from(&attention_masks);
let arr = js_sys::Array::new_with_length(2);
arr.set(0, token_ids_.into());
arr.set(1, attention_masks_.into());
let res = bert_predict_fn
.apply(&js_sys::Object::new().into(), &arr)
.map_err(|e| {
error::Error::OtherError(e.as_string().unwrap_or("unknown".to_string()))
})?;
let res: js_sys::Array = res.into();
Ok(todo!())
Box::pin(async move {
let arr = array_helper::vec_to_array(vec![
array_helper::vec64_to_array64(token_ids).into(),
array_helper::vec64_to_array64(attention_masks).into(),
]);
let res = bert_predict_fn
.apply(&js_sys::Object::new().into(), &arr)
.map_err(|e| {
error::Error::OtherError(e.as_string().unwrap_or("unknown".to_string()))
})?;
let res = JsFuture::from(Into::<js_sys::Promise>::into(res))
.await
.map_err(|e| {
sbv2_core::error::Error::OtherError(
e.as_string().unwrap_or("unknown".to_string()),
)
})?;
array_helper::array_to_array2_f32(res)
})
},
)?;
)
.await?;
let audio = synthesize_wrap(
bert_ori.to_owned(),
phones,
@@ -98,9 +117,7 @@ pub fn synthesize(
style::get_style_vector(&style_vectors.style_vector, style_id, style_weight)?,
sdp_ratio,
length_scale,
)?;
let vec = tts_util::array_to_vec(audio)?;
let buf = js_sys::Uint8Array::new_with_length(vec.len() as u32);
buf.copy_from(&vec);
Ok(buf)
)
.await?;
Ok(array_helper::vec8_to_array8(tts_util::array_to_vec(audio)?))
}

15
sbv2_wasm/tsconfig.json Normal file
View File

@@ -0,0 +1,15 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"rootDir": "./src-js",
"outDir": "./dist",
"moduleResolution": "node",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": true,
"skipLibCheck": true,
"declaration": true,
"emitDeclarationOnly": true
}
}