Compare commits

...

44 Commits

Author SHA1 Message Date
コマリン親衛隊
5e500b2c42 Support arm64 2024-09-22 19:12:29 +09:00
コマリン親衛隊
136375e5b6 Merge pull request #48 from tuna2134/renovate/pyo3-0.x-lockfile
fix(deps): update rust crate pyo3 to v0.22.3
2024-09-22 18:56:40 +09:00
tuna2134
aade119ddb add stripe 2024-09-22 08:05:48 +00:00
tuna2134
55cedb2f6d fix dists path 2024-09-22 07:48:53 +00:00
tuna2134
f2940f4ebe bump version 2024-09-22 07:41:58 +00:00
tuna2134
96a5ab0672 fix returns type 2024-09-22 07:40:46 +00:00
tuna2134
64cbd151a6 change to openid upload 2024-09-22 07:36:03 +00:00
tuna2134
3103fcef17 fix bug 2024-09-22 07:25:48 +00:00
tuna2134
dd8ae77edc fix bug 2024-09-22 07:24:37 +00:00
tuna2134
ee4c4ab8ad use easy function 2024-09-22 07:22:26 +00:00
コマリン親衛隊
79120e4aee Fix 2024-09-22 16:17:08 +09:00
コマリン親衛隊
c947df2105 Unsupport musllinux 2024-09-22 16:16:25 +09:00
tuna2134
dcbb19fcdd fixed 2024-09-22 07:13:26 +00:00
tuna2134
b5601410f8 Merge branch 'main' of https://github.com/tuna2134/sbv2-api 2024-09-22 07:10:30 +00:00
tuna2134
a3160ea2e8 add 2024-09-22 07:10:28 +00:00
コマリン親衛隊
1a978c3fe3 Create pull_request_template.md 2024-09-22 15:23:43 +09:00
コマリン親衛隊
5837b66759 Merge pull request #53 from Googlefan256/main
Build optimization and convert model onnx
2024-09-22 15:15:44 +09:00
Googlefan
962fa9a49d fix: build optimization, convert model onnx 2024-09-22 06:00:57 +00:00
コマリン親衛隊
290fb37c16 Merge pull request #52 from tuna2134/docs
クレートのdocsの充実化
2024-09-22 11:01:56 +09:00
tuna2134
0c926751a4 bump version 2024-09-22 02:01:13 +00:00
tuna2134
da86aa811d Merge branch 'main' of https://github.com/tuna2134/sbv2-api into docs 2024-09-22 01:54:19 +00:00
tuna2134
4e0edaebcd docsの充実化 2024-09-22 01:54:06 +00:00
tuna2134
1d7d65ae21 add support arm64(maybe) 2024-09-22 01:16:04 +00:00
tuna2134
3112e3e8ec added new todo 2024-09-22 01:01:53 +00:00
コマリン親衛隊
5724251fb5 Merge pull request #50 from kamakiri1192/feature/apple-silicon-mac
feat: Added explanation for Apple Silicon Mac
2024-09-22 09:58:35 +09:00
コマリン親衛隊
3b1182f07d Merge pull request #49 from tuna2134/renovate/axum-0.x-lockfile
Update Rust crate axum to v0.7.6
2024-09-22 09:34:15 +09:00
kamakiri1192
4ed463b05b feat: Added explanation for Apple Silicon Mac 2024-09-21 16:05:47 +09:00
renovate[bot]
c641bc7529 Update Rust crate axum to v0.7.6 2024-09-20 20:05:48 +00:00
tuna2134
be0370a2f9 format 2024-09-17 10:55:45 +00:00
tuna2134
ec3e412ca0 fix bug 2024-09-17 10:55:27 +00:00
tuna2134
ccad71c564 use easy synthesize function 2024-09-17 10:44:18 +00:00
コマリン親衛隊
b9ea462497 Merge pull request #47 from tuna2134/renovate/anyhow-1.x-lockfile
Update Rust crate anyhow to v1.0.89
2024-09-17 10:54:18 +09:00
renovate[bot]
0bb3c5b8ea Update Rust crate pyo3 to v0.22.3 2024-09-16 09:25:40 +00:00
renovate[bot]
b21d425733 Update Rust crate anyhow to v1.0.89 2024-09-16 09:25:35 +00:00
コマリン親衛隊
dc347fd5b3 Merge pull request #46 from tuna2134/renovate/anyhow-1.x-lockfile 2024-09-15 15:36:09 +09:00
renovate[bot]
d8d8c82deb Update Rust crate anyhow to v1.0.89 2024-09-15 04:42:37 +00:00
コマリン親衛隊
e70b8e51d2 Merge pull request #45 from tuna2134/number
数字を読めれるように
2024-09-14 12:28:47 +09:00
tuna2134
38f9d98d1a fix bug 2024-09-14 03:25:53 +00:00
コマリン親衛隊
16725552bf Merge pull request #44 from tuna2134/tuna2134-patch-1
Fix Japanese to english
2024-09-14 07:31:25 +09:00
コマリン親衛隊
24bb626282 Rename 機能追加.md to feature_request.md 2024-09-14 07:28:47 +09:00
コマリン親衛隊
cc1f704e0b Rename バグの報告.md to bug_report.md 2024-09-14 07:28:21 +09:00
コマリン親衛隊
d5b2c4842e Update issue templates 2024-09-13 21:02:14 +09:00
コマリン親衛隊
46649fad18 Update issue templates 2024-09-13 20:59:14 +09:00
tuna2134
9e4e098170 fix private to public 2024-09-13 11:36:24 +00:00
19 changed files with 366 additions and 74 deletions

28
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@@ -0,0 +1,28 @@
---
name: バグの報告
about: バグを報告する場所です。
title: ''
labels: bug
assignees: ''
---
**バグの説明**
バグのエラーを貼ってください
**再現する方法**
どのようにバグが発生したか時系列でまとめてください。
**本来の挙動**
本来動作すべきことについて簡潔にまとめてください。
**スクリーンショット**
もしもあるならでいいです。
**端末の情報**
- OS: [e.g. Linux]
**コード**
```rs
ここにコード貼ってください
```

View File

@@ -0,0 +1,17 @@
---
name: 機能追加
about: 機能追加してほしい場合これで作ってください。
title: ''
labels: enhancement
assignees: ''
---
**機能追加の説明**
ここで追加される機能の説明してください。
**メリット**
ここにメリットを書いてください。
**デメリット**
ここにデメリットを書いてください。

8
.github/pull_request_template.md vendored Normal file
View File

@@ -0,0 +1,8 @@
## 概要
(ここに本PRの説明をしてください。)
## 関連issue
(ここに該当するissueの番号を書いてください。)
## 確認
- [ ] 動作確認しましたか?

132
.github/workflows/CI.yml vendored Normal file
View File

@@ -0,0 +1,132 @@
# This file is autogenerated by maturin v1.7.1
# To update, run
#
# maturin generate-ci github
#
name: CI
on:
push:
branches:
- main
- master
tags:
- '*'
pull_request:
workflow_dispatch:
permissions:
contents: read
id-token: write
jobs:
linux:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: ubuntu-latest
target: x86_64
- runner: ubuntu-latest
target: aarch64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
manylinux: auto
working-directory: sbv2_bindings
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-linux-${{ matrix.platform.target }}
path: sbv2_bindings/dist
windows:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: windows-latest
target: x64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
architecture: ${{ matrix.platform.target }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: sbv2_bindings
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-windows-${{ matrix.platform.target }}
path: sbv2_bindings/dist
macos:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: macos-12
target: x86_64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: sbv2_bindings
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-macos-${{ matrix.platform.target }}
path: sbv2_bindings/dist
sdist:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build sdist
uses: PyO3/maturin-action@v1
with:
command: sdist
args: --out dist
working-directory: sbv2_bindings
- name: Upload sdist
uses: actions/upload-artifact@v4
with:
name: wheels-sdist
path: sbv2_bindings/dist
release:
name: Release
runs-on: ubuntu-latest
if: "startsWith(github.ref, 'refs/tags/')"
needs: [linux, windows, macos, sdist]
environment: release
steps:
- uses: actions/download-artifact@v4
- name: Publish to PyPI
uses: PyO3/maturin-action@v1
with:
command: upload
args: --non-interactive --skip-existing wheels-*/*

View File

@@ -14,6 +14,9 @@ jobs:
strategy:
matrix:
tag: [cpu, cuda]
platform:
- linux/amd64
- linux/arm64
steps:
- uses: actions/checkout@v4
- name: Set up QEMU
@@ -33,4 +36,5 @@ jobs:
push: true
tags: |
ghcr.io/${{ github.repository }}:${{ matrix.tag }}
file: docker/${{ matrix.tag }}.Dockerfile
file: docker/${{ matrix.tag }}.Dockerfile
platforms: ${{ matrix.platform }}

61
Cargo.lock generated
View File

@@ -77,9 +77,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.87"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8"
checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
[[package]]
name = "async-trait"
@@ -100,9 +100,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
[[package]]
name = "axum"
version = "0.7.5"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
checksum = "8f43644eed690f5374f1af436ecd6aea01cd201f6fbdf0178adaf6907afb2cec"
dependencies = [
"async-trait",
"axum-core",
@@ -126,7 +126,7 @@ dependencies = [
"serde_urlencoded",
"sync_wrapper 1.0.1",
"tokio",
"tower",
"tower 0.5.1",
"tower-layer",
"tower-service",
"tracing",
@@ -134,9 +134,9 @@ dependencies = [
[[package]]
name = "axum-core"
version = "0.4.3"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
checksum = "5e6b8ba012a258d63c9adfa28b9ddcf66149da6f986c5b5452e629d5ee64bf00"
dependencies = [
"async-trait",
"bytes",
@@ -147,7 +147,7 @@ dependencies = [
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper 0.1.2",
"sync_wrapper 1.0.1",
"tower-layer",
"tower-service",
"tracing",
@@ -771,6 +771,8 @@ dependencies = [
"hyper",
"pin-project-lite",
"tokio",
"tower 0.4.13",
"tower-service",
]
[[package]]
@@ -1361,9 +1363,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.19.0"
version = "1.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
checksum = "33ea5043e58958ee56f3e15a90aee535795cd7dfd319846288d93c5b57d85cbe"
[[package]]
name = "onig"
@@ -1561,9 +1563,9 @@ dependencies = [
[[package]]
name = "pyo3"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433"
checksum = "15ee168e30649f7f234c3d49ef5a7a6cbf5134289bc46c29ff3155fa3221c225"
dependencies = [
"anyhow",
"cfg-if",
@@ -1580,9 +1582,9 @@ dependencies = [
[[package]]
name = "pyo3-build-config"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8"
checksum = "e61cef80755fe9e46bb8a0b8f20752ca7676dcc07a5277d8b7768c6172e529b3"
dependencies = [
"once_cell",
"target-lexicon",
@@ -1590,9 +1592,9 @@ dependencies = [
[[package]]
name = "pyo3-ffi"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6"
checksum = "67ce096073ec5405f5ee2b8b31f03a68e02aa10d5d4f565eca04acc41931fa1c"
dependencies = [
"libc",
"pyo3-build-config",
@@ -1600,9 +1602,9 @@ dependencies = [
[[package]]
name = "pyo3-macros"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206"
checksum = "2440c6d12bc8f3ae39f1e775266fa5122fd0c8891ce7520fa6048e683ad3de28"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
@@ -1612,9 +1614,9 @@ dependencies = [
[[package]]
name = "pyo3-macros-backend"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372"
checksum = "1be962f0e06da8f8465729ea2cb71a416d2257dff56cbe40a70d3e62a93ae5d1"
dependencies = [
"heck",
"proc-macro2",
@@ -1831,7 +1833,7 @@ dependencies = [
[[package]]
name = "sbv2_bindings"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"anyhow",
"ndarray",
@@ -1841,7 +1843,7 @@ dependencies = [
[[package]]
name = "sbv2_core"
version = "0.1.3"
version = "0.1.4"
dependencies = [
"anyhow",
"dotenvy",
@@ -2181,6 +2183,21 @@ dependencies = [
"tokio",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f"
dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
"sync_wrapper 0.1.2",
"tokio",
"tower-layer",
"tower-service",
"tracing",
]

View File

@@ -6,4 +6,9 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"]
anyhow = "1.0.86"
dotenvy = "0.15.7"
env_logger = "0.11.5"
ndarray = "0.16.1"
ndarray = "0.16.1"
[profile.release]
lto = true
debug = false
strip = true

View File

@@ -27,6 +27,7 @@ JP-Extra しか対応していません。(基本的に対応する予定もあ
- [x] GPU 対応(DirectML)
- [x] GPU 対応(CoreML)
- [ ] WASM 変換(依存ライブラリの関係により現在は不可)
- [x] arm64のdockerサポート
## 構造説明
@@ -57,6 +58,22 @@ docker run -it --rm -p 3000:3000 --name sbv2 \
ghcr.io/tuna2134/sbv2-api:cpu
```
<details>
<summary>Apple Silicon搭載のMac(M1以降)の場合</summary>
docker上で動作させる場合、.envのADDRをlocalhostから0.0.0.0に変更してください。
```yaml
ADDR=0.0.0.0:3000
```
CPUの場合は
```bash
docker run --platform linux/amd64 -it --rm -p 3000:3000 --name sbv2 \
-v ./models:/work/models --env-file .env \
ghcr.io/tuna2134/sbv2-api:cpu
```
</details>
CUDAの場合は
```sh
docker run -it --rm -p 3000:3000 --name sbv2 \

View File

@@ -1,7 +1 @@
日本語を母国語としない人々にとって、「日本語は非常に難しい言語である」と言われています。
その理由として、
・漢字、ひらがな、カタカナ、と表記が何種類もある。
・同一の漢字でも音読みと訓読みがある
・地名の読みが難しい
・主語、述語が省略される
などが挙げられます。
10,000年前までコロナが流行っていました

View File

@@ -126,11 +126,13 @@ torch.onnx.export(
f"../models/model_{out_name}.onnx",
verbose=True,
dynamic_axes={
"x_tst": {1: "batch_size"},
"x_tst": {0: "batch_size", 1: "x_tst_max_length"},
"x_tst_lengths": {0: "batch_size"},
"tones": {1: "batch_size"},
"language": {1: "batch_size"},
"bert": {2: "batch_size"},
"sid": {0: "batch_size"},
"tones": {0: "batch_size", 1: "x_tst_max_length"},
"language": {0: "batch_size", 1: "x_tst_max_length"},
"bert": {0: "batch_size", 2: "x_tst_max_length"},
"style_vec": {0: "batch_size"},
},
input_names=[
"x_tst",

View File

@@ -5,7 +5,7 @@ use axum::{
routing::{get, post},
Json, Router,
};
use sbv2_core::tts::TTSModelHolder;
use sbv2_core::tts::{SynthesizeOptions, TTSModelHolder};
use serde::Deserialize;
use std::env;
use std::sync::Arc;
@@ -49,17 +49,15 @@ async fn synthesize(
log::debug!("processing request: text={text}, ident={ident}, sdp_ratio={sdp_ratio}, length_scale={length_scale}");
let buffer = {
let tts_model = state.tts_model.lock().await;
let (bert_ori, phones, tones, lang_ids) = tts_model.parse_text(&text)?;
let style_vector = tts_model.get_style_vector(&ident, 0, 1.0)?;
tts_model.synthesize(
ident,
bert_ori.to_owned(),
phones,
tones,
lang_ids,
style_vector,
sdp_ratio,
length_scale,
tts_model.easy_synthesize(
&ident,
&text,
0,
SynthesizeOptions {
sdp_ratio,
length_scale,
..Default::default()
},
)?
};
Ok(([(CONTENT_TYPE, "audio/wav")], buffer))

View File

@@ -1,6 +1,6 @@
[package]
name = "sbv2_bindings"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -12,4 +12,4 @@ crate-type = ["cdylib"]
anyhow.workspace = true
ndarray.workspace = true
pyo3 = { version = "0.22.0", features = ["anyhow"] }
sbv2_core = { version = "0.1.3", path = "../sbv2_core" }
sbv2_core = { version = "0.1.4", path = "../sbv2_core" }

View File

@@ -8,11 +8,13 @@ def main():
model.load_sbv2file_from_path("amitaro", "../models/amitaro.sbv2")
print("All setup is done!")
style_vector = model.get_style_vector("amitaro", 0, 1.0)
with open("output.wav", "wb") as f:
f.write(model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5))
f.write(
model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5)
)
if __name__ == "__main__":
main()
main()

View File

@@ -11,5 +11,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dynamic = ["version"]
[tool.maturin]
features = ["pyo3/extension-module"]
strip = true

View File

@@ -1,6 +1,6 @@
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use sbv2_core::tts::TTSModelHolder;
use sbv2_core::tts::{TTSModelHolder, SynthesizeOptions};
use crate::style::StyleVector;
@@ -109,8 +109,8 @@ impl TTSModel {
/// テキスト
/// ident : str
/// 識別子
/// style_vector : StyleVector
/// スタイルベクトル
/// style_id : int
/// スタイルID
/// sdp_ratio : float
/// SDP比率
/// length_scale : float
@@ -125,21 +125,24 @@ impl TTSModel {
py: Python<'p>,
text: String,
ident: String,
style_vector: StyleVector,
style_id: i32,
sdp_ratio: f32,
length_scale: f32,
) -> anyhow::Result<Bound<PyBytes>> {
let (bert_ori, phones, tones, lang_ids) = self.model.parse_text(&text)?;
let data = self.model.synthesize(
ident,
bert_ori,
phones,
tones,
lang_ids,
style_vector.get(),
sdp_ratio,
length_scale,
let data = self.model.easy_synthesize(
ident.as_str(),
&text,
style_id,
SynthesizeOptions {
sdp_ratio,
length_scale,
..Default::default()
},
)?;
Ok(PyBytes::new_bound(py, &data))
}
fn unload(&mut self, ident: String) -> bool {
self.model.unload(ident)
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "sbv2_core"
description = "Style-Bert-VITSの推論ライブラリ"
version = "0.1.3"
version = "0.1.4"
edition = "2021"
license = "MIT"
readme = "../README.md"

View File

@@ -54,6 +54,17 @@ impl JTalk {
Ok(Self { jpreprocess })
}
/// Rewrites `text` by running jpreprocess's NJD preprocessing pass over it.
///
/// The text is converted to NJD nodes with `text_to_njd`, `preprocess()` is
/// applied to those nodes, and the string of every resulting node is
/// concatenated in order with no separator.
///
/// # Errors
///
/// Propagates any error returned by `text_to_njd`.
pub fn num2word(&self, text: &str) -> Result<String> {
    let mut parsed = self.jpreprocess.text_to_njd(text)?;
    // NOTE(review): presumably this is the step that spells out digits as
    // words (per the method name) — confirm against the jpreprocess docs.
    parsed.preprocess();
    // Collect straight into a `String`; an intermediate `Vec<String>` followed
    // by `join("")` would only add an extra allocation.
    Ok(parsed
        .nodes
        .iter()
        .map(|node| node.get_string().to_string())
        .collect::<String>())
}
pub fn process_text(&self, text: &str) -> Result<JTalkProcess> {
let parsed = self.jpreprocess.run_frontend(text)?;
let jtalk_process = JTalkProcess::new(Arc::clone(&self.jpreprocess), parsed);

View File

@@ -120,7 +120,8 @@ pub fn replace_punctuation(mut text: String) -> String {
for (k, v) in REPLACE_MAP.iter() {
text = text.replace(k, v);
}
PUNCTUATION_CLEANUP_PATTERN
let content = PUNCTUATION_CLEANUP_PATTERN
.replace_all(&text, "")
.to_string()
.to_string();
content
}

View File

@@ -33,6 +33,7 @@ pub struct TTSModel {
ident: TTSIdent,
}
/// High-level API for Style-Bert-VITS2
pub struct TTSModelHolder {
tokenizer: Tokenizer,
bert: Session,
@@ -41,6 +42,13 @@ pub struct TTSModelHolder {
}
impl TTSModelHolder {
/// Initialize a new TTSModelHolder
///
/// # Examples
///
/// ```rust,ignore
/// let mut tts_holder = TTSModelHolder::new(std::fs::read("deberta.onnx")?, std::fs::read("tokenizer.json")?)?;
/// ```
pub fn new<P: AsRef<[u8]>>(bert_model_bytes: P, tokenizer_bytes: P) -> Result<Self> {
let bert = model::load_model(bert_model_bytes, true)?;
let jtalk = jtalk::JTalk::new()?;
@@ -53,10 +61,18 @@ impl TTSModelHolder {
})
}
/// Return a list of model names
pub fn models(&self) -> Vec<String> {
self.models.iter().map(|m| m.ident.to_string()).collect()
}
/// Load a .sbv2 file binary
///
/// # Examples
///
/// ```rust,ignore
/// tts_holder.load_sbv2file("tsukuyomi", std::fs::read("tsukuyomi.sbv2")?)?;
/// ```
pub fn load_sbv2file<I: Into<TTSIdent>, P: AsRef<[u8]>>(
&mut self,
ident: I,
@@ -86,6 +102,13 @@ impl TTSModelHolder {
Ok(())
}
/// Load a style vector and onnx model binary
///
/// # Examples
///
/// ```rust,ignore
/// tts_holder.load("tsukuyomi", std::fs::read("style_vectors.json")?, std::fs::read("model.onnx")?)?;
/// ```
pub fn load<I: Into<TTSIdent>, P: AsRef<[u8]>>(
&mut self,
ident: I,
@@ -103,6 +126,7 @@ impl TTSModelHolder {
Ok(())
}
/// Unload a model
pub fn unload<I: Into<TTSIdent>>(&mut self, ident: I) -> bool {
let ident = ident.into();
if let Some((i, _)) = self
@@ -118,12 +142,17 @@ impl TTSModelHolder {
}
}
/// Parse text and return the input for synthesize
///
/// # Note
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
#[allow(clippy::type_complexity)]
pub fn parse_text(
&self,
text: &str,
) -> Result<(Array2<f32>, Array1<i64>, Array1<i64>, Array1<i64>)> {
let normalized_text = norm::normalize_text(text);
let text = self.jtalk.num2word(text)?;
let normalized_text = norm::normalize_text(&text);
let process = self.jtalk.process_text(&normalized_text)?;
let (phones, tones, mut word2ph) = process.g2p()?;
@@ -195,6 +224,10 @@ impl TTSModelHolder {
.ok_or(Error::ModelNotFoundError(ident.to_string()))
}
/// Get style vector by style id and weight
///
/// # Note
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
pub fn get_style_vector<I: Into<TTSIdent>>(
&self,
ident: I,
@@ -204,6 +237,13 @@ impl TTSModelHolder {
style::get_style_vector(&self.find_model(ident)?.style_vectors, style_id, weight)
}
/// Synthesize text to audio
///
/// # Examples
///
/// ```rust,ignore
/// let audio = tts_holder.easy_synthesize("tsukuyomi", "こんにちは", 0, SynthesizeOptions::default())?;
/// ```
pub fn easy_synthesize<I: Into<TTSIdent> + Copy>(
&self,
ident: I,
@@ -274,6 +314,10 @@ impl TTSModelHolder {
Ok(cursor.into_inner())
}
/// Synthesize text to audio
///
/// # Note
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
#[allow(clippy::too_many_arguments)]
pub fn synthesize<I: Into<TTSIdent>>(
&self,
@@ -300,11 +344,18 @@ impl TTSModelHolder {
}
}
/// Synthesize options
///
/// # Fields
/// - `sdp_ratio`: SDP ratio
/// - `length_scale`: Length scale
/// - `style_weight`: Style weight
/// - `split_sentences`: Split sentences
pub struct SynthesizeOptions {
sdp_ratio: f32,
length_scale: f32,
style_weight: f32,
split_sentences: bool,
pub sdp_ratio: f32,
pub length_scale: f32,
pub style_weight: f32,
pub split_sentences: bool,
}
impl Default for SynthesizeOptions {