mirror of
https://github.com/neodyland/sbv2-api.git
synced 2025-12-25 08:39:57 +00:00
Compare commits
44 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5e500b2c42 | ||
|
|
136375e5b6 | ||
|
|
aade119ddb | ||
|
|
55cedb2f6d | ||
|
|
f2940f4ebe | ||
|
|
96a5ab0672 | ||
|
|
64cbd151a6 | ||
|
|
3103fcef17 | ||
|
|
dd8ae77edc | ||
|
|
ee4c4ab8ad | ||
|
|
79120e4aee | ||
|
|
c947df2105 | ||
|
|
dcbb19fcdd | ||
|
|
b5601410f8 | ||
|
|
a3160ea2e8 | ||
|
|
1a978c3fe3 | ||
|
|
5837b66759 | ||
|
|
962fa9a49d | ||
|
|
290fb37c16 | ||
|
|
0c926751a4 | ||
|
|
da86aa811d | ||
|
|
4e0edaebcd | ||
|
|
1d7d65ae21 | ||
|
|
3112e3e8ec | ||
|
|
5724251fb5 | ||
|
|
3b1182f07d | ||
|
|
4ed463b05b | ||
|
|
c641bc7529 | ||
|
|
be0370a2f9 | ||
|
|
ec3e412ca0 | ||
|
|
ccad71c564 | ||
|
|
b9ea462497 | ||
|
|
0bb3c5b8ea | ||
|
|
b21d425733 | ||
|
|
dc347fd5b3 | ||
|
|
d8d8c82deb | ||
|
|
e70b8e51d2 | ||
|
|
38f9d98d1a | ||
|
|
16725552bf | ||
|
|
24bb626282 | ||
|
|
cc1f704e0b | ||
|
|
d5b2c4842e | ||
|
|
46649fad18 | ||
|
|
9e4e098170 |
28
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
28
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
---
|
||||
name: バグの報告
|
||||
about: バグを報告する場所です。
|
||||
title: ''
|
||||
labels: bug
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**バグの説明**
|
||||
バグのエラーを張ってください
|
||||
|
||||
**再現する方法**
|
||||
どのようにバグが発生したか時系列でまとめてください。
|
||||
|
||||
**本来の挙動**
|
||||
本来動作すべきことについて簡潔にまとめてください。
|
||||
|
||||
**スクリーンショット**
|
||||
もしもあるならでいいです。
|
||||
|
||||
**端末の情報**
|
||||
- OS: [e.g. Linux]
|
||||
|
||||
**コード**
|
||||
```rs
|
||||
ここにコード貼ってください
|
||||
```
|
||||
17
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
17
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
---
|
||||
name: 機能追加
|
||||
about: 機能追加してほしい場合これで作ってください。
|
||||
title: ''
|
||||
labels: enhancement
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**機能追加の説明**
|
||||
ここで追加される機能の説明してください。
|
||||
|
||||
**メリット**
|
||||
ここにメリットを書いてください。
|
||||
|
||||
**デメリット**
|
||||
ここにデメリットを書いてください。
|
||||
8
.github/pull_request_template.md
vendored
Normal file
8
.github/pull_request_template.md
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
## 概要
|
||||
(ここに本PRの説明をしてください。)
|
||||
|
||||
## 関連issue
|
||||
(ここに該当するissueの番号を書いてください。)
|
||||
|
||||
## 確認
|
||||
- [ ] 動作確認しましたか?
|
||||
132
.github/workflows/CI.yml
vendored
Normal file
132
.github/workflows/CI.yml
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
# This file is autogenerated by maturin v1.7.1
|
||||
# To update, run
|
||||
#
|
||||
# maturin generate-ci github
|
||||
#
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
runs-on: ${{ matrix.platform.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
platform:
|
||||
- runner: ubuntu-latest
|
||||
target: x86_64
|
||||
- runner: ubuntu-latest
|
||||
target: aarch64
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.x
|
||||
- name: Build wheels
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
target: ${{ matrix.platform.target }}
|
||||
args: --release --out dist --find-interpreter
|
||||
sccache: 'true'
|
||||
manylinux: auto
|
||||
working-directory: sbv2_bindings
|
||||
- name: Upload wheels
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels-linux-${{ matrix.platform.target }}
|
||||
path: sbv2_bindings/dist
|
||||
|
||||
windows:
|
||||
runs-on: ${{ matrix.platform.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
platform:
|
||||
- runner: windows-latest
|
||||
target: x64
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.x
|
||||
architecture: ${{ matrix.platform.target }}
|
||||
- name: Build wheels
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
target: ${{ matrix.platform.target }}
|
||||
args: --release --out dist --find-interpreter
|
||||
sccache: 'true'
|
||||
working-directory: sbv2_bindings
|
||||
- name: Upload wheels
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels-windows-${{ matrix.platform.target }}
|
||||
path: sbv2_bindings/dist
|
||||
|
||||
macos:
|
||||
runs-on: ${{ matrix.platform.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
platform:
|
||||
- runner: macos-12
|
||||
target: x86_64
|
||||
- runner: macos-14
|
||||
target: aarch64
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.x
|
||||
- name: Build wheels
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
target: ${{ matrix.platform.target }}
|
||||
args: --release --out dist --find-interpreter
|
||||
sccache: 'true'
|
||||
working-directory: sbv2_bindings
|
||||
- name: Upload wheels
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels-macos-${{ matrix.platform.target }}
|
||||
path: sbv2_bindings/dist
|
||||
|
||||
sdist:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build sdist
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: sdist
|
||||
args: --out dist
|
||||
working-directory: sbv2_bindings
|
||||
- name: Upload sdist
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels-sdist
|
||||
path: sbv2_bindings/dist
|
||||
|
||||
release:
|
||||
name: Release
|
||||
runs-on: ubuntu-latest
|
||||
if: "startsWith(github.ref, 'refs/tags/')"
|
||||
needs: [linux, windows, macos, sdist]
|
||||
environment: release
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Publish to PyPI
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: upload
|
||||
args: --non-interactive --skip-existing wheels-*/*
|
||||
6
.github/workflows/build.yml
vendored
6
.github/workflows/build.yml
vendored
@@ -14,6 +14,9 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
tag: [cpu, cuda]
|
||||
platform:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up QEMU
|
||||
@@ -33,4 +36,5 @@ jobs:
|
||||
push: true
|
||||
tags: |
|
||||
ghcr.io/${{ github.repository }}:${{ matrix.tag }}
|
||||
file: docker/${{ matrix.tag }}.Dockerfile
|
||||
file: docker/${{ matrix.tag }}.Dockerfile
|
||||
platforms: ${{ matrix.platform }}
|
||||
61
Cargo.lock
generated
61
Cargo.lock
generated
@@ -77,9 +77,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.87"
|
||||
version = "1.0.89"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8"
|
||||
checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
@@ -100,9 +100,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.5"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
|
||||
checksum = "8f43644eed690f5374f1af436ecd6aea01cd201f6fbdf0178adaf6907afb2cec"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core",
|
||||
@@ -126,7 +126,7 @@ dependencies = [
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tower 0.5.1",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
@@ -134,9 +134,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.4.3"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
|
||||
checksum = "5e6b8ba012a258d63c9adfa28b9ddcf66149da6f986c5b5452e629d5ee64bf00"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
@@ -147,7 +147,7 @@ dependencies = [
|
||||
"mime",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"sync_wrapper 0.1.2",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
@@ -771,6 +771,8 @@ dependencies = [
|
||||
"hyper",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tower 0.4.13",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1361,9 +1363,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.19.0"
|
||||
version = "1.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||
checksum = "33ea5043e58958ee56f3e15a90aee535795cd7dfd319846288d93c5b57d85cbe"
|
||||
|
||||
[[package]]
|
||||
name = "onig"
|
||||
@@ -1561,9 +1563,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3"
|
||||
version = "0.22.2"
|
||||
version = "0.22.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433"
|
||||
checksum = "15ee168e30649f7f234c3d49ef5a7a6cbf5134289bc46c29ff3155fa3221c225"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cfg-if",
|
||||
@@ -1580,9 +1582,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-build-config"
|
||||
version = "0.22.2"
|
||||
version = "0.22.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8"
|
||||
checksum = "e61cef80755fe9e46bb8a0b8f20752ca7676dcc07a5277d8b7768c6172e529b3"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"target-lexicon",
|
||||
@@ -1590,9 +1592,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-ffi"
|
||||
version = "0.22.2"
|
||||
version = "0.22.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6"
|
||||
checksum = "67ce096073ec5405f5ee2b8b31f03a68e02aa10d5d4f565eca04acc41931fa1c"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"pyo3-build-config",
|
||||
@@ -1600,9 +1602,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros"
|
||||
version = "0.22.2"
|
||||
version = "0.22.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206"
|
||||
checksum = "2440c6d12bc8f3ae39f1e775266fa5122fd0c8891ce7520fa6048e683ad3de28"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"pyo3-macros-backend",
|
||||
@@ -1612,9 +1614,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros-backend"
|
||||
version = "0.22.2"
|
||||
version = "0.22.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372"
|
||||
checksum = "1be962f0e06da8f8465729ea2cb71a416d2257dff56cbe40a70d3e62a93ae5d1"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
@@ -1831,7 +1833,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sbv2_bindings"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"ndarray",
|
||||
@@ -1841,7 +1843,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sbv2_core"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"dotenvy",
|
||||
@@ -2181,6 +2183,21 @@ dependencies = [
|
||||
"tokio",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
"sync_wrapper 0.1.2",
|
||||
"tokio",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
|
||||
@@ -6,4 +6,9 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"]
|
||||
anyhow = "1.0.86"
|
||||
dotenvy = "0.15.7"
|
||||
env_logger = "0.11.5"
|
||||
ndarray = "0.16.1"
|
||||
ndarray = "0.16.1"
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
debug = false
|
||||
strip = true
|
||||
17
README.md
17
README.md
@@ -27,6 +27,7 @@ JP-Extra しか対応していません。(基本的に対応する予定もあ
|
||||
- [x] GPU 対応(DirectML)
|
||||
- [x] GPU 対応(CoreML)
|
||||
- [ ] WASM 変換(依存ライブラリの関係により現在は不可)
|
||||
- [x] arm64のdockerサポート
|
||||
|
||||
## 構造説明
|
||||
|
||||
@@ -57,6 +58,22 @@ docker run -it --rm -p 3000:3000 --name sbv2 \
|
||||
ghcr.io/tuna2134/sbv2-api:cpu
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Apple Silicon搭載のMac(M1以降)の場合</summary>
|
||||
docker上で動作させる場合、.envのADDRをlocalhostから0.0.0.0に変更してください。
|
||||
|
||||
```yaml
|
||||
ADDR=0.0.0.0:3000
|
||||
```
|
||||
|
||||
CPUの場合は
|
||||
```bash
|
||||
docker run --platform linux/amd64 -it --rm -p 3000:3000 --name sbv2 \
|
||||
-v ./models:/work/models --env-file .env \
|
||||
ghcr.io/tuna2134/sbv2-api:cpu
|
||||
```
|
||||
</details>
|
||||
|
||||
CUDAの場合は
|
||||
```sh
|
||||
docker run -it --rm -p 3000:3000 --name sbv2 \
|
||||
|
||||
@@ -1,7 +1 @@
|
||||
日本語を母国語としない人々にとって、「日本語は非常に難しい言語である」と言われています。
|
||||
その理由として、
|
||||
・漢字、ひらがな、カタカナ、と表記が何種類もある。
|
||||
・同一の漢字でも音読みと訓読みがある
|
||||
・地名の読みが難しい
|
||||
・主語、述語が省略される
|
||||
などが挙げられます。
|
||||
10,000年前までコロナが流行っていました
|
||||
@@ -126,11 +126,13 @@ torch.onnx.export(
|
||||
f"../models/model_{out_name}.onnx",
|
||||
verbose=True,
|
||||
dynamic_axes={
|
||||
"x_tst": {1: "batch_size"},
|
||||
"x_tst": {0: "batch_size", 1: "x_tst_max_length"},
|
||||
"x_tst_lengths": {0: "batch_size"},
|
||||
"tones": {1: "batch_size"},
|
||||
"language": {1: "batch_size"},
|
||||
"bert": {2: "batch_size"},
|
||||
"sid": {0: "batch_size"},
|
||||
"tones": {0: "batch_size", 1: "x_tst_max_length"},
|
||||
"language": {0: "batch_size", 1: "x_tst_max_length"},
|
||||
"bert": {0: "batch_size", 2: "x_tst_max_length"},
|
||||
"style_vec": {0: "batch_size"},
|
||||
},
|
||||
input_names=[
|
||||
"x_tst",
|
||||
|
||||
@@ -5,7 +5,7 @@ use axum::{
|
||||
routing::{get, post},
|
||||
Json, Router,
|
||||
};
|
||||
use sbv2_core::tts::TTSModelHolder;
|
||||
use sbv2_core::tts::{SynthesizeOptions, TTSModelHolder};
|
||||
use serde::Deserialize;
|
||||
use std::env;
|
||||
use std::sync::Arc;
|
||||
@@ -49,17 +49,15 @@ async fn synthesize(
|
||||
log::debug!("processing request: text={text}, ident={ident}, sdp_ratio={sdp_ratio}, length_scale={length_scale}");
|
||||
let buffer = {
|
||||
let tts_model = state.tts_model.lock().await;
|
||||
let (bert_ori, phones, tones, lang_ids) = tts_model.parse_text(&text)?;
|
||||
let style_vector = tts_model.get_style_vector(&ident, 0, 1.0)?;
|
||||
tts_model.synthesize(
|
||||
ident,
|
||||
bert_ori.to_owned(),
|
||||
phones,
|
||||
tones,
|
||||
lang_ids,
|
||||
style_vector,
|
||||
sdp_ratio,
|
||||
length_scale,
|
||||
tts_model.easy_synthesize(
|
||||
&ident,
|
||||
&text,
|
||||
0,
|
||||
SynthesizeOptions {
|
||||
sdp_ratio,
|
||||
length_scale,
|
||||
..Default::default()
|
||||
},
|
||||
)?
|
||||
};
|
||||
Ok(([(CONTENT_TYPE, "audio/wav")], buffer))
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "sbv2_bindings"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
@@ -12,4 +12,4 @@ crate-type = ["cdylib"]
|
||||
anyhow.workspace = true
|
||||
ndarray.workspace = true
|
||||
pyo3 = { version = "0.22.0", features = ["anyhow"] }
|
||||
sbv2_core = { version = "0.1.3", path = "../sbv2_core" }
|
||||
sbv2_core = { version = "0.1.4", path = "../sbv2_core" }
|
||||
|
||||
@@ -8,11 +8,13 @@ def main():
|
||||
|
||||
model.load_sbv2file_from_path("amitaro", "../models/amitaro.sbv2")
|
||||
print("All setup is done!")
|
||||
|
||||
|
||||
style_vector = model.get_style_vector("amitaro", 0, 1.0)
|
||||
with open("output.wav", "wb") as f:
|
||||
f.write(model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5))
|
||||
f.write(
|
||||
model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
@@ -11,5 +11,7 @@ classifiers = [
|
||||
"Programming Language :: Python :: Implementation :: PyPy",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
||||
[tool.maturin]
|
||||
features = ["pyo3/extension-module"]
|
||||
strip = true
|
||||
@@ -1,6 +1,6 @@
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyBytes;
|
||||
use sbv2_core::tts::TTSModelHolder;
|
||||
use sbv2_core::tts::{TTSModelHolder, SynthesizeOptions};
|
||||
|
||||
use crate::style::StyleVector;
|
||||
|
||||
@@ -109,8 +109,8 @@ impl TTSModel {
|
||||
/// テキスト
|
||||
/// ident : str
|
||||
/// 識別子
|
||||
/// style_vector : StyleVector
|
||||
/// スタイルベクトル
|
||||
/// style_id : int
|
||||
/// スタイルID
|
||||
/// sdp_ratio : float
|
||||
/// SDP比率
|
||||
/// length_scale : float
|
||||
@@ -125,21 +125,24 @@ impl TTSModel {
|
||||
py: Python<'p>,
|
||||
text: String,
|
||||
ident: String,
|
||||
style_vector: StyleVector,
|
||||
style_id: i32,
|
||||
sdp_ratio: f32,
|
||||
length_scale: f32,
|
||||
) -> anyhow::Result<Bound<PyBytes>> {
|
||||
let (bert_ori, phones, tones, lang_ids) = self.model.parse_text(&text)?;
|
||||
let data = self.model.synthesize(
|
||||
ident,
|
||||
bert_ori,
|
||||
phones,
|
||||
tones,
|
||||
lang_ids,
|
||||
style_vector.get(),
|
||||
sdp_ratio,
|
||||
length_scale,
|
||||
let data = self.model.easy_synthesize(
|
||||
ident.as_str(),
|
||||
&text,
|
||||
style_id,
|
||||
SynthesizeOptions {
|
||||
sdp_ratio,
|
||||
length_scale,
|
||||
..Default::default()
|
||||
},
|
||||
)?;
|
||||
Ok(PyBytes::new_bound(py, &data))
|
||||
}
|
||||
|
||||
fn unload(&mut self, ident: String) -> bool {
|
||||
self.model.unload(ident)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "sbv2_core"
|
||||
description = "Style-Bert-VITSの推論ライブラリ"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
readme = "../README.md"
|
||||
|
||||
@@ -54,6 +54,17 @@ impl JTalk {
|
||||
Ok(Self { jpreprocess })
|
||||
}
|
||||
|
||||
pub fn num2word(&self, text: &str) -> Result<String> {
|
||||
let mut parsed = self.jpreprocess.text_to_njd(text)?;
|
||||
parsed.preprocess();
|
||||
let texts: Vec<String> = parsed
|
||||
.nodes
|
||||
.iter()
|
||||
.map(|x| x.get_string().to_string())
|
||||
.collect();
|
||||
Ok(texts.join(""))
|
||||
}
|
||||
|
||||
pub fn process_text(&self, text: &str) -> Result<JTalkProcess> {
|
||||
let parsed = self.jpreprocess.run_frontend(text)?;
|
||||
let jtalk_process = JTalkProcess::new(Arc::clone(&self.jpreprocess), parsed);
|
||||
|
||||
@@ -120,7 +120,8 @@ pub fn replace_punctuation(mut text: String) -> String {
|
||||
for (k, v) in REPLACE_MAP.iter() {
|
||||
text = text.replace(k, v);
|
||||
}
|
||||
PUNCTUATION_CLEANUP_PATTERN
|
||||
let content = PUNCTUATION_CLEANUP_PATTERN
|
||||
.replace_all(&text, "")
|
||||
.to_string()
|
||||
.to_string();
|
||||
content
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ pub struct TTSModel {
|
||||
ident: TTSIdent,
|
||||
}
|
||||
|
||||
/// High-level Style-Bert-VITS2's API
|
||||
pub struct TTSModelHolder {
|
||||
tokenizer: Tokenizer,
|
||||
bert: Session,
|
||||
@@ -41,6 +42,13 @@ pub struct TTSModelHolder {
|
||||
}
|
||||
|
||||
impl TTSModelHolder {
|
||||
/// Initialize a new TTSModelHolder
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rs
|
||||
/// let mut tts_holder = TTSModelHolder::new(std::fs::read("deberta.onnx")?, std::fs::read("tokenizer.json")?)?;
|
||||
/// ```
|
||||
pub fn new<P: AsRef<[u8]>>(bert_model_bytes: P, tokenizer_bytes: P) -> Result<Self> {
|
||||
let bert = model::load_model(bert_model_bytes, true)?;
|
||||
let jtalk = jtalk::JTalk::new()?;
|
||||
@@ -53,10 +61,18 @@ impl TTSModelHolder {
|
||||
})
|
||||
}
|
||||
|
||||
/// Return a list of model names
|
||||
pub fn models(&self) -> Vec<String> {
|
||||
self.models.iter().map(|m| m.ident.to_string()).collect()
|
||||
}
|
||||
|
||||
/// Load a .sbv2 file binary
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rs
|
||||
/// tts_holder.load_sbv2file("tsukuyomi", std::fs::read("tsukuyomi.sbv2")?)?;
|
||||
/// ```
|
||||
pub fn load_sbv2file<I: Into<TTSIdent>, P: AsRef<[u8]>>(
|
||||
&mut self,
|
||||
ident: I,
|
||||
@@ -86,6 +102,13 @@ impl TTSModelHolder {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load a style vector and onnx model binary
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rs
|
||||
/// tts_holder.load("tsukuyomi", std::fs::read("style_vectors.json")?, std::fs::read("model.onnx")?)?;
|
||||
/// ```
|
||||
pub fn load<I: Into<TTSIdent>, P: AsRef<[u8]>>(
|
||||
&mut self,
|
||||
ident: I,
|
||||
@@ -103,6 +126,7 @@ impl TTSModelHolder {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unload a model
|
||||
pub fn unload<I: Into<TTSIdent>>(&mut self, ident: I) -> bool {
|
||||
let ident = ident.into();
|
||||
if let Some((i, _)) = self
|
||||
@@ -118,12 +142,17 @@ impl TTSModelHolder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse text and return the input for synthesize
|
||||
///
|
||||
/// # Note
|
||||
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn parse_text(
|
||||
&self,
|
||||
text: &str,
|
||||
) -> Result<(Array2<f32>, Array1<i64>, Array1<i64>, Array1<i64>)> {
|
||||
let normalized_text = norm::normalize_text(text);
|
||||
let text = self.jtalk.num2word(text)?;
|
||||
let normalized_text = norm::normalize_text(&text);
|
||||
|
||||
let process = self.jtalk.process_text(&normalized_text)?;
|
||||
let (phones, tones, mut word2ph) = process.g2p()?;
|
||||
@@ -195,6 +224,10 @@ impl TTSModelHolder {
|
||||
.ok_or(Error::ModelNotFoundError(ident.to_string()))
|
||||
}
|
||||
|
||||
/// Get style vector by style id and weight
|
||||
///
|
||||
/// # Note
|
||||
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
|
||||
pub fn get_style_vector<I: Into<TTSIdent>>(
|
||||
&self,
|
||||
ident: I,
|
||||
@@ -204,6 +237,13 @@ impl TTSModelHolder {
|
||||
style::get_style_vector(&self.find_model(ident)?.style_vectors, style_id, weight)
|
||||
}
|
||||
|
||||
/// Synthesize text to audio
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rs
|
||||
/// let audio = tts_holder.easy_synthesize("tsukuyomi", "こんにちは", 0, SynthesizeOptions::default())?;
|
||||
/// ```
|
||||
pub fn easy_synthesize<I: Into<TTSIdent> + Copy>(
|
||||
&self,
|
||||
ident: I,
|
||||
@@ -274,6 +314,10 @@ impl TTSModelHolder {
|
||||
Ok(cursor.into_inner())
|
||||
}
|
||||
|
||||
/// Synthesize text to audio
|
||||
///
|
||||
/// # Note
|
||||
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn synthesize<I: Into<TTSIdent>>(
|
||||
&self,
|
||||
@@ -300,11 +344,18 @@ impl TTSModelHolder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Synthesize options
|
||||
///
|
||||
/// # Fields
|
||||
/// - `sdp_ratio`: SDP ratio
|
||||
/// - `length_scale`: Length scale
|
||||
/// - `style_weight`: Style weight
|
||||
/// - `split_sentences`: Split sentences
|
||||
pub struct SynthesizeOptions {
|
||||
sdp_ratio: f32,
|
||||
length_scale: f32,
|
||||
style_weight: f32,
|
||||
split_sentences: bool,
|
||||
pub sdp_ratio: f32,
|
||||
pub length_scale: f32,
|
||||
pub style_weight: f32,
|
||||
pub split_sentences: bool,
|
||||
}
|
||||
|
||||
impl Default for SynthesizeOptions {
|
||||
|
||||
Reference in New Issue
Block a user