Compare commits

..

58 Commits

Author SHA1 Message Date
コマリン親衛隊
a99fd39834 Merge pull request #60 from tuna2134/label
正規表現使うのやめた
2024-09-25 22:32:08 +09:00
tuna2134
886ab78eeb Merge branch 'label' of https://github.com/tuna2134/sbv2-api into label 2024-09-25 13:22:59 +00:00
コマリン親衛隊
c85f474dbf Update jtalk.rs 2024-09-25 22:22:52 +09:00
tuna2134
6d160d7ae8 remove 2024-09-25 13:16:09 +00:00
tuna2134
ee927d65cb remove e3 2024-09-25 12:59:12 +00:00
tuna2134
6e7d641ecb fix bug 2024-09-25 12:56:13 +00:00
tuna2134
eb249aad81 Merge branch 'main' of https://github.com/tuna2134/sbv2-api 2024-09-25 12:53:26 +00:00
tuna2134
f79a67138f fix stop to use re 2024-09-25 12:53:23 +00:00
コマリン親衛隊
09945e2c1c Merge pull request #59 from tuna2134/renovate/tar-0.x-lockfile
fix(deps): update rust crate tar to v0.4.42
2024-09-25 17:25:04 +09:00
renovate[bot]
821b4c7fb3 fix(deps): update rust crate tar to v0.4.42 2024-09-25 03:03:08 +00:00
コマリン親衛隊
ec06c35929 Merge pull request #56 from tuna2134/fix-coreml
fix coremlのビルド失敗を修正
2024-09-24 06:42:45 +09:00
コマリン親衛隊
1373aef4b2 Merge pull request #57 from tuna2134/renovate/thiserror-1.x-lockfile
fix(deps): update rust crate thiserror to v1.0.64
2024-09-23 07:43:50 +09:00
renovate[bot]
e2e49fd0e8 fix(deps): update rust crate thiserror to v1.0.64 2024-09-22 19:16:03 +00:00
tuna2134
0cf9f87cc9 fix build 2024-09-22 14:26:15 +00:00
コマリン親衛隊
5e500b2c42 Support arm64 2024-09-22 19:12:29 +09:00
コマリン親衛隊
136375e5b6 Merge pull request #48 from tuna2134/renovate/pyo3-0.x-lockfile
fix(deps): update rust crate pyo3 to v0.22.3
2024-09-22 18:56:40 +09:00
tuna2134
aade119ddb add stripe 2024-09-22 08:05:48 +00:00
tuna2134
55cedb2f6d fix dists path 2024-09-22 07:48:53 +00:00
tuna2134
f2940f4ebe bump version 2024-09-22 07:41:58 +00:00
tuna2134
96a5ab0672 fix returns type 2024-09-22 07:40:46 +00:00
tuna2134
64cbd151a6 change to openid upload 2024-09-22 07:36:03 +00:00
tuna2134
3103fcef17 fix bug 2024-09-22 07:25:48 +00:00
tuna2134
dd8ae77edc fix bug 2024-09-22 07:24:37 +00:00
tuna2134
ee4c4ab8ad use easy function 2024-09-22 07:22:26 +00:00
コマリン親衛隊
79120e4aee Fix 2024-09-22 16:17:08 +09:00
コマリン親衛隊
c947df2105 Unsupport musllinux 2024-09-22 16:16:25 +09:00
tuna2134
dcbb19fcdd fixed 2024-09-22 07:13:26 +00:00
tuna2134
b5601410f8 Merge branch 'main' of https://github.com/tuna2134/sbv2-api 2024-09-22 07:10:30 +00:00
tuna2134
a3160ea2e8 add 2024-09-22 07:10:28 +00:00
コマリン親衛隊
1a978c3fe3 Create pull_request_template.md 2024-09-22 15:23:43 +09:00
コマリン親衛隊
5837b66759 Merge pull request #53 from Googlefan256/main
Build optimization and convert model onnx
2024-09-22 15:15:44 +09:00
Googlefan
962fa9a49d fix: build optimization, convert model onnx 2024-09-22 06:00:57 +00:00
コマリン親衛隊
290fb37c16 Merge pull request #52 from tuna2134/docs
クレートのdocsの充実化
2024-09-22 11:01:56 +09:00
tuna2134
0c926751a4 bump version 2024-09-22 02:01:13 +00:00
tuna2134
da86aa811d Merge branch 'main' of https://github.com/tuna2134/sbv2-api into docs 2024-09-22 01:54:19 +00:00
tuna2134
4e0edaebcd docsの充実化 2024-09-22 01:54:06 +00:00
tuna2134
1d7d65ae21 add support arm64(maybe) 2024-09-22 01:16:04 +00:00
tuna2134
3112e3e8ec added new todo 2024-09-22 01:01:53 +00:00
コマリン親衛隊
5724251fb5 Merge pull request #50 from kamakiri1192/feature/apple-silicon-mac
feat: Added explanation for Apple Silicon Mac
2024-09-22 09:58:35 +09:00
コマリン親衛隊
3b1182f07d Merge pull request #49 from tuna2134/renovate/axum-0.x-lockfile
Update Rust crate axum to v0.7.6
2024-09-22 09:34:15 +09:00
kamakiri1192
4ed463b05b feat: Added explanation for Apple Silicon Mac 2024-09-21 16:05:47 +09:00
renovate[bot]
c641bc7529 Update Rust crate axum to v0.7.6 2024-09-20 20:05:48 +00:00
tuna2134
be0370a2f9 format 2024-09-17 10:55:45 +00:00
tuna2134
ec3e412ca0 fix bug 2024-09-17 10:55:27 +00:00
tuna2134
ccad71c564 use easy synthesize function 2024-09-17 10:44:18 +00:00
コマリン親衛隊
b9ea462497 Merge pull request #47 from tuna2134/renovate/anyhow-1.x-lockfile
Update Rust crate anyhow to v1.0.89
2024-09-17 10:54:18 +09:00
renovate[bot]
0bb3c5b8ea Update Rust crate pyo3 to v0.22.3 2024-09-16 09:25:40 +00:00
renovate[bot]
b21d425733 Update Rust crate anyhow to v1.0.89 2024-09-16 09:25:35 +00:00
コマリン親衛隊
dc347fd5b3 Merge pull request #46 from tuna2134/renovate/anyhow-1.x-lockfile 2024-09-15 15:36:09 +09:00
renovate[bot]
d8d8c82deb Update Rust crate anyhow to v1.0.89 2024-09-15 04:42:37 +00:00
コマリン親衛隊
e70b8e51d2 Merge pull request #45 from tuna2134/number
数字を読めれるように
2024-09-14 12:28:47 +09:00
tuna2134
38f9d98d1a fix bug 2024-09-14 03:25:53 +00:00
コマリン親衛隊
16725552bf Merge pull request #44 from tuna2134/tuna2134-patch-1
Fix Japanese to english
2024-09-14 07:31:25 +09:00
コマリン親衛隊
24bb626282 Rename 機能追加.md to feature_request.md 2024-09-14 07:28:47 +09:00
コマリン親衛隊
cc1f704e0b Rename バグの報告.md to bug_report.md 2024-09-14 07:28:21 +09:00
コマリン親衛隊
d5b2c4842e Update issue templates 2024-09-13 21:02:14 +09:00
コマリン親衛隊
46649fad18 Update issue templates 2024-09-13 20:59:14 +09:00
tuna2134
9e4e098170 fix private to public 2024-09-13 11:36:24 +00:00
21 changed files with 412 additions and 112 deletions

28
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@@ -0,0 +1,28 @@
---
name: バグの報告
about: バグを報告する場所です。
title: ''
labels: bug
assignees: ''
---
**バグの説明**
バグのエラーを貼ってください
**再現する方法**
どのようにバグが発生したか時系列でまとめてください。
**本来の挙動**
本来動作すべきことについて簡潔にまとめてください。
**スクリーンショット**
もしもあるならでいいです。
**端末の情報**
- OS: [e.g. Linux]
**コード**
```rs
ここにコード貼ってください
```

View File

@@ -0,0 +1,17 @@
---
name: 機能追加
about: 機能追加してほしい場合これで作ってください。
title: ''
labels: enhancement
assignees: ''
---
**機能追加の説明**
ここで追加される機能の説明してください。
**メリット**
ここにメリットを書いてください。
**デメリット**
ここにデメリットを書いてください。

8
.github/pull_request_template.md vendored Normal file
View File

@@ -0,0 +1,8 @@
## 概要
(ここに本PRの説明をしてください。)
## 関連issue
(ここに該当するissueの番号を書いてください。)
## 確認
- [ ] 動作確認しましたか?

132
.github/workflows/CI.yml vendored Normal file
View File

@@ -0,0 +1,132 @@
# This file is autogenerated by maturin v1.7.1
# To update, run
#
# maturin generate-ci github
#
name: CI
on:
push:
branches:
- main
- master
tags:
- '*'
pull_request:
workflow_dispatch:
permissions:
contents: read
id-token: write
jobs:
linux:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: ubuntu-latest
target: x86_64
- runner: ubuntu-latest
target: aarch64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
manylinux: auto
working-directory: sbv2_bindings
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-linux-${{ matrix.platform.target }}
path: sbv2_bindings/dist
windows:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: windows-latest
target: x64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
architecture: ${{ matrix.platform.target }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: sbv2_bindings
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-windows-${{ matrix.platform.target }}
path: sbv2_bindings/dist
macos:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: macos-12
target: x86_64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: sbv2_bindings
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-macos-${{ matrix.platform.target }}
path: sbv2_bindings/dist
sdist:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build sdist
uses: PyO3/maturin-action@v1
with:
command: sdist
args: --out dist
working-directory: sbv2_bindings
- name: Upload sdist
uses: actions/upload-artifact@v4
with:
name: wheels-sdist
path: sbv2_bindings/dist
release:
name: Release
runs-on: ubuntu-latest
if: "startsWith(github.ref, 'refs/tags/')"
needs: [linux, windows, macos, sdist]
environment: release
steps:
- uses: actions/download-artifact@v4
- name: Publish to PyPI
uses: PyO3/maturin-action@v1
with:
command: upload
args: --non-interactive --skip-existing wheels-*/*

View File

@@ -14,6 +14,9 @@ jobs:
strategy:
matrix:
tag: [cpu, cuda]
platform:
- linux/amd64
- linux/arm64
steps:
- uses: actions/checkout@v4
- name: Set up QEMU
@@ -33,4 +36,5 @@ jobs:
push: true
tags: |
ghcr.io/${{ github.repository }}:${{ matrix.tag }}
file: docker/${{ matrix.tag }}.Dockerfile
file: docker/${{ matrix.tag }}.Dockerfile
platforms: ${{ matrix.platform }}

75
Cargo.lock generated
View File

@@ -77,9 +77,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.87"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8"
checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
[[package]]
name = "async-trait"
@@ -100,9 +100,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
[[package]]
name = "axum"
version = "0.7.5"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
checksum = "8f43644eed690f5374f1af436ecd6aea01cd201f6fbdf0178adaf6907afb2cec"
dependencies = [
"async-trait",
"axum-core",
@@ -126,7 +126,7 @@ dependencies = [
"serde_urlencoded",
"sync_wrapper 1.0.1",
"tokio",
"tower",
"tower 0.5.1",
"tower-layer",
"tower-service",
"tracing",
@@ -134,9 +134,9 @@ dependencies = [
[[package]]
name = "axum-core"
version = "0.4.3"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
checksum = "5e6b8ba012a258d63c9adfa28b9ddcf66149da6f986c5b5452e629d5ee64bf00"
dependencies = [
"async-trait",
"bytes",
@@ -147,7 +147,7 @@ dependencies = [
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper 0.1.2",
"sync_wrapper 1.0.1",
"tower-layer",
"tower-service",
"tracing",
@@ -771,6 +771,8 @@ dependencies = [
"hyper",
"pin-project-lite",
"tokio",
"tower 0.4.13",
"tower-service",
]
[[package]]
@@ -1361,9 +1363,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.19.0"
version = "1.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
checksum = "33ea5043e58958ee56f3e15a90aee535795cd7dfd319846288d93c5b57d85cbe"
[[package]]
name = "onig"
@@ -1561,9 +1563,9 @@ dependencies = [
[[package]]
name = "pyo3"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433"
checksum = "15ee168e30649f7f234c3d49ef5a7a6cbf5134289bc46c29ff3155fa3221c225"
dependencies = [
"anyhow",
"cfg-if",
@@ -1580,9 +1582,9 @@ dependencies = [
[[package]]
name = "pyo3-build-config"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8"
checksum = "e61cef80755fe9e46bb8a0b8f20752ca7676dcc07a5277d8b7768c6172e529b3"
dependencies = [
"once_cell",
"target-lexicon",
@@ -1590,9 +1592,9 @@ dependencies = [
[[package]]
name = "pyo3-ffi"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6"
checksum = "67ce096073ec5405f5ee2b8b31f03a68e02aa10d5d4f565eca04acc41931fa1c"
dependencies = [
"libc",
"pyo3-build-config",
@@ -1600,9 +1602,9 @@ dependencies = [
[[package]]
name = "pyo3-macros"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206"
checksum = "2440c6d12bc8f3ae39f1e775266fa5122fd0c8891ce7520fa6048e683ad3de28"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
@@ -1612,9 +1614,9 @@ dependencies = [
[[package]]
name = "pyo3-macros-backend"
version = "0.22.2"
version = "0.22.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372"
checksum = "1be962f0e06da8f8465729ea2cb71a416d2257dff56cbe40a70d3e62a93ae5d1"
dependencies = [
"heck",
"proc-macro2",
@@ -1817,7 +1819,7 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "sbv2_api"
version = "0.1.0"
version = "0.2.0-alpha"
dependencies = [
"anyhow",
"axum",
@@ -1831,7 +1833,7 @@ dependencies = [
[[package]]
name = "sbv2_bindings"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"anyhow",
"ndarray",
@@ -1841,7 +1843,7 @@ dependencies = [
[[package]]
name = "sbv2_core"
version = "0.1.3"
version = "0.2.0-alpha"
dependencies = [
"anyhow",
"dotenvy",
@@ -2057,9 +2059,9 @@ checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
[[package]]
name = "tar"
version = "0.4.41"
version = "0.4.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909"
checksum = "4ff6c40d3aedb5e06b57c6f669ad17ab063dd1e63d977c6a88e7f4dfa4f04020"
dependencies = [
"filetime",
"libc",
@@ -2074,18 +2076,18 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "thiserror"
version = "1.0.63"
version = "1.0.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
version = "1.0.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3"
dependencies = [
"proc-macro2",
"quote",
@@ -2181,6 +2183,21 @@ dependencies = [
"tokio",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f"
dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
"sync_wrapper 0.1.2",
"tokio",
"tower-layer",
"tower-service",
"tracing",
]

View File

@@ -6,4 +6,9 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"]
anyhow = "1.0.86"
dotenvy = "0.15.7"
env_logger = "0.11.5"
ndarray = "0.16.1"
ndarray = "0.16.1"
[profile.release]
lto = true
debug = false
strip = true

View File

@@ -1,5 +1,8 @@
# SBV2-API
## 注意:本バージョンはアルファ版です。
安定版を利用したい場合は[こちら](https://github.com/tuna2134/sbv2-api/tree/v0.1.x)をご覧ください。
## プログラミングに詳しくない方向け
[こちら](https://github.com/tuna2134/sbv2-gui?tab=readme-ov-file)を参照してください。
@@ -27,6 +30,7 @@ JP-Extra しか対応していません。(基本的に対応する予定もあ
- [x] GPU 対応(DirectML)
- [x] GPU 対応(CoreML)
- [ ] WASM 変換(依存ライブラリの関係により現在は不可)
- [x] arm64のdockerサポート
## 構造説明
@@ -57,6 +61,22 @@ docker run -it --rm -p 3000:3000 --name sbv2 \
ghcr.io/tuna2134/sbv2-api:cpu
```
<details>
<summary>Apple Silicon搭載のMac(M1以降)の場合</summary>
docker上で動作させる場合、.envのADDRをlocalhostから0.0.0.0に変更してください。
```yaml
ADDR=0.0.0.0:3000
```
CPUの場合は
```bash
docker run --platform linux/amd64 -it --rm -p 3000:3000 --name sbv2 \
-v ./models:/work/models --env-file .env \
ghcr.io/tuna2134/sbv2-api:cpu
```
</details>
CUDAの場合は
```sh
docker run -it --rm -p 3000:3000 --name sbv2 \

View File

@@ -1,7 +1 @@
日本語を母国語としない人々にとって、「日本語は非常に難しい言語である」と言われています。
その理由として、
・漢字、ひらがな、カタカナ、と表記が何種類もある。
・同一の漢字でも音読みと訓読みがある
・地名の読みが難しい
・主語、述語が省略される
などが挙げられます。
10,000年前までコロナが流行っていました

View File

@@ -126,11 +126,13 @@ torch.onnx.export(
f"../models/model_{out_name}.onnx",
verbose=True,
dynamic_axes={
"x_tst": {1: "batch_size"},
"x_tst": {0: "batch_size", 1: "x_tst_max_length"},
"x_tst_lengths": {0: "batch_size"},
"tones": {1: "batch_size"},
"language": {1: "batch_size"},
"bert": {2: "batch_size"},
"sid": {0: "batch_size"},
"tones": {0: "batch_size", 1: "x_tst_max_length"},
"language": {0: "batch_size", 1: "x_tst_max_length"},
"bert": {0: "batch_size", 2: "x_tst_max_length"},
"style_vec": {0: "batch_size"},
},
input_names=[
"x_tst",

View File

@@ -1,6 +1,6 @@
[package]
name = "sbv2_api"
version = "0.1.0"
version = "0.2.0-alpha"
edition = "2021"
[dependencies]
@@ -9,7 +9,7 @@ axum = "0.7.5"
dotenvy.workspace = true
env_logger.workspace = true
log = "0.4.22"
sbv2_core = { version = "0.1.3", path = "../sbv2_core" }
sbv2_core = { version = "0.2.0-alpha", path = "../sbv2_core" }
serde = { version = "1.0.210", features = ["derive"] }
tokio = { version = "1.40.0", features = ["full"] }

5
sbv2_api/build.rs Normal file
View File

@@ -0,0 +1,5 @@
/// Build script entry point.
///
/// When this build script is compiled with the `coreml` feature enabled,
/// emits a `cargo:rustc-link-arg` directive that passes
/// `-fapple-link-rtlib` to the linker. Otherwise it emits nothing.
fn main() {
    let coreml_enabled = cfg!(feature = "coreml");
    if !coreml_enabled {
        // Nothing to configure for the other feature sets.
        return;
    }
    println!("cargo:rustc-link-arg=-fapple-link-rtlib");
}

View File

@@ -5,7 +5,7 @@ use axum::{
routing::{get, post},
Json, Router,
};
use sbv2_core::tts::TTSModelHolder;
use sbv2_core::tts::{SynthesizeOptions, TTSModelHolder};
use serde::Deserialize;
use std::env;
use std::sync::Arc;
@@ -49,17 +49,15 @@ async fn synthesize(
log::debug!("processing request: text={text}, ident={ident}, sdp_ratio={sdp_ratio}, length_scale={length_scale}");
let buffer = {
let tts_model = state.tts_model.lock().await;
let (bert_ori, phones, tones, lang_ids) = tts_model.parse_text(&text)?;
let style_vector = tts_model.get_style_vector(&ident, 0, 1.0)?;
tts_model.synthesize(
ident,
bert_ori.to_owned(),
phones,
tones,
lang_ids,
style_vector,
sdp_ratio,
length_scale,
tts_model.easy_synthesize(
&ident,
&text,
0,
SynthesizeOptions {
sdp_ratio,
length_scale,
..Default::default()
},
)?
};
Ok(([(CONTENT_TYPE, "audio/wav")], buffer))

View File

@@ -1,6 +1,6 @@
[package]
name = "sbv2_bindings"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -12,4 +12,4 @@ crate-type = ["cdylib"]
anyhow.workspace = true
ndarray.workspace = true
pyo3 = { version = "0.22.0", features = ["anyhow"] }
sbv2_core = { version = "0.1.3", path = "../sbv2_core" }
sbv2_core = { version = "0.2.0-alpha", path = "../sbv2_core" }

View File

@@ -8,11 +8,13 @@ def main():
model.load_sbv2file_from_path("amitaro", "../models/amitaro.sbv2")
print("All setup is done!")
style_vector = model.get_style_vector("amitaro", 0, 1.0)
with open("output.wav", "wb") as f:
f.write(model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5))
f.write(
model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5)
)
if __name__ == "__main__":
main()
main()

View File

@@ -11,5 +11,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dynamic = ["version"]
[tool.maturin]
features = ["pyo3/extension-module"]
strip = true

View File

@@ -1,6 +1,6 @@
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use sbv2_core::tts::TTSModelHolder;
use sbv2_core::tts::{SynthesizeOptions, TTSModelHolder};
use crate::style::StyleVector;
@@ -109,8 +109,8 @@ impl TTSModel {
/// テキスト
/// ident : str
/// 識別子
/// style_vector : StyleVector
/// スタイルベクトル
/// style_id : int
/// スタイルID
/// sdp_ratio : float
/// SDP比率
/// length_scale : float
@@ -125,21 +125,24 @@ impl TTSModel {
py: Python<'p>,
text: String,
ident: String,
style_vector: StyleVector,
style_id: i32,
sdp_ratio: f32,
length_scale: f32,
) -> anyhow::Result<Bound<PyBytes>> {
let (bert_ori, phones, tones, lang_ids) = self.model.parse_text(&text)?;
let data = self.model.synthesize(
ident,
bert_ori,
phones,
tones,
lang_ids,
style_vector.get(),
sdp_ratio,
length_scale,
let data = self.model.easy_synthesize(
ident.as_str(),
&text,
style_id,
SynthesizeOptions {
sdp_ratio,
length_scale,
..Default::default()
},
)?;
Ok(PyBytes::new_bound(py, &data))
}
fn unload(&mut self, ident: String) -> bool {
self.model.unload(ident)
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "sbv2_core"
description = "Style-Bert-VITSの推論ライブラリ"
version = "0.1.3"
version = "0.2.0-alpha"
edition = "2021"
license = "MIT"
readme = "../README.md"

View File

@@ -19,21 +19,6 @@ fn initialize_jtalk() -> Result<JPreprocessType> {
Ok(jpreprocess)
}
static JTALK_G2P_G_A1_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"/A:([0-9\-]+)\+").unwrap());
static JTALK_G2P_G_A2_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"\+(\d+)\+").unwrap());
static JTALK_G2P_G_A3_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"\+(\d+)/").unwrap());
static JTALK_G2P_G_E3_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"!(\d+)_").unwrap());
static JTALK_G2P_G_F1_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"/F:(\d+)_").unwrap());
static JTALK_G2P_G_P3_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"\-(.*?)\+").unwrap());
/// Extracts a numeric feature from `text` using the first capture group of `regex`.
///
/// Returns the captured text parsed as `i32`, or the sentinel `-50` when
/// `regex` does not match `text` at all.
///
/// # Panics
/// Panics if the regex matches but capture group 1 is not a valid `i32`.
fn numeric_feature_by_regex(regex: &Regex, text: &str) -> i32 {
    match regex.captures(text) {
        Some(caps) => caps[1].parse::<i32>().unwrap(),
        // No match: fall back to the sentinel value.
        None => -50,
    }
}
macro_rules! hash_set {
($($elem:expr),* $(,)?) => {{
let mut set = HashSet::new();
@@ -54,6 +39,17 @@ impl JTalk {
Ok(Self { jpreprocess })
}
/// Converts `text` to NJD nodes, runs the NJD `preprocess` step, and returns
/// the node strings concatenated with no separator.
///
/// NOTE(review): judging by the name, the preprocess step is presumably what
/// rewrites numerals into their spoken form — confirm against jpreprocess.
///
/// # Errors
/// Propagates any error returned by `text_to_njd`.
pub fn num2word(&self, text: &str) -> Result<String> {
    let mut njd = self.jpreprocess.text_to_njd(text)?;
    njd.preprocess();

    // Append each node's string in order; equivalent to joining with "".
    let mut joined = String::new();
    for node in njd.nodes.iter() {
        joined.push_str(&node.get_string().to_string());
    }
    Ok(joined)
}
pub fn process_text(&self, text: &str) -> Result<JTalkProcess> {
let parsed = self.jpreprocess.run_frontend(text)?;
let jtalk_process = JTalkProcess::new(Arc::clone(&self.jpreprocess), parsed);
@@ -340,11 +336,7 @@ impl JTalkProcess {
let mut phones: Vec<String> = Vec::new();
for (i, label) in labels.iter().enumerate() {
let mut p3 = {
let label_text = label.to_string();
let mattched = JTALK_G2P_G_P3_PATTERN.captures(&label_text).unwrap();
mattched[1].to_string()
};
let mut p3 = label.phoneme.c.clone().unwrap();
if "AIUEO".contains(&p3) {
// 文字をlowerする
p3 = p3.to_lowercase();
@@ -354,10 +346,10 @@ impl JTalkProcess {
if i == 0 {
phones.push("^".to_string());
} else if i == labels.len() - 1 {
let e3 = numeric_feature_by_regex(&JTALK_G2P_G_E3_PATTERN, &label.to_string());
if e3 == 0 {
let e3 = label.accent_phrase_prev.clone().unwrap().is_interrogative;
if e3 {
phones.push("$".to_string());
} else if e3 == 1 {
} else {
phones.push("?".to_string());
}
}
@@ -369,14 +361,33 @@ impl JTalkProcess {
phones.push(p3.clone());
}
let a1 = numeric_feature_by_regex(&JTALK_G2P_G_A1_PATTERN, &label.to_string());
let a2 = numeric_feature_by_regex(&JTALK_G2P_G_A2_PATTERN, &label.to_string());
let a3 = numeric_feature_by_regex(&JTALK_G2P_G_A3_PATTERN, &label.to_string());
let a1 = if let Some(mora) = &label.mora {
mora.relative_accent_position as i32
} else {
-50
};
let a2 = if let Some(mora) = &label.mora {
mora.position_forward as i32
} else {
-50
};
let a3 = if let Some(mora) = &label.mora {
mora.position_backward as i32
} else {
-50
};
let f1 = numeric_feature_by_regex(&JTALK_G2P_G_F1_PATTERN, &label.to_string());
let f1 = if let Some(accent_phrase) = &label.accent_phrase_curr {
accent_phrase.mora_count as i32
} else {
-50
};
let a2_next =
numeric_feature_by_regex(&JTALK_G2P_G_A2_PATTERN, &labels[i + 1].to_string());
let a2_next = if let Some(mora) = &labels[i + 1].mora {
mora.position_forward as i32
} else {
-50
};
if a3 == 1 && a2_next == 1 && "aeiouAEIOUNcl".contains(&p3) {
phones.push("#".to_string());

View File

@@ -120,7 +120,8 @@ pub fn replace_punctuation(mut text: String) -> String {
for (k, v) in REPLACE_MAP.iter() {
text = text.replace(k, v);
}
PUNCTUATION_CLEANUP_PATTERN
let content = PUNCTUATION_CLEANUP_PATTERN
.replace_all(&text, "")
.to_string()
.to_string();
content
}

View File

@@ -33,6 +33,7 @@ pub struct TTSModel {
ident: TTSIdent,
}
/// High-level API for Style-Bert-VITS2
pub struct TTSModelHolder {
tokenizer: Tokenizer,
bert: Session,
@@ -41,6 +42,13 @@ pub struct TTSModelHolder {
}
impl TTSModelHolder {
/// Initialize a new TTSModelHolder
///
/// # Examples
///
/// ```rs
/// let mut tts_holder = TTSModelHolder::new(std::fs::read("deberta.onnx")?, std::fs::read("tokenizer.json")?)?;
/// ```
pub fn new<P: AsRef<[u8]>>(bert_model_bytes: P, tokenizer_bytes: P) -> Result<Self> {
let bert = model::load_model(bert_model_bytes, true)?;
let jtalk = jtalk::JTalk::new()?;
@@ -53,10 +61,18 @@ impl TTSModelHolder {
})
}
/// Return a list of model names
pub fn models(&self) -> Vec<String> {
self.models.iter().map(|m| m.ident.to_string()).collect()
}
/// Load a .sbv2 file binary
///
/// # Examples
///
/// ```rs
/// tts_holder.load_sbv2file("tsukuyomi", std::fs::read("tsukuyomi.sbv2")?)?;
/// ```
pub fn load_sbv2file<I: Into<TTSIdent>, P: AsRef<[u8]>>(
&mut self,
ident: I,
@@ -86,6 +102,13 @@ impl TTSModelHolder {
Ok(())
}
/// Load a style vector and onnx model binary
///
/// # Examples
///
/// ```rs
/// tts_holder.load("tsukuyomi", std::fs::read("style_vectors.json")?, std::fs::read("model.onnx")?)?;
/// ```
pub fn load<I: Into<TTSIdent>, P: AsRef<[u8]>>(
&mut self,
ident: I,
@@ -103,6 +126,7 @@ impl TTSModelHolder {
Ok(())
}
/// Unload a model
pub fn unload<I: Into<TTSIdent>>(&mut self, ident: I) -> bool {
let ident = ident.into();
if let Some((i, _)) = self
@@ -118,12 +142,17 @@ impl TTSModelHolder {
}
}
/// Parse text and return the input for synthesize
///
/// # Note
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
#[allow(clippy::type_complexity)]
pub fn parse_text(
&self,
text: &str,
) -> Result<(Array2<f32>, Array1<i64>, Array1<i64>, Array1<i64>)> {
let normalized_text = norm::normalize_text(text);
let text = self.jtalk.num2word(text)?;
let normalized_text = norm::normalize_text(&text);
let process = self.jtalk.process_text(&normalized_text)?;
let (phones, tones, mut word2ph) = process.g2p()?;
@@ -195,6 +224,10 @@ impl TTSModelHolder {
.ok_or(Error::ModelNotFoundError(ident.to_string()))
}
/// Get style vector by style id and weight
///
/// # Note
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
pub fn get_style_vector<I: Into<TTSIdent>>(
&self,
ident: I,
@@ -204,6 +237,13 @@ impl TTSModelHolder {
style::get_style_vector(&self.find_model(ident)?.style_vectors, style_id, weight)
}
/// Synthesize text to audio
///
/// # Examples
///
/// ```rs
/// let audio = tts_holder.easy_synthesize("tsukuyomi", "こんにちは", 0, SynthesizeOptions::default())?;
/// ```
pub fn easy_synthesize<I: Into<TTSIdent> + Copy>(
&self,
ident: I,
@@ -274,6 +314,10 @@ impl TTSModelHolder {
Ok(cursor.into_inner())
}
/// Synthesize text to audio
///
/// # Note
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
#[allow(clippy::too_many_arguments)]
pub fn synthesize<I: Into<TTSIdent>>(
&self,
@@ -300,11 +344,18 @@ impl TTSModelHolder {
}
}
/// Synthesize options
///
/// # Fields
/// - `sdp_ratio`: SDP ratio
/// - `length_scale`: Length scale
/// - `style_weight`: Style weight
/// - `split_sentences`: Split sentences
pub struct SynthesizeOptions {
sdp_ratio: f32,
length_scale: f32,
style_weight: f32,
split_sentences: bool,
pub sdp_ratio: f32,
pub length_scale: f32,
pub style_weight: f32,
pub split_sentences: bool,
}
impl Default for SynthesizeOptions {