Compare commits

..

22 Commits

Author SHA1 Message Date
tuna2134
64cbd151a6 change to openid upload 2024-09-22 07:36:03 +00:00
tuna2134
3103fcef17 fix bug 2024-09-22 07:25:48 +00:00
tuna2134
dd8ae77edc fix bug 2024-09-22 07:24:37 +00:00
tuna2134
ee4c4ab8ad use easy function 2024-09-22 07:22:26 +00:00
コマリン親衛隊
79120e4aee Fix 2024-09-22 16:17:08 +09:00
コマリン親衛隊
c947df2105 Unsupport musllinux 2024-09-22 16:16:25 +09:00
tuna2134
dcbb19fcdd fixed 2024-09-22 07:13:26 +00:00
tuna2134
b5601410f8 Merge branch 'main' of https://github.com/tuna2134/sbv2-api 2024-09-22 07:10:30 +00:00
tuna2134
a3160ea2e8 add 2024-09-22 07:10:28 +00:00
コマリン親衛隊
1a978c3fe3 Create pull_request_template.md 2024-09-22 15:23:43 +09:00
コマリン親衛隊
5837b66759 Merge pull request #53 from Googlefan256/main
Build optimization and convert model onnx
2024-09-22 15:15:44 +09:00
Googlefan
962fa9a49d fix: build optimization, convert model onnx 2024-09-22 06:00:57 +00:00
コマリン親衛隊
290fb37c16 Merge pull request #52 from tuna2134/docs
クレートのdocsの充実化
2024-09-22 11:01:56 +09:00
tuna2134
0c926751a4 bump version 2024-09-22 02:01:13 +00:00
tuna2134
da86aa811d Merge branch 'main' of https://github.com/tuna2134/sbv2-api into docs 2024-09-22 01:54:19 +00:00
tuna2134
4e0edaebcd docsの充実化 2024-09-22 01:54:06 +00:00
tuna2134
1d7d65ae21 add support arm64(maybe) 2024-09-22 01:16:04 +00:00
tuna2134
3112e3e8ec added new todo 2024-09-22 01:01:53 +00:00
コマリン親衛隊
5724251fb5 Merge pull request #50 from kamakiri1192/feature/apple-silicon-mac
feat: Added explanation for Apple Silicon Mac
2024-09-22 09:58:35 +09:00
コマリン親衛隊
3b1182f07d Merge pull request #49 from tuna2134/renovate/axum-0.x-lockfile
Update Rust crate axum to v0.7.6
2024-09-22 09:34:15 +09:00
kamakiri1192
4ed463b05b feat: Added explanation for Apple Silicon Mac 2024-09-21 16:05:47 +09:00
renovate[bot]
c641bc7529 Update Rust crate axum to v0.7.6 2024-09-20 20:05:48 +00:00
12 changed files with 268 additions and 32 deletions

8
.github/pull_request_template.md vendored Normal file
View File

@@ -0,0 +1,8 @@
## 概要
(ここに本PRの説明をしてください。)
## 関連issue
(ここに該当するissueの番号を書いてください。)
## 確認
- [ ] 動作確認しましたか?

132
.github/workflows/CI.yml vendored Normal file
View File

@@ -0,0 +1,132 @@
# This file is autogenerated by maturin v1.7.1
# To update, run
#
# maturin generate-ci github
#
# Workflow name.
name: CI
# Triggers: pushes to the main/master branches and to any tag, every pull request,
# and manual runs via workflow_dispatch.
on:
push:
branches:
- main
- master
tags:
- '*'
pull_request:
workflow_dispatch:
# Token permissions for the workflow: read-only repository contents, plus
# `id-token: write` so the workflow may request an OIDC ID token.
permissions:
contents: read
id-token: write
jobs:
# Build Linux wheels, one matrix entry per target architecture (x86_64 and aarch64).
linux:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
# Both targets are built on the ubuntu-latest runner.
- runner: ubuntu-latest
target: x86_64
- runner: ubuntu-latest
target: aarch64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
# maturin is invoked with working-directory sbv2_bindings and writes its output to `dist`.
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
manylinux: auto
working-directory: sbv2_bindings
# The artifact name carries the target so the matrix entries do not share a name.
# NOTE(review): the build step runs with working-directory sbv2_bindings while this step
# uploads `dist` with no working directory set — confirm both refer to the same directory.
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-linux-${{ matrix.platform.target }}
path: dist
# Build Windows wheels; the matrix currently has a single x64 entry.
windows:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: windows-latest
target: x64
steps:
- uses: actions/checkout@v4
# The matrix target is also used as the Python architecture to install.
- uses: actions/setup-python@v5
with:
python-version: 3.x
architecture: ${{ matrix.platform.target }}
# maturin is invoked with working-directory sbv2_bindings and writes its output to `dist`.
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: sbv2_bindings
# NOTE(review): the build step runs with working-directory sbv2_bindings while this step
# uploads `dist` with no working directory set — confirm both refer to the same directory.
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-windows-${{ matrix.platform.target }}
path: dist
# Build macOS wheels, one matrix entry per target architecture.
macos:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
# x86_64 is built on macos-12 and aarch64 on macos-14.
# NOTE(review): runner labels get retired over time — confirm macos-12 is still offered.
- runner: macos-12
target: x86_64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
# maturin is invoked with working-directory sbv2_bindings and writes its output to `dist`.
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter
sccache: 'true'
working-directory: sbv2_bindings
# NOTE(review): the build step runs with working-directory sbv2_bindings while this step
# uploads `dist` with no working directory set — confirm both refer to the same directory.
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-macos-${{ matrix.platform.target }}
path: dist
# Build the source distribution with maturin's `sdist` command.
sdist:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# maturin is invoked with working-directory sbv2_bindings and writes its output to `dist`.
- name: Build sdist
uses: PyO3/maturin-action@v1
with:
command: sdist
args: --out dist
working-directory: sbv2_bindings
# The artifact is named wheels-sdist, so it shares the `wheels-` prefix of the wheel artifacts.
# NOTE(review): the build step runs with working-directory sbv2_bindings while this step
# uploads `dist` with no working directory set — confirm both refer to the same directory.
- name: Upload sdist
uses: actions/upload-artifact@v4
with:
name: wheels-sdist
path: dist
# Publish the built artifacts to PyPI. Runs only when the ref is a tag, and only after
# the four build jobs listed in `needs`.
release:
name: Release
runs-on: ubuntu-latest
if: "startsWith(github.ref, 'refs/tags/')"
needs: [linux, windows, macos, sdist]
environment: release
steps:
# No artifact name is given here, so the download is not restricted to one named artifact.
- uses: actions/download-artifact@v4
- name: Publish to PyPI
uses: PyO3/maturin-action@v1
with:
command: upload
# The wheels-*/* glob lines up with the artifact names used by the build jobs
# (wheels-linux-*, wheels-windows-*, wheels-macos-*, wheels-sdist).
# NOTE(review): no token or password is passed to this step; presumably it relies on
# OIDC trusted publishing via the workflow-level `id-token: write` permission — confirm.
args: --non-interactive --skip-existing wheels-*/*

View File

@@ -14,6 +14,9 @@ jobs:
strategy:
matrix:
tag: [cpu, cuda]
platform:
- linux/amd64
- linux/arm64
steps:
- uses: actions/checkout@v4
- name: Set up QEMU
@@ -33,4 +36,5 @@ jobs:
push: true
tags: |
ghcr.io/${{ github.repository }}:${{ matrix.tag }}
file: docker/${{ matrix.tag }}.Dockerfile
file: docker/${{ matrix.tag }}.Dockerfile
platforms: ${{ matrix.platform }}

31
Cargo.lock generated
View File

@@ -100,9 +100,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
[[package]]
name = "axum"
version = "0.7.5"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
checksum = "8f43644eed690f5374f1af436ecd6aea01cd201f6fbdf0178adaf6907afb2cec"
dependencies = [
"async-trait",
"axum-core",
@@ -126,7 +126,7 @@ dependencies = [
"serde_urlencoded",
"sync_wrapper 1.0.1",
"tokio",
"tower",
"tower 0.5.1",
"tower-layer",
"tower-service",
"tracing",
@@ -134,9 +134,9 @@ dependencies = [
[[package]]
name = "axum-core"
version = "0.4.3"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
checksum = "5e6b8ba012a258d63c9adfa28b9ddcf66149da6f986c5b5452e629d5ee64bf00"
dependencies = [
"async-trait",
"bytes",
@@ -147,7 +147,7 @@ dependencies = [
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper 0.1.2",
"sync_wrapper 1.0.1",
"tower-layer",
"tower-service",
"tracing",
@@ -771,6 +771,8 @@ dependencies = [
"hyper",
"pin-project-lite",
"tokio",
"tower 0.4.13",
"tower-service",
]
[[package]]
@@ -1841,7 +1843,7 @@ dependencies = [
[[package]]
name = "sbv2_core"
version = "0.1.3"
version = "0.1.4"
dependencies = [
"anyhow",
"dotenvy",
@@ -2181,6 +2183,21 @@ dependencies = [
"tokio",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f"
dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
"sync_wrapper 0.1.2",
"tokio",
"tower-layer",
"tower-service",
"tracing",
]

View File

@@ -6,4 +6,9 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"]
anyhow = "1.0.86"
dotenvy = "0.15.7"
env_logger = "0.11.5"
ndarray = "0.16.1"
ndarray = "0.16.1"
[profile.release]
lto = true
debug = false
strip = true

View File

@@ -27,6 +27,7 @@ JP-Extra しか対応していません。(基本的に対応する予定もあ
- [x] GPU 対応(DirectML)
- [x] GPU 対応(CoreML)
- [ ] WASM 変換(依存ライブラリの関係により現在は不可)
- [ ] arm64のdockerサポート
## 構造説明
@@ -57,6 +58,22 @@ docker run -it --rm -p 3000:3000 --name sbv2 \
ghcr.io/tuna2134/sbv2-api:cpu
```
<details>
<summary>Apple Silicon搭載のMac(M1以降)の場合</summary>
docker上で動作させる場合、.envのADDRをlocalhostから0.0.0.0に変更してください。
```yaml
ADDR=0.0.0.0:3000
```
CPUの場合は
```bash
docker run --platform linux/amd64 -it --rm -p 3000:3000 --name sbv2 \
-v ./models:/work/models --env-file .env \
ghcr.io/tuna2134/sbv2-api:cpu
```
</details>
CUDAの場合は
```sh
docker run -it --rm -p 3000:3000 --name sbv2 \

View File

@@ -126,11 +126,13 @@ torch.onnx.export(
f"../models/model_{out_name}.onnx",
verbose=True,
dynamic_axes={
"x_tst": {1: "batch_size"},
"x_tst": {0: "batch_size", 1: "x_tst_max_length"},
"x_tst_lengths": {0: "batch_size"},
"tones": {1: "batch_size"},
"language": {1: "batch_size"},
"bert": {2: "batch_size"},
"sid": {0: "batch_size"},
"tones": {0: "batch_size", 1: "x_tst_max_length"},
"language": {0: "batch_size", 1: "x_tst_max_length"},
"bert": {0: "batch_size", 2: "x_tst_max_length"},
"style_vec": {0: "batch_size"},
},
input_names=[
"x_tst",

View File

@@ -12,4 +12,4 @@ crate-type = ["cdylib"]
anyhow.workspace = true
ndarray.workspace = true
pyo3 = { version = "0.22.0", features = ["anyhow"] }
sbv2_core = { version = "0.1.3", path = "../sbv2_core" }
sbv2_core = { version = "0.1.4", path = "../sbv2_core" }

View File

@@ -8,11 +8,13 @@ def main():
model.load_sbv2file_from_path("amitaro", "../models/amitaro.sbv2")
print("All setup is done!")
style_vector = model.get_style_vector("amitaro", 0, 1.0)
with open("output.wav", "wb") as f:
f.write(model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5))
f.write(
model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5)
)
if __name__ == "__main__":
main()
main()

View File

@@ -1,6 +1,6 @@
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use sbv2_core::tts::TTSModelHolder;
use sbv2_core::tts::{TTSModelHolder, SynthesizeOptions};
use crate::style::StyleVector;
@@ -109,8 +109,8 @@ impl TTSModel {
/// テキスト
/// ident : str
/// 識別子
/// style_vector : StyleVector
/// スタイルベクトル
/// style_id : int
/// スタイルID
/// sdp_ratio : float
/// SDP比率
/// length_scale : float
@@ -125,20 +125,19 @@ impl TTSModel {
py: Python<'p>,
text: String,
ident: String,
style_vector: StyleVector,
style_id: i32,
sdp_ratio: f32,
length_scale: f32,
) -> anyhow::Result<Bound<PyBytes>> {
let (bert_ori, phones, tones, lang_ids) = self.model.parse_text(&text)?;
let data = self.model.synthesize(
ident,
bert_ori,
phones,
tones,
lang_ids,
style_vector.get(),
sdp_ratio,
length_scale,
let data = self.model.easy_synthesize(
ident.as_str(),
&text,
style_id,
SynthesizeOptions {
sdp_ratio,
length_scale,
..Default::default()
},
)?;
Ok(PyBytes::new_bound(py, &data))
}

View File

@@ -1,7 +1,7 @@
[package]
name = "sbv2_core"
description = "Style-Bert-VITSの推論ライブラリ"
version = "0.1.3"
version = "0.1.4"
edition = "2021"
license = "MIT"
readme = "../README.md"

View File

@@ -33,6 +33,7 @@ pub struct TTSModel {
ident: TTSIdent,
}
/// High-level API for Style-Bert-VITS2
pub struct TTSModelHolder {
tokenizer: Tokenizer,
bert: Session,
@@ -41,6 +42,13 @@ pub struct TTSModelHolder {
}
impl TTSModelHolder {
/// Initialize a new TTSModelHolder
///
/// # Examples
///
/// ```rs
/// let mut tts_holder = TTSModelHolder::new(std::fs::read("deberta.onnx")?, std::fs::read("tokenizer.json")?)?;
/// ```
pub fn new<P: AsRef<[u8]>>(bert_model_bytes: P, tokenizer_bytes: P) -> Result<Self> {
let bert = model::load_model(bert_model_bytes, true)?;
let jtalk = jtalk::JTalk::new()?;
@@ -53,10 +61,18 @@ impl TTSModelHolder {
})
}
/// Return a list of model names
pub fn models(&self) -> Vec<String> {
self.models.iter().map(|m| m.ident.to_string()).collect()
}
/// Load a .sbv2 file binary
///
/// # Examples
///
/// ```rs
/// tts_holder.load_sbv2file("tsukuyomi", std::fs::read("tsukuyomi.sbv2")?)?;
/// ```
pub fn load_sbv2file<I: Into<TTSIdent>, P: AsRef<[u8]>>(
&mut self,
ident: I,
@@ -86,6 +102,13 @@ impl TTSModelHolder {
Ok(())
}
/// Load a style vector and onnx model binary
///
/// # Examples
///
/// ```rs
/// tts_holder.load("tsukuyomi", std::fs::read("style_vectors.json")?, std::fs::read("model.onnx")?)?;
/// ```
pub fn load<I: Into<TTSIdent>, P: AsRef<[u8]>>(
&mut self,
ident: I,
@@ -103,6 +126,7 @@ impl TTSModelHolder {
Ok(())
}
/// Unload a model
pub fn unload<I: Into<TTSIdent>>(&mut self, ident: I) -> bool {
let ident = ident.into();
if let Some((i, _)) = self
@@ -118,6 +142,10 @@ impl TTSModelHolder {
}
}
/// Parse text and return the input for synthesize
///
/// # Note
/// This function is for low-level usage; use `easy_synthesize` for high-level usage.
#[allow(clippy::type_complexity)]
pub fn parse_text(
&self,
@@ -196,6 +224,10 @@ impl TTSModelHolder {
.ok_or(Error::ModelNotFoundError(ident.to_string()))
}
/// Get style vector by style id and weight
///
/// # Note
/// This function is for low-level usage; use `easy_synthesize` for high-level usage.
pub fn get_style_vector<I: Into<TTSIdent>>(
&self,
ident: I,
@@ -205,6 +237,13 @@ impl TTSModelHolder {
style::get_style_vector(&self.find_model(ident)?.style_vectors, style_id, weight)
}
/// Synthesize text to audio
///
/// # Examples
///
/// ```rs
/// let audio = tts_holder.easy_synthesize("tsukuyomi", "こんにちは", 0, SynthesizeOptions::default())?;
/// ```
pub fn easy_synthesize<I: Into<TTSIdent> + Copy>(
&self,
ident: I,
@@ -275,6 +314,10 @@ impl TTSModelHolder {
Ok(cursor.into_inner())
}
/// Synthesize audio from pre-parsed inputs (see `parse_text`)
///
/// # Note
/// This function is for low-level usage; use `easy_synthesize` for high-level usage.
#[allow(clippy::too_many_arguments)]
pub fn synthesize<I: Into<TTSIdent>>(
&self,
@@ -301,6 +344,13 @@ impl TTSModelHolder {
}
}
/// Synthesize options
///
/// # Fields
/// - `sdp_ratio`: SDP ratio
/// - `length_scale`: Length scale
/// - `style_weight`: Style weight
/// - `split_sentences`: Split sentences
pub struct SynthesizeOptions {
pub sdp_ratio: f32,
pub length_scale: f32,