mirror of
https://github.com/neodyland/sbv2-api.git
synced 2025-12-24 00:09:57 +00:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
64cbd151a6 | ||
|
|
3103fcef17 | ||
|
|
dd8ae77edc | ||
|
|
ee4c4ab8ad | ||
|
|
79120e4aee | ||
|
|
c947df2105 | ||
|
|
dcbb19fcdd | ||
|
|
b5601410f8 | ||
|
|
a3160ea2e8 | ||
|
|
1a978c3fe3 | ||
|
|
5837b66759 | ||
|
|
962fa9a49d | ||
|
|
290fb37c16 | ||
|
|
0c926751a4 | ||
|
|
da86aa811d | ||
|
|
4e0edaebcd | ||
|
|
1d7d65ae21 | ||
|
|
3112e3e8ec | ||
|
|
5724251fb5 | ||
|
|
3b1182f07d | ||
|
|
4ed463b05b | ||
|
|
c641bc7529 |
8
.github/pull_request_template.md
vendored
Normal file
8
.github/pull_request_template.md
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
## 概要
|
||||
(ここに本PRの説明をしてください。)
|
||||
|
||||
## 関連issue
|
||||
(ここに該当するissueの番号を書いてください。)
|
||||
|
||||
## 確認
|
||||
- [ ] 動作確認しましたか?
|
||||
132
.github/workflows/CI.yml
vendored
Normal file
132
.github/workflows/CI.yml
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
# This file is autogenerated by maturin v1.7.1
|
||||
# To update, run
|
||||
#
|
||||
# maturin generate-ci github
|
||||
#
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
runs-on: ${{ matrix.platform.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
platform:
|
||||
- runner: ubuntu-latest
|
||||
target: x86_64
|
||||
- runner: ubuntu-latest
|
||||
target: aarch64
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.x
|
||||
- name: Build wheels
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
target: ${{ matrix.platform.target }}
|
||||
args: --release --out dist --find-interpreter
|
||||
sccache: 'true'
|
||||
manylinux: auto
|
||||
working-directory: sbv2_bindings
|
||||
- name: Upload wheels
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels-linux-${{ matrix.platform.target }}
|
||||
path: dist
|
||||
|
||||
windows:
|
||||
runs-on: ${{ matrix.platform.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
platform:
|
||||
- runner: windows-latest
|
||||
target: x64
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.x
|
||||
architecture: ${{ matrix.platform.target }}
|
||||
- name: Build wheels
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
target: ${{ matrix.platform.target }}
|
||||
args: --release --out dist --find-interpreter
|
||||
sccache: 'true'
|
||||
working-directory: sbv2_bindings
|
||||
- name: Upload wheels
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels-windows-${{ matrix.platform.target }}
|
||||
path: dist
|
||||
|
||||
macos:
|
||||
runs-on: ${{ matrix.platform.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
platform:
|
||||
- runner: macos-12
|
||||
target: x86_64
|
||||
- runner: macos-14
|
||||
target: aarch64
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.x
|
||||
- name: Build wheels
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
target: ${{ matrix.platform.target }}
|
||||
args: --release --out dist --find-interpreter
|
||||
sccache: 'true'
|
||||
working-directory: sbv2_bindings
|
||||
- name: Upload wheels
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels-macos-${{ matrix.platform.target }}
|
||||
path: dist
|
||||
|
||||
sdist:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build sdist
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: sdist
|
||||
args: --out dist
|
||||
working-directory: sbv2_bindings
|
||||
- name: Upload sdist
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheels-sdist
|
||||
path: dist
|
||||
|
||||
release:
|
||||
name: Release
|
||||
runs-on: ubuntu-latest
|
||||
if: "startsWith(github.ref, 'refs/tags/')"
|
||||
needs: [linux, windows, macos, sdist]
|
||||
environment: release
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Publish to PyPI
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: upload
|
||||
args: --non-interactive --skip-existing wheels-*/*
|
||||
6
.github/workflows/build.yml
vendored
6
.github/workflows/build.yml
vendored
@@ -14,6 +14,9 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
tag: [cpu, cuda]
|
||||
platform:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up QEMU
|
||||
@@ -33,4 +36,5 @@ jobs:
|
||||
push: true
|
||||
tags: |
|
||||
ghcr.io/${{ github.repository }}:${{ matrix.tag }}
|
||||
file: docker/${{ matrix.tag }}.Dockerfile
|
||||
file: docker/${{ matrix.tag }}.Dockerfile
|
||||
platforms: ${{ matrix.platform }}
|
||||
31
Cargo.lock
generated
31
Cargo.lock
generated
@@ -100,9 +100,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.5"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
|
||||
checksum = "8f43644eed690f5374f1af436ecd6aea01cd201f6fbdf0178adaf6907afb2cec"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core",
|
||||
@@ -126,7 +126,7 @@ dependencies = [
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tower 0.5.1",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
@@ -134,9 +134,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.4.3"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
|
||||
checksum = "5e6b8ba012a258d63c9adfa28b9ddcf66149da6f986c5b5452e629d5ee64bf00"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
@@ -147,7 +147,7 @@ dependencies = [
|
||||
"mime",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"sync_wrapper 0.1.2",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
@@ -771,6 +771,8 @@ dependencies = [
|
||||
"hyper",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tower 0.4.13",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1841,7 +1843,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sbv2_core"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"dotenvy",
|
||||
@@ -2181,6 +2183,21 @@ dependencies = [
|
||||
"tokio",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
"sync_wrapper 0.1.2",
|
||||
"tokio",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
|
||||
@@ -6,4 +6,9 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"]
|
||||
anyhow = "1.0.86"
|
||||
dotenvy = "0.15.7"
|
||||
env_logger = "0.11.5"
|
||||
ndarray = "0.16.1"
|
||||
ndarray = "0.16.1"
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
debug = false
|
||||
strip = true
|
||||
17
README.md
17
README.md
@@ -27,6 +27,7 @@ JP-Extra しか対応していません。(基本的に対応する予定もあ
|
||||
- [x] GPU 対応(DirectML)
|
||||
- [x] GPU 対応(CoreML)
|
||||
- [ ] WASM 変換(依存ライブラリの関係により現在は不可)
|
||||
- [ ] arm64のdockerサポート
|
||||
|
||||
## 構造説明
|
||||
|
||||
@@ -57,6 +58,22 @@ docker run -it --rm -p 3000:3000 --name sbv2 \
|
||||
ghcr.io/tuna2134/sbv2-api:cpu
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Apple Silicon搭載のMac(M1以降)の場合</summary>
|
||||
docker上で動作させる場合、.envのADDRをlocalhostから0.0.0.0に変更してください。
|
||||
|
||||
```yaml
|
||||
ADDR=0.0.0.0:3000
|
||||
```
|
||||
|
||||
CPUの場合は
|
||||
```bash
|
||||
docker run --platform linux/amd64 -it --rm -p 3000:3000 --name sbv2 \
|
||||
-v ./models:/work/models --env-file .env \
|
||||
ghcr.io/tuna2134/sbv2-api:cpu
|
||||
```
|
||||
</details>
|
||||
|
||||
CUDAの場合は
|
||||
```sh
|
||||
docker run -it --rm -p 3000:3000 --name sbv2 \
|
||||
|
||||
@@ -126,11 +126,13 @@ torch.onnx.export(
|
||||
f"../models/model_{out_name}.onnx",
|
||||
verbose=True,
|
||||
dynamic_axes={
|
||||
"x_tst": {1: "batch_size"},
|
||||
"x_tst": {0: "batch_size", 1: "x_tst_max_length"},
|
||||
"x_tst_lengths": {0: "batch_size"},
|
||||
"tones": {1: "batch_size"},
|
||||
"language": {1: "batch_size"},
|
||||
"bert": {2: "batch_size"},
|
||||
"sid": {0: "batch_size"},
|
||||
"tones": {0: "batch_size", 1: "x_tst_max_length"},
|
||||
"language": {0: "batch_size", 1: "x_tst_max_length"},
|
||||
"bert": {0: "batch_size", 2: "x_tst_max_length"},
|
||||
"style_vec": {0: "batch_size"},
|
||||
},
|
||||
input_names=[
|
||||
"x_tst",
|
||||
|
||||
@@ -12,4 +12,4 @@ crate-type = ["cdylib"]
|
||||
anyhow.workspace = true
|
||||
ndarray.workspace = true
|
||||
pyo3 = { version = "0.22.0", features = ["anyhow"] }
|
||||
sbv2_core = { version = "0.1.3", path = "../sbv2_core" }
|
||||
sbv2_core = { version = "0.1.4", path = "../sbv2_core" }
|
||||
|
||||
@@ -8,11 +8,13 @@ def main():
|
||||
|
||||
model.load_sbv2file_from_path("amitaro", "../models/amitaro.sbv2")
|
||||
print("All setup is done!")
|
||||
|
||||
|
||||
style_vector = model.get_style_vector("amitaro", 0, 1.0)
|
||||
with open("output.wav", "wb") as f:
|
||||
f.write(model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5))
|
||||
f.write(
|
||||
model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyBytes;
|
||||
use sbv2_core::tts::TTSModelHolder;
|
||||
use sbv2_core::tts::{TTSModelHolder, SynthesizeOptions};
|
||||
|
||||
use crate::style::StyleVector;
|
||||
|
||||
@@ -109,8 +109,8 @@ impl TTSModel {
|
||||
/// テキスト
|
||||
/// ident : str
|
||||
/// 識別子
|
||||
/// style_vector : StyleVector
|
||||
/// スタイルベクトル
|
||||
/// style_id : int
|
||||
/// スタイルID
|
||||
/// sdp_ratio : float
|
||||
/// SDP比率
|
||||
/// length_scale : float
|
||||
@@ -125,20 +125,19 @@ impl TTSModel {
|
||||
py: Python<'p>,
|
||||
text: String,
|
||||
ident: String,
|
||||
style_vector: StyleVector,
|
||||
style_id: i32,
|
||||
sdp_ratio: f32,
|
||||
length_scale: f32,
|
||||
) -> anyhow::Result<Bound<PyBytes>> {
|
||||
let (bert_ori, phones, tones, lang_ids) = self.model.parse_text(&text)?;
|
||||
let data = self.model.synthesize(
|
||||
ident,
|
||||
bert_ori,
|
||||
phones,
|
||||
tones,
|
||||
lang_ids,
|
||||
style_vector.get(),
|
||||
sdp_ratio,
|
||||
length_scale,
|
||||
let data = self.model.easy_synthesize(
|
||||
ident.as_str(),
|
||||
&text,
|
||||
style_id,
|
||||
SynthesizeOptions {
|
||||
sdp_ratio,
|
||||
length_scale,
|
||||
..Default::default()
|
||||
},
|
||||
)?;
|
||||
Ok(PyBytes::new_bound(py, &data))
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "sbv2_core"
|
||||
description = "Style-Bert-VITSの推論ライブラリ"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
readme = "../README.md"
|
||||
|
||||
@@ -33,6 +33,7 @@ pub struct TTSModel {
|
||||
ident: TTSIdent,
|
||||
}
|
||||
|
||||
/// High-level Style-Bert-VITS2's API
|
||||
pub struct TTSModelHolder {
|
||||
tokenizer: Tokenizer,
|
||||
bert: Session,
|
||||
@@ -41,6 +42,13 @@ pub struct TTSModelHolder {
|
||||
}
|
||||
|
||||
impl TTSModelHolder {
|
||||
/// Initialize a new TTSModelHolder
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rs
|
||||
/// let mut tts_holder = TTSModelHolder::new(std::fs::read("deberta.onnx")?, std::fs::read("tokenizer.json")?)?;
|
||||
/// ```
|
||||
pub fn new<P: AsRef<[u8]>>(bert_model_bytes: P, tokenizer_bytes: P) -> Result<Self> {
|
||||
let bert = model::load_model(bert_model_bytes, true)?;
|
||||
let jtalk = jtalk::JTalk::new()?;
|
||||
@@ -53,10 +61,18 @@ impl TTSModelHolder {
|
||||
})
|
||||
}
|
||||
|
||||
/// Return a list of model names
|
||||
pub fn models(&self) -> Vec<String> {
|
||||
self.models.iter().map(|m| m.ident.to_string()).collect()
|
||||
}
|
||||
|
||||
/// Load a .sbv2 file binary
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rs
|
||||
/// tts_holder.load_sbv2file("tsukuyomi", std::fs::read("tsukuyomi.sbv2")?)?;
|
||||
/// ```
|
||||
pub fn load_sbv2file<I: Into<TTSIdent>, P: AsRef<[u8]>>(
|
||||
&mut self,
|
||||
ident: I,
|
||||
@@ -86,6 +102,13 @@ impl TTSModelHolder {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load a style vector and onnx model binary
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rs
|
||||
/// tts_holder.load("tsukuyomi", std::fs::read("style_vectors.json")?, std::fs::read("model.onnx")?)?;
|
||||
/// ```
|
||||
pub fn load<I: Into<TTSIdent>, P: AsRef<[u8]>>(
|
||||
&mut self,
|
||||
ident: I,
|
||||
@@ -103,6 +126,7 @@ impl TTSModelHolder {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unload a model
|
||||
pub fn unload<I: Into<TTSIdent>>(&mut self, ident: I) -> bool {
|
||||
let ident = ident.into();
|
||||
if let Some((i, _)) = self
|
||||
@@ -118,6 +142,10 @@ impl TTSModelHolder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse text and return the input for synthesize
|
||||
///
|
||||
/// # Note
|
||||
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn parse_text(
|
||||
&self,
|
||||
@@ -196,6 +224,10 @@ impl TTSModelHolder {
|
||||
.ok_or(Error::ModelNotFoundError(ident.to_string()))
|
||||
}
|
||||
|
||||
/// Get style vector by style id and weight
|
||||
///
|
||||
/// # Note
|
||||
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
|
||||
pub fn get_style_vector<I: Into<TTSIdent>>(
|
||||
&self,
|
||||
ident: I,
|
||||
@@ -205,6 +237,13 @@ impl TTSModelHolder {
|
||||
style::get_style_vector(&self.find_model(ident)?.style_vectors, style_id, weight)
|
||||
}
|
||||
|
||||
/// Synthesize text to audio
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rs
|
||||
/// let audio = tts_holder.easy_synthesize("tsukuyomi", "こんにちは", 0, SynthesizeOptions::default())?;
|
||||
/// ```
|
||||
pub fn easy_synthesize<I: Into<TTSIdent> + Copy>(
|
||||
&self,
|
||||
ident: I,
|
||||
@@ -275,6 +314,10 @@ impl TTSModelHolder {
|
||||
Ok(cursor.into_inner())
|
||||
}
|
||||
|
||||
/// Synthesize text to audio
|
||||
///
|
||||
/// # Note
|
||||
/// This function is for low-level usage, use `easy_synthesize` for high-level usage.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn synthesize<I: Into<TTSIdent>>(
|
||||
&self,
|
||||
@@ -301,6 +344,13 @@ impl TTSModelHolder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Synthesize options
|
||||
///
|
||||
/// # Fields
|
||||
/// - `sdp_ratio`: SDP ratio
|
||||
/// - `length_scale`: Length scale
|
||||
/// - `style_weight`: Style weight
|
||||
/// - `split_sentences`: Split sentences
|
||||
pub struct SynthesizeOptions {
|
||||
pub sdp_ratio: f32,
|
||||
pub length_scale: f32,
|
||||
|
||||
Reference in New Issue
Block a user