mirror of
https://github.com/neodyland/sbv2-api.git
synced 2026-01-14 10:22:57 +00:00
refactor
This commit is contained in:
5
scripts/.gitignore
vendored
Normal file
5
scripts/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
*.json
|
||||
venv/
|
||||
tmp/
|
||||
*.safetensors
|
||||
*.npy
|
||||
36
scripts/convert/README.md
Normal file
36
scripts/convert/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# 変換方法
|
||||
|
||||
## 初心者向け準備
|
||||
|
||||
わかる人は飛ばしてください。
|
||||
|
||||
1. pythonを入れます。3.11.8で動作確認をしていますが、最近のバージョンなら大体動くはずです。
|
||||
|
||||
2. `cd convert`
|
||||
|
||||
3. `python -m venv venv`
|
||||
|
||||
4. `source venv/bin/activate`
|
||||
|
||||
5. `pip install -r requirements.txt`
|
||||
|
||||
## モデル変換
|
||||
|
||||
1. 変換したいモデルの`.safetensors`で終わるファイルの位置を特定してください。
|
||||
|
||||
2. 同様に`config.json`、`style_vectors.npy`というファイルを探してください。
|
||||
|
||||
3. 以下のコマンドを実行します。
|
||||
```sh
|
||||
python convert_model.py --style_file "ここにstyle_vectors.npyの場所" --config_file "同様にconfig.jsonの場所" --model_file "同様に.safetensorsで終わるファイルの場所"
|
||||
```
|
||||
|
||||
4. `models/名前.sbv2`というファイルが出力されます。GUI版のモデルファイルに入れてあげたら使えます。
|
||||
|
||||
## Deberta変換
|
||||
|
||||
意味が分からないならおそらく変換しなくてもいいってことです。
|
||||
|
||||
venvを用意し、requirementsを入れて、`python convert_deberta.py`を実行するだけです。
|
||||
|
||||
`models/deberta.onnx`と`models/tokenizer.json`が出力されたら成功です。
|
||||
50
scripts/convert/convert_deberta.py
Normal file
50
scripts/convert/convert_deberta.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from transformers.convert_slow_tokenizer import BertConverter
|
||||
from style_bert_vits2.nlp import bert_models
|
||||
from style_bert_vits2.constants import Languages
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
import torch
|
||||
from torch import nn
|
||||
from argparse import ArgumentParser
|
||||
|
||||
# Command line: the only option is the pretrained model to convert.
parser = ArgumentParser()
parser.add_argument("--model", default="ku-nlp/deberta-v2-large-japanese-char-wwm")
args = parser.parse_args()
model_name = args.model

# Register the tokenizer for Japanese under the requested model name, then
# fetch it back through the same registry call (this time without a name).
bert_models.load_tokenizer(Languages.JP, model_name)
slow_tokenizer = bert_models.load_tokenizer(Languages.JP)

# Convert it with BertConverter and write the result as a single JSON file.
tokenizer = BertConverter(slow_tokenizer).converted()
tokenizer.save("../models/tokenizer.json")
|
||||
|
||||
|
||||
class ORTDeberta(nn.Module):
    """Wrap a masked-LM model so that its forward pass returns one hidden state.

    The wrapper exists so that ``torch.onnx.export`` traces a module whose
    output is a single tensor instead of the full model output structure.
    """

    def __init__(self, model_name):
        """Load the pretrained masked-LM weights identified by ``model_name``."""
        super().__init__()
        self.model = AutoModelForMaskedLM.from_pretrained(model_name)

    def forward(self, input_ids, token_type_ids, attention_mask):
        """Run the wrapped model and return the selected hidden state on CPU.

        The three arguments are passed to the wrapped model by keyword,
        together with ``output_hidden_states=True``.
        """
        outputs = self.model(
            input_ids=input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,
        )
        # ``[-3:-2]`` is a one-element slice (the third-from-last hidden
        # state), so the concatenation along the last axis is that tensor
        # itself; ``[0]`` then drops the leading axis.
        selected = outputs["hidden_states"][-3:-2]
        return torch.cat(selected, -1)[0].cpu()
|
||||
|
||||
|
||||
model = ORTDeberta(model_name)

# Tokenize a short sample sentence to obtain example tensors for tracing.
sample_tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = sample_tokenizer("今日はいい天気ですね", return_tensors="pt")

input_names = ["input_ids", "token_type_ids", "attention_mask"]
example_inputs = tuple(inputs[name] for name in input_names)

# NOTE(review): axis 1 is marked dynamic under the label "batch_size", and
# token_type_ids gets no dynamic axis at all -- confirm both are intended.
dynamic_axes = {
    "input_ids": {1: "batch_size"},
    "attention_mask": {1: "batch_size"},
}

torch.onnx.export(
    model,
    example_inputs,
    "../models/deberta.onnx",
    input_names=input_names,
    output_names=["output"],
    verbose=True,
    dynamic_axes=dynamic_axes,
)
|
||||
169
scripts/convert/convert_model.py
Normal file
169
scripts/convert/convert_model.py
Normal file
@@ -0,0 +1,169 @@
|
||||
import numpy as np
|
||||
import json
|
||||
from io import BytesIO
|
||||
from style_bert_vits2.nlp import bert_models
|
||||
from style_bert_vits2.constants import Languages
|
||||
from style_bert_vits2.models.infer import get_net_g, get_text
|
||||
from style_bert_vits2.models.hyper_parameters import HyperParameters
|
||||
import torch
|
||||
from style_bert_vits2.constants import (
|
||||
DEFAULT_ASSIST_TEXT_WEIGHT,
|
||||
DEFAULT_STYLE,
|
||||
DEFAULT_STYLE_WEIGHT,
|
||||
Languages,
|
||||
)
|
||||
import os
|
||||
from tarfile import open as taropen, TarInfo
|
||||
from zstandard import ZstdCompressor
|
||||
from style_bert_vits2.tts_model import TTSModel
|
||||
import numpy as np
|
||||
from argparse import ArgumentParser
|
||||
|
||||
# Command line: all three input files are mandatory.
parser = ArgumentParser()
for flag in ("--style_file", "--config_file", "--model_file"):
    parser.add_argument(flag, required=True)
args = parser.parse_args()
style_file, config_file, model_file = (
    args.style_file,
    args.config_file,
    args.model_file,
)

# Load the Japanese BERT model and tokenizer into the style_bert_vits2
# registry (presumably required by the get_text call later in this script).
bert_name = "ku-nlp/deberta-v2-large-japanese-char-wwm"
bert_models.load_model(Languages.JP, bert_name)
bert_models.load_tokenizer(Languages.JP, bert_name)
|
||||
|
||||
# Re-encode the style vectors (.npy) as JSON: a nested list plus the shape.
array = np.load(style_file)
data = array.tolist()

# The model name stored in the config decides every output file name.
hyper_parameters = HyperParameters.load_from_json(config_file)
out_name = hyper_parameters.model_name

style_payload = {"data": data, "shape": array.shape}
with open(f"../models/style_vectors_{out_name}.json", "w") as f:
    json.dump(style_payload, f)
|
||||
# Everything in this script runs on the CPU.
device = "cpu"

# Sample sentence whose features serve as example inputs for the ONNX trace.
text = "今日はいい天気ですね。"
bert, ja_bert, en_bert, phones, tones, lang_ids = get_text(
    text,
    Languages.JP,
    hyper_parameters,
    device,
    assist_text=None,
    assist_text_weight=DEFAULT_ASSIST_TEXT_WEIGHT,
    given_phone=None,
    given_tone=None,
)

# The TTSModel is only consulted for the id of the default style.
tts_model = TTSModel(
    model_path=model_file,
    config_path=config_file,
    style_vec_path=style_file,
    device=device,
)
style_id = tts_model.style2id[DEFAULT_STYLE]
|
||||
|
||||
|
||||
def get_style_vector(style_id, weight):
    """Return the style vector for ``style_id`` scaled around the mean vector.

    The vectors are read from the module-level ``style_file``; row 0 is
    treated as the mean. The result is ``mean + (vector - mean) * weight``.
    """
    vectors = np.load(style_file)
    base = vectors[0]
    offset = vectors[style_id] - base
    return base + offset * weight
|
||||
|
||||
|
||||
style_vector = get_style_vector(style_id, DEFAULT_STYLE_WEIGHT)


def _as_batch(tensor):
    """Move ``tensor`` to the target device and prepend a new leading axis."""
    return tensor.to(device).unsqueeze(0)


# Example inputs for tracing, each with a leading axis added.
x_tst = _as_batch(phones)
tones = _as_batch(tones)
lang_ids = _as_batch(lang_ids)
bert = _as_batch(bert)
ja_bert = _as_batch(ja_bert)
en_bert = _as_batch(en_bert)
# ``phones`` itself is not rebound above, so this is the unbatched length.
x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
style_vec_tensor = _as_batch(torch.from_numpy(style_vector))

model = get_net_g(model_file, hyper_parameters.version, device, hyper_parameters)
|
||||
|
||||
|
||||
def forward(x, x_len, sid, tone, lang, bert, style, length_scale, sdp_ratio):
    """Adapter that routes a purely positional call to ``model.infer``.

    The first seven arguments are forwarded positionally in the same order;
    ``length_scale`` and ``sdp_ratio`` are forwarded as keyword arguments.
    """
    positional = (x, x_len, sid, tone, lang, bert, style)
    return model.infer(
        *positional,
        sdp_ratio=sdp_ratio,
        length_scale=length_scale,
    )
|
||||
|
||||
|
||||
# Make the exporter trace the inference adapter instead of the module's own
# forward method.
model.forward = forward

input_names = [
    "x_tst",
    "x_tst_lengths",
    "sid",
    "tones",
    "language",
    "bert",
    "style_vec",
    "length_scale",
    "sdp_ratio",
]

# One example value per entry of ``input_names``, in the same order.
example_inputs = (
    x_tst,
    x_tst_lengths,
    torch.LongTensor([0]).to(device),
    tones,
    lang_ids,
    bert,
    style_vec_tensor,
    torch.tensor(1.0),
    torch.tensor(0.0),
)

batch_only = {0: "batch_size"}
batch_and_length = {0: "batch_size", 1: "x_tst_max_length"}
dynamic_axes = {
    "x_tst": dict(batch_and_length),
    "x_tst_lengths": dict(batch_only),
    "sid": dict(batch_only),
    "tones": dict(batch_and_length),
    "language": dict(batch_and_length),
    # For "bert" the variable-length axis is axis 2, not axis 1.
    "bert": {0: "batch_size", 2: "x_tst_max_length"},
    "style_vec": dict(batch_only),
}

torch.onnx.export(
    model,
    example_inputs,
    f"../models/model_{out_name}.onnx",
    verbose=True,
    dynamic_axes=dynamic_axes,
    input_names=input_names,
    output_names=["output"],
)
|
||||
# Final step: simplify the exported ONNX model, then bundle it with the style
# vectors into "../models/<out_name>.sbv2" (a zstd-compressed tar archive).
import subprocess  # only this step needs it

onnx_path = f"../models/model_{out_name}.onnx"
style_path = f"../models/style_vectors_{out_name}.json"

# Simplify the model in place. The command is passed as an argument list so
# that a model name containing spaces or shell metacharacters cannot break or
# alter the command. A failure stays non-fatal: the unsimplified model is
# packaged in that case.
try:
    completed = subprocess.run(["onnxsim", onnx_path, onnx_path], check=False)
except OSError as err:
    print(f"warning: could not run onnxsim ({err}); packaging the unsimplified model")
else:
    if completed.returncode != 0:
        print(
            f"warning: onnxsim exited with status {completed.returncode}; "
            "packaging the model file as it is"
        )

with open(onnx_path, "rb") as f:
    onnxfile = f.read()
with open(style_path, "rb") as f:
    stylefile = f.read()
version = bytes("1", "utf8")

# Build the tar archive in memory; no temporary file is needed on disk.
archive_buffer = BytesIO()
with taropen(fileobj=archive_buffer, mode="w") as w:

    def add_tar(f, b):
        """Add the bytes ``b`` to the archive as a member named ``f``."""
        t = TarInfo(f)
        t.size = len(b)
        w.addfile(t, BytesIO(b))

    add_tar("version.txt", version)
    add_tar("model.onnx", onnxfile)
    add_tar("style_vectors.json", stylefile)

# Read the buffer only after the archive has been closed, so that the
# end-of-archive blocks are included.
compressed = ZstdCompressor(threads=-1, level=22).compress(archive_buffer.getvalue())
with open(f"../models/{out_name}.sbv2", "wb") as f:
    f.write(compressed)
|
||||
5
scripts/convert/requirements.txt
Normal file
5
scripts/convert/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
style-bert-vits2
|
||||
onnxsim
|
||||
numpy<2
|
||||
zstandard
|
||||
onnxruntime
|
||||
9
scripts/docker/cpu.Dockerfile
Normal file
9
scripts/docker/cpu.Dockerfile
Normal file
@@ -0,0 +1,9 @@
|
||||
# Build stage: compile the sbv2_api binary in release mode.
FROM rust AS builder
WORKDIR /work
COPY . .
RUN cargo build --release --bin sbv2_api

# Runtime stage: distroless image holding only the binary and the shared
# libraries found next to it in the build output.
FROM gcr.io/distroless/cc-debian12
WORKDIR /work
COPY --from=builder /work/target/release/sbv2_api /work/main
COPY --from=builder /work/target/release/*.so /work
CMD ["/work/main"]
|
||||
10
scripts/docker/cuda.Dockerfile
Normal file
10
scripts/docker/cuda.Dockerfile
Normal file
@@ -0,0 +1,10 @@
|
||||
# Build stage: compile the sbv2_api binary in release mode with the cuda and
# cuda_tf32 features enabled.
FROM rust AS builder
WORKDIR /work
COPY . .
RUN cargo build --release --bin sbv2_api --features cuda,cuda_tf32

# Runtime stage: CUDA + cuDNN runtime image holding the binary and the shared
# libraries found next to it in the build output.
FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
WORKDIR /work
COPY --from=builder /work/target/release/sbv2_api /work/main
COPY --from=builder /work/target/release/*.so /work
# Let the dynamic loader find the shared libraries copied into /work.
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/work
CMD ["/work/main"]
|
||||
3
scripts/docker/run_cpu.sh
Executable file
3
scripts/docker/run_cpu.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Run the CPU image of sbv2-api on port 3000.
# Run this from a directory that contains ./models and a .env file: the
# models directory is mounted at /work/models and .env supplies the
# container's environment variables.
docker run -it --rm -p 3000:3000 --name sbv2 \
-v ./models:/work/models --env-file .env \
ghcr.io/tuna2134/sbv2-api:cpu
|
||||
4
scripts/docker/run_cuda.sh
Executable file
4
scripts/docker/run_cuda.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/sh
# Run the CUDA image of sbv2-api on port 3000 with all GPUs exposed.
# Run this from a directory that contains ./models and a .env file: the
# models directory is mounted at /work/models and .env supplies the
# container's environment variables.
docker run -it --rm -p 3000:3000 --name sbv2 \
-v ./models:/work/models --env-file .env \
--gpus all \
ghcr.io/tuna2134/sbv2-api:cuda
|
||||
14
scripts/make_dict.sh
Executable file
14
scripts/make_dict.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
# Rebuild the dictionary files under ./crates/sbv2_core/src from the CSV
# dictionaries of a pinned AivisSpeech-Engine commit.
# All paths are relative, so run this from the repository root.
set -e

# A leftover checkout from an interrupted run would make `git clone` fail.
rm -rf ./scripts/tmp

# Blob-less clone without checkout, then fetch only the dictionary CSV files.
git clone https://github.com/Aivis-Project/AivisSpeech-Engine ./scripts/tmp --filter=blob:none -n
cd ./scripts/tmp
# Quoted so the pattern always reaches git as a pathspec instead of being
# expanded by the shell.
git checkout 168b2a1144afe300b0490d9a6dd773ec6e927667 -- 'resources/dictionaries/*.csv'
cd ../..

# Replace the previous dictionary directory with the fresh copy.
rm -rf ./crates/sbv2_core/src/dic
cp -r ./scripts/tmp/resources/dictionaries ./crates/sbv2_core/src/dic
rm -rf ./scripts/tmp

# Concatenate every 0*.csv into one file. The extra `echo` guarantees a
# newline between files. `cat` is resolved through PATH because /usr/bin/cat
# does not exist on every system.
for file in ./crates/sbv2_core/src/dic/0*.csv; do
    cat "$file"
    echo
done > ./crates/sbv2_core/src/all.csv

# Compile the merged CSV into all.dic with lindera.
lindera build ./crates/sbv2_core/src/all.csv ./crates/sbv2_core/src/dic/all.dic -u -k ipadic
|
||||
180
scripts/sbv2-bindings-colab.ipynb
Normal file
180
scripts/sbv2-bindings-colab.ipynb
Normal file
@@ -0,0 +1,180 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 音声合成プログラム\n",
|
||||
"\n",
|
||||
"このノートブックでは、`sbv2_bindings` パッケージを使用して音声合成を行います。必要なモデルをダウンロードし、ユーザーが入力したテキストから音声を生成します。音声合成が終わったら、再度テキストの入力を求め、ユーザーが終了するまで繰り返します。"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 必要なパッケージのインストール\n",
|
||||
"%pip install sbv2_bindings\n",
|
||||
"\n",
|
||||
"# 必要なモジュールのインポート\n",
|
||||
"import os\n",
|
||||
"import urllib.request\n",
|
||||
"import time\n",
|
||||
"from sbv2_bindings import TTSModel"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## モデルのダウンロード\n",
|
||||
"\n",
|
||||
"モデルファイルとトークナイザーをダウンロードします。ユーザーが独自のモデルを使用したい場合は、該当するURLまたはローカルパスを指定してください。"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# モデルの URL またはローカルパスの指定\n",
|
||||
"user_sbv2_model_url = \"\" # カスタムモデルのURLがあればここに指定\n",
|
||||
"user_sbv2_model_path = \"\" # カスタムモデルのローカルパスがあればここに指定\n",
|
||||
"\n",
|
||||
"# モデル用のディレクトリを作成\n",
|
||||
"model_dir = 'models'\n",
|
||||
"os.makedirs(model_dir, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# ダウンロードするファイルの URL\n",
|
||||
"file_urls = [\n",
|
||||
" \"https://huggingface.co/googlefan/sbv2_onnx_models/resolve/main/tokenizer.json\",\n",
|
||||
" \"https://huggingface.co/googlefan/sbv2_onnx_models/resolve/main/deberta.onnx\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# モデルのパス決定\n",
|
||||
"if user_sbv2_model_path:\n",
|
||||
" sbv2_model_path = user_sbv2_model_path # ローカルモデルのパスを使用\n",
|
||||
"elif user_sbv2_model_url:\n",
|
||||
" sbv2_model_filename = os.path.basename(user_sbv2_model_url)\n",
|
||||
" sbv2_model_path = os.path.join(model_dir, sbv2_model_filename)\n",
|
||||
" file_urls.append(user_sbv2_model_url)\n",
|
||||
"else:\n",
|
||||
" # デフォルトのモデルを使用\n",
|
||||
" sbv2_model_filename = \"tsukuyomi.sbv2\"\n",
|
||||
" sbv2_model_path = os.path.join(model_dir, sbv2_model_filename)\n",
|
||||
" file_urls.append(\"https://huggingface.co/googlefan/sbv2_onnx_models/resolve/main/tsukuyomi.sbv2\")\n",
|
||||
"\n",
|
||||
"# ファイルをダウンロード\n",
|
||||
"for url in file_urls:\n",
|
||||
" file_name = os.path.join(model_dir, os.path.basename(url))\n",
|
||||
" if not os.path.exists(file_name):\n",
|
||||
" print(f\"{file_name} をダウンロードしています...\")\n",
|
||||
" urllib.request.urlretrieve(url, file_name)\n",
|
||||
" else:\n",
|
||||
" print(f\"{file_name} は既に存在します。\")\n",
|
||||
"\n",
|
||||
"# ダウンロードまたは使用するファイルを確認\n",
|
||||
"print(\"\\n使用するファイル:\")\n",
|
||||
"for file in os.listdir(model_dir):\n",
|
||||
" print(file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## モデルの読み込みと音声合成\n",
|
||||
"\n",
|
||||
"モデルを読み込み、ユーザーが入力したテキストから音声を生成します。話者名は使用する `.sbv2` ファイル名から自動的に取得します。音声合成が終わったら、再度テキストの入力を求め、ユーザーが終了するまで繰り返します。"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 音声合成の実行\n",
|
||||
"def main():\n",
|
||||
" try:\n",
|
||||
" print(\"\\nモデルを読み込んでいます...\")\n",
|
||||
" model = TTSModel.from_path(\n",
|
||||
" os.path.join(model_dir, \"deberta.onnx\"),\n",
|
||||
" os.path.join(model_dir, \"tokenizer.json\")\n",
|
||||
" )\n",
|
||||
" print(\"モデルの読み込みが完了しました!\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"モデルの読み込みに失敗しました: {e}\")\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" # 話者名を取得(.sbv2 ファイル名の拡張子を除いた部分)\n",
|
||||
" speaker_name = os.path.splitext(os.path.basename(sbv2_model_path))[0]\n",
|
||||
" \n",
|
||||
" # 指定されたモデルのパスを使用\n",
|
||||
" try:\n",
|
||||
" model.load_sbv2file_from_path(speaker_name, sbv2_model_path)\n",
|
||||
" print(f\"話者 '{speaker_name}' のセットアップが完了しました!\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"SBV2ファイルの読み込みに失敗しました: {e}\")\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" # 音声合成を繰り返し実行\n",
|
||||
" while True:\n",
|
||||
" # 合成したいテキストをユーザーから入力\n",
|
||||
" user_input = input(\"\\n音声合成したいテキストを入力してください(終了するには 'exit' と入力): \")\n",
|
||||
" \n",
|
||||
" if user_input.strip().lower() == 'exit':\n",
|
||||
" print(\"音声合成を終了します。\")\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" # 出力ファイル名\n",
|
||||
" output_file = \"output.wav\"\n",
|
||||
"\n",
|
||||
" # 音声合成を実行\n",
|
||||
" try:\n",
|
||||
" print(\"\\n音声合成を開始します...\")\n",
|
||||
" start_time = time.time()\n",
|
||||
"\n",
|
||||
" audio_data = model.synthesize(user_input, speaker_name, 0, 0.0, 1)\n",
|
||||
"\n",
|
||||
" with open(output_file, \"wb\") as f:\n",
|
||||
" f.write(audio_data)\n",
|
||||
"\n",
|
||||
" end_time = time.time()\n",
|
||||
" elapsed_time = end_time - start_time\n",
|
||||
"\n",
|
||||
" print(f\"\\n音声が '{output_file}' に保存されました。\")\n",
|
||||
" print(f\"音声合成にかかった時間: {elapsed_time:.2f} 秒\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"音声合成に失敗しました: {e}\")\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" main()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.x"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
8
scripts/sbv2-test-api.py
Normal file
8
scripts/sbv2-test-api.py
Normal file
@@ -0,0 +1,8 @@
|
||||
import requests
|
||||
|
||||
# Ask the locally running API server to synthesize one sentence and save the
# response body as a WAV file.
res = requests.post(
    "http://localhost:3000/synthesize",
    json={"text": "おはようございます", "ident": "tsukuyomi"},
)
# Stop on an HTTP error instead of saving the error body as output.wav.
res.raise_for_status()
with open("output.wav", "wb") as f:
    f.write(res.content)
|
||||
20
scripts/sbv2-test-bindings.py
Normal file
20
scripts/sbv2-test-bindings.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from sbv2_bindings import TTSModel
|
||||
|
||||
|
||||
def main():
    """Synthesize one sentence with the ``amitaro`` voice into ``output.wav``.

    Expects ``deberta.onnx``, ``tokenizer.json`` and ``amitaro.sbv2`` inside
    ``./models``, relative to the current working directory.
    """
    print("Loading models...")
    # The file name matches what convert_deberta.py writes: deberta.onnx.
    model = TTSModel.from_path("./models/deberta.onnx", "./models/tokenizer.json")
    print("Models loaded!")

    model.load_sbv2file_from_path("amitaro", "./models/amitaro.sbv2")
    print("All setup is done!")

    style_vector = model.get_style_vector("amitaro", 0, 1.0)
    audio = model.synthesize("おはようございます。", "amitaro", style_vector, 0.0, 0.5)
    with open("output.wav", "wb") as f:
        f.write(audio)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user