This commit is contained in:
Googlefan
2025-02-22 08:00:17 +00:00
parent 14d631eeaa
commit 506ee4d883
60 changed files with 927 additions and 517 deletions

36
scripts/convert/README.md Normal file
View File

@@ -0,0 +1,36 @@
# 変換方法
## 初心者向け準備
わかる人は飛ばしてください。
1. pythonを入れます。3.11.8で動作確認をしていますが、最近のバージョンなら大体動くはずです。
2. `cd convert`
3. `python -m venv venv`
4. `source venv/bin/activate`
5. `pip install -r requirements.txt`
## モデル変換
1. 変換したいモデルの`.safetensors`で終わるファイルの位置を特定してください。
2. 同様に`config.json`と`style_vectors.npy`というファイルを探してください。
3. 以下のコマンドを実行します。
```sh
python convert_model.py --style_file "ここにstyle_vectors.npyの場所" --config_file "同様にconfig.jsonの場所" --model_file "同様に.safetensorsで終わるファイルの場所"
```
4. `models/名前.sbv2`というファイルが出力されます。GUI版のモデルファイルに入れてあげたら使えます。
## Deberta変換
意味が分からないならおそらく変換しなくてもいいってことです。
venvを用意し、requirementsを入れて、`python convert_deberta.py`を実行するだけです。
`models/deberta.onnx`と`models/tokenizer.json`が出力されたら成功です。

View File

@@ -0,0 +1,50 @@
from transformers.convert_slow_tokenizer import BertConverter
from style_bert_vits2.nlp import bert_models
from style_bert_vits2.constants import Languages
from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch
from torch import nn
from argparse import ArgumentParser
# Command line: the only option is the Hugging Face id of the model to convert.
parser = ArgumentParser()
parser.add_argument("--model", default="ku-nlp/deberta-v2-large-japanese-char-wwm")
args = parser.parse_args()
model_name = args.model

# The first call loads the tokenizer for the requested model id; the second
# call (without an id) fetches the tokenizer object that is converted below.
# NOTE(review): presumably the second call returns the instance cached by the
# first one - confirm against style_bert_vits2.nlp.bert_models.
bert_models.load_tokenizer(Languages.JP, model_name)
slow_tokenizer = bert_models.load_tokenizer(Languages.JP)

# Convert to the `tokenizers` representation and write it as a single JSON file.
tokenizer = BertConverter(slow_tokenizer).converted()
tokenizer.save("../models/tokenizer.json")
class ORTDeberta(nn.Module):
    """Export wrapper around a masked-LM checkpoint.

    ``forward`` runs the wrapped model with hidden states enabled and returns
    a single hidden-state entry (the third from the end) for the first item
    of the batch, moved to the CPU.
    """

    def __init__(self, model_name):
        """Load the pretrained masked-LM identified by *model_name*."""
        super().__init__()
        self.model = AutoModelForMaskedLM.from_pretrained(model_name)

    def forward(self, input_ids, token_type_ids, attention_mask):
        """Return ``hidden_states[-3]`` of the wrapped model for batch item 0."""
        outputs = self.model(
            input_ids=input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,
        )
        # The slice holds exactly one entry, so the concatenation along the
        # last axis simply yields that entry.
        selected = outputs["hidden_states"][-3:-2]
        return torch.cat(selected, -1)[0].cpu()
model = ORTDeberta(model_name)

# Tokenize one short sentence; its tensors are the example inputs for export.
sample_tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = sample_tokenizer("今日はいい天気ですね", return_tensors="pt")

# Names of the graph inputs, in the positional order ORTDeberta.forward takes.
onnx_input_names = ["input_ids", "token_type_ids", "attention_mask"]

# NOTE(review): only input_ids and attention_mask get a dynamic axis here;
# token_type_ids has none. Axis 1 is labelled "batch_size" although for a
# (batch, sequence) tokenizer output it would be the sequence axis - confirm
# both points against the runtime that consumes deberta.onnx.
torch.onnx.export(
    model,
    tuple(inputs[name] for name in onnx_input_names),
    "../models/deberta.onnx",
    input_names=onnx_input_names,
    output_names=["output"],
    verbose=True,
    dynamic_axes={
        "input_ids": {1: "batch_size"},
        "attention_mask": {1: "batch_size"},
    },
)

View File

@@ -0,0 +1,169 @@
import numpy as np
import json
from io import BytesIO
from style_bert_vits2.nlp import bert_models
from style_bert_vits2.constants import Languages
from style_bert_vits2.models.infer import get_net_g, get_text
from style_bert_vits2.models.hyper_parameters import HyperParameters
import torch
from style_bert_vits2.constants import (
DEFAULT_ASSIST_TEXT_WEIGHT,
DEFAULT_STYLE,
DEFAULT_STYLE_WEIGHT,
Languages,
)
import os
from tarfile import open as taropen, TarInfo
from zstandard import ZstdCompressor
from style_bert_vits2.tts_model import TTSModel
import numpy as np
from argparse import ArgumentParser
# Command line: three mandatory paths describing the model to convert.
parser = ArgumentParser()
for flag in ("--style_file", "--config_file", "--model_file"):
    parser.add_argument(flag, required=True)
args = parser.parse_args()
style_file, config_file, model_file = (
    args.style_file,
    args.config_file,
    args.model_file,
)

# Load the Japanese BERT model and tokenizer up front.
# NOTE(review): presumably get_text() further down relies on these being
# loaded - confirm against style_bert_vits2.
JP_BERT_NAME = "ku-nlp/deberta-v2-large-japanese-char-wwm"
bert_models.load_model(Languages.JP, JP_BERT_NAME)
bert_models.load_tokenizer(Languages.JP, JP_BERT_NAME)
# Read the style vectors and the model configuration; the configured model
# name is reused in every output file name below.
array = np.load(style_file)
data = array.tolist()
hyper_parameters = HyperParameters.load_from_json(config_file)
out_name = hyper_parameters.model_name

# Store the vectors as JSON: nested lists plus the original array shape.
style_payload = {"data": data, "shape": array.shape}
with open(f"../models/style_vectors_{out_name}.json", "w") as f:
    f.write(json.dumps(style_payload))
# Everything in this script runs on the CPU.
device = "cpu"

# Turn one sample sentence into the feature tensors used as example inputs
# for the ONNX export.
text = "今日はいい天気ですね。"
features = get_text(
    text,
    Languages.JP,
    hyper_parameters,
    device,
    assist_text=None,
    assist_text_weight=DEFAULT_ASSIST_TEXT_WEIGHT,
    given_phone=None,
    given_tone=None,
)
bert, ja_bert, en_bert, phones, tones, lang_ids = features

# The TTSModel instance is only consulted for its style-name -> id mapping.
tts_model = TTSModel(
    model_path=model_file,
    config_path=config_file,
    style_vec_path=style_file,
    device=device,
)
style_id = tts_model.style2id[DEFAULT_STYLE]
def get_style_vector(style_id, weight):
    """Return the style vector *style_id* scaled by *weight* around row 0.

    The vectors are read from the module-level ``style_file``. Row 0 is the
    baseline: the result is ``row0 + (row[style_id] - row0) * weight``, so a
    weight of 0 gives row 0 and a weight of 1 gives the selected row.
    """
    vectors = np.load(style_file)
    baseline = vectors[0]
    offset = vectors[style_id] - baseline
    return baseline + offset * weight
def _as_batch(tensor):
    """Move *tensor* to the export device and prepend a leading axis of size 1."""
    return tensor.to(device).unsqueeze(0)


style_vector = get_style_vector(style_id, DEFAULT_STYLE_WEIGHT)

# Example inputs for the export, each with a leading axis of size 1.
x_tst = _as_batch(phones)
tones = _as_batch(tones)
lang_ids = _as_batch(lang_ids)
bert, ja_bert, en_bert = (_as_batch(t) for t in (bert, ja_bert, en_bert))

# `phones` itself is still the un-batched tensor here, so size(0) is its length.
x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
style_vec_tensor = _as_batch(torch.from_numpy(style_vector))

# The synthesizer network that gets exported.
model = get_net_g(model_file, hyper_parameters.version, device, hyper_parameters)
def forward(x, x_len, sid, tone, lang, bert, style, length_scale, sdp_ratio):
    """Call ``model.infer`` with the argument layout used for the ONNX export.

    The first seven arguments are forwarded positionally in the given order;
    ``length_scale`` and ``sdp_ratio`` are forwarded as keyword arguments.
    Returns whatever ``model.infer`` returns.
    """
    positional = (x, x_len, sid, tone, lang, bert, style)
    return model.infer(
        *positional,
        length_scale=length_scale,
        sdp_ratio=sdp_ratio,
    )
# torch.onnx.export traces model.forward, so point it at the infer() wrapper.
model.forward = forward

# Graph input names, in the positional order of `forward`.
onnx_input_names = [
    "x_tst",
    "x_tst_lengths",
    "sid",
    "tones",
    "language",
    "bert",
    "style_vec",
    "length_scale",
    "sdp_ratio",
]

# Example values, one per input name above.
example_inputs = (
    x_tst,
    x_tst_lengths,
    torch.LongTensor([0]).to(device),
    tones,
    lang_ids,
    bert,
    style_vec_tensor,
    torch.tensor(1.0),
    torch.tensor(0.0),
)

# Axes allowed to vary at run time. length_scale and sdp_ratio are exported
# from 0-dim tensors and get no dynamic axes.
batch_axis = {0: "batch_size"}
batch_and_length_axes = {0: "batch_size", 1: "x_tst_max_length"}
onnx_dynamic_axes = {
    "x_tst": dict(batch_and_length_axes),
    "x_tst_lengths": dict(batch_axis),
    "sid": dict(batch_axis),
    "tones": dict(batch_and_length_axes),
    "language": dict(batch_and_length_axes),
    "bert": {0: "batch_size", 2: "x_tst_max_length"},
    "style_vec": dict(batch_axis),
}

torch.onnx.export(
    model,
    example_inputs,
    f"../models/model_{out_name}.onnx",
    verbose=True,
    dynamic_axes=onnx_dynamic_axes,
    input_names=onnx_input_names,
    output_names=["output"],
)
def _simplify_onnx(path):
    """Run the ``onnxsim`` CLI in place on *path*.

    The command is passed as an argument list (no shell), so a model name
    containing spaces or shell metacharacters cannot break or alter it.
    Simplification stays best-effort: on failure a warning is printed and the
    unsimplified model is kept. Returns True when onnxsim exited with status 0.
    """
    import subprocess
    import sys

    try:
        result = subprocess.run(["onnxsim", path, path], check=False)
    except FileNotFoundError:
        print(
            "warning: onnxsim executable not found; keeping the unsimplified model",
            file=sys.stderr,
        )
        return False
    if result.returncode != 0:
        print(
            f"warning: onnxsim exited with status {result.returncode}; "
            "keeping the model file as it is",
            file=sys.stderr,
        )
        return False
    return True


def _read_bytes(path):
    """Return the full contents of *path*, closing the file afterwards."""
    with open(path, "rb") as fh:
        return fh.read()


def _build_tar(members):
    """Return an uncompressed tar archive (bytes) holding ``(name, payload)`` pairs."""
    buffer = BytesIO()
    with taropen(fileobj=buffer, mode="w") as archive:
        for member_name, payload in members:
            info = TarInfo(member_name)
            info.size = len(payload)
            archive.addfile(info, BytesIO(payload))
    # The archive is complete only after the tar object has been closed above.
    return buffer.getvalue()


onnx_path = f"../models/model_{out_name}.onnx"
_simplify_onnx(onnx_path)

onnxfile = _read_bytes(onnx_path)
stylefile = _read_bytes(f"../models/style_vectors_{out_name}.json")
version = bytes("1", "utf8")

# The .sbv2 container is a zstd-compressed tar with three members. The tar is
# assembled in memory, so no temporary file is left behind if a step fails.
archive_bytes = _build_tar(
    [
        ("version.txt", version),
        ("model.onnx", onnxfile),
        ("style_vectors.json", stylefile),
    ]
)
with open(f"../models/{out_name}.sbv2", "wb") as sbv2_file:
    sbv2_file.write(ZstdCompressor(threads=-1, level=22).compress(archive_bytes))

View File

@@ -0,0 +1,5 @@
style-bert-vits2
onnxsim
numpy<2
zstandard
onnxruntime