initial commit

This commit is contained in:
tuna2134
2024-09-12 12:12:21 +00:00
parent 38392f2e23
commit 2a65627414
8 changed files with 82 additions and 4 deletions

3
Cargo.lock generated
View File

@@ -1834,13 +1834,14 @@ name = "sbv2_bindings"
version = "0.1.0"
dependencies = [
"anyhow",
"ndarray",
"pyo3",
"sbv2_core",
]
[[package]]
name = "sbv2_core"
version = "0.1.1"
version = "0.1.2"
dependencies = [
"anyhow",
"dotenvy",

View File

@@ -6,3 +6,4 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"]
anyhow = "1.0.86"
dotenvy = "0.15.7"
env_logger = "0.11.5"
ndarray = "0.16.1"

View File

@@ -10,5 +10,6 @@ crate-type = ["cdylib"]
[dependencies]
anyhow.workspace = true
ndarray.workspace = true
pyo3 = { version = "0.22.0", features = ["anyhow"] }
sbv2_core = { version = "0.1.0", path = "../sbv2_core" }

View File

@@ -0,0 +1,21 @@
from sbv2_bindings import TTSModel
def main():
    """Synthesize a short greeting with the sbv2 Python bindings.

    Reads the BERT model, the tokenizer and one ``.sbv2`` voice model from
    ``../models``, then writes the synthesized audio to ``output.wav`` in the
    current working directory.
    """
    with open("../models/debert.onnx", "rb") as f:
        bert = f.read()
    with open("../models/tokenizer.json", "rb") as f:
        tokenizer = f.read()

    model = TTSModel(bert, tokenizer)

    # load_sbv2file takes (ident, sbv2file_bytes). The ident has to be the
    # same name that get_style_vector/synthesize use below.
    with open("../models/amitaro.sbv2", "rb") as f:
        model.load_sbv2file("amitaro", f.read())

    style_vector = model.get_style_vector("amitaro", 0, 1.0)
    audio = model.synthesize("こんにちは", "amitaro", style_vector, 0.0, 0.5)

    # The binding returns a Rust Vec<u8>; depending on the PyO3 version this
    # reaches Python as a list of ints rather than bytes. bytes() accepts
    # both, so the write below works either way.
    with open("output.wav", "wb") as f:
        f.write(bytes(audio))


if __name__ == "__main__":
    main()

View File

@@ -1,5 +1,6 @@
use pyo3::prelude::*;
mod sbv2;
pub mod style;
/// Formats the sum of two numbers as string.
#[pyfunction]

View File

@@ -1,6 +1,8 @@
use pyo3::prelude::*;
use sbv2_core::tts::TTSModelHolder;
use crate::style::StyleVector;
#[pyclass]
pub struct TTSModel {
pub model: TTSModelHolder,
@@ -15,5 +17,40 @@ impl TTSModel {
})
}
fn load()
/// Registers the contents of an `.sbv2` file with the wrapped model holder.
///
/// `ident` and `sbv2file_bytes` are forwarded unchanged to
/// `TTSModelHolder::load_sbv2file`; any error it reports is returned to
/// the caller.
fn load_sbv2file(&mut self, ident: String, sbv2file_bytes: Vec<u8>) -> anyhow::Result<()> {
    let holder = &mut self.model;
    holder.load_sbv2file(ident, sbv2file_bytes)?;
    Ok(())
}
/// Looks up a style vector on the wrapped model holder.
///
/// The arguments are passed straight through to
/// `TTSModelHolder::get_style_vector`, and the array it returns is
/// wrapped in a [`StyleVector`]. Errors from the holder are propagated.
fn get_style_vector(
    &self,
    ident: String,
    style_id: i32,
    weight: f32,
) -> anyhow::Result<StyleVector> {
    let raw = self.model.get_style_vector(ident, style_id, weight)?;
    Ok(StyleVector::new(raw))
}
/// Runs text parsing followed by synthesis on the wrapped model holder.
///
/// `text` is first handed to `TTSModelHolder::parse_text`; its four
/// outputs, together with `ident`, a copy of the array held by
/// `style_vector`, `sdp_ratio` and `length_scale`, are then passed to
/// `TTSModelHolder::synthesize`. The resulting bytes are returned as-is,
/// and an error from either step is propagated.
fn synthesize(
    &self,
    text: String,
    ident: String,
    style_vector: StyleVector,
    sdp_ratio: f32,
    length_scale: f32,
) -> anyhow::Result<Vec<u8>> {
    let (bert, phonemes, tone_ids, language_ids) = self.model.parse_text(&text)?;
    let style = style_vector.get();
    // NOTE(review): a `Vec<u8>` return value may surface in Python as a list
    // of ints rather than `bytes` — confirm against the PyO3 version in use.
    let audio = self.model.synthesize(
        ident,
        bert,
        phonemes,
        tone_ids,
        language_ids,
        style,
        sdp_ratio,
        length_scale,
    )?;
    Ok(audio)
}
}

View File

@@ -0,0 +1,16 @@
use ndarray::Array1;
use pyo3::prelude::*;
/// Python-visible wrapper around a one-dimensional `f32` array.
#[pyclass]
#[derive(Clone)]
pub struct StyleVector(Array1<f32>);

impl StyleVector {
    /// Wraps `data` without copying it.
    pub fn new(data: Array1<f32>) -> Self {
        Self(data)
    }

    /// Returns an owned clone of the wrapped array.
    pub fn get(&self) -> Array1<f32> {
        let Self(inner) = self;
        inner.clone()
    }
}

View File

@@ -14,7 +14,7 @@ dotenvy.workspace = true
env_logger.workspace = true
hound = "3.5.1"
jpreprocess = { version = "0.10.0", features = ["naist-jdic"] }
ndarray = "0.16.1"
ndarray.workspace = true
num_cpus = "1.16.0"
once_cell = "1.19.0"
ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.6" }