mirror of
https://github.com/neodyland/sbv2-api.git
synced 2025-12-22 23:49:58 +00:00
initial commit
This commit is contained in:
3
Cargo.lock
generated
3
Cargo.lock
generated
@@ -1834,13 +1834,14 @@ name = "sbv2_bindings"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"ndarray",
|
||||
"pyo3",
|
||||
"sbv2_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sbv2_core"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"dotenvy",
|
||||
|
||||
@@ -6,3 +6,4 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"]
|
||||
anyhow = "1.0.86"
|
||||
dotenvy = "0.15.7"
|
||||
env_logger = "0.11.5"
|
||||
ndarray = "0.16.1"
|
||||
@@ -10,5 +10,6 @@ crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
ndarray.workspace = true
|
||||
pyo3 = { version = "0.22.0", features = ["anyhow"] }
|
||||
sbv2_core = { version = "0.1.0", path = "../sbv2_core" }
|
||||
|
||||
21
sbv2_bindings/examples/basic.py
Normal file
21
sbv2_bindings/examples/basic.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from sbv2_bindings import TTSModel
|
||||
|
||||
|
||||
def main():
    """Run a minimal end-to-end synthesis with the `sbv2_bindings` module.

    Reads the BERT model, tokenizer and one `.sbv2` voice file from
    `../models`, synthesizes a short utterance and writes the returned bytes
    to `output.wav` in the current directory.
    """
    # Identifier the voice is registered under; the same value must be used
    # for loading, style lookup and synthesis.
    ident = "amitaro"

    with open("../models/debert.onnx", "rb") as f:
        bert = f.read()
    with open("../models/tokenizer.json", "rb") as f:
        tokenizer = f.read()

    model = TTSModel(bert, tokenizer)

    with open("../models/amitaro.sbv2", "rb") as f:
        # The binding's signature is load_sbv2file(ident, sbv2file_bytes);
        # omitting the ident raises a TypeError.
        model.load_sbv2file(ident, f.read())

    style_vector = model.get_style_vector(ident, 0, 1.0)
    with open("output.wav", "wb") as f:
        f.write(model.synthesize("こんにちは", ident, style_vector, 0.0, 0.5))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,5 +1,6 @@
|
||||
use pyo3::prelude::*;
|
||||
mod sbv2;
|
||||
pub mod style;
|
||||
|
||||
/// Formats the sum of two numbers as string.
|
||||
#[pyfunction]
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use pyo3::prelude::*;
|
||||
use sbv2_core::tts::TTSModelHolder;
|
||||
|
||||
use crate::style::StyleVector;
|
||||
|
||||
#[pyclass]
|
||||
pub struct TTSModel {
|
||||
pub model: TTSModelHolder,
|
||||
@@ -15,5 +17,40 @@ impl TTSModel {
|
||||
})
|
||||
}
|
||||
|
||||
fn load()
|
||||
/// Forwards `ident` and `sbv2file_bytes` to the wrapped
/// `TTSModelHolder::load_sbv2file`.
///
/// # Errors
///
/// Propagates any error reported by the underlying holder.
fn load_sbv2file(&mut self, ident: String, sbv2file_bytes: Vec<u8>) -> anyhow::Result<()> {
    let holder = &mut self.model;
    holder.load_sbv2file(ident, sbv2file_bytes)?;
    Ok(())
}
|
||||
|
||||
fn get_style_vector(
|
||||
&self,
|
||||
ident: String,
|
||||
style_id: i32,
|
||||
weight: f32,
|
||||
) -> anyhow::Result<StyleVector> {
|
||||
Ok(StyleVector::new(
|
||||
self.model.get_style_vector(ident, style_id, weight)?,
|
||||
))
|
||||
}
|
||||
|
||||
fn synthesize(
|
||||
&self,
|
||||
text: String,
|
||||
ident: String,
|
||||
style_vector: StyleVector,
|
||||
sdp_ratio: f32,
|
||||
length_scale: f32,
|
||||
) -> anyhow::Result<Vec<u8>> {
|
||||
let (bert_ori, phones, tones, lang_ids) = self.model.parse_text(&text)?;
|
||||
Ok(self.model.synthesize(
|
||||
ident,
|
||||
bert_ori,
|
||||
phones,
|
||||
tones,
|
||||
lang_ids,
|
||||
style_vector.get(),
|
||||
sdp_ratio,
|
||||
length_scale,
|
||||
)?)
|
||||
}
|
||||
}
|
||||
16
sbv2_bindings/src/style.rs
Normal file
16
sbv2_bindings/src/style.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
use ndarray::Array1;
|
||||
use pyo3::prelude::*;
|
||||
|
||||
#[pyclass]
|
||||
#[derive(Clone)]
|
||||
pub struct StyleVector(Array1<f32>);
|
||||
|
||||
impl StyleVector {
|
||||
pub fn new(data: Array1<f32>) -> Self {
|
||||
StyleVector(data)
|
||||
}
|
||||
|
||||
pub fn get(&self) -> Array1<f32> {
|
||||
self.0.clone()
|
||||
}
|
||||
}
|
||||
@@ -14,7 +14,7 @@ dotenvy.workspace = true
|
||||
env_logger.workspace = true
|
||||
hound = "3.5.1"
|
||||
jpreprocess = { version = "0.10.0", features = ["naist-jdic"] }
|
||||
ndarray = "0.16.1"
|
||||
ndarray.workspace = true
|
||||
num_cpus = "1.16.0"
|
||||
once_cell = "1.19.0"
|
||||
ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.6" }
|
||||
|
||||
Reference in New Issue
Block a user