mirror of
https://github.com/neodyland/sbv2-api.git
synced 2026-01-06 14:32:57 +00:00
initial commit
This commit is contained in:
3
Cargo.lock
generated
3
Cargo.lock
generated
@@ -1834,13 +1834,14 @@ name = "sbv2_bindings"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"ndarray",
|
||||||
"pyo3",
|
"pyo3",
|
||||||
"sbv2_core",
|
"sbv2_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sbv2_core"
|
name = "sbv2_core"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"dotenvy",
|
"dotenvy",
|
||||||
|
|||||||
@@ -6,3 +6,4 @@ members = ["sbv2_api", "sbv2_core", "sbv2_bindings"]
|
|||||||
anyhow = "1.0.86"
|
anyhow = "1.0.86"
|
||||||
dotenvy = "0.15.7"
|
dotenvy = "0.15.7"
|
||||||
env_logger = "0.11.5"
|
env_logger = "0.11.5"
|
||||||
|
ndarray = "0.16.1"
|
||||||
@@ -10,5 +10,6 @@ crate-type = ["cdylib"]
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
ndarray.workspace = true
|
||||||
pyo3 = { version = "0.22.0", features = ["anyhow"] }
|
pyo3 = { version = "0.22.0", features = ["anyhow"] }
|
||||||
sbv2_core = { version = "0.1.0", path = "../sbv2_core" }
|
sbv2_core = { version = "0.1.0", path = "../sbv2_core" }
|
||||||
|
|||||||
21
sbv2_bindings/examples/basic.py
Normal file
21
sbv2_bindings/examples/basic.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
from sbv2_bindings import TTSModel
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
with open("../models/debert.onnx", "rb") as f:
|
||||||
|
bert = f.read()
|
||||||
|
with open("../models/tokenizer.json", "rb") as f:
|
||||||
|
tokenizer = f.read()
|
||||||
|
|
||||||
|
model = TTSModel(bert, tokenizer)
|
||||||
|
|
||||||
|
with open("../models/amitaro.sbv2", "rb") as f:
|
||||||
|
model.load_sbv2file("amitaro", f.read())
|
||||||
|
|
||||||
|
style_vector = model.get_style_vector("amitaro", 0, 1.0)
|
||||||
|
with open("output.wav", "wb") as f:
|
||||||
|
f.write(model.synthesize("こんにちは", "amitaro", style_vector, 0.0, 0.5))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
use pyo3::prelude::*;
|
use pyo3::prelude::*;
|
||||||
mod sbv2;
|
mod sbv2;
|
||||||
|
pub mod style;
|
||||||
|
|
||||||
/// Formats the sum of two numbers as string.
|
/// Formats the sum of two numbers as string.
|
||||||
#[pyfunction]
|
#[pyfunction]
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
use pyo3::prelude::*;
|
use pyo3::prelude::*;
|
||||||
use sbv2_core::tts::TTSModelHolder;
|
use sbv2_core::tts::TTSModelHolder;
|
||||||
|
|
||||||
|
use crate::style::StyleVector;
|
||||||
|
|
||||||
#[pyclass]
|
#[pyclass]
|
||||||
pub struct TTSModel {
|
pub struct TTSModel {
|
||||||
pub model: TTSModelHolder,
|
pub model: TTSModelHolder,
|
||||||
@@ -15,5 +17,40 @@ impl TTSModel {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load()
|
fn load_sbv2file(&mut self, ident: String, sbv2file_bytes: Vec<u8>) -> anyhow::Result<()> {
|
||||||
|
self.model.load_sbv2file(ident, sbv2file_bytes)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_style_vector(
|
||||||
|
&self,
|
||||||
|
ident: String,
|
||||||
|
style_id: i32,
|
||||||
|
weight: f32,
|
||||||
|
) -> anyhow::Result<StyleVector> {
|
||||||
|
Ok(StyleVector::new(
|
||||||
|
self.model.get_style_vector(ident, style_id, weight)?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn synthesize(
|
||||||
|
&self,
|
||||||
|
text: String,
|
||||||
|
ident: String,
|
||||||
|
style_vector: StyleVector,
|
||||||
|
sdp_ratio: f32,
|
||||||
|
length_scale: f32,
|
||||||
|
) -> anyhow::Result<Vec<u8>> {
|
||||||
|
let (bert_ori, phones, tones, lang_ids) = self.model.parse_text(&text)?;
|
||||||
|
Ok(self.model.synthesize(
|
||||||
|
ident,
|
||||||
|
bert_ori,
|
||||||
|
phones,
|
||||||
|
tones,
|
||||||
|
lang_ids,
|
||||||
|
style_vector.get(),
|
||||||
|
sdp_ratio,
|
||||||
|
length_scale,
|
||||||
|
)?)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
16
sbv2_bindings/src/style.rs
Normal file
16
sbv2_bindings/src/style.rs
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
use ndarray::Array1;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
|
||||||
|
#[pyclass]
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct StyleVector(Array1<f32>);
|
||||||
|
|
||||||
|
impl StyleVector {
|
||||||
|
pub fn new(data: Array1<f32>) -> Self {
|
||||||
|
StyleVector(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get(&self) -> Array1<f32> {
|
||||||
|
self.0.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -14,7 +14,7 @@ dotenvy.workspace = true
|
|||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
hound = "3.5.1"
|
hound = "3.5.1"
|
||||||
jpreprocess = { version = "0.10.0", features = ["naist-jdic"] }
|
jpreprocess = { version = "0.10.0", features = ["naist-jdic"] }
|
||||||
ndarray = "0.16.1"
|
ndarray.workspace = true
|
||||||
num_cpus = "1.16.0"
|
num_cpus = "1.16.0"
|
||||||
once_cell = "1.19.0"
|
once_cell = "1.19.0"
|
||||||
ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.6" }
|
ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.6" }
|
||||||
|
|||||||
Reference in New Issue
Block a user