mirror of
https://github.com/neodyland/sbv2-api.git
synced 2025-12-22 23:49:58 +00:00
initial commit: voicevox
This commit is contained in:
5
Cargo.lock
generated
5
Cargo.lock
generated
@@ -2319,6 +2319,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
"sbv2_core",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2741,9 +2742,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.43.0"
|
||||
version = "1.44.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
|
||||
checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"bytes",
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::io::Cursor;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::jtalk::JTalkProcess;
|
||||
use crate::{jtalk, nlp, norm, tokenizer, utils};
|
||||
use hound::{SampleFormat, WavSpec, WavWriter};
|
||||
use ndarray::{concatenate, s, Array, Array1, Array2, Array3, Axis};
|
||||
@@ -9,13 +10,13 @@ use tokenizers::Tokenizer;
|
||||
pub fn preprocess_parse_text(
|
||||
text: &str,
|
||||
jtalk: &jtalk::JTalk,
|
||||
) -> Result<(Vec<String>, Vec<i32>, Vec<i32>)> {
|
||||
) -> Result<(Vec<String>, Vec<i32>, Vec<i32>, String, JTalkProcess)> {
|
||||
let text = jtalk.num2word(text)?;
|
||||
let normalized_text = norm::normalize_text(&text);
|
||||
|
||||
let process = jtalk.process_text(&normalized_text)?;
|
||||
let result = process.g2p()?;
|
||||
Ok(result)
|
||||
let (phones, tones, word2ph) = process.g2p()?;
|
||||
Ok((phones, tones, word2ph, normalized_text, process))
|
||||
}
|
||||
|
||||
/// Parse text and return the input for synthesize
|
||||
@@ -34,7 +35,8 @@ pub async fn parse_text(
|
||||
Box<dyn std::future::Future<Output = Result<ndarray::Array2<f32>>>>,
|
||||
>,
|
||||
) -> Result<(Array2<f32>, Array1<i64>, Array1<i64>, Array1<i64>)> {
|
||||
let (phones, tones, mut word2ph) = preprocess_parse_text(text, jtalk)?;
|
||||
let (phones, tones, mut word2ph, normalized_text, process) =
|
||||
preprocess_parse_text(text, jtalk)?;
|
||||
let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones);
|
||||
|
||||
let phones = utils::intersperse(&phones, 0);
|
||||
|
||||
@@ -12,3 +12,4 @@ documentation.workspace = true
|
||||
anyhow.workspace = true
|
||||
axum = "0.8.1"
|
||||
sbv2_core = { version = "0.2.0-alpha6", path = "../sbv2_core" }
|
||||
tokio = { version = "1.44.1", features = ["full"] }
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
use axum::{routing::get, Router};
|
||||
use tokio::net::TcpListener;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
println!("Hello, world!");
|
||||
let app = Router::new().route("/", get(|| async { "Hello, world!" }));
|
||||
let listener = TcpListener::bind("0.0.0.0:8080").await?;
|
||||
axum::serve(listener, app).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user