split the code to support voicevox

This commit is contained in:
Masato Kikuchi
2025-03-26 15:14:22 +09:00
parent 99a4b130af
commit fc944b9d33
6 changed files with 45 additions and 6 deletions

View File

@@ -5,6 +5,19 @@ use crate::{jtalk, nlp, norm, tokenizer, utils};
use hound::{SampleFormat, WavSpec, WavWriter};
use ndarray::{concatenate, s, Array, Array1, Array2, Array3, Axis};
use tokenizers::Tokenizer;
/// Run the text front-end shared by `parse_text` and return the g2p output.
///
/// The input passes through three stages, in this order:
/// 1. `jtalk.num2word` on the raw text,
/// 2. `norm::normalize_text` on the result of step 1,
/// 3. `jtalk.process_text` on the normalized text, followed by `g2p()`.
///
/// # Returns
///
/// The three-element tuple produced by `g2p()`. `parse_text` binds its
/// elements as `(phones, tones, word2ph)`.
///
/// # Errors
///
/// Propagates the first error raised by `num2word`, `process_text`, or `g2p`.
pub fn preprocess_parse_text(
    text: &str,
    jtalk: &jtalk::JTalk,
) -> Result<(Vec<String>, Vec<i32>, Vec<i32>)> {
    let spelled_out = jtalk.num2word(text)?;
    let normalized = norm::normalize_text(&spelled_out);
    let processed = jtalk.process_text(&normalized)?;
    let (phones, tones, word2ph) = processed.g2p()?;
    Ok((phones, tones, word2ph))
}
/// Parse text and return the input for synthesize
///
/// # Note
@@ -21,11 +34,7 @@ pub async fn parse_text(
Box<dyn std::future::Future<Output = Result<ndarray::Array2<f32>>>>,
>,
) -> Result<(Array2<f32>, Array1<i64>, Array1<i64>, Array1<i64>)> {
let text = jtalk.num2word(text)?;
let normalized_text = norm::normalize_text(&text);
let process = jtalk.process_text(&normalized_text)?;
let (phones, tones, mut word2ph) = process.g2p()?;
let (phones, tones, mut word2ph) = preprocess_parse_text(text, jtalk)?;
let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones);
let phones = utils::intersperse(&phones, 0);

View File

@@ -0,0 +1,14 @@
[package]
name = "sbv2_voicevox"
version.workspace = true
edition.workspace = true
description.workspace = true
license.workspace = true
readme.workspace = true
repository.workspace = true
documentation.workspace = true
[dependencies]
anyhow.workspace = true
axum = "0.8.1"
sbv2_core = { version = "0.2.0-alpha6", path = "../sbv2_core" }

View File

@@ -0,0 +1,2 @@
# sbv2-voicevox
sbv2-apiをvoicevox化します。

View File

@@ -0,0 +1,5 @@
/// Entry point of the `sbv2_voicevox` binary.
///
/// Currently a placeholder: prints a greeting to stdout and returns `Ok(())`.
// `main` may not be declared `async` unless a runtime attribute macro wraps it
// (rustc error E0752). No such attribute is present here, and the body awaits
// nothing, so the function is plain synchronous.
fn main() -> anyhow::Result<()> {
    println!("Hello, world!");
    Ok(())
}