From fc944b9d33c0fa58f95692cf2e89e44572b751ae Mon Sep 17 00:00:00 2001 From: Masato Kikuchi Date: Wed, 26 Mar 2025 15:14:22 +0900 Subject: [PATCH] split the code for support voicevox --- Cargo.lock | 9 +++++++++ Cargo.toml | 2 +- crates/sbv2_core/src/tts_util.rs | 19 ++++++++++++++----- crates/sbv2_voicevox/Cargo.toml | 14 ++++++++++++++ crates/sbv2_voicevox/README.md | 2 ++ crates/sbv2_voicevox/src/main.rs | 5 +++++ 6 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 crates/sbv2_voicevox/Cargo.toml create mode 100644 crates/sbv2_voicevox/README.md create mode 100644 crates/sbv2_voicevox/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 3245a45..1d69f54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2312,6 +2312,15 @@ dependencies = [ "zstd", ] +[[package]] +name = "sbv2_voicevox" +version = "0.2.0-alpha6" +dependencies = [ + "anyhow", + "axum", + "sbv2_core", +] + [[package]] name = "sbv2_wasm" version = "0.2.0-alpha6" diff --git a/Cargo.toml b/Cargo.toml index 240b6e0..fcc17c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["./crates/sbv2_api", "./crates/sbv2_core", "./crates/sbv2_bindings", "./crates/sbv2_wasm"] +members = ["./crates/sbv2_api", "./crates/sbv2_core", "./crates/sbv2_bindings", "./crates/sbv2_wasm", "crates/sbv2_voicevox"] [workspace.package] version = "0.2.0-alpha6" diff --git a/crates/sbv2_core/src/tts_util.rs b/crates/sbv2_core/src/tts_util.rs index 24b059a..8cab20d 100644 --- a/crates/sbv2_core/src/tts_util.rs +++ b/crates/sbv2_core/src/tts_util.rs @@ -5,6 +5,19 @@ use crate::{jtalk, nlp, norm, tokenizer, utils}; use hound::{SampleFormat, WavSpec, WavWriter}; use ndarray::{concatenate, s, Array, Array1, Array2, Array3, Axis}; use tokenizers::Tokenizer; + +pub fn preprocess_parse_text( + text: &str, + jtalk: &jtalk::JTalk, +) -> Result<(Vec, Vec, Vec)> { + let text = jtalk.num2word(text)?; + let normalized_text = norm::normalize_text(&text); + + let process = jtalk.process_text(&normalized_text)?; + let result = process.g2p()?; + Ok(result) +} + /// Parse text and return the input for synthesize /// /// # Note @@ -21,11 +34,7 @@ pub async fn parse_text( Box>>>, >, ) -> Result<(Array2, Array1, Array1, Array1)> { - let text = jtalk.num2word(text)?; - let normalized_text = norm::normalize_text(&text); - - let process = jtalk.process_text(&normalized_text)?; - let (phones, tones, mut word2ph) = process.g2p()?; + let (phones, tones, mut word2ph) = preprocess_parse_text(text, jtalk)?; let (phones, tones, lang_ids) = nlp::cleaned_text_to_sequence(phones, tones); let phones = utils::intersperse(&phones, 0); diff --git a/crates/sbv2_voicevox/Cargo.toml b/crates/sbv2_voicevox/Cargo.toml new file mode 100644 index 0000000..4bd10aa --- /dev/null +++ b/crates/sbv2_voicevox/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "sbv2_voicevox" +version.workspace = true +edition.workspace = true +description.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +documentation.workspace = true + +[dependencies] +anyhow.workspace = true +axum = "0.8.1" +sbv2_core = { version = "0.2.0-alpha6", path = "../sbv2_core" } diff --git a/crates/sbv2_voicevox/README.md b/crates/sbv2_voicevox/README.md new file mode 100644 index 0000000..d9b4b1f --- /dev/null +++ b/crates/sbv2_voicevox/README.md @@ -0,0 +1,2 @@ +# sbv2-voicevox +sbv2-apiをvoicevox化します。 \ No newline at end of file diff --git a/crates/sbv2_voicevox/src/main.rs b/crates/sbv2_voicevox/src/main.rs new file mode 100644 index 0000000..8d6f05f --- /dev/null +++ b/crates/sbv2_voicevox/src/main.rs @@ -0,0 +1,5 @@ + +async fn main() -> anyhow::Result<()> { + println!("Hello, world!"); + Ok(()) +}