mirror of
https://github.com/neodyland/sbv2-api.git
synced 2025-12-22 23:49:58 +00:00
feat: audio query request
This commit is contained in:
9
Cargo.lock
generated
9
Cargo.lock
generated
@@ -2319,6 +2319,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
"sbv2_core",
|
||||
"serde",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
@@ -2374,18 +2375,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.218"
|
||||
version = "1.0.219"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60"
|
||||
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.218"
|
||||
version = "1.0.219"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b"
|
||||
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
||||
@@ -12,4 +12,5 @@ documentation.workspace = true
|
||||
anyhow.workspace = true
|
||||
axum = "0.8.1"
|
||||
sbv2_core = { version = "0.2.0-alpha6", path = "../sbv2_core" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
tokio = { version = "1.44.1", features = ["full"] }
|
||||
|
||||
226
crates/sbv2_voicevox/query2.json
Normal file
226
crates/sbv2_voicevox/query2.json
Normal file
@@ -0,0 +1,226 @@
|
||||
{
|
||||
"accent_phrases": [
|
||||
{
|
||||
"moras": [
|
||||
{
|
||||
"text": "コ",
|
||||
"consonant": "k",
|
||||
"consonant_length": 0.10002632439136505,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.15740256011486053,
|
||||
"pitch": 5.749961853027344
|
||||
},
|
||||
{
|
||||
"text": "ン",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "N",
|
||||
"vowel_length": 0.08265873789787292,
|
||||
"pitch": 5.89122200012207
|
||||
},
|
||||
{
|
||||
"text": "ニ",
|
||||
"consonant": "n",
|
||||
"consonant_length": 0.03657080978155136,
|
||||
"vowel": "i",
|
||||
"vowel_length": 0.1175866425037384,
|
||||
"pitch": 5.969866752624512
|
||||
},
|
||||
{
|
||||
"text": "チ",
|
||||
"consonant": "ch",
|
||||
"consonant_length": 0.09005842357873917,
|
||||
"vowel": "i",
|
||||
"vowel_length": 0.08666137605905533,
|
||||
"pitch": 5.958892822265625
|
||||
},
|
||||
{
|
||||
"text": "ワ",
|
||||
"consonant": "w",
|
||||
"consonant_length": 0.07833231985569,
|
||||
"vowel": "a",
|
||||
"vowel_length": 0.21250136196613312,
|
||||
"pitch": 5.949411392211914
|
||||
}
|
||||
],
|
||||
"accent": 5,
|
||||
"pause_mora": {
|
||||
"text": "、",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "pau",
|
||||
"vowel_length": 0.4723339378833771,
|
||||
"pitch": 0.0
|
||||
},
|
||||
"is_interrogative": false
|
||||
},
|
||||
{
|
||||
"moras": [
|
||||
{
|
||||
"text": "オ",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.22004225850105286,
|
||||
"pitch": 5.6870927810668945
|
||||
},
|
||||
{
|
||||
"text": "ン",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "N",
|
||||
"vowel_length": 0.09161105751991272,
|
||||
"pitch": 5.93472957611084
|
||||
},
|
||||
{
|
||||
"text": "セ",
|
||||
"consonant": "s",
|
||||
"consonant_length": 0.08924821764230728,
|
||||
"vowel": "e",
|
||||
"vowel_length": 0.14142127335071564,
|
||||
"pitch": 6.121850490570068
|
||||
},
|
||||
{
|
||||
"text": "エ",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "e",
|
||||
"vowel_length": 0.10636933892965317,
|
||||
"pitch": 6.157896041870117
|
||||
},
|
||||
{
|
||||
"text": "ゴ",
|
||||
"consonant": "g",
|
||||
"consonant_length": 0.07600915431976318,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.09598273783922195,
|
||||
"pitch": 6.188933849334717
|
||||
},
|
||||
{
|
||||
"text": "オ",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.1079121008515358,
|
||||
"pitch": 6.235202789306641
|
||||
},
|
||||
{
|
||||
"text": "セ",
|
||||
"consonant": "s",
|
||||
"consonant_length": 0.09591838717460632,
|
||||
"vowel": "e",
|
||||
"vowel_length": 0.10286372154951096,
|
||||
"pitch": 6.153214454650879
|
||||
},
|
||||
{
|
||||
"text": "エ",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "e",
|
||||
"vowel_length": 0.08992656320333481,
|
||||
"pitch": 6.02571439743042
|
||||
},
|
||||
{
|
||||
"text": "ノ",
|
||||
"consonant": "n",
|
||||
"consonant_length": 0.05660202354192734,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.09676017612218857,
|
||||
"pitch": 5.711844444274902
|
||||
}
|
||||
],
|
||||
"accent": 5,
|
||||
"pause_mora": null,
|
||||
"is_interrogative": false
|
||||
},
|
||||
{
|
||||
"moras": [
|
||||
{
|
||||
"text": "セ",
|
||||
"consonant": "s",
|
||||
"consonant_length": 0.07805486768484116,
|
||||
"vowel": "e",
|
||||
"vowel_length": 0.09617523103952408,
|
||||
"pitch": 5.774399280548096
|
||||
},
|
||||
{
|
||||
"text": "カ",
|
||||
"consonant": "k",
|
||||
"consonant_length": 0.06712044775485992,
|
||||
"vowel": "a",
|
||||
"vowel_length": 0.148829385638237,
|
||||
"pitch": 6.063965797424316
|
||||
},
|
||||
{
|
||||
"text": "イ",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "i",
|
||||
"vowel_length": 0.11061104387044907,
|
||||
"pitch": 6.040698051452637
|
||||
},
|
||||
{
|
||||
"text": "エ",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "e",
|
||||
"vowel_length": 0.13046696782112122,
|
||||
"pitch": 5.806027889251709
|
||||
}
|
||||
],
|
||||
"accent": 1,
|
||||
"pause_mora": null,
|
||||
"is_interrogative": false
|
||||
},
|
||||
{
|
||||
"moras": [
|
||||
{
|
||||
"text": "ヨ",
|
||||
"consonant": "y",
|
||||
"consonant_length": 0.07194744795560837,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.08622600883245468,
|
||||
"pitch": 5.694094657897949
|
||||
},
|
||||
{
|
||||
"text": "オ",
|
||||
"consonant": null,
|
||||
"consonant_length": null,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.10635452717542648,
|
||||
"pitch": 5.787222385406494
|
||||
},
|
||||
{
|
||||
"text": "コ",
|
||||
"consonant": "k",
|
||||
"consonant_length": 0.07077334076166153,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.09248624742031097,
|
||||
"pitch": 5.793357849121094
|
||||
},
|
||||
{
|
||||
"text": "ソ",
|
||||
"consonant": "s",
|
||||
"consonant_length": 0.08705667406320572,
|
||||
"vowel": "o",
|
||||
"vowel_length": 0.2238258570432663,
|
||||
"pitch": 5.643765449523926
|
||||
}
|
||||
],
|
||||
"accent": 1,
|
||||
"pause_mora": null,
|
||||
"is_interrogative": false
|
||||
}
|
||||
],
|
||||
"speedScale": 1.0,
|
||||
"pitchScale": 0.0,
|
||||
"intonationScale": 1.0,
|
||||
"volumeScale": 1.0,
|
||||
"prePhonemeLength": 0.1,
|
||||
"postPhonemeLength": 0.1,
|
||||
"pauseLength": null,
|
||||
"pauseLengthScale": 1.0,
|
||||
"outputSamplingRate": 24000,
|
||||
"outputStereo": false,
|
||||
"kana": "コンニチワ'、オンセエゴ'オセエノ/セ'カイエ/ヨ'オコソ"
|
||||
}
|
||||
27
crates/sbv2_voicevox/src/error.rs
Normal file
27
crates/sbv2_voicevox/src/error.rs
Normal file
@@ -0,0 +1,27 @@
|
||||
use axum::{
|
||||
http::StatusCode,
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
|
||||
/// `Result` alias whose error type is fixed to [`AppError`].
pub type AppResult<T> = std::result::Result<T, AppError>;
|
||||
|
||||
/// Error newtype wrapping an `anyhow::Error`.
pub struct AppError(anyhow::Error);
|
||||
|
||||
impl IntoResponse for AppError {
|
||||
fn into_response(self) -> Response {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Something went wrong: {}", self.0),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
}
|
||||
|
||||
impl<E> From<E> for AppError
|
||||
where
|
||||
E: Into<anyhow::Error>,
|
||||
{
|
||||
fn from(err: E) -> Self {
|
||||
Self(err.into())
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,23 @@
|
||||
use axum::{routing::get, Router};
|
||||
use axum::{routing::get, Router, extract::Query};
|
||||
use sbv2_core::{jtalk::JTalk, tts_util::preprocess_parse_text};
|
||||
use tokio::net::TcpListener;
|
||||
use serde::Deserialize;
|
||||
|
||||
use error::AppResult;
|
||||
|
||||
mod error;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct RequestCreateAudioQuery {
|
||||
text: String,
|
||||
}
|
||||
|
||||
async fn create_audio_query(
|
||||
Query(request): Query<RequestCreateAudioQuery>,
|
||||
) -> AppResult<()> {
|
||||
let (phones, tones, mut word2ph, normalized_text, process) = preprocess_parse_text(&request.text, &JTalk::new()?)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user