This commit is contained in:
tuna2134
2024-09-10 06:20:15 +00:00
parent 41c9d2c092
commit 80a9899567
6 changed files with 32 additions and 1 deletions

7
Cargo.lock generated
View File

@@ -454,6 +454,12 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hound"
version = "3.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
[[package]]
name = "ident_case"
version = "1.0.1"
@@ -1288,6 +1294,7 @@ name = "sbv2_core"
version = "0.1.0"
dependencies = [
"anyhow",
"hound",
"jpreprocess",
"ndarray",
"once_cell",

BIN
output.wav Normal file

Binary file not shown.

View File

@@ -5,6 +5,7 @@ edition = "2021"
[dependencies]
anyhow.workspace = true
hound = "3.5.1"
jpreprocess = { version = "0.10.0", features = ["naist-jdic"] }
ndarray = "0.16.1"
once_cell = "1.19.0"

View File

@@ -16,6 +16,8 @@ pub enum Error {
SerdeJsonError(#[from] serde_json::Error),
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
#[error("hound error: {0}")]
HoundError(#[from] hound::Error),
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -1,7 +1,26 @@
use crate::error::Result;
use hound::{SampleFormat, WavSpec, WavWriter};
use ndarray::{array, Array1, Array2, Axis};
use ort::Session;
/// Writes `audio` to `file_path` as a single-channel, 16-bit integer PCM WAV file.
///
/// Every `f32` sample is multiplied by `i16::MAX`, clamped to the `i16` range,
/// and then cast to `i16` before being handed to the writer.
///
/// # Errors
///
/// Propagates any error reported while creating the writer, writing a sample,
/// or finalizing the file.
fn write_wav(file_path: &str, audio: &[f32], sample_rate: u32) -> Result<()> {
    // Bounds of the 16-bit range, expressed as floats for scaling and clamping.
    let lower = i16::MIN as f32;
    let upper = i16::MAX as f32;

    let mut wav = WavWriter::create(
        file_path,
        WavSpec {
            channels: 1, // mono; change to 2 for stereo output
            sample_rate,
            bits_per_sample: 16,
            sample_format: SampleFormat::Int,
        },
    )?;

    // Convert lazily and stop at the first sample the writer rejects.
    audio
        .iter()
        .map(|&value| (value * upper).clamp(lower, upper) as i16)
        .try_for_each(|pcm| wav.write_sample(pcm))?;

    wav.finalize()?;
    Ok(())
}
pub fn synthesize(
session: &Session,
bert_ori: Array2<f32>,
@@ -25,5 +44,8 @@ pub fn synthesize(
"bert" => bert,
"ja_bert" => style_vector,
}?)?;
let audio_array = outputs.get("output").unwrap().try_extract_tensor::<f32>()?;
write_wav("output.wav", audio_array.as_slice().unwrap(), 44100)?;
Ok(())
}

View File

@@ -1,4 +1,3 @@
pub fn intersperse<T>(slice: &[T], sep: T) -> Vec<T>
where
T: Clone,