mirror of
https://github.com/neodyland/sbv2-api.git
synced 2025-12-23 07:59:56 +00:00
Compare commits
7 Commits
commit-320
...
commit-9f2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9f22694df0 | ||
|
|
62ba2c802f | ||
|
|
4f5b936f6f | ||
|
|
3c8efc716c | ||
|
|
e9ced32b70 | ||
|
|
e7a1575cbc | ||
|
|
873bbb77b6 |
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -1740,7 +1740,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "ort"
|
name = "ort"
|
||||||
version = "2.0.0-rc.10"
|
version = "2.0.0-rc.10"
|
||||||
source = "git+https://github.com/pykeio/ort.git#5f96a2d5857c3fe9f06282dbf4bdcddbca6c5fe6"
|
source = "git+https://github.com/pykeio/ort.git#f4ab181702495bff99a488322d3a8de0d7050349"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libloading",
|
"libloading",
|
||||||
"ndarray",
|
"ndarray",
|
||||||
@@ -1752,7 +1752,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "ort-sys"
|
name = "ort-sys"
|
||||||
version = "2.0.0-rc.10"
|
version = "2.0.0-rc.10"
|
||||||
source = "git+https://github.com/pykeio/ort.git#5f96a2d5857c3fe9f06282dbf4bdcddbca6c5fe6"
|
source = "git+https://github.com/pykeio/ort.git#f4ab181702495bff99a488322d3a8de0d7050349"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"flate2",
|
"flate2",
|
||||||
"pkg-config",
|
"pkg-config",
|
||||||
|
|||||||
@@ -127,20 +127,20 @@ impl JTalkProcess {
|
|||||||
Ok(phone_tone_list)
|
Ok(phone_tone_list)
|
||||||
} else if tone_values.len() == 2 {
|
} else if tone_values.len() == 2 {
|
||||||
if tone_values == hash_set![0, 1] {
|
if tone_values == hash_set![0, 1] {
|
||||||
return Ok(phone_tone_list);
|
Ok(phone_tone_list)
|
||||||
} else if tone_values == hash_set![-1, 0] {
|
} else if tone_values == hash_set![-1, 0] {
|
||||||
return Ok(phone_tone_list
|
Ok(phone_tone_list
|
||||||
.iter()
|
.iter()
|
||||||
.map(|x| {
|
.map(|x| {
|
||||||
let new_tone = if x.1 == -1 { 0 } else { 1 };
|
let new_tone = if x.1 == -1 { 0 } else { 1 };
|
||||||
(x.0.clone(), new_tone)
|
(x.0.clone(), new_tone)
|
||||||
})
|
})
|
||||||
.collect());
|
.collect())
|
||||||
} else {
|
} else {
|
||||||
return Err(Error::ValueError("Invalid tone values 0".to_string()));
|
Err(Error::ValueError("Invalid tone values 0".to_string()))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return Err(Error::ValueError("Invalid tone values 1".to_string()));
|
Err(Error::ValueError("Invalid tone values 1".to_string()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -240,39 +240,43 @@ impl TTSModelHolder {
|
|||||||
}
|
}
|
||||||
fn find_and_load_model<I: Into<TTSIdent>>(&mut self, ident: I) -> Result<bool> {
|
fn find_and_load_model<I: Into<TTSIdent>>(&mut self, ident: I) -> Result<bool> {
|
||||||
let ident = ident.into();
|
let ident = ident.into();
|
||||||
let (bytes, style_vectors) = {
|
// Locate target model entry
|
||||||
let model = self
|
let target_index = self
|
||||||
.models
|
|
||||||
.iter()
|
|
||||||
.find(|m| m.ident == ident)
|
|
||||||
.ok_or(Error::ModelNotFoundError(ident.to_string()))?;
|
|
||||||
if model.vits2.is_some() {
|
|
||||||
return Ok(true);
|
|
||||||
}
|
|
||||||
(model.bytes.clone().unwrap(), model.style_vectors.clone())
|
|
||||||
};
|
|
||||||
self.unload(ident.clone());
|
|
||||||
let s = model::load_model(&bytes, false)?;
|
|
||||||
if let Some(max) = self.max_loaded_models {
|
|
||||||
if self.models.iter().filter(|x| x.vits2.is_some()).count() >= max {
|
|
||||||
self.unload(self.models.first().unwrap().ident.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.models.push(TTSModel {
|
|
||||||
bytes: Some(bytes.to_vec()),
|
|
||||||
vits2: Some(s),
|
|
||||||
style_vectors,
|
|
||||||
ident: ident.clone(),
|
|
||||||
});
|
|
||||||
let model = self
|
|
||||||
.models
|
.models
|
||||||
.iter()
|
.iter()
|
||||||
.find(|m| m.ident == ident)
|
.position(|m| m.ident == ident)
|
||||||
.ok_or(Error::ModelNotFoundError(ident.to_string()))?;
|
.ok_or(Error::ModelNotFoundError(ident.to_string()))?;
|
||||||
if model.vits2.is_some() {
|
|
||||||
|
// Already loaded
|
||||||
|
if self.models[target_index].vits2.is_some() {
|
||||||
return Ok(true);
|
return Ok(true);
|
||||||
}
|
}
|
||||||
Err(Error::ModelNotFoundError(ident.to_string()))
|
|
||||||
|
// Get bytes to build a Session
|
||||||
|
let bytes = self.models[target_index]
|
||||||
|
.bytes
|
||||||
|
.clone()
|
||||||
|
.ok_or(Error::ModelNotFoundError(ident.to_string()))?;
|
||||||
|
|
||||||
|
// Enforce max loaded models by evicting a different loaded model's session, not removing the entry
|
||||||
|
if let Some(max) = self.max_loaded_models {
|
||||||
|
let loaded_count = self.models.iter().filter(|m| m.vits2.is_some()).count();
|
||||||
|
if loaded_count >= max {
|
||||||
|
if let Some(evict_index) = self
|
||||||
|
.models
|
||||||
|
.iter()
|
||||||
|
.position(|m| m.vits2.is_some() && m.ident != ident)
|
||||||
|
{
|
||||||
|
// Drop only the session to free memory; keep bytes/style for future reload
|
||||||
|
self.models[evict_index].vits2 = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build and set session in-place for the target model
|
||||||
|
let s = model::load_model(&bytes, false)?;
|
||||||
|
self.models[target_index].vits2 = Some(s);
|
||||||
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get style vector by style id and weight
|
/// Get style vector by style id and weight
|
||||||
|
|||||||
@@ -173,8 +173,15 @@ pub fn parse_text_blocking(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> {
|
pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> {
|
||||||
|
// If SBV2_FORCE_STEREO is set ("1"/"true"), duplicate mono to stereo
|
||||||
|
let force_stereo = std::env::var("SBV2_FORCE_STEREO")
|
||||||
|
.ok()
|
||||||
|
.map(|v| matches!(v.as_str(), "1" | "true" | "TRUE" | "True"))
|
||||||
|
.unwrap_or(false);
|
||||||
|
|
||||||
|
let channels: u16 = if force_stereo { 2 } else { 1 };
|
||||||
let spec = WavSpec {
|
let spec = WavSpec {
|
||||||
channels: 1,
|
channels,
|
||||||
sample_rate: 44100,
|
sample_rate: 44100,
|
||||||
bits_per_sample: 32,
|
bits_per_sample: 32,
|
||||||
sample_format: SampleFormat::Float,
|
sample_format: SampleFormat::Float,
|
||||||
@@ -183,8 +190,16 @@ pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> {
|
|||||||
let mut writer = WavWriter::new(&mut cursor, spec)?;
|
let mut writer = WavWriter::new(&mut cursor, spec)?;
|
||||||
for i in 0..audio_array.shape()[0] {
|
for i in 0..audio_array.shape()[0] {
|
||||||
let output = audio_array.slice(s![i, 0, ..]).to_vec();
|
let output = audio_array.slice(s![i, 0, ..]).to_vec();
|
||||||
for sample in output {
|
if force_stereo {
|
||||||
writer.write_sample(sample)?;
|
for sample in output {
|
||||||
|
// Write to Left and Right channels
|
||||||
|
writer.write_sample(sample)?;
|
||||||
|
writer.write_sample(sample)?;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for sample in output {
|
||||||
|
writer.write_sample(sample)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
writer.finalize()?;
|
writer.finalize()?;
|
||||||
|
|||||||
Reference in New Issue
Block a user