Compare commits

...

15 Commits

Author SHA1 Message Date
tuna2134
9f22694df0 Merge pull request #236 from neodyland/dependabot/cargo/ort-f4ab181 2025-08-12 07:48:08 +09:00
tuna2134
62ba2c802f Merge pull request #235 from kono-dada/fix/inplace-model-load 2025-08-11 23:46:42 +09:00
dependabot[bot]
4f5b936f6f build(deps): bump ort from 5f96a2d to f4ab181
Bumps [ort](https://github.com/pykeio/ort) from `5f96a2d` to `f4ab181`.
- [Release notes](https://github.com/pykeio/ort/releases)
- [Commits](https://github.com/pykeio/ort/compare/5f96a2d585...f4ab181702)

---
updated-dependencies:
- dependency-name: ort
  dependency-version: f4ab181702495bff99a488322d3a8de0d7050349
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-11 12:22:23 +00:00
kono-dada
3c8efc716c Fix: Load model in-place and safely evict sessions without removing entries
- Avoid removing and re-inserting model entries during load
- Preserve metadata (bytes, style_vectors) when evicting
- Ensure eviction targets a different loaded model, not always the first
- Reduce unnecessary memory allocations and keep list order stable
2025-08-11 16:31:57 +08:00
tuna2134
e9ced32b70 fix: streamline tone value handling in JTalkProcess 2025-08-11 17:30:46 +09:00
tuna2134
e7a1575cbc Merge pull request #233 from kono-dada/feature/stereo-output
feat: add stereo synthesis option via SBV2_FORCE_STEREO env var
2025-08-11 17:13:19 +09:00
kono-dada
873bbb77b6 feat: add stereo synthesis option via SBV2_FORCE_STEREO env var
Previously, synthesis output was fixed to mono (channels=1).
Now, setting the environment variable SBV2_FORCE_STEREO=1 forces stereo (2-channel) output.

This allows generating stereo audio without changing the code, useful for users needing dual-channel output.
2025-08-11 11:38:32 +08:00
tuna2134
1725863fca Merge pull request #228 from neodyland/dependabot/cargo/serde_json-1.0.142
build(deps): bump serde_json from 1.0.141 to 1.0.142
2025-08-04 22:12:36 +09:00
tuna2134
55f05580e4 Merge pull request #229 from neodyland/dependabot/cargo/tokenizers-0.21.4
build(deps): bump tokenizers from 0.21.2 to 0.21.4
2025-08-04 22:12:24 +09:00
tuna2134
320664eae2 Merge pull request #231 from neodyland/dependabot/cargo/tokio-1.47.1
build(deps): bump tokio from 1.47.0 to 1.47.1
2025-08-04 22:12:07 +09:00
tuna2134
87903827fa Merge pull request #230 from neodyland/dependabot/cargo/ort-5f96a2d
build(deps): bump ort from `d28c835` to `5f96a2d`
2025-08-04 22:11:55 +09:00
dependabot[bot]
9b8e9dc39d build(deps): bump tokio from 1.47.0 to 1.47.1
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.47.0 to 1.47.1.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.47.0...tokio-1.47.1)

---
updated-dependencies:
- dependency-name: tokio
  dependency-version: 1.47.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-04 11:10:28 +00:00
dependabot[bot]
bbc38081b6 build(deps): bump ort from d28c835 to 5f96a2d
Bumps [ort](https://github.com/pykeio/ort) from `d28c835` to `5f96a2d`.
- [Release notes](https://github.com/pykeio/ort/releases)
- [Commits](https://github.com/pykeio/ort/compare/d28c835c3c...5f96a2d585)

---
updated-dependencies:
- dependency-name: ort
  dependency-version: 5f96a2d5857c3fe9f06282dbf4bdcddbca6c5fe6
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-04 10:24:07 +00:00
dependabot[bot]
0b822f704a build(deps): bump tokenizers from 0.21.2 to 0.21.4
Bumps [tokenizers](https://github.com/huggingface/tokenizers) from 0.21.2 to 0.21.4.
- [Release notes](https://github.com/huggingface/tokenizers/releases)
- [Changelog](https://github.com/huggingface/tokenizers/blob/main/RELEASE.md)
- [Commits](https://github.com/huggingface/tokenizers/compare/v0.21.2...v0.21.4)

---
updated-dependencies:
- dependency-name: tokenizers
  dependency-version: 0.21.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-04 10:18:59 +00:00
dependabot[bot]
132eb6386d build(deps): bump serde_json from 1.0.141 to 1.0.142
Bumps [serde_json](https://github.com/serde-rs/json) from 1.0.141 to 1.0.142.
- [Release notes](https://github.com/serde-rs/json/releases)
- [Commits](https://github.com/serde-rs/json/compare/v1.0.141...v1.0.142)

---
updated-dependencies:
- dependency-name: serde_json
  dependency-version: 1.0.142
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-04 10:18:20 +00:00
6 changed files with 66 additions and 47 deletions

16
Cargo.lock generated
View File

@@ -1740,7 +1740,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]] [[package]]
name = "ort" name = "ort"
version = "2.0.0-rc.10" version = "2.0.0-rc.10"
source = "git+https://github.com/pykeio/ort.git#d28c835c3cc98bcbefc208dc26c8618ccbadec3f" source = "git+https://github.com/pykeio/ort.git#f4ab181702495bff99a488322d3a8de0d7050349"
dependencies = [ dependencies = [
"libloading", "libloading",
"ndarray", "ndarray",
@@ -1752,7 +1752,7 @@ dependencies = [
[[package]] [[package]]
name = "ort-sys" name = "ort-sys"
version = "2.0.0-rc.10" version = "2.0.0-rc.10"
source = "git+https://github.com/pykeio/ort.git#d28c835c3cc98bcbefc208dc26c8618ccbadec3f" source = "git+https://github.com/pykeio/ort.git#f4ab181702495bff99a488322d3a8de0d7050349"
dependencies = [ dependencies = [
"flate2", "flate2",
"pkg-config", "pkg-config",
@@ -2430,9 +2430,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.141" version = "1.0.142"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7"
dependencies = [ dependencies = [
"itoa", "itoa",
"memchr", "memchr",
@@ -2766,9 +2766,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]] [[package]]
name = "tokenizers" name = "tokenizers"
version = "0.21.2" version = "0.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c3846d8588abed0daba25a0e47edd58ea15e450a6088b2575f5116fdb0b27ca" checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476"
dependencies = [ dependencies = [
"ahash", "ahash",
"aho-corasick", "aho-corasick",
@@ -2801,9 +2801,9 @@ dependencies = [
[[package]] [[package]]
name = "tokio" name = "tokio"
version = "1.47.0" version = "1.47.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43864ed400b6043a4757a25c7a64a8efde741aed79a056a2fb348a406701bb35" checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
dependencies = [ dependencies = [
"backtrace", "backtrace",
"bytes", "bytes",

View File

@@ -16,7 +16,7 @@ env_logger.workspace = true
log = "0.4.22" log = "0.4.22"
sbv2_core = { version = "0.2.0-alpha6", path = "../sbv2_core", features = ["aivmx"] } sbv2_core = { version = "0.2.0-alpha6", path = "../sbv2_core", features = ["aivmx"] }
serde = { version = "1.0.210", features = ["derive"] } serde = { version = "1.0.210", features = ["derive"] }
tokio = { version = "1.47.0", features = ["full"] } tokio = { version = "1.47.1", features = ["full"] }
utoipa = { version = "5.4.0", features = ["axum_extras"] } utoipa = { version = "5.4.0", features = ["axum_extras"] }
utoipa-scalar = { version = "0.3.0", features = ["axum"] } utoipa-scalar = { version = "0.3.0", features = ["axum"] }

View File

@@ -22,10 +22,10 @@ once_cell.workspace = true
ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.9", optional = true } ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.9", optional = true }
regex = "1.10.6" regex = "1.10.6"
serde = { version = "1.0.210", features = ["derive"] } serde = { version = "1.0.210", features = ["derive"] }
serde_json = "1.0.141" serde_json = "1.0.142"
tar = "0.4.41" tar = "0.4.41"
thiserror = "2.0.11" thiserror = "2.0.11"
tokenizers = { version = "0.21.2", default-features = false } tokenizers = { version = "0.21.4", default-features = false }
zstd = "0.13.2" zstd = "0.13.2"
[features] [features]

View File

@@ -127,20 +127,20 @@ impl JTalkProcess {
Ok(phone_tone_list) Ok(phone_tone_list)
} else if tone_values.len() == 2 { } else if tone_values.len() == 2 {
if tone_values == hash_set![0, 1] { if tone_values == hash_set![0, 1] {
return Ok(phone_tone_list); Ok(phone_tone_list)
} else if tone_values == hash_set![-1, 0] { } else if tone_values == hash_set![-1, 0] {
return Ok(phone_tone_list Ok(phone_tone_list
.iter() .iter()
.map(|x| { .map(|x| {
let new_tone = if x.1 == -1 { 0 } else { 1 }; let new_tone = if x.1 == -1 { 0 } else { 1 };
(x.0.clone(), new_tone) (x.0.clone(), new_tone)
}) })
.collect()); .collect())
} else { } else {
return Err(Error::ValueError("Invalid tone values 0".to_string())); Err(Error::ValueError("Invalid tone values 0".to_string()))
} }
} else { } else {
return Err(Error::ValueError("Invalid tone values 1".to_string())); Err(Error::ValueError("Invalid tone values 1".to_string()))
} }
} }

View File

@@ -240,39 +240,43 @@ impl TTSModelHolder {
} }
fn find_and_load_model<I: Into<TTSIdent>>(&mut self, ident: I) -> Result<bool> { fn find_and_load_model<I: Into<TTSIdent>>(&mut self, ident: I) -> Result<bool> {
let ident = ident.into(); let ident = ident.into();
let (bytes, style_vectors) = { // Locate target model entry
let model = self let target_index = self
.models .models
.iter() .iter()
.find(|m| m.ident == ident) .position(|m| m.ident == ident)
.ok_or(Error::ModelNotFoundError(ident.to_string()))?; .ok_or(Error::ModelNotFoundError(ident.to_string()))?;
if model.vits2.is_some() {
// Already loaded
if self.models[target_index].vits2.is_some() {
return Ok(true); return Ok(true);
} }
(model.bytes.clone().unwrap(), model.style_vectors.clone())
}; // Get bytes to build a Session
self.unload(ident.clone()); let bytes = self.models[target_index]
let s = model::load_model(&bytes, false)?; .bytes
.clone()
.ok_or(Error::ModelNotFoundError(ident.to_string()))?;
// Enforce max loaded models by evicting a different loaded model's session, not removing the entry
if let Some(max) = self.max_loaded_models { if let Some(max) = self.max_loaded_models {
if self.models.iter().filter(|x| x.vits2.is_some()).count() >= max { let loaded_count = self.models.iter().filter(|m| m.vits2.is_some()).count();
self.unload(self.models.first().unwrap().ident.clone()); if loaded_count >= max {
} if let Some(evict_index) = self
}
self.models.push(TTSModel {
bytes: Some(bytes.to_vec()),
vits2: Some(s),
style_vectors,
ident: ident.clone(),
});
let model = self
.models .models
.iter() .iter()
.find(|m| m.ident == ident) .position(|m| m.vits2.is_some() && m.ident != ident)
.ok_or(Error::ModelNotFoundError(ident.to_string()))?; {
if model.vits2.is_some() { // Drop only the session to free memory; keep bytes/style for future reload
return Ok(true); self.models[evict_index].vits2 = None;
} }
Err(Error::ModelNotFoundError(ident.to_string())) }
}
// Build and set session in-place for the target model
let s = model::load_model(&bytes, false)?;
self.models[target_index].vits2 = Some(s);
Ok(true)
} }
/// Get style vector by style id and weight /// Get style vector by style id and weight

View File

@@ -173,8 +173,15 @@ pub fn parse_text_blocking(
} }
pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> { pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> {
// If SBV2_FORCE_STEREO is set ("1"/"true"), duplicate mono to stereo
let force_stereo = std::env::var("SBV2_FORCE_STEREO")
.ok()
.map(|v| matches!(v.as_str(), "1" | "true" | "TRUE" | "True"))
.unwrap_or(false);
let channels: u16 = if force_stereo { 2 } else { 1 };
let spec = WavSpec { let spec = WavSpec {
channels: 1, channels,
sample_rate: 44100, sample_rate: 44100,
bits_per_sample: 32, bits_per_sample: 32,
sample_format: SampleFormat::Float, sample_format: SampleFormat::Float,
@@ -183,10 +190,18 @@ pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> {
let mut writer = WavWriter::new(&mut cursor, spec)?; let mut writer = WavWriter::new(&mut cursor, spec)?;
for i in 0..audio_array.shape()[0] { for i in 0..audio_array.shape()[0] {
let output = audio_array.slice(s![i, 0, ..]).to_vec(); let output = audio_array.slice(s![i, 0, ..]).to_vec();
if force_stereo {
for sample in output {
// Write to Left and Right channels
writer.write_sample(sample)?;
writer.write_sample(sample)?;
}
} else {
for sample in output { for sample in output {
writer.write_sample(sample)?; writer.write_sample(sample)?;
} }
} }
}
writer.finalize()?; writer.finalize()?;
Ok(cursor.into_inner()) Ok(cursor.into_inner())
} }