Merge pull request #235 from kono-dada/fix/inplace-model-load

Fix: Load model in-place and safely evict sessions without removing entries
- Avoid removing and re-inserting model entries during load
- Preserve metadata (bytes, style_vectors) when evicting
- Ensure eviction targets a different loaded model, not always the first
- Reduce unnecessary memory allocations and keep list order stable
2025-12-23 07:59:56 +00:00 · 2025-08-11 23:46:42 +09:00 · 2025-08-11 16:31:57 +08:00 · 2025-08-11 17:30:46 +09:00 · 2025-08-11 17:13:19 +09:00 · 2025-08-11 11:38:32 +08:00
3 changed files with 55 additions and 36 deletions
--- a/crates/sbv2_core/src/jtalk.rs
+++ b/crates/sbv2_core/src/jtalk.rs
@@ -127,20 +127,20 @@ impl JTalkProcess {
            Ok(phone_tone_list)
        } else if tone_values.len() == 2 {
            if tone_values == hash_set![0, 1] {
-                return Ok(phone_tone_list);
+                Ok(phone_tone_list)
            } else if tone_values == hash_set![-1, 0] {
-                return Ok(phone_tone_list
+                Ok(phone_tone_list
                    .iter()
                    .map(|x| {
                        let new_tone = if x.1 == -1 { 0 } else { 1 };
                        (x.0.clone(), new_tone)
                    })
-                    .collect());
+                    .collect())
            } else {
-                return Err(Error::ValueError("Invalid tone values 0".to_string()));
+                Err(Error::ValueError("Invalid tone values 0".to_string()))
            }
        } else {
-            return Err(Error::ValueError("Invalid tone values 1".to_string()));
+            Err(Error::ValueError("Invalid tone values 1".to_string()))
        }
    }

--- a/crates/sbv2_core/src/tts.rs
+++ b/crates/sbv2_core/src/tts.rs
@@ -240,39 +240,43 @@ impl TTSModelHolder {
    }
    fn find_and_load_model<I: Into<TTSIdent>>(&mut self, ident: I) -> Result<bool> {
        let ident = ident.into();
-        let (bytes, style_vectors) = {
-            let model = self
-                .models
-                .iter()
-                .find(|m| m.ident == ident)
-                .ok_or(Error::ModelNotFoundError(ident.to_string()))?;
-            if model.vits2.is_some() {
-                return Ok(true);
-            }
-            (model.bytes.clone().unwrap(), model.style_vectors.clone())
-        };
-        self.unload(ident.clone());
-        let s = model::load_model(&bytes, false)?;
-        if let Some(max) = self.max_loaded_models {
-            if self.models.iter().filter(|x| x.vits2.is_some()).count() >= max {
-                self.unload(self.models.first().unwrap().ident.clone());
-            }
-        }
-        self.models.push(TTSModel {
-            bytes: Some(bytes.to_vec()),
-            vits2: Some(s),
-            style_vectors,
-            ident: ident.clone(),
-        });
-        let model = self
+        // Locate target model entry
+        let target_index = self
            .models
            .iter()
-            .find(|m| m.ident == ident)
+            .position(|m| m.ident == ident)
            .ok_or(Error::ModelNotFoundError(ident.to_string()))?;
-        if model.vits2.is_some() {
+
+        // Already loaded
+        if self.models[target_index].vits2.is_some() {
            return Ok(true);
        }
-        Err(Error::ModelNotFoundError(ident.to_string()))
+
+        // Get bytes to build a Session
+        let bytes = self.models[target_index]
+            .bytes
+            .clone()
+            .ok_or(Error::ModelNotFoundError(ident.to_string()))?;
+
+        // Enforce max loaded models by evicting a different loaded model's session, not removing the entry
+        if let Some(max) = self.max_loaded_models {
+            let loaded_count = self.models.iter().filter(|m| m.vits2.is_some()).count();
+            if loaded_count >= max {
+                if let Some(evict_index) = self
+                    .models
+                    .iter()
+                    .position(|m| m.vits2.is_some() && m.ident != ident)
+                {
+                    // Drop only the session to free memory; keep bytes/style for future reload
+                    self.models[evict_index].vits2 = None;
+                }
+            }
+        }
+
+        // Build and set session in-place for the target model
+        let s = model::load_model(&bytes, false)?;
+        self.models[target_index].vits2 = Some(s);
+        Ok(true)
    }

    /// Get style vector by style id and weight
--- a/crates/sbv2_core/src/tts_util.rs
+++ b/crates/sbv2_core/src/tts_util.rs
@@ -173,8 +173,15 @@ pub fn parse_text_blocking(
 }

 pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> {
+    // If SBV2_FORCE_STEREO is set ("1"/"true"), duplicate mono to stereo
+    let force_stereo = std::env::var("SBV2_FORCE_STEREO")
+        .ok()
+        .map(|v| matches!(v.as_str(), "1" | "true" | "TRUE" | "True"))
+        .unwrap_or(false);
+
+    let channels: u16 = if force_stereo { 2 } else { 1 };
    let spec = WavSpec {
-        channels: 1,
+        channels,
        sample_rate: 44100,
        bits_per_sample: 32,
        sample_format: SampleFormat::Float,
@@ -183,8 +190,16 @@ pub fn array_to_vec(audio_array: Array3<f32>) -> Result<Vec<u8>> {
    let mut writer = WavWriter::new(&mut cursor, spec)?;
    for i in 0..audio_array.shape()[0] {
        let output = audio_array.slice(s![i, 0, ..]).to_vec();
-        for sample in output {
-            writer.write_sample(sample)?;
+        if force_stereo {
+            for sample in output {
+                // Write to Left and Right channels
+                writer.write_sample(sample)?;
+                writer.write_sample(sample)?;
+            }
+        } else {
+            for sample in output {
+                writer.write_sample(sample)?;
+            }
        }
    }
    writer.finalize()?;
Author	SHA1	Message	Date
tuna2134	62ba2c802f	Merge pull request #235 from kono-dada/fix/inplace-model-load	2025-08-11 23:46:42 +09:00
kono-dada	3c8efc716c	Fix: Load model in-place and safely evict sessions without removing entries - Avoid removing and re-inserting model entries during load - Preserve metadata (bytes, style_vectors) when evicting - Ensure eviction targets a different loaded model, not always the first - Reduce unnecessary memory allocations and keep list order stable	2025-08-11 16:31:57 +08:00
tuna2134	e9ced32b70	fix: streamline tone value handling in JTalkProcess	2025-08-11 17:30:46 +09:00
tuna2134	e7a1575cbc	Merge pull request #233 from kono-dada/feature/stereo-output feat: add stereo synthesis option via SBV2_FORCE_STEREO env var	2025-08-11 17:13:19 +09:00
kono-dada	873bbb77b6	feat: add stereo synthesis option via SBV2_FORCE_STEREO env var Previously, synthesis output was fixed to mono (channels=1). Now, setting the environment variable SBV2_FORCE_STEREO=1 forces stereo (2-channel) output. This allows generating stereo audio without changing the code, useful for users needing dual-channel output.	2025-08-11 11:38:32 +08:00