From 2084eabd0c3497468b5d97e6de921bfd10fee325 Mon Sep 17 00:00:00 2001
From: laurent <laurent.mazare@gmail.com>
Date: Tue, 26 Aug 2025 12:55:31 +0200
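Subject: [PATCH] Enable ruff in the pre-commit hooks.

Add local `ruff check` and `ruff format --check` pre-commit hooks, drop
the `argparse`/`sys` imports and the `device` local from the notebooks,
and reformat the affected notebook cells. Also print per-frame and total
generation timings in scripts/tts_pytorch.py.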
---
 .pre-commit-config.yaml | 14 ++++++++++++++
 scripts/tts_pytorch.py  |  6 ++++++
 stt_pytorch.ipynb       |  1 -
 tts_pytorch.ipynb       | 18 ++++++++----------
 4 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2f0230..19d9d43 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,4 +1,18 @@
 repos:
+  - repo: local
+    hooks:
+    - id: ruff
+      name: ruff
+      language: system  # no managed env: relies on `bash` and `uvx` being on PATH
+      entry: bash -c 'uvx ruff check'
+      pass_filenames: false  # ruff is invoked without file arguments
+      always_run: true  # run even when no matching files are staged
+    - id: ruff-format
+      name: ruff format
+      language: system  # no managed env: relies on `bash` and `uvx` being on PATH
+      entry: bash -c 'uvx ruff format --check'
+      pass_filenames: false  # ruff is invoked without file arguments
+      always_run: true  # run even when no matching files are staged
     # Get rid of Jupyter Notebook output because we don't want to keep it in Git
   - repo: https://github.com/kynan/nbstripout
     rev: 0.8.1
diff --git a/scripts/tts_pytorch.py b/scripts/tts_pytorch.py
index 424d600..5ba58fa 100644
--- a/scripts/tts_pytorch.py
+++ b/scripts/tts_pytorch.py
@@ -117,16 +117,22 @@ def main():
                     break
                 time.sleep(1)
     else:
+        last_time = time.time()
 
         def _on_frame(frame):
             nonlocal _frames_cnt
+            nonlocal last_time  # wall-clock time of the previous callback
             if (frame != -1).all():
                 _frames_cnt += 1
                 print(f"generated {_frames_cnt / 12.5:.2f}s", end="\r", flush=True)
+            print(f"frame time: {time.time() - last_time:.3f}s")
+            last_time = time.time()  # restart the per-frame timer
 
+        start_time = time.time()
         result = tts_model.generate(
             [entries], [condition_attributes], on_frame=_on_frame
         )
+        print(f"\nTotal time: {time.time() - start_time:.2f}s")
         with tts_model.mimi.streaming(1), torch.no_grad():
             pcms = []
             for frame in result.frames[tts_model.delay_steps :]:
diff --git a/stt_pytorch.ipynb b/stt_pytorch.ipynb
index acad8e5..072776b 100644
--- a/stt_pytorch.ipynb
+++ b/stt_pytorch.ipynb
@@ -80,7 +80,6 @@
     "        self.lm_gen.streaming_forever(batch_size)\n",
     "\n",
     "    def run(self, in_pcms: torch.Tensor):\n",
-    "        device = self.lm_gen.lm_model.device\n",
     "        ntokens = 0\n",
     "        first_frame = True\n",
     "        chunks = [\n",
diff --git a/tts_pytorch.ipynb b/tts_pytorch.ipynb
index e13eed8..7c722da 100644
--- a/tts_pytorch.ipynb
+++ b/tts_pytorch.ipynb
@@ -21,9 +21,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import argparse\n",
-    "import sys\n",
-    "\n",
     "import numpy as np\n",
     "import torch\n",
     "from moshi.models.loaders import CheckpointInfo\n",
@@ -64,9 +61,7 @@
     "# CFG coef goes here because the model was trained with CFG distillation,\n",
     "# so it's not _actually_ doing CFG at inference time.\n",
     "# Also, if you are generating a dialog, you should have two voices in the list.\n",
-    "condition_attributes = tts_model.make_condition_attributes(\n",
-    "    [voice_path], cfg_coef=2.0\n",
-    ")"
+    "condition_attributes = tts_model.make_condition_attributes([voice_path], cfg_coef=2.0)"
    ]
   },
   {
@@ -79,17 +74,22 @@
     "print(\"Generating audio...\")\n",
     "\n",
     "pcms = []\n",
+    "\n",
+    "\n",
     "def _on_frame(frame):\n",
     "    print(\"Step\", len(pcms), end=\"\\r\")\n",
     "    if (frame != -1).all():\n",
     "        pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()\n",
     "        pcms.append(np.clip(pcm[0, 0], -1, 1))\n",
     "\n",
+    "\n",
     "# You could also generate multiple audios at once by extending the following lists.\n",
     "all_entries = [entries]\n",
     "all_condition_attributes = [condition_attributes]\n",
     "with tts_model.mimi.streaming(len(all_entries)):\n",
-    "    result = tts_model.generate(all_entries, all_condition_attributes, on_frame=_on_frame)\n",
+    "    result = tts_model.generate(\n",
+    "        all_entries, all_condition_attributes, on_frame=_on_frame\n",
+    "    )\n",
     "\n",
     "print(\"Done generating.\")\n",
     "audio = np.concatenate(pcms, axis=-1)"
@@ -102,9 +102,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "display(\n",
-    "    Audio(audio, rate=tts_model.mimi.sample_rate, autoplay=True)\n",
-    ")"
+    "display(Audio(audio, rate=tts_model.mimi.sample_rate, autoplay=True))"
    ]
   },
   {