From 2084eabd0c3497468b5d97e6de921bfd10fee325 Mon Sep 17 00:00:00 2001 From: laurent Date: Tue, 26 Aug 2025 12:55:31 +0200 Subject: [PATCH] Enable ruff in the pre-commit hooks. --- .pre-commit-config.yaml | 14 ++++++++++++++ scripts/tts_pytorch.py | 6 ++++++ stt_pytorch.ipynb | 1 - tts_pytorch.ipynb | 18 ++++++++---------- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e2f0230..19d9d43 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,18 @@ repos: + - repo: local + hooks: + - id: ruff + name: ruff + language: system + entry: bash -c 'uvx ruff check' + pass_filenames: false + always_run: true + - id: ruff-format + name: ruff format + language: system + entry: bash -c 'uvx ruff format --check' + pass_filenames: false + always_run: true # Get rid of Jupyter Notebook output because we don't want to keep it in Git - repo: https://github.com/kynan/nbstripout rev: 0.8.1 diff --git a/scripts/tts_pytorch.py b/scripts/tts_pytorch.py index 424d600..5ba58fa 100644 --- a/scripts/tts_pytorch.py +++ b/scripts/tts_pytorch.py @@ -117,16 +117,22 @@ def main(): break time.sleep(1) else: + last_time = time.time() def _on_frame(frame): nonlocal _frames_cnt + nonlocal last_time if (frame != -1).all(): _frames_cnt += 1 print(f"generated {_frames_cnt / 12.5:.2f}s", end="\r", flush=True) + print("{}", time.time() - last_time) + last_time = time.time() + start_time = time.time() result = tts_model.generate( [entries], [condition_attributes], on_frame=_on_frame ) + print(f"\nTotal time: {time.time() - start_time:.2f}s") with tts_model.mimi.streaming(1), torch.no_grad(): pcms = [] for frame in result.frames[tts_model.delay_steps :]: diff --git a/stt_pytorch.ipynb b/stt_pytorch.ipynb index acad8e5..072776b 100644 --- a/stt_pytorch.ipynb +++ b/stt_pytorch.ipynb @@ -80,7 +80,6 @@ " self.lm_gen.streaming_forever(batch_size)\n", "\n", " def run(self, in_pcms: torch.Tensor):\n", - " device = self.lm_gen.lm_model.device\n", " ntokens = 0\n", " first_frame = True\n", " chunks = [\n", diff --git a/tts_pytorch.ipynb b/tts_pytorch.ipynb index e13eed8..7c722da 100644 --- a/tts_pytorch.ipynb +++ b/tts_pytorch.ipynb @@ -21,9 +21,6 @@ "metadata": {}, "outputs": [], "source": [ - "import argparse\n", - "import sys\n", - "\n", "import numpy as np\n", "import torch\n", "from moshi.models.loaders import CheckpointInfo\n", @@ -64,9 +61,7 @@ "# CFG coef goes here because the model was trained with CFG distillation,\n", "# so it's not _actually_ doing CFG at inference time.\n", "# Also, if you are generating a dialog, you should have two voices in the list.\n", - "condition_attributes = tts_model.make_condition_attributes(\n", - " [voice_path], cfg_coef=2.0\n", - ")" + "condition_attributes = tts_model.make_condition_attributes([voice_path], cfg_coef=2.0)" ] }, { @@ -79,17 +74,22 @@ "print(\"Generating audio...\")\n", "\n", "pcms = []\n", + "\n", + "\n", "def _on_frame(frame):\n", " print(\"Step\", len(pcms), end=\"\\r\")\n", " if (frame != -1).all():\n", " pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()\n", " pcms.append(np.clip(pcm[0, 0], -1, 1))\n", "\n", + "\n", "# You could also generate multiple audios at once by extending the following lists.\n", "all_entries = [entries]\n", "all_condition_attributes = [condition_attributes]\n", "with tts_model.mimi.streaming(len(all_entries)):\n", - " result = tts_model.generate(all_entries, all_condition_attributes, on_frame=_on_frame)\n", + " result = tts_model.generate(\n", + " all_entries, all_condition_attributes, on_frame=_on_frame\n", + " )\n", "\n", "print(\"Done generating.\")\n", "audio = np.concatenate(pcms, axis=-1)" @@ -102,9 +102,7 @@ "metadata": {}, "outputs": [], "source": [ - "display(\n", - " Audio(audio, rate=tts_model.mimi.sample_rate, autoplay=True)\n", - ")" + "display(Audio(audio, rate=tts_model.mimi.sample_rate, autoplay=True))" ] }, {