Merge pull request #4 from 1319224734/fix_30s_audio

Handle audio longer than 30s: print a notice and return an empty mel spectrogram instead of extracting speech tokens
xingchensong · Sep 20, 2024 · c4f6c56
2 parents 1647c91 + 0545950
commit c4f6c56
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/s3tokenizer/cli.py b/s3tokenizer/cli.py
@@ -60,7 +60,11 @@ def __getitem__(self, idx):
         file_path = self.data[idx]
         key = self.keys[idx]
         audio = s3tokenizer.load_audio(file_path)
-        mel = s3tokenizer.log_mel_spectrogram(audio)
+        if audio.shape[0] / 16000 > 30:
+            print(f'do not support extract speech token for audio longer than 30s, file_path: {file_path}')
+            mel = torch.zeros(128, 0)
+        else:
+            mel = s3tokenizer.log_mel_spectrogram(audio)
         return key, mel