#4266 TaskCfgVTT(is_cuda=True, uuid='a7b0810154', cache_folder='F:/win-pyvideotrans-v3.99-418/tmp/32316/a7b0810154', target_di

119.237* Posted at: 4 hours ago 👁9

语音识别阶段出错 [openai-whisper(本地)] Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 114, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 687, in _main_loop
File "whisper\decoding.py", line 163, in logits
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 246, in forward
RuntimeError: CUDA error: unspecified launch failure
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.

Traceback (most recent call last):
File "videotrans\task\job.py", line 105, in run
File "videotrans\task\trans_create.py", line 361, in recogn
File "videotrans\recognition\__init__.py", line 293, in run
File "videotrans\recognition\_base.py", line 143, in run
File "videotrans\recognition\_overall.py", line 31, in _exec
File "videotrans\recognition\_overall.py", line 73, in _openai
File "videotrans\configure\_base.py", line 289, in _new_process
RuntimeError: Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 114, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 687, in _main_loop
File "whisper\decoding.py", line 163, in logits
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 246, in forward
RuntimeError: CUDA error: unspecified launch failure
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
TaskCfgVTT(is_cuda=True, uuid='a7b0810154', cache_folder='F:/win-pyvideotrans-v3.99-418/tmp/32316/a7b0810154', target_dir='F:/AAA/Fundamentals of Photography/_video_out/04 - Shutter Speeds-mp4', source_language='英语', source_language_code='en', source_sub='F:/AAA/Fundamentals of Photography/_video_out/04 - Shutter Speeds-mp4/en.srt', source_wav='F:/win-pyvideotrans-v3.99-418/tmp/32316/a7b0810154/en.wav', source_wav_output='F:/AAA/Fundamentals of Photography/_video_out/04 - Shutter Speeds-mp4/en.m4a', target_language='简体中文', target_language_code='zh-cn', target_sub='F:/AAA/Fundamentals of Photography/_video_out/04 - Shutter Speeds-mp4/zh-cn.srt', target_wav='F:/win-pyvideotrans-v3.99-418/tmp/32316/a7b0810154/target.wav', target_wav_output='F:/AAA/Fundamentals of Photography/_video_out/04 - Shutter Speeds-mp4/zh-cn.m4a', name='F:/AAA/Fundamentals of Photography/04 - Shutter Speeds.mp4', noextname='04 - Shutter Speeds', basename='04 - Shutter Speeds.mp4', ext='mp4', dirname='F:/AAA/Fundamentals of Photography', shound_del_name=None, translate_type=13, tts_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', voice_autorate=False, video_autorate=False, remove_silent_mid=False, align_sub_audio=True, detect_language='en', recogn_type=1, model_name='large-v3', shibie_audio=None, remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False, subtitle_language=None, app_mode='biaozhun', subtitles='', targetdir_mp4='F:/AAA/Fundamentals of Photography/_video_out/04 - Shutter Speeds-mp4/04 - Shutter Speeds.mp4', novoice_mp4='F:/win-pyvideotrans-v3.99-418/tmp/32316/a7b0810154/novoice.mp4', is_separate=False, embed_bgm=True, instrument=None, vocal=None, back_audio='', clear_cache=False, background_music=None, subtitle_type=3, only_out_mp4=True, recogn2pass=True, output_srt=2, copysrt_rawvideo=False)


=
system:Windows-10-10.0.26200-SP0
version:v3.99
frozen:True
language:zh
root_dir:F:/win-pyvideotrans-v3.99-418

1 Replies

这是由于 CUDA 运行失败导致的,常见原因有显卡驱动过旧、CUDA 版本不兼容、显存不足或 GPU 被其他程序占用。

解决步骤:

  1. 确认 CUDA 环境:确保已安装 CUDA 12.8 和 cuDNN 9.11+。可在 CMD 中运行 nvcc -V 查看版本,nvidia-smi 查看显卡驱动和可用显存。
  2. 降低显存占用(按顺序尝试)

    • 在语音识别下拉菜单中切换为 small 或 medium 模型(当前使用的是 large-v3,最低需求约10GB显存)。
    • 前往 菜单-工具-高级选项-语音识别参数,将:

      • CUDA数据类型 改为 float16int8
      • beam_size 改为 1
      • best_of 改为 1
      • 若仍报错,选中 Whisper预分割音频 可进一步降低显存。
  3. 关闭其他 GPU 程序:关闭浏览器、游戏等占用显存的应用。
  4. 更新显卡驱动:前往 NVIDIA 官网安装最新驱动。

如果问题依旧,请提供 logs/ 文件夹下的最新日志文件底部30行,以便进一步分析。

请查阅相关文档:

Post Your Reply
Maintaining a free, open-source project is not easy. If this project is helpful to you, please consider making a small donation to help keep it maintained and updated.

Related resource