#4322 TaskCfgVTT(is_cuda=True, uuid='addc14beda', cache_folder='F:/win-pyvideotrans-v3.99-418/tmp/13256/addc14beda', target_di

220.246* Posted at: 4 hours ago 👁7

语音识别阶段出错 [openai-whisper(本地)] Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 114, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 718, in run
File "whisper\decoding.py", line 655, in _get_audio_features
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 201, in forward
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **

......
th_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 718, in run
File "whisper\decoding.py", line 655, in _get_audio_features
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 201, in forward
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 170, in forward
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "torch\nn\modules\container.py", line 240, in forward

input = module(input)

File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 48, in forward
RuntimeError: CUDA error: unspecified launch failure
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
TaskCfgVTT(is_cuda=True, uuid='addc14beda', cache_folder='F:/win-pyvideotrans-v3.99-418/tmp/13256/addc14beda', target_dir='D:/AAA/Berkshire Hathaway Annual Meetings/_video_out/1998 Berkshire Hathaway Annual Meeting (Full Version)-mp4', source_language='英语', source_language_code='en', source_sub='D:/AAA/Berkshire Hathaway Annual Meetings/_video_out/1998 Berkshire Hathaway Annual Meeting (Full Version)-mp4/en.srt', source_wav='F:/win-pyvideotrans-v3.99-418/tmp/13256/addc14beda/en.wav', source_wav_output='D:/AAA/Berkshire Hathaway Annual Meetings/_video_out/1998 Berkshire Hathaway Annual Meeting (Full Version)-mp4/en.m4a', target_language='简体中文', target_language_code='zh-cn', target_sub='D:/AAA/Berkshire Hathaway Annual Meetings/_video_out/1998 Berkshire Hathaway Annual Meeting (Full Version)-mp4/zh-cn.srt', target_wav='F:/win-pyvideotrans-v3.99-418/tmp/13256/addc14beda/target.wav', target_wav_output='D:/AAA/Berkshire Hathaway Annual Meetings/_video_out/1998 Berkshire Hathaway Annual Meeting (Full Version)-mp4/zh-cn.m4a', name='D:/AAA/Berkshire Hathaway Annual Meetings/1998 Berkshire Hathaway Annual Meeting (Full Version).mp4', noextname='1998 Berkshire Hathaway Annual Meeting (Full Version)', basename='1998 Berkshire Hathaway Annual Meeting (Full Version).mp4', ext='mp4', dirname='D:/AAA/Berkshire Hathaway Annual Meetings', shound_del_name=None, translate_type=13, tts_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', voice_autorate=False, video_autorate=False, remove_silent_mid=False, align_sub_audio=True, detect_language='en', recogn_type=1, model_name='large-v3-turbo', shibie_audio=None, remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False, subtitle_language=None, app_mode='biaozhun', subtitles='', targetdir_mp4='D:/AAA/Berkshire Hathaway Annual Meetings/_video_out/1998 Berkshire Hathaway Annual Meeting (Full Version)-mp4/1998 Berkshire Hathaway Annual Meeting (Full Version).mp4', 
novoice_mp4='F:/win-pyvideotrans-v3.99-418/tmp/13256/addc14beda/novoice.mp4', is_separate=False, embed_bgm=True, instrument=None, vocal=None, back_audio='', clear_cache=False, background_music=None, subtitle_type=1, only_out_mp4=True, recogn2pass=True, output_srt=2, copysrt_rawvideo=False)


=
system:Windows-10-10.0.26200-SP0
version:v3.99
frozen:True
language:zh
root_dir:F:/win-pyvideotrans-v3.99-418

1 Replies

这是由于 CUDA 环境异常导致的语音识别启动失败,常见原因包括 CUDA/cuDNN 版本不匹配、驱动过旧或显存不足。

解决步骤:

  1. 检查 CUDA 和 cuDNN 版本:软件需要 CUDA 12.8 和 cuDNN 9.11 或更高版本。在命令行分别执行 nvcc -V 和 nvidia-smi 确认。若不满足,请参照文档重新安装。
  2. 更新显卡驱动:确保 NVIDIA 驱动为最新。
  3. 降低显存占用:尝试将模型从 large-v3-turbo 更换为 small 或 medium;同时在菜单 工具/选项 → 高级选项 → 语音识别参数 中将 beam_size 改为 1、best_of 改为 1、CUDA 数据类型改为 int8。
  4. 排除 GPU 问题:临时取消勾选主界面的 CUDA加速,改用 CPU 运行一次,若能成功则说明问题在 GPU 环境。
  5. 50 系显卡特殊处理:若您使用的是 RTX 50 系列显卡,可能需参照特定解决方案调整 PyTorch 或 CUDA 启动参数。

请查阅相关文档:

Post Your Reply
Open source and free maintenance is not easy. If this project is helpful to you, please consider making a small donation to help the project continue to maintain and update.

Related resource