#4222 TaskCfgVTT(is_cuda=True, uuid='5ecec1af2c', cache_folder='F:/win-pyvideotrans-v3.99-418/tmp/28784/5ecec1af2c', target_di

220.246* Posted at: 2 hours ago 👁8

语音识别阶段出错 [openai-whisper(本地)] Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 114, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 687, in _main_loop
File "whisper\decoding.py", line 163, in logits
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 244, in forward
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl
......
File "torch\nn\functional.py", line 2910, in layer_norm

return torch.layer_norm(

RuntimeError: CUDA error: unspecified launch failure
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.

Traceback (most recent call last):
File "videotrans\task\job.py", line 105, in run
File "videotrans\task\trans_create.py", line 361, in recogn
File "videotrans\recognition\__init__.py", line 293, in run
File "videotrans\recognition\_base.py", line 143, in run
File "videotrans\recognition\_overall.py", line 31, in _exec
File "videotrans\recognition\_overall.py", line 73, in _openai
File "videotrans\configure\_base.py", line 289, in _new_process
RuntimeError: Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 114, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 687, in _main_loop
File "whisper\decoding.py", line 163, in logits
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 244, in forward
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "whisper\model.py", line 41, in forward
File "torch\nn\modules\normalization.py", line 217, in forward

return F.layer_norm(

File "torch\nn\functional.py", line 2910, in layer_norm

return torch.layer_norm(

RuntimeError: CUDA error: unspecified launch failure
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
TaskCfgVTT(is_cuda=True, uuid='5ecec1af2c', cache_folder='F:/win-pyvideotrans-v3.99-418/tmp/28784/5ecec1af2c', target_dir='D:/IDM/_video_out/第13回 まとめ\u3000新しい世界史へ 後藤 春美-mp4', source_language='日语', source_language_code='ja', source_sub='D:/IDM/_video_out/第13回 まとめ\u3000新しい世界史へ 後藤 春美-mp4/ja.srt', source_wav='F:/win-pyvideotrans-v3.99-418/tmp/28784/5ecec1af2c/ja.wav', source_wav_output='D:/IDM/_video_out/第13回 まとめ\u3000新しい世界史へ 後藤 春美-mp4/ja.m4a', target_language='简体中文', target_language_code='zh-cn', target_sub='D:/IDM/_video_out/第13回 まとめ\u3000新しい世界史へ 後藤 春美-mp4/zh-cn.srt', target_wav='F:/win-pyvideotrans-v3.99-418/tmp/28784/5ecec1af2c/target.wav', target_wav_output='D:/IDM/_video_out/第13回 まとめ\u3000新しい世界史へ 後藤 春美-mp4/zh-cn.m4a', name='D:/IDM/第13回 まとめ\u3000新しい世界史へ 後藤 春美.mp4', noextname='第13回 まとめ\u3000新しい世界史へ 後藤 春美', basename='第13回 まとめ\u3000新しい世界史へ 後藤 春美.mp4', ext='mp4', dirname='D:/IDM', shound_del_name=None, translate_type=3, tts_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', voice_autorate=False, video_autorate=False, remove_silent_mid=False, align_sub_audio=True, detect_language='ja', recogn_type=1, model_name='large-v3', shibie_audio=None, remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False, subtitle_language=None, app_mode='biaozhun', subtitles='', targetdir_mp4='D:/IDM/_video_out/第13回 まとめ\u3000新しい世界史へ 後藤 春美-mp4/第13回 まとめ\u3000新しい世界史へ 後藤 春美.mp4', novoice_mp4='F:/win-pyvideotrans-v3.99-418/tmp/28784/5ecec1af2c/novoice.mp4', is_separate=False, embed_bgm=True, instrument=None, vocal=None, back_audio='', clear_cache=False, background_music=None, subtitle_type=3, only_out_mp4=True, recogn2pass=True, output_srt=2, copysrt_rawvideo=False)


=
system:Windows-10-10.0.26200-SP0
version:v3.99
frozen:True
language:zh
root_dir:F:/win-pyvideotrans-v3.99-418

1 Replies

这是 CUDA 相关的 GPU 计算错误,通常由显存不足、CUDA 版本不兼容或显卡驱动问题导致。

  1. 检查并降低显存占用

    • 如果启用了“二次识别”,请先取消勾选。同时处理多个任务时,二次识别会显著增加显存需求。
    • 尝试使用更小的语音识别模型,例如将 large-v3 更换为 medium 或 small。
    • 菜单 -> 工具/选项 -> 高级选项 -> 语音识别参数 中,进行以下调整以降低显存需求:

      • CUDA数据类型:从 float32 改为 float16 或 int8。
      • 识别准确度beam_size:从 5 改为 1
      • 识别准确度best_of:从 5 改为 1
      • 启用上下文感知:设置为 false
  2. 验证 CUDA 环境

    • 确保已正确安装 CUDA 12.8 及对应版本的 cuDNN
    • 在命令行中输入 nvcc -V 或 nvidia-smi 检查 CUDA 版本和显卡驱动状态。
    • 更新 NVIDIA 显卡驱动到最新版本。
  3. 调整高级设置

    • 菜单 -> 工具/选项 -> 高级选项 -> 通用设置 中,将 GPU同时任务数 设置为 1(修改后需重启软件生效)。
  4. 处理长视频

    • 如果视频较长,可在 高级选项 -> 语音识别参数 中,勾选 Whisper预分割音频,这有助于降低单次处理的显存峰值。

请查阅相关文档:

Post Your Reply
Open source and free maintenance is not easy. If this project is helpful to you, please consider making a small donation to help the project continue to maintain and update.

Related resource