#4634 TaskCfg(cache_folder='D:/My Softwares/视频翻译配音 Pyvideotran 3.96/tmp/16092/2b81f05837', target_dir='E:/5/_video_out/[影视源www

120.217* Posted at: 14 hours ago 👁27

语音识别阶段出错 [openai-whisper(本地)] Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 78, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 703, in _main_loop
File "whisper\decoding.py", line 283, in update
File "torch\distributions\categorical.py", line 73, in __init__

super().__init__(batch_shape, validate_args=validate_args)

File "torch\distributions\distribution.py", line 72, in __init__

raise ValueError(

ValueError: Expected parameter logits (Tensor of shape (1, 51866)) of distribution Categorical(logits: torch.Si
......
the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')

Traceback (most recent call last):
File "videotrans\task\job.py", line 106, in run
File "videotrans\task\trans_create.py", line 358, in recogn
File "videotrans\recognition\__init__.py", line 284, in run
File "videotrans\recognition\_base.py", line 141, in run
File "videotrans\recognition\_overall.py", line 61, in _exec
File "videotrans\recognition\_overall.py", line 106, in _openai
File "videotrans\configure\_base.py", line 276, in _new_process
RuntimeError: Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 78, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 703, in _main_loop
File "whisper\decoding.py", line 283, in update
File "torch\distributions\categorical.py", line 73, in __init__

super().__init__(batch_shape, validate_args=validate_args)

File "torch\distributions\distribution.py", line 72, in __init__

raise ValueError(

ValueError: Expected parameter logits (Tensor of shape (1, 51866)) of distribution Categorical(logits: torch.Size([1, 51866])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')
TaskCfg(cache_folder='D:/My Softwares/视频翻译配音 Pyvideotran 3.96/tmp/16092/2b81f05837', target_dir='E:/5/_video_out/[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕]-mkv', remove_noise=False, is_separate=False, detect_language='en', subtitle_language=None, source_language='英语', target_language='简体中文', source_language_code='en', target_language_code='zh-cn', source_sub='E:/5/_video_out/[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕]-mkv/en.srt', target_sub='E:/5/_video_out/[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕]-mkv/zh-cn.srt', source_wav='D:/My Softwares/视频翻译配音 Pyvideotran 3.96/tmp/16092/2b81f05837/en.wav', source_wav_output='E:/5/_video_out/[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕]-mkv/en.m4a', target_wav='D:/My Softwares/视频翻译配音 Pyvideotran 3.96/tmp/16092/2b81f05837/target.wav', target_wav_output='E:/5/_video_out/[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕]-mkv/zh-cn.m4a', subtitles='', novoice_mp4='D:/My Softwares/视频翻译配音 Pyvideotran 3.96/tmp/16092/2b81f05837/novoice.mp4', noextname='[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕]', shibie_audio=None, targetdir_mp4='E:/5/_video_out/[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕]-mkv/[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕].mp4', instrument=None, vocal=None, back_audio='', background_music=None, app_mode='tiqu', subtitle_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', copysrt_rawvideo=False, clear_cache=False, translate_type=1, tts_type=0, recogn_type=1, model_name='large-v3-turbo', voice_autorate=False, video_autorate=False, cuda=True, name='E:/5/[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕].mkv', basename='[影视源www.gsy118.com] 偷窥狂和暴露狂 El mirón y la exhibicionista 1986[中文字幕].mkv', ext='mkv', dirname='E:/5', shound_del_name=None, uuid='2b81f05837', remove_silent_mid=False, align_sub_audio=False, 
enable_diariz=False, nums_diariz=0, rephrase=0, only_out_mp4=False, fix_punc=False, recogn2pass=True)


=
system:Windows-10-10.0.26200-SP0
version:v3.96
frozen:True
language:zh
root_dir:D:/My Softwares/视频翻译配音 Pyvideotran 3.96

1 Reply

这是由于 openai-whisper 解码时产生了 NaN(无效数值),通常由 GPU 计算不稳定、音频文件异常或显存不足引起。

解决方案:

  1. 改用 faster-whisper:在“语音识别渠道”下拉菜单中选择 faster-whisper(本地),这是更稳定的替代方案。
  2. 降低 GPU 计算精度:进入 菜单 → 工具 → 高级选项 → 语音识别参数,将 CUDA数据类型 改为 float16 或 int8,同时将 beam_size 和 best_of 改为 1。
  3. 尝试不使用 CUDA 加速:取消勾选主界面的 CUDA加速,用 CPU 运行(速度较慢但可排查 GPU 问题)。
  4. 检查文件路径:你的视频文件名包含特殊符号(如 []、中文字符、空格),建议将视频重命名为简短纯英文(如 video.mkv),并移到浅层目录(如 D:\videos\)。
  5. 升级到最新版:当前为 v3.96,请升级至 v3.99,该版本已修复许多相关问题。
  6. 临时关闭二次识别:取消勾选主界面的 二次识别,可降低显存占用。

请查阅相关文档:

Post Your Reply
Maintaining a free, open-source project is not easy. If this project is helpful to you, please consider making a small donation to help keep it maintained and updated.

Related resource

Similar issues already exist