#5214 TaskCfgSTT(uuid='2d65212b45', name='E:/DownLoad/DCL/ASHLEY MASON – MOMMY PANTY ANAL.mp4', dirname='E:/DownLoad/DCL', noe

106.61* Posted at: 2 hours ago

语音识别阶段出错[openai-whisper(本地)] Expected parameter logits (Tensor of shape (1, 51866)) of distribution Categorical(logits: torch.Size([1, 51866])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0'):Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 102, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 703, in _main_loop
File "whisper\decoding.py", line 283, in update
File "torch\distributions\categorical.py", line 73, in init

super().__init__(batch_shape, validate_args=validate_args)

File "torch\distributions\distribution.py", line 72, in init

raise ValueError(

ValueError: Expected parameter logits (Tensor of shape (1, 51866)) of distribution Categorical(logits: torch.Size([1, 51866])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')

Traceback (most recent call last):

File "videotrans\task\job.py", line 35, in run

File "videotrans\task\job.py", line 100, in process_task

File "videotrans\task\speech2text.py", line 126, in recogn

File "videotrans\recognition\__init__.py", line 190, in run

File "videotrans\recognition\_base.py", line 94, in run

File "videotrans\recognition\_whisper.py", line 34, in _exec

File "videotrans\recognition\_whisper.py", line 77, in _openai

File "videotrans\configure\base.py", line 253, in _new_process

videotrans.configure.excepts.VideoTransError: Expected parameter logits (Tensor of shape (1, 51866)) of distribution Categorical(logits: torch.Size([1, 51866])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0'):Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 102, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 703, in _main_loop
File "whisper\decoding.py", line 283, in update
File "torch\distributions\categorical.py", line 73, in init

super().__init__(batch_shape, validate_args=validate_args)

File "torch\distributions\distribution.py", line 72, in init

raise ValueError(

ValueError: Expected parameter logits (Tensor of shape (1, 51866)) of distribution Categorical(logits: torch.Size([1, 51866])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')
TaskCfgSTT(uuid='2d65212b45', name='E:/DownLoad/DCL/ASHLEY MASON – MOMMY PANTY ANAL.mp4', dirname='E:/DownLoad/DCL', noextname='ASHLEY MASON – MOMMY PANTY ANAL', basename='ASHLEY MASON – MOMMY PANTY ANAL.mp4', ext='mp4', target_dir='D:/win-pyvideotrans-v4.01-0602/output/recogn', cache_folder='D:/win-pyvideotrans-v4.01-0602/tmp/11692/2d65212b45', is_cuda=True, source_language=None, source_language_code=None, source_sub=None, source_wav=None, source_wav_output=None, target_language=None, target_language_code=None, target_sub='D:/win-pyvideotrans-v4.01-0602/output/recogn/ASHLEY MASON – MOMMY PANTY ANAL.srt', target_wav=None, target_wav_output=None, detect_language='auto', recogn_type=1, model_name='large-v3-turbo', shibie_audio='D:/win-pyvideotrans-v4.01-0602/tmp/11692/2d65212b45/ASHLEY MASON – MOMMY PANTY ANAL-1781529475.0595853.wav', remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False)


=
system:Windows-10-10.0.26100-SP0
version:v4.01
frozen:True
language:zh
root_dir:D:/win-pyvideotrans-v4.01-0602

1 Replies

核心原因是 显存不足或数值计算不稳定 导致 GPU 计算产生了 NaN(Not a Number,即非数值/无效值)。openai-whisper(本地)large-v3-turbo 模型对显存要求较高(建议 8GB+),一旦显存撞墙或驱动/环境不匹配,就会报此类 logits 包含 nan 的错误。

按顺序逐一尝试以下解决方案(其中任意一项解决问题即可,无需全部执行):

  1. 降低 ASR 模型规模
    在主界面语音识别渠道中,将模型从 large-v3-turbo 改为 small 或 medium,它们显存占用更低,稳定性更高。
  2. 修改计算数据类型(最推荐)
    进入 菜单 → 工具 → 高级选项 → 语音识别参数,将 计算数据类型 改为 float16(若仍失败则试 int8)。float16 能在几乎不影响准确度的情况下大幅降低显存占用。
  3. 临时关闭 CUDA 加速,使用 CPU 验证
    取消勾选主界面的 CUDA加速,用 CPU 跑一次。若 CPU 能正常完成,则确认为 GPU 显存/驱动问题,需按上面步骤调优。
  4. 减少搜索深度
    在同一个高级选项面板中,将 beam_size 和 best_of 都设为 1,降低显存和计算压力。
  5. 关闭上下文感知
    同样在高级选项→语音识别参数中,取消勾选 启用上下文感知。

请查阅相关文档:

Post Your Reply