#3327 TaskCfg(cache_folder='D:/win-pyvideotrans-v3.97/tmp/2076/47bbb1cd19', target_dir='E:/DownLoads/DCL/_video_out/Breeding M

240e:44d* Posted at: 9 hours ago 👁11

语音识别阶段出错 [openai-whisper(本地)] Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 80, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 703, in _main_loop
File "whisper\decoding.py", line 283, in update
File "torch\distributions\categorical.py", line 73, in __init__

super().__init__(batch_shape, validate_args=validate_args)

File "torch\distributions\distribution.py", line 72, in __init__

raise ValueError(

ValueError: Expected parameter logits (Tensor of shape (1, 51866)) of distribution Categorical(logits: torch.Size([1, 51866])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')

Traceback (most recent call last):
File "videotrans\task\job.py", line 106, in run
File "videotrans\task\trans_create.py", line 358, in recogn
File "videotrans\recognition\__init__.py", line 268, in run
File "videotrans\recognition\_base.py", line 141, in run
File "videotrans\recognition\_overall.py", line 61, in _exec
File "videotrans\recognition\_overall.py", line 106, in _openai
File "videotrans\configure\_base.py", line 291, in _new_process
RuntimeError: Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 80, in openai_whisper
File "whisper\transcribe.py", line 295, in transcribe
File "whisper\transcribe.py", line 201, in decode_with_fallback
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 824, in decode
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "whisper\decoding.py", line 737, in run
File "whisper\decoding.py", line 703, in _main_loop
File "whisper\decoding.py", line 283, in update
File "torch\distributions\categorical.py", line 73, in __init__

super().__init__(batch_shape, validate_args=validate_args)

File "torch\distributions\distribution.py", line 72, in __init__

raise ValueError(

ValueError: Expected parameter logits (Tensor of shape (1, 51866)) of distribution Categorical(logits: torch.Size([1, 51866])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan, ..., nan, nan, nan]], device='cuda:0')
TaskCfg(cache_folder='D:/win-pyvideotrans-v3.97/tmp/2076/47bbb1cd19', target_dir='E:/DownLoads/DCL/_video_out/Breeding My Hot Mommy – Mira Sangre-mp4', remove_noise=False, is_separate=False, detect_language='en', subtitle_language=None, source_language='英语', target_language='简体中文', source_language_code='en', target_language_code='zh-cn', source_sub='E:/DownLoads/DCL/_video_out/Breeding My Hot Mommy – Mira Sangre-mp4/en.srt', target_sub='E:/DownLoads/DCL/_video_out/Breeding My Hot Mommy – Mira Sangre-mp4/zh-cn.srt', source_wav='D:/win-pyvideotrans-v3.97/tmp/2076/47bbb1cd19/en.wav', source_wav_output='E:/DownLoads/DCL/_video_out/Breeding My Hot Mommy – Mira Sangre-mp4/en.m4a', target_wav='D:/win-pyvideotrans-v3.97/tmp/2076/47bbb1cd19/target.wav', target_wav_output='E:/DownLoads/DCL/_video_out/Breeding My Hot Mommy – Mira Sangre-mp4/zh-cn.m4a', subtitles='', novoice_mp4='D:/win-pyvideotrans-v3.97/tmp/2076/47bbb1cd19/novoice.mp4', noextname='Breeding My Hot Mommy – Mira Sangre', shibie_audio=None, targetdir_mp4='E:/DownLoads/DCL/_video_out/Breeding My Hot Mommy – Mira Sangre-mp4/Breeding My Hot Mommy – Mira Sangre.mp4', instrument=None, vocal=None, back_audio='', background_music=None, app_mode='tiqu', subtitle_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', copysrt_rawvideo=False, clear_cache=False, translate_type=19, tts_type=0, recogn_type=1, model_name='large-v3-turbo', voice_autorate=False, video_autorate=False, is_cuda=True, name='E:/DownLoads/DCL/Breeding My Hot Mommy – Mira Sangre.mp4', basename='Breeding My Hot Mommy – Mira Sangre.mp4', ext='mp4', dirname='E:/DownLoads/DCL', shound_del_name=None, uuid='47bbb1cd19', remove_silent_mid=False, align_sub_audio=True, enable_diariz=False, nums_diariz=0, rephrase=0, only_out_mp4=False, fix_punc=False, recogn2pass=True)


=
system:Windows-10-10.0.26100-SP0
version:v3.97
frozen:True
language:zh
root_dir:D:/win-pyvideotrans-v3.97

1 Replies

这是由于使用 openai-whisper (本地) 模型进行语音识别时,模型推理过程中产生了 NaN(非数值)张量,导致解码失败。这通常与模型文件损坏、显存问题或特定视频/音频内容有关。

解决方案:

  1. 切换语音识别渠道:在“语音识别渠道”中,将 openai-whisper(本地) 暂时更换为 faster-whisper(本地)。faster-whisper 通常更稳定,且速度更快。
  2. 检查并重新下载模型:如果问题持续,可能是 openai-whisper 的模型文件损坏。请尝试删除 models/ 目录下对应的模型文件夹(例如 openai-whisper-large-v3-turbo),然后重新启动软件,让其重新下载。
  3. 检查显存:确保您的 NVIDIA 显卡有足够的空闲显存。如果显存不足,尝试关闭其他占用显存的程序,或在软件“高级选项”中降低 beam_size 参数,或使用更小的模型(如 small)。
  4. 检查音频文件:确认待识别的视频/音频文件本身没有损坏,且包含有效的音轨。可以尝试用其他播放器播放确认。

如果以上步骤无效,请提供更详细的日志信息以便进一步分析。

请查阅相关文档:

Post Your Reply
Maintaining a free, open-source project is not easy. If this project is helpful to you, please consider making a small donation to help keep it maintained and updated.

Related resource