#5478 cfg=TaskCfgSTT(uuid='6f1ee0d9fa', name='D:/张建光/姜胡说/2021年/@姜胡说_20201008_我Z核心的底层方法之一_建议多看几遍_刻意练习_值....mp4', dirname='D:/张建

106.35* Posted at: 2 hours ago

语音识别阶段出错[Qwen-ASR(本地内置)] CUDA out of memory. Tried to allocate 48.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 2.79 GiB is allocated by PyTorch, and 658.40 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables):Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 556, in qwen3asr_fun
File "torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "D:\download\pyVideoTrans\_internal\qwen_asr\inference\qwen3_asr.py", line 383, in transcribe

raw_outputs = self._infer_asr(chunk_ctx, chunk_wavs, chunk_lang)

File "D:\download\pyVideoTrans\_internal\qwen_asr\inference\qwen3_asr.py", line 485, in _infer_asr

return self._infer_asr

......
s = model_forward(**model_inputs, return_dict=True)
File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "transformers\utils\generic.py", line 918, in wrapper

output = func(self, *args, **kwargs)

File "D:\download\pyVideoTrans\_internal\qwen_asr\core\transformers_backend\modeling_qwen3_asr.py", line 1230, in forward

outputs = self.model(

File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "transformers\utils\generic.py", line 1072, in wrapper

outputs = func(self, *args, **kwargs)

File "D:\download\pyVideoTrans\_internal\qwen_asr\core\transformers_backend\modeling_qwen3_asr.py", line 1043, in forward

layer_outputs = decoder_layer(

File "transformers\modeling_layers.py", line 94, in call

return super().__call__(*args, **kwargs)

File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "transformers\utils\deprecation.py", line 172, in wrapped_func

return func(*args, **kwargs)

File "D:\download\pyVideoTrans\_internal\qwen_asr\core\transformers_backend\modeling_qwen3_asr.py", line 262, in forward

hidden_states, _ = self.self_attn(

File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "transformers\utils\deprecation.py", line 172, in wrapped_func

return func(*args, **kwargs)

File "D:\download\pyVideoTrans\_internal\qwen_asr\core\transformers_backend\modeling_qwen3_asr.py", line 204, in forward

attn_output, attn_weights = attention_interface(

File "transformers\integrations\sdpa_attention.py", line 68, in sdpa_attention_forward

value = repeat_kv(value, module.num_key_value_groups)

File "transformers\integrations\sdpa_attention.py", line 27, in repeat_kv

return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)

torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 48.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 2.79 GiB is allocated by PyTorch, and 658.40 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
cfg=TaskCfgSTT(uuid='6f1ee0d9fa', name='D:/张建光/姜胡说/2021年/@姜胡说_20201008_我Z核心的底层方法之一_建议多看几遍_刻意练习_值....mp4', dirname='D:/张建光/姜胡说/2021年', noextname='@姜胡说_20201008_我Z核心的底层方法之一_建议多看几遍_刻意练习_值...', basename='@姜胡说_20201008_我Z核心的底层方法之一_建议多看几遍_刻意练习_值....mp4', ext='mp4', target_dir='D:/download/pyVideoTrans/output/recogn', cache_folder='D:/download/pyVideoTrans/tmp/10384/6f1ee0d9fa', is_cuda=True, source_language=None, source_language_code=None, source_sub=None, source_wav=None, source_wav_output=None, target_language=None, target_language_code=None, target_sub='D:/download/pyVideoTrans/output/recogn/@姜胡说_20201008_我Z核心的底层方法之一_建议多看几遍_刻意练习_值....srt', target_wav=None, target_wav_output=None, detect_language='zh-cn', recogn_type=2, model_name='0.6B', shibie_audio='D:/download/pyVideoTrans/tmp/10384/6f1ee0d9fa/@姜胡说_20201008_我Z核心的底层方法之一_建议多看几遍_刻意练习_值...-1782388628.8159573.wav', remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=0)


=
system:Windows-10-10.0.26200-SP0
version:v4.03
frozen:True
language:zh
root_dir:D:/download/pyVideoTrans

1 Replies

这是典型的显卡显存不足导致的报错。您的 GPU 0 总显存仅 4GB,运行 Qwen-ASR(本地) 模型(即使是 0.6B 小模型)仍超出可用容量,导致 torch.OutOfMemoryError。

解决方案(按推荐顺序尝试):

  1. 换用更轻量的本地语音识别渠道
    推荐使用 faster-whisper(本地),并选择 base 或 small 模型(large-v3 同样会爆显存)。在高级选项中关闭 CUDA 加速(若显存紧张),或者使用 int8 计算数据类型以降低显存占用。
  2. 降低 Qwen-ASR 的显存消耗(如果仍希望使用它)
    进入 菜单 → 工具 → 高级选项 → 语音识别参数

    • 计算数据类型 改为 int8
    • beam_size 和 best_of 降为 1
    • 取消勾选 启用上下文感知
    • 若仍失败,请彻底关闭 CUDA 加速(即不勾选主界面的 CUDA 加速),使用 CPU 运行(速度会极慢)。
  3. 释放显卡资源
    关闭其他占用 GPU 的程序(如浏览器硬件加速、其他 AI 工具),重启软件后再试。
  4. 使用在线识别渠道替代
    若本地模型始终无法跑通,可改用在线 API 渠道,例如:

    • 阿里百炼 Qwen3-ASR(需要 API Key)
    • 字节语音识别大模型极速版(中文效果极佳,需 API Key)
    • OpenAI 语音识别 API
      这些在线服务将处理过程放在云端,不消耗本地显存。

如果以上方法仍然无效,请提供 logs/ 目录下最新的日志文件(复制最后约 30 行报错信息),并告知软件左上角的版本号,以便进一步排查。

Post Your Reply
Maintaining a free, open-source project is not easy. If this project is helpful to you, please consider making a small donation to help keep it maintained and updated. Donate: ko-fi.com/jianchang512

Related resource

Similar issues already exist