#4875 SpeechToText(uuid='eee88a6218', proxy_str=None, precent=1, hasend=False, should_recogn=True, should_trans=False, should_

111.16* Posted at: 2 hours ago

语音识别阶段出错[Qwen-ASR(本地)] [enforce fail at alloc_cpu.cpp:116] data. DefaultCPUAllocator: not enough memory: you tried to allocate 1244659712 bytes.:Traceback (most recent call last):
File "H:\pyvideotrans-4.00\videotrans\process\stt_fun.py", line 536, in qwen3asr_fun

model = Qwen3ASRModel.from_pretrained(

File "H:\pyvideotrans-4.00.venv\lib\site-packages\qwen_asr\inference\qwen3_asr.py", line 206, in from_pretrained

model = AutoModel.from_pretrained(pretrained_model_name_or_path, **kwargs)

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\models\auto\auto_factory.py", line 604, in from_pretrained

return model_class.from_pretrained(

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\modeling_utils.py", line 277, in _wrapper

return func(*args, **kwargs)

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\modeling_utils.py", line 5048, in from_pretrained

) = cls._load_pretrained_model(

File "H:\pyvideotrans-4.
......
.00\videotrans\task\job.py", line 54, in run

self.process_task(trk)

File "H:\pyvideotrans-4.00\videotrans\task\job.py", line 119, in process_task

trk.recogn()

File "H:\pyvideotrans-4.00\videotrans\task\speech2text.py", line 125, in recogn

raw_subtitles = run(

File "H:\pyvideotrans-4.00\videotrans\recognition\__init__.py", line 188, in run

return _cls(**kwargs).run()  # type:ignore

File "H:\pyvideotrans-4.00\videotrans\recognition\_base.py", line 93, in run

res = self._exec()

File "H:\pyvideotrans-4.00\videotrans\recognition\_qwenasrlocal.py", line 43, in _exec

jsdata = self._new_process(callback=qwen3asr_fun, title=title, is_cuda=self.is_cuda, kwargs=kwargs)

File "H:\pyvideotrans-4.00\videotrans\configure\base.py", line 240, in _new_process

raise VideoTransError(err)

videotrans.configure.excepts.VideoTransError: [enforce fail at alloc_cpu.cpp:116] data. DefaultCPUAllocator: not enough memory: you tried to allocate 1244659712 bytes.:Traceback (most recent call last):
File "H:\pyvideotrans-4.00\videotrans\process\stt_fun.py", line 536, in qwen3asr_fun

model = Qwen3ASRModel.from_pretrained(

File "H:\pyvideotrans-4.00.venv\lib\site-packages\qwen_asr\inference\qwen3_asr.py", line 206, in from_pretrained

model = AutoModel.from_pretrained(pretrained_model_name_or_path, **kwargs)

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\models\auto\auto_factory.py", line 604, in from_pretrained

return model_class.from_pretrained(

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\modeling_utils.py", line 277, in _wrapper

return func(*args, **kwargs)

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\modeling_utils.py", line 5048, in from_pretrained

) = cls._load_pretrained_model(

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\modeling_utils.py", line 5468, in _load_pretrained_model

_error_msgs, disk_offload_index = load_shard_file(args)

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\modeling_utils.py", line 843, in load_shard_file

disk_offload_index = _load_state_dict_into_meta_model(

File "H:\pyvideotrans-4.00.venv\lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context

return func(*args, **kwargs)

File "H:\pyvideotrans-4.00.venv\lib\site-packages\transformers\modeling_utils.py", line 750, in _load_state_dict_into_meta_model

param = param.to(casting_dtype)

RuntimeError: [enforce fail at alloc_cpu.cpp:116] data. DefaultCPUAllocator: not enough memory: you tried to allocate 1244659712 bytes.
SpeechToText(uuid='eee88a6218', proxy_str=None, precent=1, hasend=False, should_recogn=True, should_trans=False, should_dubbing=False, should_separate=False, should_hebing=False, out_format='srt', copysrt_rawvideo=False, source_srt_list=[], spk_insert=False)\TaskCfgSTT(uuid='eee88a6218', name='I:/BaiduYunDownload/威力导演13视频学习视频官方教程/威力导演13教学视频1-启动威力导演,开启创作世界.mp4', dirname='I:/BaiduYunDownload/威力导演13视频学习视频官方教程', noextname='威力导演13教学视频1-启动威力导演,开启创作世界', basename='威力导演13教学视频1-启动威力导演,开启创作世界.mp4', ext='mp4', target_dir='H:/pyvideotrans-4.00/output/recogn', cache_folder='H:/pyvideotrans-4.00/tmp/33216/eee88a6218', is_cuda=False, source_language=None, source_language_code=None, source_sub=None, source_wav=None, source_wav_output=None, target_language=None, target_language_code=None, target_sub='H:/pyvideotrans-4.00/output/recogn/威力导演13教学视频1-启动威力导演,开启创作世界.srt', target_wav=None, target_wav_output=None, detect_language='zh-cn', recogn_type=2, model_name='1.7B', shibie_audio='H:/pyvideotrans-4.00/tmp/33216/eee88a6218/威力导演13教学视频1-启动威力导演,开启创作世界-1780382921.7493083.wav', remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False)


=
system:Windows-10-10.0.19045-SP0
version:v4.00
frozen:False
language:zh
root_dir:H:/pyvideotrans-4.00
Python: 3.10.19 (main, Oct 28 2025, 12:01:56) [MSC v.1944 64 bit (AMD64)]

1 Replies

这是因为系统可用内存不足,无法为 Qwen-ASR(本地) 的 1.7B 模型分配约 1.24GB 的内存。

建议按以下顺序尝试解决:

  1. 关闭其他占用内存的程序(浏览器、IDE等),释放系统内存。
  2. 在菜单 工具->选项->高级选项->通用设置 中,将 CPU同时任务数[重启生效] 设为 1,避免多任务竞争内存。
  3. 更换为内存需求更低的语音识别渠道:

    • 使用 faster-whisper(本地) 并选择 base 或 small 模型(large-v3 同样消耗较大)。
    • 如果必须对中文视频做本地识别,可尝试 阿里FunASR(本地)+paraformer-zh,其对内存需求相对较低。
  4. 如果以上仍无法解决,请增加系统虚拟内存(控制面板 → 系统 → 高级系统设置 → 性能设置 → 高级 → 虚拟内存,建议初始大小设为物理内存的 1.5 倍)。

请查阅相关文档:

Post Your Reply
Open source and free maintenance is not easy. If this project is helpful to you, please consider making a small donation to help the project continue to maintain and update.

Related resource

Similar issues already exist