#4941 ValueError: The elements of the batch contain different keys. Cannot batch them ({'attention_mask', 'num_frames', 'is_la

151.242* Posted at: 2 hours ago

Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 606, in pipe_asr
File "transformers\pipelines\pt_utils.py", line 126, in __next__

item = next(self.iterator)

File "transformers\pipelines\pt_utils.py", line 271, in __next__

processed = self.infer(next(self.iterator), **self.params)

File "torch\utils\data\dataloader.py", line 733, in __next__

data = self._next_data()

File "torch\utils\data\dataloader.py", line 789, in _next_data

data = self._dataset_fetcher.fetch(index)  # may raise StopIteration

File "torch\utils\data\_utils\fetch.py", line 43, in fetch

return self.collate_fn(data)

File "transformers\pipelines\base.py", line 182, in inner

raise ValueError(

ValueError: The elements of the batch contain different keys. Cannot batch them ({'attention_mask', 'num_frames', 'is_last', 'input_features'} != {'attention_mask', 'is_last', 'input_features'})

Traceback (most recent call last):
File "videotrans\task\_only_one.py", line 50, in run
File "videotrans\task\trans_create.py", line 390, in recogn
File "videotrans\recognition\__init__.py", line 280, in run
File "videotrans\recognition\_base.py", line 143, in run
File "videotrans\recognition\_huggingface.py", line 38, in _exec
File "videotrans\recognition\_huggingface.py", line 63, in _pipe_asr
File "videotrans\configure\_base.py", line 289, in _new_process
RuntimeError: Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 606, in pipe_asr
File "transformers\pipelines\pt_utils.py", line 126, in __next__

item = next(self.iterator)

File "transformers\pipelines\pt_utils.py", line 271, in __next__

processed = self.infer(next(self.iterator), **self.params)

File "torch\utils\data\dataloader.py", line 733, in __next__

data = self._next_data()

File "torch\utils\data\dataloader.py", line 789, in _next_data

data = self._dataset_fetcher.fetch(index)  # may raise StopIteration

File "torch\utils\data\_utils\fetch.py", line 43, in fetch

return self.collate_fn(data)

File "transformers\pipelines\base.py", line 182, in inner

raise ValueError(

ValueError: The elements of the batch contain different keys. Cannot batch them ({'attention_mask', 'num_frames', 'is_last', 'input_features'} != {'attention_mask', 'is_last', 'input_features'})


=
system:Windows-10-10.0.22631-SP0
version:v3.99
frozen:True
language:zh
root_dir:D:/下载/下载视频/win-pyvideotrans-v3.99-0508

1 Replies

该错误发生在使用 HuggingFace ASR 语音识别渠道时,因为批处理(batch)中音频片段的特征键不一致(部分片段包含 num_frames 键,部分不包含),导致无法合并为批次。

解决方案

  1. 切换识别渠道:暂时改用 faster-whisper(本地)或 openai-whisper(本地),它们对音频片段长度不敏感,稳定性更高。
  2. 检查模型配置:若必须使用 HuggingFace ASR,请确保所选模型(如 parakeet-ctc、kotoba-whisper)与音频格式兼容,并尝试在 菜单 → 工具 → 高级选项 → 语音识别参数 中降低 批量大小(如设为1);批量大小为 1 时每个片段单独处理,不需要把键不一致的片段合并到同一批次。

如果问题依旧

暂无法确定具体原因。请提供软件安装目录下的 logs/ 文件夹中最新的日志文件(复制底部约30行报错信息)以便进一步分析。

Post Your Reply