语音识别阶段出错[Huggingface_ASR] The elements of the batch contain different keys. Cannot batch them ({'is_last', 'attention_mask', 'input_features'} != {'is_last', 'num_frames', 'attention_mask', 'input_features'}):
Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 425, in pipe_asr
File "transformers\pipelines\pt_utils.py", line 126, in next
item = next(self.iterator)
File "transformers\pipelines\pt_utils.py", line 271, in next
processed = self.infer(next(self.iterator), **self.params)
File "torch\utils\data\dataloader.py", line 733, in next
data = self._next_data()
File "torch\utils\data\dataloader.py", line 789, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "torch\utils\data\_utils\fetch.py", line 43, in fetch
return self.collate_fn(data)
File "transformers\pipelines\base.py", line 182, in inner
raise ValueError(
ValueError: The elements of the batch contain different keys. Cannot batch them ({'is_last', 'attention_mask', 'input_features'} != {'is_last', 'num_frames', 'attention_mask', 'input_features'})
Traceback (most recent call last):
File "videotrans\task\job.py", line 35, in run
File "videotrans\task\job.py", line 100, in process_task
File "videotrans\task\speech2text.py", line 126, in recogn
File "videotrans\recognition\__init__.py", line 190, in run
File "videotrans\recognition\_base.py", line 94, in run
File "videotrans\recognition\_huggingface.py", line 42, in _exec
File "videotrans\recognition\_huggingface.py", line 66, in _pipe_asr
File "videotrans\configure\base.py", line 253, in _new_process
videotrans.configure.excepts.VideoTransError: The elements of the batch contain different keys. Cannot batch them ({'is_last', 'attention_mask', 'input_features'} != {'is_last', 'num_frames', 'attention_mask', 'input_features'}):
Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 425, in pipe_asr
File "transformers\pipelines\pt_utils.py", line 126, in next
item = next(self.iterator)
File "transformers\pipelines\pt_utils.py", line 271, in next
processed = self.infer(next(self.iterator), **self.params)
File "torch\utils\data\dataloader.py", line 733, in next
data = self._next_data()
File "torch\utils\data\dataloader.py", line 789, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "torch\utils\data\_utils\fetch.py", line 43, in fetch
return self.collate_fn(data)
File "transformers\pipelines\base.py", line 182, in inner
raise ValueError(
ValueError: The elements of the batch contain different keys. Cannot batch them ({'is_last', 'attention_mask', 'input_features'} != {'is_last', 'num_frames', 'attention_mask', 'input_features'})
TaskCfgSTT(uuid='3a183d1184', name='E:/QMDownload/www.98T[email protected]4', dirname='E:/QMDownload', noextname='www.98T.la@fc4852372', basename='www.98T[email protected]4', ext='mp4', target_dir='H:/BaiduNetdiskDownload/win-pyvideotrans-v4.01-0602/output/recogn', cache_folder='H:/BaiduNetdiskDownload/win-pyvideotrans-v4.01-0602/tmp/6652/3a183d1184', is_cuda=True, source_language=None, source_language_code=None, source_sub=None, source_wav=None, source_wav_output=None, target_language=None, target_language_code=None, target_sub='H:/BaiduNetdiskDownload/win-pyvideotrans-v4.01-0602/output/recogn/www.98T[email protected]', target_wav=None, target_wav_output=None, detect_language='ja', recogn_type=4, model_name='kotoba-tech/kotoba-whisper-v2.0', shibie_audio='H:/BaiduNetdiskDownload/win-pyvideotrans-v4.01-0602/tmp/6652/3a183d1184/[email protected]', remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False)
=
system:Windows-10-10.0.22631-SP0
version:v4.01
frozen:True
language:zh
root_dir:H:/BaiduNetdiskDownload/win-pyvideotrans-v4.01-0602