语音识别阶段出错 [Huggingface_ASR] Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 606, in pipe_asr
File "transformers\pipelines\pt_utils.py", line 126, in next
item = next(self.iterator)
File "transformers\pipelines\pt_utils.py", line 271, in next
processed = self.infer(next(self.iterator), **self.params)
File "torch\utils\data\dataloader.py", line 733, in next
data = self._next_data()
File "torch\utils\data\dataloader.py", line 789, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "torch\utils\data\_utils\fetch.py", line 43, in fetch
return self.collate_fn(data)
File "transformers\pipelines\base.py", line 182, in inner
raise ValueError(
ValueError: The elements of the batch contain different keys. Cannot batch them ({'input_features', 'is_last', 'attention_mask'} != {'num_frames', 'input_features', 'is_last', 'attention_mask'})
Traceback (most recent call last):
File "videotrans\task\job.py", line 105, in run
File "videotrans\task\_speech2text.py", line 191, in recogn
File "videotrans\recognition\__init__.py", line 280, in run
File "videotrans\recognition\_base.py", line 143, in run
File "videotrans\recognition\_huggingface.py", line 38, in _exec
File "videotrans\recognition\_huggingface.py", line 63, in _pipe_asr
File "videotrans\configure\_base.py", line 289, in _new_process
RuntimeError: Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 606, in pipe_asr
File "transformers\pipelines\pt_utils.py", line 126, in next
item = next(self.iterator)
File "transformers\pipelines\pt_utils.py", line 271, in next
processed = self.infer(next(self.iterator), **self.params)
File "torch\utils\data\dataloader.py", line 733, in next
data = self._next_data()
File "torch\utils\data\dataloader.py", line 789, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "torch\utils\data\_utils\fetch.py", line 43, in fetch
return self.collate_fn(data)
File "transformers\pipelines\base.py", line 182, in inner
raise ValueError(
ValueError: The elements of the batch contain different keys. Cannot batch them ({'input_features', 'is_last', 'attention_mask'} != {'num_frames', 'input_features', 'is_last', 'attention_mask'})
TaskCfgSTT(is_cuda=True, uuid='ff8bdfce71', cache_folder='D:/win-pyvideotrans-v3.99-0508/tmp/13572/ff8bdfce71', target_dir='D:/翻译win-pyvideotrans-v3.99-0508/output/recogn', source_language=None, source_language_code=None, source_sub=None, source_wav=None, source_wav_output=None, target_language=None, target_language_code=None, target_sub='D:/翻译win-pyvideotrans-v3.99-0508/output/recogn/sdjs259.srt', target_wav=None, target_wav_output=None, name='C:/Users/7800X3D 7900XTX/Desktop/sdjs259.wav', noextname='sdjs259', basename='sdjs259.wav', ext='wav', dirname='C:/Users/7800X3D 7900XTX/Desktop', shound_del_name=None, detect_language='ja', recogn_type=4, model_name='kotoba-tech/kotoba-whisper-v2.0', shibie_audio='D:/win-pyvideotrans-v3.99-0508/tmp/13572/ff8bdfce71/sdjs259-1778647381.0989764.wav', remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False)