TaskCfgVTT(is_cuda=True, uuid='6dcede9748', cache_folder='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748', target_dir='G:/VideoCaptioner/work-dir/output/DSVR-027-wav', source_language='日语', source_language_code='ja', source_sub='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/ja.srt', source_wav='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748/ja.wav', source_wav_output='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/ja.m4a', target_language='简体中文', target_language_code='zh-cn', target_sub='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/zh-cn.srt', target_wav='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748/target.wav', target_wav_output='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/zh-cn.m4a', name='G:/VideoCaptioner/work-dir/DSVR-027.wav', noextname='DSVR-027', basename='DSVR-027.wav', ext='wav', dirname='G:/VideoCaptioner/work-dir', shound_del_name=None, translate_type=0, tts_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', voice_autorate=False, video_autorate=False, remove_silent_mid=False, align_sub_audio=True, detect_language='ja', recogn_type=4, model_name='kotoba-tech/kotoba-whisper-v2.0', shibie_audio=None, remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False, subtitle_language=None, app_mode='tiqu', subtitles='', targetdir_mp4='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/DSVR-027.mp4', novoice_mp4='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748/novoice.mp4', is_separate=False, embed_bgm=False, instrument=None, vocal='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748/vocal.wav', back_audio='', clear_cache=False, background_music=None, subtitle_type=0, only_out_mp4=False, recogn2pass=False, output_srt=0, copysrt_rawvideo=False)

语音识别阶段出错 [Huggingface_ASR] Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 578, in pipe_asr
File "transformers\pipelines\pt_utils.py", line 126, in __next__

item = next(self.iterator)

File "transformers\pipelines\pt_utils.py", line 271, in __next__

processed = self.infer(next(self.iterator), **self.params)

File "torch\utils\data\dataloader.py", line 733, in __next__

data = self._next_data()

File "torch\utils\data\dataloader.py", line 789, in _next_data

data = self._dataset_fetcher.fetch(index)  # may raise StopIteration

File "torch\utils\data\_utils\fetch.py", line 43, in fetch

return self.collate_fn(data)

File "transformers\pipelines\base.py", line 182, in inner

raise ValueError(

ValueError: The elements of the batch contain different keys. Cannot batch them ({'is_last', 'input_features', 'attention_mask'} != {'is_last', 'input_features', 'attention_mask', 'num_frames'})

Traceback (most recent call last):
File "videotrans\task\job.py", line 105, in run
File "videotrans\task\trans_create.py", line 380, in recogn
File "videotrans\recognition\__init__.py", line 280, in run
File "videotrans\recognition\_base.py", line 143, in run
File "videotrans\recognition\_huggingface.py", line 38, in _exec
File "videotrans\recognition\_huggingface.py", line 63, in _pipe_asr
File "videotrans\configure\_base.py", line 289, in _new_process
RuntimeError: Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 578, in pipe_asr
File "transformers\pipelines\pt_utils.py", line 126, in __next__

item = next(self.iterator)

File "transformers\pipelines\pt_utils.py", line 271, in __next__

processed = self.infer(next(self.iterator), **self.params)

File "torch\utils\data\dataloader.py", line 733, in __next__

data = self._next_data()

File "torch\utils\data\dataloader.py", line 789, in _next_data

data = self._dataset_fetcher.fetch(index)  # may raise StopIteration

File "torch\utils\data\_utils\fetch.py", line 43, in fetch

return self.collate_fn(data)

File "transformers\pipelines\base.py", line 182, in inner

raise ValueError(

ValueError: The elements of the batch contain different keys. Cannot batch them ({'is_last', 'input_features', 'attention_mask'} != {'is_last', 'input_features', 'attention_mask', 'num_frames'})
TaskCfgVTT(is_cuda=True, uuid='6dcede9748', cache_folder='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748', target_dir='G:/VideoCaptioner/work-dir/output/DSVR-027-wav', source_language='日语', source_language_code='ja', source_sub='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/ja.srt', source_wav='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748/ja.wav', source_wav_output='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/ja.m4a', target_language='简体中文', target_language_code='zh-cn', target_sub='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/zh-cn.srt', target_wav='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748/target.wav', target_wav_output='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/zh-cn.m4a', name='G:/VideoCaptioner/work-dir/DSVR-027.wav', noextname='DSVR-027', basename='DSVR-027.wav', ext='wav', dirname='G:/VideoCaptioner/work-dir', shound_del_name=None, translate_type=0, tts_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', voice_autorate=False, video_autorate=False, remove_silent_mid=False, align_sub_audio=True, detect_language='ja', recogn_type=4, model_name='kotoba-tech/kotoba-whisper-v2.0', shibie_audio=None, remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False, subtitle_language=None, app_mode='tiqu', subtitles='', targetdir_mp4='G:/VideoCaptioner/work-dir/output/DSVR-027-wav/DSVR-027.mp4', novoice_mp4='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748/novoice.mp4', is_separate=False, embed_bgm=False, instrument=None, vocal='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748/vocal.wav', back_audio='', clear_cache=False, background_music=None, subtitle_type=0, only_out_mp4=False, recogn2pass=False, output_srt=0, copysrt_rawvideo=False)

=
system:Windows-10-10.0.19045-SP0
version:v3.99
frozen:True
language:zh
root_dir:G:/win-pyvideotrans-v3.99-420

#4255 TaskCfgVTT(is_cuda=True, uuid='6dcede9748', cache_folder='G:/win-pyvideotrans-v3.99-420/tmp/16724/6dcede9748', target_di

1 Reply

Post Your Reply

Related resource