#3714 TaskCfgVTT(is_cuda=True, uuid='0c8439abca', cache_folder='D:/win-pyvideotrans-v3.98-317/tmp/8756/0c8439abca', target_dir

118.167* Posted at: 4 hours ago 👁10

语音识别阶段出错 [Huggingface_ASR] Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 524, in pipe_asr
File "transformers\pipelines\__init__.py", line 1027, in pipeline

framework, model = infer_framework_load_model(

File "transformers\pipelines\base.py", line 333, in infer_framework_load_model

raise ValueError(

ValueError: Could not load model D:/win-pyvideotrans-v3.98-317/models/models--reazon-research--japanese-wav2vec2-large-rs35kh with any of the following classes: (, , ). See the original errors:

while loading with AutoModelForCTC, an error is thrown:
Traceback (most recent call last):
File "transformers\pipelines\base.py", line 293, in infer_framework_load_model

model = model_class.from_pretrained(model, **kwargs)

File "transformers\models\auto\auto_factory.py", line 604, in from_pretrained

return model_class.from_pretrained(

File "transformers\modeling_utils.py", line 277, in _wrapper

return func(*args, **kwargs)

......
line 311, in infer_framework_load_model

model = model_class.from_pretrained(model, **fp32_kwargs)

File "transformers\models\auto\auto_factory.py", line 607, in from_pretrained

raise ValueError(

ValueError: Unrecognized configuration class for this kind of AutoModel: AutoModelForSpeechSeq2Seq.
Model type should be one of DiaConfig, GraniteSpeechConfig, KyutaiSpeechToTextConfig, MoonshineConfig, Pop2PianoConfig, SeamlessM4TConfig, SeamlessM4Tv2Config, SpeechEncoderDecoderConfig, Speech2TextConfig, SpeechT5Config, WhisperConfig.

while loading with Wav2Vec2ForCTC, an error is thrown:
Traceback (most recent call last):
File "transformers\pipelines\base.py", line 293, in infer_framework_load_model

model = model_class.from_pretrained(model, **kwargs)

File "transformers\modeling_utils.py", line 277, in _wrapper

return func(*args, **kwargs)

File "transformers\modeling_utils.py", line 4900, in from_pretrained

checkpoint_files, sharded_metadata = _get_resolved_checkpoint_files(

File "transformers\modeling_utils.py", line 989, in _get_resolved_checkpoint_files

raise OSError(

OSError: Error no file named pytorch_model.bin, model.safetensors, tf_model.h5, model.ckpt.index or flax_model.msgpack found in directory D:/win-pyvideotrans-v3.98-317/models/models--reazon-research--japanese-wav2vec2-large-rs35kh.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "transformers\pipelines\base.py", line 311, in infer_framework_load_model

model = model_class.from_pretrained(model, **fp32_kwargs)

File "transformers\modeling_utils.py", line 277, in _wrapper

return func(*args, **kwargs)

File "transformers\modeling_utils.py", line 4900, in from_pretrained

checkpoint_files, sharded_metadata = _get_resolved_checkpoint_files(

File "transformers\modeling_utils.py", line 989, in _get_resolved_checkpoint_files

raise OSError(

OSError: Error no file named pytorch_model.bin, model.safetensors, tf_model.h5, model.ckpt.index or flax_model.msgpack found in directory D:/win-pyvideotrans-v3.98-317/models/models--reazon-research--japanese-wav2vec2-large-rs35kh.
TaskCfgVTT(is_cuda=True, uuid='0c8439abca', cache_folder='D:/win-pyvideotrans-v3.98-317/tmp/8756/0c8439abca', target_dir='D:/新增資料夾/_video_out/hhd800.com@MFYD-099-mp4', source_language='日语', source_language_code='ja', source_sub='D:/新增資料夾/_video_out/hhd800.com@MFYD-099-mp4/ja.srt', source_wav='D:/win-pyvideotrans-v3.98-317/tmp/8756/0c8439abca/remove_noise.wav', source_wav_output='D:/新增資料夾/_video_out/hhd800.com@MFYD-099-mp4/ja.m4a', target_language='简体中文', target_language_code='zh-cn', target_sub='D:/新增資料夾/_video_out/hhd800.com@MFYD-099-mp4/zh-cn.srt', target_wav='D:/win-pyvideotrans-v3.98-317/tmp/8756/0c8439abca/target.wav', target_wav_output='D:/新增資料夾/_video_out/hhd800.com@MFYD-099-mp4/zh-cn.m4a', name='D:/新增資料夾/[email protected]4', noextname='hhd800.com@MFYD-099', basename='[email protected]4', ext='mp4', dirname='D:/新增資料夾', shound_del_name=None, translate_type=5, tts_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', voice_autorate=False, video_autorate=False, remove_silent_mid=False, align_sub_audio=True, detect_language='ja', recogn_type=4, model_name='reazon-research/japanese-wav2vec2-large-rs35kh', shibie_audio=None, remove_noise=True, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False, subtitle_language=None, app_mode='biaozhun', subtitles='', targetdir_mp4='D:/新增資料夾/_video_out/hhd800.com@MFYD-099-mp4/[email protected]4', novoice_mp4='D:/win-pyvideotrans-v3.98-317/tmp/8756/0c8439abca/novoice.mp4', is_separate=False, instrument=None, vocal=None, back_audio='', clear_cache=False, background_music=None, subtitle_type=1, only_out_mp4=False, recogn2pass=False, output_srt=0, copysrt_rawvideo=False)


=
system:Windows-10-10.0.19045-SP0
version:v3.98
frozen:True
language:zh
root_dir:D:/win-pyvideotrans-v3.98-317

1 Replies

这是由于 HuggingFace_ASR 渠道的模型文件下载不完整或缺失核心文件(如 pytorch_model.bin、model.safetensors)导致的。

解决方案:

  1. 手动下载并放置模型文件

    • 打开模型页面:https://huggingface.co/reazon-research/japanese-wav2vec2-large-rs35kh/tree/main
    • 将该页面中所有 .json、.bin、.safetensors、.txt 等文件下载到本地。
    • 在软件根目录的 models 文件夹内,找到或创建名为 models--reazon-research--japanese-wav2vec2-large-rs35kh 的文件夹。
    • 将下载的所有文件复制到该文件夹内,覆盖已有文件。
  2. 检查网络代理:如果手动下载困难,请确保软件的网络代理设置正确,以便自动下载能顺利完成。
  3. 更换语音识别渠道:作为临时解决方案,对于日语视频,可以切换到其他语音识别渠道,例如:

    • faster-whisper(本地)的 large-v3 模型(对多语言支持良好)。
    • 或使用在线付费渠道,如 OpenAI 语音识别 API。

请查阅相关文档:

Post Your Reply
Open source and free maintenance is not easy. If this project is helpful to you, please consider making a small donation to help the project continue to maintain and update.

Related resource