#5574 代理地址:http://127.0.0.1:7890

78.105* Posted at: 3 hours ago

'str' object has no attribute 'size':Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 636, in funasr_mlt
File "funasr\auto\auto_model.py", line 324, in generate

return self.inference(

File "funasr\auto\auto_model.py", line 381, in inference

res = model.inference(**batch, **kwargs)

File "C:\Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/videotrans/codes\model.py", line 610, in inference

return self.inference_llm(

File "C:\Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/videotrans/codes\model.py", line 628, in inference_llm

inputs_embeds, contents, batch, source_ids, meta_data = self.inference_prepare(

File "C:\Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/videotrans/codes\model.py", line 473, in inference_prepare

output = self.data_load_speech(

File "C:\Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/videotrans/codes\model.py", line 381, in data_load_speech

speech, speech_lengths = extract_fbank(

File "funasr\utils\load_utils.py", line 217, in extract_fbank

data, data_len = frontend(data, data_len, **kwargs)

File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "C:\Users\THUNDEROBOT\Documents\全应用\图吧工具箱\win-pyvideotrans-v4.03-0622\_internal\funasr\frontends\wav_frontend.py", line 124, in forward

batch_size = input.size(0)

AttributeError: 'str' object has no attribute 'size'

Traceback (most recent call last):
File "videotrans\task\only_one.py", line 46, in run
File "videotrans\task\trans_create.py", line 319, in recogn
File "videotrans\recognition\__init__.py", line 191, in run
File "videotrans\recognition\_base.py", line 90, in run
File "videotrans\recognition\_funasr.py", line 61, in _exec
File "videotrans\configure\base.py", line 272, in _new_process
videotrans.configure.excepts.VideoTransError: 'str' object has no attribute 'size':Traceback (most recent call last):
File "videotrans\process\stt_fun.py", line 636, in funasr_mlt
File "funasr\auto\auto_model.py", line 324, in generate

return self.inference(

File "funasr\auto\auto_model.py", line 381, in inference

res = model.inference(**batch, **kwargs)

File "C:\Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/videotrans/codes\model.py", line 610, in inference

return self.inference_llm(

File "C:\Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/videotrans/codes\model.py", line 628, in inference_llm

inputs_embeds, contents, batch, source_ids, meta_data = self.inference_prepare(

File "C:\Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/videotrans/codes\model.py", line 473, in inference_prepare

output = self.data_load_speech(

File "C:\Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/videotrans/codes\model.py", line 381, in data_load_speech

speech, speech_lengths = extract_fbank(

File "funasr\utils\load_utils.py", line 217, in extract_fbank

data, data_len = frontend(data, data_len, **kwargs)

File "torch\nn\modules\module.py", line 1751, in _wrapped_call_impl

return self._call_impl(*args, **kwargs)

File "torch\nn\modules\module.py", line 1762, in _call_impl

return forward_call(*args, **kwargs)

File "C:\Users\THUNDEROBOT\Documents\全应用\图吧工具箱\win-pyvideotrans-v4.03-0622\_internal\funasr\frontends\wav_frontend.py", line 124, in forward

batch_size = input.size(0)

AttributeError: 'str' object has no attribute 'size'

cfg=[TaskCfgVTT]当前工作模式: 翻译视频或音频 单视频模式
原始输入文件名: C:/Users/THUNDEROBOT/Downloads/34652488276-1-192.mp4,
输出结果保存到文件夹: 34652488276-1-192-mp4,
临时文件夹: C:/Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622/tmp/21092/68a711d52a
已选 清理已存在
未选 启用CUDA加速
未选 降噪
语音识别:阿里FunASR(本地)(本地内置), model_name: Fun-ASR-Nano-2512, 发音语言: 英语, 断句方式:默认断句
翻译渠道:微软(免费),原始语言:英语,目标语言:简体中文, 已选 发送完整字幕
配音渠道:Edge-TTS(免费), 角色:Yunyang(Male/CN), 配音语言:简体中文, 未选 二次语音识别
音量:+0%, 语速:+0%, 未选 音频加速, 已选 视频慢速
字幕: 嵌入硬字幕(双) 目标语言在上双字幕
代理地址:http://127.0.0.1:7890


=
system:Windows-10-10.0.26200-SP0
version:v4.03
frozen:True
language:zh
root_dir:C:/Users/THUNDEROBOT/Documents/全应用/图吧工具箱/win-pyvideotrans-v4.03-0622

1 Replies

FunASR 在 extract_fbank 阶段将音频数据当作字符串处理,通常是因为模型加载异常或音频预处理失败导致的。这是 FunASR 渠道在特定环境下(尤其是 Windows 打包版)的已知兼容性缺陷。

解决方案

  1. 首选更换 ASR 渠道:点击主界面 ASR Channel 下拉框,切换为 faster-whisper(本地内置),并选择 large-v3 模型。该渠道更稳定,英文字幕识别效果出色。
  2. 若坚持使用 FunASR:尝试重新运行一次(偶尔模型加载会卡顿)。如仍失败,请确认音频文件可以正常播放,并且文件路径中不含特殊字符。

请查阅相关文档:

Post Your Reply
Maintaining a free, open-source project is not easy. If this project is helpful to you, please consider making a small donation to help keep it maintained and updated. Donate: ko-fi.com/jianchang512

Related resources