#5083 TaskCfgVTT(uuid='096d59d432', name='E:/short/zip/爱如坟墓/50.mp4', dirname='E:/short/zip/爱如坟墓', noextname='50', basename='50

240e:3b7* Posted at: 1 day ago

语音识别阶段出错[faster-whisper(本地)] No transcription results returned. Please check the original audio/video or model and try again.
Traceback (most recent call last):

File "videotrans\task\job.py", line 35, in run

File "videotrans\task\job.py", line 100, in process_task

File "videotrans\task\trans_create.py", line 322, in recogn

File "videotrans\recognition\__init__.py", line 190, in run

File "videotrans\recognition\_base.py", line 94, in run

File "videotrans\recognition\_whisper.py", line 36, in _exec

File "videotrans\recognition\_whisper.py", line 109, in _faster

File "videotrans\configure\base.py", line 253, in _new_process

videotrans.configure.excepts.VideoTransError: No transcription results returned. Please check the original audio/video or model and try again.
TaskCfgVTT(uuid='096d59d432', name='E:/short/zip/爱如坟墓/50.mp4', dirname='E:/short/zip/爱如坟墓', noextname='50', basename='50.mp4', ext='mp4', target_dir='E:/short/zip/爱如坟墓/subtitles/50-mp4', cache_folder='E:/short/app/win-pyvideotrans-v4.01-0602/tmp/8040/096d59d432', is_cuda=False, source_language='简体中文', source_language_code='zh-cn', source_sub='E:/short/zip/爱如坟墓/subtitles/50-mp4/zh-cn.srt', source_wav='E:/short/app/win-pyvideotrans-v4.01-0602/tmp/8040/096d59d432/zh-cn.wav', source_wav_output='E:/short/zip/爱如坟墓/subtitles/50-mp4/zh-cn.m4a', target_language='英语', target_language_code='en', target_sub='E:/short/zip/爱如坟墓/subtitles/50-mp4/en.srt', target_wav='E:/short/app/win-pyvideotrans-v4.01-0602/tmp/8040/096d59d432/target.wav', target_wav_output='E:/short/zip/爱如坟墓/subtitles/50-mp4/en.m4a', translate_type=0, tts_type=0, volume='+0%', pitch='+0Hz', voice_rate='+0%', voice_role='No', voice_autorate=False, video_autorate=False, remove_silent_mid=False, align_sub_audio=True, detect_language='zh-cn', recogn_type=0, model_name='large-v3-turbo', shibie_audio=None, remove_noise=False, enable_diariz=False, nums_diariz=0, rephrase=0, fix_punc=False, subtitle_language=None, app_mode='tiqu', subtitles='', targetdir_mp4='E:/short/zip/爱如坟墓/subtitles/50-mp4/50.mp4', novoice_mp4='E:/short/app/win-pyvideotrans-v4.01-0602/tmp/8040/096d59d432/novoice.mp4', is_separate=False, embed_bgm=True, instrument=None, vocal='E:/short/app/win-pyvideotrans-v4.01-0602/tmp/8040/096d59d432/vocal.wav', clear_cache=False, background_music='', subtitle_type=0, only_out_mp4=False, recogn2pass=False, output_srt=0, copysrt_rawvideo=False, loop_backaudio=1, backaudio_volume=0.8)


=
system:Windows-10-10.0.19045-SP0
version:v4.01
frozen:True
language:zh
root_dir:E:/short/app/win-pyvideotrans-v4.01-0602

7 Replies

语音识别未返回任何结果,通常是因为视频音频中无人声、模型下载不完整、语言选择错误、或文件路径/名称含有中文/特殊符号导致 ffmpeg 提取音频失败。

请按以下步骤排查解决:

  1. 确认视频存在有效人声音轨:用播放器(如 VLC)打开视频,确认有清晰的说话声,而不是纯背景音乐。
  2. 移动视频到纯英文短路径:当前路径包含中文文件夹 爱如坟墓,且可能层级过深。请将视频文件复制到磁盘根目录的纯英文文件夹下,例如 D:\video\50.mp4,然后重新加载。
  3. 检查发音语言:确保主界面“发音语言”手动选择为“简体中文”(不要使用 auto)。
  4. 更换或重装 ASR 模型:large-v3-turbo 模型可能首次下载不完整。建议先在菜单-工具-高级选项中临时切换为 base 或 small 模型测试。若仍需 large-v3-turbo,可参考知识库手动下载并放入 models\models--mobiuslabsgmbh--faster-whisper-large-v3-turbo 目录。
  5. 开启降噪:在主界面勾选“降噪”,消除背景噪声干扰。
  6. 启用 GPU 加速:如果你的显卡支持 CUDA(NVIDIA),请安装 CUDA 12.8+ 和 cuDNN 9.11+,并在软件主界面勾选“CUDA加速”,以提升识别稳定性。

请查阅相关文档:

其他视频处理正常 应该不是语言4吧 有声音。第5个我没有发现有这个勾选

既然其他视频处理正常,说明软件、模型和环境配置没有问题。问题只出在这个 特定视频文件 上。

可能原因和解决方法:

  1. 文件路径包含特殊字符或过长
    当前路径 E:/short/zip/爱如坟墓/50.mp4 含有中文字符。建议将视频文件移到 纯英文短路径(例如 D:\videos\50.mp4)后重新选择文件。
  2. 视频音轨存在但人声被严重压缩或无声
    用播放器(如 VLC)实际播放确认是否有清晰说话声。可用“批量语音转字幕”功能单独测试该视频是否能转录出文字。
  3. 视频文件损坏或编码不兼容
    尝试用格式工厂将视频转为 h.264 + aac 格式的 mp4 后重新处理。
  4. 降噪选项在主界面右上角
    你提到的“第5个”,指的是之前建议的勾选“降噪”。这个选项在软件主界面右上角,是一个 复选框,文字为 降噪。视频背景嘈杂时可以开启,但其他视频正常说明该视频可能背景噪声过大掩盖人声,可以试一下。
  5. 日志排查
    请打开软件根目录下的 logs 文件夹,找到当天日志文件,搜索该视频的 uuid(你提供日志中有 uuid='096d59d432'),查看更详细错误信息(尤其是 ffmpeg 阶段是否提示音轨提取失败)。

请先尝试移动视频位置这一步,这是最可能的原因。

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "logging\__init__.py", line 1103, in emit
AttributeError: 'NoneType' object has no attribute 'write'
Call stack:
File "videotrans\task\job.py", line 37, in run
File "videotrans\task\job.py", line 44, in handle_error
File "logging\__init__.py", line 1512, in exception
File "logging\__init__.py", line 1506, in error
File "logging\__init__.py", line 1624, in _log
File "logging\__init__.py", line 1634, in handle
File "logging\__init__.py", line 1696, in callHandlers
File "logging\__init__.py", line 968, in handle
File "logging\__init__.py", line 1108, in emit
Message: VideoTransError('No transcription results returned. Please check the original audio/video or model and try again.')
Arguments: ()
[DEBUG] 新进程执行结束结束timeout=0
[DEBUG] [TransCreate]最终配置信息:self=TransCreate(uuid='096d59d432', proxy_str=None, last_down_time=0, precent=1, hasend=False, sh
......
faster
File "videotrans\configure\base.py", line 266, in _new_process
File "logging\__init__.py", line 1512, in exception
File "logging\__init__.py", line 1506, in error
File "logging\__init__.py", line 1624, in _log
File "logging\__init__.py", line 1634, in handle
File "logging\__init__.py", line 1696, in callHandlers
File "logging\__init__.py", line 968, in handle
File "logging\__init__.py", line 1108, in emit
Message: 'STT use large-v3-turbo,No transcription results returned. Please check the original audio/video or model and try again.'
Arguments: ()
[DEBUG] [新进程任务 结束:STT use large-v3-turbo],耗时13.06447172164917s
[ERROR] No transcription results returned. Please check the original audio/video or model and try again.
Traceback (most recent call last):
File "videotrans\task\job.py", line 35, in run
File "videotrans\task\job.py", line 100, in process_task
File "videotrans\task\trans_create.py", line 322, in recogn
File "videotrans\recognition\__init__.py", line 190, in run
File "videotrans\recognition\_base.py", line 94, in run
File "videotrans\recognition\_whisper.py", line 36, in _exec
File "videotrans\recognition\_whisper.py", line 109, in _faster
File "videotrans\configure\base.py", line 253, in _new_process
videotrans.configure.excepts.VideoTransError: No transcription results returned. Please check the original audio/video or model and try again.


Logging error

Traceback (most recent call last):
File "videotrans\task\job.py", line 35, in run
File "videotrans\task\job.py", line 100, in process_task
File "videotrans\task\trans_create.py", line 322, in recogn
File "videotrans\recognition\__init__.py", line 190, in run
File "videotrans\recognition\_base.py", line 94, in run
File "videotrans\recognition\_whisper.py", line 36, in _exec
File "videotrans\recognition\_whisper.py", line 109, in _faster
File "videotrans\configure\base.py", line 253, in _new_process
videotrans.configure.excepts.VideoTransError: No transcription results returned. Please check the original audio/video or model and try again.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "logging\__init__.py", line 1103, in emit
AttributeError: 'NoneType' object has no attribute 'write'
Call stack:
File "videotrans\task\job.py", line 37, in run
File "videotrans\task\job.py", line 44, in handle_error
File "logging\__init__.py", line 1512, in exception
File "logging\__init__.py", line 1506, in error
File "logging\__init__.py", line 1624, in _log
File "logging\__init__.py", line 1634, in handle
File "logging\__init__.py", line 1696, in callHandlers
File "logging\__init__.py", line 968, in handle
File "logging\__init__.py", line 1108, in emit
Message: VideoTransError('No transcription results returned. Please check the original audio/video or model and try again.')
Arguments: ()
[DEBUG] 新进程执行结束结束timeout=6

从日志看,问题仅出现在这个特定视频上(其他正常),且 large-v3-turbo 模型运行 13 秒后返回空结果,说明模型成功加载但未能从音频中提取到有效语音。

直接原因推测: 该视频的音频轨道可能非标准格式、无声或人声被背景严重淹没,导致 Whisper 输出为空。

请按顺序依次尝试以下步骤:

  1. 检查视频音轨
    用播放器(VLC)打开 50.mp4,确认是否有清晰人声说话。若只有背景音乐或静音,则无法转录。
  2. 移动文件到纯英文短路径
    当前路径含有中文 爱如坟墓,建议将视频复制到 D:\test\50.mp4 后重新选择该文件处理。
  3. 临时切换小模型
    在主界面语音识别模型下拉框中,从 large-v3-turbo 改为 base 或 small 再试。若突然成功,说明原模型对该视频存在兼容性问题(可尝试重新下载 large-v3-turbo 模型)。
  4. 开启降噪
    降噪选项在软件主界面 右上角,紧挨着“二次识别”复选框,文字为“降噪”。开启后可滤除背景噪声,提升识别率。
  5. 检查音频提取
    处理完成后,查看软件根目录下 tmp/8040/096d59d432/ 文件夹中生成的 zh-cn.wav 文件大小。若文件是 0KB 或极小,说明 ffmpeg 提取音频失败(多由路径中文引起),请移步步骤 2。

若以上均无效,请提供该视频的时长、编码格式等信息,或尝试用其他工具(如格式工厂)将其转为纯 H.264 + AAC 格式后重试。

想问一个问题。如何停止当前的。继续执行后面的视频文件。目前没有停止单个文件的操作

软件目前不支持在批量处理过程中单独停止某个视频任务并继续执行下一个。如果你需要跳过某个文件,只能停止全部任务后重新选择需要处理的文件。可配置串行处理减少影响:在菜单->工具->高级选项->通用设置中,将“批量翻译视频时每批数量”设为 1,这样一次只处理一个,完成后自动开始下一个。期间若点击“停止”按钮,只会中断当前文件,不会自动开始后续文件。

Post Your Reply
Maintaining a free, open-source project is not easy. If this project is helpful to you, please consider making a small donation to help keep it maintained and updated. Donate: ko-fi.com/jianchang512

Related resource