继续,是不是在这个app.py脚本中写死了?
# /// script
# requires-python = "==3.12.*"
# dependencies = [
# "ffmpeg-python>=0.2.0",
# "flask>=3.1.2",
# "openai>=2.7.2",
# "pydub>=0.25.1",
# "waitress>=3.0.2",
# "whisperx>=3.7.4",
# ]
# [[tool.uv.index]]
# url = "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
# [[tool.uv.index]]
# name = "pytorch-cu128"
# url = "https://download.pytorch.org/whl/cu128"
# explicit = true
# [[tool.uv.index]]
# name = "pytorch-cpu"
# url = "https://download.pytorch.org/whl/cpu"
# explicit = true
# [tool.uv.sources]
# torch = [
# { index = "pytorch-cu128", marker = "sys_platform == 'win32' or sys_platform == 'linux'" },
# { index = "pytorch-cpu", marker = "sys_platform == 'darwin'" }, # darwin 为 macOS
# ]
# torchaudio = [
# { index = "pytorch-cu128", marker = "sys_platform == 'win32' or sys_platform == 'linux'" },
# { index = "pytorch-cpu", marker = "sys_platform == 'darwin'" },
# ]
# ///
impo
......
x.html')
@app.route('/v1/audio/transcriptions', methods=['POST'])
def audio_transcriptions():
if 'file' not in request.files:
return jsonify({"error": "请求中未包含文件部分"}), 400
file = request.files['file']
if file.filename == '':
return jsonify({"error": "未选择任何文件"}), 400
model_id = request.form.get('model', DEFAULT_MODEL)
model_name = 'large-v3' if model_id == 'large-v3-turbo' else model_id
if model_name not in ALLOWED_MODELS:
model_name = DEFAULT_MODEL
language = request.form.get('language') or None
prompt = request.form.get('prompt')
logging.info(f"收到请求: 模型='{model_id}', 语言='{language or '自动检测'}', 提示词='{'有' if prompt else '无'}'")
input_file_path = None
processed_wav_path = None
try:
suffix = os.path.splitext(file.filename)[1]
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
file.save(tmp.name)
input_file_path = tmp.name
logging.info(f"正在将上传的文件 '{file.filename}' 转换为标准的 16kHz 单声道 WAV 格式...")
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
processed_wav_path = tmp_wav.name
try:
(
ffmpeg
.input(input_file_path)
.output(processed_wav_path, ac=1, ar=16000, acodec='pcm_s16le', vn=None)
.run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
)
logging.info("文件格式转换成功。")
except ffmpeg.Error as e:
error_details = e.stderr.decode('utf-8', errors='ignore')
logging.error(f"FFmpeg 文件转换失败: {error_details}")
return jsonify({"error": f"音频/视频文件处理失败,可能是文件已损坏或格式不受支持。"}), 400
audio = whisperx.load_audio(processed_wav_path)
model = get_whisper_model(model_name)
# ---
# *** FIX IS HERE ***
# ---
transcribe_options = {}
if language:
transcribe_options['language'] = language
if prompt:
# 使用正确的参数名 'prompt'
transcribe_options['prompt'] = prompt
result = model.transcribe(audio, batch_size=BATCH_SIZE, **transcribe_options)
model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=DEVICE)
result = whisperx.align(result["segments"], model_a, metadata, audio, DEVICE, return_char_alignments=False)
diar_model = get_diarize_model()
if diar_model:
try:
diarize_segments = diar_model(audio)
result = whisperx.assign_word_speakers(diarize_segments, result)
except Exception as e:
logging.error(f"说话人分离运行时失败: {e}。将回退到单说话人模式。")
speakers = {segment.get('speaker') for segment in result["segments"] if 'speaker' in segment}
is_single_speaker = len(speakers)