Files
Moltbot/src/media-understanding/providers/openai/audio.ts
2026-02-02 04:58:32 -08:00

71 lines
2.2 KiB
TypeScript

import path from "node:path";
import type { AudioTranscriptionRequest, AudioTranscriptionResult } from "../../types.js";
import { fetchWithTimeoutGuarded, normalizeBaseUrl, readErrorResponse } from "../shared.js";
/** Fallback base URL handed to `normalizeBaseUrl` when building the transcription endpoint. */
export const DEFAULT_OPENAI_AUDIO_BASE_URL = "https://api.openai.com/v1";

/** Model id used when the request leaves the model unset or blank. */
const DEFAULT_OPENAI_AUDIO_MODEL = "gpt-4o-mini-transcribe";

/**
 * Picks the model id to send with a transcription request.
 *
 * @param model - Requested model id; may be missing or padded with whitespace.
 * @returns The trimmed id, or the default model when nothing usable was given.
 */
function resolveModel(model?: string): string {
  const requested = model?.trim();
  if (!requested) {
    return DEFAULT_OPENAI_AUDIO_MODEL;
  }
  return requested;
}
/**
 * Derives the filename attached to the multipart `file` part.
 *
 * The name is trimmed and reduced to its final path segment so directory
 * components are not sent with the upload. A missing, blank, or
 * separator-only name yields the generic `"audio"`.
 *
 * @param fileName - Caller-supplied name; may be missing or blank.
 * @returns A non-empty filename.
 */
function resolveUploadFileName(fileName?: string): string {
  const trimmed = fileName?.trim();
  if (!trimmed) {
    return "audio";
  }
  // basename() returns "" for inputs such as "/", hence the second fallback.
  return path.basename(trimmed) || "audio";
}

/**
 * Extracts the transcript from a decoded JSON response body.
 *
 * @param payload - Value produced by `Response.json()`; shape is not trusted.
 * @returns The trimmed `text` field, or `undefined` when the payload is not an
 *   object, `text` is not a string, or `text` is blank.
 */
function extractTranscriptText(payload: unknown): string | undefined {
  if (typeof payload !== "object" || payload === null) {
    return undefined;
  }
  const candidate = (payload as { text?: unknown }).text;
  if (typeof candidate !== "string") {
    return undefined;
  }
  return candidate.trim() || undefined;
}

/**
 * Sends audio bytes to `<baseUrl>/audio/transcriptions` as multipart form data
 * and returns the transcript.
 *
 * The form always carries `file` and `model`; `language` and `prompt` are added
 * only when non-blank. An `authorization: Bearer <apiKey>` header is set unless
 * the caller already supplied an `authorization` header.
 *
 * @param params - Audio buffer plus request options (model, base URL, headers,
 *   timeout, optional `fetchFn` override).
 * @returns The trimmed transcript text and the model id that was sent.
 * @throws Error when the response status is not ok (message includes the HTTP
 *   status and any detail from `readErrorResponse`), or when the response body
 *   has no non-empty string `text` field.
 */
export async function transcribeOpenAiCompatibleAudio(
  params: AudioTranscriptionRequest,
): Promise<AudioTranscriptionResult> {
  const fetchFn = params.fetchFn ?? fetch;
  const baseUrl = normalizeBaseUrl(params.baseUrl, DEFAULT_OPENAI_AUDIO_BASE_URL);
  // Only an explicitly supplied base URL opts the request into private-network targets.
  const allowPrivate = Boolean(params.baseUrl?.trim());
  const url = `${baseUrl}/audio/transcriptions`;
  const model = resolveModel(params.model);

  const form = new FormData();
  const bytes = new Uint8Array(params.buffer);
  const blob = new Blob([bytes], {
    type: params.mime ?? "application/octet-stream",
  });
  form.append("file", blob, resolveUploadFileName(params.fileName));
  form.append("model", model);
  const language = params.language?.trim();
  if (language) {
    form.append("language", language);
  }
  const prompt = params.prompt?.trim();
  if (prompt) {
    form.append("prompt", prompt);
  }

  const headers = new Headers(params.headers);
  // Respect a caller-provided authorization header instead of overwriting it.
  if (!headers.has("authorization")) {
    headers.set("authorization", `Bearer ${params.apiKey}`);
  }

  const { response: res, release } = await fetchWithTimeoutGuarded(
    url,
    {
      method: "POST",
      headers,
      body: form,
    },
    params.timeoutMs,
    fetchFn,
    allowPrivate ? { ssrfPolicy: { allowPrivateNetwork: true } } : undefined,
  );
  try {
    if (!res.ok) {
      const detail = await readErrorResponse(res);
      const suffix = detail ? `: ${detail}` : "";
      throw new Error(`Audio transcription failed (HTTP ${res.status})${suffix}`);
    }
    // Treat the body as unknown so a null or oddly shaped payload produces the
    // descriptive error below rather than a TypeError on property access.
    const payload: unknown = await res.json();
    const text = extractTranscriptText(payload);
    if (!text) {
      throw new Error("Audio transcription response missing text");
    }
    return { text, model };
  } finally {
    // Always hand back whatever the guarded fetch acquired, on success or failure.
    await release();
  }
}