refactor(mana/web): migrate dreams + memoro to /api/v1/voice/transcribe

The per-module /api/v1/memoro/transcribe and /api/v1/dreams/transcribe
endpoints were literal copies that proxied to mana-stt. Now that the
generic /api/v1/voice/transcribe endpoint exists (added with notes),
point both stores at it and delete the duplicates. -200 LOC, one place
to update STT auth or response shape from now on.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-08 16:05:49 +02:00
parent b841a24e73
commit b48c9ff80f
6 changed files with 10 additions and 216 deletions

View file

@ -177,7 +177,7 @@ export const dreamsStore = {
},
/**
* Upload an audio blob to /api/v1/dreams/transcribe and write the result
* Upload an audio blob to /api/v1/voice/transcribe and write the result
* back into the dream. Reset to idle on success, mark failed on error.
*/
async transcribeBlob(dreamId: string, blob: Blob, language?: string): Promise<void> {
@ -191,7 +191,7 @@ export const dreamsStore = {
form.append('file', blob, `dream${ext}`);
if (language) form.append('language', language);
const response = await fetch('/api/v1/dreams/transcribe', {
const response = await fetch('/api/v1/voice/transcribe', {
method: 'POST',
body: form,
});

View file

@ -64,7 +64,7 @@ export const memosStore = {
},
/**
* Upload an audio blob to /api/v1/memoro/transcribe and write the result
* Upload an audio blob to /api/v1/voice/transcribe and write the result
* back into the memo. Marks completed on success, failed on error.
*/
async transcribeBlob(memoId: string, blob: Blob, language?: string): Promise<void> {
@ -78,7 +78,7 @@ export const memosStore = {
form.append('file', blob, `memo${ext}`);
if (language) form.append('language', language);
const response = await fetch('/api/v1/memoro/transcribe', {
const response = await fetch('/api/v1/voice/transcribe', {
method: 'POST',
body: form,
});

View file

@ -1,103 +0,0 @@
/**
* POST /api/v1/dreams/transcribe
*
* Server-side proxy to mana-stt for the Dreams module's voice capture.
* The browser uploads an audio Blob; we forward it to mana-stt with the
* server-held API key and return the transcript JSON.
*
* Request: multipart/form-data with `file` (audio blob) and optional `language`
* Response: { text: string, language: string | null, durationSeconds: number | null }
*/
import { error, json } from '@sveltejs/kit';
import { env } from '$env/dynamic/private';
import type { RequestHandler } from './$types';
// Upper bound on the uploaded audio size; the POST handler rejects anything larger with HTTP 413.
const MAX_BYTES = 25 * 1024 * 1024; // 25 MB
/**
 * Decide whether an uploaded blob's MIME type may be forwarded upstream.
 *
 * @param mime - The blob's reported MIME type (may be empty).
 * @returns `true` for a missing type, `application/octet-stream`, or any
 *          `audio/*` / `video/*` type; `false` otherwise.
 */
function isAcceptableType(mime: string): boolean {
  // A missing or generic binary type is tolerated; upstream gets to validate it.
  const isUnspecified = !mime || mime === 'application/octet-stream';
  if (isUnspecified) {
    return true;
  }
  // video/* is allowed on purpose: m4a often reports video/mp4.
  return ['audio/', 'video/'].some((prefix) => mime.startsWith(prefix));
}
/**
 * Forward an uploaded audio blob to mana-stt and return the transcript.
 *
 * Reads `file` (required) and `language` (optional) from the multipart body,
 * validates the file, then posts it to `${MANA_STT_URL}/transcribe`, adding
 * the `X-API-Key` header when `MANA_STT_API_KEY` is set.
 *
 * @returns JSON `{ text, language, durationSeconds }`; `text` defaults to `''`,
 *          the other two to `null` when upstream omits them.
 * @throws 503 when `MANA_STT_URL` is not configured.
 * @throws 400 when the body is not form data, or the file is missing or empty.
 * @throws 413 when the file exceeds `MAX_BYTES`.
 * @throws 415 when the MIME type is rejected by `isAcceptableType`.
 * @throws 502 when mana-stt cannot be reached or answers with a non-JSON body.
 * @throws the upstream status when mana-stt answers with a non-OK response.
 */
export const POST: RequestHandler = async ({ request }) => {
  const sttUrl = env.MANA_STT_URL;
  const apiKey = env.MANA_STT_API_KEY;
  if (!sttUrl) {
    throw error(503, 'mana-stt is not configured (MANA_STT_URL missing)');
  }

  let incoming: FormData;
  try {
    incoming = await request.formData();
  } catch {
    throw error(400, 'Expected multipart/form-data with a file field');
  }

  const file = incoming.get('file');
  // FormData values may be File objects; only a non-empty string is a usable language hint.
  const languageField = incoming.get('language');
  const language = typeof languageField === 'string' && languageField ? languageField : null;

  if (!(file instanceof Blob)) {
    throw error(400, 'Missing file');
  }
  if (file.size === 0) {
    throw error(400, 'Empty audio');
  }
  if (file.size > MAX_BYTES) {
    throw error(413, `Audio too large (max ${MAX_BYTES / 1024 / 1024} MB)`);
  }
  if (!isAcceptableType(file.type)) {
    throw error(415, `Unsupported audio type: ${file.type}`);
  }

  // Pick a sensible filename + extension based on the blob mime type
  const ext = mimeToExtension(file.type);
  const filename = `dream${ext}`;

  const upstream = new FormData();
  upstream.append('file', file, filename);
  if (language) upstream.append('language', language);

  const headers: Record<string, string> = { Accept: 'application/json' };
  if (apiKey) headers['X-API-Key'] = apiKey;

  let response: Response;
  try {
    // Strip one trailing slash so the joined URL never contains `//transcribe`.
    response = await fetch(`${sttUrl.replace(/\/$/, '')}/transcribe`, {
      method: 'POST',
      headers,
      body: upstream,
    });
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    throw error(502, `Could not reach mana-stt: ${msg}`);
  }

  if (!response.ok) {
    const text = await response.text();
    throw error(response.status, `mana-stt error: ${text || response.statusText}`);
  }

  // A 2xx answer with an unparsable body is an upstream fault: report it as a
  // gateway error instead of letting the parse exception escape as a 500.
  let result: { text?: string; language?: string; duration_seconds?: number } | null;
  try {
    result = (await response.json()) as {
      text?: string;
      language?: string;
      duration_seconds?: number;
    } | null;
  } catch {
    throw error(502, 'mana-stt returned an invalid JSON response');
  }

  return json({
    text: result?.text ?? '',
    language: result?.language ?? null,
    durationSeconds: result?.duration_seconds ?? null,
  });
};
/**
 * Map a MIME type to the file extension used for the upstream filename.
 *
 * Matching is by substring, so parameters such as `;codecs=opus` and vendor
 * prefixes such as `audio/x-wav` are tolerated. Rules are checked in order and
 * the first hit wins.
 *
 * @param mime - The blob's reported MIME type (may be empty).
 * @returns An extension including the leading dot; `.webm` when nothing matches.
 */
function mimeToExtension(mime: string): string {
  const rules: ReadonlyArray<readonly [needles: readonly string[], ext: string]> = [
    [['webm'], '.webm'],
    [['ogg'], '.ogg'],
    [['mp4', 'm4a'], '.m4a'],
    // `audio/mp3` is a non-standard alias of `audio/mpeg` that some clients
    // report; without it an MP3 upload would be named `.webm`.
    [['mpeg', 'mp3'], '.mp3'],
    [['wav'], '.wav'],
    [['flac'], '.flac'],
  ];
  for (const [needles, ext] of rules) {
    if (needles.some((needle) => mime.includes(needle))) {
      return ext;
    }
  }
  return '.webm';
}

View file

@ -1,102 +0,0 @@
/**
* POST /api/v1/memoro/transcribe
*
* Server-side proxy to mana-stt for the Memoro module's voice capture.
* The browser uploads an audio Blob; we forward it to mana-stt with the
* server-held API key and return the transcript JSON.
*
* Request: multipart/form-data with `file` (audio blob) and optional `language`
* Response: { text: string, language: string | null, durationSeconds: number | null }
*/
import { error, json } from '@sveltejs/kit';
import { env } from '$env/dynamic/private';
import type { RequestHandler } from './$types';
// Upper bound on the uploaded audio size; the POST handler rejects anything larger with HTTP 413.
const MAX_BYTES = 25 * 1024 * 1024; // 25 MB
/**
 * Check whether the MIME type of an upload is one this proxy will pass on.
 *
 * @param mime - MIME type reported for the uploaded blob; may be empty.
 * @returns `true` when the type is empty, `application/octet-stream`, or
 *          starts with `audio/` or `video/`; otherwise `false`.
 */
function isAcceptableType(mime: string): boolean {
  switch (mime) {
    case '': // tolerate missing type and let upstream validate
    case 'application/octet-stream':
      return true;
    default: {
      const isAudio = mime.startsWith('audio/');
      const isVideo = mime.startsWith('video/'); // m4a often reports video/mp4
      return isAudio || isVideo;
    }
  }
}
/**
 * Forward an uploaded audio blob to mana-stt and return the transcript.
 *
 * Reads `file` (required) and `language` (optional) from the multipart body,
 * validates the file, then posts it to `${MANA_STT_URL}/transcribe`, adding
 * the `X-API-Key` header when `MANA_STT_API_KEY` is set.
 *
 * @returns JSON `{ text, language, durationSeconds }`; `text` defaults to `''`,
 *          the other two to `null` when upstream omits them.
 * @throws 503 when `MANA_STT_URL` is not configured.
 * @throws 400 when the body is not form data, or the file is missing or empty.
 * @throws 413 when the file exceeds `MAX_BYTES`.
 * @throws 415 when the MIME type is rejected by `isAcceptableType`.
 * @throws 502 when mana-stt cannot be reached or answers with a non-JSON body.
 * @throws the upstream status when mana-stt answers with a non-OK response.
 */
export const POST: RequestHandler = async ({ request }) => {
  const sttUrl = env.MANA_STT_URL;
  const apiKey = env.MANA_STT_API_KEY;
  if (!sttUrl) {
    throw error(503, 'mana-stt is not configured (MANA_STT_URL missing)');
  }

  let incoming: FormData;
  try {
    incoming = await request.formData();
  } catch {
    throw error(400, 'Expected multipart/form-data with a file field');
  }

  const file = incoming.get('file');
  // FormData values may be File objects; only a non-empty string is a usable language hint.
  const languageField = incoming.get('language');
  const language = typeof languageField === 'string' && languageField ? languageField : null;

  if (!(file instanceof Blob)) {
    throw error(400, 'Missing file');
  }
  if (file.size === 0) {
    throw error(400, 'Empty audio');
  }
  if (file.size > MAX_BYTES) {
    throw error(413, `Audio too large (max ${MAX_BYTES / 1024 / 1024} MB)`);
  }
  if (!isAcceptableType(file.type)) {
    throw error(415, `Unsupported audio type: ${file.type}`);
  }

  // The upstream filename carries an extension derived from the blob's MIME type.
  const ext = mimeToExtension(file.type);
  const filename = `memo${ext}`;

  const upstream = new FormData();
  upstream.append('file', file, filename);
  if (language) upstream.append('language', language);

  const headers: Record<string, string> = { Accept: 'application/json' };
  if (apiKey) headers['X-API-Key'] = apiKey;

  let response: Response;
  try {
    // Strip one trailing slash so the joined URL never contains `//transcribe`.
    response = await fetch(`${sttUrl.replace(/\/$/, '')}/transcribe`, {
      method: 'POST',
      headers,
      body: upstream,
    });
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    throw error(502, `Could not reach mana-stt: ${msg}`);
  }

  if (!response.ok) {
    const text = await response.text();
    throw error(response.status, `mana-stt error: ${text || response.statusText}`);
  }

  // A 2xx answer with an unparsable body is an upstream fault: report it as a
  // gateway error instead of letting the parse exception escape as a 500.
  let result: { text?: string; language?: string; duration_seconds?: number } | null;
  try {
    result = (await response.json()) as {
      text?: string;
      language?: string;
      duration_seconds?: number;
    } | null;
  } catch {
    throw error(502, 'mana-stt returned an invalid JSON response');
  }

  return json({
    text: result?.text ?? '',
    language: result?.language ?? null,
    durationSeconds: result?.duration_seconds ?? null,
  });
};
/**
 * Choose the file extension for the upstream filename from a MIME type.
 *
 * The MIME string is searched for known format tokens, so codec parameters
 * and `x-` prefixed variants still match. Checks run top to bottom and the
 * first match is returned.
 *
 * @param mime - MIME type reported for the uploaded blob; may be empty.
 * @returns The extension with its leading dot, or `.webm` as the fallback.
 */
function mimeToExtension(mime: string): string {
  const has = (token: string): boolean => mime.includes(token);

  if (has('webm')) return '.webm';
  if (has('ogg')) return '.ogg';
  if (has('mp4') || has('m4a')) return '.m4a';
  // `audio/mp3` is a non-standard alias of `audio/mpeg` that some clients
  // report; without it an MP3 upload would be named `.webm`.
  if (has('mpeg') || has('mp3')) return '.mp3';
  if (has('wav')) return '.wav';
  if (has('flac')) return '.flac';
  return '.webm';
}

View file

@ -113,10 +113,10 @@ The generator reads `.env.development` and creates app-specific `.env` files wit
### Speech-to-Text (mana-stt)
Used by the unified Mana web app's voice features (Memoro recording, Dreams voice capture, etc).
The browser never talks to mana-stt directly — requests go through the SvelteKit server-side proxy
(`/api/v1/memoro/transcribe`, `/api/v1/dreams/transcribe`) which attaches the API key from
`MANA_STT_API_KEY`. Keep that key out of the browser bundle.
Used by the unified Mana web app's voice features (Memoro recording, Dreams voice capture, Notes
voice memos, Todo voice quick-add, etc). The browser never talks to mana-stt directly — requests
go through the SvelteKit server-side proxy at `/api/v1/voice/transcribe` which attaches the API
key from `MANA_STT_API_KEY`. Keep that key out of the browser bundle.
| Variable | Description | Default |
|----------|-------------|---------|

View file

@ -275,9 +275,8 @@ Other 502 root causes to check, in order of likelihood:
### API key for STT proxy
The unified mana-web container's `/api/v1/memoro/transcribe` and
`/api/v1/dreams/transcribe` proxies need `MANA_STT_API_KEY` to authenticate
against `gpu-stt.mana.how`. The key:
The unified mana-web container's `/api/v1/voice/transcribe` proxy needs
`MANA_STT_API_KEY` to authenticate against `gpu-stt.mana.how`. The key:
- Lives in **Mac Mini `~/projects/mana-monorepo/.env`** (gitignored)
- Is referenced from `docker-compose.macmini.yml` as `${MANA_STT_API_KEY:-}`