mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
feat(memoro): voice recording → mana-stt transcription pipeline
Adds end-to-end browser voice capture for the Memoro module, mirroring the existing dreams pattern: MediaRecorder → SvelteKit server proxy → mana-stt on the Windows GPU box via Cloudflare tunnel. Recording UI lives in /memoro page header (mic button + live timer + cancel + sticky-permission retry). Server proxy at /api/v1/memoro/transcribe forwards the blob with the server-held X-API-Key. memosStore.createFromVoice creates a placeholder memo with processingStatus='processing' and fires transcribeBlob in the background, which writes the transcript and flips status on completion (or 'failed' with error in metadata). Also corrects the mana-stt hostname across the repo: stt-api.mana.how (which never existed in DNS) → gpu-stt.mana.how (the actual Cloudflare tunnel route to the Windows GPU box). Adds an ENVIRONMENT_VARIABLES.md section explaining how to obtain MANA_STT_API_KEY and where the tunnel terminates. Adds tunnel health probes to the mac-mini health-check script so we catch tunnel-side breakage in addition to LAN-side. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4d9bf78f41
commit
c5aeaf5e7f
9 changed files with 568 additions and 8 deletions
|
|
@ -288,10 +288,11 @@ CALENDAR_BACKEND_URL=http://localhost:3014
|
|||
CALENDAR_DATABASE_URL=postgresql://mana:devpassword@localhost:5432/mana_platform
|
||||
|
||||
# Speech-to-Text Service (mana-stt)
|
||||
# Production: https://stt-api.mana.how
|
||||
# Local dev: http://localhost:3020
|
||||
STT_URL=https://stt-api.mana.how
|
||||
# API key for mana-stt (set in your local .env, never commit a real key)
|
||||
# Production: https://gpu-stt.mana.how (Cloudflare tunnel → Windows GPU box)
|
||||
# Local dev: http://localhost:3020 (or http://192.168.178.11:3020 from LAN)
|
||||
STT_URL=https://gpu-stt.mana.how
|
||||
# API key for mana-stt — DO NOT COMMIT a real key.
|
||||
# See docs/ENVIRONMENT_VARIABLES.md for where to obtain it.
|
||||
MANA_STT_API_KEY=
|
||||
|
||||
# ============================================
|
||||
|
|
|
|||
245
apps/mana/apps/web/src/lib/modules/memoro/recorder.svelte.ts
Normal file
245
apps/mana/apps/web/src/lib/modules/memoro/recorder.svelte.ts
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
/**
|
||||
* Browser audio recorder for the Memoro voice-capture feature.
|
||||
*
|
||||
* Uses MediaRecorder under the hood. Exposes a small reactive state object
|
||||
* that components can read to render the mic button state and elapsed time.
|
||||
*/
|
||||
|
||||
/**
 * Lifecycle phases of the recorder, in the order a successful capture
 * passes through them (ending back at `idle`).
 */
export type RecorderStatus =
	| 'idle'
	| 'requesting'
	| 'recording'
	| 'stopping';
|
||||
|
||||
/** Payload handed to the caller once a recording has been stopped. */
export interface RecordingResult {
	/** The recorded audio, assembled from the MediaRecorder chunks. */
	blob: Blob;
	/** Length of the recording in milliseconds. */
	durationMs: number;
	/** MIME type the blob was created with. */
	mimeType: string;
}
|
||||
|
||||
/**
 * Reactive wrapper around the browser MediaRecorder API.
 *
 * `status`, `error` and `elapsedMs` are Svelte `$state` fields that the UI
 * reads directly. A capture is driven with `start()` → `stop()` (resolves
 * with the recorded blob) or `start()` → `cancel()` (discards everything and
 * rejects a pending `stop()` promise with `Error('cancelled')`).
 *
 * Every capture attempt gets a session number. Asynchronous continuations
 * and MediaRecorder event handlers compare their captured session with the
 * current one and bail out when they are stale, so a cancelled or failed
 * attempt can never touch the state of a later one.
 */
class MemoRecorder {
	/** Current lifecycle phase. */
	status = $state<RecorderStatus>('idle');
	/** Last user-facing error message, or null when there is none. */
	error = $state<string | null>(null);
	/** Milliseconds since recording started, refreshed every 100 ms. */
	elapsedMs = $state(0);

	#mediaRecorder: MediaRecorder | null = null;
	#stream: MediaStream | null = null;
	#chunks: Blob[] = [];
	#startedAt = 0;
	#tickHandle: ReturnType<typeof setInterval> | null = null;
	#resolve: ((result: RecordingResult) => void) | null = null;
	#reject: ((reason: Error) => void) | null = null;
	/** Incremented for every start attempt and on cancel/failure. */
	#session = 0;

	/** True when both getUserMedia and MediaRecorder exist in this runtime. */
	get isAvailable(): boolean {
		return (
			typeof navigator !== 'undefined' &&
			!!navigator.mediaDevices?.getUserMedia &&
			typeof MediaRecorder !== 'undefined'
		);
	}

	/** True when running in a window whose `isSecureContext` flag is set. */
	get isSecureContext(): boolean {
		return typeof window !== 'undefined' && window.isSecureContext === true;
	}

	/**
	 * Ask for the microphone and begin recording.
	 *
	 * Never rejects: failures are reported through `error`, and `status`
	 * returns to `idle`. A call while the recorder is not idle is a no-op.
	 *
	 * @param options.force Skip the Permissions API pre-check and call
	 *   getUserMedia directly, so the browser's own error can be surfaced.
	 */
	async start(options: { force?: boolean } = {}): Promise<void> {
		if (this.status !== 'idle') return;

		if (!this.isSecureContext) {
			const host = typeof window !== 'undefined' ? window.location.host : '';
			this.error = `Mikrofon-Zugriff braucht eine sichere Verbindung. Öffne die App über https:// oder http://localhost statt http://${host}.`;
			return;
		}

		if (!this.isAvailable) {
			this.error = 'Audio-Aufnahme wird in diesem Browser nicht unterstützt.';
			return;
		}

		// Leave 'idle' before the first await so a second click during the
		// permission probe cannot start a parallel capture (which would open
		// two streams and leak one of them).
		const session = ++this.#session;
		this.status = 'requesting';

		if (!options.force) {
			const stickyDenied = await this.#checkStickyDeny();
			if (session !== this.#session) return; // cancelled meanwhile
			if (stickyDenied) {
				this.error = this.#stickyDenyMessage();
				this.status = 'idle';
				return;
			}
		}

		this.error = null;

		let stream: MediaStream;
		try {
			stream = await navigator.mediaDevices.getUserMedia({
				audio: {
					echoCancellation: true,
					noiseSuppression: true,
					autoGainControl: true,
				},
			});
		} catch (e) {
			if (session === this.#session) {
				this.error = this.#explainError(e);
				this.status = 'idle';
			}
			return;
		}

		if (session !== this.#session) {
			// cancel() ran while the permission prompt was open: release the
			// microphone we were just handed instead of starting to record.
			stream.getTracks().forEach((t) => t.stop());
			return;
		}
		this.#stream = stream;

		const mimeType = pickSupportedMimeType();
		let recorder: MediaRecorder;
		try {
			recorder = new MediaRecorder(stream, mimeType ? { mimeType } : {});
		} catch (e) {
			this.#abortStart(e);
			return;
		}

		this.#mediaRecorder = recorder;
		this.#chunks = [];
		recorder.ondataavailable = (event) => {
			if (session !== this.#session) return;
			if (event.data && event.data.size > 0) this.#chunks.push(event.data);
		};
		recorder.onerror = (event: Event) => {
			if (session !== this.#session) return;
			const err = (event as Event & { error?: Error }).error;
			this.#failWith(err ?? new Error('MediaRecorder error'));
		};
		recorder.onstop = () => {
			if (session !== this.#session) return;
			// Measure from the clock rather than the 100 ms tick value so the
			// reported duration is not up to one tick short.
			const durationMs = Date.now() - this.#startedAt;
			const type = recorder.mimeType || mimeType || 'audio/webm';
			const blob = new Blob(this.#chunks, { type });
			this.#cleanupStream();
			this.#cleanupTimer();
			this.#releaseRecorder();
			this.#chunks = [];
			this.status = 'idle';
			this.elapsedMs = 0;
			const resolve = this.#resolve;
			this.#resolve = null;
			this.#reject = null;
			resolve?.({ blob, durationMs, mimeType: type });
		};

		try {
			recorder.start();
		} catch (e) {
			// Without this the timer would keep running, the mic would stay
			// open and status would be stuck at 'requesting'.
			this.#abortStart(e);
			return;
		}

		this.#startedAt = Date.now();
		this.elapsedMs = 0;
		this.#tickHandle = setInterval(() => {
			this.elapsedMs = Date.now() - this.#startedAt;
		}, 100);
		this.status = 'recording';
	}

	/**
	 * Finish the current recording.
	 *
	 * @returns A promise that resolves with the recorded blob once the
	 *   recorder has emitted its stop event. Rejects with
	 *   `Error('Not recording')` when no recording is active, with
	 *   `Error('cancelled')` when `cancel()` is called first, or with the
	 *   recorder's error when it fails.
	 */
	stop(): Promise<RecordingResult> {
		const recorder = this.#mediaRecorder;
		if (this.status !== 'recording' || !recorder) {
			return Promise.reject(new Error('Not recording'));
		}
		this.status = 'stopping';
		return new Promise<RecordingResult>((resolve, reject) => {
			this.#resolve = resolve;
			this.#reject = reject;
			try {
				recorder.stop();
			} catch (e) {
				// Reset state as well as rejecting; otherwise the UI would
				// stay in 'stopping' forever.
				this.#failWith(e instanceof Error ? e : new Error(String(e)));
			}
		});
	}

	/** Discard the current attempt (requesting, recording or stopping). */
	cancel(): void {
		if (this.status === 'idle') return;
		this.#session++; // invalidate pending continuations and handlers
		this.#releaseRecorder();
		this.#cleanupStream();
		this.#cleanupTimer();
		this.#chunks = [];
		this.elapsedMs = 0;
		this.status = 'idle';
		const reject = this.#reject;
		this.#resolve = null;
		this.#reject = null;
		reject?.(new Error('cancelled'));
	}

	/** Reset to idle after a recorder failure and reject a pending stop(). */
	#failWith(err: Error) {
		this.#session++;
		this.error = err.message;
		this.#releaseRecorder();
		this.#cleanupStream();
		this.#cleanupTimer();
		this.#chunks = [];
		this.status = 'idle';
		this.elapsedMs = 0;
		const reject = this.#reject;
		this.#resolve = null;
		this.#reject = null;
		reject?.(err);
	}

	/** Undo a start attempt whose MediaRecorder could not be created or started. */
	#abortStart(cause: unknown) {
		const msg = cause instanceof Error ? cause.message : String(cause);
		this.error = `MediaRecorder konnte nicht gestartet werden: ${msg}`;
		this.#releaseRecorder();
		this.#cleanupStream();
		this.#cleanupTimer();
		this.#chunks = [];
		this.elapsedMs = 0;
		this.status = 'idle';
	}

	/** Detach handlers from the recorder, stop it if still active, and drop it. */
	#releaseRecorder() {
		const recorder = this.#mediaRecorder;
		this.#mediaRecorder = null;
		if (!recorder) return;
		recorder.ondataavailable = null;
		recorder.onerror = null;
		recorder.onstop = null;
		if (recorder.state !== 'inactive') {
			try {
				recorder.stop();
			} catch {
				// Best effort: the recorder is being discarded anyway.
			}
		}
	}

	/** Help text shown when the permission pre-check reports 'denied'. */
	#stickyDenyMessage(): string {
		const isMac =
			typeof navigator !== 'undefined' && /Mac|iPhone|iPad/i.test(navigator.platform || '');
		if (isMac) {
			return [
				'Mikrofon-Zugriff blockiert. Auf macOS hat das fast immer eine von zwei Ursachen:',
				'1) System-Einstellungen → Datenschutz & Sicherheit → Mikrofon: dein Browser muss in der Liste aktiviert sein. Wenn er fehlt oder deaktiviert ist, schalte ihn ein und starte den Browser komplett neu (Cmd+Q, nicht nur Tab schließen).',
				'2) Browser-Einstellung: chrome://settings/content/microphone (Chrome) oder about:preferences#privacy (Firefox) → "localhost" darf nicht in der Block-Liste stehen.',
				'Tipp: Klicke auf "Trotzdem versuchen" um den exakten Browser-Fehler zu sehen.',
			].join('\n');
		}
		return [
			'Mikrofon-Zugriff blockiert. Mögliche Ursachen:',
			'1) Browser-Einstellungen → Mikrofon → "localhost" darf nicht blockiert sein.',
			'2) System-Einstellungen → Datenschutz → Mikrofon → Browser muss erlaubt sein.',
			'Tipp: Klicke auf "Trotzdem versuchen" um den exakten Browser-Fehler zu sehen.',
		].join('\n');
	}

	/**
	 * Query the Permissions API for the microphone.
	 *
	 * @returns true only when the query succeeds and reports 'denied';
	 *   false when the API is missing or the query throws.
	 */
	async #checkStickyDeny(): Promise<boolean> {
		try {
			const perms = (
				navigator as Navigator & {
					permissions?: {
						query: (descriptor: { name: string }) => Promise<{ state: string }>;
					};
				}
			).permissions;
			if (!perms?.query) return false;
			const status = await perms.query({ name: 'microphone' });
			return status.state === 'denied';
		} catch {
			return false;
		}
	}

	/** Map a getUserMedia failure to a user-facing message. */
	#explainError(e: unknown): string {
		const err = e instanceof Error ? e : new Error(String(e));
		const name = err.name || '';
		const msg = err.message || '';

		if (name === 'NotAllowedError' || /denied|permission/i.test(msg)) {
			return 'Mikrofon-Zugriff wurde verweigert. Klicke in der Adressleiste auf das Schloss-Symbol und erlaube den Zugriff.';
		}
		if (name === 'NotFoundError' || /not.?found|no.?device/i.test(msg)) {
			return 'Kein Mikrofon gefunden. Schließe ein Mikrofon an oder prüfe deine System-Einstellungen.';
		}
		if (name === 'NotReadableError' || /in use|busy/i.test(msg)) {
			return 'Mikrofon ist gerade von einer anderen Anwendung belegt.';
		}
		if (name === 'SecurityError') {
			return 'Mikrofon-Zugriff vom Browser blockiert (Sicherheitsrichtlinie).';
		}
		return `Mikrofon konnte nicht geöffnet werden: ${msg || name || 'Unbekannter Fehler'}`;
	}

	/** Stop every track of the held stream and forget it. */
	#cleanupStream() {
		this.#stream?.getTracks().forEach((t) => t.stop());
		this.#stream = null;
	}

	/** Stop the elapsed-time ticker if it is running. */
	#cleanupTimer() {
		if (this.#tickHandle !== null) {
			clearInterval(this.#tickHandle);
			this.#tickHandle = null;
		}
	}
}
|
||||
|
||||
/**
 * Return the first MIME type from `candidates` that this runtime's
 * MediaRecorder reports as supported.
 *
 * @param candidates MIME types in order of preference. Defaults to the
 *   list this module has always probed.
 * @returns The first supported candidate, or null when MediaRecorder (or
 *   its `isTypeSupported` method) is unavailable or nothing matches.
 */
function pickSupportedMimeType(
	candidates: readonly string[] = [
		'audio/webm;codecs=opus',
		'audio/webm',
		'audio/ogg;codecs=opus',
		'audio/mp4',
	]
): string | null {
	if (typeof MediaRecorder === 'undefined') return null;
	if (typeof MediaRecorder.isTypeSupported !== 'function') return null;
	return candidates.find((c) => MediaRecorder.isTypeSupported(c)) ?? null;
}
|
||||
|
||||
export const memoRecorder = new MemoRecorder();
|
||||
|
||||
/**
 * Format a millisecond duration as `m:ss` (minutes are not capped at 59).
 *
 * Negative, NaN and infinite inputs are treated as zero so the UI never
 * shows strings like "-1:-1" or "NaN:NaN".
 *
 * @param ms Duration in milliseconds.
 * @returns The formatted duration, e.g. `"1:05"`.
 */
export function formatElapsed(ms: number): string {
	const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0;
	const totalSec = Math.floor(safeMs / 1000);
	const min = Math.floor(totalSec / 60);
	const sec = totalSec % 60;
	return `${min}:${sec.toString().padStart(2, '0')}`;
}
|
||||
|
|
@ -23,14 +23,16 @@ export const memosStore = {
|
|||
transcript?: string;
|
||||
language?: string;
|
||||
blueprintId?: string;
|
||||
audioDurationMs?: number;
|
||||
processingStatus?: LocalMemo['processingStatus'];
|
||||
}) {
|
||||
const newLocal: LocalMemo = {
|
||||
id: crypto.randomUUID(),
|
||||
title: data.title ?? null,
|
||||
intro: null,
|
||||
transcript: data.transcript ?? null,
|
||||
audioDurationMs: null,
|
||||
processingStatus: data.transcript ? 'completed' : 'pending',
|
||||
audioDurationMs: data.audioDurationMs ?? null,
|
||||
processingStatus: data.processingStatus ?? (data.transcript ? 'completed' : 'pending'),
|
||||
isArchived: false,
|
||||
isPinned: false,
|
||||
isPublic: false,
|
||||
|
|
@ -42,6 +44,73 @@ export const memosStore = {
|
|||
return toMemo(newLocal);
|
||||
},
|
||||
|
||||
/**
 * Store a placeholder memo for a just-finished voice recording and kick
 * off its transcription without waiting for it.
 *
 * The memo is created with processingStatus 'processing' and returned
 * right away, so callers can navigate to it while the transcript is
 * still being produced.
 */
async createFromVoice(blob: Blob, durationMs: number, language?: string) {
	const placeholder = await this.create({
		processingStatus: 'processing',
		audioDurationMs: durationMs,
		language,
	});

	// Deliberately not awaited: transcribeBlob writes its outcome to the memo.
	void this.transcribeBlob(placeholder.id, blob, language);

	return placeholder;
},
|
||||
|
||||
/**
 * Upload an audio blob to /api/v1/memoro/transcribe and write the result
 * back into the memo.
 *
 * On success the memo gets the trimmed transcript and processingStatus
 * 'completed'. On any failure it gets processingStatus 'failed' and the
 * error message under `metadata.error`. If the memo no longer exists,
 * nothing is written.
 *
 * This method never rejects. It is called fire-and-forget, so a failure
 * while recording the failure is logged instead of surfacing as an
 * unhandled promise rejection.
 *
 * @param memoId   Id of the memo to update.
 * @param blob     Recorded audio.
 * @param language Optional language hint forwarded to the transcription
 *   endpoint. A language already stored on the memo takes precedence over
 *   the one reported in the response.
 */
async transcribeBlob(memoId: string, blob: Blob, language?: string): Promise<void> {
	try {
		// The recorder may produce webm, ogg or mp4 depending on the browser.
		const ext = blob.type.includes('webm')
			? '.webm'
			: blob.type.includes('ogg')
				? '.ogg'
				: blob.type.includes('mp4')
					? '.m4a'
					: '.audio';

		const form = new FormData();
		form.append('file', blob, `memo${ext}`);
		if (language) form.append('language', language);

		const response = await fetch('/api/v1/memoro/transcribe', {
			method: 'POST',
			// SvelteKit chooses JSON vs. HTML error bodies from the Accept header.
			headers: { Accept: 'application/json' },
			body: form,
		});

		if (!response.ok) {
			const raw = await response.text();
			let detail = raw;
			try {
				// Prefer the `message` field of a JSON error body over the raw JSON.
				const parsed: unknown = JSON.parse(raw);
				if (typeof parsed === 'object' && parsed !== null && 'message' in parsed) {
					const message = (parsed as { message: unknown }).message;
					if (typeof message === 'string' && message) detail = message;
				}
			} catch {
				// Body was not JSON; keep the raw text.
			}
			throw new Error(detail || `HTTP ${response.status}`);
		}

		const result = (await response.json()) as {
			text: string;
			language: string | null;
			durationSeconds: number | null;
		};

		const transcript = (result.text ?? '').trim();
		const existing = await memoTable.get(memoId);
		if (!existing) return; // memo was deleted while transcription ran

		await memoTable.update(memoId, {
			transcript,
			language: existing.language ?? result.language ?? null,
			processingStatus: 'completed',
			updatedAt: new Date().toISOString(),
		});
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		try {
			const current = await memoTable.get(memoId);
			if (!current) return;
			await memoTable.update(memoId, {
				processingStatus: 'failed',
				metadata: { ...((current.metadata as object) ?? {}), error: msg },
				updatedAt: new Date().toISOString(),
			});
		} catch (writeError) {
			console.error(`[memoro] could not mark memo ${memoId} as failed (${msg})`, writeError);
		}
	}
},
|
||||
|
||||
/** Update a memo's fields. */
|
||||
async update(
|
||||
id: string,
|
||||
|
|
|
|||
|
|
@ -275,7 +275,7 @@
|
|||
<div>
|
||||
<p class="text-sm font-medium mb-2">Speech-to-Text (STT)</p>
|
||||
<pre class="bg-muted p-3 rounded-lg text-sm overflow-x-auto"><code
|
||||
>curl -X POST https://stt-api.mana.how/transcribe \
|
||||
>curl -X POST https://gpu-stt.mana.how/transcribe \
|
||||
-H "X-API-Key: sk_live_your_key_here" \
|
||||
-F "audio=@audio.mp3"</code
|
||||
></pre>
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
import { goto } from '$app/navigation';
|
||||
import { getContext } from 'svelte';
|
||||
import { memosStore } from '$lib/modules/memoro/stores/memos.svelte';
|
||||
import { memoRecorder, formatElapsed } from '$lib/modules/memoro/recorder.svelte';
|
||||
import {
|
||||
filterBySearch,
|
||||
filterByTag,
|
||||
|
|
@ -43,6 +44,44 @@
|
|||
goto(`/memoro/${memo.id}`);
|
||||
}
|
||||
|
||||
// ── Voice capture ─────────────────────────────────────────
|
||||
let recError = $state<string | null>(null);
|
||||
|
||||
/**
 * Mic button handler: starts a recording when idle, or stops the running
 * one and turns it into a memo. Clicks in any other phase only clear the
 * previous error.
 */
async function handleMicClick() {
	recError = null;
	const phase = memoRecorder.status;

	if (phase === 'idle') {
		await memoRecorder.start();
		if (memoRecorder.error) recError = memoRecorder.error;
		return;
	}

	if (phase !== 'recording') return;

	try {
		const { blob, durationMs } = await memoRecorder.stop();
		// Discard accidental taps (anything under half a second).
		if (durationMs < 500) {
			recError = 'Aufnahme war zu kurz.';
			return;
		}
		const memo = await memosStore.createFromVoice(blob, durationMs, 'de');
		goto(`/memoro/${memo.id}`);
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		// A user-initiated cancel is not an error worth showing.
		if (msg !== 'cancelled') recError = msg;
	}
}
|
||||
|
||||
/**
 * Retry opening the microphone with `force: true`, then mirror whatever
 * error the recorder reports into the page's error banner.
 */
async function forceRetryMic() {
	recError = null;
	await memoRecorder.start({ force: true });
	const failure = memoRecorder.error;
	if (failure) recError = failure;
}
|
||||
|
||||
/** Discard the recording in progress; no memo is created. */
function cancelRecording(): void {
	memoRecorder.cancel();
}
|
||||
|
||||
async function handlePin(e: Event, id: string, isPinned: boolean) {
|
||||
e.stopPropagation();
|
||||
if (isPinned) {
|
||||
|
|
@ -94,6 +133,41 @@
|
|||
<TagIcon size={16} />
|
||||
Tags
|
||||
</a>
|
||||
<button
|
||||
onclick={handleMicClick}
|
||||
disabled={memoRecorder.status === 'requesting' || memoRecorder.status === 'stopping'}
|
||||
aria-label={memoRecorder.status === 'recording' ? 'Aufnahme beenden' : 'Aufnahme starten'}
|
||||
class="flex items-center gap-2 rounded-lg px-4 py-2 text-sm font-medium transition-colors disabled:opacity-60"
|
||||
class:recording={memoRecorder.status === 'recording'}
|
||||
style:background-color={memoRecorder.status === 'recording'
|
||||
? '#ef4444'
|
||||
: 'hsl(var(--muted))'}
|
||||
style:color={memoRecorder.status === 'recording' ? 'white' : 'hsl(var(--foreground))'}
|
||||
>
|
||||
{#if memoRecorder.status === 'recording'}
|
||||
<span class="rec-dot"></span>
|
||||
{formatElapsed(memoRecorder.elapsedMs)}
|
||||
{:else if memoRecorder.status === 'requesting'}
|
||||
<Microphone size={18} />
|
||||
Mikro öffnen…
|
||||
{:else if memoRecorder.status === 'stopping'}
|
||||
<Microphone size={18} />
|
||||
Verarbeite…
|
||||
{:else}
|
||||
<Microphone size={18} />
|
||||
Aufnehmen
|
||||
{/if}
|
||||
</button>
|
||||
{#if memoRecorder.status === 'recording'}
|
||||
<button
|
||||
onclick={cancelRecording}
|
||||
title="Aufnahme verwerfen"
|
||||
aria-label="Aufnahme verwerfen"
|
||||
class="rounded-lg border border-[hsl(var(--border))] px-3 py-2 text-sm text-[hsl(var(--muted-foreground))] hover:bg-[hsl(var(--muted))]"
|
||||
>
|
||||
×
|
||||
</button>
|
||||
{/if}
|
||||
<button
|
||||
onclick={handleNewMemo}
|
||||
class="flex items-center gap-2 rounded-lg bg-[hsl(var(--primary))] px-4 py-2 text-sm font-medium text-[hsl(var(--primary-foreground))] transition-colors hover:opacity-90"
|
||||
|
|
@ -104,6 +178,17 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
{#if recError}
|
||||
<div
|
||||
class="rounded-lg border border-red-500/30 bg-red-500/10 p-3 text-sm text-red-600 dark:text-red-300"
|
||||
>
|
||||
<p class="whitespace-pre-line">{recError}</p>
|
||||
<button onclick={forceRetryMic} class="mt-2 text-xs font-medium underline hover:no-underline">
|
||||
Trotzdem versuchen
|
||||
</button>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Search -->
|
||||
<div class="relative">
|
||||
<MagnifyingGlass
|
||||
|
|
@ -260,3 +345,23 @@
|
|||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.rec-dot {
|
||||
display: inline-block;
|
||||
width: 0.625rem;
|
||||
height: 0.625rem;
|
||||
border-radius: 9999px;
|
||||
background: white;
|
||||
animation: pulse 1s ease-in-out infinite;
|
||||
}
|
||||
@keyframes pulse {
|
||||
0%,
|
||||
100% {
|
||||
opacity: 1;
|
||||
}
|
||||
50% {
|
||||
opacity: 0.4;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,102 @@
|
|||
/**
|
||||
* POST /api/v1/memoro/transcribe
|
||||
*
|
||||
* Server-side proxy to mana-stt for the Memoro module's voice capture.
|
||||
* The browser uploads an audio Blob; we forward it to mana-stt with the
|
||||
* server-held API key and return the transcript JSON.
|
||||
*
|
||||
* Request: multipart/form-data with `file` (audio blob) and optional `language`
|
||||
* Response: { text: string, language: string | null, durationSeconds: number | null }
|
||||
*/
|
||||
|
||||
import { error, json } from '@sveltejs/kit';
|
||||
import { env } from '$env/dynamic/private';
|
||||
import type { RequestHandler } from './$types';
|
||||
|
||||
const MAX_BYTES = 25 * 1024 * 1024; // 25 MB
|
||||
|
||||
/**
 * Decide whether an uploaded blob's MIME type may be forwarded upstream.
 *
 * The check is case-insensitive and ignores parameters (`;codecs=…`,
 * `;charset=…`). A missing type and `application/octet-stream` are let
 * through so the upstream service can validate the content itself.
 *
 * @param mime The blob's reported MIME type (may be empty).
 * @returns true for empty, `application/octet-stream`, `audio/*` and
 *   `video/*` types; false otherwise.
 */
function isAcceptableType(mime: string): boolean {
	if (!mime) return true; // tolerate missing type — let upstream validate
	const essence = (mime.split(';', 1)[0] ?? '').trim().toLowerCase();
	if (!essence) return true;
	if (essence === 'application/octet-stream') return true;
	return essence.startsWith('audio/') || essence.startsWith('video/'); // m4a often reports video/mp4
}
|
||||
|
||||
/**
 * Forward an uploaded audio file to mana-stt and return its transcript.
 *
 * Responds with `{ text, language, durationSeconds }` on success.
 * Error statuses: 400 (bad form / missing or empty file), 413 (too large),
 * 415 (unacceptable MIME type), 502 (upstream unreachable, or upstream
 * returned an unusable response), 503 (MANA_STT_URL not set). Upstream
 * 4xx/5xx statuses are passed through.
 *
 * NOTE(review): no authentication check is visible in this handler —
 * confirm that a hook or layout guards this route, since every request
 * spends the server-held API key.
 */
export const POST: RequestHandler = async ({ request }) => {
	const sttUrl = env.MANA_STT_URL;
	const apiKey = env.MANA_STT_API_KEY;

	if (!sttUrl) {
		throw error(503, 'mana-stt is not configured (MANA_STT_URL missing)');
	}

	// Reject clearly oversized uploads before buffering the whole body.
	// The slack allows for multipart boundaries and the other form fields.
	const MULTIPART_SLACK_BYTES = 1024 * 1024;
	const declaredLength = Number(request.headers.get('content-length'));
	if (Number.isFinite(declaredLength) && declaredLength > MAX_BYTES + MULTIPART_SLACK_BYTES) {
		throw error(413, `Audio too large (max ${MAX_BYTES / 1024 / 1024} MB)`);
	}

	let incoming: FormData;
	try {
		incoming = await request.formData();
	} catch {
		throw error(400, 'Expected multipart/form-data with a file field');
	}

	const file = incoming.get('file');
	// A form field can be a File; only accept a non-empty string here.
	const rawLanguage = incoming.get('language');
	const language =
		typeof rawLanguage === 'string' && rawLanguage.trim() ? rawLanguage.trim() : null;

	if (!(file instanceof Blob)) {
		throw error(400, 'Missing file');
	}
	if (file.size === 0) {
		throw error(400, 'Empty audio');
	}
	if (file.size > MAX_BYTES) {
		throw error(413, `Audio too large (max ${MAX_BYTES / 1024 / 1024} MB)`);
	}
	if (!isAcceptableType(file.type)) {
		throw error(415, `Unsupported audio type: ${file.type}`);
	}

	const ext = mimeToExtension(file.type);
	const filename = `memo${ext}`;

	const upstream = new FormData();
	upstream.append('file', file, filename);
	if (language) upstream.append('language', language);

	const headers: Record<string, string> = { Accept: 'application/json' };
	if (apiKey) headers['X-API-Key'] = apiKey;

	let response: Response;
	try {
		response = await fetch(`${sttUrl.replace(/\/$/, '')}/transcribe`, {
			method: 'POST',
			headers,
			body: upstream,
		});
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		throw error(502, `Could not reach mana-stt: ${msg}`);
	}

	if (!response.ok) {
		const text = await response.text().catch(() => '');
		// error() only accepts 400–599; report anything else as a bad gateway.
		const status = response.status >= 400 && response.status <= 599 ? response.status : 502;
		throw error(status, `mana-stt error: ${text || response.statusText}`);
	}

	let result: { text?: unknown; language?: unknown; duration_seconds?: unknown } | null;
	try {
		result = (await response.json()) as typeof result;
	} catch {
		throw error(502, 'mana-stt returned a non-JSON response');
	}

	return json({
		text: typeof result?.text === 'string' ? result.text : '',
		language: typeof result?.language === 'string' ? result.language : null,
		durationSeconds:
			typeof result?.duration_seconds === 'number' ? result.duration_seconds : null,
	});
};
|
||||
|
||||
/**
 * Choose a filename extension for an audio MIME type.
 *
 * Matching is by case-insensitive substring, so parameters such as
 * `;codecs=opus` do not matter. Unrecognised or empty types fall back to
 * `.webm`.
 *
 * @param mime MIME type reported for the upload (may be empty).
 * @returns An extension including the leading dot.
 */
function mimeToExtension(mime: string): string {
	const m = mime.toLowerCase();
	if (m.includes('webm')) return '.webm';
	if (m.includes('ogg')) return '.ogg';
	if (m.includes('mp4') || m.includes('m4a')) return '.m4a';
	// Some clients report MP3 as audio/mp3 instead of audio/mpeg.
	if (m.includes('mpeg') || m.includes('mp3')) return '.mp3';
	if (m.includes('wav')) return '.wav';
	if (m.includes('flac')) return '.flac';
	return '.webm';
}
|
||||
|
|
@ -111,6 +111,34 @@ The generator reads `.env.development` and creates app-specific `.env` files wit
|
|||
| `CARDS_SUPABASE_URL` | Supabase project URL | - |
|
||||
| `CARDS_SUPABASE_ANON_KEY` | Supabase anonymous key | - |
|
||||
|
||||
### Speech-to-Text (mana-stt)
|
||||
|
||||
Used by the unified Mana web app's voice features (Memoro recording, Dreams voice capture, etc).
|
||||
The browser never talks to mana-stt directly — requests go through the SvelteKit server-side proxy
|
||||
(`/api/v1/memoro/transcribe`, `/api/v1/dreams/transcribe`) which attaches the API key from
|
||||
`MANA_STT_API_KEY`. Keep that key out of the browser bundle.
|
||||
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `STT_URL` | Public mana-stt URL — generates `MANA_STT_URL` for the web app | `https://gpu-stt.mana.how` |
|
||||
| `MANA_STT_API_KEY` | API key for mana-stt. **Never commit a real value.** | _(empty)_ |
|
||||
|
||||
**Where to obtain a key:**
|
||||
|
||||
- Production deploy: set `MANA_STT_API_KEY` in the Mac Mini's `.env` (sourced by
|
||||
`docker-compose.macmini.yml`, line ~1076). The key lives on the Windows GPU box in
|
||||
`services/mana-stt/.env` under `API_KEYS=<key>:<name>` and is the source of truth.
|
||||
- Local dev: paste the dev key into your local `apps/mana/apps/web/.env` after running
|
||||
`pnpm setup:env` (the generator only writes an empty placeholder). Ask in `#mana-dev` or
|
||||
pull from the team's password manager under `mana-stt → web-key`.
|
||||
- New dev key: SSH to the Windows GPU box (`ssh mana-gpu`), append a new entry to
|
||||
`C:\mana\services\mana-stt\.env` `API_KEYS` (format: `<random>:<name>`), restart the
|
||||
`ManaSTT` scheduled task. Use a fresh key per consumer (`mana-web`, `chat-server`, etc.)
|
||||
so we can revoke individually.
|
||||
|
||||
**Endpoint:** `https://gpu-stt.mana.how` — Cloudflare Tunnel `mana-gpu-server` →
|
||||
Windows GPU box (`192.168.178.11:3020`). Health: `curl https://gpu-stt.mana.how/health`.
|
||||
|
||||
## Adding New Variables
|
||||
|
||||
### Step 1: Add to `.env.development`
|
||||
|
|
|
|||
|
|
@ -266,6 +266,16 @@ check_service "GPU TTS" "http://192.168.178.11:3022/health" 3
|
|||
check_service "GPU Image Gen" "http://192.168.178.11:3023/health" 3
|
||||
check_service "GPU Video Gen" "http://192.168.178.11:3026/health" 3
|
||||
|
||||
echo ""
|
||||
echo "GPU Server (Cloudflare Tunnel):"
|
||||
# These probes go through the public Cloudflare tunnel rather than the LAN.
|
||||
# They catch tunnel-side breakage (cloudflared down on Windows, DNS misroute,
|
||||
# Public Hostname missing) that LAN probes above wouldn't see.
|
||||
check_service "GPU STT (tunnel)" "https://gpu-stt.mana.how/health" 8
|
||||
check_service "GPU LLM (tunnel)" "https://gpu-llm.mana.how/health" 8
|
||||
check_service "GPU TTS (tunnel)" "https://gpu-tts.mana.how/health" 8
|
||||
check_service "GPU Image Gen (tunnel)" "https://gpu-img.mana.how/health" 8
|
||||
|
||||
echo ""
|
||||
echo "Matrix:"
|
||||
check_service "Synapse" "http://localhost:4000/health"
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ const { text } = await response.json();
|
|||
const formData = new FormData();
|
||||
formData.append('file', audioBlob, 'recording.webm');
|
||||
|
||||
const response = await fetch('https://stt-api.mana.how/transcribe', {
|
||||
const response = await fetch('https://gpu-stt.mana.how/transcribe', {
|
||||
method: 'POST',
|
||||
body: formData,
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue