feat(dreams): voice capture via mana-stt

Adds a one-tap voice recorder at the top of the Dreams module. Speak
your dream right after waking; the audio is sent through a server-side
proxy to mana-stt, and the transcript appears in the entry as soon as
it lands.

- New /api/v1/dreams/transcribe SvelteKit server route proxies the
  upload to mana-stt with the server-held MANA_STT_API_KEY (never
  exposed to the browser); validates mime, size, missing config
- Adds MANA_STT_URL + MANA_STT_API_KEY to the mana-web env config in
  generate-env.mjs (private, not PUBLIC_ prefixed)
- New DreamRecorder class wraps MediaRecorder with reactive
  $state — status, elapsed timer, error; supports cancel
- dreamsStore.createFromVoice creates a placeholder dream with
  processingStatus='transcribing' and kicks off the upload
- dreamsStore.transcribeBlob uploads, writes the result back into
  the dream, falls back to processingStatus='failed' on errors
- Adds processingStatus + processingError + audioDurationMs to
  LocalDream; backwards-compatible defaults in toDream
- Mic button in ListView with idle / requesting / recording
  (with elapsed timer + pulsing red) / stopping states
- Cancel button discards the in-flight recording
- Transcribing badge ●●● + failed ! badge on dream rows
- Inline editor shows live transcription status; while it's running
  and the user hasn't typed anything, the transcript folds into the
  edit buffer as soon as it arrives

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-07 14:39:11 +02:00
parent 836c9692c5
commit 578c9f3397
9 changed files with 632 additions and 1 deletions

View file

@ -282,6 +282,8 @@ CALENDAR_DATABASE_URL=postgresql://mana:devpassword@localhost:5432/mana_platform
# Production: https://stt-api.mana.how
# Local dev: http://localhost:3020
STT_URL=https://stt-api.mana.how
# API key for mana-stt (set in your local .env, never commit a real key)
MANA_STT_API_KEY=
# ============================================
# CONTEXT PROJECT

View file

@ -11,6 +11,7 @@
useAllDreams,
} from './queries';
import { dreamsStore } from './stores/dreams.svelte';
import { dreamRecorder, formatElapsed } from './recorder.svelte';
import { MOOD_COLORS, MOOD_LABELS, type Dream, type DreamMood, type SleepQuality } from './types';
import type { ViewProps } from '$lib/app-registry';
import { ContextMenu, type ContextMenuItem } from '@mana/shared-ui';
@ -59,6 +60,18 @@
symbolFilter ? filteredByMode.filter((d) => d.symbols?.includes(symbolFilter!)) : filteredByMode
);
let filtered = $derived(searchDreams(filteredBySymbol, searchQuery));
// While the inline editor is open, the `dreams` array updates whenever the
// transcript lands. If the user hasn't typed anything yet, fold the fresh
// content into the edit buffer so they see the transcription appear inline.
$effect(() => {
if (!editingId) return;
const live = dreams.find((d) => d.id === editingId);
if (!live) return;
if (!editContent.trim() && live.content.trim()) {
editContent = live.content;
}
});
let grouped = $derived(groupByMonth(filtered));
let insights = $derived(computeInsights(dreams));
@ -166,6 +179,38 @@
);
const MOODS: DreamMood[] = ['angenehm', 'neutral', 'unangenehm', 'albtraum'];
// ── Voice capture ─────────────────────────────────────────
let recError = $state<string | null>(null);
// Toggles voice capture: first tap starts recording, second tap stops it
// and spawns a placeholder dream that transcribes in the background.
async function handleMicClick() {
	recError = null;
	switch (dreamRecorder.status) {
		case 'recording': {
			try {
				const rec = await dreamRecorder.stop();
				if (rec.durationMs < 500) {
					recError = 'Aufnahme war zu kurz.';
					return;
				}
				const created = await dreamsStore.createFromVoice(rec.blob, rec.durationMs, 'de');
				// Jump into the list + inline editor so the transcript shows up live.
				viewMode = 'list';
				startEdit(created);
			} catch (e) {
				const message = e instanceof Error ? e.message : String(e);
				// A user-initiated cancel rejects with 'cancelled' — not an error.
				if (message !== 'cancelled') recError = message;
			}
			break;
		}
		case 'idle': {
			await dreamRecorder.start();
			if (dreamRecorder.error) recError = dreamRecorder.error;
			break;
		}
		// 'requesting' / 'stopping': the button is disabled; nothing to do.
	}
}
function cancelRecording() {
dreamRecorder.cancel();
}
</script>
<div class="app-view">
@ -191,6 +236,38 @@
}}
/>
{:else}
<!-- Voice capture -->
<div class="capture-row">
<button
class="mic-btn"
class:recording={dreamRecorder.status === 'recording'}
class:busy={dreamRecorder.status === 'requesting' || dreamRecorder.status === 'stopping'}
onclick={handleMicClick}
disabled={dreamRecorder.status === 'requesting' || dreamRecorder.status === 'stopping'}
aria-label={dreamRecorder.status === 'recording' ? 'Aufnahme beenden' : 'Aufnahme starten'}
>
{#if dreamRecorder.status === 'recording'}
<span class="mic-stop"></span>
<span class="mic-time">{formatElapsed(dreamRecorder.elapsedMs)}</span>
{:else if dreamRecorder.status === 'requesting'}
<span class="mic-icon"></span>
<span class="mic-time">Mikro öffnen…</span>
{:else if dreamRecorder.status === 'stopping'}
<span class="mic-icon"></span>
<span class="mic-time">Verarbeite…</span>
{:else}
<span class="mic-icon">&#x1f3a4;</span>
<span class="mic-time">Traum sprechen</span>
{/if}
</button>
{#if dreamRecorder.status === 'recording'}
<button class="mic-cancel" onclick={cancelRecording} title="Aufnahme verwerfen"> × </button>
{/if}
</div>
{#if recError}
<p class="rec-error">{recError}</p>
{/if}
<!-- Quick create -->
<form onsubmit={(e) => e.preventDefault()} class="quick-add">
<span class="add-icon">&#x1f319;</span>
@ -289,6 +366,18 @@
placeholder="Titel (optional)..."
autofocus
/>
{#if dream.processingStatus === 'transcribing'}
<div class="ed-status">
<span class="ed-status-dots">●●●</span>
Transkribiert deine Aufnahme…
</div>
{:else if dream.processingStatus === 'failed'}
<div class="ed-status failed">
Transkription fehlgeschlagen{dream.processingError
? `: ${dream.processingError}`
: ''}
</div>
{/if}
<textarea
class="ed-content"
bind:value={editContent}
@ -392,6 +481,11 @@
<div class="dream-content">
<div class="dream-top">
<span class="dream-title">{dream.title || 'Traum ohne Titel'}</span>
{#if dream.processingStatus === 'transcribing'}
<span class="badge transcribing" title="Wird transkribiert…">●●●</span>
{:else if dream.processingStatus === 'failed'}
<span class="badge failed" title={dream.processingError ?? 'Fehler'}>!</span>
{/if}
{#if dream.isLucid}<span class="badge lucid">&#x2728;</span>{/if}
{#if dream.isRecurring}<span class="badge">&#x21bb;</span>{/if}
{#if dream.isPinned}<span class="badge">&#x1f4cc;</span>{/if}
@ -454,6 +548,93 @@
height: 100%;
}
/* ── Voice capture ─────────────────────────── */
.capture-row {
display: flex;
align-items: center;
gap: 0.375rem;
}
.mic-btn {
flex: 1;
display: flex;
align-items: center;
justify-content: center;
gap: 0.5rem;
padding: 0.625rem 0.875rem;
border-radius: 0.5rem;
border: 1px solid rgba(99, 102, 241, 0.2);
background: rgba(99, 102, 241, 0.04);
color: #6366f1;
font-size: 0.8125rem;
font-weight: 500;
cursor: pointer;
transition: all 0.15s;
}
.mic-btn:hover:not(:disabled) {
background: rgba(99, 102, 241, 0.08);
border-color: #6366f1;
}
.mic-btn:disabled {
opacity: 0.6;
cursor: wait;
}
.mic-btn.recording {
background: rgba(239, 68, 68, 0.08);
border-color: rgba(239, 68, 68, 0.4);
color: #ef4444;
animation: rec-pulse 1.5s ease-in-out infinite;
}
@keyframes rec-pulse {
0%,
100% {
background: rgba(239, 68, 68, 0.08);
}
50% {
background: rgba(239, 68, 68, 0.16);
}
}
.mic-icon {
font-size: 1rem;
}
.mic-stop {
display: inline-block;
width: 10px;
height: 10px;
background: #ef4444;
border-radius: 2px;
}
.mic-time {
font-variant-numeric: tabular-nums;
}
.mic-cancel {
width: 32px;
height: 32px;
border-radius: 0.375rem;
border: 1px solid rgba(0, 0, 0, 0.08);
background: transparent;
color: #9ca3af;
font-size: 1.125rem;
line-height: 1;
cursor: pointer;
}
.mic-cancel:hover {
color: #ef4444;
border-color: #ef4444;
}
:global(.dark) .mic-cancel {
border-color: rgba(255, 255, 255, 0.1);
}
.rec-error {
font-size: 0.6875rem;
color: #ef4444;
margin: 0;
padding: 0 0.25rem;
}
/* ── View Tabs ─────────────────────────────── */
.view-tabs {
display: flex;
@ -683,6 +864,33 @@
.badge {
font-size: 0.625rem;
}
.badge.transcribing {
color: #6366f1;
font-size: 0.5rem;
letter-spacing: 0.0625rem;
animation: dots-pulse 1.4s ease-in-out infinite;
}
@keyframes dots-pulse {
0%,
100% {
opacity: 0.4;
}
50% {
opacity: 1;
}
}
.badge.failed {
display: inline-flex;
align-items: center;
justify-content: center;
width: 13px;
height: 13px;
border-radius: 9999px;
background: rgba(239, 68, 68, 0.15);
color: #ef4444;
font-size: 0.5625rem;
font-weight: 700;
}
.dream-preview {
font-size: 0.6875rem;
@ -762,6 +970,26 @@
color: #f3f4f6;
}
.ed-status {
display: flex;
align-items: center;
gap: 0.375rem;
padding: 0.25rem 0.5rem;
border-radius: 0.25rem;
background: rgba(99, 102, 241, 0.06);
color: #6366f1;
font-size: 0.6875rem;
}
.ed-status.failed {
background: rgba(239, 68, 68, 0.06);
color: #ef4444;
}
.ed-status-dots {
font-size: 0.5rem;
letter-spacing: 0.0625rem;
animation: dots-pulse 1.4s ease-in-out infinite;
}
.ed-content {
width: 100%;
background: transparent;

View file

@ -36,7 +36,10 @@ export const DREAMS_GUEST_SEED = {
emotions: ['Ruhe', 'Neugier'],
symbols: [],
audioPath: null,
audioDurationMs: null,
transcript: null,
processingStatus: 'idle',
processingError: null,
interpretation: null,
aiInterpretation: null,
isPrivate: false,
@ -61,7 +64,10 @@ export const DREAMS_GUEST_SEED = {
emotions: ['Freiheit', 'Staunen'],
symbols: ['Fliegen', 'Licht'],
audioPath: null,
audioDurationMs: null,
transcript: null,
processingStatus: 'idle',
processingError: null,
interpretation: 'Gefühl von Kontrolle und Leichtigkeit nach einer entspannten Woche.',
aiInterpretation: null,
isPrivate: false,

View file

@ -26,7 +26,10 @@ export function toDream(local: LocalDream): Dream {
emotions: local.emotions ?? [],
symbols: local.symbols ?? [],
audioPath: local.audioPath,
audioDurationMs: local.audioDurationMs ?? null,
transcript: local.transcript,
processingStatus: local.processingStatus ?? 'idle',
processingError: local.processingError ?? null,
interpretation: local.interpretation,
aiInterpretation: local.aiInterpretation,
isPrivate: local.isPrivate,

View file

@ -0,0 +1,173 @@
/**
* Browser audio recorder for the Dreams voice-capture feature.
*
* Uses MediaRecorder under the hood. Exposes a small reactive state object
* that components can read to render the mic button state and elapsed time.
*/
export type RecorderStatus = 'idle' | 'requesting' | 'recording' | 'stopping';
export interface RecordingResult {
blob: Blob;
durationMs: number;
mimeType: string;
}
class DreamRecorder {
	/** Lifecycle phase driving the mic button UI. */
	status = $state<RecorderStatus>('idle');
	/** Last user-facing error message (German), or null when healthy. */
	error = $state<string | null>(null);
	/** Milliseconds recorded so far; ticks roughly every 100 ms while recording. */
	elapsedMs = $state(0);

	#mediaRecorder: MediaRecorder | null = null;
	#stream: MediaStream | null = null;
	#chunks: Blob[] = [];
	#startedAt = 0;
	#tickHandle: ReturnType<typeof setInterval> | null = null;
	// Pending promise callbacks for an in-flight stop(); null when not stopping.
	#resolve: ((result: RecordingResult) => void) | null = null;
	#reject: ((reason: Error) => void) | null = null;

	/** True when this browser exposes getUserMedia and MediaRecorder. */
	get isAvailable(): boolean {
		return (
			typeof navigator !== 'undefined' &&
			!!navigator.mediaDevices?.getUserMedia &&
			typeof MediaRecorder !== 'undefined'
		);
	}

	/**
	 * Request microphone access and begin recording.
	 * No-op unless currently idle. On failure this sets `error` and returns
	 * to 'idle' instead of throwing, so callers only read the reactive state.
	 */
	async start(): Promise<void> {
		if (this.status !== 'idle') return;
		if (!this.isAvailable) {
			this.error = 'Audio-Aufnahme wird in diesem Browser nicht unterstützt.';
			return;
		}
		this.error = null;
		this.status = 'requesting';

		let stream: MediaStream;
		try {
			stream = await navigator.mediaDevices.getUserMedia({
				audio: {
					echoCancellation: true,
					noiseSuppression: true,
					autoGainControl: true,
				},
			});
		} catch (e) {
			const msg = e instanceof Error ? e.message : String(e);
			this.error = msg.includes('Permission')
				? 'Mikrofon-Zugriff wurde verweigert.'
				: `Mikrofon konnte nicht geöffnet werden: ${msg}`;
			this.status = 'idle';
			return;
		}

		// cancel() may have raced the permission prompt (it resets status to
		// 'idle'); if so, release the freshly granted stream instead of
		// silently starting to record with the mic indicator on.
		if (this.status !== 'requesting') {
			stream.getTracks().forEach((t) => t.stop());
			return;
		}
		this.#stream = stream;

		const mimeType = pickSupportedMimeType();
		try {
			this.#mediaRecorder = new MediaRecorder(stream, mimeType ? { mimeType } : {});
		} catch (e) {
			const msg = e instanceof Error ? e.message : String(e);
			this.error = `MediaRecorder konnte nicht gestartet werden: ${msg}`;
			this.#cleanupStream();
			this.status = 'idle';
			return;
		}

		this.#chunks = [];
		this.#mediaRecorder.ondataavailable = (event) => {
			if (event.data && event.data.size > 0) this.#chunks.push(event.data);
		};
		this.#mediaRecorder.onerror = (event: Event) => {
			const err = (event as Event & { error?: Error }).error;
			this.#failWith(err ?? new Error('MediaRecorder error'));
		};
		this.#mediaRecorder.onstop = () => {
			// Capture the duration before resetting state. Prefer the recorder's
			// own mimeType: the browser may have substituted a container format.
			const durationMs = this.elapsedMs;
			const type = this.#mediaRecorder?.mimeType || mimeType || 'audio/webm';
			const blob = new Blob(this.#chunks, { type });
			this.#cleanupStream();
			this.#cleanupTimer();
			this.status = 'idle';
			this.elapsedMs = 0;
			const resolve = this.#resolve;
			this.#resolve = null;
			this.#reject = null;
			resolve?.({ blob, durationMs, mimeType: type });
		};

		this.#startedAt = Date.now();
		this.elapsedMs = 0;
		this.#tickHandle = setInterval(() => {
			this.elapsedMs = Date.now() - this.#startedAt;
		}, 100);
		this.#mediaRecorder.start();
		this.status = 'recording';
	}

	/**
	 * Stop the active recording.
	 * Resolves with the final blob + duration once MediaRecorder has flushed
	 * its buffers; rejects with Error('Not recording') when nothing is running
	 * and with Error('cancelled') if cancel() interrupts the stop.
	 */
	stop(): Promise<RecordingResult> {
		if (this.status !== 'recording' || !this.#mediaRecorder) {
			return Promise.reject(new Error('Not recording'));
		}
		this.status = 'stopping';
		return new Promise<RecordingResult>((resolve, reject) => {
			this.#resolve = resolve;
			this.#reject = reject;
			this.#mediaRecorder?.stop();
		});
	}

	/** Discard the current recording (or permission request) without a result. */
	cancel(): void {
		if (this.status === 'idle') return;

		// Detach handlers and stop the recorder BEFORE releasing the stream:
		// stopping the tracks makes MediaRecorder fire a deferred `onstop`, and
		// the stale closure would otherwise reset state — and tear down the
		// stream — of a recording started right after this cancel.
		const recorder = this.#mediaRecorder;
		if (recorder) {
			recorder.ondataavailable = null;
			recorder.onerror = null;
			recorder.onstop = null;
			if (recorder.state !== 'inactive') {
				try {
					recorder.stop();
				} catch {
					// Already stopped — nothing to do.
				}
			}
		}
		this.#mediaRecorder = null;

		this.#cleanupStream();
		this.#cleanupTimer();
		this.#chunks = [];
		this.elapsedMs = 0;
		this.status = 'idle';
		const reject = this.#reject;
		this.#resolve = null;
		this.#reject = null;
		reject?.(new Error('cancelled'));
	}

	/** Transition to idle after a recorder error, rejecting any pending stop(). */
	#failWith(err: Error) {
		this.error = err.message;
		this.#cleanupStream();
		this.#cleanupTimer();
		this.status = 'idle';
		this.elapsedMs = 0;
		const reject = this.#reject;
		this.#resolve = null;
		this.#reject = null;
		reject?.(err);
	}

	/** Stop all tracks so the browser's mic indicator turns off. */
	#cleanupStream() {
		this.#stream?.getTracks().forEach((t) => t.stop());
		this.#stream = null;
	}

	/** Stop the elapsed-time ticker. */
	#cleanupTimer() {
		if (this.#tickHandle !== null) {
			clearInterval(this.#tickHandle);
			this.#tickHandle = null;
		}
	}
}
/**
 * Returns the first recording MIME type this browser supports, preferring
 * Opus-in-WebM. Returns null when MediaRecorder is unavailable or none of
 * the candidates are supported (MediaRecorder then picks its own default).
 */
function pickSupportedMimeType(): string | null {
	if (typeof MediaRecorder === 'undefined') return null;
	const preferred = ['audio/webm;codecs=opus', 'audio/webm', 'audio/ogg;codecs=opus', 'audio/mp4'];
	return preferred.find((t) => MediaRecorder.isTypeSupported(t)) ?? null;
}
export const dreamRecorder = new DreamRecorder();
/** Formats a millisecond duration as `m:ss` for the recording timer. */
export function formatElapsed(ms: number): string {
	const seconds = Math.floor(ms / 1000);
	const mm = Math.floor(seconds / 60);
	const ss = String(seconds % 60).padStart(2, '0');
	return `${mm}:${ss}`;
}

View file

@ -4,7 +4,14 @@
import { dreamSymbolTable, dreamTable } from '../collections';
import { toDream } from '../queries';
import type { DreamClarity, DreamMood, LocalDream, SleepQuality } from '../types';
import type {
Dream,
DreamClarity,
DreamMood,
DreamProcessingStatus,
LocalDream,
SleepQuality,
} from '../types';
function todayIsoDate(): string {
return new Date().toISOString().slice(0, 10);
@ -38,7 +45,10 @@ export const dreamsStore = {
emotions: data.emotions ?? [],
symbols: data.symbols ?? [],
audioPath: null,
audioDurationMs: null,
transcript: null,
processingStatus: 'idle',
processingError: null,
interpretation: null,
aiInterpretation: null,
isPrivate: false,
@ -96,6 +106,112 @@ export const dreamsStore = {
});
},
/**
 * Create a placeholder dream for a fresh voice recording and kick off the
 * background transcription. Resolves immediately with the new dream so the
 * UI can navigate to it and render its "transcribing" state without waiting
 * for the upload round-trip.
 */
async createFromVoice(blob: Blob, durationMs: number, language?: string): Promise<Dream> {
	const local: LocalDream = {
		id: crypto.randomUUID(),
		// No text yet — the transcript fills `content` once it arrives.
		title: null,
		content: '',
		transcript: null,
		dreamDate: todayIsoDate(),
		// Audio metadata: the blob itself is not persisted locally.
		audioPath: null,
		audioDurationMs: durationMs,
		// Rows in this state render the "transcribing" badge until the
		// upload completes (or fails).
		processingStatus: 'transcribing',
		processingError: null,
		// Everything else starts out empty, matching a blank manual entry.
		mood: null,
		clarity: null,
		isLucid: false,
		isRecurring: false,
		sleepQuality: null,
		bedtime: null,
		wakeTime: null,
		location: null,
		people: [],
		emotions: [],
		symbols: [],
		interpretation: null,
		aiInterpretation: null,
		isPrivate: false,
		isPinned: false,
		isArchived: false,
	};
	await dreamTable.add(local);
	// Deliberately not awaited — transcription writes back into the row later.
	void this.transcribeBlob(local.id, blob, language);
	return toDream(local);
},
/** Persist a new processing status (and optional error message) on a dream. */
async setProcessingStatus(
	id: string,
	status: DreamProcessingStatus,
	error: string | null = null
) {
	const patch = {
		processingStatus: status,
		processingError: error,
		updatedAt: new Date().toISOString(),
	};
	await dreamTable.update(id, patch);
},
/**
 * Upload an audio blob to /api/v1/dreams/transcribe and fold the transcript
 * back into the dream. On success the status resets to 'idle'; any failure
 * (network, HTTP error, missing backend config) marks the dream 'failed'
 * with the error message so the UI can surface it.
 */
async transcribeBlob(dreamId: string, blob: Blob, language?: string): Promise<void> {
	try {
		// Give the upload a filename whose extension matches the blob type.
		let ext = '.audio';
		if (blob.type.includes('webm')) ext = '.webm';
		else if (blob.type.includes('mp4')) ext = '.m4a';

		const form = new FormData();
		form.append('file', blob, `dream${ext}`);
		if (language) form.append('language', language);

		const response = await fetch('/api/v1/dreams/transcribe', {
			method: 'POST',
			body: form,
		});
		if (!response.ok) {
			const body = await response.text();
			throw new Error(body || `HTTP ${response.status}`);
		}

		const payload = (await response.json()) as {
			text: string;
			language: string | null;
			durationSeconds: number | null;
		};
		const transcript = (payload.text ?? '').trim();

		const existing = await dreamTable.get(dreamId);
		// The dream may have been deleted while we were transcribing.
		if (!existing) return;

		const userHasTyped = !!existing.content?.trim();
		await dreamTable.update(dreamId, {
			transcript,
			// Never clobber text the user typed while waiting for the result.
			content: userHasTyped ? existing.content : transcript,
			processingStatus: 'idle',
			processingError: null,
			updatedAt: new Date().toISOString(),
		});
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		await dreamTable.update(dreamId, {
			processingStatus: 'failed',
			processingError: msg,
			updatedAt: new Date().toISOString(),
		});
	}
},
async deleteDream(id: string) {
const existing = await dreamTable.get(id);
if (existing?.symbols?.length) {

View file

@ -7,6 +7,7 @@ import type { BaseRecord } from '@mana/local-store';
export type DreamMood = 'angenehm' | 'neutral' | 'unangenehm' | 'albtraum';
export type DreamClarity = 1 | 2 | 3 | 4 | 5;
export type SleepQuality = 1 | 2 | 3 | 4 | 5;
export type DreamProcessingStatus = 'idle' | 'recording' | 'transcribing' | 'failed';
// ─── Local Record Types (Dexie) ───────────────────────────
@ -26,7 +27,10 @@ export interface LocalDream extends BaseRecord {
emotions: string[];
symbols: string[];
audioPath: string | null;
audioDurationMs: number | null;
transcript: string | null;
processingStatus: DreamProcessingStatus;
processingError: string | null;
interpretation: string | null;
aiInterpretation: string | null;
isPrivate: boolean;
@ -65,7 +69,10 @@ export interface Dream {
emotions: string[];
symbols: string[];
audioPath: string | null;
audioDurationMs: number | null;
transcript: string | null;
processingStatus: DreamProcessingStatus;
processingError: string | null;
interpretation: string | null;
aiInterpretation: string | null;
isPrivate: boolean;

View file

@ -0,0 +1,93 @@
/**
* POST /api/v1/dreams/transcribe
*
* Server-side proxy to mana-stt for the Dreams module's voice capture.
* The browser uploads an audio Blob; we forward it to mana-stt with the
* server-held API key and return the transcript JSON.
*
* Request: multipart/form-data with `file` (audio blob) and optional `language`
* Response: { text: string, language?: string, duration_seconds?: number }
*/
import { error, json } from '@sveltejs/kit';
import { env } from '$env/dynamic/private';
import type { RequestHandler } from './$types';
const ALLOWED_MIME_PREFIXES = ['audio/'];
const MAX_BYTES = 25 * 1024 * 1024; // 25 MB
export const POST: RequestHandler = async ({ request }) => {
	const sttUrl = env.MANA_STT_URL;
	const apiKey = env.MANA_STT_API_KEY;

	// Fail fast when this deployment has no STT backend configured.
	if (!sttUrl) {
		throw error(503, 'mana-stt is not configured (MANA_STT_URL missing)');
	}

	const incoming = await request.formData();
	const file = incoming.get('file');
	const language = (incoming.get('language') as string | null) ?? null;

	if (!(file instanceof Blob)) {
		throw error(400, 'Missing file');
	}
	if (file.size === 0) {
		throw error(400, 'Empty audio');
	}
	if (file.size > MAX_BYTES) {
		throw error(413, `Audio too large (max ${MAX_BYTES / 1024 / 1024} MB)`);
	}
	// Some browsers leave Blob.type empty — only reject a *known* non-audio type.
	if (file.type && !ALLOWED_MIME_PREFIXES.some((p) => file.type.startsWith(p))) {
		throw error(415, `Unsupported audio type: ${file.type}`);
	}

	// Pick a sensible filename + extension based on the blob mime type
	const ext = mimeToExtension(file.type);
	const filename = `dream${ext}`;

	const upstream = new FormData();
	upstream.append('file', file, filename);
	if (language) upstream.append('language', language);

	// The API key stays server-side; the browser never sees it.
	const headers: Record<string, string> = { Accept: 'application/json' };
	if (apiKey) headers['X-API-Key'] = apiKey;

	let response: Response;
	try {
		response = await fetch(`${sttUrl.replace(/\/$/, '')}/transcribe`, {
			method: 'POST',
			headers,
			body: upstream,
		});
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		throw error(502, `Could not reach mana-stt: ${msg}`);
	}

	if (!response.ok) {
		const text = await response.text();
		// SvelteKit's error() only accepts 400-599; map anything else
		// (e.g. an unfollowed 3xx) to a bad-gateway instead of crashing.
		const status = response.status >= 400 && response.status <= 599 ? response.status : 502;
		throw error(status, `mana-stt error: ${text || response.statusText}`);
	}

	// Guard against a 2xx response with a malformed body: an unhandled
	// json() rejection would surface as an opaque 500 instead of a 502.
	let result: { text?: string; language?: string; duration_seconds?: number };
	try {
		result = (await response.json()) as {
			text?: string;
			language?: string;
			duration_seconds?: number;
		};
	} catch {
		throw error(502, 'mana-stt returned an invalid JSON response');
	}

	return json({
		text: result.text ?? '',
		language: result.language ?? null,
		durationSeconds: result.duration_seconds ?? null,
	});
};
/**
 * Maps an audio MIME type to a filename extension mana-stt will accept.
 * Matches are checked in order so container-specific substrings win;
 * unknown or empty types default to `.webm` (MediaRecorder's usual output).
 */
function mimeToExtension(mime: string): string {
	const table: ReadonlyArray<[string, string]> = [
		['webm', '.webm'],
		['ogg', '.ogg'],
		['mp4', '.m4a'],
		['m4a', '.m4a'],
		['mpeg', '.mp3'],
		['wav', '.wav'],
		['flac', '.flac'],
	];
	for (const [needle, ext] of table) {
		if (mime.includes(needle)) return ext;
	}
	return '.webm';
}

View file

@ -126,6 +126,9 @@ const APP_CONFIGS = [
MIDDLEWARE_URL: (env) => env.MANA_AUTH_URL,
PUBLIC_UMAMI_WEBSITE_ID: (env) => env.UMAMI_WEBSITE_ID_MANA || '',
PUBLIC_GLITCHTIP_DSN: (env) => env.PUBLIC_GLITCHTIP_DSN || '',
// Speech-to-Text proxy (server-side only, never exposed to the client)
MANA_STT_URL: (env) => env.STT_URL || 'http://localhost:3020',
MANA_STT_API_KEY: (env) => env.MANA_STT_API_KEY || '',
},
},