feat(notes): voice capture in workbench ListView via shared <VoiceCaptureBar>

Drop a mic into Notes — record, transcribe through the new generic
/api/v1/voice/transcribe proxy (mana-stt), then write the result back
into the placeholder note. The first transcript line becomes the title
when it fits within 80 characters; otherwise the note keeps the generic 'Sprachnotiz' title.

The inline editor refreshes from the live note while the placeholder
'…' content is still on screen, so a transcript that arrives a moment
after the editor opens shows up automatically without overwriting
anything the user has typed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-08 15:59:42 +02:00
parent e0e801956a
commit 9b3d7c7325
3 changed files with 193 additions and 1 deletions

View file

@ -9,6 +9,7 @@
import type { ViewProps } from '$lib/app-registry';
import { ContextMenu, type ContextMenuItem } from '@mana/shared-ui';
import { PencilSimple, Trash, PushPin } from '@mana/shared-icons';
import VoiceCaptureBar from '$lib/components/voice/VoiceCaptureBar.svelte';
let { navigate, goBack, params }: ViewProps = $props();
@ -31,6 +32,11 @@
startEdit(note);
}
// Called by <VoiceCaptureBar> once recording stops: create the placeholder
// note (the transcript is filled in asynchronously) and jump straight into
// the inline editor for it.
async function handleVoiceComplete(blob: Blob, durationMs: number) {
  const voiceNote = await notesStore.createFromVoice(blob, durationMs, 'de');
  startEdit(voiceNote);
}
function startEdit(note: Note) {
if (editingId && editingId !== note.id) saveEdit();
editingId = note.id;
@ -38,6 +44,21 @@
editContent = note.content;
}
// A voice note's transcript lands in the Dexie row after the inline editor
// has already opened on the "…" placeholder. Pull the fresh title/content
// into the editor's local state — but ONLY while the editor still shows the
// placeholder, so text the user has already typed is never clobbered.
$effect(() => {
  if (!editingId) return;
  const liveNote = notes.find((n) => n.id === editingId);
  const transcriptArrived =
    liveNote !== undefined && editContent === '…' && liveNote.content !== '…';
  if (transcriptArrived) {
    editTitle = liveNote.title;
    editContent = liveNote.content;
  }
});
async function saveEdit() {
if (!editingId) return;
await notesStore.updateNote(editingId, {
@ -105,6 +126,14 @@
</script>
<div class="app-view">
<!-- Voice capture -->
<VoiceCaptureBar
idleLabel="Notiz sprechen"
feature="notes-voice-capture"
reason="Notizen werden verschlüsselt gespeichert. Dafür brauchst du ein Mana-Konto."
onComplete={handleVoiceComplete}
/>
<!-- Quick create -->
<form onsubmit={(e) => e.preventDefault()} class="quick-add">
<span class="add-icon">+</span>

View file

@ -16,7 +16,7 @@
import { noteTable } from '../collections';
import { toNote } from '../queries';
import type { LocalNote } from '../types';
import type { LocalNote, Note } from '../types';
import { encryptRecord } from '$lib/data/crypto';
export const notesStore = {
@ -38,6 +38,63 @@ export const notesStore = {
return plaintextSnapshot;
},
/**
 * Create a note from a voice recording. Returns the placeholder note
 * immediately so the UI can navigate into it; the transcript is written
 * in asynchronously once mana-stt responds. The placeholder title
 * 'Sprachnotiz' is intentionally generic — once the transcript exists,
 * the user can rename the note inline like any other.
 */
async createFromVoice(blob: Blob, _durationMs: number, language = 'de'): Promise<Note> {
  const placeholder = await this.createNote({ title: 'Sprachnotiz', content: '…' });
  // Deliberately not awaited: the caller has already navigated into edit
  // mode, and transcribeIntoNote handles its own failures.
  void this.transcribeIntoNote(placeholder.id, blob, language);
  return placeholder;
},
/**
 * Upload an audio blob to /api/v1/voice/transcribe and write the
 * transcript into an existing note. On failure, the error is surfaced
 * as the note's content so the user isn't left staring at an empty
 * placeholder.
 */
async transcribeIntoNote(noteId: string, blob: Blob, language?: string): Promise<void> {
  try {
    // Derive a filename extension from the blob's MIME type so the
    // server side can recognize the container format; fall back to a
    // neutral suffix for anything else.
    let ext = '.audio';
    if (blob.type.includes('webm')) ext = '.webm';
    else if (blob.type.includes('mp4')) ext = '.m4a';

    const form = new FormData();
    form.append('file', blob, `note${ext}`);
    if (language) form.append('language', language);

    const response = await fetch('/api/v1/voice/transcribe', {
      method: 'POST',
      body: form,
    });
    if (!response.ok) {
      const body = await response.text();
      throw new Error(body || `HTTP ${response.status}`);
    }

    const payload = (await response.json()) as { text: string };
    const transcript = (payload.text ?? '').trim();
    // A short first line doubles as the title — keeps the list browseable
    // without forcing the user to rename the note.
    const firstLine = transcript.split('\n')[0]?.trim() ?? '';
    const useFirstLine = firstLine.length > 0 && firstLine.length <= 80;
    await this.updateNote(noteId, {
      title: useFirstLine ? firstLine : 'Sprachnotiz',
      content: transcript,
    });
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    await this.updateNote(noteId, {
      title: 'Sprachnotiz (Fehler)',
      content: `Transkription fehlgeschlagen: ${msg}`,
    });
  }
},
async updateNote(
id: string,
data: Partial<Pick<LocalNote, 'title' | 'content' | 'color' | 'isPinned' | 'isArchived'>>

View file

@ -0,0 +1,106 @@
/**
* POST /api/v1/voice/transcribe
*
* Generic server-side proxy to mana-stt for any module that needs voice
* transcription. The browser uploads an audio Blob; we forward it to
* mana-stt with the server-held API key and return the transcript JSON.
*
* Use this from new modules instead of cloning the per-module endpoints
 * (memoro, dreams) — those exist for historical reasons and will be
* migrated when convenient.
*
* Request: multipart/form-data with `file` (audio blob) and optional `language`
* Response: { text: string, language: string | null, durationSeconds: number | null }
*/
import { error, json } from '@sveltejs/kit';
import { env } from '$env/dynamic/private';
import type { RequestHandler } from './$types';
const MAX_BYTES = 25 * 1024 * 1024; // 25 MB
// Accept only MIME types that plausibly carry audio. An empty or generic
// binary type is tolerated — upstream mana-stt does the real validation.
// video/* is allowed because m4a recordings often report video/mp4.
function isAcceptableType(mime: string): boolean {
  if (mime === '' || mime === 'application/octet-stream') return true;
  return ['audio/', 'video/'].some((prefix) => mime.startsWith(prefix));
}
/**
 * Proxy a browser-recorded audio blob to mana-stt and return the transcript.
 *
 * Validates the multipart upload (file presence, size, MIME type), forwards
 * it with the server-held API key, and normalizes the upstream snake_case
 * JSON to the camelCase contract documented at the top of this file.
 *
 * Errors: 503 when MANA_STT_URL is unset; 400/413/415 on bad input;
 * 502 when mana-stt is unreachable or replies with a non-ok status outside
 * the 4xx/5xx range.
 */
export const POST: RequestHandler = async ({ request }) => {
  const sttUrl = env.MANA_STT_URL;
  const apiKey = env.MANA_STT_API_KEY;
  if (!sttUrl) {
    throw error(503, 'mana-stt is not configured (MANA_STT_URL missing)');
  }
  let incoming: FormData;
  try {
    incoming = await request.formData();
  } catch {
    throw error(400, 'Expected multipart/form-data with a file field');
  }
  const file = incoming.get('file');
  const language = (incoming.get('language') as string | null) ?? null;
  if (!(file instanceof Blob)) {
    throw error(400, 'Missing file');
  }
  if (file.size === 0) {
    throw error(400, 'Empty audio');
  }
  if (file.size > MAX_BYTES) {
    throw error(413, `Audio too large (max ${MAX_BYTES / 1024 / 1024} MB)`);
  }
  if (!isAcceptableType(file.type)) {
    throw error(415, `Unsupported audio type: ${file.type}`);
  }
  // Re-wrap the upload so mana-stt sees a filename whose extension matches
  // the MIME type.
  const ext = mimeToExtension(file.type);
  const filename = `voice${ext}`;
  const upstream = new FormData();
  upstream.append('file', file, filename);
  if (language) upstream.append('language', language);
  const headers: Record<string, string> = { Accept: 'application/json' };
  if (apiKey) headers['X-API-Key'] = apiKey;
  let response: Response;
  try {
    response = await fetch(`${sttUrl.replace(/\/$/, '')}/transcribe`, {
      method: 'POST',
      headers,
      body: upstream,
    });
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    throw error(502, `Could not reach mana-stt: ${msg}`);
  }
  if (!response.ok) {
    const text = await response.text();
    // SvelteKit's error() only accepts 4xx/5xx statuses. A non-ok upstream
    // status outside that range (e.g. a stray 3xx with manual redirect
    // handling) would otherwise crash this handler with an opaque 500, so
    // map anything else to 502 Bad Gateway.
    const status = response.status >= 400 && response.status <= 599 ? response.status : 502;
    throw error(status, `mana-stt error: ${text || response.statusText}`);
  }
  const result = (await response.json()) as {
    text: string;
    language?: string;
    duration_seconds?: number;
  };
  return json({
    text: result.text ?? '',
    language: result.language ?? null,
    durationSeconds: result.duration_seconds ?? null,
  });
};
// Map a MIME type to a filename extension mana-stt can sniff. Checked in
// priority order; unknown types default to .webm (MediaRecorder's usual
// output on Chromium/Firefox).
function mimeToExtension(mime: string): string {
  const mapping: ReadonlyArray<[needle: string, ext: string]> = [
    ['webm', '.webm'],
    ['ogg', '.ogg'],
    ['mp4', '.m4a'], // m4a audio frequently reports video/mp4 or audio/mp4
    ['m4a', '.m4a'],
    ['mpeg', '.mp3'],
    ['wav', '.wav'],
    ['flac', '.flac'],
  ];
  for (const [needle, ext] of mapping) {
    if (mime.includes(needle)) return ext;
  }
  return '.webm';
}