feat(web): PillNav bar mode, fullscreen, local STT + mic button

PillNav overhaul: - Dropdown-as-bar: theme/AI/sync/user menus render as horizontal bars in the bottom stack (PillDropdownBar) instead of floating popovers. New onOpenBar/activeBarId props on PillNavigation. - iconOnly pills: tags/search/workbench-tabs pills show only icons. Home pill removed. New iconOnly flag on PillNavItem. - Segmented toggle groups: items sharing a `group` id render as a single segmented pill (e.g. Light/Dark/System triple). - Fullscreen mode: press "f" to hide all bottom chrome, Esc to exit. - QuickInputBar + bottom bar visibility toggles via new pills. - Progress ring on AI trigger pill during model download (conic-gradient ::after, follows pill border-radius). @mana/local-stt — new package for browser-local speech-to-text: - Whisper models via transformers.js v4 (WebGPU + WASM fallback) - Same Web Worker architecture as @mana/local-llm - Two models: Whisper Tiny (150 MB) and Whisper Small (950 MB) - Reactive Svelte 5 bindings (getLocalSttStatus, loadLocalStt, transcribe) Voice-to-text integration: - useLocalStt() composable: mic capture via AudioContext + ScriptProcessor, resample to 16kHz mono, feed into Whisper worker - Mic button in QuickInputBar (leftAction slot) with recording/loading/transcribing states + pulse animation - Transcribed text injected into InputBar via new injectedText prop - STT model selector in AI bar alongside LLM tier controls Also: vite.config.ts server.fs.allow expanded to monorepo root so workspace package workers resolve in dev. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-20 06:23:40 +02:00 · 2026-04-12 16:05:43 +02:00 · 2026-04-12 16:05:43 +02:00 · 3deee755b3
commit 3deee755b3
parent 8c2f9306e9
24 changed files with 2145 additions and 28 deletions
--- a/apps/mana/CLAUDE.md
+++ b/apps/mana/CLAUDE.md
@ -160,6 +160,7 @@ pnpm test:e2e     # Playwright
 - **Auth**: Mana Auth (Better Auth + EdDSA JWT) via `@mana/shared-auth`
 - **Data**: Dexie.js (local-first) + mana-sync (Go) backend
 - **Encryption**: AES-GCM-256 via Web Crypto, server-wrapped MK with optional zero-knowledge
+- **Local AI**: `@mana/local-llm` (Gemma 4 E2B, WebGPU) + `@mana/local-stt` (Whisper, WebGPU) — both run entirely in-browser via transformers.js
 - **Testing**: Vitest, Playwright
 - **Mobile**: Expo, Expo Router, NativeWind, EAS Build

--- a/apps/mana/apps/web/package.json
+++ b/apps/mana/apps/web/package.json
@ -51,6 +51,7 @@
 		"@mana/help": "workspace:*",
 		"@mana/local-llm": "workspace:*",
 		"@mana/local-store": "workspace:*",
+		"@mana/local-stt": "workspace:^",
 		"@mana/qr-export": "workspace:*",
 		"@mana/shared-auth": "workspace:*",
 		"@mana/shared-auth-ui": "workspace:*",
--- a/apps/mana/apps/web/src/lib/components/voice/use-local-stt.svelte.ts
+++ b/apps/mana/apps/web/src/lib/components/voice/use-local-stt.svelte.ts
@ -0,0 +1,288 @@
+/**
+ * useLocalStt() — Svelte 5 composable that wires microphone capture
+ * directly into @mana/local-stt for fully on-device speech-to-text.
+ *
+ * Usage:
+ *   const stt = useLocalStt();
+ *   // stt.state  — 'idle' | 'loading' | 'recording' | 'transcribing'
+ *   // stt.text   — final transcribed text (set once transcription completes)
+ *   // stt.partial — streaming text (accumulates across chunks while transcribing)
+ *   // stt.error  — error message or null
+ *   // stt.modelStatus — LoadingStatus from local-stt
+ *   // stt.toggle()    — start recording or stop + transcribe
+ *   // stt.cancel()    — abort recording without transcribing
+ *
+ * Audio pipeline:
+ *   getUserMedia (native sample rate)
+ *     → AudioContext + ScriptProcessor → collect Float32 chunks
+ *     → on stop: merge + resample to 16 kHz mono
+ *     → feed into local-stt transcribe()
+ *
+ * The model is loaded lazily on first toggle(). Subsequent calls skip
+ * the download. The model stays loaded for the session (same as local-llm).
+ */
+
+import { getLocalSttStatus, loadLocalStt, transcribe, isLocalSttSupported } from '@mana/local-stt';
+import type { LoadingStatus } from '@mana/local-stt';
+
+export type SttState = 'idle' | 'loading' | 'recording' | 'transcribing';
+
+export interface LocalSttHandle {
+	/** Current state of the STT pipeline; starts as 'idle' and returns to 'idle' after every run, failure, or cancel */
+	readonly state: SttState;
+	/** Final transcribed text, trimmed (set after transcription completes, cleared when a new recording starts) */
+	readonly text: string;
+	/** Partial/streaming text (appended per chunk during transcription, cleared when a new recording starts) */
+	readonly partial: string;
+	/** Message of the most recent failure, or null (cleared when a new recording starts) */
+	readonly error: string | null;
+	/** Model loading status from @mana/local-stt */
+	readonly modelStatus: LoadingStatus;
+	/** Elapsed recording time in ms (refreshed every 100 ms while recording, reset to 0 when capture stops) */
+	readonly elapsedMs: number;
+	/** Whether WebGPU/WASM STT is supported (result of isLocalSttSupported(), read once when the handle is created) */
+	readonly isSupported: boolean;
+	/** Start recording (loads model first if needed) or stop + transcribe; ignored while loading or transcribing */
+	toggle: () => void;
+	/** Cancel recording without transcribing: stops capture, releases the microphone and discards buffered audio */
+	cancel: () => void;
+}
+
+/**
+ * Creates a microphone → Whisper speech-to-text controller backed by
+ * @mana/local-stt.
+ *
+ * The returned handle exposes reactive getters (`state`, `text`, `partial`,
+ * `error`, `modelStatus`, `elapsedMs`, `isSupported`) plus `toggle()` and
+ * `cancel()`. Nothing happens until `toggle()` is called.
+ *
+ * Every start/stop/cancel bumps an internal session counter. Async steps
+ * (model load, microphone permission prompt, transcription) compare their
+ * captured session against the current one when they resume and bail out
+ * if they were superseded, so a late-resolving promise can never reopen
+ * the microphone or overwrite the state of a newer run.
+ *
+ * The composable does not register any teardown itself; callers should
+ * invoke `cancel()` when the owning component goes away.
+ *
+ * @param options.language Optional language hint forwarded to `transcribe()`.
+ * @returns A handle whose getters read the live reactive state.
+ */
+export function useLocalStt(options?: { language?: string }): LocalSttHandle {
+	let state = $state<SttState>('idle');
+	let text = $state('');
+	let partial = $state('');
+	let error = $state<string | null>(null);
+	let elapsedMs = $state(0);
+
+	const modelStatus = getLocalSttStatus();
+	const supported = isLocalSttSupported();
+
+	// Audio capture state (not reactive — internal only)
+	let stream: MediaStream | null = null;
+	let audioContext: AudioContext | null = null;
+	let sourceNode: MediaStreamAudioSourceNode | null = null;
+	let chunks: Float32Array[] = [];
+	let sampleRate = 0;
+	let tickHandle: ReturnType<typeof setInterval> | null = null;
+	let startedAt = 0;
+
+	// Incremented whenever a run is started, stopped or cancelled. Async
+	// continuations capture the value they started with and treat any
+	// mismatch as "this run was abandoned while I was waiting".
+	let session = 0;
+
+	// ScriptProcessorNode is deprecated but universally supported and
+	// simpler than AudioWorklet for our use case (we just collect raw
+	// samples, no real-time processing). AudioWorklet requires a
+	// separate module URL which complicates bundling.
+	let scriptNode: ScriptProcessorNode | null = null;
+
+	/** Tears down timer, audio graph and microphone tracks and drops buffered audio. */
+	function cleanup() {
+		if (tickHandle !== null) {
+			clearInterval(tickHandle);
+			tickHandle = null;
+		}
+		if (scriptNode) {
+			// Detach the handler first so a queued audio event cannot push
+			// into a buffer that has already been handed off or discarded.
+			scriptNode.onaudioprocess = null;
+			scriptNode.disconnect();
+		}
+		scriptNode = null;
+		sourceNode?.disconnect();
+		sourceNode = null;
+		stream?.getTracks().forEach((t) => t.stop());
+		stream = null;
+		if (audioContext && audioContext.state !== 'closed') {
+			audioContext.close().catch(() => {});
+		}
+		audioContext = null;
+		chunks = [];
+		sampleRate = 0;
+		elapsedMs = 0;
+	}
+
+	/** Loads the model if needed, acquires the microphone and starts buffering PCM. */
+	async function startRecording() {
+		const mySession = ++session;
+		const isStale = () => mySession !== session;
+
+		error = null;
+		text = '';
+		partial = '';
+
+		// Ensure model is loaded first
+		if (modelStatus.current.state !== 'ready') {
+			state = 'loading';
+			try {
+				await loadLocalStt();
+			} catch (e) {
+				// A cancelled run must not touch the state of whatever came next.
+				if (isStale()) return;
+				error = e instanceof Error ? e.message : String(e);
+				state = 'idle';
+				return;
+			}
+			if (isStale()) return;
+		}
+
+		// Get microphone access
+		state = 'recording';
+		let acquired: MediaStream;
+		try {
+			acquired = await navigator.mediaDevices.getUserMedia({
+				audio: {
+					echoCancellation: true,
+					noiseSuppression: true,
+					autoGainControl: true,
+				},
+			});
+		} catch (e) {
+			if (isStale()) return;
+			error = explainMicError(e);
+			state = 'idle';
+			return;
+		}
+		if (isStale()) {
+			// Stopped or cancelled while the permission prompt was open:
+			// release the microphone we were just handed instead of leaking it.
+			acquired.getTracks().forEach((t) => t.stop());
+			return;
+		}
+		stream = acquired;
+
+		// Set up AudioContext to capture raw PCM. Construction and wiring can
+		// throw, so failures are routed through cleanup() rather than leaving
+		// the microphone open with state stuck on 'recording'.
+		try {
+			const ctx = new AudioContext();
+			audioContext = ctx;
+			sampleRate = ctx.sampleRate;
+			sourceNode = ctx.createMediaStreamSource(acquired);
+
+			// Buffer size 4096 is a good balance between latency and overhead
+			const node = ctx.createScriptProcessor(4096, 1, 1);
+			scriptNode = node;
+			node.onaudioprocess = (e) => {
+				const input = e.inputBuffer.getChannelData(0);
+				// Copy — the buffer is reused by the browser
+				chunks.push(new Float32Array(input));
+			};
+			sourceNode.connect(node);
+			node.connect(ctx.destination);
+
+			// Under browser autoplay policies a context may start 'suspended'
+			// (e.g. when the user gesture expired during a long model download),
+			// in which case no audio events would fire.
+			if (ctx.state === 'suspended') {
+				void ctx.resume().catch(() => {});
+			}
+		} catch (e) {
+			cleanup();
+			error = explainMicError(e);
+			state = 'idle';
+			return;
+		}
+
+		startedAt = Date.now();
+		tickHandle = setInterval(() => {
+			elapsedMs = Date.now() - startedAt;
+		}, 100);
+	}
+
+	/** Stops capture, merges and resamples the buffered audio and runs transcription. */
+	async function stopAndTranscribe() {
+		if (state !== 'recording') return;
+
+		// Supersede any startRecording() still waiting on the permission
+		// prompt, and mark this transcription so cancel() can invalidate it.
+		const mySession = ++session;
+
+		// Stop recording. cleanup() replaces `chunks` with a fresh array, so
+		// holding on to the old reference is enough — no copy needed.
+		const capturedChunks = chunks;
+		const capturedRate = sampleRate;
+		cleanup();
+
+		console.log(
+			'[local-stt] Captured',
+			capturedChunks.length,
+			'chunks, sample rate:',
+			capturedRate
+		);
+
+		if (capturedChunks.length === 0) {
+			error = 'Keine Audiodaten aufgenommen.';
+			console.warn('[local-stt] No audio chunks captured');
+			state = 'idle';
+			return;
+		}
+
+		state = 'transcribing';
+
+		try {
+			// Merge chunks into one Float32Array
+			const totalLength = capturedChunks.reduce((sum, c) => sum + c.length, 0);
+			const merged = new Float32Array(totalLength);
+			let offset = 0;
+			for (const chunk of capturedChunks) {
+				merged.set(chunk, offset);
+				offset += chunk.length;
+			}
+
+			// Resample to 16 kHz if needed
+			const audio = capturedRate === 16000 ? merged : resample(merged, capturedRate, 16000);
+
+			// Peak of the first second, for diagnostics only. A plain loop
+			// avoids spreading thousands of arguments into Math.max.
+			let maxAmplitude = 0;
+			const probeEnd = Math.min(audio.length, 16000);
+			for (let i = 0; i < probeEnd; i++) {
+				const amplitude = Math.abs(audio[i]);
+				if (amplitude > maxAmplitude) maxAmplitude = amplitude;
+			}
+
+			const durationSec = audio.length / 16000;
+			console.log('[local-stt] Audio ready:', {
+				originalSamples: merged.length,
+				resampledSamples: audio.length,
+				durationSec: durationSec.toFixed(1),
+				sampleRate: capturedRate,
+				maxAmplitude,
+			});
+
+			const result = await transcribe({
+				audio,
+				language: options?.language,
+				onChunk: (t: string) => {
+					// Ignore chunks from a transcription that was cancelled.
+					if (mySession !== session) return;
+					partial += t;
+					console.log('[local-stt] Chunk:', t);
+				},
+			});
+
+			// Cancelled (and possibly restarted) while transcribing: drop the
+			// result and leave the newer run's state alone.
+			if (mySession !== session) return;
+
+			console.log('[local-stt] Result:', result);
+			text = result.text.trim();
+		} catch (e) {
+			if (mySession !== session) return;
+			error = e instanceof Error ? e.message : String(e);
+			console.error('[local-stt] Transcription error:', e);
+		}
+
+		state = 'idle';
+	}
+
+	function toggle() {
+		if (state === 'idle') {
+			void startRecording();
+		} else if (state === 'recording') {
+			void stopAndTranscribe();
+		}
+		// If loading or transcribing, ignore
+	}
+
+	function cancel() {
+		// Invalidate whatever async step is in flight before tearing down.
+		session++;
+		cleanup();
+		state = 'idle';
+	}
+
+	return {
+		get state() {
+			return state;
+		},
+		get text() {
+			return text;
+		},
+		get partial() {
+			return partial;
+		},
+		get error() {
+			return error;
+		},
+		get modelStatus() {
+			return modelStatus.current;
+		},
+		get elapsedMs() {
+			return elapsedMs;
+		},
+		get isSupported() {
+			return supported;
+		},
+		toggle,
+		cancel,
+	};
+}
+
+// ─── Helpers ────────────────────────────────────────────────
+
+/**
+ * Converts PCM samples from `fromRate` to `toRate` by linearly
+ * interpolating between the two nearest source samples. No filtering is
+ * applied — deliberately simple, which is adequate for speech input.
+ *
+ * Returns the input array itself (not a copy) when both rates are equal.
+ */
+function resample(input: Float32Array, fromRate: number, toRate: number): Float32Array {
+	if (fromRate === toRate) {
+		return input;
+	}
+
+	// Distance, in source samples, between two consecutive output samples.
+	const step = fromRate / toRate;
+	const lastSource = input.length - 1;
+	const targetLength = Math.round(input.length / step);
+
+	return new Float32Array(targetLength).map((_, outIndex) => {
+		const position = outIndex * step;
+		const left = Math.floor(position);
+		// Clamp so the final output sample never reads past the input.
+		const right = Math.min(left + 1, lastSource);
+		const weight = position - left;
+		return input[left] * (1 - weight) + input[right] * weight;
+	});
+}
+
+/**
+ * Maps a failure from microphone acquisition to a user-facing (German)
+ * message. Known causes are recognised by error name or by keywords in
+ * the error message, checked in order; anything else falls back to a
+ * generic message that includes the raw message or name.
+ */
+function explainMicError(e: unknown): string {
+	const failure = e instanceof Error ? e : new Error(String(e));
+	const kind = failure.name || '';
+	const detail = failure.message || '';
+
+	// [error name, message keyword pattern, explanation] — first match wins.
+	const knownCauses: ReadonlyArray<readonly [string, RegExp, string]> = [
+		[
+			'NotAllowedError',
+			/denied|permission/i,
+			'Mikrofon-Zugriff verweigert. Erlaube den Zugriff in deinen Browser-Einstellungen.',
+		],
+		['NotFoundError', /not.?found|no.?device/i, 'Kein Mikrofon gefunden.'],
+		['NotReadableError', /in use|busy/i, 'Mikrofon ist gerade belegt.'],
+	];
+
+	for (const [errorName, keywords, explanation] of knownCauses) {
+		if (kind === errorName || keywords.test(detail)) {
+			return explanation;
+		}
+	}
+
+	return `Mikrofon-Fehler: ${detail || kind || 'Unbekannt'}`;
+}
--- a/apps/mana/apps/web/src/routes/(app)/+layout.svelte
+++ b/apps/mana/apps/web/src/routes/(app)/+layout.svelte
@ -210,14 +210,17 @@
 	let aiTierItems = $derived<PillDropdownItem[]>([
 		// Tier toggles — browser tier item and its model-status buddy share a
 		// group so PillDropdownBar renders them as a paired pill.
-		...TIER_TOGGLE_LIST.filter((t) => t.tier !== 'browser' || webgpuSupported).map((t) => ({
-			id: `ai-tier-${t.tier}`,
-			label: t.shortLabel,
-			icon: t.icon,
-			active: llmSettings.allowedTiers.includes(t.tier),
-			onClick: () => toggleAiTier(t.tier),
-			...(t.tier === 'browser' ? { group: 'local-llm' } : {}),
-		})),
+		...TIER_TOGGLE_LIST.filter((t) => t.tier !== 'browser' || webgpuSupported).map((t) => {
+			const isActive = llmSettings.allowedTiers.includes(t.tier);
+			return {
+				id: `ai-tier-${t.tier}`,
+				label: t.shortLabel,
+				icon: isActive ? 'checkCircle' : t.icon,
+				active: isActive,
+				onClick: () => toggleAiTier(t.tier),
+				...(t.tier === 'browser' ? { group: 'local-llm' } : {}),
+			};
+		}),
 		// Browser model status / load button (grouped with the "Lokal" toggle).
 		// Handles all LoadingStatus states so the user sees feedback during
 		// download, initialization, and on error (e.g. worker crash).
@ -234,7 +237,7 @@
 						switch (state) {
 							case 'ready':
 								label = 'Geladen';
-								icon = 'check';
+								icon = 'checkCircle';
 								disabled = true;
 								break;
 							case 'downloading':
@ -280,16 +283,19 @@
 		// STT model selector — each model is a pill, active = currently selected
 		...(sttSupported
 			? (Object.entries(STT_MODELS) as [SttModelKey, (typeof STT_MODELS)[SttModelKey]][]).map(
-					([key, model]) => ({
-						id: `stt-model-${key}`,
-						label: model.displayName,
-						icon: 'mic' as const,
-						active: selectedSttModel === key,
-						onClick: () => {
-							selectedSttModel = key;
-							void loadLocalStt(key);
-						},
-					})
+					([key, model]) => {
+						const isSelected = selectedSttModel === key;
+						return {
+							id: `stt-model-${key}`,
+							label: model.displayName,
+							icon: isSelected ? 'checkCircle' : 'mic',
+							active: isSelected,
+							onClick: () => {
+								selectedSttModel = key;
+								void loadLocalStt(key);
+							},
+						};
+					}
 				)
 			: []),
 		// STT model status (grouped with selected model)
@ -306,7 +312,7 @@
 						switch (state) {
 							case 'ready':
 								label = 'STT bereit';
-								icon = 'check';
+								icon = 'checkCircle';
 								disabled = true;
 								break;
 							case 'downloading':
--- a/apps/mana/apps/web/vite.config.ts
+++ b/apps/mana/apps/web/vite.config.ts
@ -54,6 +54,14 @@ export default defineConfig({
 	server: {
 		port: 5173,
 		strictPort: true,
+		fs: {
+			// Allow serving files from the monorepo root so that workspace
+			// packages (e.g. @mana/local-llm's Web Worker entry) can be
+			// resolved by Vite's dev server. Without this, worker.ts in
+			// packages/local-llm triggers "request url is outside of Vite
+			// serving allow list".
+			allow: ['../../../..'],
+		},
 	},
 	preview: {
 		port: 4173,