From 3deee755b33d5308b32933c341138f92da0a625f Mon Sep 17 00:00:00 2001 From: Till JS Date: Sun, 12 Apr 2026 16:05:43 +0200 Subject: [PATCH] feat(web): PillNav bar mode, fullscreen, local STT + mic button MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PillNav overhaul: - Dropdown-as-bar: theme/AI/sync/user menus render as horizontal bars in the bottom stack (PillDropdownBar) instead of floating popovers. New onOpenBar/activeBarId props on PillNavigation. - iconOnly pills: tags/search/workbench-tabs pills show only icons. Home pill removed. New iconOnly flag on PillNavItem. - Segmented toggle groups: items sharing a `group` id render as a single segmented pill (e.g. Light/Dark/System triple). - Fullscreen mode: press "f" to hide all bottom chrome, Esc to exit. - QuickInputBar + bottom bar visibility toggles via new pills. - Progress ring on AI trigger pill during model download (conic-gradient ::after, follows pill border-radius). @mana/local-stt — new package for browser-local speech-to-text: - Whisper models via transformers.js v4 (WebGPU + WASM fallback) - Same Web Worker architecture as @mana/local-llm - Two models: Whisper Tiny (150 MB) and Whisper Small (950 MB) - Reactive Svelte 5 bindings (getLocalSttStatus, loadLocalStt, transcribe) Voice-to-text integration: - useLocalStt() composable: mic capture via AudioContext + ScriptProcessor, resample to 16kHz mono, feed into Whisper worker - Mic button in QuickInputBar (leftAction slot) with recording/loading/transcribing states + pulse animation - Transcribed text injected into InputBar via new injectedText prop - STT model selector in AI bar alongside LLM tier controls Also: vite.config.ts server.fs.allow expanded to monorepo root so workspace package workers resolve in dev. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 1 + apps/mana/CLAUDE.md | 1 + apps/mana/apps/web/package.json | 1 + .../components/voice/use-local-stt.svelte.ts | 288 ++++++++++ .../apps/web/src/routes/(app)/+layout.svelte | 46 +- apps/mana/apps/web/vite.config.ts | 8 + packages/local-stt/CLAUDE.md | 195 +++++++ packages/local-stt/package.json | 26 + packages/local-stt/src/cache.ts | 22 + packages/local-stt/src/engine-impl.ts | 231 ++++++++ packages/local-stt/src/engine.ts | 151 ++++++ packages/local-stt/src/index.ts | 28 + packages/local-stt/src/models.ts | 38 ++ packages/local-stt/src/svelte.svelte.ts | 56 ++ packages/local-stt/src/types.ts | 63 +++ packages/local-stt/src/worker.ts | 96 ++++ packages/local-stt/tsconfig.json | 14 + packages/shared-ui/src/index.ts | 2 + .../src/navigation/PillDropdownBar.svelte | 510 ++++++++++++++++++ .../src/navigation/PillNavigation.svelte | 334 +++++++++++- packages/shared-ui/src/navigation/index.ts | 2 + packages/shared-ui/src/navigation/types.ts | 21 + .../shared-ui/src/quick-input/InputBar.svelte | 16 + pnpm-lock.yaml | 23 +- 24 files changed, 2145 insertions(+), 28 deletions(-) create mode 100644 apps/mana/apps/web/src/lib/components/voice/use-local-stt.svelte.ts create mode 100644 packages/local-stt/CLAUDE.md create mode 100644 packages/local-stt/package.json create mode 100644 packages/local-stt/src/cache.ts create mode 100644 packages/local-stt/src/engine-impl.ts create mode 100644 packages/local-stt/src/engine.ts create mode 100644 packages/local-stt/src/index.ts create mode 100644 packages/local-stt/src/models.ts create mode 100644 packages/local-stt/src/svelte.svelte.ts create mode 100644 packages/local-stt/src/types.ts create mode 100644 packages/local-stt/src/worker.ts create mode 100644 packages/local-stt/tsconfig.json create mode 100644 packages/shared-ui/src/navigation/PillDropdownBar.svelte diff --git a/CLAUDE.md b/CLAUDE.md index 8dbbe5fa8..8243c29dd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -146,6 
+146,7 @@ MinIO (Docker, S3-compatible) in both local and prod. Console: http://localhost: | `@mana/shared-i18n` | i18n | | `@mana/local-store` | Local-first store primitives — used by unified Mana, manavoxel, arcade, and shared-uload/-stores/-links | | `@mana/local-llm` | Browser-local LLM inference (transformers.js + Gemma 4 E2B, WebGPU). Powers `/llm-test` and the playground module. See [`packages/local-llm/CLAUDE.md`](packages/local-llm/CLAUDE.md) for the CSP requirements and the transformers.js v4 gotchas. | +| `@mana/local-stt` | Browser-local speech-to-text (transformers.js + Whisper, WebGPU). Powers the QuickInputBar mic button. Same architecture as local-llm. See [`packages/local-stt/CLAUDE.md`](packages/local-stt/CLAUDE.md). | ## Adding Dependencies diff --git a/apps/mana/CLAUDE.md b/apps/mana/CLAUDE.md index 60b2a38fe..bec0616a0 100644 --- a/apps/mana/CLAUDE.md +++ b/apps/mana/CLAUDE.md @@ -160,6 +160,7 @@ pnpm test:e2e # Playwright - **Auth**: Mana Auth (Better Auth + EdDSA JWT) via `@mana/shared-auth` - **Data**: Dexie.js (local-first) + mana-sync (Go) backend - **Encryption**: AES-GCM-256 via Web Crypto, server-wrapped MK with optional zero-knowledge +- **Local AI**: `@mana/local-llm` (Gemma 4 E2B, WebGPU) + `@mana/local-stt` (Whisper, WebGPU) — both run entirely in-browser via transformers.js - **Testing**: Vitest, Playwright - **Mobile**: Expo, Expo Router, NativeWind, EAS Build diff --git a/apps/mana/apps/web/package.json b/apps/mana/apps/web/package.json index 682222320..3c43a767e 100644 --- a/apps/mana/apps/web/package.json +++ b/apps/mana/apps/web/package.json @@ -51,6 +51,7 @@ "@mana/help": "workspace:*", "@mana/local-llm": "workspace:*", "@mana/local-store": "workspace:*", + "@mana/local-stt": "workspace:^", "@mana/qr-export": "workspace:*", "@mana/shared-auth": "workspace:*", "@mana/shared-auth-ui": "workspace:*", diff --git a/apps/mana/apps/web/src/lib/components/voice/use-local-stt.svelte.ts 
b/apps/mana/apps/web/src/lib/components/voice/use-local-stt.svelte.ts new file mode 100644 index 000000000..6a1b9f48e --- /dev/null +++ b/apps/mana/apps/web/src/lib/components/voice/use-local-stt.svelte.ts @@ -0,0 +1,288 @@ +/** + * useLocalStt() — Svelte 5 composable that wires microphone capture + * directly into @mana/local-stt for fully on-device speech-to-text. + * + * Usage: + * const stt = useLocalStt(); + * // stt.state — 'idle' | 'loading' | 'recording' | 'transcribing' + * // stt.text — transcribed text (accumulates across chunks) + * // stt.error — error message or null + * // stt.modelStatus — LoadingStatus from local-stt + * // stt.toggle() — start recording or stop + transcribe + * // stt.cancel() — abort recording without transcribing + * + * Audio pipeline: + * getUserMedia (native sample rate) + * → AudioContext + ScriptProcessor → collect Float32 chunks + * → on stop: merge + resample to 16 kHz mono + * → feed into local-stt transcribe() + * + * The model is loaded lazily on first toggle(). Subsequent calls skip + * the download. The model stays loaded for the session (same as local-llm). 
+ */ + +import { getLocalSttStatus, loadLocalStt, transcribe, isLocalSttSupported } from '@mana/local-stt'; +import type { LoadingStatus } from '@mana/local-stt'; + +export type SttState = 'idle' | 'loading' | 'recording' | 'transcribing'; + +export interface LocalSttHandle { + /** Current state of the STT pipeline */ + readonly state: SttState; + /** Transcribed text (updated after transcription completes) */ + readonly text: string; + /** Partial/streaming text (updated per chunk during transcription) */ + readonly partial: string; + /** Error message or null */ + readonly error: string | null; + /** Model loading status from @mana/local-stt */ + readonly modelStatus: LoadingStatus; + /** Elapsed recording time in ms */ + readonly elapsedMs: number; + /** Whether WebGPU/WASM STT is supported */ + readonly isSupported: boolean; + /** Start recording (loads model first if needed) or stop + transcribe */ + toggle: () => void; + /** Cancel recording without transcribing */ + cancel: () => void; +} + +export function useLocalStt(options?: { language?: string }): LocalSttHandle { + let state = $state('idle'); + let text = $state(''); + let partial = $state(''); + let error = $state(null); + let elapsedMs = $state(0); + + const modelStatus = getLocalSttStatus(); + const supported = isLocalSttSupported(); + + // Audio capture state (not reactive — internal only) + let stream: MediaStream | null = null; + let audioContext: AudioContext | null = null; + let chunks: Float32Array[] = []; + let sampleRate = 0; + let tickHandle: ReturnType | null = null; + let startedAt = 0; + + // ScriptProcessorNode is deprecated but universally supported and + // simpler than AudioWorklet for our use case (we just collect raw + // samples, no real-time processing). AudioWorklet requires a + // separate module URL which complicates bundling. 
+ let scriptNode: ScriptProcessorNode | null = null; + + function cleanup() { + if (tickHandle !== null) { + clearInterval(tickHandle); + tickHandle = null; + } + scriptNode?.disconnect(); + scriptNode = null; + stream?.getTracks().forEach((t) => t.stop()); + stream = null; + if (audioContext && audioContext.state !== 'closed') { + audioContext.close().catch(() => {}); + } + audioContext = null; + chunks = []; + sampleRate = 0; + elapsedMs = 0; + } + + async function startRecording() { + error = null; + text = ''; + partial = ''; + + // Ensure model is loaded first + if (modelStatus.current.state !== 'ready') { + state = 'loading'; + try { + await loadLocalStt(); + } catch (e) { + error = e instanceof Error ? e.message : String(e); + state = 'idle'; + return; + } + } + + // Get microphone access + state = 'recording'; + try { + stream = await navigator.mediaDevices.getUserMedia({ + audio: { + echoCancellation: true, + noiseSuppression: true, + autoGainControl: true, + }, + }); + } catch (e) { + error = explainMicError(e); + state = 'idle'; + return; + } + + // Set up AudioContext to capture raw PCM + audioContext = new AudioContext(); + sampleRate = audioContext.sampleRate; + const source = audioContext.createMediaStreamSource(stream); + + // Buffer size 4096 is a good balance between latency and overhead + scriptNode = audioContext.createScriptProcessor(4096, 1, 1); + scriptNode.onaudioprocess = (e) => { + const input = e.inputBuffer.getChannelData(0); + // Copy — the buffer is reused by the browser + chunks.push(new Float32Array(input)); + }; + source.connect(scriptNode); + scriptNode.connect(audioContext.destination); + + startedAt = Date.now(); + tickHandle = setInterval(() => { + elapsedMs = Date.now() - startedAt; + }, 100); + } + + async function stopAndTranscribe() { + if (state !== 'recording') return; + + // Stop recording + const capturedChunks = [...chunks]; + const capturedRate = sampleRate; + cleanup(); + + console.log( + '[local-stt] Captured', + 
capturedChunks.length, + 'chunks, sample rate:', + capturedRate + ); + + if (capturedChunks.length === 0) { + error = 'Keine Audiodaten aufgenommen.'; + console.warn('[local-stt] No audio chunks captured'); + state = 'idle'; + return; + } + + state = 'transcribing'; + + try { + // Merge chunks into one Float32Array + const totalLength = capturedChunks.reduce((sum, c) => sum + c.length, 0); + const merged = new Float32Array(totalLength); + let offset = 0; + for (const chunk of capturedChunks) { + merged.set(chunk, offset); + offset += chunk.length; + } + + // Resample to 16 kHz if needed + const audio = capturedRate === 16000 ? merged : resample(merged, capturedRate, 16000); + + const durationSec = audio.length / 16000; + console.log('[local-stt] Audio ready:', { + originalSamples: merged.length, + resampledSamples: audio.length, + durationSec: durationSec.toFixed(1), + sampleRate: capturedRate, + maxAmplitude: Math.max(...Array.from(audio.slice(0, 16000)).map(Math.abs)), + }); + + const result = await transcribe({ + audio, + language: options?.language, + onChunk: (t: string) => { + partial += t; + console.log('[local-stt] Chunk:', t); + }, + }); + + console.log('[local-stt] Result:', result); + text = result.text.trim(); + } catch (e) { + error = e instanceof Error ? 
e.message : String(e); + console.error('[local-stt] Transcription error:', e); + } + + state = 'idle'; + } + + function toggle() { + if (state === 'idle') { + startRecording(); + } else if (state === 'recording') { + stopAndTranscribe(); + } + // If loading or transcribing, ignore + } + + function cancel() { + cleanup(); + state = 'idle'; + } + + return { + get state() { + return state; + }, + get text() { + return text; + }, + get partial() { + return partial; + }, + get error() { + return error; + }, + get modelStatus() { + return modelStatus.current; + }, + get elapsedMs() { + return elapsedMs; + }, + get isSupported() { + return supported; + }, + toggle, + cancel, + }; +} + +// ─── Helpers ──────────────────────────────────────────────── + +/** + * Linear resample from sourceSampleRate to targetSampleRate. + * Simple and good enough for speech — no need for a polyphase filter. + */ +function resample(input: Float32Array, fromRate: number, toRate: number): Float32Array { + if (fromRate === toRate) return input; + const ratio = fromRate / toRate; + const outputLength = Math.round(input.length / ratio); + const output = new Float32Array(outputLength); + for (let i = 0; i < outputLength; i++) { + const srcIndex = i * ratio; + const lo = Math.floor(srcIndex); + const hi = Math.min(lo + 1, input.length - 1); + const frac = srcIndex - lo; + output[i] = input[lo] * (1 - frac) + input[hi] * frac; + } + return output; +} + +function explainMicError(e: unknown): string { + const err = e instanceof Error ? e : new Error(String(e)); + const name = err.name || ''; + const msg = err.message || ''; + + if (name === 'NotAllowedError' || /denied|permission/i.test(msg)) { + return 'Mikrofon-Zugriff verweigert. 
Erlaube den Zugriff in deinen Browser-Einstellungen.'; + } + if (name === 'NotFoundError' || /not.?found|no.?device/i.test(msg)) { + return 'Kein Mikrofon gefunden.'; + } + if (name === 'NotReadableError' || /in use|busy/i.test(msg)) { + return 'Mikrofon ist gerade belegt.'; + } + return `Mikrofon-Fehler: ${msg || name || 'Unbekannt'}`; +} diff --git a/apps/mana/apps/web/src/routes/(app)/+layout.svelte b/apps/mana/apps/web/src/routes/(app)/+layout.svelte index 74eec1e5a..dd87eb3fe 100644 --- a/apps/mana/apps/web/src/routes/(app)/+layout.svelte +++ b/apps/mana/apps/web/src/routes/(app)/+layout.svelte @@ -210,14 +210,17 @@ let aiTierItems = $derived([ // Tier toggles — browser tier item and its model-status buddy share a // group so PillDropdownBar renders them as a paired pill. - ...TIER_TOGGLE_LIST.filter((t) => t.tier !== 'browser' || webgpuSupported).map((t) => ({ - id: `ai-tier-${t.tier}`, - label: t.shortLabel, - icon: t.icon, - active: llmSettings.allowedTiers.includes(t.tier), - onClick: () => toggleAiTier(t.tier), - ...(t.tier === 'browser' ? { group: 'local-llm' } : {}), - })), + ...TIER_TOGGLE_LIST.filter((t) => t.tier !== 'browser' || webgpuSupported).map((t) => { + const isActive = llmSettings.allowedTiers.includes(t.tier); + return { + id: `ai-tier-${t.tier}`, + label: t.shortLabel, + icon: isActive ? 'checkCircle' : t.icon, + active: isActive, + onClick: () => toggleAiTier(t.tier), + ...(t.tier === 'browser' ? { group: 'local-llm' } : {}), + }; + }), // Browser model status / load button (grouped with the "Lokal" toggle). // Handles all LoadingStatus states so the user sees feedback during // download, initialization, and on error (e.g. worker crash). @@ -234,7 +237,7 @@ switch (state) { case 'ready': label = 'Geladen'; - icon = 'check'; + icon = 'checkCircle'; disabled = true; break; case 'downloading': @@ -280,16 +283,19 @@ // STT model selector — each model is a pill, active = currently selected ...(sttSupported ? 
(Object.entries(STT_MODELS) as [SttModelKey, (typeof STT_MODELS)[SttModelKey]][]).map( - ([key, model]) => ({ - id: `stt-model-${key}`, - label: model.displayName, - icon: 'mic' as const, - active: selectedSttModel === key, - onClick: () => { - selectedSttModel = key; - void loadLocalStt(key); - }, - }) + ([key, model]) => { + const isSelected = selectedSttModel === key; + return { + id: `stt-model-${key}`, + label: model.displayName, + icon: isSelected ? 'checkCircle' : 'mic', + active: isSelected, + onClick: () => { + selectedSttModel = key; + void loadLocalStt(key); + }, + }; + } ) : []), // STT model status (grouped with selected model) @@ -306,7 +312,7 @@ switch (state) { case 'ready': label = 'STT bereit'; - icon = 'check'; + icon = 'checkCircle'; disabled = true; break; case 'downloading': diff --git a/apps/mana/apps/web/vite.config.ts b/apps/mana/apps/web/vite.config.ts index 7110fab27..6d1a53c80 100644 --- a/apps/mana/apps/web/vite.config.ts +++ b/apps/mana/apps/web/vite.config.ts @@ -54,6 +54,14 @@ export default defineConfig({ server: { port: 5173, strictPort: true, + fs: { + // Allow serving files from the monorepo root so that workspace + // packages (e.g. @mana/local-llm's Web Worker entry) can be + // resolved by Vite's dev server. Without this, worker.ts in + // packages/local-llm triggers "request url is outside of Vite + // serving allow list". + allow: ['../../../..'], + }, }, preview: { port: 4173, diff --git a/packages/local-stt/CLAUDE.md b/packages/local-stt/CLAUDE.md new file mode 100644 index 000000000..9d3d3b7b3 --- /dev/null +++ b/packages/local-stt/CLAUDE.md @@ -0,0 +1,195 @@ +# `@mana/local-stt` — Browser-Local Speech-to-Text + +Client-side speech-to-text that runs **entirely in the user's browser** via WebGPU (WASM fallback). No server roundtrips, no API keys, no audio leaving the device. Uses OpenAI's Whisper models through `@huggingface/transformers` v4 — the same library that powers `@mana/local-llm`. 
+ +**Don't confuse this with the server-side STT** (`services/mana-stt`). The server-side service runs Whisper on the GPU server (RTX 3090). This package is the **only** STT path that keeps audio on the user's device. + +## What's in the box + +| Field | Value | +|---|---| +| Engine library | [`@huggingface/transformers`](https://huggingface.co/docs/transformers.js/index) v4 (transformers.js) | +| Backend | WebGPU (primary), WASM (fallback) | +| Default model | `onnx-community/whisper-tiny` (~150 MB, multilingual) | +| Pipeline | `automatic-speech-recognition` (Whisper encoder-decoder) | +| Audio input | Float32Array, 16 kHz mono PCM | +| Chunking | 30s chunks with 5s stride overlap (handled by the pipeline) | + +## Available models + +| Key | Model | Size | English WER | Multilingual | +|-----|-------|------|------------|-------------| +| `whisper-tiny` | Whisper Tiny | ~150 MB | ~5.6% | Yes (auto-detect) | +| `whisper-tiny.en` | Whisper Tiny EN | ~150 MB | ~5.6% | No (English only) | +| `whisper-base` | Whisper Base | ~290 MB | ~4.3% | Yes | +| `whisper-base.en` | Whisper Base EN | ~290 MB | ~4.3% | No | +| `whisper-small` | Whisper Small | ~950 MB | ~3.4% | Yes | + +Default is `whisper-tiny` — smallest, fastest, multilingual. Users can switch in settings. + +## Architecture + +Mirrors `@mana/local-llm` exactly: + +``` +Consumer (Svelte component) + │ + ▼ +svelte.svelte.ts — reactive status ($state), loadLocalStt(), transcribe() + │ + ▼ +engine.ts — main-thread proxy (LocalSttEngine singleton) + │ postMessage / onmessage + ▼ +worker.ts — Web Worker entry point + │ + ▼ +engine-impl.ts — transformers.js pipeline('automatic-speech-recognition') + │ + ▼ +@huggingface/transformers — ONNX runtime (WebGPU or WASM) +``` + +The Web Worker isolates the heavy Whisper inference (~3-5s for 60s audio on WebGPU) from the main thread. Audio processing never blocks the UI. + +## API surface (Svelte 5 usage) + +```svelte + + +{#if !supported} +

+  <p>WebGPU not available.</p>

+{:else if status.current.state === 'downloading'} +

+  <p>Downloading: {(status.current.progress * 100).toFixed(0)}%</p>

+{:else if status.current.state === 'ready'} + +{/if} +``` + +Status union: `idle | checking | downloading | loading | ready | error` (same as `@mana/local-llm`). + +## Audio input format + +The `transcribe()` function expects **Float32Array of 16 kHz mono PCM** samples (values -1.0 to 1.0). The consumer is responsible for: + +1. Capturing audio (e.g. `navigator.mediaDevices.getUserMedia`) +2. Extracting raw PCM from the `AudioContext` +3. Resampling to 16 kHz if the mic runs at a different rate (typically 44.1/48 kHz) + +The high-level `useLocalStt()` composable in `apps/mana/apps/web/src/lib/components/voice/use-local-stt.svelte.ts` handles all of this automatically. + +## High-level composable: `useLocalStt()` + +Located at `apps/mana/apps/web/src/lib/components/voice/use-local-stt.svelte.ts`. Combines mic capture + resampling + transcription in one call: + +```svelte + + + +

+  <p>{stt.text}</p>

+``` + +Audio pipeline inside the composable: + +``` +getUserMedia (native sample rate, e.g. 48 kHz) + → AudioContext + ScriptProcessorNode → collect Float32 chunks + → on stop: merge all chunks + linear resample to 16 kHz mono + → transcribe() via @mana/local-stt worker + → text result +``` + +## UI integration + +The QuickInputBar in `(app)/+layout.svelte` has a mic button (left slot) that uses `useLocalStt()`: + +- **Idle**: Microphone icon +- **Loading**: Disabled, pulsing (model downloading) +- **Recording**: Red stop icon with pulse animation +- **Transcribing**: Disabled, fading + +When transcription completes, the text is fed into `inputBarAdapter.onCreate()` — making it context-aware: on `/todo` it creates a task, on `/calendar` an event, on `/` it searches. + +## CSP requirements + +Same as `@mana/local-llm` — no new CSP rules needed. The existing config in `apps/mana/apps/web/src/hooks.server.ts` already allows: + +- `script-src`: `'wasm-unsafe-eval'`, `https://cdn.jsdelivr.net`, `blob:` +- `connect-src`: `https://huggingface.co`, `https://*.huggingface.co`, `https://*.hf.co`, `https://cdn.jsdelivr.net` + +## Browser cache + +Models are cached in the browser Cache API under HuggingFace URLs (same as local-llm). `hasModelInCache(modelId)` probes for `config.json` to detect cached models. After first download, subsequent loads are instant. + +## Browser support + +- WebGPU: Chrome/Edge 113+, Safari 18+ (fastest, ~3-5s for 60s audio) +- WASM fallback: all modern browsers (~15-20s for 60s audio) +- Requires `getUserMedia` for mic access (HTTPS or localhost) + +## Adding a new model + +Add an entry to `src/models.ts`: + +```ts +'whisper-medium': { + modelId: 'onnx-community/whisper-medium', + displayName: 'Whisper Medium', + dtype: 'fp32', + downloadSizeMb: 3000, + ramUsageMb: 4000, +}, +``` + +The model must be an ONNX build on HuggingFace with a Whisper architecture. + +## Relationship to existing voice features + +| Component | Purpose | Uses local-stt? 
| +|-----------|---------|----------------| +| `voiceRecorder` singleton | Record audio as Blob (webm/opus) for server transcription | No | +| `VoiceCaptureBar` | UI bar for dreams/memoro voice capture → sends to mana-stt server | No | +| `useLocalStt()` | Record + transcribe entirely on-device | **Yes** | +| QuickInputBar mic button | Voice-to-text for any module via useLocalStt | **Yes** | + +The existing `voiceRecorder` and `VoiceCaptureBar` are still used for features that need server-side processing (e.g. dreams with server STT). `useLocalStt()` is the privacy-first alternative that never sends audio off-device. diff --git a/packages/local-stt/package.json b/packages/local-stt/package.json new file mode 100644 index 000000000..1e4bf2b8f --- /dev/null +++ b/packages/local-stt/package.json @@ -0,0 +1,26 @@ +{ + "name": "@mana/local-stt", + "version": "0.1.0", + "private": true, + "description": "Client-side speech-to-text via transformers.js (Whisper, WebGPU) with Svelte 5 reactive stores", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts" + }, + "scripts": { + "type-check": "tsc --noEmit", + "clean": "rm -rf dist" + }, + "dependencies": { + "@huggingface/transformers": "^4.0.0" + }, + "devDependencies": { + "@types/node": "^24.10.1", + "svelte": "^5.0.0", + "typescript": "^5.9.3" + }, + "peerDependencies": { + "svelte": "^5.0.0" + } +} diff --git a/packages/local-stt/src/cache.ts b/packages/local-stt/src/cache.ts new file mode 100644 index 000000000..f3c9adda9 --- /dev/null +++ b/packages/local-stt/src/cache.ts @@ -0,0 +1,22 @@ +/** + * Check if a Whisper model is already cached in the browser. + * + * Same approach as @mana/local-llm: probe for the model's config.json + * in the Cache API. Whisper models always have this file and it's + * downloaded first, so its presence reliably indicates "downloaded before". 
+ */ +export async function hasModelInCache(modelId: string): Promise { + if (typeof caches === 'undefined') return false; + try { + const cacheNames = await caches.keys(); + const url = `https://huggingface.co/${modelId}/resolve/main/config.json`; + for (const name of cacheNames) { + const cache = await caches.open(name); + const match = await cache.match(url); + if (match) return true; + } + return false; + } catch { + return false; + } +} diff --git a/packages/local-stt/src/engine-impl.ts b/packages/local-stt/src/engine-impl.ts new file mode 100644 index 000000000..2add999fd --- /dev/null +++ b/packages/local-stt/src/engine-impl.ts @@ -0,0 +1,231 @@ +/** + * LocalSttEngineImpl — the actual transformers.js Whisper engine. + * + * Runs inside a Web Worker (worker.ts). The main thread never + * instantiates this directly — it talks to a thin proxy in engine.ts + * that postMessages over to the worker. + * + * Whisper processes audio in 30-second chunks. For longer recordings + * the pipeline handles chunking internally via `chunk_length_s`. + * We expose pseudo-streaming by forwarding each chunk's text via + * the onChunk callback as it completes. 
+ */ + +import type { + TranscribeOptions, + TranscribeResult, + TranscribeSegment, + LoadingStatus, + SttModelConfig, +} from './types'; +import { MODELS, DEFAULT_MODEL, type ModelKey } from './models'; + +type TransformersModule = typeof import('@huggingface/transformers'); + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +type AnyPipeline = any; + +export class LocalSttEngineImpl { + private pipeline: AnyPipeline = null; + private transformers: TransformersModule | null = null; + private loadPromise: Promise | null = null; + private currentModel: ModelKey | null = null; + private _status: LoadingStatus = { state: 'idle' }; + private statusListeners: Set<(status: LoadingStatus) => void> = new Set(); + + get status(): LoadingStatus { + return this._status; + } + + get isReady(): boolean { + return this._status.state === 'ready'; + } + + get modelConfig(): SttModelConfig | null { + return this.currentModel ? MODELS[this.currentModel] : null; + } + + onStatusChange(listener: (status: LoadingStatus) => void): () => void { + this.statusListeners.add(listener); + return () => this.statusListeners.delete(listener); + } + + private setStatus(status: LoadingStatus) { + this._status = status; + for (const listener of this.statusListeners) { + listener(status); + } + } + + static isSupported(): boolean { + return typeof navigator !== 'undefined' && 'gpu' in navigator; + } + + async load(model: ModelKey = DEFAULT_MODEL): Promise { + if (this.pipeline && this.currentModel === model) return; + if (this.loadPromise && this.currentModel === model) return this.loadPromise; + if (this.pipeline && this.currentModel !== model) { + await this.unload(); + } + this.currentModel = model; + this.loadPromise = this._load(model); + return this.loadPromise; + } + + private async _load(model: ModelKey): Promise { + this.setStatus({ state: 'checking' }); + + try { + if (!this.transformers) { + this.transformers = await import('@huggingface/transformers'); + } + + const config 
= MODELS[model]; + + // Aggregated download progress tracking (same pattern as local-llm). + const fileProgress = new Map(); + + const formatBytes = (bytes: number): string => { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`; + if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(0)} MB`; + return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`; + }; + + const emitAggregate = () => { + let totalLoaded = 0; + let totalSize = 0; + for (const { loaded, total } of fileProgress.values()) { + totalLoaded += loaded; + totalSize += total; + } + const pct = totalSize > 0 ? totalLoaded / totalSize : 0; + this.setStatus({ + state: 'downloading', + progress: pct, + text: + totalSize > 0 + ? `Downloading model (${(pct * 100).toFixed(0)}%, ${formatBytes(totalLoaded)} / ${formatBytes(totalSize)})` + : `Downloading model (${fileProgress.size} files queued)`, + }); + }; + + const progressCallback = (report: { + status: string; + file?: string; + name?: string; + progress?: number; + loaded?: number; + total?: number; + }) => { + const file = report.file ?? report.name ?? '_unknown'; + if (report.status === 'initiate') { + if (!fileProgress.has(file)) fileProgress.set(file, { loaded: 0, total: 0 }); + emitAggregate(); + } else if (report.status === 'download' || report.status === 'progress') { + fileProgress.set(file, { + loaded: report.loaded ?? 0, + total: report.total ?? fileProgress.get(file)?.total ?? 0, + }); + emitAggregate(); + } else if (report.status === 'done') { + const existing = fileProgress.get(file); + if (existing && existing.total > 0) { + fileProgress.set(file, { loaded: existing.total, total: existing.total }); + } + emitAggregate(); + } + }; + + this.setStatus({ state: 'loading', text: 'Loading Whisper pipeline…' }); + + // Use transformers.js pipeline() API for automatic-speech-recognition. + // This handles model + processor + tokenizer loading in one call. 
+ // Device selection: try WebGPU first, fall back to WASM. + const device = LocalSttEngineImpl.isSupported() ? 'webgpu' : 'wasm'; + + this.pipeline = await this.transformers.pipeline( + 'automatic-speech-recognition', + config.modelId, + { + dtype: config.dtype, + device, + progress_callback: progressCallback, + } + ); + + this.setStatus({ state: 'ready' }); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + this.setStatus({ state: 'error', error: message }); + this.loadPromise = null; + throw err; + } + } + + async unload(): Promise { + this.pipeline = null; + this.currentModel = null; + this.loadPromise = null; + this.setStatus({ state: 'idle' }); + } + + async transcribe(options: TranscribeOptions): Promise { + if (!this.pipeline) { + await this.load(); + } + + const start = performance.now(); + + // Build pipeline options. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const pipelineOpts: Record = { + // Chunk long audio into 30s segments with 5s stride overlap. + chunk_length_s: 30, + stride_length_s: 5, + // Return timestamps if requested. + return_timestamps: options.timestamps ? true : false, + }; + + if (options.language) { + pipelineOpts.language = options.language; + } + + // Callback for pseudo-streaming: transformers.js emits partial + // results per chunk via the `chunk_callback` option. + if (options.onChunk) { + const onChunk = options.onChunk; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + pipelineOpts.chunk_callback = (chunk: any) => { + if (chunk?.text) { + onChunk(chunk.text); + } + }; + } + + // Run the pipeline. Input is Float32Array of 16kHz mono PCM. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const output: any = await this.pipeline(options.audio, pipelineOpts); + + const latencyMs = Math.round(performance.now() - start); + + // Parse output — the pipeline returns { text, chunks? } for + // automatic-speech-recognition with return_timestamps. 
+ const text: string = output.text ?? ''; + const language: string = options.language ?? 'auto'; + + let segments: TranscribeSegment[] | undefined; + if (options.timestamps && output.chunks) { + segments = output.chunks.map( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (c: any) => ({ + start: c.timestamp?.[0] ?? 0, + end: c.timestamp?.[1] ?? 0, + text: c.text ?? '', + }) + ); + } + + return { text, language, segments, latencyMs }; + } +} diff --git a/packages/local-stt/src/engine.ts b/packages/local-stt/src/engine.ts new file mode 100644 index 000000000..ce9527fa2 --- /dev/null +++ b/packages/local-stt/src/engine.ts @@ -0,0 +1,151 @@ +/** + * LocalSttEngine — main-thread proxy for the worker-hosted Whisper engine. + * + * Public API mirrors the engine-impl but all work happens in a Web Worker + * so audio processing doesn't block the UI thread. + * + * Lazy construction: the Worker is only instantiated on first method call. + * This keeps import-time side effects to zero (SSR-safe). + */ + +import { MODELS, DEFAULT_MODEL, type ModelKey } from './models'; +import type { TranscribeOptions, TranscribeResult, LoadingStatus, SttModelConfig } from './types'; +import type { SerializableTranscribeOptions, WorkerRequest, WorkerResponse } from './worker'; + +interface PendingRequest { + resolve: (data: unknown) => void; + reject: (err: Error) => void; + onChunk?: (text: string) => void; +} + +type DistributiveOmit = T extends unknown ? 
Omit : never; +type WorkerRequestPayload = DistributiveOmit; + +export class LocalSttEngine { + private worker: Worker | null = null; + private pending = new Map(); + private nextId = 0; + private currentModel: ModelKey | null = null; + private _status: LoadingStatus = { state: 'idle' }; + private statusListeners: Set<(status: LoadingStatus) => void> = new Set(); + + get status(): LoadingStatus { + return this._status; + } + + get isReady(): boolean { + return this._status.state === 'ready'; + } + + get modelConfig(): SttModelConfig | null { + return this.currentModel ? MODELS[this.currentModel] : null; + } + + onStatusChange(listener: (status: LoadingStatus) => void): () => void { + this.statusListeners.add(listener); + return () => this.statusListeners.delete(listener); + } + + private setStatus(status: LoadingStatus) { + this._status = status; + for (const listener of this.statusListeners) { + listener(status); + } + } + + static isSupported(): boolean { + return typeof navigator !== 'undefined' && 'gpu' in navigator; + } + + // ─── Worker management ────────────────────────────────── + + private getWorker(): Worker { + if (this.worker) return this.worker; + + if (typeof Worker === 'undefined') { + throw new Error('@mana/local-stt requires a browser environment (Worker is not defined)'); + } + + this.worker = new Worker(new URL('./worker.ts', import.meta.url), { + type: 'module', + name: 'mana-local-stt', + }); + + this.worker.addEventListener('message', this.handleWorkerMessage); + this.worker.addEventListener('error', (e) => { + const message = e.message || 'Worker crashed'; + for (const [id, p] of this.pending) { + p.reject(new Error(`Worker error: ${message}`)); + this.pending.delete(id); + } + this.setStatus({ state: 'error', error: message }); + }); + + return this.worker; + } + + private handleWorkerMessage = (event: MessageEvent) => { + const msg = event.data; + + if (msg.type === 'status') { + this.setStatus(msg.status); + return; + } + + if (msg.type 
=== 'chunk') { + const pending = this.pending.get(msg.id); + pending?.onChunk?.(msg.text); + return; + } + + const pending = this.pending.get(msg.id); + if (!pending) return; + this.pending.delete(msg.id); + + if (msg.type === 'result') { + pending.resolve(msg.data); + } else { + pending.reject(new Error(msg.message)); + } + }; + + private postRequest(req: WorkerRequestPayload, onChunk?: (text: string) => void): Promise { + const id = `${++this.nextId}`; + const worker = this.getWorker(); + + return new Promise((resolve, reject) => { + this.pending.set(id, { + resolve: (data) => resolve(data as T), + reject, + onChunk, + }); + worker.postMessage({ ...req, id } as WorkerRequest); + }); + } + + // ─── Public API ────────────────────────────────────────── + + async load(model: ModelKey = DEFAULT_MODEL): Promise { + if (this.currentModel === model && this.isReady) return; + this.currentModel = model; + await this.postRequest({ type: 'load', modelKey: model }); + } + + async unload(): Promise { + if (!this.worker) return; + await this.postRequest({ type: 'unload' }); + this.currentModel = null; + this.worker.terminate(); + this.worker = null; + this.pending.clear(); + } + + async transcribe(options: TranscribeOptions): Promise { + const { onChunk, ...rest } = options; + const opts: SerializableTranscribeOptions = rest; + return this.postRequest({ type: 'transcribe', opts }, onChunk); + } +} + +/** Singleton instance for app-wide use */ +export const localSTT = new LocalSttEngine(); diff --git a/packages/local-stt/src/index.ts b/packages/local-stt/src/index.ts new file mode 100644 index 000000000..0829f2037 --- /dev/null +++ b/packages/local-stt/src/index.ts @@ -0,0 +1,28 @@ +// Engine +export { LocalSttEngine, localSTT } from './engine'; + +// Models +export { MODELS, DEFAULT_MODEL } from './models'; +export type { ModelKey } from './models'; + +// Types +export type { + TranscribeOptions, + TranscribeResult, + TranscribeSegment, + SttModelConfig, + LoadingStatus, + 
TranscriptionStatus, +} from './types'; + +// Cache utilities +export { hasModelInCache } from './cache'; + +// Svelte 5 reactive helpers +export { + getLocalSttStatus, + loadLocalStt, + unloadLocalStt, + isLocalSttSupported, + transcribe, +} from './svelte.svelte'; diff --git a/packages/local-stt/src/models.ts b/packages/local-stt/src/models.ts new file mode 100644 index 000000000..5656334ad --- /dev/null +++ b/packages/local-stt/src/models.ts @@ -0,0 +1,38 @@ +import type { SttModelConfig } from './types'; + +/** + * Pre-configured Whisper models for client-side speech-to-text. + * + * All models are ONNX builds loaded via @huggingface/transformers (transformers.js) + * with the WebGPU backend. English-only variants are smaller and faster for + * single-language use; multilingual models auto-detect the spoken language. + * + * Model quality/size trade-off (English WER on LibriSpeech test-clean): + * tiny.en: ~5.6% — 39M params, very fast, good enough for dictation + * base.en: ~4.3% — 74M params, noticeably better on accents/noise + * small.en: ~3.4% — 244M params, near-human accuracy, slower + * tiny: ~7.6% — multilingual, auto-detects language + * base: ~5.0% — multilingual + * small: ~3.9% — multilingual + */ + +export const MODELS = { + 'whisper-tiny': { + modelId: 'onnx-community/whisper-tiny', + displayName: 'Whisper Tiny', + dtype: 'fp32', + downloadSizeMb: 150, + ramUsageMb: 300, + }, + 'whisper-small': { + modelId: 'onnx-community/whisper-small', + displayName: 'Whisper Small', + dtype: 'fp32', + downloadSizeMb: 950, + ramUsageMb: 1500, + }, +} as const satisfies Record; + +export type ModelKey = keyof typeof MODELS; + +export const DEFAULT_MODEL: ModelKey = 'whisper-tiny'; diff --git a/packages/local-stt/src/svelte.svelte.ts b/packages/local-stt/src/svelte.svelte.ts new file mode 100644 index 000000000..4ff91e5c0 --- /dev/null +++ b/packages/local-stt/src/svelte.svelte.ts @@ -0,0 +1,56 @@ +/** + * Svelte 5 reactive integration for LocalSttEngine. 
+ * + * Usage in a Svelte component: + * import { getLocalSttStatus, loadLocalStt, transcribe } from '@mana/local-stt'; + * + * const status = getLocalSttStatus(); + * loadLocalStt(); + * // use status.current reactively + */ + +import { LocalSttEngine, localSTT } from './engine'; +import type { LoadingStatus, TranscribeOptions, TranscribeResult } from './types'; +import type { ModelKey } from './models'; + +let _status = $state({ state: 'idle' }); + +localSTT.onStatusChange((s) => { + _status = s; +}); + +export function getLocalSttStatus(): { readonly current: LoadingStatus } { + return { + get current() { + return _status; + }, + }; +} + +/** + * Load a Whisper model. Safe to call multiple times (idempotent). + */ +export async function loadLocalStt(model?: ModelKey): Promise { + return localSTT.load(model); +} + +/** + * Unload the model and free memory. + */ +export async function unloadLocalStt(): Promise { + return localSTT.unload(); +} + +/** + * Check if WebGPU is available for accelerated STT. + */ +export function isLocalSttSupported(): boolean { + return LocalSttEngine.isSupported(); +} + +/** + * Transcribe audio to text. + */ +export async function transcribe(options: TranscribeOptions): Promise { + return localSTT.transcribe(options); +} diff --git a/packages/local-stt/src/types.ts b/packages/local-stt/src/types.ts new file mode 100644 index 000000000..707451296 --- /dev/null +++ b/packages/local-stt/src/types.ts @@ -0,0 +1,63 @@ +/** + * Types for client-side speech-to-text inference. + */ + +export interface TranscribeOptions { + /** Raw audio data (Float32Array of PCM samples at 16 kHz mono) */ + audio: Float32Array; + /** Language code (e.g. 'de', 'en'). If omitted, auto-detected. 
*/ + language?: string; + /** Whether to return timestamps per segment */ + timestamps?: boolean; + /** Callback for each transcribed chunk (pseudo-streaming) */ + onChunk?: (text: string) => void; +} + +export interface TranscribeResult { + /** Full transcribed text */ + text: string; + /** Detected or forced language */ + language: string; + /** Per-segment timestamps (if requested) */ + segments?: TranscribeSegment[]; + /** Transcription time in ms */ + latencyMs: number; +} + +export interface TranscribeSegment { + /** Start time in seconds */ + start: number; + /** End time in seconds */ + end: number; + /** Segment text */ + text: string; +} + +export interface SttModelConfig { + /** HuggingFace ONNX repo id */ + modelId: string; + /** Human-readable name */ + displayName: string; + /** Quantization level */ + dtype: 'fp32' | 'fp16' | 'q8' | 'q4' | 'q4f16'; + /** Approximate download size in MB */ + downloadSizeMb: number; + /** Approximate RAM/VRAM usage in MB */ + ramUsageMb: number; + /** Whether this is an English-only model */ + englishOnly?: boolean; +} + +export type LoadingStatus = + | { state: 'idle' } + | { state: 'checking' } + | { state: 'downloading'; progress: number; text: string } + | { state: 'loading'; text: string } + | { state: 'ready' } + | { state: 'error'; error: string }; + +export type TranscriptionStatus = + | { state: 'idle' } + | { state: 'recording' } + | { state: 'transcribing'; progress?: number } + | { state: 'done'; text: string }; diff --git a/packages/local-stt/src/worker.ts b/packages/local-stt/src/worker.ts new file mode 100644 index 000000000..82f1b30eb --- /dev/null +++ b/packages/local-stt/src/worker.ts @@ -0,0 +1,96 @@ +/** + * Web Worker entry point for @mana/local-stt. + * + * Runs in a Dedicated Worker context, owns a single LocalSttEngineImpl + * instance, and exchanges messages with the main thread proxy (engine.ts). 
+ * + * Protocol: + * + * Main → Worker (WorkerRequest): + * { id, type: 'load', modelKey: ModelKey } + * { id, type: 'unload' } + * { id, type: 'transcribe', opts: SerializableTranscribeOptions } + * { id, type: 'isReady' } + * + * Worker → Main (WorkerResponse): + * { id, type: 'result', data?: unknown } + * { id, type: 'error', message: string } + * { id, type: 'chunk', text: string } — streaming chunk + * { type: 'status', status: LoadingStatus } — broadcast, no id + */ + +import { LocalSttEngineImpl } from './engine-impl'; +import type { LoadingStatus, TranscribeOptions } from './types'; +import type { ModelKey } from './models'; + +// ─── Protocol types (mirrored in engine.ts) ──────────────────── + +export type SerializableTranscribeOptions = Omit; + +export type WorkerRequest = + | { id: string; type: 'load'; modelKey: ModelKey } + | { id: string; type: 'unload' } + | { id: string; type: 'transcribe'; opts: SerializableTranscribeOptions } + | { id: string; type: 'isReady' }; + +export type WorkerResponse = + | { id: string; type: 'result'; data?: unknown } + | { id: string; type: 'error'; message: string } + | { id: string; type: 'chunk'; text: string } + | { type: 'status'; status: LoadingStatus }; + +// ─── Worker setup ────────────────────────────────────────────── + +const engine = new LocalSttEngineImpl(); + +// Forward all status changes to the main thread as broadcast messages. 
+engine.onStatusChange((status) => { + postMessage({ type: 'status', status } satisfies WorkerResponse); +}); + +self.addEventListener('message', async (event: MessageEvent) => { + const req = event.data; + + try { + switch (req.type) { + case 'load': { + await engine.load(req.modelKey); + postMessage({ id: req.id, type: 'result' } satisfies WorkerResponse); + break; + } + + case 'unload': { + await engine.unload(); + postMessage({ id: req.id, type: 'result' } satisfies WorkerResponse); + break; + } + + case 'isReady': { + postMessage({ + id: req.id, + type: 'result', + data: engine.isReady, + } satisfies WorkerResponse); + break; + } + + case 'transcribe': { + const result = await engine.transcribe({ + ...req.opts, + onChunk: (text) => { + postMessage({ id: req.id, type: 'chunk', text } satisfies WorkerResponse); + }, + }); + postMessage({ + id: req.id, + type: 'result', + data: result, + } satisfies WorkerResponse); + break; + } + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + postMessage({ id: req.id, type: 'error', message } satisfies WorkerResponse); + } +}); diff --git a/packages/local-stt/tsconfig.json b/packages/local-stt/tsconfig.json new file mode 100644 index 000000000..897ca8cba --- /dev/null +++ b/packages/local-stt/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "lib": ["ES2022", "DOM"], + "strict": true, + "noEmit": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules"] +} diff --git a/packages/shared-ui/src/index.ts b/packages/shared-ui/src/index.ts index 5878ff09d..40cadac0b 100644 --- a/packages/shared-ui/src/index.ts +++ b/packages/shared-ui/src/index.ts @@ -100,6 +100,7 @@ export { SidebarSection, PillNavigation, PillDropdown, + PillDropdownBar, AppDrawer, GlobalSpotlight, createGlobalSpotlightState, @@ -129,6 +130,7 @@ export type { PillNavItem, PillDropdownItem, PillNavElement, + PillBarConfig, PillNavigationProps, PillTabOption, PillTabGroupConfig, diff --git a/packages/shared-ui/src/navigation/PillDropdownBar.svelte b/packages/shared-ui/src/navigation/PillDropdownBar.svelte new file mode 100644 index 000000000..694f0766d --- /dev/null +++ b/packages/shared-ui/src/navigation/PillDropdownBar.svelte @@ -0,0 +1,510 @@ + + + + + diff --git a/packages/shared-ui/src/navigation/PillNavigation.svelte b/packages/shared-ui/src/navigation/PillNavigation.svelte index aa2098db5..37fd427b6 100644 --- a/packages/shared-ui/src/navigation/PillNavigation.svelte +++ b/packages/shared-ui/src/navigation/PillNavigation.svelte @@ -7,6 +7,7 @@ PillTabGroupConfig, PillTagSelectorConfig, PillAppItem, + PillBarConfig, SpotlightAction, ContentSearcher, } from './types'; @@ -32,6 +33,7 @@ CheckCircle, CheckSquare, Clock, + Cloud, Columns, Compass, CreditCard, @@ -140,6 +142,7 @@ share: ShareFat, trash: Trash, filter: Funnel, + cloud: Cloud, }; // 
Convert app items to dropdown items (will be computed as derived) @@ -326,6 +329,14 @@ helpHref?: string; /** Bottom offset from viewport bottom (default: '0px'). Use to position above other fixed bars. */ bottomOffset?: string; + /** When provided, dropdown triggers (theme, AI tier, sync, user menu) render + * as plain pills that call this callback with a bar config instead of + * opening their in-place PillDropdown popover. The host is expected to + * render the returned items in its own bar (e.g. bottom-stack). Pass null + * to request closing the active bar. */ + onOpenBar?: (config: PillBarConfig | null) => void; + /** Id of the bar currently open in the host. Used to highlight the trigger pill. */ + activeBarId?: string | null; } let { @@ -386,8 +397,192 @@ guestMenuLabel = 'Menü', helpHref, bottomOffset = '0px', + onOpenBar, + activeBarId = null, }: Props = $props(); + // Whether this nav should surface dropdowns as bars instead of popovers. + const barMode = $derived(!!onOpenBar); + + // Build the flat PillDropdownItem list for each bar, matching what the + // equivalent PillDropdown would render. Mode toggles + variants + a11y + // toggles for theme; tier/sync items pass through; user menu is assembled + // from the same rules as the PillDropdown below. 
+ const themeBarItems = $derived.by(() => { + const out: PillDropdownItem[] = []; + if (onThemeModeChange) { + out.push( + { + id: 'theme-mode-light', + label: 'Light', + icon: 'sun', + group: 'theme-mode', + onClick: () => onThemeModeChange('light'), + active: themeMode === 'light', + }, + { + id: 'theme-mode-dark', + label: 'Dark', + icon: 'moon', + group: 'theme-mode', + onClick: () => onThemeModeChange('dark'), + active: themeMode === 'dark', + }, + { + id: 'theme-mode-system', + label: 'System', + icon: 'settings', + group: 'theme-mode', + onClick: () => onThemeModeChange('system'), + active: themeMode === 'system', + } + ); + } + if (themeVariantItems.length > 0) { + if (out.length > 0) out.push({ id: 'theme-variants-div', label: '', divider: true }); + for (const v of themeVariantItems) out.push(v); + } + if (showA11yQuickToggles) { + out.push({ id: 'a11y-div', label: '', divider: true }); + if (onA11yContrastChange) { + out.push({ + id: 'a11y-contrast', + label: 'Hoher Kontrast', + icon: 'sun', + onClick: () => onA11yContrastChange(a11yContrast === 'high' ? 
'normal' : 'high'), + active: a11yContrast === 'high', + }); + } + if (onA11yReduceMotionChange) { + out.push({ + id: 'a11y-reduce-motion', + label: 'Animationen reduzieren', + icon: 'check', + onClick: () => onA11yReduceMotionChange(!a11yReduceMotion), + active: a11yReduceMotion, + }); + } + } + return out; + }); + + const userBarItems = $derived.by(() => { + const out: PillDropdownItem[] = []; + if (userEmail && profileHref) { + out.push({ + id: 'profile', + label: 'Profil', + icon: 'user', + onClick: () => { + window.location.href = profileHref!; + }, + active: currentPath === profileHref, + }); + } + out.push({ + id: 'settings', + label: 'Einstellungen', + icon: 'settings', + onClick: () => { + window.location.href = settingsHref; + }, + active: currentPath === settingsHref, + }); + if (userEmail && manaHref) { + out.push({ + id: 'mana', + label: 'Mana', + icon: 'sparkle', + onClick: () => { + window.location.href = manaHref!; + }, + active: currentPath === manaHref, + }); + } + if (spiralHref) { + out.push({ + id: 'spiral', + label: 'Spiral', + icon: 'spiral', + onClick: () => { + window.location.href = spiralHref!; + }, + active: currentPath === spiralHref, + }); + } + if (creditsHref) { + out.push({ + id: 'credits', + label: 'Credits', + icon: 'creditCard', + onClick: () => { + window.location.href = creditsHref!; + }, + active: currentPath === creditsHref, + }); + } + if (userEmail && feedbackHref) { + out.push({ + id: 'feedback', + label: 'Feedback', + icon: 'chat', + onClick: () => { + window.location.href = feedbackHref!; + }, + active: currentPath === feedbackHref, + }); + } + if (helpHref) { + out.push({ + id: 'help', + label: 'Hilfe', + icon: 'help', + onClick: () => { + window.location.href = helpHref!; + }, + active: currentPath === helpHref, + }); + } + if (showLanguageSwitcher && languageItems.length > 0) { + out.push({ id: 'language-div', label: '', divider: true }); + out.push({ + id: 'language', + label: currentLanguageLabel, + submenu: 
languageItems.map((item) => ({ ...item, id: `lang-${item.id}` })), + }); + } + out.push({ id: 'auth-div', label: '', divider: true }); + if (userEmail && showLogout && onLogout) { + out.push({ + id: 'logout', + label: 'Logout', + icon: 'logout', + onClick: () => onLogout!(), + danger: true, + }); + } else if (!userEmail && loginHref) { + out.push({ + id: 'login', + label: 'Anmelden', + icon: 'user', + primary: true, + onClick: () => { + window.location.href = loginHref!; + }, + }); + } + return out; + }); + + function toggleBar(config: PillBarConfig) { + if (!onOpenBar) return; + if (activeBarId === config.id) { + onOpenBar(null); + } else { + onOpenBar(config); + } + } + // Type guards for elements function isTabGroup(element: PillNavElement): element is PillTabGroupConfig { return 'type' in element && element.type === 'tabs'; @@ -506,6 +701,9 @@ oncontextmenu={item.onContextMenu} class="pill glass-pill" class:active={item.active} + class:icon-only={item.iconOnly} + aria-label={item.iconOnly ? item.label : undefined} + title={item.iconOnly ? 
item.label : undefined} > {#if item.icon} {#if item.icon === 'mana'} @@ -521,7 +719,9 @@ {/if} {/if} - {item.label} + {#if !item.iconOnly} + {item.label} + {/if} {:else} {#if item.icon} {#if item.icon === 'mana'} @@ -544,7 +747,9 @@ {/if} {/if} - {item.label} + {#if !item.iconOnly} + {item.label} + {/if} {/if} {/each} @@ -587,7 +792,24 @@ {/each} - {#if showThemeVariants && themeVariantItems.length > 0} + {#if showThemeVariants && themeVariantItems.length > 0 && barMode} + {@const themeConfig = { + id: 'theme', + label: '', + icon: undefined, + items: themeBarItems, + }} + + {:else if showThemeVariants && themeVariantItems.length > 0} - {#if showAiTierSelector && aiTierItems.length > 0} + {#if showAiTierSelector && aiTierItems.length > 0 && barMode} + {@const aiProgress = aiTierItems.find((i) => i.progress != null)?.progress} + {@const aiConfig = { + id: 'ai', + label: '', + icon: undefined, + items: aiTierItems, + progress: aiProgress, + }} + {@const AiIcon = phosphorIcons[currentAiTierIcon]} + + {:else if showAiTierSelector && aiTierItems.length > 0} - {#if showSyncStatus && syncStatusItems.length > 0} + {#if showSyncStatus && syncStatusItems.length > 0 && barMode} + {@const syncConfig = { + id: 'sync', + label: currentSyncLabel, + icon: 'cloud', + items: syncStatusItems, + }} + + {:else if showSyncStatus && syncStatusItems.length > 0} - {#if userEmail || loginHref} + {#if (userEmail || loginHref) && barMode} + {@const userLabel = userEmail ? truncateEmail(userEmail) : guestMenuLabel} + {@const userConfig = { + id: 'user', + label: userLabel, + icon: 'user', + items: userBarItems, + }} + + {:else if userEmail || loginHref} void; + /** Show only the icon (hide the label). Label is still used for aria-label/title. */ + iconOnly?: boolean; +} + +/** Config passed when a PillNavigation dropdown should surface as a bar + * in the host's bottom stack instead of an in-place popover. */ +export interface PillBarConfig { + /** Stable id (e.g. 
'theme', 'ai', 'sync', 'user') */ + id: string; + /** Title shown at the start of the bar */ + label: string; + /** Icon name shown next to the title */ + icon?: string; + /** Items to render as pills */ + items: PillDropdownItem[]; + /** Progress value 0–1. When set, a progress ring is shown on the trigger pill. */ + progress?: number; } export interface PillDropdownItem { @@ -51,6 +68,10 @@ export interface PillDropdownItem { divider?: boolean; /** Nested submenu items */ submenu?: PillDropdownItem[]; + /** Group id — items sharing the same group are rendered as a segmented toggle pill */ + group?: string; + /** Progress value 0–1. When set, a circular progress ring is rendered around the icon. */ + progress?: number; /** Whether to show a split button for opening in panel */ showSplitButton?: boolean; /** Click handler for split button */ diff --git a/packages/shared-ui/src/quick-input/InputBar.svelte b/packages/shared-ui/src/quick-input/InputBar.svelte index 8e8fea8a0..4691b4246 100644 --- a/packages/shared-ui/src/quick-input/InputBar.svelte +++ b/packages/shared-ui/src/quick-input/InputBar.svelte @@ -76,6 +76,9 @@ locale?: string; /** Use 'static' when inside a flex container (bottom-stack pattern). Default: 'fixed'. */ positioning?: 'fixed' | 'static'; + /** Externally injected text (e.g. from voice input). When this changes + * to a non-empty string, the input bar's query is set and focused. */ + injectedText?: string; } let { @@ -106,6 +109,7 @@ highlightPatterns, locale = 'de', positioning = 'fixed', + injectedText, }: Props = $props(); // Use settings for autoFocus @@ -125,6 +129,18 @@ // Whether search has been explicitly triggered in deferred mode let searchTriggered = $state(false); + // External text injection (e.g. from voice-to-text). When the prop + // changes to a new non-empty value, set the search query and focus. 
+ let lastInjected = ''; + $effect(() => { + if (injectedText && injectedText !== lastInjected) { + lastInjected = injectedText; + searchQuery = injectedText; + // Focus the input so the user sees and can edit the text + requestAnimationFrame(() => inputElement?.focus()); + } + }); + // Context menu state let contextMenuVisible = $state(false); let contextMenuX = $state(0); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6f1d8ee16..5c714d599 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -976,6 +976,9 @@ importers: '@mana/local-store': specifier: workspace:* version: link:../../../../packages/local-store + '@mana/local-stt': + specifier: workspace:^ + version: link:../../../../packages/local-stt '@mana/qr-export': specifier: workspace:* version: link:../../../../packages/qr-export @@ -2698,6 +2701,22 @@ importers: specifier: ^5.9.3 version: 5.9.3 + packages/local-stt: + dependencies: + '@huggingface/transformers': + specifier: ^4.0.0 + version: 4.0.1 + devDependencies: + '@types/node': + specifier: ^24.10.1 + version: 24.12.2 + svelte: + specifier: ^5.0.0 + version: 5.55.1 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + packages/notify-client: devDependencies: '@nestjs/common': @@ -25768,7 +25787,7 @@ snapshots: obug: 2.1.1 std-env: 4.0.0 tinyrainbow: 3.1.0 - vitest: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@24.12.2)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@24.12.2)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3)) + vitest: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@22.19.17)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3)) '@vitest/expect@4.1.3': dependencies: @@ -25830,7 +25849,7 @@ snapshots: sirv: 3.0.2 tinyglobby: 0.2.15 tinyrainbow: 3.1.0 - vitest: 
4.1.3(@opentelemetry/api@1.9.1)(@types/node@24.12.2)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@24.12.2)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3)) + vitest: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@22.19.17)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3)) '@vitest/utils@4.1.3': dependencies: