feat(web): PillNav bar mode, fullscreen, local STT + mic button

PillNav overhaul:
- Dropdown-as-bar: theme/AI/sync/user menus render as horizontal
  bars in the bottom stack (PillDropdownBar) instead of floating
  popovers. New onOpenBar/activeBarId props on PillNavigation.
- iconOnly pills: tags/search/workbench-tabs pills show only icons.
  Home pill removed. New iconOnly flag on PillNavItem.
- Segmented toggle groups: items sharing a `group` id render as a
  single segmented pill (e.g. Light/Dark/System triple).
- Fullscreen mode: press "f" to hide all bottom chrome, Esc to exit.
- QuickInputBar + bottom bar visibility toggles via new pills.
- Progress ring on AI trigger pill during model download
  (conic-gradient ::after, follows pill border-radius).

@mana/local-stt — new package for browser-local speech-to-text:
- Whisper models via transformers.js v4 (WebGPU + WASM fallback)
- Same Web Worker architecture as @mana/local-llm
- Two models: Whisper Tiny (150 MB) and Whisper Small (950 MB)
- Reactive Svelte 5 bindings (getLocalSttStatus, loadLocalStt, transcribe)

Voice-to-text integration:
- useLocalStt() composable: mic capture via AudioContext +
  ScriptProcessor, resample to 16kHz mono, feed into Whisper worker
- Mic button in QuickInputBar (leftAction slot) with
  recording/loading/transcribing states + pulse animation
- Transcribed text injected into InputBar via new injectedText prop
- STT model selector in AI bar alongside LLM tier controls

Also: vite.config.ts server.fs.allow expanded to monorepo root
so workspace package workers resolve in dev.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-12 16:05:43 +02:00
parent 8c2f9306e9
commit 3deee755b3
24 changed files with 2145 additions and 28 deletions

View file

@ -160,6 +160,7 @@ pnpm test:e2e # Playwright
- **Auth**: Mana Auth (Better Auth + EdDSA JWT) via `@mana/shared-auth`
- **Data**: Dexie.js (local-first) + mana-sync (Go) backend
- **Encryption**: AES-GCM-256 via Web Crypto, server-wrapped MK with optional zero-knowledge
- **Local AI**: `@mana/local-llm` (Gemma 4 E2B, WebGPU) + `@mana/local-stt` (Whisper, WebGPU) — both run entirely in-browser via transformers.js
- **Testing**: Vitest, Playwright
- **Mobile**: Expo, Expo Router, NativeWind, EAS Build

View file

@ -51,6 +51,7 @@
"@mana/help": "workspace:*",
"@mana/local-llm": "workspace:*",
"@mana/local-store": "workspace:*",
"@mana/local-stt": "workspace:^",
"@mana/qr-export": "workspace:*",
"@mana/shared-auth": "workspace:*",
"@mana/shared-auth-ui": "workspace:*",

View file

@ -0,0 +1,288 @@
/**
* useLocalStt() — Svelte 5 composable that wires microphone capture
* directly into @mana/local-stt for fully on-device speech-to-text.
*
* Usage:
* const stt = useLocalStt();
* // stt.state — 'idle' | 'loading' | 'recording' | 'transcribing'
* // stt.text — final transcript (set once transcription completes)
* // stt.error — error message or null
* // stt.modelStatus — LoadingStatus from local-stt
* // stt.toggle() — start recording or stop + transcribe
* // stt.cancel() — abort recording without transcribing
*
* Audio pipeline:
* getUserMedia (native sample rate)
* → AudioContext + ScriptProcessor collect Float32 chunks
* → on stop: merge + resample to 16 kHz mono
* → feed into local-stt transcribe()
*
* The model is loaded lazily on first toggle(). Subsequent calls skip
* the download. The model stays loaded for the session (same as local-llm).
*/
import { getLocalSttStatus, loadLocalStt, transcribe, isLocalSttSupported } from '@mana/local-stt';
import type { LoadingStatus } from '@mana/local-stt';
/**
 * Phase of the speech-to-text pipeline exposed by useLocalStt().
 *
 * - 'idle'         — nothing in progress (initial state, and the state after
 *                    a finished, failed or cancelled run)
 * - 'loading'      — waiting for loadLocalStt() because the model is not ready
 * - 'recording'    — microphone access requested / audio being captured
 * - 'transcribing' — captured audio has been handed to transcribe()
 */
export type SttState = 'idle' | 'loading' | 'recording' | 'transcribing';
/**
 * Reactive handle returned by useLocalStt(). All data members are read-only
 * getters over the composable's internal state; the two functions are the
 * only way to drive it.
 */
export interface LocalSttHandle {
/** Current phase of the STT pipeline (see SttState). */
readonly state: SttState;
/** Final transcript, trimmed. Cleared when a new recording starts and set once transcription completes. */
readonly text: string;
/** Partial/streaming text: grows by one chunk at a time while transcription runs. */
readonly partial: string;
/** Human-readable error message of the last failed step, or null. Cleared when a new recording starts. */
readonly error: string | null;
/** Model loading status as reported by @mana/local-stt's getLocalSttStatus(). */
readonly modelStatus: LoadingStatus;
/** Elapsed recording time in ms (refreshed on a 100 ms timer; reset to 0 when capture is torn down). */
readonly elapsedMs: number;
/** Result of isLocalSttSupported(), evaluated once when the composable is created. */
readonly isSupported: boolean;
/**
 * From 'idle': start recording (loads the model first if needed).
 * From 'recording': stop and transcribe.
 * Ignored while 'loading' or 'transcribing'.
 */
toggle: () => void;
/** Release the microphone and return to 'idle' without transcribing. */
cancel: () => void;
}
/**
 * Svelte 5 composable that records from the microphone and transcribes the
 * captured audio with @mana/local-stt.
 *
 * Every start / stop / cancel bumps an internal session counter. Each async
 * step re-checks the counter after it resumes and backs out (releasing
 * whatever it acquired) when it no longer owns the current session. This
 * keeps a late `getUserMedia()` / `loadLocalStt()` / `transcribe()` result
 * from reviving a run the user already stopped or cancelled.
 *
 * @param options.language Optional language hint forwarded to transcribe().
 * @returns A reactive handle; see LocalSttHandle.
 */
export function useLocalStt(options?: { language?: string }): LocalSttHandle {
	let state = $state<SttState>('idle');
	let text = $state('');
	let partial = $state('');
	let error = $state<string | null>(null);
	let elapsedMs = $state(0);
	const modelStatus = getLocalSttStatus();
	const supported = isLocalSttSupported();

	// Audio capture state (not reactive — internal only)
	let stream: MediaStream | null = null;
	let audioContext: AudioContext | null = null;
	let chunks: Float32Array[] = [];
	let sampleRate = 0;
	let tickHandle: ReturnType<typeof setInterval> | null = null;
	let startedAt = 0;
	// ScriptProcessorNode is deprecated but universally supported and
	// simpler than AudioWorklet for our use case (we just collect raw
	// samples, no real-time processing). AudioWorklet requires a
	// separate module URL which complicates bundling.
	let scriptNode: ScriptProcessorNode | null = null;
	// Monotonic token identifying the run that currently owns `state`.
	let session = 0;

	/** Tear down timer, audio graph and microphone; reset capture buffers. */
	function cleanup() {
		if (tickHandle !== null) {
			clearInterval(tickHandle);
			tickHandle = null;
		}
		if (scriptNode) {
			// Detach the handler so a late audioprocess event cannot push
			// samples into the buffer of the next recording.
			scriptNode.onaudioprocess = null;
			scriptNode.disconnect();
			scriptNode = null;
		}
		stream?.getTracks().forEach((t) => t.stop());
		stream = null;
		if (audioContext && audioContext.state !== 'closed') {
			audioContext.close().catch(() => {});
		}
		audioContext = null;
		chunks = [];
		sampleRate = 0;
		elapsedMs = 0;
	}

	/** Load the model if needed, acquire the microphone and start capturing. */
	async function startRecording() {
		const mySession = ++session;
		error = null;
		text = '';
		partial = '';

		// Ensure model is loaded first
		if (modelStatus.current.state !== 'ready') {
			state = 'loading';
			try {
				await loadLocalStt();
			} catch (e) {
				if (mySession !== session) return;
				error = e instanceof Error ? e.message : String(e);
				state = 'idle';
				return;
			}
			// Cancelled while the model was loading: do not start recording.
			if (mySession !== session) return;
		}

		// Get microphone access
		state = 'recording';
		let acquired: MediaStream;
		try {
			acquired = await navigator.mediaDevices.getUserMedia({
				audio: {
					echoCancellation: true,
					noiseSuppression: true,
					autoGainControl: true,
				},
			});
		} catch (e) {
			if (mySession !== session) return;
			error = explainMicError(e);
			state = 'idle';
			return;
		}
		if (mySession !== session) {
			// Stopped/cancelled during the permission prompt: release the
			// microphone right away instead of leaving it live.
			acquired.getTracks().forEach((t) => t.stop());
			return;
		}
		stream = acquired;

		// Set up AudioContext to capture raw PCM
		try {
			const ctx = new AudioContext();
			audioContext = ctx;
			sampleRate = ctx.sampleRate;
			chunks = [];
			const source = ctx.createMediaStreamSource(acquired);
			// Buffer size 4096 is a good balance between latency and overhead
			const node = ctx.createScriptProcessor(4096, 1, 1);
			scriptNode = node;
			node.onaudioprocess = (e) => {
				// Copy — the buffer is reused by the browser
				chunks.push(new Float32Array(e.inputBuffer.getChannelData(0)));
			};
			source.connect(node);
			node.connect(ctx.destination);
			// The context is created after awaits, so the browser may start
			// it suspended; without resume() no audioprocess events fire.
			if (ctx.state === 'suspended') {
				void ctx.resume().catch(() => {});
			}
		} catch (e) {
			// Graph setup failed after the mic was acquired: release it.
			cleanup();
			error = explainMicError(e);
			state = 'idle';
			return;
		}

		startedAt = Date.now();
		tickHandle = setInterval(() => {
			elapsedMs = Date.now() - startedAt;
		}, 100);
	}

	/** Stop capturing, merge + resample the audio and run transcription. */
	async function stopAndTranscribe() {
		if (state !== 'recording') return;
		// New session: invalidates a getUserMedia() call that is still pending.
		const mySession = ++session;

		// Stop recording. cleanup() replaces `chunks` with a fresh array, so
		// holding on to the old reference is enough — no copy required.
		const capturedChunks = chunks;
		const capturedRate = sampleRate;
		cleanup();
		console.log(
			'[local-stt] Captured',
			capturedChunks.length,
			'chunks, sample rate:',
			capturedRate
		);
		if (capturedChunks.length === 0) {
			error = 'Keine Audiodaten aufgenommen.';
			console.warn('[local-stt] No audio chunks captured');
			state = 'idle';
			return;
		}

		state = 'transcribing';
		try {
			// Merge chunks into one Float32Array
			const totalLength = capturedChunks.reduce((sum, c) => sum + c.length, 0);
			const merged = new Float32Array(totalLength);
			let offset = 0;
			for (const chunk of capturedChunks) {
				merged.set(chunk, offset);
				offset += chunk.length;
			}

			// Resample to 16 kHz if needed
			const audio = capturedRate === 16000 ? merged : resample(merged, capturedRate, 16000);
			const durationSec = audio.length / 16000;
			console.log('[local-stt] Audio ready:', {
				originalSamples: merged.length,
				resampledSamples: audio.length,
				durationSec: durationSec.toFixed(1),
				sampleRate: capturedRate,
				// Peak of the first second; reduce over a view avoids spreading
				// thousands of arguments into Math.max.
				maxAmplitude: audio
					.subarray(0, 16000)
					.reduce((peak, sample) => Math.max(peak, Math.abs(sample)), 0),
			});

			const result = await transcribe({
				audio,
				language: options?.language,
				onChunk: (t: string) => {
					if (mySession !== session) return;
					partial += t;
					console.log('[local-stt] Chunk:', t);
				},
			});
			// Cancelled (or superseded by a new recording) while transcribing:
			// drop the result and leave `state` to its current owner.
			if (mySession !== session) return;
			console.log('[local-stt] Result:', result);
			text = result.text.trim();
		} catch (e) {
			if (mySession !== session) return;
			error = e instanceof Error ? e.message : String(e);
			console.error('[local-stt] Transcription error:', e);
		}
		state = 'idle';
	}

	/** Start when idle, stop + transcribe when recording, otherwise no-op. */
	function toggle() {
		if (state === 'idle') {
			void startRecording();
		} else if (state === 'recording') {
			void stopAndTranscribe();
		}
		// If loading or transcribing, ignore
	}

	/** Abort whatever is in flight and return to 'idle'. */
	function cancel() {
		session++;
		cleanup();
		state = 'idle';
	}

	return {
		get state() {
			return state;
		},
		get text() {
			return text;
		},
		get partial() {
			return partial;
		},
		get error() {
			return error;
		},
		get modelStatus() {
			return modelStatus.current;
		},
		get elapsedMs() {
			return elapsedMs;
		},
		get isSupported() {
			return supported;
		},
		toggle,
		cancel,
	};
}
// ─── Helpers ────────────────────────────────────────────────
/**
 * Convert `input` from `fromRate` to `toRate` by linear interpolation.
 *
 * Each output sample is the weighted mix of the two source samples that
 * bracket its position; the upper neighbour is clamped to the last source
 * sample. No low-pass filtering is applied — adequate for speech.
 *
 * @param input    Mono PCM samples at `fromRate`.
 * @param fromRate Sample rate of `input` in Hz.
 * @param toRate   Desired sample rate in Hz.
 * @returns `input` itself when the rates match, otherwise a new array.
 */
function resample(input: Float32Array, fromRate: number, toRate: number): Float32Array {
	if (fromRate === toRate) {
		return input;
	}

	// Distance (in source samples) between two consecutive output samples.
	const step = fromRate / toRate;
	const lastSource = input.length - 1;
	const resampled = new Float32Array(Math.round(input.length / step));

	let outIndex = 0;
	while (outIndex < resampled.length) {
		const position = outIndex * step;
		const below = Math.floor(position);
		const above = below + 1 > lastSource ? lastSource : below + 1;
		const weightHi = position - below;
		const weightLo = 1 - weightHi;
		resampled[outIndex] = input[below] * weightLo + input[above] * weightHi;
		outIndex += 1;
	}

	return resampled;
}
/**
 * Turn a microphone-related failure into a user-facing (German) message.
 *
 * Known causes are matched by the error's `name` or, failing that, by a
 * pattern in its message; the first matching cause wins. Anything else
 * falls back to a generic message that includes the message or name.
 *
 * @param e Whatever was thrown; non-Error values are stringified first.
 */
function explainMicError(e: unknown): string {
	const failure = e instanceof Error ? e : new Error(String(e));
	const errorName = failure.name || '';
	const detail = failure.message || '';

	// Checked in order — keep permission problems ahead of device problems.
	const knownCauses: ReadonlyArray<{ name: string; pattern: RegExp; message: string }> = [
		{
			name: 'NotAllowedError',
			pattern: /denied|permission/i,
			message: 'Mikrofon-Zugriff verweigert. Erlaube den Zugriff in deinen Browser-Einstellungen.',
		},
		{
			name: 'NotFoundError',
			pattern: /not.?found|no.?device/i,
			message: 'Kein Mikrofon gefunden.',
		},
		{
			name: 'NotReadableError',
			pattern: /in use|busy/i,
			message: 'Mikrofon ist gerade belegt.',
		},
	];

	for (const cause of knownCauses) {
		if (errorName === cause.name || cause.pattern.test(detail)) {
			return cause.message;
		}
	}

	return `Mikrofon-Fehler: ${detail || errorName || 'Unbekannt'}`;
}

View file

@ -210,14 +210,17 @@
let aiTierItems = $derived<PillDropdownItem[]>([
// Tier toggles — browser tier item and its model-status buddy share a
// group so PillDropdownBar renders them as a paired pill.
...TIER_TOGGLE_LIST.filter((t) => t.tier !== 'browser' || webgpuSupported).map((t) => ({
id: `ai-tier-${t.tier}`,
label: t.shortLabel,
icon: t.icon,
active: llmSettings.allowedTiers.includes(t.tier),
onClick: () => toggleAiTier(t.tier),
...(t.tier === 'browser' ? { group: 'local-llm' } : {}),
})),
...TIER_TOGGLE_LIST.filter((t) => t.tier !== 'browser' || webgpuSupported).map((t) => {
const isActive = llmSettings.allowedTiers.includes(t.tier);
return {
id: `ai-tier-${t.tier}`,
label: t.shortLabel,
icon: isActive ? 'checkCircle' : t.icon,
active: isActive,
onClick: () => toggleAiTier(t.tier),
...(t.tier === 'browser' ? { group: 'local-llm' } : {}),
};
}),
// Browser model status / load button (grouped with the "Lokal" toggle).
// Handles all LoadingStatus states so the user sees feedback during
// download, initialization, and on error (e.g. worker crash).
@ -234,7 +237,7 @@
switch (state) {
case 'ready':
label = 'Geladen';
icon = 'check';
icon = 'checkCircle';
disabled = true;
break;
case 'downloading':
@ -280,16 +283,19 @@
// STT model selector — each model is a pill, active = currently selected
...(sttSupported
? (Object.entries(STT_MODELS) as [SttModelKey, (typeof STT_MODELS)[SttModelKey]][]).map(
([key, model]) => ({
id: `stt-model-${key}`,
label: model.displayName,
icon: 'mic' as const,
active: selectedSttModel === key,
onClick: () => {
selectedSttModel = key;
void loadLocalStt(key);
},
})
([key, model]) => {
const isSelected = selectedSttModel === key;
return {
id: `stt-model-${key}`,
label: model.displayName,
icon: isSelected ? 'checkCircle' : 'mic',
active: isSelected,
onClick: () => {
selectedSttModel = key;
void loadLocalStt(key);
},
};
}
)
: []),
// STT model status (grouped with selected model)
@ -306,7 +312,7 @@
switch (state) {
case 'ready':
label = 'STT bereit';
icon = 'check';
icon = 'checkCircle';
disabled = true;
break;
case 'downloading':

View file

@ -54,6 +54,14 @@ export default defineConfig({
server: {
port: 5173,
strictPort: true,
fs: {
// Allow serving files from the monorepo root so that workspace
// packages (e.g. @mana/local-llm's Web Worker entry) can be
// resolved by Vite's dev server. Without this, worker.ts in
// packages/local-llm triggers "request url is outside of Vite
// serving allow list".
allow: ['../../../..'],
},
},
preview: {
port: 4173,