/**
 * Called when the voice capture bar finishes a recording.
 *
 * Creates the placeholder note through the store (which kicks off the
 * transcription in the background) and opens it in the inline editor
 * straight away. The language is currently fixed to German ('de').
 */
async function handleVoiceComplete(blob: Blob, durationMs: number) {
	const note = await notesStore.createFromVoice(blob, durationMs, 'de');
	startEdit(note);
}

// When a voice note's transcript arrives asynchronously while the inline
// editor is open, the underlying row updates but the editor's local copy
// stays on the "…" placeholder. Pull the live values in — but ONLY while
// the editor still shows the placeholder, so nothing the user typed is
// ever overwritten.
$effect(() => {
	if (!editingId) return;
	const live = notes.find((n) => n.id === editingId);
	if (!live) return;

	// Either the user already replaced the placeholder body, or the
	// transcript has not landed yet: nothing to sync.
	if (editContent !== '…' || live.content === '…') return;

	// The title is user-editable too. Only take the store's title while the
	// editor still shows the generic placeholder title, otherwise a rename
	// made while waiting for the transcript would be silently discarded.
	// NOTE(review): assumes startEdit() seeds editTitle from note.title, so a
	// fresh voice note starts out as 'Sprachnotiz' — confirm.
	if (editTitle === 'Sprachnotiz') {
		editTitle = live.title;
	}
	editContent = live.content;
});
+ + +
/**
 * Create a note from a voice recording. Returns the placeholder note
 * immediately so the UI can navigate to it; the transcript is filled
 * in asynchronously once mana-stt returns. The placeholder title
 * 'Sprachnotiz' is intentionally generic — once we have a transcript,
 * the user can rename inline like any other note.
 *
 * @param blob        Recorded audio.
 * @param _durationMs Recording length; currently unused.
 * @param language    Language hint forwarded to the transcription endpoint.
 * @returns The placeholder note (title 'Sprachnotiz', content '…').
 */
async createFromVoice(blob: Blob, _durationMs: number, language = 'de'): Promise<Note> {
	const note = await this.createNote({ title: 'Sprachnotiz', content: '…' });
	// Fire-and-forget: caller has already navigated into edit mode.
	// transcribeIntoNote handles transcription failures itself, but its
	// fallback write can still reject — attach a handler so that case does
	// not surface as an unhandled promise rejection.
	void this.transcribeIntoNote(note.id, blob, language).catch((err: unknown) => {
		console.error('Voice note transcription result could not be saved', err);
	});
	return note;
},

/**
 * Upload an audio blob to /api/v1/voice/transcribe and write the
 * transcript into an existing note. On failure, surfaces the error
 * inline as the note content so the user isn't left with an empty
 * placeholder.
 *
 * @param noteId   Id of the note that receives the transcript.
 * @param blob     Recorded audio.
 * @param language Optional language hint; omitted from the request when empty.
 * @throws Only if writing the failure message into the note fails as well.
 */
async transcribeIntoNote(noteId: string, blob: Blob, language?: string): Promise<void> {
	try {
		const ext = blob.type.includes('webm')
			? '.webm'
			: blob.type.includes('mp4')
				? '.m4a'
				: '.audio';

		const form = new FormData();
		form.append('file', blob, `note${ext}`);
		if (language) form.append('language', language);

		const response = await fetch('/api/v1/voice/transcribe', {
			method: 'POST',
			// Ask for JSON explicitly so error responses are machine-readable.
			headers: { Accept: 'application/json' },
			body: form,
		});

		if (!response.ok) {
			const raw = await response.text();
			// Error bodies may be a JSON envelope like { "message": "…" };
			// show the message rather than pasting raw JSON into the note.
			let detail = raw;
			try {
				const parsed: unknown = JSON.parse(raw);
				const message = (parsed as Record<string, unknown> | null)?.message;
				if (typeof message === 'string' && message.length > 0) detail = message;
			} catch {
				// Body was not JSON — keep the raw text.
			}
			throw new Error(detail || `HTTP ${response.status}`);
		}

		// Narrow the payload at runtime instead of asserting its shape.
		const payload: unknown = await response.json();
		const text = (payload as Record<string, unknown> | null)?.text;
		const transcript = (typeof text === 'string' ? text : '').trim();

		// Use the first line as the title if it's short — keeps the
		// note browseable without forcing the user to rename it.
		const firstLine = transcript.split('\n')[0]?.trim() ?? '';
		const title = firstLine.length > 0 && firstLine.length <= 80 ? firstLine : 'Sprachnotiz';

		await this.updateNote(noteId, { title, content: transcript });
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		await this.updateNote(noteId, {
			title: 'Sprachnotiz (Fehler)',
			content: `Transkription fehlgeschlagen: ${msg}`,
		});
	}
},
/**
 * POST /api/v1/voice/transcribe
 *
 * Generic server-side proxy to mana-stt for any module that needs voice
 * transcription. The browser uploads an audio Blob; we forward it to
 * mana-stt with the server-held API key and return the transcript JSON.
 *
 * Use this from new modules instead of cloning the per-module endpoints
 * (memoro, dreams) — those exist for historical reasons and will be
 * migrated when convenient.
 *
 * Request: multipart/form-data with `file` (audio blob) and optional `language`
 * Response: { text: string, language: string | null, durationSeconds: number | null }
 */

import { error, json } from '@sveltejs/kit';
import { env } from '$env/dynamic/private';
import type { RequestHandler } from './$types';

const MAX_BYTES = 25 * 1024 * 1024; // 25 MB

/**
 * Decide whether an uploaded blob's MIME type is worth forwarding upstream.
 * Missing and generic binary types are tolerated so mana-stt can make the
 * final call.
 */
function isAcceptableType(mime: string): boolean {
	if (!mime) return true; // tolerate missing type — let upstream validate
	if (mime === 'application/octet-stream') return true;
	return mime.startsWith('audio/') || mime.startsWith('video/'); // m4a often reports video/mp4
}

/**
 * Validate the uploaded audio, forward it to mana-stt and return the
 * transcript as JSON.
 *
 * Error statuses: 503 when MANA_STT_URL is unset, 400 for a malformed or
 * empty upload, 413 above MAX_BYTES, 415 for non-audio types, 502 when
 * mana-stt is unreachable or answers with something unusable; other
 * upstream error statuses are passed through.
 *
 * NOTE(review): this handler performs no authentication itself —
 * presumably a hook in front of it does; confirm, since the endpoint
 * spends the server-held API key.
 */
export const POST: RequestHandler = async ({ request }) => {
	const sttUrl = env.MANA_STT_URL;
	const apiKey = env.MANA_STT_API_KEY;

	if (!sttUrl) {
		throw error(503, 'mana-stt is not configured (MANA_STT_URL missing)');
	}

	let incoming: FormData;
	try {
		incoming = await request.formData();
	} catch {
		throw error(400, 'Expected multipart/form-data with a file field');
	}

	const file = incoming.get('file');
	// FormData.get() yields string | File | null; only a non-empty string is
	// a usable language hint. Anything else is ignored, not forwarded.
	const rawLanguage = incoming.get('language');
	const language = typeof rawLanguage === 'string' && rawLanguage.length > 0 ? rawLanguage : null;

	if (!(file instanceof Blob)) {
		throw error(400, 'Missing file');
	}
	if (file.size === 0) {
		throw error(400, 'Empty audio');
	}
	if (file.size > MAX_BYTES) {
		throw error(413, `Audio too large (max ${MAX_BYTES / 1024 / 1024} MB)`);
	}
	if (!isAcceptableType(file.type)) {
		throw error(415, `Unsupported audio type: ${file.type}`);
	}

	// Re-derive the filename from the MIME type; the client-supplied name
	// is not forwarded.
	const filename = `voice${mimeToExtension(file.type)}`;

	const upstream = new FormData();
	upstream.append('file', file, filename);
	if (language) upstream.append('language', language);

	const headers: Record<string, string> = { Accept: 'application/json' };
	if (apiKey) headers['X-API-Key'] = apiKey;

	let response: Response;
	try {
		response = await fetch(`${sttUrl.replace(/\/$/, '')}/transcribe`, {
			method: 'POST',
			headers,
			body: upstream,
		});
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		throw error(502, `Could not reach mana-stt: ${msg}`);
	}

	if (!response.ok) {
		const text = await response.text();
		// error() only accepts 400–599; report any other non-OK upstream
		// status as a bad gateway.
		const status = response.status >= 400 && response.status <= 599 ? response.status : 502;
		throw error(status, `mana-stt error: ${text || response.statusText}`);
	}

	let result: unknown;
	try {
		result = await response.json();
	} catch {
		// A 2xx with an unparseable body is an upstream fault, not ours.
		throw error(502, 'mana-stt returned a non-JSON response');
	}

	// Validate each field instead of trusting the upstream shape.
	const fields: Record<string, unknown> =
		typeof result === 'object' && result !== null ? (result as Record<string, unknown>) : {};

	return json({
		text: typeof fields.text === 'string' ? fields.text : '',
		language: typeof fields.language === 'string' ? fields.language : null,
		durationSeconds: typeof fields.duration_seconds === 'number' ? fields.duration_seconds : null,
	});
};

/**
 * Map a MIME type to the file extension used for the upstream filename.
 * Matching is by substring so codec parameters ("audio/webm;codecs=opus")
 * do not matter. Unknown or empty types fall back to '.webm'.
 */
function mimeToExtension(mime: string): string {
	if (mime.includes('webm')) return '.webm';
	if (mime.includes('ogg')) return '.ogg';
	if (mime.includes('mp4') || mime.includes('m4a')) return '.m4a';
	// 'audio/mp3' is a non-standard but commonly reported alias of 'audio/mpeg'.
	if (mime.includes('mpeg') || mime.includes('mp3')) return '.mp3';
	if (mime.includes('wav')) return '.wav';
	if (mime.includes('flac')) return '.flac';
	return '.webm';
}