mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:41:09 +02:00
feat(notes): voice capture in workbench ListView via shared <VoiceCaptureBar>
Drop a mic into Notes — record, transcribe through the new generic /api/v1/voice/transcribe proxy (mana-stt), then write the result back into the placeholder note. The first transcript line becomes the title when it fits in 80 chars, otherwise a generic 'Sprachnotiz' label. The inline editor refreshes from the live note while the placeholder '…' content is still on screen, so a transcript that arrives a moment after the editor opens shows up automatically without overwriting anything the user has typed. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e0e801956a
commit
9b3d7c7325
3 changed files with 193 additions and 1 deletions
|
|
@ -9,6 +9,7 @@
|
|||
import type { ViewProps } from '$lib/app-registry';
|
||||
import { ContextMenu, type ContextMenuItem } from '@mana/shared-ui';
|
||||
import { PencilSimple, Trash, PushPin } from '@mana/shared-icons';
|
||||
import VoiceCaptureBar from '$lib/components/voice/VoiceCaptureBar.svelte';
|
||||
|
||||
let { navigate, goBack, params }: ViewProps = $props();
|
||||
|
||||
|
|
@ -31,6 +32,11 @@
|
|||
startEdit(note);
|
||||
}
|
||||
|
||||
/**
 * Callback for a finished voice recording: asks the notes store to create a
 * note from the audio (language fixed to 'de') and then opens the returned
 * note in the inline editor.
 */
async function handleVoiceComplete(blob: Blob, durationMs: number) {
	const voiceNote = await notesStore.createFromVoice(blob, durationMs, 'de');
	startEdit(voiceNote);
}
|
||||
|
||||
function startEdit(note: Note) {
|
||||
if (editingId && editingId !== note.id) saveEdit();
|
||||
editingId = note.id;
|
||||
|
|
@ -38,6 +44,21 @@
|
|||
editContent = note.content;
|
||||
}
|
||||
|
||||
// A voice note is shown with "…" as its content until the transcript is
// written to the underlying Dexie row, which may happen while the inline
// editor is already open. Copy the live title/content into the editor's
// local fields in that case — but only while the editor itself still holds
// the "…" placeholder, so text the user has typed is never replaced.
$effect(() => {
	if (!editingId) return;

	const current = notes.find((candidate) => candidate.id === editingId);
	if (!current) return;

	// Editor no longer shows the placeholder: leave the user's text alone.
	if (editContent !== '…') return;
	// Live note is still the placeholder: nothing new to pull in yet.
	if (current.content === '…') return;

	editTitle = current.title;
	editContent = current.content;
});
|
||||
|
||||
async function saveEdit() {
|
||||
if (!editingId) return;
|
||||
await notesStore.updateNote(editingId, {
|
||||
|
|
@ -105,6 +126,14 @@
|
|||
</script>
|
||||
|
||||
<div class="app-view">
|
||||
<!-- Voice capture -->
|
||||
<VoiceCaptureBar
|
||||
idleLabel="Notiz sprechen"
|
||||
feature="notes-voice-capture"
|
||||
reason="Notizen werden verschlüsselt gespeichert. Dafür brauchst du ein Mana-Konto."
|
||||
onComplete={handleVoiceComplete}
|
||||
/>
|
||||
|
||||
<!-- Quick create -->
|
||||
<form onsubmit={(e) => e.preventDefault()} class="quick-add">
|
||||
<span class="add-icon">+</span>
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
import { noteTable } from '../collections';
|
||||
import { toNote } from '../queries';
|
||||
import type { LocalNote } from '../types';
|
||||
import type { LocalNote, Note } from '../types';
|
||||
import { encryptRecord } from '$lib/data/crypto';
|
||||
|
||||
export const notesStore = {
|
||||
|
|
@ -38,6 +38,63 @@ export const notesStore = {
|
|||
return plaintextSnapshot;
|
||||
},
|
||||
|
||||
/**
 * Create a note for a voice recording.
 *
 * A placeholder note (title 'Sprachnotiz', content '…') is created first
 * and returned right away so the caller can open it immediately. The
 * transcription is kicked off through transcribeIntoNote but is not
 * awaited here.
 *
 * @param blob - the recorded audio, handed on to transcribeIntoNote
 * @param _durationMs - recording length; not used by this method
 * @param language - language hint passed to transcribeIntoNote (default 'de')
 * @returns the placeholder note produced by createNote
 */
async createFromVoice(blob: Blob, _durationMs: number, language = 'de'): Promise<Note> {
	const placeholder = await this.createNote({ title: 'Sprachnotiz', content: '…' });

	// Deliberately not awaited: the transcript is written into the note later.
	void this.transcribeIntoNote(placeholder.id, blob, language);

	return placeholder;
},
|
||||
|
||||
/**
 * Upload an audio blob to /api/v1/voice/transcribe and write the returned
 * transcript into an existing note.
 *
 * The first transcript line becomes the note title when it is 1–80
 * characters long; otherwise the title is 'Sprachnotiz'. If the request or
 * the note update fails, the note's title and content are replaced with an
 * error message so the placeholder does not stay on screen. If even that
 * write fails, the problem is logged; this method does not reject, because
 * it is started without being awaited.
 *
 * @param noteId - id of the note that receives the transcript
 * @param blob - recorded audio to upload
 * @param language - optional language hint sent as the `language` form field
 */
async transcribeIntoNote(noteId: string, blob: Blob, language?: string): Promise<void> {
	try {
		// Derive an upload filename extension from the blob's MIME type.
		let ext = '.audio';
		if (blob.type.includes('webm')) {
			ext = '.webm';
		} else if (blob.type.includes('mp4')) {
			ext = '.m4a';
		}

		const form = new FormData();
		form.append('file', blob, `note${ext}`);
		if (language) form.append('language', language);

		const response = await fetch('/api/v1/voice/transcribe', {
			method: 'POST',
			body: form,
		});
		if (!response.ok) {
			const text = await response.text();
			throw new Error(text || `HTTP ${response.status}`);
		}
		const result = (await response.json()) as { text: string };
		const transcript = (result.text ?? '').trim();

		// Use the first line as the title if it's short — keeps the
		// note browseable without forcing the user to rename it.
		const firstLine = transcript.split('\n')[0]?.trim() ?? '';
		const title = firstLine.length > 0 && firstLine.length <= 80 ? firstLine : 'Sprachnotiz';

		await this.updateNote(noteId, { title, content: transcript });
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		try {
			await this.updateNote(noteId, {
				title: 'Sprachnotiz (Fehler)',
				content: `Transkription fehlgeschlagen: ${msg}`,
			});
		} catch (writeError) {
			// Writing the error into the note failed as well. Log instead of
			// rejecting so a non-awaited call cannot end in an unhandled rejection.
			console.error('[notes] could not write transcription failure into note', writeError);
		}
	}
},
|
||||
|
||||
async updateNote(
|
||||
id: string,
|
||||
data: Partial<Pick<LocalNote, 'title' | 'content' | 'color' | 'isPinned' | 'isArchived'>>
|
||||
|
|
|
|||
106
apps/mana/apps/web/src/routes/api/v1/voice/transcribe/+server.ts
Normal file
106
apps/mana/apps/web/src/routes/api/v1/voice/transcribe/+server.ts
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
/**
|
||||
* POST /api/v1/voice/transcribe
|
||||
*
|
||||
* Generic server-side proxy to mana-stt for any module that needs voice
|
||||
* transcription. The browser uploads an audio Blob; we forward it to
|
||||
* mana-stt with the server-held API key and return the transcript JSON.
|
||||
*
|
||||
* Use this from new modules instead of cloning the per-module endpoints
|
||||
* (memoro, dreams) — those exist for historical reasons and will be
|
||||
* migrated when convenient.
|
||||
*
|
||||
* Request: multipart/form-data with `file` (audio blob) and optional `language`
|
||||
* Response: { text: string, language: string | null, durationSeconds: number | null }
|
||||
*/
|
||||
|
||||
import { error, json } from '@sveltejs/kit';
|
||||
import { env } from '$env/dynamic/private';
|
||||
import type { RequestHandler } from './$types';
|
||||
|
||||
const MAX_BYTES = 25 * 1024 * 1024; // 25 MB
|
||||
|
||||
/**
 * Decide whether an uploaded file's MIME type may be forwarded.
 *
 * An empty type and 'application/octet-stream' are accepted, leaving
 * validation to the upstream service. Otherwise the type must start with
 * 'audio/' or 'video/' (m4a often reports video/mp4).
 */
function isAcceptableType(mime: string): boolean {
	const untyped = !mime || mime === 'application/octet-stream';
	if (untyped) return true;

	const acceptedPrefixes = ['audio/', 'video/'];
	return acceptedPrefixes.some((prefix) => mime.startsWith(prefix));
}
|
||||
|
||||
/**
 * Forward an uploaded audio file to mana-stt and return the transcript.
 *
 * Reads `file` and optional `language` from the multipart body, validates
 * the file (present, non-empty, at most MAX_BYTES, acceptable MIME type),
 * and posts it to `${MANA_STT_URL}/transcribe`, adding an `X-API-Key`
 * header when MANA_STT_API_KEY is set.
 *
 * Responds with `{ text, language, durationSeconds }`. Error statuses:
 * 503 when MANA_STT_URL is missing, 400/413/415 for invalid uploads, 502
 * when mana-stt cannot be reached or answers with something that is not
 * JSON, and the upstream status when mana-stt itself reports an error.
 */
export const POST: RequestHandler = async ({ request }) => {
	const sttUrl = env.MANA_STT_URL;
	const apiKey = env.MANA_STT_API_KEY;

	if (!sttUrl) {
		throw error(503, 'mana-stt is not configured (MANA_STT_URL missing)');
	}

	let incoming: FormData;
	try {
		incoming = await request.formData();
	} catch {
		throw error(400, 'Expected multipart/form-data with a file field');
	}
	const file = incoming.get('file');
	// A form field may be a File as well as a string; only a string is a
	// usable language hint, so anything else is treated as "not provided".
	const rawLanguage = incoming.get('language');
	const language = typeof rawLanguage === 'string' ? rawLanguage : null;

	if (!(file instanceof Blob)) {
		throw error(400, 'Missing file');
	}
	if (file.size === 0) {
		throw error(400, 'Empty audio');
	}
	if (file.size > MAX_BYTES) {
		throw error(413, `Audio too large (max ${MAX_BYTES / 1024 / 1024} MB)`);
	}
	if (!isAcceptableType(file.type)) {
		throw error(415, `Unsupported audio type: ${file.type}`);
	}

	const ext = mimeToExtension(file.type);
	const filename = `voice${ext}`;

	const upstream = new FormData();
	upstream.append('file', file, filename);
	if (language) upstream.append('language', language);

	const headers: Record<string, string> = { Accept: 'application/json' };
	if (apiKey) headers['X-API-Key'] = apiKey;

	let response: Response;
	try {
		// Strip a single trailing slash so the joined URL has no "//".
		response = await fetch(`${sttUrl.replace(/\/$/, '')}/transcribe`, {
			method: 'POST',
			headers,
			body: upstream,
		});
	} catch (e) {
		const msg = e instanceof Error ? e.message : String(e);
		throw error(502, `Could not reach mana-stt: ${msg}`);
	}

	if (!response.ok) {
		const text = await response.text();
		// error() only accepts 400–599; report any other non-OK upstream
		// status as a bad-gateway failure instead of crashing the handler.
		const status = response.status >= 400 && response.status <= 599 ? response.status : 502;
		throw error(status, `mana-stt error: ${text || response.statusText}`);
	}

	let result: { text?: string; language?: string; duration_seconds?: number } | null;
	try {
		result = (await response.json()) as typeof result;
	} catch {
		// Upstream answered 2xx but the body is not parseable JSON.
		throw error(502, 'mana-stt returned a response that is not valid JSON');
	}

	return json({
		text: result?.text ?? '',
		language: result?.language ?? null,
		durationSeconds: result?.duration_seconds ?? null,
	});
};
|
||||
|
||||
/**
 * Map a MIME type to the filename extension used for the upstream upload.
 *
 * Matching is by substring and the first matching row wins, so parameters
 * such as ";codecs=opus" do not matter. Both 'audio/mpeg' and the
 * non-standard 'audio/mp3' map to '.mp3'. Anything unrecognised, including
 * an empty type, falls back to '.webm'.
 *
 * @param mime - MIME type of the uploaded file (may be empty)
 * @returns extension including the leading dot
 */
function mimeToExtension(mime: string): string {
	// Order matters: earlier rows take precedence over later ones.
	const table: ReadonlyArray<readonly [readonly string[], string]> = [
		[['webm'], '.webm'],
		[['ogg'], '.ogg'],
		[['mp4', 'm4a'], '.m4a'],
		[['mpeg', 'mp3'], '.mp3'],
		[['wav'], '.wav'],
		[['flac'], '.flac'],
	];

	for (const [needles, extension] of table) {
		if (needles.some((needle) => mime.includes(needle))) return extension;
	}
	return '.webm';
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue