feat(todo): LLM-parse spoken tasks into title + dueDate + priority

The previous voice quick-add dumped the whole transcript into the task
title — fine for "Steuererklärung" but useless for "Steuererklärung
morgen 14 Uhr hoch", which should land as title="Steuererklärung",
dueDate=tomorrow, priority="high".

New endpoint /api/v1/voice/parse-task posts the transcript to mana-llm
(gemma3:4b, temperature 0) with a tight system prompt that asks for
strict JSON: { title, dueDate, priority, labels }. The endpoint coerces
the response back into the typed shape and falls through to
{ title: transcript, … } whenever anything goes wrong — mana-llm down,
JSON garbled, network timeout. Voice quick-add must never fail harder
than typed quick-add, so the fallback path is the rule, not the
exception.

Labels come back from the LLM as free-text topic hints and don't yet
map to the workspace's tag IDs — fuzzy matching against existing tags
is a follow-up.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-08 16:08:09 +02:00
parent b48c9ff80f
commit c32a5a57de
2 changed files with 193 additions and 17 deletions

View file

@ -86,25 +86,29 @@ export const tasksStore = {
/**
* Create a task from a voice recording. Inserts a placeholder task
* immediately so the user sees instant feedback in the list, then
* fills in the real title once mana-stt returns the transcript.
*
* No date/priority parsing yet — that needs an LLM pass and is its
* own follow-up. The user can edit the task inline like any other.
* runs transcription + LLM parsing in the background and updates
* the task with the structured result (title, due date, priority,
* labels). If the LLM step fails or mana-llm is unavailable, the
* server returns the raw transcript as the title and the user gets
* a usable task either way — see /api/v1/voice/parse-task.
*/
async createFromVoice(blob: Blob, _durationMs: number, language = 'de') {
const placeholder = await this.createTask({ title: 'Sprachaufgabe wird transkribiert…' });
void this.transcribeIntoTask(placeholder.id, blob, language);
void this.transcribeAndParseIntoTask(placeholder.id, blob, language);
return placeholder;
},
/**
* Upload an audio blob to /api/v1/voice/transcribe and write the
* transcript into an existing task as the new title. On failure,
* surfaces the error inline so the user isn't left with the
* "wird transkribiert…" placeholder forever.
* Two-step pipeline: STT → LLM parse → updateTask. Both steps go
* through server-side proxies (/api/v1/voice/transcribe and
* /api/v1/voice/parse-task) so the browser never sees STT or LLM
* credentials. Failures at either step surface inline as the task
* title so the user isn't left with the "wird transkribiert…"
* placeholder forever.
*/
async transcribeIntoTask(taskId: string, blob: Blob, language?: string): Promise<void> {
async transcribeAndParseIntoTask(taskId: string, blob: Blob, language?: string): Promise<void> {
try {
// Step 1: speech to text
const form = new FormData();
const ext = blob.type.includes('webm')
? '.webm'
@ -114,17 +118,46 @@ export const tasksStore = {
form.append('file', blob, `task${ext}`);
if (language) form.append('language', language);
const response = await fetch('/api/v1/voice/transcribe', {
const sttResponse = await fetch('/api/v1/voice/transcribe', {
method: 'POST',
body: form,
});
if (!response.ok) {
const text = await response.text();
throw new Error(text || `HTTP ${response.status}`);
if (!sttResponse.ok) {
const text = await sttResponse.text();
throw new Error(text || `HTTP ${sttResponse.status}`);
}
const result = (await response.json()) as { text: string };
const transcript = (result.text ?? '').trim() || 'Sprachaufgabe';
await this.updateTask(taskId, { title: transcript });
const sttResult = (await sttResponse.json()) as { text: string };
const transcript = (sttResult.text ?? '').trim();
if (!transcript) {
await this.updateTask(taskId, { title: 'Sprachaufgabe' });
return;
}
// Step 2: structured extraction. parse-task gracefully falls
// back to { title: transcript, dueDate: null, ... } if mana-llm
// is unreachable, so we don't wrap this in another try/catch.
const parseResponse = await fetch('/api/v1/voice/parse-task', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ transcript, language }),
});
const parsed = parseResponse.ok
? ((await parseResponse.json()) as {
title: string;
dueDate: string | null;
priority: 'low' | 'medium' | 'high' | null;
labels: string[];
})
: { title: transcript, dueDate: null, priority: null as null, labels: [] as string[] };
const update: Record<string, unknown> = { title: parsed.title };
if (parsed.dueDate) update.dueDate = parsed.dueDate;
if (parsed.priority) update.priority = parsed.priority;
// labels are free-text topic hints from the LLM and don't yet
// map to the workspace's tag IDs — leave label wiring to a
// follow-up that does fuzzy matching against existing tags.
await this.updateTask(taskId, update);
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
await this.updateTask(taskId, { title: `Sprachaufgabe (Fehler: ${msg})` });

View file

@ -0,0 +1,143 @@
/**
* POST /api/v1/voice/parse-task
*
* Turn a spoken-task transcript into structured task data via mana-llm.
* Used by the Todo voice quick-add flow: the user speaks a task like
* "Steuererklärung morgen 14 Uhr" and we extract title + due date.
*
* Graceful degradation is the rule here, not the exception. If mana-llm
* is unreachable, mis-configured, or returns garbage JSON, fall back to
* { title: transcript } with no error — the user still gets a usable
* task and can edit it inline. The goal is "voice quick-add never fails
* harder than a typed quick-add", not "voice quick-add only works when
* the LLM is happy".
*
* Request: { transcript: string, language?: string }
* Response: { title, dueDate, priority, labels } same shape regardless
* of whether the LLM ran or we fell through to the fallback.
*/
import { json } from '@sveltejs/kit';
import { env } from '$env/dynamic/private';
import type { RequestHandler } from './$types';
/**
 * Structured task data extracted from a spoken transcript.
 * This exact shape is returned whether the LLM ran or the endpoint fell
 * through to the fallback, so the client needs no error branch.
 */
interface ParseResult {
  title: string;
  dueDate: string | null; // ISO date (YYYY-MM-DD) or full ISO timestamp
  priority: 'low' | 'medium' | 'high' | null;
  // Free-text topic hints from the LLM; not yet mapped to workspace tag IDs
  // (fuzzy matching against existing tags is a follow-up — see file header).
  labels: string[];
}
// Hard cap on transcript length forwarded to the LLM; spoken quick-add
// tasks are short, so anything longer is truncated defensively.
const MAX_TRANSCRIPT_CHARS = 1000;
// Abort the mana-llm call after this long and fall back to { title: transcript }.
const LLM_TIMEOUT_MS = 8000;
// Model identifier sent to mana-llm's OpenAI-compatible chat endpoint.
const DEFAULT_MODEL = 'ollama/gemma3:4b';
/**
 * Degraded result for when the LLM step can't run: the raw transcript
 * becomes the title (or a generic German placeholder when it is blank)
 * and every structured field stays empty.
 */
function fallback(transcript: string): ParseResult {
  const trimmed = transcript.trim();
  return {
    title: trimmed === '' ? 'Sprachaufgabe' : trimmed,
    dueDate: null,
    priority: null,
    labels: [],
  };
}
/**
 * Build the user-role prompt for the extraction call.
 *
 * Embeds today's date so the model can resolve relative dates ("morgen",
 * "nächsten Montag") to absolute ISO dates, and JSON-stringifies the
 * transcript so quotes/newlines inside it can't break the prompt.
 */
function buildPrompt(transcript: string, language: string): string {
  const isoToday = new Date().toISOString().slice(0, 10);
  const languageNames: Record<string, string> = { de: 'German', en: 'English' };
  const langName = languageNames[language] ?? language;

  const lines: string[] = [];
  lines.push(`You are a task parser. The user spoke a task in ${langName}.`);
  lines.push(`Today is ${isoToday}.`);
  lines.push('');
  lines.push('Extract the following fields and return ONLY a JSON object with these exact keys:');
  lines.push(' - title: short imperative title without filler words (string, required)');
  lines.push(' - dueDate: ISO date YYYY-MM-DD or null if no date is mentioned');
  lines.push(' - priority: "low" | "medium" | "high" | null');
  lines.push(' - labels: array of short topic labels (max 3, lowercase, may be empty)');
  lines.push('');
  lines.push('Rules:');
  lines.push('- Resolve relative dates ("morgen", "tomorrow", "nächsten Montag") against today.');
  lines.push('- If only a time is mentioned, assume today.');
  lines.push('- Never invent details. If unsure, use null / empty array.');
  lines.push('- Output JSON only, no markdown, no commentary, no code fences.');
  lines.push('');
  lines.push(`Transcript: ${JSON.stringify(transcript)}`);
  return lines.join('\n');
}
/**
 * Validate whatever the LLM returned into the typed ParseResult shape.
 * Any field that fails its check collapses to its neutral value; a
 * non-object payload degrades to fallback(transcript) entirely.
 */
function coerce(raw: unknown, transcript: string): ParseResult {
  if (typeof raw !== 'object' || raw === null) return fallback(transcript);
  const obj = raw as Record<string, unknown>;

  // Title: prefer the model's (trimmed, non-empty), else the transcript.
  let title = transcript.trim();
  if (typeof obj.title === 'string') {
    const candidate = obj.title.trim();
    if (candidate) title = candidate;
  }

  // Due date: must at least start with YYYY-MM-DD (full timestamps pass too).
  let dueDate: string | null = null;
  if (typeof obj.dueDate === 'string' && /^\d{4}-\d{2}-\d{2}/.test(obj.dueDate)) {
    dueDate = obj.dueDate;
  }

  // Priority: only the three known literals survive.
  let priority: ParseResult['priority'] = null;
  if (obj.priority === 'low' || obj.priority === 'medium' || obj.priority === 'high') {
    priority = obj.priority;
  }

  // Labels: keep at most the first three string entries.
  const labels: string[] = [];
  if (Array.isArray(obj.labels)) {
    for (const entry of obj.labels) {
      if (labels.length === 3) break;
      if (typeof entry === 'string') labels.push(entry);
    }
  }

  return { title, dueDate, priority, labels };
}
/**
 * Pull the first JSON object out of an LLM reply. Models sometimes wrap
 * JSON in ```json ... ``` fences even when told not to; unwrap a fenced
 * block if present, then parse the outermost { ... } run. Returns null
 * when no parseable object is found.
 */
function extractJson(text: string): unknown {
  const fenceMatch = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
  const candidate = fenceMatch ? fenceMatch[1] : text;
  const open = candidate.indexOf('{');
  const close = candidate.lastIndexOf('}');
  if (open < 0 || close < open) return null;
  try {
    return JSON.parse(candidate.slice(open, close + 1));
  } catch {
    return null;
  }
}
/**
 * POST handler: transcript in, ParseResult out.
 *
 * Every failure mode — malformed body, mana-llm unreachable, timeout,
 * non-2xx response, unparseable payload — returns the fallback shape with
 * HTTP 200, so the client never needs an error path of its own.
 */
export const POST: RequestHandler = async ({ request }) => {
  // Parse the body defensively. JSON.parse happily returns null/numbers,
  // so guard against a non-object body too — the previous version threw a
  // TypeError (→ 500) on a body of `null`, breaking the "never fail harder
  // than typed quick-add" contract this endpoint exists to uphold.
  let rawBody: unknown;
  try {
    rawBody = await request.json();
  } catch {
    return json(fallback(''));
  }
  const body = (rawBody !== null && typeof rawBody === 'object' ? rawBody : {}) as {
    transcript?: unknown;
    language?: unknown;
  };

  // Type-guard the fields explicitly: { transcript: 123 } previously crashed
  // .slice() with a 500; now it degrades like every other bad input.
  const transcript =
    typeof body.transcript === 'string'
      ? body.transcript.slice(0, MAX_TRANSCRIPT_CHARS).trim()
      : '';
  const language = typeof body.language === 'string' && body.language ? body.language : 'de';
  if (!transcript) return json(fallback(''));

  const llmUrl = env.MANA_LLM_URL || env.PUBLIC_MANA_LLM_URL || 'http://localhost:3025';

  // Abort the LLM call after LLM_TIMEOUT_MS. The finally clears the timer
  // on every exit path (success, abort, network error) instead of the
  // previous duplicated clearTimeout calls.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), LLM_TIMEOUT_MS);
  let response: Response;
  try {
    response = await fetch(`${llmUrl.replace(/\/$/, '')}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      signal: controller.signal,
      body: JSON.stringify({
        model: DEFAULT_MODEL,
        stream: false,
        temperature: 0, // deterministic extraction — no creativity wanted
        messages: [
          { role: 'system', content: 'Output JSON only. No prose.' },
          { role: 'user', content: buildPrompt(transcript, language) },
        ],
      }),
    });
  } catch {
    // Network failure or timeout abort — degrade silently.
    return json(fallback(transcript));
  } finally {
    clearTimeout(timer);
  }
  if (!response.ok) return json(fallback(transcript));

  let payload: unknown;
  try {
    payload = await response.json();
  } catch {
    return json(fallback(transcript));
  }
  // OpenAI-compatible chat shape: choices[0].message.content holds the text.
  const content =
    (payload as { choices?: Array<{ message?: { content?: string } }> })?.choices?.[0]?.message
      ?.content ?? '';
  return json(coerce(extractJson(content), transcript));
};