feat(ai): SSE streaming for foreground Mission Runner

Enable real-time token streaming during the planner "calling-llm" phase so the user sees live progress ("empfange Plan… 128 tokens") instead of a static spinner. The parser still receives the full text once complete — no partial-JSON risk. Changes: - Extract shared SSE parser from playground into @mana/shared-llm/sse-parser - remote.ts: use stream:true when onToken callback is provided - AiPlanInput: add optional onToken field (shared-ai) - ai-plan task: pass onToken through to backend.generate() - runner.ts: throttled (500ms) phaseDetail updates during streaming - Playground: refactored to use shared SSE parser Also includes: AI agent architecture comparison report (docs/reports/) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-23 11:46:41 +02:00 · 2026-04-16 12:32:43 +02:00 · 2026-04-16 12:32:43 +02:00 · be81d11dc3
commit be81d11dc3
parent 8a0bf93699
9 changed files with 633 additions and 106 deletions
--- a/packages/shared-llm/src/sse-parser.ts
+++ b/packages/shared-llm/src/sse-parser.ts
@ -0,0 +1,74 @@
+/**
+ * Shared SSE parser for OpenAI-compatible streaming responses.
+ *
+ * The wire format from mana-llm is straight OpenAI: `data: {…}\n\n`
+ * lines with a sentinel `data: [DONE]`. This ~40-line reader is simpler
+ * than adding a dependency and is shared between the LLM orchestrator
+ * (backends/remote.ts) and the playground module.
+ */
+
+export interface SSEUsage {
+	readonly promptTokens: number;
+	readonly completionTokens: number;
+}
+
+/**
+ * Consume a ReadableStream of SSE chunks from an OpenAI-compatible
+ * `/v1/chat/completions` endpoint.
+ *
+ * Calls `onDelta` for each content token and `onUsage` (if provided)
+ * when the final usage stats arrive. Returns the accumulated full
+ * content string once the stream is done.
+ */
+export async function consumeSSEStream(
+	body: ReadableStream<Uint8Array>,
+	onDelta?: (content: string) => void,
+	onUsage?: (usage: SSEUsage) => void
+): Promise<string> {
+	const reader = body.getReader();
+	const decoder = new TextDecoder();
+	let buffer = '';
+	let content = '';
+
+	while (true) {
+		const { value, done } = await reader.read();
+		if (done) break;
+		buffer += decoder.decode(value, { stream: true });
+
+		// SSE frames are separated by blank lines. Process complete frames
+		// and leave any partial trailing frame in the buffer.
+		let sep: number;
+		while ((sep = buffer.indexOf('\n\n')) !== -1) {
+			const frame = buffer.slice(0, sep);
+			buffer = buffer.slice(sep + 2);
+
+			for (const line of frame.split('\n')) {
+				if (!line.startsWith('data:')) continue;
+				const data = line.slice(5).trim();
+				if (!data || data === '[DONE]') continue;
+				try {
+					const json = JSON.parse(data) as {
+						choices?: Array<{ delta?: { content?: string } }>;
+						usage?: { prompt_tokens?: number; completion_tokens?: number };
+					};
+					const delta = json.choices?.[0]?.delta?.content;
+					if (delta) {
+						content += delta;
+						onDelta?.(delta);
+					}
+					if (json.usage?.prompt_tokens != null) {
+						onUsage?.({
+							promptTokens: json.usage.prompt_tokens,
+							completionTokens: json.usage.completion_tokens ?? 0,
+						});
+					}
+				} catch {
+					// Malformed frame — skip silently. mana-llm occasionally
+					// emits keepalive comments.
+				}
+			}
+		}
+	}
+
+	return content;
+}