feat(chat): wire AI streaming completions in chat detail page

Replace the placeholder stub with a real streaming SSE connection
to mana-api at /api/v1/chat/completions/stream. Extracts the
send-and-stream cycle into a shared services/completion.ts helper
so both the route page and workbench overlay can reuse it.

- Streams assistant response chunks into a live bubble
- Shows thinking dots (●●●) while waiting for first token
- Handles 402 (insufficient credits) with German error message
- Auto-titles conversation from first user message
- Persists final assistant text to IndexedDB with encryption

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-10 17:52:02 +02:00
parent 4f17626d3d
commit 6cc40242e9
2 changed files with 197 additions and 15 deletions

View file

@ -0,0 +1,145 @@
/**
* Chat completion service streams assistant responses from mana-api.
*
* Used by both the route-based detail page and the workbench overlay.
* Handles the full send → stream → persist cycle:
* 1. Add user message to IndexedDB
* 2. Auto-title conversation from first message
* 3. Create empty assistant message placeholder
* 4. POST to /api/v1/chat/completions/stream (SSE)
* 5. Append streamed chunks to the assistant message
*/
import { getManaApiUrl } from '$lib/api/config';
import { messagesStore } from '../stores/messages.svelte';
import { conversationsStore } from '../stores/conversations.svelte';
import type { Message } from '../types';
/** Input for the send-and-stream cycle. */
export interface SendOptions {
  /** ID of the conversation both messages are appended to. */
  conversationId: string;
  /** The new user message text to send. */
  text: string;
  /** All prior messages (for LLM context). */
  history: Array<{ sender: string; messageText: string }>;
  /** Current conversation title — used to decide whether to auto-title. */
  currentTitle?: string;
  /** Model override (default: server picks gemma3:4b). */
  model?: string;
}
/** Messages produced by one completed send-and-stream cycle. */
export interface SendResult {
  /** The persisted user message. */
  userMessage: Message;
  /** The assistant message carrying the final streamed text. */
  assistantMessage: Message;
}
/**
 * Send a user message and stream the assistant response.
 *
 * @param onChunk Called with the accumulated assistant text on every SSE chunk.
 *                Use this to drive optimistic UI updates while the stream is open.
 * @returns The final user + assistant messages after the stream closes.
 * @throws On network errors (fetch rejection); non-200 responses are handled
 *         by persisting a fallback error text instead of throwing.
 */
export async function sendAndStream(
  opts: SendOptions,
  onChunk?: (accumulated: string) => void
): Promise<SendResult> {
  const { conversationId, text, history, currentTitle, model } = opts;

  // 1. Persist the user message first so the UI can show it even if the
  //    request below fails.
  const userMessage = await messagesStore.addUserMessage(conversationId, text);

  // 2. Auto-title from the first message when the conversation is untitled.
  if (history.length === 0 && !currentTitle) {
    const title = text.length > 50 ? text.slice(0, 50) + '…' : text;
    await conversationsStore.updateTitle(conversationId, title);
  }

  // 3. Build the OpenAI-style messages array: full history plus the new turn.
  const llmMessages = [
    ...history.map((m) => ({
      role: m.sender === 'user' ? 'user' : 'assistant',
      content: m.messageText,
    })),
    { role: 'user' as const, content: text },
  ];

  // 4. Create an empty assistant placeholder the stream will fill in.
  const assistantMessage = await messagesStore.addAssistantMessage(conversationId, '');

  // Persist a final assistant text and build the result — shared by every exit
  // path (error, empty body, and normal completion).
  const finish = async (finalText: string): Promise<SendResult> => {
    await messagesStore.updateText(assistantMessage.id, finalText);
    return { userMessage, assistantMessage: { ...assistantMessage, messageText: finalText } };
  };

  // 5. Open the SSE stream from mana-api.
  const apiUrl = getManaApiUrl();
  const response = await fetch(`${apiUrl}/api/v1/chat/completions/stream`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: llmMessages,
      model: model ?? undefined,
    }),
  });

  if (!response.ok) {
    const errText = await response.text().catch(() => response.statusText);
    const fallback =
      response.status === 402 ? 'Nicht genügend Credits für diese Anfrage.' : `Fehler: ${errText}`;
    return finish(fallback);
  }

  // 6. Read the SSE stream, accumulating assistant text chunk by chunk.
  const reader = response.body?.getReader();
  if (!reader) {
    return finish('(keine Antwort)');
  }

  let accumulated = '';

  // Parse one SSE line; append any assistant-text delta it carries.
  const consumeLine = (line: string): void => {
    if (!line.startsWith('data:')) return;
    const payload = line.slice(5).trim();
    if (!payload || payload === '[DONE]') return;
    try {
      // OpenAI-compatible chunk format.
      const parsed = JSON.parse(payload) as {
        choices?: Array<{ delta?: { content?: string } }>;
      };
      const delta = parsed.choices?.[0]?.delta?.content;
      if (delta) {
        accumulated += delta;
        onChunk?.(accumulated);
      }
    } catch {
      // Non-JSON payload (e.g. error string) — append as-is.
      accumulated += payload;
      onChunk?.(accumulated);
    }
  };

  const decoder = new TextDecoder();
  let buffer = '';
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop() ?? ''; // keep the incomplete trailing line for the next read
      for (const line of lines) consumeLine(line);
    }
    // BUGFIX: flush the decoder (a multi-byte character may straddle the last
    // network chunk) and process any final line that arrived without a
    // trailing newline — the original dropped both.
    buffer += decoder.decode();
    if (buffer) consumeLine(buffer);
  } finally {
    // BUGFIX: release the reader's lock on the body stream.
    reader.releaseLock();
  }

  // 7. Persist the final text, falling back to a placeholder when the stream
  //    yielded nothing.
  return finish(accumulated || '(keine Antwort)');
}

View file

@ -3,9 +3,9 @@
import { goto } from '$app/navigation';
import { getContext } from 'svelte';
import { conversationsStore } from '$lib/modules/chat/stores/conversations.svelte';
import { messagesStore } from '$lib/modules/chat/stores/messages.svelte';
import { useConversationMessages } from '$lib/modules/chat/queries';
import type { Conversation, Message } from '$lib/modules/chat/types';
import { sendAndStream } from '$lib/modules/chat/services/completion';
import type { Conversation } from '$lib/modules/chat/types';
import {
PaperPlaneRight,
ArrowLeft,
@ -29,6 +29,7 @@
let inputText = $state('');
let isSending = $state(false);
let streamingText = $state('');
let isEditingTitle = $state(false);
let editTitle = $state('');
let showShare = $state(false);
@ -41,26 +42,26 @@
if (!text || isSending) return;
isSending = true;
streamingText = '';
inputText = '';
try {
// Add user message to IndexedDB
await messagesStore.addUserMessage(conversationId, text);
// Auto-set title if first message and no title
if (messages.length <= 1 && !conversation?.title) {
const title = text.length > 50 ? text.substring(0, 50) + '...' : text;
await conversationsStore.updateTitle(conversationId, title);
}
// NOTE: In the standalone chat app, this would call the chat backend for AI response.
// In the unified app, the chat compute server handles streaming completions.
// For now, messages are stored locally. AI integration will be added via
// the chat compute server at /api/v1/chat/completions.
await sendAndStream(
{
conversationId,
text,
history: messages,
currentTitle: conversation?.title,
},
(accumulated) => {
streamingText = accumulated;
}
);
} catch (e) {
console.error('Failed to send message:', e);
} finally {
isSending = false;
streamingText = '';
}
}
@ -203,6 +204,25 @@
</div>
</div>
{/each}
{#if isSending && streamingText}
  <!-- Live assistant bubble: filled chunk-by-chunk while the SSE stream is open. -->
  <div class="flex justify-start">
    <div
      class="max-w-[80%] rounded-2xl px-4 py-2.5 text-sm bg-[hsl(var(--muted))] text-[hsl(var(--foreground))]"
    >
      <p class="whitespace-pre-wrap">{streamingText}</p>
      <p class="mt-1 text-[10px] opacity-60">...</p>
    </div>
  </div>
{:else if isSending}
  <!-- Thinking indicator: request sent, waiting for the first token. -->
  <div class="flex justify-start">
    <div
      class="rounded-2xl px-4 py-2.5 text-sm bg-[hsl(var(--muted))] text-[hsl(var(--muted-foreground))]"
    >
      <span class="thinking-dots">●●●</span>
    </div>
  </div>
{/if}
</div>
{/if}
</div>
@ -236,3 +256,20 @@
title={conversation?.title ?? 'Chat'}
source="chat"
/>
<style>
  /* Pulsing "assistant is thinking" dots shown before the first token arrives. */
  .thinking-dots {
    font-size: 0.625rem;
    letter-spacing: 0.125rem;
    animation: dots-pulse 1.4s ease-in-out infinite;
  }
  /* Slow opacity pulse: dim at rest, fully visible at mid-cycle. */
  @keyframes dots-pulse {
    0%,
    100% {
      opacity: 0.3;
    }
    50% {
      opacity: 1;
    }
  }
</style>