feat(chat): wire AI streaming completions in chat detail page

Replace the placeholder stub with a real streaming SSE connection
to mana-api at /api/v1/chat/completions/stream. Extracts the
send-and-stream cycle into a shared services/completion.ts helper
so both the route page and workbench overlay can reuse it.

- Streams assistant response chunks into a live bubble
- Shows thinking dots (●●●) while waiting for first token
- Handles 402 (insufficient credits) with German error message
- Auto-titles conversation from first user message
- Persists final assistant text to IndexedDB with encryption

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-10 17:52:02 +02:00
parent 4f17626d3d
commit 6cc40242e9
2 changed files with 197 additions and 15 deletions

View file

@ -0,0 +1,145 @@
/**
* Chat completion service streams assistant responses from mana-api.
*
* Used by both the route-based detail page and the workbench overlay.
* Handles the full send → stream → persist cycle:
* 1. Add user message to IndexedDB
* 2. Auto-title conversation from first message
* 3. Create empty assistant message placeholder
* 4. POST to /api/v1/chat/completions/stream (SSE)
* 5. Append streamed chunks to the assistant message
*/
import { getManaApiUrl } from '$lib/api/config';
import { messagesStore } from '../stores/messages.svelte';
import { conversationsStore } from '../stores/conversations.svelte';
import type { Message } from '../types';
/** Input for the send-and-stream cycle. */
export interface SendOptions {
  /** ID of the conversation both messages are appended to. */
  conversationId: string;
  /** The new user message text to send. */
  text: string;
  /** All prior messages (for LLM context). */
  history: Array<{ sender: string; messageText: string }>;
  /** Current conversation title — used to decide whether to auto-title. */
  currentTitle?: string;
  /** Model override (default: server picks gemma3:4b). */
  model?: string;
}
/** Messages produced by one completed send-and-stream cycle. */
export interface SendResult {
  /** The persisted user message. */
  userMessage: Message;
  /** The assistant message carrying the final streamed text. */
  assistantMessage: Message;
}
/**
 * Send a user message and stream the assistant response.
 *
 * @param onChunk Called with the accumulated assistant text on every SSE chunk.
 *                Use this to drive optimistic UI updates while the stream is open.
 * @returns The final user + assistant messages after the stream closes.
 * @throws On network errors (fetch rejection); non-200 responses are handled
 *         by persisting a fallback error text instead of throwing.
 */
export async function sendAndStream(
  opts: SendOptions,
  onChunk?: (accumulated: string) => void
): Promise<SendResult> {
  const { conversationId, text, history, currentTitle, model } = opts;

  // 1. Persist the user message first so the UI can show it even if the
  //    request below fails.
  const userMessage = await messagesStore.addUserMessage(conversationId, text);

  // 2. Auto-title from the first message when the conversation is untitled.
  if (history.length === 0 && !currentTitle) {
    const title = text.length > 50 ? text.slice(0, 50) + '…' : text;
    await conversationsStore.updateTitle(conversationId, title);
  }

  // 3. Build the OpenAI-style messages array: full history plus the new turn.
  const llmMessages = [
    ...history.map((m) => ({
      role: m.sender === 'user' ? 'user' : 'assistant',
      content: m.messageText,
    })),
    { role: 'user' as const, content: text },
  ];

  // 4. Create an empty assistant placeholder the stream will fill in.
  const assistantMessage = await messagesStore.addAssistantMessage(conversationId, '');

  // Persist a final assistant text and build the result — shared by every exit
  // path (error, empty body, and normal completion).
  const finish = async (finalText: string): Promise<SendResult> => {
    await messagesStore.updateText(assistantMessage.id, finalText);
    return { userMessage, assistantMessage: { ...assistantMessage, messageText: finalText } };
  };

  // 5. Open the SSE stream from mana-api.
  const apiUrl = getManaApiUrl();
  const response = await fetch(`${apiUrl}/api/v1/chat/completions/stream`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: llmMessages,
      model: model ?? undefined,
    }),
  });

  if (!response.ok) {
    const errText = await response.text().catch(() => response.statusText);
    const fallback =
      response.status === 402 ? 'Nicht genügend Credits für diese Anfrage.' : `Fehler: ${errText}`;
    return finish(fallback);
  }

  // 6. Read the SSE stream, accumulating assistant text chunk by chunk.
  const reader = response.body?.getReader();
  if (!reader) {
    return finish('(keine Antwort)');
  }

  let accumulated = '';

  // Parse one SSE line; append any assistant-text delta it carries.
  const consumeLine = (line: string): void => {
    if (!line.startsWith('data:')) return;
    const payload = line.slice(5).trim();
    if (!payload || payload === '[DONE]') return;
    try {
      // OpenAI-compatible chunk format.
      const parsed = JSON.parse(payload) as {
        choices?: Array<{ delta?: { content?: string } }>;
      };
      const delta = parsed.choices?.[0]?.delta?.content;
      if (delta) {
        accumulated += delta;
        onChunk?.(accumulated);
      }
    } catch {
      // Non-JSON payload (e.g. error string) — append as-is.
      accumulated += payload;
      onChunk?.(accumulated);
    }
  };

  const decoder = new TextDecoder();
  let buffer = '';
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop() ?? ''; // keep the incomplete trailing line for the next read
      for (const line of lines) consumeLine(line);
    }
    // BUGFIX: flush the decoder (a multi-byte character may straddle the last
    // network chunk) and process any final line that arrived without a
    // trailing newline — the original dropped both.
    buffer += decoder.decode();
    if (buffer) consumeLine(buffer);
  } finally {
    // BUGFIX: release the reader's lock on the body stream.
    reader.releaseLock();
  }

  // 7. Persist the final text, falling back to a placeholder when the stream
  //    yielded nothing.
  return finish(accumulated || '(keine Antwort)');
}

View file

@ -3,9 +3,9 @@
import { goto } from '$app/navigation';
import { getContext } from 'svelte';
import { conversationsStore } from '$lib/modules/chat/stores/conversations.svelte';
import { messagesStore } from '$lib/modules/chat/stores/messages.svelte';
import { useConversationMessages } from '$lib/modules/chat/queries';
import type { Conversation, Message } from '$lib/modules/chat/types';
import { sendAndStream } from '$lib/modules/chat/services/completion';
import type { Conversation } from '$lib/modules/chat/types';
import {
PaperPlaneRight,
ArrowLeft,
@ -29,6 +29,7 @@
let inputText = $state('');
let isSending = $state(false);
let streamingText = $state('');
let isEditingTitle = $state(false);
let editTitle = $state('');
let showShare = $state(false);
@ -41,26 +42,26 @@
if (!text || isSending) return;
isSending = true;
streamingText = '';
inputText = '';
try {
// Add user message to IndexedDB
await messagesStore.addUserMessage(conversationId, text);
// Auto-set title if first message and no title
if (messages.length <= 1 && !conversation?.title) {
const title = text.length > 50 ? text.substring(0, 50) + '...' : text;
await conversationsStore.updateTitle(conversationId, title);
}
// NOTE: In the standalone chat app, this would call the chat backend for AI response.
// In the unified app, the chat compute server handles streaming completions.
// For now, messages are stored locally. AI integration will be added via
// the chat compute server at /api/v1/chat/completions.
await sendAndStream(
{
conversationId,
text,
history: messages,
currentTitle: conversation?.title,
},
(accumulated) => {
streamingText = accumulated;
}
);
} catch (e) {
console.error('Failed to send message:', e);
} finally {
isSending = false;
streamingText = '';
}
}
@ -203,6 +204,25 @@
</div>
</div>
{/each}
{#if isSending && streamingText}
  <!-- Live assistant bubble: filled chunk-by-chunk while the SSE stream is open. -->
  <div class="flex justify-start">
    <div
      class="max-w-[80%] rounded-2xl px-4 py-2.5 text-sm bg-[hsl(var(--muted))] text-[hsl(var(--foreground))]"
    >
      <p class="whitespace-pre-wrap">{streamingText}</p>
      <p class="mt-1 text-[10px] opacity-60">...</p>
    </div>
  </div>
{:else if isSending}
  <!-- Thinking indicator: request sent, waiting for the first token. -->
  <div class="flex justify-start">
    <div
      class="rounded-2xl px-4 py-2.5 text-sm bg-[hsl(var(--muted))] text-[hsl(var(--muted-foreground))]"
    >
      <span class="thinking-dots">●●●</span>
    </div>
  </div>
{/if}
</div>
{/if}
</div>
@ -236,3 +256,20 @@
title={conversation?.title ?? 'Chat'}
source="chat"
/>
<style>
  /* Pulsing "assistant is thinking" dots shown before the first token arrives. */
  .thinking-dots {
    font-size: 0.625rem;
    letter-spacing: 0.125rem;
    animation: dots-pulse 1.4s ease-in-out infinite;
  }
  /* Slow opacity pulse: dim at rest, fully visible at mid-cycle. */
  @keyframes dots-pulse {
    0%,
    100% {
      opacity: 0.3;
    }
    50% {
      opacity: 1;
    }
  }
</style>