mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-15 05:41:09 +02:00
feat(chat): wire AI streaming completions in chat detail page
Replace the placeholder stub with a real streaming SSE connection to mana-api at /api/v1/chat/completions/stream. Extracts the send-and-stream cycle into a shared services/completion.ts helper so both the route page and workbench overlay can reuse it. - Streams assistant response chunks into a live bubble - Shows thinking dots (●●●) while waiting for first token - Handles 402 (insufficient credits) with German error message - Auto-titles conversation from first user message - Persists final assistant text to IndexedDB with encryption Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4f17626d3d
commit
6cc40242e9
2 changed files with 197 additions and 15 deletions
145
apps/mana/apps/web/src/lib/modules/chat/services/completion.ts
Normal file
145
apps/mana/apps/web/src/lib/modules/chat/services/completion.ts
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
/**
|
||||
* Chat completion service — streams assistant responses from mana-api.
|
||||
*
|
||||
* Used by both the route-based detail page and the workbench overlay.
|
||||
* Handles the full send → stream → persist cycle:
|
||||
* 1. Add user message to IndexedDB
|
||||
* 2. Auto-title conversation from first message
|
||||
* 3. Create empty assistant message placeholder
|
||||
* 4. POST to /api/v1/chat/completions/stream (SSE)
|
||||
* 5. Append streamed chunks to the assistant message
|
||||
*/
|
||||
|
||||
import { getManaApiUrl } from '$lib/api/config';
|
||||
import { messagesStore } from '../stores/messages.svelte';
|
||||
import { conversationsStore } from '../stores/conversations.svelte';
|
||||
import type { Message } from '../types';
|
||||
|
||||
export interface SendOptions {
	/** ID of the conversation the message is sent into (used for all store writes). */
	conversationId: string;
	/** The user's message text; also the source for auto-titling the conversation. */
	text: string;
	/** All prior messages (for LLM context). */
	history: Array<{ sender: string; messageText: string }>;
	/** Current conversation title — used to decide whether to auto-title. */
	currentTitle?: string;
	/** Model override (default: server picks gemma3:4b). */
	model?: string;
}
|
||||
|
||||
export interface SendResult {
	/** The persisted user message, as returned by the messages store. */
	userMessage: Message;
	/** The assistant message with its final text (streamed reply or fallback/error text). */
	assistantMessage: Message;
}
|
||||
|
||||
/**
|
||||
* Send a user message and stream the assistant response.
|
||||
*
|
||||
* @param onChunk Called with the accumulated assistant text on every SSE chunk.
|
||||
* Use this to drive optimistic UI updates while the stream is open.
|
||||
* @returns The final user + assistant messages after the stream closes.
|
||||
* @throws On network errors or non-200 responses.
|
||||
*/
|
||||
export async function sendAndStream(
|
||||
opts: SendOptions,
|
||||
onChunk?: (accumulated: string) => void
|
||||
): Promise<SendResult> {
|
||||
const { conversationId, text, history, currentTitle, model } = opts;
|
||||
|
||||
// 1. Persist user message
|
||||
const userMessage = await messagesStore.addUserMessage(conversationId, text);
|
||||
|
||||
// 2. Auto-title from first message
|
||||
if (history.length === 0 && !currentTitle) {
|
||||
const title = text.length > 50 ? text.slice(0, 50) + '…' : text;
|
||||
await conversationsStore.updateTitle(conversationId, title);
|
||||
}
|
||||
|
||||
// 3. Build LLM messages array
|
||||
const llmMessages = [
|
||||
...history.map((m) => ({
|
||||
role: m.sender === 'user' ? 'user' : 'assistant',
|
||||
content: m.messageText,
|
||||
})),
|
||||
{ role: 'user' as const, content: text },
|
||||
];
|
||||
|
||||
// 4. Create assistant placeholder
|
||||
const assistantMessage = await messagesStore.addAssistantMessage(conversationId, '');
|
||||
|
||||
// 5. Stream from mana-api
|
||||
const apiUrl = getManaApiUrl();
|
||||
const response = await fetch(`${apiUrl}/api/v1/chat/completions/stream`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
messages: llmMessages,
|
||||
model: model ?? undefined,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errText = await response.text().catch(() => response.statusText);
|
||||
const fallback =
|
||||
response.status === 402 ? 'Nicht genügend Credits für diese Anfrage.' : `Fehler: ${errText}`;
|
||||
await messagesStore.updateText(assistantMessage.id, fallback);
|
||||
return { userMessage, assistantMessage: { ...assistantMessage, messageText: fallback } };
|
||||
}
|
||||
|
||||
// 6. Read SSE stream
|
||||
let accumulated = '';
|
||||
const reader = response.body?.getReader();
|
||||
if (!reader) {
|
||||
await messagesStore.updateText(assistantMessage.id, '(keine Antwort)');
|
||||
return {
|
||||
userMessage,
|
||||
assistantMessage: { ...assistantMessage, messageText: '(keine Antwort)' },
|
||||
};
|
||||
}
|
||||
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split('\n');
|
||||
buffer = lines.pop() ?? '';
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data:')) continue;
|
||||
const payload = line.slice(5).trim();
|
||||
if (payload === '[DONE]') continue;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(payload);
|
||||
// OpenAI-compatible chunk format
|
||||
const delta = parsed.choices?.[0]?.delta?.content;
|
||||
if (delta) {
|
||||
accumulated += delta;
|
||||
onChunk?.(accumulated);
|
||||
}
|
||||
} catch {
|
||||
// Non-JSON payload (e.g. error string) — append as-is
|
||||
if (payload && payload !== '[DONE]') {
|
||||
accumulated += payload;
|
||||
onChunk?.(accumulated);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 7. Final persist
|
||||
if (accumulated) {
|
||||
await messagesStore.updateText(assistantMessage.id, accumulated);
|
||||
} else {
|
||||
await messagesStore.updateText(assistantMessage.id, '(keine Antwort)');
|
||||
accumulated = '(keine Antwort)';
|
||||
}
|
||||
|
||||
return {
|
||||
userMessage,
|
||||
assistantMessage: { ...assistantMessage, messageText: accumulated },
|
||||
};
|
||||
}
|
||||
|
|
@ -3,9 +3,9 @@
|
|||
import { goto } from '$app/navigation';
|
||||
import { getContext } from 'svelte';
|
||||
import { conversationsStore } from '$lib/modules/chat/stores/conversations.svelte';
|
||||
import { messagesStore } from '$lib/modules/chat/stores/messages.svelte';
|
||||
import { useConversationMessages } from '$lib/modules/chat/queries';
|
||||
import type { Conversation, Message } from '$lib/modules/chat/types';
|
||||
import { sendAndStream } from '$lib/modules/chat/services/completion';
|
||||
import type { Conversation } from '$lib/modules/chat/types';
|
||||
import {
|
||||
PaperPlaneRight,
|
||||
ArrowLeft,
|
||||
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
let inputText = $state('');
|
||||
let isSending = $state(false);
|
||||
let streamingText = $state('');
|
||||
let isEditingTitle = $state(false);
|
||||
let editTitle = $state('');
|
||||
let showShare = $state(false);
|
||||
|
|
@ -41,26 +42,26 @@
|
|||
if (!text || isSending) return;
|
||||
|
||||
isSending = true;
|
||||
streamingText = '';
|
||||
inputText = '';
|
||||
|
||||
try {
|
||||
// Add user message to IndexedDB
|
||||
await messagesStore.addUserMessage(conversationId, text);
|
||||
|
||||
// Auto-set title if first message and no title
|
||||
if (messages.length <= 1 && !conversation?.title) {
|
||||
const title = text.length > 50 ? text.substring(0, 50) + '...' : text;
|
||||
await conversationsStore.updateTitle(conversationId, title);
|
||||
}
|
||||
|
||||
// NOTE: In the standalone chat app, this would call the chat backend for AI response.
|
||||
// In the unified app, the chat compute server handles streaming completions.
|
||||
// For now, messages are stored locally. AI integration will be added via
|
||||
// the chat compute server at /api/v1/chat/completions.
|
||||
await sendAndStream(
|
||||
{
|
||||
conversationId,
|
||||
text,
|
||||
history: messages,
|
||||
currentTitle: conversation?.title,
|
||||
},
|
||||
(accumulated) => {
|
||||
streamingText = accumulated;
|
||||
}
|
||||
);
|
||||
} catch (e) {
|
||||
console.error('Failed to send message:', e);
|
||||
} finally {
|
||||
isSending = false;
|
||||
streamingText = '';
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -203,6 +204,25 @@
|
|||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
|
||||
{#if isSending && streamingText}
|
||||
<div class="flex justify-start">
|
||||
<div
|
||||
class="max-w-[80%] rounded-2xl px-4 py-2.5 text-sm bg-[hsl(var(--muted))] text-[hsl(var(--foreground))]"
|
||||
>
|
||||
<p class="whitespace-pre-wrap">{streamingText}</p>
|
||||
<p class="mt-1 text-[10px] opacity-60">...</p>
|
||||
</div>
|
||||
</div>
|
||||
{:else if isSending}
|
||||
<div class="flex justify-start">
|
||||
<div
|
||||
class="rounded-2xl px-4 py-2.5 text-sm bg-[hsl(var(--muted))] text-[hsl(var(--muted-foreground))]"
|
||||
>
|
||||
<span class="thinking-dots">●●●</span>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
@ -236,3 +256,20 @@
|
|||
title={conversation?.title ?? 'Chat'}
|
||||
source="chat"
|
||||
/>
|
||||
|
||||
<style>
	/* Pulsing "●●●" indicator shown while waiting for the first streamed token. */
	.thinking-dots {
		font-size: 0.625rem;
		letter-spacing: 0.125rem;
		animation: dots-pulse 1.4s ease-in-out infinite;
	}
	/* Fade the whole dot group between 30% and full opacity. */
	@keyframes dots-pulse {
		0%,
		100% {
			opacity: 0.3;
		}
		50% {
			opacity: 1;
		}
	}
</style>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue