mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-15 07:21:10 +02:00
Enable real-time token streaming during the planner "calling-llm" phase
so the user sees live progress ("empfange Plan… 128 tokens") instead of
a static spinner. The parser still receives the full text once complete —
no partial-JSON risk.
Changes:
- Extract shared SSE parser from playground into @mana/shared-llm/sse-parser
- remote.ts: use stream:true when onToken callback is provided
- AiPlanInput: add optional onToken field (shared-ai)
- ai-plan task: pass onToken through to backend.generate()
- runner.ts: throttled (500ms) phaseDetail updates during streaming
- Playground: refactored to use shared SSE parser
Also includes: AI agent architecture comparison report (docs/reports/)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
74 lines
2.2 KiB
TypeScript
74 lines
2.2 KiB
TypeScript
/**
 * Shared SSE parser for OpenAI-compatible streaming responses.
 *
 * The wire format from mana-llm is straight OpenAI: `data: {…}\n\n`
 * lines with a sentinel `data: [DONE]`. This ~40-line reader is simpler
 * than adding a dependency and is shared between the LLM orchestrator
 * (backends/remote.ts) and the playground module.
 */
|
|
|
|
export interface SSEUsage {
|
|
readonly promptTokens: number;
|
|
readonly completionTokens: number;
|
|
}
|
|
|
|
/**
|
|
* Consume a ReadableStream of SSE chunks from an OpenAI-compatible
|
|
* `/v1/chat/completions` endpoint.
|
|
*
|
|
* Calls `onDelta` for each content token and `onUsage` (if provided)
|
|
* when the final usage stats arrive. Returns the accumulated full
|
|
* content string once the stream is done.
|
|
*/
|
|
export async function consumeSSEStream(
|
|
body: ReadableStream<Uint8Array>,
|
|
onDelta?: (content: string) => void,
|
|
onUsage?: (usage: SSEUsage) => void
|
|
): Promise<string> {
|
|
const reader = body.getReader();
|
|
const decoder = new TextDecoder();
|
|
let buffer = '';
|
|
let content = '';
|
|
|
|
while (true) {
|
|
const { value, done } = await reader.read();
|
|
if (done) break;
|
|
buffer += decoder.decode(value, { stream: true });
|
|
|
|
// SSE frames are separated by blank lines. Process complete frames
|
|
// and leave any partial trailing frame in the buffer.
|
|
let sep: number;
|
|
while ((sep = buffer.indexOf('\n\n')) !== -1) {
|
|
const frame = buffer.slice(0, sep);
|
|
buffer = buffer.slice(sep + 2);
|
|
|
|
for (const line of frame.split('\n')) {
|
|
if (!line.startsWith('data:')) continue;
|
|
const data = line.slice(5).trim();
|
|
if (!data || data === '[DONE]') continue;
|
|
try {
|
|
const json = JSON.parse(data) as {
|
|
choices?: Array<{ delta?: { content?: string } }>;
|
|
usage?: { prompt_tokens?: number; completion_tokens?: number };
|
|
};
|
|
const delta = json.choices?.[0]?.delta?.content;
|
|
if (delta) {
|
|
content += delta;
|
|
onDelta?.(delta);
|
|
}
|
|
if (json.usage?.prompt_tokens != null) {
|
|
onUsage?.({
|
|
promptTokens: json.usage.prompt_tokens,
|
|
completionTokens: json.usage.completion_tokens ?? 0,
|
|
});
|
|
}
|
|
} catch {
|
|
// Malformed frame — skip silently. mana-llm occasionally
|
|
// emits keepalive comments.
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return content;
|
|
}
|