managarten/services/mana-llm/src/streaming/sse.py
Till-JS 1495dbe476 feat(mana-llm): add central LLM abstraction service
Python/FastAPI service providing unified OpenAI-compatible API for
Ollama and cloud LLM providers (OpenRouter, Groq, Together).

Features:
- Chat completions with streaming (SSE)
- Vision/multimodal support
- Embeddings generation
- Multi-provider routing (provider/model format)
- Prometheus metrics
- Optional Redis caching
2026-01-29 22:01:00 +01:00

43 lines
1.2 KiB
Python

"""Server-Sent Events (SSE) response handling."""
import json
import logging
from collections.abc import AsyncIterator
from src.models import ChatCompletionRequest, ChatCompletionStreamResponse
from src.providers import ProviderRouter
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
async def stream_chat_completion(
    router: ProviderRouter,
    request: ChatCompletionRequest,
) -> AsyncIterator[str]:
    """Stream a chat completion as Server-Sent Events (SSE) data lines.

    Every chunk produced by ``router.chat_completion_stream(request)`` is
    dumped with ``model_dump(exclude_none=True)``, JSON-encoded and yielded
    as one SSE event. The stream always ends with a ``[DONE]`` event, on
    both the success path and the error path.

    Args:
        router: Provider router whose ``chat_completion_stream`` is iterated.
        request: Chat completion request passed through to the router.

    Yields:
        Strings in SSE format, each terminated by a blank line, e.g.::

            data: {"choices": [{"delta": {"content": "Hello"}}]}

            data: [DONE]

    Note:
        Exceptions derived from ``Exception`` are not propagated to the
        caller. They are logged with their traceback and reported to the
        consumer as an ``{"error": {...}}`` event followed by ``[DONE]``.
    """
    try:
        async for chunk in router.chat_completion_stream(request):
            # Convert to OpenAI-compatible SSE format
            payload = chunk.model_dump(exclude_none=True)
            yield f"data: {json.dumps(payload)}\n\n"

        # Send final [DONE] marker
        yield "data: [DONE]\n\n"

    except Exception as exc:
        # logger.exception keeps the traceback, which a plain error() call
        # would drop; %s formatting defers string building to the logger.
        logger.exception("Streaming error: %s", exc)

        # Exceptions raised without arguments (e.g. a bare TimeoutError())
        # stringify to "", so fall back to the class name to avoid sending
        # the client an error event with an empty message.
        message = str(exc) or type(exc).__name__

        # Send error as SSE event
        error_data = {
            "error": {
                "message": message,
                "type": "server_error",
            }
        }
        yield f"data: {json.dumps(error_data)}\n\n"
        yield "data: [DONE]\n\n"