managarten/services/mana-llm/src/models/requests.py
Till-JS 1495dbe476 feat(mana-llm): add central LLM abstraction service
Python/FastAPI service providing unified OpenAI-compatible API for
Ollama and cloud LLM providers (OpenRouter, Groq, Together).

Features:
- Chat completions with streaming (SSE)
- Vision/multimodal support
- Embeddings generation
- Multi-provider routing (provider/model format)
- Prometheus metrics
- Optional Redis caching
2026-01-29 22:01:00 +01:00

57 lines
1.6 KiB
Python

"""Request models for OpenAI-compatible API."""
from typing import Literal
from pydantic import BaseModel, Field
class TextContent(BaseModel):
    """Text content in a message."""

    # Tag identifying this part as text. Only the literal "text" is
    # accepted, and it is filled in automatically when omitted.
    type: Literal["text"] = "text"

    # The text carried by this content part (required).
    text: str
class ImageUrl(BaseModel):
    """Image URL reference."""

    # Where the image lives: either an http(s):// address or an inline
    # data:image/...;base64,... URI. The field is a plain string, so no
    # scheme validation is performed by this model.
    url: str
class ImageContent(BaseModel):
    """Image content in a message."""

    # Tag identifying this part as an image. Only the literal "image_url"
    # is accepted, and it is filled in automatically when omitted.
    type: Literal["image_url"] = "image_url"

    # Nested object wrapping the image location (required).
    image_url: ImageUrl
# One element of a multi-part message body: a text part or an image part.
_ContentPart = TextContent | ImageContent

# A message body is either a plain string or a list of content parts.
MessageContent = str | list[_ContentPart]
class Message(BaseModel):
    """A single message in the conversation."""

    # Author of the message; any value other than these three literals
    # fails validation.
    role: Literal["system", "user", "assistant"]

    # Message body, typed by the module-level MessageContent alias
    # (plain string or a list of text/image parts).
    content: MessageContent
class ChatCompletionRequest(BaseModel):
    """Request body for chat completions endpoint."""

    # --- Required fields -------------------------------------------------
    model: str = Field(
        ...,
        description="Model identifier in format 'provider/model' or just 'model'",
    )
    # The conversation so far; at least one message must be supplied.
    messages: list[Message] = Field(
        ...,
        min_length=1,
    )

    # --- Response mode ---------------------------------------------------
    # Streaming flag; defaults to False when the caller does not set it.
    stream: bool = False

    # --- Optional tuning parameters (each is None when omitted) ----------
    # Accepted range: 0.0 to 2.0 inclusive.
    temperature: float | None = Field(None, ge=0.0, le=2.0)
    # Must be strictly positive when given.
    max_tokens: int | None = Field(None, gt=0)
    # Accepted range: 0.0 to 1.0 inclusive.
    top_p: float | None = Field(None, ge=0.0, le=1.0)
    # Both penalties share the same accepted range: -2.0 to 2.0 inclusive.
    frequency_penalty: float | None = Field(None, ge=-2.0, le=2.0)
    presence_penalty: float | None = Field(None, ge=-2.0, le=2.0)
    # A single stop string, a list of stop strings, or None.
    stop: str | list[str] | None = None
class EmbeddingRequest(BaseModel):
    """Request body for embeddings endpoint."""

    # Required model identifier.
    model: str = Field(
        ...,
        description="Model identifier",
    )

    # Required payload: one string or a list of strings.
    input: str | list[str] = Field(
        ...,
        description="Text(s) to embed",
    )

    # Only "float" and "base64" are accepted; "float" is used when omitted.
    encoding_format: Literal["float", "base64"] = "float"