mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-21 06:06:41 +02:00
Python/FastAPI service providing a unified, OpenAI-compatible API for Ollama and cloud LLM providers (OpenRouter, Groq, Together). Features: chat completions with streaming (SSE); vision/multimodal support; embeddings generation; multi-provider routing (provider/model format); Prometheus metrics; optional Redis caching.
61 lines
1.4 KiB
Python
61 lines
1.4 KiB
Python
"""Abstract base class for LLM providers."""
|
|
|
|
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator
from typing import Any

from src.models import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    ChatCompletionStreamResponse,
    EmbeddingRequest,
    EmbeddingResponse,
    ModelInfo,
)
|
|
|
|
|
|
class LLMProvider(ABC):
    """Abstract base class for LLM providers.

    Declares the interface a provider has to implement: chat completions
    (plain and streaming), model listing, embeddings and a health check.
    Every method marked ``@abstractmethod`` must be overridden before a
    subclass can be instantiated; ``close`` is the only method that ships
    with a default (no-op) implementation.
    """

    # Identifier of the provider. "base" is only the placeholder used by this
    # abstract class; presumably each concrete provider overrides it with its
    # own name -- TODO confirm against the subclasses.
    name: str = "base"

    @abstractmethod
    async def chat_completion(
        self,
        request: ChatCompletionRequest,
        model: str,
    ) -> ChatCompletionResponse:
        """Generate a chat completion (non-streaming).

        Args:
            request: The chat completion request to fulfil.
            model: Name of the model to run the request against (presumably
                the provider-local name, without any routing prefix --
                TODO confirm against the caller).

        Returns:
            The complete chat completion response.
        """
        ...

    # NOTE(review): this is declared ``async def`` with no ``yield``, so static
    # type checkers read it as a coroutine that *returns* an async iterator
    # (``async for chunk in await provider.chat_completion_stream(...)``).
    # If the concrete providers implement it as async generators, the
    # declaration here would have to be a plain ``def`` to match them.
    # Left unchanged because the callers are not visible from this file.
    @abstractmethod
    async def chat_completion_stream(
        self,
        request: ChatCompletionRequest,
        model: str,
    ) -> AsyncIterator[ChatCompletionStreamResponse]:
        """Generate a chat completion (streaming).

        Args:
            request: The chat completion request to fulfil.
            model: Name of the model to run the request against.

        Returns:
            An asynchronous iterator of stream response chunks.
        """
        ...

    @abstractmethod
    async def list_models(self) -> list[ModelInfo]:
        """List available models.

        Returns:
            One ``ModelInfo`` entry per available model.
        """
        ...

    @abstractmethod
    async def embeddings(
        self,
        request: EmbeddingRequest,
        model: str,
    ) -> EmbeddingResponse:
        """Generate embeddings for input text.

        Args:
            request: The embedding request to fulfil.
            model: Name of the embedding model to use.

        Returns:
            The embedding response.
        """
        ...

    @abstractmethod
    async def health_check(self) -> dict[str, Any]:
        """Check provider health status.

        Returns:
            A dictionary describing the provider's health; its keys are
            chosen by the implementing provider.
        """
        ...

    async def close(self) -> None:
        """Clean up resources.

        The default implementation does nothing, so providers that hold no
        resources do not need to override it.
        """