fix(mana-llm): add response_format to ChatCompletionRequest model

The first iteration of the Ollama response_format passthrough crashed
with 'ChatCompletionRequest' object has no attribute 'response_format',
because the Pydantic request model didn't declare the field at all:
incoming response_format from OpenAI-compatible clients was silently
dropped at the parsing layer before the provider could see it.
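
For context, that is stock Pydantic v2 behavior, reproduced below with a
hypothetical trimmed-down model: BaseModel defaults to extra="ignore",
so undeclared keys vanish during validation and only surface as an
AttributeError on access later.

    from pydantic import BaseModel

    class ChatCompletionRequest(BaseModel):
        # Trimmed illustration; response_format deliberately not declared.
        model: str

    req = ChatCompletionRequest.model_validate(
        {"model": "llama3", "response_format": {"type": "json_object"}}
    )
    assert "response_format" not in req.model_dump()  # dropped silently
    req.response_format  # AttributeError: ... no attribute 'response_format'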

Fix: declare a typed ResponseFormat sub-model with the two OpenAI shapes
('json_object' and 'json_schema'), add it as an optional field on
ChatCompletionRequest, and let the Ollama provider read it directly
without defensive getattr fallbacks.
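
For reference, the two shapes as a client would send them (model name,
schema name, and schema contents here are made up for illustration):

    # Free-form JSON mode:
    json_object_rf = {"type": "json_object"}

    # Schema-constrained mode:
    json_schema_rf = {
        "type": "json_schema",
        "json_schema": {
            "name": "city",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
                "required": ["name"],
            },
        },
    }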

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Till JS 2026-04-09 18:50:54 +02:00
parent cb87d23509
commit 5520f1385e
2 changed files with 26 additions and 18 deletions


@@ -1,6 +1,6 @@
 """Request models for OpenAI-compatible API."""
 
-from typing import Literal
+from typing import Any, Literal
 
 from pydantic import BaseModel, Field
 
@@ -35,6 +35,21 @@ class Message(BaseModel):
     content: MessageContent
 
 
+class ResponseFormat(BaseModel):
+    """OpenAI structured-output response_format hint.
+
+    Two shapes are accepted:
+    - {"type": "json_object"}  free-form JSON
+    - {"type": "json_schema",
+       "json_schema": {"name": "...", "schema": {...}, "strict": bool}}
+      schema-constrained JSON; passed through to providers that
+      support it (e.g. Ollama 0.5+ via its native `format` field).
+    """
+
+    type: Literal["json_object", "json_schema"]
+    json_schema: dict[str, Any] | None = None
+
+
 class ChatCompletionRequest(BaseModel):
     """Request body for chat completions endpoint."""
 
@@ -47,6 +62,7 @@ class ChatCompletionRequest(BaseModel):
     frequency_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
     presence_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
     stop: str | list[str] | None = None
+    response_format: ResponseFormat | None = None
 
 
 class EmbeddingRequest(BaseModel):
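
As a quick sanity check, trimmed-down copies of the models above behave
as intended (the real ChatCompletionRequest carries many more fields):

    from typing import Any, Literal
    from pydantic import BaseModel, ValidationError

    class ResponseFormat(BaseModel):
        type: Literal["json_object", "json_schema"]
        json_schema: dict[str, Any] | None = None

    class ChatCompletionRequest(BaseModel):
        model: str
        response_format: ResponseFormat | None = None

    # The previously-dropped key now parses into a typed sub-model:
    req = ChatCompletionRequest.model_validate(
        {"model": "llama3", "response_format": {"type": "json_object"}}
    )
    assert req.response_format is not None
    assert req.response_format.type == "json_object"

    # Anything outside the two declared shapes is rejected up front:
    try:
        ResponseFormat.model_validate({"type": "text"})
    except ValidationError:
        pass  # e.g. OpenAI's {"type": "text"} is not modelled here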


@@ -127,24 +127,16 @@ class OllamaProvider(LLMProvider):
             # generateObject() helper.
             if request.response_format is not None:
                 rf = request.response_format
-                rf_type = getattr(rf, "type", None) or (
-                    rf.get("type") if isinstance(rf, dict) else None
-                )
-                if rf_type == "json_object":
+                if rf.type == "json_object":
                     payload["format"] = "json"
-                elif rf_type == "json_schema":
-                    rf_schema = (
-                        getattr(rf, "json_schema", None)
-                        or (rf.get("json_schema") if isinstance(rf, dict) else None)
-                    )
-                    if rf_schema is not None:
-                        inner = (
-                            getattr(rf_schema, "schema", None)
-                            or (rf_schema.get("schema") if isinstance(rf_schema, dict) else None)
-                        )
-                        payload["format"] = inner if inner is not None else "json"
-                    else:
-                        payload["format"] = "json"
+                elif rf.type == "json_schema" and rf.json_schema is not None:
+                    # rf.json_schema is the OpenAI envelope:
+                    # {"name": "...", "schema": {...}, "strict": true}
+                    # Ollama wants just the inner schema dict.
+                    inner = rf.json_schema.get("schema")
+                    payload["format"] = inner if inner is not None else "json"
+                else:
+                    payload["format"] = "json"
 
             # Add optional parameters
             options: dict[str, Any] = {}
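
The same mapping, pulled out as a standalone sketch for testing
(to_ollama_format is a hypothetical helper name; the committed code
inlines the branches in the provider):

    from typing import Any, Literal
    from pydantic import BaseModel

    class ResponseFormat(BaseModel):
        type: Literal["json_object", "json_schema"]
        json_schema: dict[str, Any] | None = None

    def to_ollama_format(rf: ResponseFormat) -> str | dict[str, Any]:
        if rf.type == "json_object":
            return "json"
        if rf.type == "json_schema" and rf.json_schema is not None:
            # Unwrap the OpenAI envelope; Ollama's `format` takes the bare schema.
            inner = rf.json_schema.get("schema")
            return inner if inner is not None else "json"
        return "json"  # json_schema without a payload degrades to plain JSON mode

    assert to_ollama_format(ResponseFormat(type="json_object")) == "json"
    assert to_ollama_format(
        ResponseFormat(
            type="json_schema",
            json_schema={"name": "city", "schema": {"type": "object"}},
        )
    ) == {"type": "object"}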