fix(mana-llm): add response_format to ChatCompletionRequest model

The first iteration of the Ollama response_format passthrough crashed
with 'ChatCompletionRequest object has no attribute response_format'
because the Pydantic request model didn't declare the field at all —
incoming response_format from OpenAI-compatible clients was being
silently dropped at the parsing layer before the provider could see it.

Fix: declare a typed ResponseFormat sub-model with the two OpenAI shapes
('json_object' and 'json_schema'), add it as an optional field on
ChatCompletionRequest, and let the Ollama provider read it directly
without defensive getattr fallbacks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-09 18:50:54 +02:00
parent cb87d23509
commit 5520f1385e
2 changed files with 26 additions and 18 deletions

View file

@ -1,6 +1,6 @@
"""Request models for OpenAI-compatible API."""
from typing import Literal
from typing import Any, Literal
from pydantic import BaseModel, Field
@ -35,6 +35,21 @@ class Message(BaseModel):
content: MessageContent
class ResponseFormat(BaseModel):
    """OpenAI-style structured-output hint for chat completions.

    Mirrors the two shapes the OpenAI API accepts:

    * ``{"type": "json_object"}`` — request free-form JSON output.
    * ``{"type": "json_schema",
       "json_schema": {"name": "...", "schema": {...}, "strict": bool}}`` —
      request JSON constrained by the supplied schema; forwarded to
      providers with native support (e.g. Ollama 0.5+ via its ``format``
      field).
    """
    # Discriminates between the two supported OpenAI shapes.
    type: Literal["json_object", "json_schema"]
    # Only meaningful for the "json_schema" shape; carries the OpenAI
    # envelope {"name": ..., "schema": ..., "strict": ...} unchanged.
    json_schema: dict[str, Any] | None = None
class ChatCompletionRequest(BaseModel):
"""Request body for chat completions endpoint."""
@ -47,6 +62,7 @@ class ChatCompletionRequest(BaseModel):
frequency_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
presence_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
stop: str | list[str] | None = None
response_format: ResponseFormat | None = None
class EmbeddingRequest(BaseModel):

View file

@ -127,24 +127,16 @@ class OllamaProvider(LLMProvider):
# generateObject() helper.
if request.response_format is not None:
rf = request.response_format
rf_type = getattr(rf, "type", None) or (
rf.get("type") if isinstance(rf, dict) else None
)
if rf_type == "json_object":
if rf.type == "json_object":
payload["format"] = "json"
elif rf.type == "json_schema" and rf.json_schema is not None:
# rf.json_schema is the OpenAI envelope:
# {"name": "...", "schema": {...}, "strict": true}
# Ollama wants just the inner schema dict.
inner = rf.json_schema.get("schema")
payload["format"] = inner if inner is not None else "json"
else:
payload["format"] = "json"
elif rf_type == "json_schema":
rf_schema = (
getattr(rf, "json_schema", None)
or (rf.get("json_schema") if isinstance(rf, dict) else None)
)
if rf_schema is not None:
inner = (
getattr(rf_schema, "schema", None)
or (rf_schema.get("schema") if isinstance(rf_schema, dict) else None)
)
payload["format"] = inner if inner is not None else "json"
else:
payload["format"] = "json"
# Add optional parameters
options: dict[str, Any] = {}