fix(mana-llm): add response_format to ChatCompletionRequest model

The first iteration of the Ollama response_format passthrough crashed
with 'ChatCompletionRequest' object has no attribute 'response_format',
because the Pydantic request model didn't declare the field at all:
incoming response_format from OpenAI-compatible clients was silently
dropped at the parsing layer before the provider could see it.
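
For context, that is stock Pydantic v2 behavior, reproduced below with a
hypothetical trimmed-down model: BaseModel defaults to extra="ignore",
so undeclared keys vanish during validation and only surface as an
AttributeError on access later.

    from pydantic import BaseModel

    class ChatCompletionRequest(BaseModel):
        # Trimmed illustration; response_format deliberately not declared.
        model: str

    req = ChatCompletionRequest.model_validate(
        {"model": "llama3", "response_format": {"type": "json_object"}}
    )
    assert "response_format" not in req.model_dump()  # dropped silently
    req.response_format  # AttributeError: ... no attribute 'response_format'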

Fix: declare a typed ResponseFormat sub-model with the two OpenAI shapes
('json_object' and 'json_schema'), add it as an optional field on
ChatCompletionRequest, and let the Ollama provider read it directly
without defensive getattr fallbacks.
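
For reference, the two shapes as a client would send them (model name,
schema name, and schema contents here are made up for illustration):

    # Free-form JSON mode:
    json_object_rf = {"type": "json_object"}

    # Schema-constrained mode:
    json_schema_rf = {
        "type": "json_schema",
        "json_schema": {
            "name": "city",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
                "required": ["name"],
            },
        },
    }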

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Till JS 2026-04-09 18:50:54 +02:00
parent cb87d23509
commit 5520f1385e
2 changed files with 26 additions and 18 deletions


@@ -1,6 +1,6 @@
 """Request models for OpenAI-compatible API."""
 
-from typing import Literal
+from typing import Any, Literal
 
 from pydantic import BaseModel, Field
 
@@ -35,6 +35,21 @@ class Message(BaseModel):
     content: MessageContent
 
 
+class ResponseFormat(BaseModel):
+    """OpenAI structured-output response_format hint.
+
+    Two shapes are accepted:
+    - {"type": "json_object"}  free-form JSON
+    - {"type": "json_schema",
+       "json_schema": {"name": "...", "schema": {...}, "strict": bool}}
+      schema-constrained JSON; passed through to providers that
+      support it (e.g. Ollama 0.5+ via its native `format` field).
+    """
+
+    type: Literal["json_object", "json_schema"]
+    json_schema: dict[str, Any] | None = None
+
+
 class ChatCompletionRequest(BaseModel):
     """Request body for chat completions endpoint."""
 
@@ -47,6 +62,7 @@ class ChatCompletionRequest(BaseModel):
     frequency_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
     presence_penalty: float | None = Field(default=None, ge=-2.0, le=2.0)
     stop: str | list[str] | None = None
+    response_format: ResponseFormat | None = None
 
 
 class EmbeddingRequest(BaseModel):
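
As a quick sanity check, trimmed-down copies of the models above behave
as intended (the real ChatCompletionRequest carries many more fields):

    from typing import Any, Literal
    from pydantic import BaseModel, ValidationError

    class ResponseFormat(BaseModel):
        type: Literal["json_object", "json_schema"]
        json_schema: dict[str, Any] | None = None

    class ChatCompletionRequest(BaseModel):
        model: str
        response_format: ResponseFormat | None = None

    # The previously-dropped key now parses into a typed sub-model:
    req = ChatCompletionRequest.model_validate(
        {"model": "llama3", "response_format": {"type": "json_object"}}
    )
    assert req.response_format is not None
    assert req.response_format.type == "json_object"

    # Anything outside the two declared shapes is rejected up front:
    try:
        ResponseFormat.model_validate({"type": "text"})
    except ValidationError:
        pass  # e.g. OpenAI's {"type": "text"} is not modelled here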


@@ -127,24 +127,16 @@ class OllamaProvider(LLMProvider):
             # generateObject() helper.
             if request.response_format is not None:
                 rf = request.response_format
-                rf_type = getattr(rf, "type", None) or (
-                    rf.get("type") if isinstance(rf, dict) else None
-                )
-                if rf_type == "json_object":
+                if rf.type == "json_object":
                     payload["format"] = "json"
-                elif rf_type == "json_schema":
-                    rf_schema = (
-                        getattr(rf, "json_schema", None)
-                        or (rf.get("json_schema") if isinstance(rf, dict) else None)
-                    )
-                    if rf_schema is not None:
-                        inner = (
-                            getattr(rf_schema, "schema", None)
-                            or (rf_schema.get("schema") if isinstance(rf_schema, dict) else None)
-                        )
-                        payload["format"] = inner if inner is not None else "json"
-                    else:
-                        payload["format"] = "json"
+                elif rf.type == "json_schema" and rf.json_schema is not None:
+                    # rf.json_schema is the OpenAI envelope:
+                    # {"name": "...", "schema": {...}, "strict": true}
+                    # Ollama wants just the inner schema dict.
+                    inner = rf.json_schema.get("schema")
+                    payload["format"] = inner if inner is not None else "json"
+                else:
+                    payload["format"] = "json"
 
             # Add optional parameters
             options: dict[str, Any] = {}
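
The same mapping, pulled out as a standalone sketch for testing
(to_ollama_format is a hypothetical helper name; the committed code
inlines the branches in the provider):

    from typing import Any, Literal
    from pydantic import BaseModel

    class ResponseFormat(BaseModel):
        type: Literal["json_object", "json_schema"]
        json_schema: dict[str, Any] | None = None

    def to_ollama_format(rf: ResponseFormat) -> str | dict[str, Any]:
        if rf.type == "json_object":
            return "json"
        if rf.type == "json_schema" and rf.json_schema is not None:
            # Unwrap the OpenAI envelope; Ollama's `format` takes the bare schema.
            inner = rf.json_schema.get("schema")
            return inner if inner is not None else "json"
        return "json"  # json_schema without a payload degrades to plain JSON mode

    assert to_ollama_format(ResponseFormat(type="json_object")) == "json"
    assert to_ollama_format(
        ResponseFormat(
            type="json_schema",
            json_schema={"name": "city", "schema": {"type": "object"}},
        )
    ) == {"type": "object"}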