diff --git a/services/mana-llm/src/providers/ollama.py b/services/mana-llm/src/providers/ollama.py
index a0aed64bc..5e8c5262f 100644
--- a/services/mana-llm/src/providers/ollama.py
+++ b/services/mana-llm/src/providers/ollama.py
@@ -28,6 +28,27 @@ from .base import LLMProvider
 
 logger = logging.getLogger(__name__)
 
+def _strip_json_fences(content: str) -> str:
+    """Strip ```json ... ``` markdown fences from a string if present.
+
+    Some Ollama vision models still wrap structured-output responses in
+    a markdown code block even when `format` is set. Downstream parsers
+    (Vercel AI SDK generateObject, manual JSON.parse) expect clean JSON,
+    so we normalize the response here at the proxy boundary.
+    """
+    s = content.strip()
+    if s.startswith("```"):
+        # Drop the opening fence (```json or ``` plus any language tag)
+        first_newline = s.find("\n")
+        if first_newline != -1:
+            s = s[first_newline + 1 :]
+        # Drop the closing fence
+        if s.endswith("```"):
+            s = s[:-3]
+        s = s.strip()
+    return s
+
+
 class OllamaProvider(LLMProvider):
     """Ollama LLM provider."""
 
@@ -94,6 +115,37 @@ class OllamaProvider(LLMProvider):
             "stream": False,
         }
 
+        # Pass through structured-output requests to Ollama's native
+        # `format` field. Ollama supports either `"json"` (free-form
+        # JSON object) or a full JSON schema dict. The OpenAI-style
+        # response_format the consumer sends maps as follows:
+        #   - {"type": "json_object"} → "json"
+        #   - {"type": "json_schema", "json_schema": {"schema": {...}}}
+        #     → the schema dict (Ollama 0.5+ supports full schemas)
+        # Without this, Ollama wraps JSON in ```json ... ``` markdown
+        # fences, which breaks downstream strict parsers like the AI SDK
+        # generateObject() helper.
+        if request.response_format is not None:
+            rf = request.response_format
+            rf_type = getattr(rf, "type", None) or (
+                rf.get("type") if isinstance(rf, dict) else None
+            )
+            if rf_type == "json_object":
+                payload["format"] = "json"
+            elif rf_type == "json_schema":
+                rf_schema = (
+                    getattr(rf, "json_schema", None)
+                    or (rf.get("json_schema") if isinstance(rf, dict) else None)
+                )
+                if rf_schema is not None:
+                    inner = (
+                        getattr(rf_schema, "schema", None)
+                        or (rf_schema.get("schema") if isinstance(rf_schema, dict) else None)
+                    )
+                    payload["format"] = inner if inner is not None else "json"
+                else:
+                    payload["format"] = "json"
+
         # Add optional parameters
         options: dict[str, Any] = {}
         if request.temperature is not None:
@@ -114,11 +166,16 @@ class OllamaProvider(LLMProvider):
             response.raise_for_status()
             data = response.json()
 
+            # Defensive fence-stripping: even with `format` set, some older
+            # Ollama versions still emit ```json ... ``` wrappers for vision
+            # models. Strip them so strict downstream parsers see clean JSON.
+            content = _strip_json_fences(data["message"]["content"])
+
             return ChatCompletionResponse(
                 model=f"ollama/{model}",
                 choices=[
                     Choice(
-                        message=MessageResponse(content=data["message"]["content"]),
+                        message=MessageResponse(content=content),
                         finish_reason="stop" if data.get("done") else None,
                     )
                 ],
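
For reviewers, here is a self-contained sketch of the `response_format` → `format` mapping for verifying the new branch logic by hand. `map_response_format` is a hypothetical stand-in, not part of this diff; its body mirrors the dict-shaped path above (the pydantic-model path via `getattr` behaves the same).

```python
# Hypothetical stand-alone mirror of the mapping added in this diff.
from typing import Any


def map_response_format(rf: dict[str, Any]) -> Any:
    """Mirror of the new branch, dict-shaped response_format only."""
    if rf.get("type") == "json_object":
        return "json"
    if rf.get("type") == "json_schema":
        schema = (rf.get("json_schema") or {}).get("schema")
        return schema if schema is not None else "json"
    return None  # no `format` field gets set on the payload


# {"type": "json_object"} maps to Ollama's free-form JSON mode.
assert map_response_format({"type": "json_object"}) == "json"

# {"type": "json_schema", ...} passes the inner schema dict through (Ollama 0.5+).
assert map_response_format(
    {"type": "json_schema", "json_schema": {"schema": {"type": "object"}}}
) == {"type": "object"}

# A json_schema request with no schema degrades to free-form JSON mode.
assert map_response_format({"type": "json_schema", "json_schema": {}}) == "json"
```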
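
And a quick pytest sketch pinning down the helper's behavior on the fenced and unfenced cases. The test file and import path are assumptions, not part of this diff; adjust the import to however the service's test suite reaches `src/providers/ollama.py`.

```python
# Hypothetical test sketch for _strip_json_fences; not part of this diff.
import pytest

from src.providers.ollama import _strip_json_fences  # import path assumed


@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        # Fenced with a language tag, as some vision models emit.
        ('```json\n{"ok": 1}\n```', '{"ok": 1}'),
        # Fenced without a language tag.
        ('```\n{"ok": 1}\n```', '{"ok": 1}'),
        # Already-clean JSON passes through untouched.
        ('{"ok": 1}', '{"ok": 1}'),
        # Plain prose is left alone.
        ("no fences here", "no fences here"),
    ],
)
def test_strip_json_fences(raw: str, expected: str) -> None:
    assert _strip_json_fences(raw) == expected
```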