diff --git a/packages/shared-llm/src/backends/remote.ts b/packages/shared-llm/src/backends/remote.ts
index d6f97dfe8..80385de78 100644
--- a/packages/shared-llm/src/backends/remote.ts
+++ b/packages/shared-llm/src/backends/remote.ts
@@ -104,7 +104,7 @@ export async function callManaLlmStreaming(
   let json: {
     choices?: Array<{
-      message?: { content?: string };
+      message?: { content?: string; reasoning?: string };
       text?: string;
     }>;
     usage?: { prompt_tokens?: number; completion_tokens?: number };
@@ -116,8 +116,17 @@ export async function callManaLlmStreaming(
     throw new BackendUnreachableError(tier, res.status, 'invalid JSON response');
   }
 
+  // Field ordering: prefer the canonical OpenAI `message.content` first.
+  // If that's empty AND `message.reasoning` is set, fall back to it:
+  // reasoning models like Gemma 4 emit their thought process there
+  // when given too few tokens to also produce a final answer (we hit
+  // this with max_tokens=10 / no system prompt: content was "" while
+  // reasoning had the half-finished thought). For our title task this
+  // rarely happens because the system prompt is directive, but the
+  // fallback is cheap and protects against future tasks that might
+  // trigger longer reasoning chains.
   const choice = json.choices?.[0];
-  const content = choice?.message?.content ?? choice?.text ?? '';
+  const content = choice?.message?.content || choice?.message?.reasoning || choice?.text || '';
   if (!content) {
     console.warn(`[shared-llm:${tier}] empty completion content`, { model, json });
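
For reviewers: a standalone sketch of the new extraction ordering, outside the patch. The response shape mirrors the `json` type in the hunk above; `extractContent` and the sample payload are hypothetical (the real code does this inline in `remote.ts` after parsing the response).

```ts
// Minimal sketch of the content/reasoning/text fallback, assuming an
// OpenAI-style chat-completion payload. `extractContent` is a hypothetical
// helper used only for illustration.
type ChatChoice = {
  message?: { content?: string; reasoning?: string };
  text?: string;
};
type ChatResponse = { choices?: ChatChoice[] };

function extractContent(json: ChatResponse): string {
  const choice = json.choices?.[0];
  // Canonical `message.content` first; fall back to `message.reasoning`
  // when content is empty, then to the legacy `text` field.
  return choice?.message?.content || choice?.message?.reasoning || choice?.text || '';
}

// A truncated reasoning-model response: the token budget was spent on the
// thought, so `content` is "" while `reasoning` holds the partial answer.
const truncated: ChatResponse = {
  choices: [{ message: { content: '', reasoning: 'Okay, a short title could be' } }],
};
console.log(extractContent(truncated)); // -> "Okay, a short title could be"
```

Note the `||` chain rather than `??`: nullish coalescing would keep an empty-string `content` and never reach `reasoning`, which is exactly the case the comment describes.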