From 2497a65937099f7402b1163448f7dbc241649cc9 Mon Sep 17 00:00:00 2001
From: Till JS <tills95@gmail.com>
Date: Wed, 15 Apr 2026 14:37:15 +0200
Subject: [PATCH] feat(ai-missions): richer error surfacing + retry button on
 failed runs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the single-line summary ("Planner failed: fetch …") with
full diagnostic detail: error name + message + last-active phase +
stack trace, all persisted onto the iteration itself. The UI renders
a collapsible details block under each failed iteration, so the user
can see *where* it broke ("TypeError in calling-llm") without opening
DevTools.

Paired with a one-click Retry button that re-runs the mission under
the same config — useful while debugging a flaky backend (GPU server
down, Gemini quota, etc.).

- `packages/shared-ai/src/missions/types.ts` — new
  `MissionIteration.errorDetails: { name, message, phase?, stack? }`
- `finishIteration` accepts the field, deep-clones it, and also now
  clears the transient phase markers (currentPhase/phaseStartedAt/
  phaseDetail/cancelRequested) whenever an iteration finalises — keeps
  the schema honest (phases are sub-state of `running` only).
- `runMission` tracks `lastPhase` via a new `enterPhase` helper that
  wraps setIterationPhase. The catch handler populates errorDetails
  with lastPhase + message + stack.
- ListView: `<details>` block under each failed iteration + Retry
  button (disabled while another run is in-flight).

77/77 webapp tests still green; svelte-check clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../web/src/lib/data/ai/missions/runner.ts    | 40 +++++----
 .../web/src/lib/data/ai/missions/store.ts     | 10 +++
 .../lib/modules/ai-missions/ListView.svelte   | 90 +++++++++++++++++++
 packages/shared-ai/src/missions/types.ts      | 14 +++
 4 files changed, 137 insertions(+), 17 deletions(-)
diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
index 822a44a32..fd6e52f4f 100644
--- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
@@ -120,6 +120,18 @@ export async function runMission(
 		}
 	}
 
+	// Track the phase that was last active — so a catch handler can
+	// attribute the error ("calling-llm" vs "parsing-response" is
+	// enough context for most debugging without a stack trace).
+	let lastPhase: import('@mana/shared-ai').IterationPhase | undefined;
+	async function enterPhase(
+		phase: import('@mana/shared-ai').IterationPhase,
+		detail?: string
+	): Promise<void> {
+		lastPhase = phase;
+		await setIterationPhase(mission!.id, iterationId, phase, detail);
+	}
+
 	async function runPipeline(): Promise<{
 		recordedSteps: PlanStep[];
 		stagedCount: number;
@@ -128,9 +140,7 @@ export async function runMission(
 		planStepCount: number;
 	}> {
 		// ── Phase: resolving-inputs ────────────────────────────
-		await setIterationPhase(
-			mission!.id,
-			iterationId,
+		await enterPhase(
 			'resolving-inputs',
 			mission!.inputs.length > 0 ? `${mission!.inputs.length} Input(s)` : 'keine Inputs'
 		);
@@ -139,17 +149,12 @@ export async function runMission(
 		await checkCancel();
 
 		// ── Phase: calling-llm ─────────────────────────────────
-		await setIterationPhase(mission!.id, iterationId, 'calling-llm', 'frage Planner an');
+		await enterPhase('calling-llm', 'frage Planner an');
 		const plan = await deps.plan({ mission: mission!, resolvedInputs, availableTools });
 		await checkCancel();
 
 		// ── Phase: parsing-response ────────────────────────────
-		await setIterationPhase(
-			mission!.id,
-			iterationId,
-			'parsing-response',
-			`${plan.steps.length} Step(s) erhalten`
-		);
+		await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`);
 		await checkCancel();
 
 		// ── Phase: staging-proposals ───────────────────────────
@@ -159,12 +164,7 @@ export async function runMission(
 		let failedCount = 0;
 
 		for (const [i, ps] of plan.steps.entries()) {
-			await setIterationPhase(
-				mission!.id,
-				iterationId,
-				'staging-proposals',
-				`Step ${i + 1} von ${plan.steps.length}`
-			);
+			await enterPhase('staging-proposals', `Step ${i + 1} von ${plan.steps.length}`);
 			await checkCancel();
 
 			const outcome = await stage(ps, aiActor);
@@ -188,7 +188,7 @@ export async function runMission(
 			}
 		}
 
-		await setIterationPhase(mission!.id, iterationId, 'finalizing');
+		await enterPhase('finalizing');
 		return {
 			recordedSteps,
 			stagedCount,
@@ -216,6 +216,12 @@ export async function runMission(
 		await finishIteration(mission.id, iterationId, {
 			summary: isCancellation ? msg : `Planner failed: ${msg}`,
 			overallStatus: 'failed',
+			errorDetails: {
+				name: err instanceof Error ? err.name : 'UnknownError',
+				message: msg,
+				phase: lastPhase,
+				stack: err instanceof Error ? err.stack : undefined,
+			},
 		});
 		return emptyResult(mission, iterationId, 'failed', msg);
 	}
diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/store.ts b/apps/mana/apps/web/src/lib/data/ai/missions/store.ts
index c71e2d92b..aeb9eaef5 100644
--- a/apps/mana/apps/web/src/lib/data/ai/missions/store.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/store.ts
@@ -276,6 +276,8 @@ export interface FinishIterationInput {
 	overallStatus: MissionIteration['overallStatus'];
 	/** Replace the plan with the post-run state (steps with proposal ids / final statuses). */
 	plan?: PlanStep[];
+	/** Diagnostic detail for failed iterations — surfaced in the UI. */
+	errorDetails?: MissionIteration['errorDetails'];
 }
 
 export async function finishIteration(
@@ -292,8 +294,16 @@ export async function finishIteration(
 					...it,
 					finishedAt: new Date().toISOString(),
 					overallStatus: input.overallStatus,
+					// Clear in-flight phase markers — the iteration has finalised.
+					currentPhase: undefined,
+					phaseStartedAt: undefined,
+					phaseDetail: undefined,
+					cancelRequested: undefined,
 					...(input.summary !== undefined ? { summary: input.summary } : {}),
 					...(input.plan !== undefined ? { plan: deepClone(input.plan) } : {}),
+					...(input.errorDetails !== undefined
+						? { errorDetails: deepClone(input.errorDetails) }
+						: {}),
 				}
 			: it
 	);
diff --git a/apps/mana/apps/web/src/lib/modules/ai-missions/ListView.svelte b/apps/mana/apps/web/src/lib/modules/ai-missions/ListView.svelte
index 6dd48e64e..9890843f9 100644
--- a/apps/mana/apps/web/src/lib/modules/ai-missions/ListView.svelte
+++ b/apps/mana/apps/web/src/lib/modules/ai-missions/ListView.svelte
@@ -416,6 +416,34 @@
 					{/if}
 
 					{#if it.summary}<p class="it-summary">{it.summary}</p>{/if}
+
+					{#if it.overallStatus === 'failed' && it.errorDetails}
+						<details class="err-details">
+							<summary>
+								<span class="err-name">{it.errorDetails.name}</span>
+								{#if it.errorDetails.phase}
+									<span class="err-phase"
+										>in {PHASE_LABELS[it.errorDetails.phase] ?? it.errorDetails.phase}</span
+									>
+								{/if}
+							</summary>
+							<p class="err-message">{it.errorDetails.message}</p>
+							{#if it.errorDetails.stack}
+								<pre class="err-stack">{it.errorDetails.stack}</pre>
+							{/if}
+						</details>
+						<div class="retry-row">
+							<button
+								type="button"
+								class="retry-btn"
+								disabled={runningNow}
+								onclick={() => handleRunNow(selected)}
+							>
+								{runningNow ? 'Läuft…' : '↻ Erneut versuchen'}
+							</button>
+						</div>
+					{/if}
+
 					{#if it.userFeedback}
 						<blockquote class="fb">{it.userFeedback}</blockquote>
 					{:else if it.overallStatus === 'awaiting-review'}
@@ -743,6 +771,68 @@
 		opacity: 0.5;
 		cursor: not-allowed;
 	}
+	.err-details {
+		margin-top: 0.375rem;
+		border: 1px solid #f7d7d7;
+		border-radius: 0.375rem;
+		padding: 0.375rem 0.5rem;
+		background: color-mix(in oklab, #8a1b1b 4%, transparent);
+		font-size: 0.8125rem;
+	}
+	.err-details summary {
+		cursor: pointer;
+		display: flex;
+		gap: 0.375rem;
+		align-items: center;
+	}
+	.err-name {
+		font-family: var(--font-mono, ui-monospace, monospace);
+		font-weight: 600;
+		color: #8a1b1b;
+	}
+	.err-phase {
+		color: hsl(var(--color-muted-foreground));
+		font-size: 0.75rem;
+	}
+	.err-message {
+		margin: 0.375rem 0 0;
+		color: #6a1515;
+		word-break: break-word;
+	}
+	.err-stack {
+		margin: 0.375rem 0 0;
+		padding: 0.375rem 0.5rem;
+		background: hsl(var(--color-surface));
+		border-radius: 0.25rem;
+		font-family: var(--font-mono, ui-monospace, monospace);
+		font-size: 0.6875rem;
+		max-height: 10rem;
+		overflow: auto;
+		white-space: pre-wrap;
+		color: hsl(var(--color-muted-foreground));
+	}
+	.retry-row {
+		display: flex;
+		justify-content: flex-end;
+		margin-top: 0.375rem;
+	}
+	.retry-btn {
+		display: inline-flex;
+		align-items: center;
+		gap: 0.25rem;
+		padding: 0.25rem 0.625rem;
+		border: 1px solid color-mix(in oklab, hsl(var(--color-primary)) 45%, transparent);
+		border-radius: 0.25rem;
+		background: color-mix(in oklab, hsl(var(--color-primary)) 12%, hsl(var(--color-surface)));
+		color: hsl(var(--color-primary));
+		cursor: pointer;
+		font: inherit;
+		font-size: 0.75rem;
+	}
+	.retry-btn:disabled {
+		opacity: 0.5;
+		cursor: not-allowed;
+	}
 	.it-summary {
 		margin: 0 0 0.375rem;
 		font-size: 0.8125rem;
diff --git a/packages/shared-ai/src/missions/types.ts b/packages/shared-ai/src/missions/types.ts
index fc151b7d0..90a8f31ae 100644
--- a/packages/shared-ai/src/missions/types.ts
+++ b/packages/shared-ai/src/missions/types.ts
@@ -71,6 +71,20 @@ export interface MissionIteration {
 	 * pre-server iterations.
 	 */
 	readonly source?: 'browser' | 'server';
+	/**
+	 * Full diagnostic detail for failed iterations. Populated whenever the
+	 * runner's catch handler fires (cancellations included); omitted on success.
+	 *
+	 * `phase` is the last phase the iteration was in before failing —
+	 * usually enough to diagnose without a stack trace ("timeout in
+	 * calling-llm" is already actionable).
+	 */
+	readonly errorDetails?: {
+		readonly name: string;
+		readonly message: string;
+		readonly phase?: IterationPhase;
+		readonly stack?: string;
+	};
 	/** Sub-status while `overallStatus === 'running'`. Undefined otherwise. */
 	readonly currentPhase?: IterationPhase;
 	/** When the runner advanced into the current phase — for elapsed-in-phase. */