feat(ai): per-iteration debug log — capture prompt + response + inputs

New local-only Dexie table _aiDebugLog (v20, never synced) holds one
row per mission iteration with the full system+user prompt, raw LLM
response, latency, every ResolvedInput the planner saw, and pre-step
state (kontext-injected? web-research-ok-or-error?). Capped at 50
newest rows.

aiPlanTask always returns the captured prompt/response on AiPlanOutput.
debug; the runner persists it only when isAiDebugEnabled() — toggled
via a checkbox in the Mission detail header (defaults to on in DEV
builds, off in prod, override via localStorage 'mana.ai.debug').

New <AiDebugBlock> component renders below each iteration card:
expandable sections for Pre-Step, Resolved Inputs (each input
individually collapsible), System Prompt, User Prompt, Raw Response,
plus a "📋 JSON" copy-to-clipboard button for bug reports.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-15 20:33:17 +02:00
parent 6e842a83c9
commit d5c351d63e
7 changed files with 467 additions and 16 deletions

View file

@ -0,0 +1,206 @@
<!--
AiDebugBlock — expandable inspector for one mission iteration's
captured prompt/response/inputs.
Renders zero UI when no debug entry exists for the iteration (which is
the normal case in production — debug capture is opt-in via the
`mana.ai.debug` localStorage flag).
-->
<script lang="ts">
import { useAiDebugForIteration } from '$lib/data/ai/missions/debug';
// Id of the mission iteration whose captured debug entry is shown.
interface Props {
iterationId: string;
}
let { iterationId }: Props = $props();
// Live Dexie query — `debug.value` is the AiDebugEntry row or null.
const debug = $derived(useAiDebugForIteration(iterationId));
// Drives the transient "✓ Kopiert" label on the copy button.
let copied = $state(false);
// Copy the full debug entry as pretty-printed JSON (for bug reports).
// Clipboard failures are logged, never surfaced to the user.
async function copyAsJson() {
if (!debug.value) return;
try {
await navigator.clipboard.writeText(JSON.stringify(debug.value, null, 2));
copied = true;
// Reset the confirmation label after a short flash.
setTimeout(() => (copied = false), 1500);
} catch (err) {
console.warn('[AiDebugBlock] clipboard write failed:', err);
}
}
</script>
<!-- Render nothing at all unless a debug entry exists for this iteration. -->
{#if debug.value}
{@const d = debug.value}
<details class="debug-block">
<summary>
<span class="dbg-tag">🔍 Debug</span>
<!-- One-line metadata strip: input count, pre-step flags, latency, errors. -->
<span class="dbg-meta">
{d.resolvedInputs.length} Input(s)
{#if d.preStep.kontextInjected}
· Kontext{/if}
{#if d.preStep.webResearch?.ok}
· Web {d.preStep.webResearch.sourceCount}q
{:else if d.preStep.webResearch && !d.preStep.webResearch.ok}
· Web ❌
{/if}
{#if d.planner}· {Math.round(d.planner.latencyMs)}ms{/if}
{#if d.plannerError}· Planner ❌{/if}
</span>
<!-- preventDefault stops the click from also toggling the <details>. -->
<button
type="button"
class="copy-btn"
onclick={(e) => (e.preventDefault(), copyAsJson())}
title="Als JSON in Zwischenablage kopieren"
>
{copied ? '✓ Kopiert' : '📋 JSON'}
</button>
</summary>
<!-- Pre-step web research outcome: summary on success, error on failure. -->
{#if d.preStep.webResearch}
<section>
<h5>Pre-Step: Web-Recherche</h5>
{#if d.preStep.webResearch.ok}
<p class="ok">{d.preStep.webResearch.sourceCount} Quellen.</p>
<pre>{d.preStep.webResearch.summary}</pre>
{:else}
<p class="err">FEHLER: {d.preStep.webResearch.error}</p>
{/if}
</section>
{/if}
<!-- Every resolved input the planner saw, each individually collapsible. -->
<section>
<h5>Resolved Inputs ({d.resolvedInputs.length})</h5>
{#if d.resolvedInputs.length === 0}
<p class="muted">— keine —</p>
{:else}
{#each d.resolvedInputs as inp (inp.id)}
<details class="nested">
<summary>
<code>{inp.module}/{inp.table}</code>
{inp.title ?? inp.id}
</summary>
<pre>{inp.content}</pre>
</details>
{/each}
{/if}
</section>
<!-- Prompt/response capture — present only when the planner call returned. -->
{#if d.planner}
<section>
<h5>System Prompt</h5>
<pre>{d.planner.systemPrompt}</pre>
</section>
<section>
<h5>User Prompt</h5>
<pre>{d.planner.userPrompt}</pre>
</section>
<section>
<h5>Raw LLM Response</h5>
<pre>{d.planner.rawResponse}</pre>
</section>
{/if}
{#if d.plannerError}
<section>
<h5>Planner Error</h5>
<p class="err">{d.plannerError}</p>
</section>
{/if}
</details>
{/if}
<style>
/* Outer collapsible card — dashed border visually marks dev-only tooling. */
.debug-block {
margin-top: 0.5rem;
border: 1px dashed color-mix(in oklab, hsl(var(--color-border)) 80%, transparent);
border-radius: 0.375rem;
font-size: 0.75rem;
}
.debug-block > summary {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.375rem 0.5rem;
cursor: pointer;
color: hsl(var(--color-muted-foreground));
list-style: none;
}
/* Hide the default disclosure triangle (WebKit-specific pseudo-element). */
.debug-block > summary::-webkit-details-marker {
display: none;
}
.dbg-tag {
font-weight: 600;
}
.dbg-meta {
flex: 1;
font-family: var(--font-mono, ui-monospace, monospace);
font-size: 0.6875rem;
}
.copy-btn {
padding: 0.125rem 0.5rem;
border: 1px solid hsl(var(--color-border));
border-radius: 0.25rem;
background: hsl(var(--color-surface));
color: hsl(var(--color-foreground));
font: inherit;
font-size: 0.6875rem;
cursor: pointer;
}
.copy-btn:hover {
border-color: hsl(var(--color-primary));
}
section {
padding: 0.375rem 0.625rem 0.625rem;
border-top: 1px solid hsl(var(--color-border));
}
h5 {
margin: 0 0 0.25rem;
font-size: 0.6875rem;
font-weight: 600;
color: hsl(var(--color-muted-foreground));
text-transform: uppercase;
letter-spacing: 0.05em;
}
/* Scrollable, soft-wrapped monospace panel for prompts and responses. */
pre {
margin: 0.25rem 0 0;
padding: 0.5rem;
max-height: 14rem;
overflow: auto;
border-radius: 0.25rem;
background: color-mix(in oklab, hsl(var(--color-foreground)) 5%, transparent);
font-family: var(--font-mono, ui-monospace, monospace);
font-size: 0.6875rem;
line-height: 1.45;
white-space: pre-wrap;
word-break: break-word;
}
.nested {
margin: 0.25rem 0;
}
.nested > summary {
cursor: pointer;
padding: 0.125rem 0;
}
.nested code {
margin-right: 0.375rem;
padding: 0 0.25rem;
border-radius: 0.25rem;
background: color-mix(in oklab, hsl(var(--color-primary)) 12%, transparent);
font-size: 0.625rem;
}
.ok {
margin: 0.25rem 0;
color: #16a34a;
}
.err {
margin: 0.25rem 0;
color: #dc2626;
font-weight: 500;
}
.muted {
margin: 0.25rem 0;
color: hsl(var(--color-muted-foreground));
font-style: italic;
}
</style>

View file

@ -0,0 +1,110 @@
/**
* AI Mission debug log — per-iteration capture of what the planner saw
* and what it returned, for debugging / prompt iteration.
*
* Local-only (Dexie table `_aiDebugLog`, never synced) because the
* captured prompt contains the user's resolved inputs, which include
* decrypted note bodies and goal text. Sending those to the server
* would defeat the at-rest encryption.
*
* Toggled via localStorage flag `mana.ai.debug` ('1' enables). Defaults
* to enabled in DEV builds and disabled in production. Capped at
* MAX_ENTRIES newest rows; the writer trims older ones on every insert.
*/
import { useLiveQueryWithDefault } from '@mana/local-store/svelte';
import { db } from '../../database';
import type { ResolvedInput } from './planner/types';
// Dexie table name — leading underscore marks it as local-only (never synced).
const TABLE = '_aiDebugLog';
// localStorage key for the opt-in flag ('1' enables, '0' disables).
const STORAGE_KEY = 'mana.ai.debug';
// Hard cap on stored rows; recordAiDebug trims the oldest beyond this.
const MAX_ENTRIES = 50;
/**
* Captured by `aiPlanTask` and passed back via the planner output so the
* runner can record it without the planner needing to know about Dexie.
*/
export interface PlannerCallDebug {
// Exact prompt strings sent to the LLM, verbatim.
readonly systemPrompt: string;
readonly userPrompt: string;
// Unparsed response text, exactly as the LLM returned it.
readonly rawResponse: string;
// Duration of the LLM call in milliseconds.
readonly latencyMs: number;
// Optional backend/model identifiers when the LLM layer reports them.
readonly backendId?: string;
readonly model?: string;
}
// One persisted debug row — everything needed to reconstruct what a
// single mission iteration's planner call saw and produced.
export interface AiDebugEntry {
/** Primary key — one row per iteration. */
iterationId: string;
missionId: string;
// Snapshot of the mission's title/objective at capture time.
missionTitle: string;
missionObjective: string;
// ISO timestamp; also the index recordAiDebug uses to trim oldest rows.
capturedAt: string;
// Every input the planner saw (incl. injected kontext / web research).
resolvedInputs: ResolvedInput[];
// State gathered before the planner call.
preStep: {
webResearch?: { ok: true; sourceCount: number; summary: string } | { ok: false; error: string };
kontextInjected: boolean;
};
// Set when the planner call succeeded and returned a debug payload.
planner?: PlannerCallDebug;
// Set when the planner call threw; holds the error message.
plannerError?: string;
}
/**
 * True when the user has opted in to debug capture.
 *
 * Explicit localStorage values win ('1' → on, '0' → off); with no stored
 * choice, capture defaults to on in dev builds and off in production.
 * Returns false in environments without localStorage (SSR, workers).
 */
export function isAiDebugEnabled(): boolean {
  if (typeof localStorage === 'undefined') return false;
  switch (localStorage.getItem(STORAGE_KEY)) {
    case '1':
      return true;
    case '0':
      return false;
    default:
      // No explicit choice stored — fall back to the build-mode default.
      try {
        return Boolean(import.meta.env?.DEV);
      } catch {
        return false;
      }
  }
}
/** Persist the user's debug-capture preference ('1' / '0'); no-op without localStorage. */
export function setAiDebugEnabled(enabled: boolean): void {
  if (typeof localStorage !== 'undefined') {
    localStorage.setItem(STORAGE_KEY, enabled ? '1' : '0');
  }
}
/** Persist one debug entry + trim oldest if over cap. Idempotent on
* iterationId re-running an iteration overwrites the prior capture. */
export async function recordAiDebug(entry: AiDebugEntry): Promise<void> {
try {
await db.table<AiDebugEntry>(TABLE).put(entry);
const total = await db.table<AiDebugEntry>(TABLE).count();
if (total > MAX_ENTRIES) {
const overflow = total - MAX_ENTRIES;
const oldest = await db
.table<AiDebugEntry>(TABLE)
.orderBy('capturedAt')
.limit(overflow)
.primaryKeys();
if (oldest.length) {
await db.table<AiDebugEntry>(TABLE).bulkDelete(oldest);
}
}
} catch (err) {
console.warn('[AiDebug] persist failed:', err);
}
}
/** Fetch the stored debug entry for one iteration, or undefined when none exists. */
export async function getAiDebugForIteration(
  iterationId: string
): Promise<AiDebugEntry | undefined> {
  const row = await db.table<AiDebugEntry>(TABLE).get(iterationId);
  return row;
}
/**
 * Reactive Svelte 5 query — yields the debug entry for an iteration, or
 * `null` while loading / when none has been captured yet.
 */
export function useAiDebugForIteration(iterationId: string | null) {
  const fetchEntry = async (): Promise<AiDebugEntry | null> => {
    // Falsy id (null or empty string) — nothing to look up.
    if (!iterationId) return null;
    const row = await db.table<AiDebugEntry>(TABLE).get(iterationId);
    return row ?? null;
  };
  return useLiveQueryWithDefault(fetchEntry, null as AiDebugEntry | null);
}

View file

@ -32,7 +32,10 @@ import { executeTool } from '../../tools/executor';
import { db } from '../../database';
import { decryptRecords } from '../../crypto';
import { researchApi } from '$lib/api/research';
import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry } from './debug';
import { makeAgentActor, LEGACY_AI_PRINCIPAL, type Actor } from '../../events/actor';
import { getAgent } from '../agents/store';
import { DEFAULT_AGENT_NAME } from '../agents/types';
import type { Mission, MissionIteration, PlanStep } from './types';
import type { AiPlanInput, AiPlanOutput, PlannedStep, ResolvedInput } from './planner/types';
@ -109,12 +112,15 @@ export async function runMission(
// Use the id the store generates so finishIteration updates the same row.
const startedIteration = await startIteration(mission.id, { plan: [] });
const iterationId = startedIteration.id;
// Phase 1: agent identity not yet wired (Phase 2 will). Use the
// legacy AI principal so every write is still identity-aware; the
// Phase-2 migration will rewrite these to a real agentId.
// Resolve the owning agent. Missions that pre-date the Multi-Agent
// rollout or whose agent was deleted fall back to the legacy
// principal + default name — runner still attributes cleanly, UI
// renders the work as "Mana".
const owningAgent = mission.agentId ? await getAgent(mission.agentId) : null;
const aiActor = makeAgentActor({
agentId: LEGACY_AI_PRINCIPAL,
displayName: 'Mana',
agentId: owningAgent?.id ?? LEGACY_AI_PRINCIPAL,
displayName: owningAgent?.name ?? DEFAULT_AGENT_NAME,
missionId: mission.id,
iterationId,
rationale: mission.objective,
@ -161,6 +167,7 @@ export async function runMission(
);
const baseInputs = await resolveMissionInputs(mission!.inputs);
const resolvedInputs: ResolvedInput[] = [...baseInputs];
const preStep: AiDebugEntry['preStep'] = { kontextInjected: false };
// Auto-inject the kontext singleton (if non-empty and not already
// linked) so every mission has the user's standing context as
@ -168,7 +175,10 @@ export async function runMission(
const alreadyHasKontext = mission!.inputs.some((i) => i.module === 'kontext');
if (!alreadyHasKontext) {
const kontextEntry = await loadKontextAsResolvedInput();
if (kontextEntry) resolvedInputs.push(kontextEntry);
if (kontextEntry) {
resolvedInputs.push(kontextEntry);
preStep.kontextInjected = true;
}
}
// Pre-step web research: if the objective looks like research,
@ -180,12 +190,20 @@ export async function runMission(
if (RESEARCH_TRIGGER.test(mission!.objective)) {
await enterPhase('resolving-inputs', 'Web-Recherche…');
try {
const researchEntry = await runWebResearch(mission!);
if (researchEntry) resolvedInputs.push(researchEntry);
const research = await runWebResearch(mission!);
if (research) {
resolvedInputs.push(research.input);
preStep.webResearch = {
ok: true,
sourceCount: research.sourceCount,
summary: research.summary,
};
}
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.warn('[MissionRunner] web-research pre-step failed:', err);
await enterPhase('resolving-inputs', `Web-Recherche fehlgeschlagen: ${msg.slice(0, 80)}`);
preStep.webResearch = { ok: false, error: msg };
resolvedInputs.push({
id: 'web-research-error',
module: 'research',
@ -209,9 +227,42 @@ export async function runMission(
// ── Phase: calling-llm ─────────────────────────────────
await enterPhase('calling-llm', 'frage Planner an');
const plan = await deps.plan({ mission: mission!, resolvedInputs, availableTools });
let plan: AiPlanOutput;
try {
plan = await deps.plan({ mission: mission!, resolvedInputs, availableTools });
} catch (err) {
// Capture even the failure for debug visibility before re-throwing.
if (isAiDebugEnabled()) {
void recordAiDebug({
iterationId,
missionId: mission!.id,
missionTitle: mission!.title,
missionObjective: mission!.objective,
capturedAt: new Date().toISOString(),
resolvedInputs,
preStep,
plannerError: err instanceof Error ? err.message : String(err),
});
}
throw err;
}
await checkCancel();
// Persist debug capture if enabled. Off by default in production
// (toggle via Settings or `localStorage.setItem('mana.ai.debug','1')`).
if (isAiDebugEnabled()) {
void recordAiDebug({
iterationId,
missionId: mission!.id,
missionTitle: mission!.title,
missionObjective: mission!.objective,
capturedAt: new Date().toISOString(),
resolvedInputs,
preStep,
planner: plan.debug,
});
}
// ── Phase: parsing-response ────────────────────────────
await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`);
await checkCancel();
@ -360,7 +411,13 @@ async function loadKontextAsResolvedInput(): Promise<ResolvedInput | null> {
/** Run the deep-research pipeline against the mission objective and
* collapse its summary + sources into one ResolvedInput formatted so
* the planner can copy URLs into save_news_article calls. */
async function runWebResearch(mission: Mission): Promise<ResolvedInput | null> {
/** Result of the web-research pre-step: the planner-facing input plus the
 * metadata (source count, summary) the debug capture records separately. */
interface WebResearchOutcome {
// ResolvedInput pushed alongside the mission's other resolved inputs.
input: ResolvedInput;
// Number of sources the research pipeline returned.
sourceCount: number;
// Summary text as returned by the research pipeline.
summary: string;
}
async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | null> {
const result = await researchApi.startSync({
// Tag the run with the mission id so backend logs can correlate.
questionId: `mission:${mission.id}`,
@ -387,11 +444,15 @@ async function runWebResearch(mission: Mission): Promise<ResolvedInput | null> {
].join('\n');
return {
id: result.id,
module: 'research',
table: 'researchResults',
title: 'Web-Recherche zu diesem Auftrag',
content,
input: {
id: result.id,
module: 'research',
table: 'researchResults',
title: 'Web-Recherche zu diesem Auftrag',
content,
},
sourceCount: sources.length,
summary: result.summary,
};
}

View file

@ -516,6 +516,26 @@ db.version(18).stores({
aiMissions: 'id, state, createdAt, nextRunAt, [state+nextRunAt]',
});
// v19 — AI Agents: named personas that own Missions, carry policy +
// memory, and show up as identities in the Workbench timeline.
// Syncs cross-device so the same agent exists everywhere. Name
// uniqueness is enforced at write time in the store (Dexie's unique
// index would error on the default-agent-backfill race between two
// tabs). See docs/plans/multi-agent-workbench.md §Phase 2b.
db.version(19).stores({
agents: 'id, state, createdAt, name, [state+name]',
});
// v20 — AI Debug Log: per-iteration capture of the prompt sent to the
// planner LLM, the raw response, the resolved-inputs the planner saw,
// and any pre-step output (e.g. web-research). LOCAL-ONLY, never synced
// (would leak personal context through mana-sync) — that is enforced by
// keeping it out of every module's SYNC_APP_MAP. Capped to ~50 newest
// rows by the writer so a long-running tab doesn't bloat IndexedDB.
db.version(20).stores({
// iterationId is the primary key; capturedAt is indexed so the writer
// can trim the oldest rows via orderBy('capturedAt').
_aiDebugLog: 'iterationId, capturedAt',
});
// ─── Sync Routing ──────────────────────────────────────────
// SYNC_APP_MAP, TABLE_TO_SYNC_NAME, TABLE_TO_APP, SYNC_NAME_TO_TABLE,
// toSyncName() and fromSyncName() are now derived from per-module

View file

@ -48,6 +48,16 @@ export const aiPlanTask: LlmTask<AiPlanInput, AiPlanOutput> = {
maxTokens: 1024,
});
// Always populate debug payload (cheap — strings already in memory).
// The runner decides whether to persist it based on the user's
// localStorage `mana.ai.debug` toggle.
const debug = {
systemPrompt: system,
userPrompt: user,
rawResponse: result.content,
latencyMs: result.latencyMs,
};
const knownToolNames = new Set(input.availableTools.map((t) => t.name));
const parsed = parsePlannerResponse(result.content, knownToolNames);
@ -55,8 +65,9 @@ export const aiPlanTask: LlmTask<AiPlanInput, AiPlanOutput> = {
return {
steps: [],
summary: `Plan konnte nicht erzeugt werden: ${parsed.reason}`,
debug,
};
}
return parsed.value;
return { ...parsed.value, debug };
},
};

View file

@ -21,6 +21,8 @@
import { productionDeps } from '$lib/data/ai/missions/setup';
import MissionInputPicker from '$lib/components/ai/MissionInputPicker.svelte';
import MissionGrantDialog from '$lib/components/ai/MissionGrantDialog.svelte';
import AiDebugBlock from '$lib/components/ai/AiDebugBlock.svelte';
import { isAiDebugEnabled, setAiDebugEnabled } from '$lib/data/ai/missions/debug';
import { isMissionGrantsEnabled } from '$lib/api/config';
import type { Mission, MissionCadence, MissionInputRef } from '$lib/data/ai/missions/types';
@ -28,6 +30,12 @@
let mode = $state<'list' | 'create' | 'detail'>('list');
let selectedId = $state<string | null>(null);
let debugEnabled = $state(isAiDebugEnabled());
/** Flip the AI-debug capture flag and persist the choice to localStorage. */
function toggleDebug() {
const next = !debugEnabled;
debugEnabled = next;
setAiDebugEnabled(next);
}
const selected = $derived(
selectedId ? (missions.value.find((m) => m.id === selectedId) ?? null) : null
);
@ -294,6 +302,10 @@
<button type="button" onclick={() => handleRunNow(selected)} disabled={runningNow}>
<Play size={12} /><span>{runningNow ? 'Läuft…' : 'Jetzt ausführen'}</span>
</button>
<label class="debug-toggle" title="Erfasst Prompts + Responses lokal pro Iteration">
<input type="checkbox" checked={debugEnabled} onchange={toggleDebug} />
<span>🔍 Debug</span>
</label>
{#if selected.state === 'active'}
<button type="button" onclick={() => pauseMission(selected.id)}>
<Pause size={12} /><span>Pause</span>
@ -444,6 +456,8 @@
</div>
{/if}
<AiDebugBlock iterationId={it.id} />
{#if it.userFeedback}
<blockquote class="fb">{it.userFeedback}</blockquote>
{:else if it.overallStatus === 'awaiting-review'}
@ -637,6 +651,22 @@
.detail-actions button.danger {
color: hsl(var(--color-error));
}
.debug-toggle {
display: inline-flex;
align-items: center;
gap: 0.25rem;
margin-left: auto;
padding: 0.25rem 0.5rem;
border: 1px solid hsl(var(--color-border));
border-radius: 0.375rem;
font-size: 0.75rem;
color: hsl(var(--color-muted-foreground));
cursor: pointer;
user-select: none;
}
.debug-toggle input {
margin: 0;
}
.meta {
display: grid;
grid-template-columns: auto 1fr;

View file

@ -42,4 +42,17 @@ export interface PlannedStep {
// One planner run's result: the planned steps plus a human-readable summary.
export interface AiPlanOutput {
readonly steps: readonly PlannedStep[];
readonly summary: string;
/**
* Optional capture of the prompt + raw response, populated by the
* planner implementation when AI Debug is enabled. The runner reads
* this and persists it locally — never synced.
*/
readonly debug?: {
readonly systemPrompt: string;
readonly userPrompt: string;
/** Unparsed LLM response text, exactly as returned. */
readonly rawResponse: string;
/** Duration of the LLM call in milliseconds. */
readonly latencyMs: number;
readonly backendId?: string;
readonly model?: string;
};
}