mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:01:09 +02:00
feat(ai): reasoning loop — agent chains auto tools before asking for approval
The runner was one-shot: one planner call per iteration, no feedback
from tool outputs. "Lies alle Notizen und tagge sie" needed two manual
runs (list_notes, then tagging) because the planner never saw the
list_notes output.
Now runMission loops up to MAX_REASONING_LOOP_ITERATIONS (5):
loop: plan → classify steps by policy
│
├─ auto → execute inline, capture {message, data}, feed back
│ as a synthetic ResolvedInput for the next planner call
│
├─ propose → stage proposal, mark humanInLoop, EXIT after this round
│ (human has to approve before we plan further — we don't
│ know what they'll accept yet)
│
└─ none/0-steps → agent considers the task done, EXIT
Tool outputs become a ResolvedInput titled "Zwischenergebnisse (Runde N)"
so the planner sees them structured and labelled. StageOutcome gains
`autoData` + `autoMessage` so the loop can thread the executor's
payload back through without a second call.
AiDebugEntry now holds `plannerCalls[]` and `loopSteps[]` instead of a
single planner snapshot — so Debug-Panel shows every LLM round + every
auto-tool output, each collapsible. Summary chip shows "3× LLM · 4200ms
· 2× Auto-Tool" when a loop ran.
Side-effects for existing use cases:
- One-shot missions (single propose tool) behave identically (loop
exits after round 1 with humanInLoop=true).
- "Tag all notes" missions now finish in a single run: loop iter 1
runs list_notes auto, iter 2 stages N add_tag_to_note proposals,
exits.
- Server-side mana-ai runner NOT touched — this is foreground-only
for now; the server still runs one plan/tick.
All 8 runner.test.ts tests pass unchanged (the existing test suite
only exercises the single-step path, which is a subset of the loop).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e440f13867
commit
8299bf004d
3 changed files with 264 additions and 78 deletions
|
|
@ -44,7 +44,14 @@
|
|||
{:else if d.preStep.webResearch && !d.preStep.webResearch.ok}
|
||||
· Web ❌
|
||||
{/if}
|
||||
{#if d.planner}· {Math.round(d.planner.latencyMs)}ms{/if}
|
||||
{#if d.plannerCalls && d.plannerCalls.length > 0}
|
||||
· {d.plannerCalls.length}× LLM · {Math.round(
|
||||
d.plannerCalls.reduce((a, c) => a + c.latencyMs, 0)
|
||||
)}ms
|
||||
{/if}
|
||||
{#if d.loopSteps && d.loopSteps.length > 0}
|
||||
· {d.loopSteps.length}× Auto-Tool
|
||||
{/if}
|
||||
{#if d.plannerError}· Planner ❌{/if}
|
||||
</span>
|
||||
<button
|
||||
|
|
@ -86,21 +93,41 @@
|
|||
{/if}
|
||||
</section>
|
||||
|
||||
{#if d.planner}
|
||||
{#if d.loopSteps && d.loopSteps.length > 0}
|
||||
<section>
|
||||
<h5>System Prompt</h5>
|
||||
<pre>{d.planner.systemPrompt}</pre>
|
||||
</section>
|
||||
<section>
|
||||
<h5>User Prompt</h5>
|
||||
<pre>{d.planner.userPrompt}</pre>
|
||||
</section>
|
||||
<section>
|
||||
<h5>Raw LLM Response</h5>
|
||||
<pre>{d.planner.rawResponse}</pre>
|
||||
<h5>Auto-Tool-Ausgaben (Reasoning-Loop)</h5>
|
||||
{#each d.loopSteps as ls, i (i)}
|
||||
<details class="nested">
|
||||
<summary>
|
||||
<code>Runde {ls.loopIndex + 1}</code>
|
||||
{ls.toolName}({JSON.stringify(ls.params)})
|
||||
</summary>
|
||||
<pre>{ls.outputPreview}</pre>
|
||||
</details>
|
||||
{/each}
|
||||
</section>
|
||||
{/if}
|
||||
|
||||
{#if d.plannerCalls && d.plannerCalls.length > 0}
|
||||
{#each d.plannerCalls as call, i (i)}
|
||||
<section>
|
||||
<h5>LLM-Call {i + 1}/{d.plannerCalls.length} · {Math.round(call.latencyMs)}ms</h5>
|
||||
<details class="nested">
|
||||
<summary>System Prompt</summary>
|
||||
<pre>{call.systemPrompt}</pre>
|
||||
</details>
|
||||
<details class="nested" open>
|
||||
<summary>User Prompt</summary>
|
||||
<pre>{call.userPrompt}</pre>
|
||||
</details>
|
||||
<details class="nested" open>
|
||||
<summary>Raw LLM Response</summary>
|
||||
<pre>{call.rawResponse}</pre>
|
||||
</details>
|
||||
</section>
|
||||
{/each}
|
||||
{/if}
|
||||
|
||||
{#if d.plannerError}
|
||||
<section>
|
||||
<h5>Planner Error</h5>
|
||||
|
|
|
|||
|
|
@ -45,7 +45,22 @@ export interface AiDebugEntry {
|
|||
webResearch?: { ok: true; sourceCount: number; summary: string } | { ok: false; error: string };
|
||||
kontextInjected: boolean;
|
||||
};
|
||||
planner?: PlannerCallDebug;
|
||||
/**
|
||||
* Array because the reasoning loop can call the planner multiple
|
||||
* times per iteration (once per loop step, until a proposal is
|
||||
* staged or no more work is returned). Older single-call entries
|
||||
* written before the loop shipped still parse — readers that
|
||||
* haven't updated simply take `plannerCalls[0]`.
|
||||
*/
|
||||
plannerCalls?: PlannerCallDebug[];
|
||||
/** Auto-executed tool outputs captured across loop steps — surfaces
|
||||
* what the agent "saw" when reasoning across multiple calls. */
|
||||
loopSteps?: Array<{
|
||||
loopIndex: number;
|
||||
toolName: string;
|
||||
params: Record<string, unknown>;
|
||||
outputPreview: string;
|
||||
}>;
|
||||
plannerError?: string;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import { executeTool } from '../../tools/executor';
|
|||
import { db } from '../../database';
|
||||
import { decryptRecords } from '../../crypto';
|
||||
import { discoverByQuery, searchFeeds } from '$lib/modules/news-research/api';
|
||||
import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry } from './debug';
|
||||
import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry, type PlannerCallDebug } from './debug';
|
||||
import { makeAgentActor, LEGACY_AI_PRINCIPAL, type Actor } from '../../events/actor';
|
||||
import { getAgent } from '../agents/store';
|
||||
import { DEFAULT_AGENT_NAME } from '../agents/types';
|
||||
|
|
@ -43,6 +43,14 @@ import type { AiPlanInput, AiPlanOutput, PlannedStep, ResolvedInput } from './pl
|
|||
* web-research call. Keeps the trigger explicit so unrelated missions
|
||||
* don't burn credits accidentally. */
|
||||
const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neueste)/i;
|
||||
|
||||
/** Reasoning-loop budget. Each LOOP iteration = one planner call + its
|
||||
* auto-tool executions. The loop exits early when a propose-policy
|
||||
* step is staged (human must approve before progressing) or the
|
||||
* planner returns zero steps (it considers this subtask done).
|
||||
* 5 is generous for read-act-refine patterns ("list_notes → tag them")
|
||||
* without running the LLM bill dry on stuck missions. */
|
||||
const MAX_REASONING_LOOP_ITERATIONS = 5;
|
||||
/** Singleton row id of the kontext doc — kept in sync with
|
||||
* `modules/kontext/types.ts` (KONTEXT_SINGLETON_ID). */
|
||||
const KONTEXT_SINGLETON_ID = 'singleton';
|
||||
|
|
@ -68,7 +76,16 @@ export interface MissionRunnerDeps {
|
|||
}
|
||||
|
||||
export type StageOutcome =
|
||||
| { readonly ok: true; readonly proposalId: string }
|
||||
| {
|
||||
readonly ok: true;
|
||||
readonly proposalId: string;
|
||||
/** Full tool-result payload when the step auto-executed (proposalId
|
||||
* is empty). The reasoning loop reads this and feeds it back as
|
||||
* context for the next planner call so the agent can reason over
|
||||
* list/read outputs across steps. */
|
||||
readonly autoData?: unknown;
|
||||
readonly autoMessage?: string;
|
||||
}
|
||||
| { readonly ok: false; readonly error: string };
|
||||
|
||||
/** Default step-staging implementation: policy-gated executor under AI actor. */
|
||||
|
|
@ -86,8 +103,9 @@ export const defaultStageStep: Required<MissionRunnerDeps>['stageStep'] = async
|
|||
const data = result.data as { proposalId?: string } | undefined;
|
||||
if (data?.proposalId) return { ok: true, proposalId: data.proposalId };
|
||||
// Policy resolved to 'auto' — no proposal row was created, the tool
|
||||
// ran directly. Treat as ok but without a proposal id to thread back.
|
||||
return { ok: true, proposalId: '' };
|
||||
// ran directly. Return the payload so the reasoning loop can feed it
|
||||
// back into the next planner call.
|
||||
return { ok: true, proposalId: '', autoData: result.data, autoMessage: result.message };
|
||||
};
|
||||
|
||||
export interface RunMissionResult {
|
||||
|
|
@ -225,31 +243,153 @@ export async function runMission(
|
|||
const availableTools = getAvailableToolsForAi(aiActor);
|
||||
await checkCancel();
|
||||
|
||||
// ── Phase: calling-llm ─────────────────────────────────
|
||||
await enterPhase('calling-llm', 'frage Planner an');
|
||||
let plan: AiPlanOutput;
|
||||
try {
|
||||
plan = await deps.plan({ mission: mission!, resolvedInputs, availableTools });
|
||||
} catch (err) {
|
||||
// Capture even the failure for debug visibility before re-throwing.
|
||||
if (isAiDebugEnabled()) {
|
||||
void recordAiDebug({
|
||||
iterationId,
|
||||
missionId: mission!.id,
|
||||
missionTitle: mission!.title,
|
||||
missionObjective: mission!.objective,
|
||||
capturedAt: new Date().toISOString(),
|
||||
resolvedInputs,
|
||||
preStep,
|
||||
plannerError: err instanceof Error ? err.message : String(err),
|
||||
// ── Reasoning loop ─────────────────────────────────────
|
||||
// Each pass: call planner → stage steps. Auto-tools run inline
|
||||
// and their outputs become new ResolvedInputs so the NEXT planner
|
||||
// call can reason over them (e.g. list_notes → see titles →
|
||||
// stage add_tag_to_note per note). Loop exits when:
|
||||
// • planner returns 0 steps → agent is done
|
||||
// • any step requires user approval (propose) → user in the loop
|
||||
// • budget exhausted (MAX_REASONING_LOOP_ITERATIONS)
|
||||
// • a step fails hard (not tool-error; executor error)
|
||||
const stage = deps.stageStep ?? defaultStageStep;
|
||||
const loopInputs: ResolvedInput[] = [...resolvedInputs];
|
||||
const recordedSteps: PlanStep[] = [];
|
||||
const plannerCalls: PlannerCallDebug[] = [];
|
||||
const loopStepLog: NonNullable<AiDebugEntry['loopSteps']> = [];
|
||||
let stagedCount = 0;
|
||||
let failedCount = 0;
|
||||
let lastPlanSummary = '';
|
||||
let totalStepCount = 0;
|
||||
let loopIndex = 0;
|
||||
let stepCounter = 0;
|
||||
let humanInLoop = false;
|
||||
|
||||
while (loopIndex < MAX_REASONING_LOOP_ITERATIONS) {
|
||||
// ── Phase: calling-llm ─────────────────────────────
|
||||
await enterPhase(
|
||||
'calling-llm',
|
||||
loopIndex === 0
|
||||
? 'frage Planner an'
|
||||
: `Planner Runde ${loopIndex + 1}/${MAX_REASONING_LOOP_ITERATIONS}`
|
||||
);
|
||||
let plan: AiPlanOutput;
|
||||
try {
|
||||
plan = await deps.plan({ mission: mission!, resolvedInputs: loopInputs, availableTools });
|
||||
} catch (err) {
|
||||
if (isAiDebugEnabled()) {
|
||||
void recordAiDebug({
|
||||
iterationId,
|
||||
missionId: mission!.id,
|
||||
missionTitle: mission!.title,
|
||||
missionObjective: mission!.objective,
|
||||
capturedAt: new Date().toISOString(),
|
||||
resolvedInputs: loopInputs,
|
||||
preStep,
|
||||
plannerCalls,
|
||||
loopSteps: loopStepLog,
|
||||
plannerError: err instanceof Error ? err.message : String(err),
|
||||
});
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
await checkCancel();
|
||||
if (plan.debug) plannerCalls.push(plan.debug);
|
||||
lastPlanSummary = plan.summary;
|
||||
totalStepCount += plan.steps.length;
|
||||
|
||||
if (plan.steps.length === 0) {
|
||||
// Planner has nothing more to do — agent considers this done.
|
||||
break;
|
||||
}
|
||||
|
||||
// ── Phase: parsing-response ────────────────────────
|
||||
await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`);
|
||||
await checkCancel();
|
||||
|
||||
// ── Phase: staging-proposals ───────────────────────
|
||||
const roundOutputs: Array<{ step: PlannedStep; message: string; data: unknown }> = [];
|
||||
for (const [i, ps] of plan.steps.entries()) {
|
||||
await enterPhase(
|
||||
'staging-proposals',
|
||||
`Runde ${loopIndex + 1} · Step ${i + 1}/${plan.steps.length}`
|
||||
);
|
||||
await checkCancel();
|
||||
|
||||
const outcome = await stage(ps, aiActor);
|
||||
const stepId = `${iterationId}-${stepCounter++}`;
|
||||
if (!outcome.ok) {
|
||||
failedCount++;
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: ps.summary,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
status: 'failed',
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
stagedCount++;
|
||||
if (outcome.proposalId) {
|
||||
// Propose-policy: human must approve. Exit the loop after
|
||||
// this round so we don't stage proposals for hypothetical
|
||||
// follow-up steps that depend on the approval outcome.
|
||||
humanInLoop = true;
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: ps.summary,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
proposalId: outcome.proposalId,
|
||||
status: 'staged',
|
||||
});
|
||||
} else {
|
||||
// Auto-policy: ran inline. Collect output for the next
|
||||
// planner call.
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: ps.summary,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
status: 'approved',
|
||||
});
|
||||
roundOutputs.push({
|
||||
step: ps,
|
||||
message: outcome.autoMessage ?? '(ohne message)',
|
||||
data: outcome.autoData,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Log loop outputs for debug-panel visibility.
|
||||
for (const o of roundOutputs) {
|
||||
loopStepLog.push({
|
||||
loopIndex,
|
||||
toolName: o.step.toolName,
|
||||
params: o.step.params,
|
||||
outputPreview: formatToolOutputPreview(o.message, o.data),
|
||||
});
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
await checkCancel();
|
||||
|
||||
// Persist debug capture if enabled. Off by default in production
|
||||
// (toggle via Settings or `localStorage.setItem('mana.ai.debug','1')`).
|
||||
if (humanInLoop) break;
|
||||
if (roundOutputs.length === 0) {
|
||||
// Every step either failed or was proposed — nothing new to
|
||||
// reason over. Prevents an infinite loop when the planner
|
||||
// only suggests proposable tools that keep failing.
|
||||
break;
|
||||
}
|
||||
|
||||
// Feed tool outputs into the next planner call as a synthetic
|
||||
// ResolvedInput so the agent can chain its reasoning.
|
||||
loopInputs.push({
|
||||
id: `loop-outputs-${loopIndex}`,
|
||||
module: 'reasoning-loop',
|
||||
table: 'tool-outputs',
|
||||
title: `Zwischenergebnisse (Runde ${loopIndex + 1})`,
|
||||
content: formatToolOutputsForPrompt(roundOutputs),
|
||||
});
|
||||
|
||||
loopIndex++;
|
||||
}
|
||||
|
||||
if (isAiDebugEnabled()) {
|
||||
void recordAiDebug({
|
||||
iterationId,
|
||||
|
|
@ -257,54 +397,20 @@ export async function runMission(
|
|||
missionTitle: mission!.title,
|
||||
missionObjective: mission!.objective,
|
||||
capturedAt: new Date().toISOString(),
|
||||
resolvedInputs,
|
||||
resolvedInputs: loopInputs,
|
||||
preStep,
|
||||
planner: plan.debug,
|
||||
plannerCalls,
|
||||
loopSteps: loopStepLog,
|
||||
});
|
||||
}
|
||||
|
||||
// ── Phase: parsing-response ────────────────────────────
|
||||
await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`);
|
||||
await checkCancel();
|
||||
|
||||
// ── Phase: staging-proposals ───────────────────────────
|
||||
const stage = deps.stageStep ?? defaultStageStep;
|
||||
const recordedSteps: PlanStep[] = [];
|
||||
let stagedCount = 0;
|
||||
let failedCount = 0;
|
||||
|
||||
for (const [i, ps] of plan.steps.entries()) {
|
||||
await enterPhase('staging-proposals', `Step ${i + 1} von ${plan.steps.length}`);
|
||||
await checkCancel();
|
||||
|
||||
const outcome = await stage(ps, aiActor);
|
||||
if (outcome.ok) {
|
||||
stagedCount++;
|
||||
recordedSteps.push({
|
||||
id: `${iterationId}-${i}`,
|
||||
summary: ps.summary,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
proposalId: outcome.proposalId || undefined,
|
||||
status: outcome.proposalId ? 'staged' : 'approved',
|
||||
});
|
||||
} else {
|
||||
failedCount++;
|
||||
recordedSteps.push({
|
||||
id: `${iterationId}-${i}`,
|
||||
summary: ps.summary,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
status: 'failed',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
await enterPhase('finalizing');
|
||||
return {
|
||||
recordedSteps,
|
||||
stagedCount,
|
||||
failedCount,
|
||||
planSummary: plan.summary,
|
||||
planStepCount: plan.steps.length,
|
||||
planSummary: lastPlanSummary,
|
||||
planStepCount: totalStepCount,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -411,6 +517,44 @@ async function loadKontextAsResolvedInput(): Promise<ResolvedInput | null> {
|
|||
/** Run the deep-research pipeline against the mission objective and
|
||||
* collapse its summary + sources into one ResolvedInput formatted so
|
||||
* the planner can copy URLs into save_news_article calls. */
|
||||
/** Stringify a tool-output payload for the reasoning loop's next
|
||||
* prompt. Keeps the blob compact — LLM context windows are finite and
|
||||
* a raw JSON.stringify of a 200-row Dexie dump wastes tokens. */
|
||||
function formatToolOutputsForPrompt(
|
||||
outputs: Array<{ step: PlannedStep; message: string; data: unknown }>
|
||||
): string {
|
||||
const lines: string[] = [
|
||||
'Ausgaben der zuletzt ausgeführten Auto-Tools. Nutze diese Daten um die Mission weiterzuführen — z.B. für jede gelistete Notiz einen add_tag_to_note Aufruf pro Notiz.',
|
||||
'',
|
||||
];
|
||||
for (const o of outputs) {
|
||||
lines.push(`### ${o.step.toolName}(${JSON.stringify(o.step.params)})`);
|
||||
lines.push(o.message);
|
||||
if (o.data !== undefined && o.data !== null) {
|
||||
const json = safeStringify(o.data, 4000);
|
||||
lines.push('```json', json, '```');
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
/** Short form for the debug-panel loopSteps log. */
|
||||
function formatToolOutputPreview(message: string, data: unknown): string {
|
||||
if (data === undefined || data === null) return message;
|
||||
const json = safeStringify(data, 400);
|
||||
return `${message}\n${json}`;
|
||||
}
|
||||
|
||||
function safeStringify(value: unknown, limit: number): string {
|
||||
try {
|
||||
const s = JSON.stringify(value, null, 2);
|
||||
return s.length > limit ? s.slice(0, limit) + '\n… (truncated)' : s;
|
||||
} catch {
|
||||
return String(value);
|
||||
}
|
||||
}
|
||||
|
||||
interface WebResearchOutcome {
|
||||
input: ResolvedInput;
|
||||
sourceCount: number;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue