feat(webapp): mission runner on native function calling, tools execute directly

The runner now drives runPlannerLoop from @mana/shared-ai: the LLM
emits native tool_calls via mana-llm's tools passthrough, we execute
each call immediately under the AI actor, and feed the result back as
a tool-message for the next turn. The reasoning loop still runs up to
5 rounds (same budget as before) but needs no hand-rolled re-prompting
because the SDK-level tool-message exchange does that for us.
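
A minimal sketch of what one such round amounts to, assuming the
shared-ai shapes visible in the diffs below (ChatMessage,
ToolCallRequest, ToolResult); runPlannerLoop itself lives in
@mana/shared-ai, so treat this as an illustration of the exchange,
not its source:

    import type {
      ChatMessage,
      LlmClient,
      LlmCompletionRequest,
      ToolCallRequest,
      ToolResult,
    } from '@mana/shared-ai';

    // One round: ask the LLM, execute every tool_call directly, feed
    // each result back as a role:'tool' message keyed to the call id.
    // Returns false on assistant-stop (no tool_calls emitted).
    async function oneRound(
      llm: LlmClient,
      messages: ChatMessage[],
      tools: LlmCompletionRequest['tools'],
      exec: (call: ToolCallRequest) => Promise<ToolResult>
    ): Promise<boolean> {
      const res = await llm.complete({ model: 'google/gemini-2.5-flash', messages, tools });
      messages.push({ role: 'assistant', content: res.content ?? undefined, toolCalls: res.toolCalls });
      if (res.toolCalls.length === 0) return false;
      for (const call of res.toolCalls) {
        const result = await exec(call); // direct execution, no Proposal staging
        messages.push({ role: 'tool', toolCallId: call.id, content: JSON.stringify(result) });
      }
      return true; // caller re-enters until the 5-round budget is spent
    }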

Tool execution is direct — no Proposal staging. The executor's propose
branch collapses into auto (proposal store calls stay in place for
legacy consumers this commit doesn't touch; those go next). Agent-
level deny still refuses and surfaces the refusal as a tool-message
the LLM can react to.

New surface:
- missions/llm-client.ts — mana-llm HTTP adapter conforming to shared-
  ai's LlmClient. Posts /v1/chat/completions with tools + tool_choice,
  converts OpenAI-shape tool_calls back to our ToolCallRequest shape.
- runner.ts shrinks from ~770 to ~410 lines — pre-step research,
  guardrails, agent scope, timeout, cancel, debug capture all kept.
- debug.ts stores rawMessages[] (shared-ai ChatMessage) instead of
  plannerCalls[]/loopSteps. AiDebugBlock renders the chat transcript.
- available-tools.ts returns ToolSchema[] directly so the runner can
  hand the array to runPlannerLoop unchanged.
- setup.ts wires createManaLlmClient() instead of aiPlanTask +
  llmOrchestrator. The old aiPlanTask + planner/ re-export files
  remain orphaned for the next commit to delete.

Test shape: MockLlmClient scriptable via enqueue-style turns. Three
cases cover happy path, empty-plan stop, and tool-failure propagation.
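
Scripting a run looks like this (mirrors the mockLlm helper in the
test diff below; runner_test_action is the test-only tool registered
there):

    const llm = mockLlm([
      { kind: 'tool_calls', calls: [{ name: 'runner_test_action', args: { val: 'hello' } }] },
      { kind: 'stop', content: 'done' },
    ]);
    const result = await runMission(mission.id, { llm });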

Dead-but-still-compiling afterwards: the proposals folder, the
AiProposalInbox component + its 9 call-sites, server-iteration-
staging.ts, ai-plan.ts, the legacy planner/ wrappers, and the old
buildPlannerPrompt/parsePlannerResponse exports in shared-ai. These
go in commits 5b/5c/5d.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Till JS 2026-04-20 16:08:58 +02:00
parent 5af23d30b6
commit 5af96bfeff
8 changed files with 519 additions and 748 deletions

View file

@@ -44,13 +44,14 @@
{:else if d.preStep.webResearch && !d.preStep.webResearch.ok}
· Web ❌
{/if}
{#if d.plannerCalls && d.plannerCalls.length > 0}
· {d.plannerCalls.length}× LLM · {Math.round(
d.plannerCalls.reduce((a, c) => a + c.latencyMs, 0)
)}ms
{#if d.rounds}
· {d.rounds} Runde{d.rounds === 1 ? '' : 'n'}
{/if}
{#if d.loopSteps && d.loopSteps.length > 0}
· {d.loopSteps.length}× Auto-Tool
{#if d.messages}
· {d.messages.length} Messages
{/if}
{#if d.stopReason && d.stopReason !== 'assistant-stop'}
· {d.stopReason}
{/if}
{#if d.plannerError}· Planner ❌{/if}
</span>
@@ -93,41 +94,34 @@
{/if}
</section>
{#if d.loopSteps && d.loopSteps.length > 0}
{#if d.messages && d.messages.length > 0}
<section>
<h5>Auto-Tool-Ausgaben (Reasoning-Loop)</h5>
{#each d.loopSteps as ls, i (i)}
<details class="nested">
<h5>Chat-Verlauf ({d.messages.length} Messages · {d.rounds ?? '?'} Runden)</h5>
{#each d.messages as m, i (i)}
<details class="nested" open={m.role === 'assistant' || m.role === 'tool'}>
<summary>
<code>Runde {ls.loopIndex + 1}</code>
{ls.toolName}({JSON.stringify(ls.params)})
<code>{m.role}</code>
{#if m.toolCalls && m.toolCalls.length > 0}
tool_calls: {m.toolCalls.map((c) => c.name).join(', ')}
{:else if m.toolCallId}
tool_result (id: {m.toolCallId})
{:else}
{typeof m.content === 'string' ? m.content.slice(0, 100) : ''}
{/if}
</summary>
<pre>{ls.outputPreview}</pre>
{#if m.content}
<pre>{m.content}</pre>
{/if}
{#if m.toolCalls && m.toolCalls.length > 0}
{#each m.toolCalls as call (call.id)}
<pre>{call.name}({JSON.stringify(call.arguments, null, 2)})</pre>
{/each}
{/if}
</details>
{/each}
</section>
{/if}
{#if d.plannerCalls && d.plannerCalls.length > 0}
{#each d.plannerCalls as call, i (i)}
<section>
<h5>LLM-Call {i + 1}/{d.plannerCalls.length} · {Math.round(call.latencyMs)}ms</h5>
<details class="nested">
<summary>System Prompt</summary>
<pre>{call.systemPrompt}</pre>
</details>
<details class="nested" open>
<summary>User Prompt</summary>
<pre>{call.userPrompt}</pre>
</details>
<details class="nested" open>
<summary>Raw LLM Response</summary>
<pre>{call.rawResponse}</pre>
</details>
</section>
{/each}
{/if}
{#if d.plannerError}
<section>
<h5>Planner Error</h5>

View file

@@ -1,30 +1,41 @@
/**
* Build the tool list the Planner is allowed to consider.
*
* Only tools the policy rates `auto` or `propose` are exposed; `deny` is
* invisible to the AI. This is defence-in-depth: even if the Planner
* hallucinates a denied tool name, the parser rejects it because the name
* isn't in the allow-set, AND the executor would refuse at runtime.
* Only tools the policy rates `auto` or `propose` are exposed; `deny`
* is invisible to the AI. Defence-in-depth: even if the LLM somehow
* names a denied tool, the executor refuses at runtime.
*
* Returns the shared ToolSchema shape directly so the runner can pass
* the list straight into runPlannerLoop (which calls
* toolsToFunctionSchemas internally).
*/
import { getTools } from '../../tools/registry';
import { resolvePolicy } from '../policy';
import type { Actor } from '../../events/actor';
import type { AvailableTool } from './planner/types';
import type { ToolSchema } from '@mana/shared-ai';
import { AI_TOOL_CATALOG_BY_NAME } from '@mana/shared-ai';
export function getAvailableToolsForAi(aiActor: Extract<Actor, { kind: 'ai' }>): AvailableTool[] {
export function getAvailableToolsForAi(aiActor: Extract<Actor, { kind: 'ai' }>): ToolSchema[] {
return getTools()
.filter((tool) => resolvePolicy(tool.name, aiActor) !== 'deny')
.map((tool) => ({
name: tool.name,
module: tool.module,
description: tool.description,
parameters: tool.parameters.map((p) => ({
name: p.name,
type: p.type,
required: p.required,
description: p.description,
enum: p.enum,
})),
}));
.map((tool) => {
// Prefer the catalog entry when available — it carries the
// defaultPolicy we need on ToolSchema. Tools without a catalog
// entry (playground / test-only) fall back to 'auto'.
const catalogEntry = AI_TOOL_CATALOG_BY_NAME.get(tool.name);
return {
name: tool.name,
module: tool.module,
description: tool.description,
defaultPolicy: catalogEntry?.defaultPolicy ?? 'auto',
parameters: tool.parameters.map((p) => ({
name: p.name,
type: p.type,
required: p.required,
description: p.description,
enum: p.enum,
})),
};
});
}
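
For reference, one entry of the returned list now carries the catalog
policy alongside the parameter schema. Values here are illustrative;
the shape is the ToolSchema used above:

    import type { ToolSchema } from '@mana/shared-ai';

    // Illustrative entry, not a real catalog row.
    const example: ToolSchema = {
      name: 'add_tag_to_note',
      module: 'notes',
      description: 'Attach a tag to an existing note',
      defaultPolicy: 'auto', // from AI_TOOL_CATALOG_BY_NAME, or the 'auto' fallback
      parameters: [
        { name: 'noteId', type: 'string', required: true, description: 'Target note id' },
        { name: 'tag', type: 'string', required: true, description: 'Tag to attach' },
      ],
    };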

View file

@@ -14,7 +14,7 @@
import { useLiveQueryWithDefault } from '@mana/local-store/svelte';
import { db } from '../../database';
import type { ResolvedInput } from './planner/types';
import type { ChatMessage, LoopStopReason, ResolvedInput } from '@mana/shared-ai';
const TABLE = '_aiDebugLog';
const STORAGE_KEY = 'mana.ai.debug';
@@ -26,19 +26,6 @@ const MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
* is truncated to reduce the privacy surface if the device is stolen. */
const INPUT_CONTENT_LIMIT = 500;
/**
* Captured by `aiPlanTask` and passed back via the planner output so the
* runner can record it without the planner needing to know about Dexie.
*/
export interface PlannerCallDebug {
readonly systemPrompt: string;
readonly userPrompt: string;
readonly rawResponse: string;
readonly latencyMs: number;
readonly backendId?: string;
readonly model?: string;
}
export interface AiDebugEntry {
/** Primary key — one row per iteration. */
iterationId: string;
@@ -51,22 +38,14 @@ export interface AiDebugEntry {
webResearch?: { ok: true; sourceCount: number; summary: string } | { ok: false; error: string };
kontextInjected: boolean;
};
/**
* Array because the reasoning loop can call the planner multiple
* times per iteration (once per loop step, until a proposal is
* staged or no more work is returned). Older single-call entries
* written before the loop shipped still parse readers that
* haven't updated simply take `plannerCalls[0]`.
*/
plannerCalls?: PlannerCallDebug[];
/** Auto-executed tool outputs captured across loop steps; surfaces
* what the agent "saw" when reasoning across multiple calls. */
loopSteps?: Array<{
loopIndex: number;
toolName: string;
params: Record<string, unknown>;
outputPreview: string;
}>;
/** Full chat history of the planner loop: system + user + every
* assistant turn (with tool_calls) + every tool-message result.
* Replaces the pre-migration plannerCalls[]/loopSteps structure. */
messages?: ChatMessage[];
/** Number of planner rounds consumed inside this iteration. */
rounds?: number;
/** Why the loop terminated (assistant-stop, max-rounds, …). */
stopReason?: LoopStopReason;
plannerError?: string;
}
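
An illustrative capture under the new shape (values invented; the
import path assumes a sibling module):

    import type { AiDebugEntry } from './debug';

    // What AiDebugBlock renders after one tool round plus a closing turn.
    const slice: Pick<AiDebugEntry, 'messages' | 'rounds' | 'stopReason'> = {
      rounds: 2,
      stopReason: 'assistant-stop',
      messages: [
        { role: 'system', content: '…system prompt…' },
        { role: 'user', content: '…mission + resolved inputs…' },
        { role: 'assistant', toolCalls: [{ id: 'call_0', name: 'list_notes', arguments: {} }] },
        { role: 'tool', toolCallId: 'call_0', content: '{"success":true,"message":"ok"}' },
        { role: 'assistant', content: 'done' },
      ],
    };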

View file

@@ -0,0 +1,189 @@
/**
* LlmClient implementation that speaks to the mana-llm service using its
* OpenAI-compatible /v1/chat/completions endpoint, with native tool_calls
* passthrough. Used by the webapp Mission Runner and Companion Chat to
* drive the shared runPlannerLoop from @mana/shared-ai.
*
* The shared-ai LlmClient contract is intentionally small; we don't go
* through the LlmOrchestrator's tier routing here. Tool calling needs
* a specific server-proxied path (mana-llm forwards to Google / OpenAI
* / Ollama with tools enabled), not the legacy text-JSON orchestrator.
* Tier integration can come later once shared-llm grows tool-call
* awareness.
*/
import {
type ChatMessage,
type LlmClient,
type LlmCompletionRequest,
type LlmCompletionResponse,
type LlmFinishReason,
type ToolCallRequest,
} from '@mana/shared-ai';
const DEFAULT_LLM_URL = 'http://localhost:3025';
/** Resolve the mana-llm base URL from window-injected env; falls back
* to localhost. Mirrors the helper in @mana/shared-llm's remote.ts. */
function resolveLlmBaseUrl(): string {
if (typeof window !== 'undefined') {
const fromWindow = (window as unknown as { __PUBLIC_MANA_LLM_URL__?: string })
.__PUBLIC_MANA_LLM_URL__;
if (fromWindow) return fromWindow.replace(/\/$/, '');
}
return DEFAULT_LLM_URL;
}
export interface ManaLlmClientOptions {
/** Default model id used when callers don't override per request.
* Format matches mana-llm's provider/model syntax. */
readonly defaultModel?: string;
/** Override the base URL, mostly for tests. Production resolves from
* window.__PUBLIC_MANA_LLM_URL__. */
readonly baseUrl?: string;
/** Hard stop for the fetch. The runner wraps runPlannerLoop in its
* own iteration-level timeout (180 s) so this is mostly a belt +
* braces for pathological provider stalls. */
readonly fetchTimeoutMs?: number;
}
const DEFAULT_MODEL = 'google/gemini-2.5-flash';
const DEFAULT_FETCH_TIMEOUT_MS = 120_000;
export function createManaLlmClient(opts: ManaLlmClientOptions = {}): LlmClient {
const baseUrl = (opts.baseUrl ?? resolveLlmBaseUrl()).replace(/\/$/, '');
const defaultModel = opts.defaultModel ?? DEFAULT_MODEL;
const fetchTimeoutMs = opts.fetchTimeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS;
return {
async complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
const url = `${baseUrl}/v1/chat/completions`;
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs);
const body = {
model: req.model || defaultModel,
messages: req.messages.map(toWireMessage),
tools: req.tools, // already in OpenAI {type, function} shape
tool_choice: 'auto' as const,
temperature: req.temperature ?? 0.3,
stream: false,
};
let res: Response;
try {
res = await fetch(url, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
signal: controller.signal,
});
} catch (err) {
clearTimeout(timeout);
const msg = err instanceof Error ? err.message : String(err);
throw new Error(`mana-llm unreachable at ${baseUrl}: ${msg}`);
}
clearTimeout(timeout);
if (!res.ok) {
let detail: unknown;
try {
detail = await res.json();
} catch {
detail = await res.text().catch(() => '');
}
throw new Error(
`mana-llm ${res.status}: ${typeof detail === 'string' ? detail : JSON.stringify(detail)}`
);
}
const data = (await res.json()) as ChatCompletionResponseShape;
const choice = data.choices?.[0];
if (!choice) {
throw new Error('mana-llm response had no choices');
}
const content = choice.message?.content ?? null;
const toolCalls = (choice.message?.tool_calls ?? []).map(fromWireToolCall);
const finishReason = normaliseFinishReason(choice.finish_reason);
return { content, toolCalls, finishReason };
},
};
}
// ── Wire-format helpers ─────────────────────────────────────────────
interface WireMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
content?: string | null;
tool_calls?: Array<{
id: string;
type: 'function';
function: { name: string; arguments: string };
}>;
tool_call_id?: string;
}
function toWireMessage(m: ChatMessage): WireMessage {
const out: WireMessage = { role: m.role };
if (m.content !== undefined) out.content = m.content;
if (m.toolCallId) out.tool_call_id = m.toolCallId;
if (m.toolCalls && m.toolCalls.length > 0) {
out.tool_calls = m.toolCalls.map((c) => ({
id: c.id,
type: 'function',
function: {
name: c.name,
arguments: JSON.stringify(c.arguments),
},
}));
}
return out;
}
interface ChatCompletionResponseShape {
choices?: Array<{
message?: {
content?: string | null;
tool_calls?: Array<{
id: string;
type?: string;
function: { name: string; arguments?: string };
}>;
};
finish_reason?: string | null;
}>;
}
function fromWireToolCall(raw: {
id: string;
function: { name: string; arguments?: string };
}): ToolCallRequest {
let args: Record<string, unknown> = {};
if (raw.function.arguments) {
try {
const parsed = JSON.parse(raw.function.arguments);
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
args = parsed as Record<string, unknown>;
}
} catch {
// Malformed arguments — surface as empty and let the executor
// reject on the missing-required-parameter path.
}
}
return { id: raw.id, name: raw.function.name, arguments: args };
}
function normaliseFinishReason(raw: string | null | undefined): LlmFinishReason {
switch (raw) {
case 'tool_calls':
return 'tool_calls';
case 'length':
return 'length';
case 'content_filter':
return 'content_filter';
case 'stop':
default:
return 'stop';
}
}
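
Driving the adapter directly, for illustration (URL is the documented
default; in production the runner hands the client to runPlannerLoop
instead):

    import { createManaLlmClient } from './llm-client';

    async function demo() {
      const llm = createManaLlmClient({ baseUrl: 'http://localhost:3025' });
      const res = await llm.complete({
        model: 'google/gemini-2.5-flash',
        messages: [
          { role: 'system', content: 'You are the mission planner.' },
          { role: 'user', content: 'Tag every note from last week.' },
        ],
        tools: [], // OpenAI {type:'function', function:{...}} schemas in real use
      });
      if (res.finishReason === 'tool_calls') {
        // Arguments arrive already parsed into plain objects.
        console.log(res.toolCalls.map((c) => `${c.name}(${JSON.stringify(c.arguments)})`));
      }
    }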

View file

@@ -9,28 +9,22 @@ vi.mock('$lib/triggers/inline-suggest', () => ({
import { db } from '../../database';
import { registerTools } from '../../tools/registry';
import { setAiPolicy } from '../policy';
import { createMission, getMission, pauseMission } from './store';
import { runMission, runDueMissions } from './runner';
import {
registerInputResolver,
unregisterInputResolver,
resolveMissionInputs,
} from './input-resolvers';
import { createMission, getMission } from './store';
import { runMission } from './runner';
import { MISSIONS_TABLE } from './types';
import type { AiPlanInput, AiPlanOutput } from './planner/types';
import type { LlmClient, LlmCompletionRequest, LlmCompletionResponse } from '@mana/shared-ai';
let executed: { name: string; params: Record<string, unknown> }[] = [];
registerTools([
{
name: 'runner_test_stage',
name: 'runner_test_action',
module: 'runnerTest',
description: 'proposes',
description: 'test action',
parameters: [{ name: 'val', type: 'string', required: true, description: 'v' }],
async execute(params) {
executed.push({ name: 'runner_test_stage', params: { ...params } });
return { success: true, message: 'ok' };
executed.push({ name: 'runner_test_action', params: { ...params } });
return { success: true, message: `did ${params.val}` };
},
},
]);
@@ -38,179 +32,97 @@ registerTools([
beforeEach(async () => {
executed = [];
await db.table(MISSIONS_TABLE).clear();
await db.table('pendingProposals').clear();
});
/** Minimal LlmClient for runner tests: scripts one or more assistant
* turns as an enqueue-style list of 'tool_calls' / 'stop' entries. */
function mockLlm(
turns: Array<
| { kind: 'tool_calls'; calls: Array<{ name: string; args: Record<string, unknown> }> }
| { kind: 'stop'; content?: string }
>
): LlmClient {
let i = 0;
return {
async complete(_req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
const turn = turns[i++];
if (!turn) throw new Error('MockLlm exhausted');
if (turn.kind === 'stop') {
return { content: turn.content ?? null, toolCalls: [], finishReason: 'stop' };
}
return {
content: null,
toolCalls: turn.calls.map((c, n) => ({
id: `call_${i}_${n}`,
name: c.name,
arguments: c.args,
})),
finishReason: 'tool_calls',
};
},
};
}
describe('runMission', () => {
it('runs the planner, stages proposals, and marks the iteration awaiting-review', async () => {
const restore = setAiPolicy({
tools: { runner_test_stage: 'propose' },
defaultForAi: 'propose',
});
try {
const m = await createMission({
title: 'Test mission',
conceptMarkdown: '',
objective: 'test',
cadence: { kind: 'manual' },
});
const planStub: AiPlanOutput = {
summary: 'Staged a test step',
steps: [
{
summary: 'Do a thing',
toolName: 'runner_test_stage',
params: { val: 'hello' },
rationale: 'because test',
},
],
};
const result = await runMission(m.id, {
plan: async (_input: AiPlanInput) => planStub,
});
expect(result.plannedSteps).toBe(1);
expect(result.stagedSteps).toBe(1);
expect(result.iteration.overallStatus).toBe('awaiting-review');
const after = await getMission(m.id);
expect(after?.iterations).toHaveLength(1);
expect(after?.iterations[0].plan[0].proposalId).toBeTruthy();
expect(after?.iterations[0].plan[0].status).toBe('staged');
// Tool did NOT execute — proposal was staged
expect(executed).toHaveLength(0);
} finally {
restore();
}
});
it('passes the built AiPlanInput to the planner with mission + tool allowlist', async () => {
const restore = setAiPolicy({
tools: { runner_test_stage: 'propose' },
defaultForAi: 'deny',
});
try {
const m = await createMission({
title: 'Test',
conceptMarkdown: '',
objective: 'test',
cadence: { kind: 'manual' },
});
let captured: AiPlanInput | null = null;
await runMission(m.id, {
plan: async (input) => {
captured = input;
return { summary: '', steps: [] };
},
});
expect(captured).toBeTruthy();
expect(captured!.mission.id).toBe(m.id);
const allowedNames = captured!.availableTools.map((t) => t.name);
expect(allowedNames).toContain('runner_test_stage');
} finally {
restore();
}
});
it('marks an iteration failed when the planner throws', async () => {
it('executes a tool_call directly and records it in the iteration', async () => {
const m = await createMission({
title: 'x',
title: 'Test mission',
conceptMarkdown: '',
objective: 'x',
objective: 'test',
cadence: { kind: 'manual' },
});
const result = await runMission(m.id, {
plan: async () => {
throw new Error('planner down');
},
});
expect(result.iteration.overallStatus).toBe('failed');
const after = await getMission(m.id);
expect(after?.iterations[0].overallStatus).toBe('failed');
expect(after?.iterations[0].summary).toContain('planner down');
});
it('produces an approved iteration when planner returns zero steps', async () => {
const m = await createMission({
title: 'x',
conceptMarkdown: '',
objective: 'x',
cadence: { kind: 'manual' },
});
const result = await runMission(m.id, {
plan: async () => ({ summary: 'nothing needed', steps: [] }),
});
const llm = mockLlm([
{ kind: 'tool_calls', calls: [{ name: 'runner_test_action', args: { val: 'hello' } }] },
{ kind: 'stop', content: 'done' },
]);
const result = await runMission(m.id, { llm });
expect(result.plannedSteps).toBe(1);
expect(result.failedSteps).toBe(0);
expect(result.iteration.overallStatus).toBe('approved');
expect(executed).toEqual([{ name: 'runner_test_action', params: { val: 'hello' } }]);
const after = await getMission(m.id);
expect(after?.iterations).toHaveLength(1);
expect(after?.iterations[0].plan).toHaveLength(1);
expect(after?.iterations[0].plan[0].status).toBe('approved');
});
it('refuses to run a paused mission', async () => {
it('marks the iteration approved with zero steps when the LLM just stops', async () => {
const m = await createMission({
title: 'x',
title: 'Empty',
conceptMarkdown: '',
objective: 'x',
objective: 'nothing to do',
cadence: { kind: 'manual' },
});
await pauseMission(m.id);
await expect(
runMission(m.id, { plan: async () => ({ summary: '', steps: [] }) })
).rejects.toThrow(/paused/);
});
});
describe('runDueMissions', () => {
it('runs only active missions whose nextRunAt has passed', async () => {
const a = await createMission({
title: 'due',
const llm = mockLlm([{ kind: 'stop', content: 'nichts zu tun' }]);
const result = await runMission(m.id, { llm });
expect(result.plannedSteps).toBe(0);
expect(result.iteration.overallStatus).toBe('approved');
expect(executed).toHaveLength(0);
});
it('surfaces tool failures as failed PlanSteps without aborting the iteration', async () => {
const m = await createMission({
title: 'Mixed',
conceptMarkdown: '',
objective: 'x',
cadence: { kind: 'interval', everyMinutes: 5 },
objective: 'test',
cadence: { kind: 'manual' },
});
const b = await createMission({
title: 'future',
conceptMarkdown: '',
objective: 'x',
cadence: { kind: 'interval', everyMinutes: 5 },
});
// Force `a` into the past, leave `b` in the future
await db.table(MISSIONS_TABLE).update(a.id, { nextRunAt: '2020-01-01T00:00:00.000Z' });
const runs: string[] = [];
await runDueMissions(new Date(), {
plan: async (input) => {
runs.push(input.mission.id);
return { summary: '', steps: [] };
},
});
expect(runs).toEqual([a.id]);
expect(runs).not.toContain(b.id);
});
});
describe('resolveMissionInputs', () => {
it('resolves via registered resolvers and skips missing modules', async () => {
registerInputResolver('testmod', async (ref) => ({
id: ref.id,
module: 'testmod',
table: ref.table,
title: 'T',
content: `content for ${ref.id}`,
}));
try {
const refs = [
{ module: 'testmod', table: 't', id: 'a' },
{ module: 'nope', table: 't', id: 'b' },
];
const resolved = await resolveMissionInputs(refs);
expect(resolved).toHaveLength(1);
expect(resolved[0].content).toContain('a');
} finally {
unregisterInputResolver('testmod');
}
});
it('returns empty array when nothing is registered', async () => {
const r = await resolveMissionInputs([{ module: 'unknown', table: 't', id: 'x' }]);
expect(r).toEqual([]);
// One call to an unknown tool (executor returns success:false) plus a stop.
const llm = mockLlm([
{ kind: 'tool_calls', calls: [{ name: 'does_not_exist', args: {} }] },
{ kind: 'stop' },
]);
const result = await runMission(m.id, { llm });
expect(result.plannedSteps).toBe(1);
expect(result.failedSteps).toBe(1);
expect(result.iteration.overallStatus).toBe('failed');
});
});

View file

@@ -5,18 +5,21 @@
*
* resolve inputs via registered resolvers
*
* build available-tool list (policy-filtered)
* pre-step web research (when the objective looks like a research task)
*
* call planner (LLM) → AiPlanOutput
* build system + user prompts (compact, no tool listing)
*
* for each step: stage a Proposal under the AI actor
* runPlannerLoop with native function calling
*
* finishIteration(summary, overallStatus, plan-with-proposal-ids)
* each tool_call executes directly via the policy-gated executor;
* results feed back as tool-messages for the next turn
*
* finishIteration(summary, overallStatus, executed-steps)
*
* Planner + proposal-staging are injected so the Runner is unit-testable
* without a live LLM or Dexie hooks. Default implementations call the
* shared LlmOrchestrator / `executeTool(...)` respectively; production
* code passes those in via the setup module.
* Post-migration note: there is no propose/approve gate. Tools run
* directly under the AI actor. The user's review surface is the
* Workbench Timeline + per-iteration revert. See
* docs/plans/planner-function-calling.md for the design rationale.
*/
import {
@@ -29,21 +32,23 @@ import {
import { resolveMissionInputs } from './input-resolvers';
import { getAvailableToolsForAi } from './available-tools';
import { executeTool } from '../../tools/executor';
import { db } from '../../database';
import { decryptRecords } from '../../crypto';
import { discoverByQuery, searchFeeds } from '$lib/modules/news-research/api';
import { getAgentKontext } from '../agents/kontext';
import { withAgentScope } from '../scope-context';
import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry, type PlannerCallDebug } from './debug';
import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry } from './debug';
import { makeAgentActor, LEGACY_AI_PRINCIPAL, type Actor } from '../../events/actor';
import { getAgent } from '../agents/store';
import { DEFAULT_AGENT_NAME } from '../agents/types';
import type { Mission, MissionIteration, PlanStep } from './types';
import type { AiPlanInput, AiPlanOutput, PlannedStep, ResolvedInput } from './planner/types';
import {
buildSystemPrompt,
runPlannerLoop,
runPrePlanGuardrails,
runPostPlanGuardrails,
runPreExecuteGuardrails,
type ChatMessage,
type LlmClient,
type ResolvedInput,
type ToolCallRequest,
type ToolResult,
} from '@mana/shared-ai';
/** Heuristic: mission objective text that should trigger a pre-step
@@ -51,33 +56,14 @@ import {
* don't burn credits accidentally. */
const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neueste)/i;
/** Reasoning-loop budget. Each LOOP iteration = one planner call + its
* auto-tool executions. The loop exits early when a propose-policy
* step is staged (human must approve before progressing) or the
* planner returns zero steps (it considers this subtask done).
* 5 is generous for read-act-refine patterns ("list_notes → tag them")
* without running the LLM bill dry on stuck missions. */
/** Keep in sync with the planner system prompt in
* packages/shared-ai/src/planner/prompt.ts which tells the LLM
* "bis zu 5 Planungsrunden pro Iteration, 15 Schritte pro Runde". */
const MAX_REASONING_LOOP_ITERATIONS = 5;
/** Hard ceiling on planner rounds inside one iteration. One round = one
* LLM call plus whatever tool executions its output triggered. Matches
* the shared-ai default; re-declared here for clarity. */
const MAX_PLANNER_ROUNDS = 5;
/** Min interval between Dexie phaseDetail writes during streaming.
* 50 tokens/s × 500ms = ~25 tokens between writes frequent enough
* for the UI to feel live, infrequent enough to avoid Dexie thrashing. */
const STREAMING_PHASE_THROTTLE_MS = 500;
/** Singleton row id of the kontext doc, kept in sync with
* `modules/kontext/types.ts` (KONTEXT_SINGLETON_ID). */
const KONTEXT_SINGLETON_ID = 'singleton';
/** Hard timeout for one mission run. Cancels the in-flight planner call
* and finalises the iteration as failed. 90 s is comfortable for a
* cloud-tier model but short enough that a wedged backend doesn't sit
* in `running` indefinitely. */
/** 180s gives the reasoning loop (up to 5 LLM calls) enough headroom
* even on slow models. Each call can take 10–30s on Ollama/GPU with
* network latency; the old 90s limit regularly timed out during the
* second loop round. */
/** Hard timeout for one mission run. 180 s is comfortable for a cloud
* model doing up to 5 reasoning rounds; anything longer means a wedged
* backend and should fail the iteration rather than sit in `running`. */
const ITERATION_TIMEOUT_MS = 180_000;
class CancelledError extends Error {
@@ -87,46 +73,24 @@
}
}
// ─── Public API ─────────────────────────────────────────────────────
export interface MissionRunnerDeps {
/** Invoke the Planner LLM task with the fully-built input. */
plan: (input: AiPlanInput) => Promise<AiPlanOutput>;
/** Stage a single planned step as a Proposal. Returns the proposal id on success. */
stageStep?: (step: PlannedStep, aiActor: Extract<Actor, { kind: 'ai' }>) => Promise<StageOutcome>;
/** LLM transport. Typically the mana-llm client from llm-client.ts;
* tests inject a MockLlmClient. */
llm: LlmClient;
/** Model id to pass to the LLM (provider/model). Defaults handled by
* the client; exposed here so per-mission overrides can plug in. */
model?: string;
/** Per-tool executor. Tests inject a mock; production defaults to
* the policy-gated `executeTool`. */
executeTool?: (
name: string,
params: Record<string, unknown>,
actor: Actor
) => Promise<ToolResult>;
}
export type StageOutcome =
| {
readonly ok: true;
readonly proposalId: string;
/** Full tool-result payload when the step auto-executed (proposalId
* is empty). The reasoning loop reads this and feeds it back as
* context for the next planner call so the agent can reason over
* list/read outputs across steps. */
readonly autoData?: unknown;
readonly autoMessage?: string;
}
| { readonly ok: false; readonly error: string };
/** Default step-staging implementation: policy-gated executor under AI actor. */
export const defaultStageStep: Required<MissionRunnerDeps>['stageStep'] = async (step, aiActor) => {
const stepActor: Extract<Actor, { kind: 'ai' }> = {
...aiActor,
// Per-step rationale wins over the mission-wide one so the review UI
// shows *this step's* reasoning.
rationale: step.rationale || aiActor.rationale,
};
const result = await executeTool(step.toolName, step.params, stepActor);
if (!result.success) {
return { ok: false, error: result.message };
}
const data = result.data as { proposalId?: string } | undefined;
if (data?.proposalId) return { ok: true, proposalId: data.proposalId };
// Policy resolved to 'auto' — no proposal row was created, the tool
// ran directly. Return the payload so the reasoning loop can feed it
// back into the next planner call.
return { ok: true, proposalId: '', autoData: result.data, autoMessage: result.message };
};
export interface RunMissionResult {
readonly iteration: MissionIteration;
readonly plannedSteps: number;
@@ -138,12 +102,10 @@ export interface RunMissionResult {
* scope context. Queued runs wait until the previous one finishes. */
let runMutex: Promise<void> = Promise.resolve();
/** Run one iteration of the given mission. */
export async function runMission(
missionId: string,
deps: MissionRunnerDeps
): Promise<RunMissionResult> {
// Serialize mission runs so withAgentScope doesn't interleave.
let release: () => void;
const prev = runMutex;
runMutex = new Promise((r) => (release = r));
@@ -155,6 +117,27 @@
}
}
/** Scan all active missions whose `nextRunAt` has passed and run them
* once each. Drives the foreground tick wired in `+layout.svelte`. */
export async function runDueMissions(
now: Date,
deps: MissionRunnerDeps
): Promise<RunMissionResult[]> {
const { listMissions } = await import('./store');
const due = await listMissions({ dueBefore: now.toISOString() });
const results: RunMissionResult[] = [];
for (const m of due) {
try {
results.push(await runMission(m.id, deps));
} catch (err) {
console.error(`[MissionRunner] mission ${m.id} run threw:`, err);
}
}
return results;
}
// ─── Implementation ─────────────────────────────────────────────────
async function runMissionInner(
missionId: string,
deps: MissionRunnerDeps
@@ -165,15 +148,9 @@ async function runMissionInner(
throw new Error(`Mission ${missionId} is ${mission.state}, cannot run`);
}
// Start the iteration with an empty plan so it's visible in the UI as "running".
// Use the id the store generates so finishIteration updates the same row.
const startedIteration = await startIteration(mission.id, { plan: [] });
const iterationId = startedIteration.id;
// Resolve the owning agent. Missions that pre-date the Multi-Agent
// rollout or whose agent was deleted fall back to the legacy
// principal + default name — runner still attributes cleanly, UI
// renders the work as "Mana".
const owningAgent = mission.agentId ? await getAgent(mission.agentId) : null;
const aiActor = makeAgentActor({
agentId: owningAgent?.id ?? LEGACY_AI_PRINCIPAL,
@@ -183,8 +160,6 @@
rationale: mission.objective,
});
// Hard timeout: any phase taking longer than ITERATION_TIMEOUT_MS aborts
// the run. Wraps the whole pipeline in a Promise.race against a timer.
const timeoutPromise = new Promise<never>((_, reject) =>
setTimeout(
() => reject(new CancelledError(`timeout after ${ITERATION_TIMEOUT_MS / 1000}s`)),
@@ -198,9 +173,6 @@
}
}
// Track the phase that was last active — so a catch handler can
// attribute the error ("calling-llm" vs "parsing-response" is
// enough context for most debugging without a stack trace).
let lastPhase: import('@mana/shared-ai').IterationPhase | undefined;
async function enterPhase(
phase: import('@mana/shared-ai').IterationPhase,
@@ -210,6 +182,8 @@
await setIterationPhase(mission!.id, iterationId, phase, detail);
}
const runToolCall = deps.executeTool ?? executeTool;
async function runPipeline(): Promise<{
recordedSteps: PlanStep[];
stagedCount: number;
@@ -226,16 +200,7 @@
const resolvedInputs: ResolvedInput[] = [...baseInputs];
const preStep: AiDebugEntry['preStep'] = { kontextInjected: false };
// User context and agent kontext are available as explicit mission
// inputs via the input picker — no auto-inject. The user decides
// what context the AI sees.
// Pre-step web research: if the objective looks like research,
// run the deep-research pipeline (mana-search + mana-llm) and
// attach the summary + sources so the planner can decide which
// to save via save_news_article. Failures are non-fatal — we
// inject a synthetic "research failed" input instead so the
// planner doesn't hallucinate that the search ran.
// Pre-step web research (unchanged from pre-migration).
if (RESEARCH_TRIGGER.test(mission!.objective)) {
await enterPhase('resolving-inputs', 'Web-Recherche…');
try {
@@ -274,222 +239,77 @@
const availableTools = getAvailableToolsForAi(aiActor);
await checkCancel();
// ── Reasoning loop ─────────────────────────────────────
// Each pass: call planner → stage steps. Auto-tools run inline
// and their outputs become new ResolvedInputs so the NEXT planner
// call can reason over them (e.g. list_notes → see titles →
// stage add_tag_to_note per note). Loop exits when:
// • planner returns 0 steps → agent is done
// • any step requires user approval (propose) → user in the loop
// • budget exhausted (MAX_REASONING_LOOP_ITERATIONS)
// • a step fails hard (not tool-error; executor error)
const stage = deps.stageStep ?? defaultStageStep;
const loopInputs: ResolvedInput[] = [...resolvedInputs];
const recordedSteps: PlanStep[] = [];
const plannerCalls: PlannerCallDebug[] = [];
const loopStepLog: NonNullable<AiDebugEntry['loopSteps']> = [];
let stagedCount = 0;
let failedCount = 0;
let lastPlanSummary = '';
let totalStepCount = 0;
let loopIndex = 0;
let stepCounter = 0;
let humanInLoop = false;
while (loopIndex < MAX_REASONING_LOOP_ITERATIONS) {
// ── Phase: calling-llm ─────────────────────────────
await enterPhase(
'calling-llm',
loopIndex === 0
? 'frage Planner an'
: `Planner Runde ${loopIndex + 1}/${MAX_REASONING_LOOP_ITERATIONS}`
);
let plan: AiPlanOutput;
// Streaming: show live token progress while waiting for the
// planner response. Throttled to avoid Dexie write floods.
let streamTokenCount = 0;
let lastStreamWrite = 0;
const roundLabel = loopIndex === 0 ? '' : ` (Runde ${loopIndex + 1})`;
const onToken = (_delta: string) => {
streamTokenCount++;
const now = Date.now();
if (now - lastStreamWrite < STREAMING_PHASE_THROTTLE_MS) return;
lastStreamWrite = now;
void setIterationPhase(
mission!.id,
iterationId,
'calling-llm',
`empfange Plan${roundLabel} · ${streamTokenCount} tokens`
);
};
// ── Guardrail: pre-plan ────────────────────────
const planInput: AiPlanInput = {
mission: mission!,
resolvedInputs: loopInputs,
availableTools,
onToken,
};
const prePlanCheck = runPrePlanGuardrails(planInput);
if (!prePlanCheck.passed) {
throw new Error(`Guardrail blocked: ${prePlanCheck.blockReason}`);
}
try {
plan = await deps.plan(planInput);
} catch (err) {
if (isAiDebugEnabled()) {
void recordAiDebug({
iterationId,
missionId: mission!.id,
missionTitle: mission!.title,
missionObjective: mission!.objective,
capturedAt: new Date().toISOString(),
resolvedInputs: loopInputs,
preStep,
plannerCalls,
loopSteps: loopStepLog,
plannerError: err instanceof Error ? err.message : String(err),
});
}
throw err;
}
await checkCancel();
if (plan.debug) plannerCalls.push(plan.debug);
lastPlanSummary = plan.summary;
totalStepCount += plan.steps.length;
if (plan.steps.length === 0) {
// Planner has nothing more to do — agent considers this done.
break;
}
// ── Guardrail: post-plan ──────────────────────────
const postPlanCheck = runPostPlanGuardrails(planInput, plan);
if (!postPlanCheck.passed) {
throw new Error(`Guardrail blocked plan: ${postPlanCheck.blockReason}`);
}
// ── Phase: parsing-response ────────────────────────
await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`);
await checkCancel();
// ── Phase: staging-proposals ───────────────────────
const roundOutputs: Array<{ step: PlannedStep; message: string; data: unknown }> = [];
for (const [i, ps] of plan.steps.entries()) {
await enterPhase(
'staging-proposals',
`Runde ${loopIndex + 1} · Step ${i + 1}/${plan.steps.length}`
);
await checkCancel();
// ── Guardrail: pre-execute ─────────────────────
const execCheck = runPreExecuteGuardrails(ps);
if (!execCheck.passed) {
failedCount++;
const stepId = `${iterationId}-${stepCounter++}`;
recordedSteps.push({
id: stepId,
summary: `Guardrail: ${execCheck.blockReason}`,
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
status: 'failed',
});
continue;
}
const stepId = `${iterationId}-${stepCounter++}`;
let outcome: StageOutcome;
try {
outcome = await stage(ps, aiActor);
} catch (err) {
// Tool threw an unhandled exception (Dexie error, vault locked,
// network timeout, etc.). Record the step as failed and continue
// with the next step so one broken tool doesn't abort the entire
// iteration. The error message surfaces in the iteration plan.
const errMsg = err instanceof Error ? err.message : String(err);
console.error(`[MissionRunner] step ${ps.toolName} threw:`, err);
failedCount++;
recordedSteps.push({
id: stepId,
summary: `${ps.summary} (FEHLER: ${errMsg.slice(0, 100)})`,
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
status: 'failed',
});
continue;
}
if (!outcome.ok) {
failedCount++;
recordedSteps.push({
id: stepId,
summary: ps.summary,
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
status: 'failed',
});
continue;
}
stagedCount++;
if (outcome.proposalId) {
// Propose-policy: human must approve. Exit the loop after
// this round so we don't stage proposals for hypothetical
// follow-up steps that depend on the approval outcome.
humanInLoop = true;
recordedSteps.push({
id: stepId,
summary: ps.summary,
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
proposalId: outcome.proposalId,
status: 'staged',
});
} else {
// Auto-policy: ran inline. Collect output for the next
// planner call.
recordedSteps.push({
id: stepId,
summary: ps.summary,
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
status: 'approved',
});
roundOutputs.push({
step: ps,
message: outcome.autoMessage ?? '(ohne message)',
data: outcome.autoData,
});
}
}
// Log loop outputs for debug-panel visibility.
for (const o of roundOutputs) {
loopStepLog.push({
loopIndex,
toolName: o.step.toolName,
params: o.step.params,
outputPreview: formatToolOutputPreview(o.message, o.data),
});
}
if (humanInLoop) break;
if (roundOutputs.length === 0) {
// Every step either failed or was proposed — nothing new to
// reason over. Prevents an infinite loop when the planner
// only suggests proposable tools that keep failing.
break;
}
// Feed tool outputs into the next planner call as a synthetic
// ResolvedInput so the agent can chain its reasoning.
loopInputs.push({
id: `loop-outputs-${loopIndex}`,
module: 'reasoning-loop',
table: 'tool-outputs',
title: `Zwischenergebnisse (Runde ${loopIndex + 1})`,
content: formatToolOutputsForPrompt(roundOutputs),
});
loopIndex++;
// Pre-plan guardrail (kept — catches prompt-injection in resolved inputs etc.).
const prePlanCheck = runPrePlanGuardrails({
mission: mission!,
resolvedInputs,
availableTools,
});
if (!prePlanCheck.passed) {
throw new Error(`Guardrail blocked: ${prePlanCheck.blockReason}`);
}
// ── Phase: calling-llm / reasoning loop ────────────────
await enterPhase('calling-llm', 'Planner…');
const { systemPrompt, userPrompt } = buildSystemPrompt({
mission: mission!,
resolvedInputs,
agentSystemPrompt: owningAgent?.systemPrompt ?? null,
agentMemory: owningAgent?.memory ?? null,
});
const loopResult = await runPlannerLoop({
llm: deps.llm,
input: {
systemPrompt,
userPrompt,
tools: availableTools,
model: deps.model ?? 'google/gemini-2.5-flash',
maxRounds: MAX_PLANNER_ROUNDS,
},
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
await checkCancel();
await enterPhase('staging-proposals', call.name);
// Pre-execute guardrail per call. Failures come back as
// tool-messages so the LLM can choose a different path.
const execCheck = runPreExecuteGuardrails({
summary: call.name,
toolName: call.name,
params: call.arguments,
rationale: mission!.objective,
});
if (!execCheck.passed) {
return {
success: false,
message: `Guardrail: ${execCheck.blockReason}`,
};
}
try {
return await runToolCall(call.name, call.arguments, aiActor);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[MissionRunner] tool ${call.name} threw:`, err);
return { success: false, message: `Tool execution failed: ${msg}` };
}
},
});
await checkCancel();
// Build the persisted plan from the loop's executed calls.
const recordedSteps: PlanStep[] = loopResult.executedCalls.map((ec, i) => ({
id: `${iterationId}-${i}`,
summary: renderStepSummary(ec.call, ec.result),
intent: {
kind: 'toolCall',
toolName: ec.call.name,
params: ec.call.arguments,
},
status: ec.result.success ? 'approved' : 'failed',
}));
if (isAiDebugEnabled()) {
void recordAiDebug({
iterationId,
@@ -497,25 +317,33 @@ async function runMissionInner(
missionTitle: mission!.title,
missionObjective: mission!.objective,
capturedAt: new Date().toISOString(),
resolvedInputs: loopInputs,
resolvedInputs,
preStep,
plannerCalls,
loopSteps: loopStepLog,
rounds: loopResult.rounds,
stopReason: loopResult.stopReason,
messages: loopResult.messages as ChatMessage[],
});
}
await enterPhase('finalizing');
const failedCount = recordedSteps.filter((s) => s.status === 'failed').length;
const planSummary =
loopResult.summary ??
(recordedSteps.length === 0
? 'Keine Tool-Aufrufe — Mission hat nichts zu tun'
: `${recordedSteps.length} Tool-Aufrufe ausgeführt (${failedCount} Fehler).`);
return {
recordedSteps,
stagedCount,
stagedCount: recordedSteps.length,
failedCount,
planSummary: lastPlanSummary,
planStepCount: totalStepCount,
planSummary,
planStepCount: recordedSteps.length,
};
}
let recordedSteps: PlanStep[] = [];
let stagedCount = 0;
let failedCount = 0;
let planSummary = '';
let planStepCount = 0;
@@ -525,7 +353,6 @@ async function runMissionInner(
timeoutPromise,
]);
recordedSteps = result.recordedSteps;
stagedCount = result.stagedCount;
failedCount = result.failedCount;
planSummary = result.planSummary;
planStepCount = result.planStepCount;
@@ -545,14 +372,10 @@ async function runMissionInner(
return emptyResult(mission, iterationId, 'failed', msg);
}
// Status: everything executed → 'approved'. Some failures but not all → still 'approved'
// (the user can revert). Only wholesale failure or zero progress is 'failed'.
const overallStatus: MissionIteration['overallStatus'] =
planStepCount === 0
? 'approved' // nothing to do is a valid outcome
: failedCount === planStepCount
? 'failed'
: stagedCount > 0
? 'awaiting-review'
: 'approved';
planStepCount === 0 ? 'approved' : failedCount === planStepCount ? 'failed' : 'approved';
await finishIteration(mission.id, iterationId, {
summary: planSummary,
@@ -569,11 +392,20 @@ async function runMissionInner(
overallStatus,
},
plannedSteps: planStepCount,
stagedSteps: stagedCount,
stagedSteps: planStepCount,
failedSteps: failedCount,
};
}
// ─── Helpers ────────────────────────────────────────────────────────
function renderStepSummary(call: ToolCallRequest, result: ToolResult): string {
if (!result.success) {
return `${call.name} (FEHLER: ${result.message.slice(0, 120)})`;
}
return result.message || call.name;
}
function emptyResult(
_mission: Mission,
iterationId: string,
@@ -594,92 +426,6 @@ function emptyResult(
};
}
/** Read the kontext singleton + decrypt; returns null if empty/missing. */
async function loadKontextAsResolvedInput(): Promise<ResolvedInput | null> {
try {
const local = await db
.table<{ id: string; content?: string; deletedAt?: string }>('kontextDoc')
.get(KONTEXT_SINGLETON_ID);
if (!local || local.deletedAt) return null;
const [decrypted] = await decryptRecords('kontextDoc', [local]);
const content = decrypted?.content?.trim();
if (!content) return null;
return {
id: KONTEXT_SINGLETON_ID,
module: 'kontext',
table: 'kontextDoc',
title: 'Kontext (Standing)',
content,
};
} catch (err) {
console.warn('[MissionRunner] kontext auto-inject failed:', err);
return null;
}
}
/** Load the agent-specific kontext doc. Falls back to null (caller
* may then fall back to the global singleton if desired). */
/** Load the agent-specific kontext doc. Returns null when the agent
* has no dedicated doc (does NOT fall back to the global singleton;
* kontext injection is explicit via the input picker, not auto).
async function loadAgentKontextAsResolvedInput(agentId: string): Promise<ResolvedInput | null> {
try {
const doc = await getAgentKontext(agentId);
if (!doc) return null;
return {
id: doc.id,
module: 'kontext',
table: 'agentKontextDocs',
title: 'Agent-Kontext',
content: doc.content,
};
} catch (err) {
console.warn('[MissionRunner] agent kontext load failed:', err);
return null;
}
}
/** Run the deep-research pipeline against the mission objective and
* collapse its summary + sources into one ResolvedInput formatted so
* the planner can copy URLs into save_news_article calls. */
/** Stringify a tool-output payload for the reasoning loop's next
* prompt. Keeps the blob compact; LLM context windows are finite and
* a raw JSON.stringify of a 200-row Dexie dump wastes tokens. */
function formatToolOutputsForPrompt(
outputs: Array<{ step: PlannedStep; message: string; data: unknown }>
): string {
const lines: string[] = [
'Ausgaben der zuletzt ausgeführten Auto-Tools. Nutze diese Daten um die Mission weiterzuführen — z.B. für jede gelistete Notiz einen add_tag_to_note Aufruf pro Notiz.',
'',
];
for (const o of outputs) {
lines.push(`### ${o.step.toolName}(${JSON.stringify(o.step.params)})`);
lines.push(o.message);
if (o.data !== undefined && o.data !== null) {
const json = safeStringify(o.data, 4000);
lines.push('```json', json, '```');
}
lines.push('');
}
return lines.join('\n');
}
/** Short form for the debug-panel loopSteps log. */
function formatToolOutputPreview(message: string, data: unknown): string {
if (data === undefined || data === null) return message;
const json = safeStringify(data, 400);
return `${message}\n${json}`;
}
function safeStringify(value: unknown, limit: number): string {
try {
const s = JSON.stringify(value, null, 2);
return s.length > limit ? s.slice(0, limit) + '\n… (truncated)' : s;
} catch {
return String(value);
}
}
interface WebResearchOutcome {
input: ResolvedInput;
sourceCount: number;
@@ -689,9 +435,6 @@ interface WebResearchOutcome {
async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | null> {
// RSS-based news research via news-research module: discoverByQuery
// finds matching feeds, searchFeeds ranks recent articles by relevance.
// Robust (own infra, no external SearXNG dependency), free (no credits),
// and the documented happy-path for the AI companion's news flow.
// Detect language hint from objective: German chars/words → de, else en.
const objective = mission.objective;
const isGerman = /[äöüß]|recherchier|aktuelle|neueste|finde|suche/i.test(objective);
const language = isGerman ? 'de' : 'en';
@@ -699,9 +442,6 @@ async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | null> {
const discovered = await discoverByQuery(objective, language);
const feedUrls = discovered.feeds.slice(0, 10).map((f) => f.url);
if (feedUrls.length === 0) {
// No feeds discovered — surface as failure so the planner doesn't
// pretend it has data. Caller wraps this in a "research failed"
// ResolvedInput.
throw new Error(
`news-research: keine RSS-Feeds für "${objective}" gefunden (${discovered.searched ?? 0} Quellen abgesucht).`
);
@@ -732,8 +472,6 @@ async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | null> {
return {
input: {
// Stable ID so re-running the same mission replaces the prior
// research input instead of appending duplicates.
id: `news-research-${mission.id}`,
module: 'news-research',
table: 'rssArticles',
@@ -744,25 +482,3 @@ async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | null> {
summary: `${articles.length} Artikel aus ${feedUrls.length} Feeds.`,
};
}
/**
* Scan all active missions whose `nextRunAt` has passed and run them once
* each. Used by the foreground tick that wires this into `+layout.svelte`.
* Safe to call concurrently; each mission run is independent.
*/
export async function runDueMissions(
now: Date,
deps: MissionRunnerDeps
): Promise<RunMissionResult[]> {
const { listMissions } = await import('./store');
const due = await listMissions({ dueBefore: now.toISOString() });
const results: RunMissionResult[] = [];
for (const m of due) {
try {
results.push(await runMission(m.id, deps));
} catch (err) {
console.error(`[MissionRunner] mission ${m.id} run threw:`, err);
}
}
return results;
}

View file

@@ -1,8 +1,8 @@
/**
* Production wiring for the Mission Runner.
*
* Connects the dependency-injected `runMission` to the real LlmOrchestrator
* (via `aiPlanTask`) and drives `runDueMissions` on a foreground interval.
* Connects the dependency-injected runner to the real mana-llm client
* and drives `runDueMissions` on a foreground interval.
*
* Use pattern:
*
@@ -10,14 +10,12 @@
* import { startMissionTick } from '$lib/data/ai/missions/setup';
* onMount(() => startMissionTick());
*
* The tick is intentionally foreground-only: the Runner requires the
* LlmOrchestrator which needs WebGPU / network. A background service for
* offline-of-tab execution is tracked as Phase 7; see
* COMPANION_BRAIN_ARCHITECTURE.md §20.5.
* The tick is intentionally foreground-only for now; a background
* service worker for offline-of-tab execution is tracked as Phase 7;
* see COMPANION_BRAIN_ARCHITECTURE.md §20.5.
*/
import { llmOrchestrator } from '@mana/shared-llm';
import { aiPlanTask } from '$lib/llm-tasks/ai-plan';
import { createManaLlmClient } from './llm-client';
import { runDueMissions, type MissionRunnerDeps } from './runner';
import { registerDefaultInputResolvers } from './default-resolvers';
import { runAgentsBootstrap } from '../agents/bootstrap';
@@ -29,20 +27,13 @@ import { runAgentsBootstrap } from '../agents/bootstrap';
import '$lib/modules/meditate/seed';
import '$lib/modules/habits/seed';
import '$lib/companion/goals/seed';
import type { AiPlanInput, AiPlanOutput } from './planner/types';
/** Default interval between tick scans. One minute is fine for foreground use. */
const DEFAULT_TICK_INTERVAL_MS = 60_000;
/** Swap-in planner that routes through the real LLM orchestrator. */
const productionPlan = async (input: AiPlanInput): Promise<AiPlanOutput> => {
const result = await llmOrchestrator.run(aiPlanTask, input);
return result.value;
};
export const productionDeps: MissionRunnerDeps = {
plan: productionPlan,
// stageStep defaults to the policy-gated executor — nothing to override here.
llm: createManaLlmClient(),
// model + executeTool defaults handled inside the runner.
};
let tickHandle: ReturnType<typeof setInterval> | null = null;

View file

@@ -1,22 +1,21 @@
/**
* Tool Executor validates parameters, resolves AI policy, and runs or
* stages the tool by name.
* Tool Executor validates parameters, resolves AI policy, and runs the tool.
*
* Call paths:
* - User action from the UI: `executeTool(name, params)` with no actor →
* ambient `USER_ACTOR`, policy returns `auto`, tool runs directly.
* - AI in the companion orchestrator: `executeTool(name, params, aiActor)` →
* policy resolves per-tool; `propose` writes a Proposal and returns
* a success result carrying the proposal id, `auto` executes, `deny`
* refuses.
* - Approval path: proposal store calls `executeToolRaw(name, params)`
* under `runAsAsync(aiActor, ...)` → same validation, but no policy.
* Policy semantics post-migration to native function-calling:
* - `auto` execute directly under the actor's scope
* - `deny` refuse with a ToolResult error (the runner turns this into
* a tool-message the LLM can react to)
*
* There is no proposal/approval gate in this pipeline anymore; the
* Workbench Timeline plus per-iteration Revert is the user's review
* surface. Tools flagged as `propose` in the catalog are treated as
* `auto` here; the distinction only matters as legacy metadata that
* higher layers (UI, analytics) may still read.
*/
import { getTool } from './registry';
import { runAsAsync, USER_ACTOR } from '../events/actor';
import { resolvePolicy } from '../ai/policy';
import { createProposal } from '../ai/proposals/store';
import { getAgent } from '../ai/agents/store';
import type { Actor } from '../events/actor';
import type { AiPolicy } from '@mana/shared-ai';
@@ -37,11 +36,9 @@ export async function executeTool(
const effectiveActor: Actor = actor ?? USER_ACTOR;
// Multi-Agent Workbench (Phase 4): policy lives on the agent. When
// the actor is AI, look up the owning agent and use its policy. If
// the agent record is missing (legacy write, deleted agent, race),
// resolvePolicy falls back to the user-level DEFAULT_AI_POLICY via
// its optional-argument default.
// Agent-scoped policy: the AI actor may have a per-agent policy
// override. If the agent record is missing (deleted / legacy /
// race), resolvePolicy falls back to the user-level default.
let agentPolicy: AiPolicy | undefined;
if (effectiveActor.kind === 'ai') {
const agent = await getAgent(effectiveActor.principalId);
@@ -56,25 +53,7 @@
};
}
if (decision === 'propose') {
// Only ai actors can hit `propose` — resolvePolicy short-circuits
// user/system to `auto`. Narrow defensively in case policy is swapped.
if (effectiveActor.kind !== 'ai') {
return { success: false, message: `propose policy requires an AI actor` };
}
const proposal = await createProposal({
actor: effectiveActor,
intent: { kind: 'toolCall', toolName: name, params },
rationale: effectiveActor.rationale,
});
return {
success: true,
data: { proposalId: proposal.id, status: 'pending' },
message: `Vorgeschlagen: "${name}" wartet auf Freigabe.`,
};
}
// decision === 'auto'
// `auto` or `propose` both execute here — see file-level comment.
return runAsAsync(effectiveActor, () => runValidatedTool(tool, params));
}
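
From the loop's perspective a deny now reads like this (tool name,
import paths, and refusal text are illustrative):

    import { executeTool } from './executor';
    import type { Actor } from '../events/actor';

    async function denyDemo(aiActor: Extract<Actor, { kind: 'ai' }>) {
      const result = await executeTool('some_denied_tool', {}, aiActor);
      // result ≈ { success: false, message: '…' } (the deny refusal).
      // The runner returns this ToolResult from onToolCall, and
      // runPlannerLoop appends it as a role:'tool' message, so the LLM
      // sees the refusal and can re-plan instead of hard-failing.
      return result;
    }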