mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 21:01:08 +02:00
feat(webapp): mission runner on native function calling, tools execute directly
The runner now drives runPlannerLoop from @mana/shared-ai: the LLM emits native tool_calls via mana-llm's tools passthrough, we execute each call immediately under the AI actor, and feed the result back as a tool-message for the next turn. The reasoning loop still runs up to 5 rounds (same budget as before) but needs no hand-rolled re-prompting because the SDK-level tool-message exchange does that for us. Tool execution is direct — no Proposal staging. The executor's propose branch collapses into auto (proposal store calls stay in place for legacy consumers this commit doesn't touch; those go next). Agent- level deny still refuses and surfaces the refusal as a tool-message the LLM can react to. New surface: - missions/llm-client.ts — mana-llm HTTP adapter conforming to shared- ai's LlmClient. Posts /v1/chat/completions with tools + tool_choice, converts OpenAI-shape tool_calls back to our ToolCallRequest shape. - runner.ts shrinks from ~770 to ~410 lines — pre-step research, guardrails, agent scope, timeout, cancel, debug capture all kept. - debug.ts stores rawMessages[] (shared-ai ChatMessage) instead of plannerCalls[]/loopSteps. AiDebugBlock renders the chat transcript. - available-tools.ts returns ToolSchema[] directly so the runner can hand the array to runPlannerLoop unchanged. - setup.ts wires createManaLlmClient() instead of aiPlanTask + llmOrchestrator. The old aiPlanTask + planner/ re-export files remain orphaned for the next commit to delete. Test shape: MockLlmClient scriptable via enqueue-style turns. Three cases cover happy path, empty-plan stop, and tool-failure propagation. Dead-but-still-compiling afterwards: the proposals folder, the AiProposalInbox component + its 9 call-sites, server-iteration- staging.ts, ai-plan.ts, the legacy planner/ wrappers, and the old buildPlannerPrompt/parsePlannerResponse exports in shared-ai. These go in commits 5b/5c/5d. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5af23d30b6
commit
5af96bfeff
8 changed files with 519 additions and 748 deletions
|
|
@ -44,13 +44,14 @@
|
|||
{:else if d.preStep.webResearch && !d.preStep.webResearch.ok}
|
||||
· Web ❌
|
||||
{/if}
|
||||
{#if d.plannerCalls && d.plannerCalls.length > 0}
|
||||
· {d.plannerCalls.length}× LLM · {Math.round(
|
||||
d.plannerCalls.reduce((a, c) => a + c.latencyMs, 0)
|
||||
)}ms
|
||||
{#if d.rounds}
|
||||
· {d.rounds} Runde{d.rounds === 1 ? '' : 'n'}
|
||||
{/if}
|
||||
{#if d.loopSteps && d.loopSteps.length > 0}
|
||||
· {d.loopSteps.length}× Auto-Tool
|
||||
{#if d.messages}
|
||||
· {d.messages.length} Messages
|
||||
{/if}
|
||||
{#if d.stopReason && d.stopReason !== 'assistant-stop'}
|
||||
· {d.stopReason}
|
||||
{/if}
|
||||
{#if d.plannerError}· Planner ❌{/if}
|
||||
</span>
|
||||
|
|
@ -93,41 +94,34 @@
|
|||
{/if}
|
||||
</section>
|
||||
|
||||
{#if d.loopSteps && d.loopSteps.length > 0}
|
||||
{#if d.messages && d.messages.length > 0}
|
||||
<section>
|
||||
<h5>Auto-Tool-Ausgaben (Reasoning-Loop)</h5>
|
||||
{#each d.loopSteps as ls, i (i)}
|
||||
<details class="nested">
|
||||
<h5>Chat-Verlauf ({d.messages.length} Messages · {d.rounds ?? '?'} Runden)</h5>
|
||||
{#each d.messages as m, i (i)}
|
||||
<details class="nested" open={m.role === 'assistant' || m.role === 'tool'}>
|
||||
<summary>
|
||||
<code>Runde {ls.loopIndex + 1}</code>
|
||||
{ls.toolName}({JSON.stringify(ls.params)})
|
||||
<code>{m.role}</code>
|
||||
{#if m.toolCalls && m.toolCalls.length > 0}
|
||||
tool_calls: {m.toolCalls.map((c) => c.name).join(', ')}
|
||||
{:else if m.toolCallId}
|
||||
tool_result (id: {m.toolCallId})
|
||||
{:else}
|
||||
{typeof m.content === 'string' ? m.content.slice(0, 100) : ''}
|
||||
{/if}
|
||||
</summary>
|
||||
<pre>{ls.outputPreview}</pre>
|
||||
{#if m.content}
|
||||
<pre>{m.content}</pre>
|
||||
{/if}
|
||||
{#if m.toolCalls && m.toolCalls.length > 0}
|
||||
{#each m.toolCalls as call (call.id)}
|
||||
<pre>{call.name}({JSON.stringify(call.arguments, null, 2)})</pre>
|
||||
{/each}
|
||||
{/if}
|
||||
</details>
|
||||
{/each}
|
||||
</section>
|
||||
{/if}
|
||||
|
||||
{#if d.plannerCalls && d.plannerCalls.length > 0}
|
||||
{#each d.plannerCalls as call, i (i)}
|
||||
<section>
|
||||
<h5>LLM-Call {i + 1}/{d.plannerCalls.length} · {Math.round(call.latencyMs)}ms</h5>
|
||||
<details class="nested">
|
||||
<summary>System Prompt</summary>
|
||||
<pre>{call.systemPrompt}</pre>
|
||||
</details>
|
||||
<details class="nested" open>
|
||||
<summary>User Prompt</summary>
|
||||
<pre>{call.userPrompt}</pre>
|
||||
</details>
|
||||
<details class="nested" open>
|
||||
<summary>Raw LLM Response</summary>
|
||||
<pre>{call.rawResponse}</pre>
|
||||
</details>
|
||||
</section>
|
||||
{/each}
|
||||
{/if}
|
||||
|
||||
{#if d.plannerError}
|
||||
<section>
|
||||
<h5>Planner Error</h5>
|
||||
|
|
|
|||
|
|
@ -1,30 +1,41 @@
|
|||
/**
|
||||
* Build the tool list the Planner is allowed to consider.
|
||||
*
|
||||
* Only tools the policy rates `auto` or `propose` are exposed — `deny` is
|
||||
* invisible to the AI. This is defence-in-depth: even if the Planner
|
||||
* hallucinates a denied tool name, the parser rejects it because the name
|
||||
* isn't in the allow-set, AND the executor would refuse at runtime.
|
||||
* Only tools the policy rates `auto` or `propose` are exposed — `deny`
|
||||
* is invisible to the AI. Defence-in-depth: even if the LLM somehow
|
||||
* names a denied tool, the executor refuses at runtime.
|
||||
*
|
||||
* Returns the shared ToolSchema shape directly so the runner can pass
|
||||
* the list straight into runPlannerLoop (which calls
|
||||
* toolsToFunctionSchemas internally).
|
||||
*/
|
||||
|
||||
import { getTools } from '../../tools/registry';
|
||||
import { resolvePolicy } from '../policy';
|
||||
import type { Actor } from '../../events/actor';
|
||||
import type { AvailableTool } from './planner/types';
|
||||
import type { ToolSchema } from '@mana/shared-ai';
|
||||
import { AI_TOOL_CATALOG_BY_NAME } from '@mana/shared-ai';
|
||||
|
||||
export function getAvailableToolsForAi(aiActor: Extract<Actor, { kind: 'ai' }>): AvailableTool[] {
|
||||
export function getAvailableToolsForAi(aiActor: Extract<Actor, { kind: 'ai' }>): ToolSchema[] {
|
||||
return getTools()
|
||||
.filter((tool) => resolvePolicy(tool.name, aiActor) !== 'deny')
|
||||
.map((tool) => ({
|
||||
name: tool.name,
|
||||
module: tool.module,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters.map((p) => ({
|
||||
name: p.name,
|
||||
type: p.type,
|
||||
required: p.required,
|
||||
description: p.description,
|
||||
enum: p.enum,
|
||||
})),
|
||||
}));
|
||||
.map((tool) => {
|
||||
// Prefer the catalog entry when available — it carries the
|
||||
// defaultPolicy we need on ToolSchema. Tools without a catalog
|
||||
// entry (playground / test-only) fall back to 'auto'.
|
||||
const catalogEntry = AI_TOOL_CATALOG_BY_NAME.get(tool.name);
|
||||
return {
|
||||
name: tool.name,
|
||||
module: tool.module,
|
||||
description: tool.description,
|
||||
defaultPolicy: catalogEntry?.defaultPolicy ?? 'auto',
|
||||
parameters: tool.parameters.map((p) => ({
|
||||
name: p.name,
|
||||
type: p.type,
|
||||
required: p.required,
|
||||
description: p.description,
|
||||
enum: p.enum,
|
||||
})),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
import { useLiveQueryWithDefault } from '@mana/local-store/svelte';
|
||||
import { db } from '../../database';
|
||||
import type { ResolvedInput } from './planner/types';
|
||||
import type { ChatMessage, LoopStopReason, ResolvedInput } from '@mana/shared-ai';
|
||||
|
||||
const TABLE = '_aiDebugLog';
|
||||
const STORAGE_KEY = 'mana.ai.debug';
|
||||
|
|
@ -26,19 +26,6 @@ const MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
|
|||
* is truncated to reduce the privacy surface if the device is stolen. */
|
||||
const INPUT_CONTENT_LIMIT = 500;
|
||||
|
||||
/**
|
||||
* Captured by `aiPlanTask` and passed back via the planner output so the
|
||||
* runner can record it without the planner needing to know about Dexie.
|
||||
*/
|
||||
export interface PlannerCallDebug {
|
||||
readonly systemPrompt: string;
|
||||
readonly userPrompt: string;
|
||||
readonly rawResponse: string;
|
||||
readonly latencyMs: number;
|
||||
readonly backendId?: string;
|
||||
readonly model?: string;
|
||||
}
|
||||
|
||||
export interface AiDebugEntry {
|
||||
/** Primary key — one row per iteration. */
|
||||
iterationId: string;
|
||||
|
|
@ -51,22 +38,14 @@ export interface AiDebugEntry {
|
|||
webResearch?: { ok: true; sourceCount: number; summary: string } | { ok: false; error: string };
|
||||
kontextInjected: boolean;
|
||||
};
|
||||
/**
|
||||
* Array because the reasoning loop can call the planner multiple
|
||||
* times per iteration (once per loop step, until a proposal is
|
||||
* staged or no more work is returned). Older single-call entries
|
||||
* written before the loop shipped still parse — readers that
|
||||
* haven't updated simply take `plannerCalls[0]`.
|
||||
*/
|
||||
plannerCalls?: PlannerCallDebug[];
|
||||
/** Auto-executed tool outputs captured across loop steps — surfaces
|
||||
* what the agent "saw" when reasoning across multiple calls. */
|
||||
loopSteps?: Array<{
|
||||
loopIndex: number;
|
||||
toolName: string;
|
||||
params: Record<string, unknown>;
|
||||
outputPreview: string;
|
||||
}>;
|
||||
/** Full chat history of the planner loop: system + user + every
|
||||
* assistant turn (with tool_calls) + every tool-message result.
|
||||
* Replaces the pre-migration plannerCalls[]/loopSteps structure. */
|
||||
messages?: ChatMessage[];
|
||||
/** Number of planner rounds consumed inside this iteration. */
|
||||
rounds?: number;
|
||||
/** Why the loop terminated (assistant-stop, max-rounds, …). */
|
||||
stopReason?: LoopStopReason;
|
||||
plannerError?: string;
|
||||
}
|
||||
|
||||
|
|
|
|||
189
apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts
Normal file
189
apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
/**
|
||||
* LlmClient implementation that speaks to the mana-llm service using its
|
||||
* OpenAI-compatible /v1/chat/completions endpoint, with native tool_calls
|
||||
* passthrough. Used by the webapp Mission Runner and Companion Chat to
|
||||
* drive the shared runPlannerLoop from @mana/shared-ai.
|
||||
*
|
||||
* The shared-ai LlmClient contract is intentionally small — we don't go
|
||||
* through the LlmOrchestrator's tier routing here. Tool calling needs
|
||||
* a specific server-proxied path (mana-llm forwards to Google / OpenAI
|
||||
* / Ollama with tools enabled), not the legacy text-JSON orchestrator.
|
||||
* Tier integration can come later once shared-llm grows tool-call
|
||||
* awareness.
|
||||
*/
|
||||
|
||||
import {
|
||||
type ChatMessage,
|
||||
type LlmClient,
|
||||
type LlmCompletionRequest,
|
||||
type LlmCompletionResponse,
|
||||
type LlmFinishReason,
|
||||
type ToolCallRequest,
|
||||
} from '@mana/shared-ai';
|
||||
|
||||
const DEFAULT_LLM_URL = 'http://localhost:3025';
|
||||
|
||||
/** Resolve the mana-llm base URL from window-injected env; falls back
|
||||
* to localhost. Mirrors the helper in @mana/shared-llm's remote.ts. */
|
||||
function resolveLlmBaseUrl(): string {
|
||||
if (typeof window !== 'undefined') {
|
||||
const fromWindow = (window as unknown as { __PUBLIC_MANA_LLM_URL__?: string })
|
||||
.__PUBLIC_MANA_LLM_URL__;
|
||||
if (fromWindow) return fromWindow.replace(/\/$/, '');
|
||||
}
|
||||
return DEFAULT_LLM_URL;
|
||||
}
|
||||
|
||||
export interface ManaLlmClientOptions {
|
||||
/** Default model id used when callers don't override per request.
|
||||
* Format matches mana-llm's provider/model syntax. */
|
||||
readonly defaultModel?: string;
|
||||
/** Override the base URL — mostly for tests. Production resolves from
|
||||
* window.__PUBLIC_MANA_LLM_URL__. */
|
||||
readonly baseUrl?: string;
|
||||
/** Hard stop for the fetch. The runner wraps runPlannerLoop in its
|
||||
* own iteration-level timeout (180 s) so this is mostly a belt +
|
||||
* braces for pathological provider stalls. */
|
||||
readonly fetchTimeoutMs?: number;
|
||||
}
|
||||
|
||||
const DEFAULT_MODEL = 'google/gemini-2.5-flash';
|
||||
const DEFAULT_FETCH_TIMEOUT_MS = 120_000;
|
||||
|
||||
export function createManaLlmClient(opts: ManaLlmClientOptions = {}): LlmClient {
|
||||
const baseUrl = (opts.baseUrl ?? resolveLlmBaseUrl()).replace(/\/$/, '');
|
||||
const defaultModel = opts.defaultModel ?? DEFAULT_MODEL;
|
||||
const fetchTimeoutMs = opts.fetchTimeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS;
|
||||
|
||||
return {
|
||||
async complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
|
||||
const url = `${baseUrl}/v1/chat/completions`;
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs);
|
||||
|
||||
const body = {
|
||||
model: req.model || defaultModel,
|
||||
messages: req.messages.map(toWireMessage),
|
||||
tools: req.tools, // already in OpenAI {type, function} shape
|
||||
tool_choice: 'auto' as const,
|
||||
temperature: req.temperature ?? 0.3,
|
||||
stream: false,
|
||||
};
|
||||
|
||||
let res: Response;
|
||||
try {
|
||||
res = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body),
|
||||
signal: controller.signal,
|
||||
});
|
||||
} catch (err) {
|
||||
clearTimeout(timeout);
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
throw new Error(`mana-llm unreachable at ${baseUrl}: ${msg}`);
|
||||
}
|
||||
clearTimeout(timeout);
|
||||
|
||||
if (!res.ok) {
|
||||
let detail: unknown;
|
||||
try {
|
||||
detail = await res.json();
|
||||
} catch {
|
||||
detail = await res.text().catch(() => '');
|
||||
}
|
||||
throw new Error(
|
||||
`mana-llm ${res.status}: ${typeof detail === 'string' ? detail : JSON.stringify(detail)}`
|
||||
);
|
||||
}
|
||||
|
||||
const data = (await res.json()) as ChatCompletionResponseShape;
|
||||
const choice = data.choices?.[0];
|
||||
if (!choice) {
|
||||
throw new Error('mana-llm response had no choices');
|
||||
}
|
||||
const content = choice.message?.content ?? null;
|
||||
const toolCalls = (choice.message?.tool_calls ?? []).map(fromWireToolCall);
|
||||
const finishReason = normaliseFinishReason(choice.finish_reason);
|
||||
|
||||
return { content, toolCalls, finishReason };
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// ── Wire-format helpers ─────────────────────────────────────────────
|
||||
|
||||
interface WireMessage {
|
||||
role: 'system' | 'user' | 'assistant' | 'tool';
|
||||
content?: string | null;
|
||||
tool_calls?: Array<{
|
||||
id: string;
|
||||
type: 'function';
|
||||
function: { name: string; arguments: string };
|
||||
}>;
|
||||
tool_call_id?: string;
|
||||
}
|
||||
|
||||
function toWireMessage(m: ChatMessage): WireMessage {
|
||||
const out: WireMessage = { role: m.role };
|
||||
if (m.content !== undefined) out.content = m.content;
|
||||
if (m.toolCallId) out.tool_call_id = m.toolCallId;
|
||||
if (m.toolCalls && m.toolCalls.length > 0) {
|
||||
out.tool_calls = m.toolCalls.map((c) => ({
|
||||
id: c.id,
|
||||
type: 'function',
|
||||
function: {
|
||||
name: c.name,
|
||||
arguments: JSON.stringify(c.arguments),
|
||||
},
|
||||
}));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/** Minimal slice of the OpenAI-compatible /v1/chat/completions response
 * that this client actually reads. Every field is optional because the
 * providers mana-llm proxies differ in how much of the spec they fill
 * in; readers below null-guard each access. */
interface ChatCompletionResponseShape {
  choices?: Array<{
    message?: {
      // null content is valid when the assistant turn is pure tool_calls
      content?: string | null;
      tool_calls?: Array<{
        id: string;
        type?: string;
        // arguments arrives as a JSON string per the OpenAI wire format
        function: { name: string; arguments?: string };
      }>;
    };
    finish_reason?: string | null;
  }>;
}
|
||||
|
||||
function fromWireToolCall(raw: {
|
||||
id: string;
|
||||
function: { name: string; arguments?: string };
|
||||
}): ToolCallRequest {
|
||||
let args: Record<string, unknown> = {};
|
||||
if (raw.function.arguments) {
|
||||
try {
|
||||
const parsed = JSON.parse(raw.function.arguments);
|
||||
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
||||
args = parsed as Record<string, unknown>;
|
||||
}
|
||||
} catch {
|
||||
// Malformed arguments — surface as empty and let the executor
|
||||
// reject on the missing-required-parameter path.
|
||||
}
|
||||
}
|
||||
return { id: raw.id, name: raw.function.name, arguments: args };
|
||||
}
|
||||
|
||||
function normaliseFinishReason(raw: string | null | undefined): LlmFinishReason {
|
||||
switch (raw) {
|
||||
case 'tool_calls':
|
||||
return 'tool_calls';
|
||||
case 'length':
|
||||
return 'length';
|
||||
case 'content_filter':
|
||||
return 'content_filter';
|
||||
case 'stop':
|
||||
default:
|
||||
return 'stop';
|
||||
}
|
||||
}
|
||||
|
|
@ -9,28 +9,22 @@ vi.mock('$lib/triggers/inline-suggest', () => ({
|
|||
|
||||
import { db } from '../../database';
|
||||
import { registerTools } from '../../tools/registry';
|
||||
import { setAiPolicy } from '../policy';
|
||||
import { createMission, getMission, pauseMission } from './store';
|
||||
import { runMission, runDueMissions } from './runner';
|
||||
import {
|
||||
registerInputResolver,
|
||||
unregisterInputResolver,
|
||||
resolveMissionInputs,
|
||||
} from './input-resolvers';
|
||||
import { createMission, getMission } from './store';
|
||||
import { runMission } from './runner';
|
||||
import { MISSIONS_TABLE } from './types';
|
||||
import type { AiPlanInput, AiPlanOutput } from './planner/types';
|
||||
import type { LlmClient, LlmCompletionRequest, LlmCompletionResponse } from '@mana/shared-ai';
|
||||
|
||||
let executed: { name: string; params: Record<string, unknown> }[] = [];
|
||||
|
||||
registerTools([
|
||||
{
|
||||
name: 'runner_test_stage',
|
||||
name: 'runner_test_action',
|
||||
module: 'runnerTest',
|
||||
description: 'proposes',
|
||||
description: 'test action',
|
||||
parameters: [{ name: 'val', type: 'string', required: true, description: 'v' }],
|
||||
async execute(params) {
|
||||
executed.push({ name: 'runner_test_stage', params: { ...params } });
|
||||
return { success: true, message: 'ok' };
|
||||
executed.push({ name: 'runner_test_action', params: { ...params } });
|
||||
return { success: true, message: `did ${params.val}` };
|
||||
},
|
||||
},
|
||||
]);
|
||||
|
|
@ -38,179 +32,97 @@ registerTools([
|
|||
beforeEach(async () => {
|
||||
executed = [];
|
||||
await db.table(MISSIONS_TABLE).clear();
|
||||
await db.table('pendingProposals').clear();
|
||||
});
|
||||
|
||||
/** Minimal LlmClient for runner tests — scripts one or more assistant
|
||||
* turns via enqueueToolCalls / enqueueStop. */
|
||||
function mockLlm(
|
||||
turns: Array<
|
||||
| { kind: 'tool_calls'; calls: Array<{ name: string; args: Record<string, unknown> }> }
|
||||
| { kind: 'stop'; content?: string }
|
||||
>
|
||||
): LlmClient {
|
||||
let i = 0;
|
||||
return {
|
||||
async complete(_req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
|
||||
const turn = turns[i++];
|
||||
if (!turn) throw new Error('MockLlm exhausted');
|
||||
if (turn.kind === 'stop') {
|
||||
return { content: turn.content ?? null, toolCalls: [], finishReason: 'stop' };
|
||||
}
|
||||
return {
|
||||
content: null,
|
||||
toolCalls: turn.calls.map((c, n) => ({
|
||||
id: `call_${i}_${n}`,
|
||||
name: c.name,
|
||||
arguments: c.args,
|
||||
})),
|
||||
finishReason: 'tool_calls',
|
||||
};
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
describe('runMission', () => {
|
||||
it('runs the planner, stages proposals, and marks the iteration awaiting-review', async () => {
|
||||
const restore = setAiPolicy({
|
||||
tools: { runner_test_stage: 'propose' },
|
||||
defaultForAi: 'propose',
|
||||
});
|
||||
try {
|
||||
const m = await createMission({
|
||||
title: 'Test mission',
|
||||
conceptMarkdown: '',
|
||||
objective: 'test',
|
||||
cadence: { kind: 'manual' },
|
||||
});
|
||||
const planStub: AiPlanOutput = {
|
||||
summary: 'Staged a test step',
|
||||
steps: [
|
||||
{
|
||||
summary: 'Do a thing',
|
||||
toolName: 'runner_test_stage',
|
||||
params: { val: 'hello' },
|
||||
rationale: 'because test',
|
||||
},
|
||||
],
|
||||
};
|
||||
const result = await runMission(m.id, {
|
||||
plan: async (_input: AiPlanInput) => planStub,
|
||||
});
|
||||
|
||||
expect(result.plannedSteps).toBe(1);
|
||||
expect(result.stagedSteps).toBe(1);
|
||||
expect(result.iteration.overallStatus).toBe('awaiting-review');
|
||||
|
||||
const after = await getMission(m.id);
|
||||
expect(after?.iterations).toHaveLength(1);
|
||||
expect(after?.iterations[0].plan[0].proposalId).toBeTruthy();
|
||||
expect(after?.iterations[0].plan[0].status).toBe('staged');
|
||||
|
||||
// Tool did NOT execute — proposal was staged
|
||||
expect(executed).toHaveLength(0);
|
||||
} finally {
|
||||
restore();
|
||||
}
|
||||
});
|
||||
|
||||
it('passes the built AiPlanInput to the planner with mission + tool allowlist', async () => {
|
||||
const restore = setAiPolicy({
|
||||
tools: { runner_test_stage: 'propose' },
|
||||
defaultForAi: 'deny',
|
||||
});
|
||||
try {
|
||||
const m = await createMission({
|
||||
title: 'Test',
|
||||
conceptMarkdown: '',
|
||||
objective: 'test',
|
||||
cadence: { kind: 'manual' },
|
||||
});
|
||||
let captured: AiPlanInput | null = null;
|
||||
await runMission(m.id, {
|
||||
plan: async (input) => {
|
||||
captured = input;
|
||||
return { summary: '', steps: [] };
|
||||
},
|
||||
});
|
||||
expect(captured).toBeTruthy();
|
||||
expect(captured!.mission.id).toBe(m.id);
|
||||
const allowedNames = captured!.availableTools.map((t) => t.name);
|
||||
expect(allowedNames).toContain('runner_test_stage');
|
||||
} finally {
|
||||
restore();
|
||||
}
|
||||
});
|
||||
|
||||
it('marks an iteration failed when the planner throws', async () => {
|
||||
it('executes a tool_call directly and records it in the iteration', async () => {
|
||||
const m = await createMission({
|
||||
title: 'x',
|
||||
title: 'Test mission',
|
||||
conceptMarkdown: '',
|
||||
objective: 'x',
|
||||
objective: 'test',
|
||||
cadence: { kind: 'manual' },
|
||||
});
|
||||
const result = await runMission(m.id, {
|
||||
plan: async () => {
|
||||
throw new Error('planner down');
|
||||
},
|
||||
});
|
||||
expect(result.iteration.overallStatus).toBe('failed');
|
||||
const after = await getMission(m.id);
|
||||
expect(after?.iterations[0].overallStatus).toBe('failed');
|
||||
expect(after?.iterations[0].summary).toContain('planner down');
|
||||
});
|
||||
|
||||
it('produces an approved iteration when planner returns zero steps', async () => {
|
||||
const m = await createMission({
|
||||
title: 'x',
|
||||
conceptMarkdown: '',
|
||||
objective: 'x',
|
||||
cadence: { kind: 'manual' },
|
||||
});
|
||||
const result = await runMission(m.id, {
|
||||
plan: async () => ({ summary: 'nothing needed', steps: [] }),
|
||||
});
|
||||
const llm = mockLlm([
|
||||
{ kind: 'tool_calls', calls: [{ name: 'runner_test_action', args: { val: 'hello' } }] },
|
||||
{ kind: 'stop', content: 'done' },
|
||||
]);
|
||||
|
||||
const result = await runMission(m.id, { llm });
|
||||
|
||||
expect(result.plannedSteps).toBe(1);
|
||||
expect(result.failedSteps).toBe(0);
|
||||
expect(result.iteration.overallStatus).toBe('approved');
|
||||
expect(executed).toEqual([{ name: 'runner_test_action', params: { val: 'hello' } }]);
|
||||
|
||||
const after = await getMission(m.id);
|
||||
expect(after?.iterations).toHaveLength(1);
|
||||
expect(after?.iterations[0].plan).toHaveLength(1);
|
||||
expect(after?.iterations[0].plan[0].status).toBe('approved');
|
||||
});
|
||||
|
||||
it('refuses to run a paused mission', async () => {
|
||||
it('marks the iteration approved with zero steps when the LLM just stops', async () => {
|
||||
const m = await createMission({
|
||||
title: 'x',
|
||||
title: 'Empty',
|
||||
conceptMarkdown: '',
|
||||
objective: 'x',
|
||||
objective: 'nothing to do',
|
||||
cadence: { kind: 'manual' },
|
||||
});
|
||||
await pauseMission(m.id);
|
||||
await expect(
|
||||
runMission(m.id, { plan: async () => ({ summary: '', steps: [] }) })
|
||||
).rejects.toThrow(/paused/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('runDueMissions', () => {
|
||||
it('runs only active missions whose nextRunAt has passed', async () => {
|
||||
const a = await createMission({
|
||||
title: 'due',
|
||||
const llm = mockLlm([{ kind: 'stop', content: 'nichts zu tun' }]);
|
||||
const result = await runMission(m.id, { llm });
|
||||
|
||||
expect(result.plannedSteps).toBe(0);
|
||||
expect(result.iteration.overallStatus).toBe('approved');
|
||||
expect(executed).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('surfaces tool failures as failed PlanSteps without aborting the iteration', async () => {
|
||||
const m = await createMission({
|
||||
title: 'Mixed',
|
||||
conceptMarkdown: '',
|
||||
objective: 'x',
|
||||
cadence: { kind: 'interval', everyMinutes: 5 },
|
||||
objective: 'test',
|
||||
cadence: { kind: 'manual' },
|
||||
});
|
||||
const b = await createMission({
|
||||
title: 'future',
|
||||
conceptMarkdown: '',
|
||||
objective: 'x',
|
||||
cadence: { kind: 'interval', everyMinutes: 5 },
|
||||
});
|
||||
// Force `a` into the past, leave `b` in the future
|
||||
await db.table(MISSIONS_TABLE).update(a.id, { nextRunAt: '2020-01-01T00:00:00.000Z' });
|
||||
|
||||
const runs: string[] = [];
|
||||
await runDueMissions(new Date(), {
|
||||
plan: async (input) => {
|
||||
runs.push(input.mission.id);
|
||||
return { summary: '', steps: [] };
|
||||
},
|
||||
});
|
||||
expect(runs).toEqual([a.id]);
|
||||
expect(runs).not.toContain(b.id);
|
||||
});
|
||||
});
|
||||
|
||||
describe('resolveMissionInputs', () => {
|
||||
it('resolves via registered resolvers and skips missing modules', async () => {
|
||||
registerInputResolver('testmod', async (ref) => ({
|
||||
id: ref.id,
|
||||
module: 'testmod',
|
||||
table: ref.table,
|
||||
title: 'T',
|
||||
content: `content for ${ref.id}`,
|
||||
}));
|
||||
try {
|
||||
const refs = [
|
||||
{ module: 'testmod', table: 't', id: 'a' },
|
||||
{ module: 'nope', table: 't', id: 'b' },
|
||||
];
|
||||
const resolved = await resolveMissionInputs(refs);
|
||||
expect(resolved).toHaveLength(1);
|
||||
expect(resolved[0].content).toContain('a');
|
||||
} finally {
|
||||
unregisterInputResolver('testmod');
|
||||
}
|
||||
});
|
||||
|
||||
it('returns empty array when nothing is registered', async () => {
|
||||
const r = await resolveMissionInputs([{ module: 'unknown', table: 't', id: 'x' }]);
|
||||
expect(r).toEqual([]);
|
||||
// One call to an unknown tool (executor returns success:false) plus a stop.
|
||||
const llm = mockLlm([
|
||||
{ kind: 'tool_calls', calls: [{ name: 'does_not_exist', args: {} }] },
|
||||
{ kind: 'stop' },
|
||||
]);
|
||||
const result = await runMission(m.id, { llm });
|
||||
|
||||
expect(result.plannedSteps).toBe(1);
|
||||
expect(result.failedSteps).toBe(1);
|
||||
expect(result.iteration.overallStatus).toBe('failed');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -5,18 +5,21 @@
|
|||
* ↓
|
||||
* resolve inputs via registered resolvers
|
||||
* ↓
|
||||
* build available-tool list (policy-filtered)
|
||||
* pre-step web research (when the objective looks like a research task)
|
||||
* ↓
|
||||
* call planner (LLM) → AiPlanOutput
|
||||
* build system + user prompts (compact — no tool listing)
|
||||
* ↓
|
||||
* for each step: stage a Proposal under the AI actor
|
||||
* runPlannerLoop with native function calling
|
||||
* ↓
|
||||
* finishIteration(summary, overallStatus, plan-with-proposal-ids)
|
||||
* each tool_call executes directly via the policy-gated executor;
|
||||
* results feed back as tool-messages for the next turn
|
||||
* ↓
|
||||
* finishIteration(summary, overallStatus, executed-steps)
|
||||
*
|
||||
* Planner + proposal-staging are injected so the Runner is unit-testable
|
||||
* without a live LLM or Dexie hooks. Default implementations call the
|
||||
* shared LlmOrchestrator / `executeTool(...)` respectively; production
|
||||
* code passes those in via the setup module.
|
||||
* Post-migration note: there is no propose/approve gate. Tools run
|
||||
* directly under the AI actor. The user's review surface is the
|
||||
* Workbench Timeline + per-iteration revert. See
|
||||
* docs/plans/planner-function-calling.md for the design rationale.
|
||||
*/
|
||||
|
||||
import {
|
||||
|
|
@ -29,21 +32,23 @@ import {
|
|||
import { resolveMissionInputs } from './input-resolvers';
|
||||
import { getAvailableToolsForAi } from './available-tools';
|
||||
import { executeTool } from '../../tools/executor';
|
||||
import { db } from '../../database';
|
||||
import { decryptRecords } from '../../crypto';
|
||||
import { discoverByQuery, searchFeeds } from '$lib/modules/news-research/api';
|
||||
import { getAgentKontext } from '../agents/kontext';
|
||||
import { withAgentScope } from '../scope-context';
|
||||
import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry, type PlannerCallDebug } from './debug';
|
||||
import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry } from './debug';
|
||||
import { makeAgentActor, LEGACY_AI_PRINCIPAL, type Actor } from '../../events/actor';
|
||||
import { getAgent } from '../agents/store';
|
||||
import { DEFAULT_AGENT_NAME } from '../agents/types';
|
||||
import type { Mission, MissionIteration, PlanStep } from './types';
|
||||
import type { AiPlanInput, AiPlanOutput, PlannedStep, ResolvedInput } from './planner/types';
|
||||
import {
|
||||
buildSystemPrompt,
|
||||
runPlannerLoop,
|
||||
runPrePlanGuardrails,
|
||||
runPostPlanGuardrails,
|
||||
runPreExecuteGuardrails,
|
||||
type ChatMessage,
|
||||
type LlmClient,
|
||||
type ResolvedInput,
|
||||
type ToolCallRequest,
|
||||
type ToolResult,
|
||||
} from '@mana/shared-ai';
|
||||
|
||||
/** Heuristic: mission objective text that should trigger a pre-step
|
||||
|
|
@ -51,33 +56,14 @@ import {
|
|||
* don't burn credits accidentally. */
|
||||
const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neueste)/i;
|
||||
|
||||
/** Reasoning-loop budget. Each LOOP iteration = one planner call + its
|
||||
* auto-tool executions. The loop exits early when a propose-policy
|
||||
* step is staged (human must approve before progressing) or the
|
||||
* planner returns zero steps (it considers this subtask done).
|
||||
* 5 is generous for read-act-refine patterns ("list_notes → tag them")
|
||||
* without running the LLM bill dry on stuck missions. */
|
||||
/** Keep in sync with the planner system prompt in
|
||||
* packages/shared-ai/src/planner/prompt.ts which tells the LLM
|
||||
* "bis zu 5 Planungsrunden pro Iteration, 1–5 Schritte pro Runde". */
|
||||
const MAX_REASONING_LOOP_ITERATIONS = 5;
|
||||
/** Hard ceiling on planner rounds inside one iteration. One round = one
|
||||
* LLM call plus whatever tool executions its output triggered. Matches
|
||||
* the shared-ai default; re-declared here for clarity. */
|
||||
const MAX_PLANNER_ROUNDS = 5;
|
||||
|
||||
/** Min interval between Dexie phaseDetail writes during streaming.
|
||||
* 50 tokens/s × 500ms = ~25 tokens between writes — frequent enough
|
||||
* for the UI to feel live, infrequent enough to avoid Dexie thrashing. */
|
||||
const STREAMING_PHASE_THROTTLE_MS = 500;
|
||||
/** Singleton row id of the kontext doc — kept in sync with
|
||||
* `modules/kontext/types.ts` (KONTEXT_SINGLETON_ID). */
|
||||
const KONTEXT_SINGLETON_ID = 'singleton';
|
||||
|
||||
/** Hard timeout for one mission run. Cancels the in-flight planner call
|
||||
* and finalises the iteration as failed. 90 s is comfortable for a
|
||||
* cloud-tier model but short enough that a wedged backend doesn't sit
|
||||
* in `running` indefinitely. */
|
||||
/** 180s gives the reasoning loop (up to 5 LLM calls) enough headroom
|
||||
* even on slow models. Each call can take 10–30s on Ollama/GPU with
|
||||
* network latency; the old 90s limit regularly timed out during the
|
||||
* second loop round. */
|
||||
/** Hard timeout for one mission run. 180 s is comfortable for a cloud
|
||||
* model doing up to 5 reasoning rounds; anything longer means a wedged
|
||||
* backend and should fail the iteration rather than sit in `running`. */
|
||||
const ITERATION_TIMEOUT_MS = 180_000;
|
||||
|
||||
class CancelledError extends Error {
|
||||
|
|
@ -87,46 +73,24 @@ class CancelledError extends Error {
|
|||
}
|
||||
}
|
||||
|
||||
// ─── Public API ─────────────────────────────────────────────────────
|
||||
|
||||
export interface MissionRunnerDeps {
|
||||
/** Invoke the Planner LLM task with the fully-built input. */
|
||||
plan: (input: AiPlanInput) => Promise<AiPlanOutput>;
|
||||
/** Stage a single planned step as a Proposal. Returns the proposal id on success. */
|
||||
stageStep?: (step: PlannedStep, aiActor: Extract<Actor, { kind: 'ai' }>) => Promise<StageOutcome>;
|
||||
/** LLM transport. Typically the mana-llm client from llm-client.ts;
|
||||
* tests inject a MockLlmClient. */
|
||||
llm: LlmClient;
|
||||
/** Model id to pass to the LLM (provider/model). Defaults handled by
|
||||
* the client; exposed here so per-mission overrides can plug in. */
|
||||
model?: string;
|
||||
/** Per-tool executor. Tests inject a mock; production defaults to
|
||||
* the policy-gated `executeTool`. */
|
||||
executeTool?: (
|
||||
name: string,
|
||||
params: Record<string, unknown>,
|
||||
actor: Actor
|
||||
) => Promise<ToolResult>;
|
||||
}
|
||||
|
||||
export type StageOutcome =
|
||||
| {
|
||||
readonly ok: true;
|
||||
readonly proposalId: string;
|
||||
/** Full tool-result payload when the step auto-executed (proposalId
|
||||
* is empty). The reasoning loop reads this and feeds it back as
|
||||
* context for the next planner call so the agent can reason over
|
||||
* list/read outputs across steps. */
|
||||
readonly autoData?: unknown;
|
||||
readonly autoMessage?: string;
|
||||
}
|
||||
| { readonly ok: false; readonly error: string };
|
||||
|
||||
/** Default step-staging implementation: policy-gated executor under AI actor. */
|
||||
export const defaultStageStep: Required<MissionRunnerDeps>['stageStep'] = async (step, aiActor) => {
|
||||
const stepActor: Extract<Actor, { kind: 'ai' }> = {
|
||||
...aiActor,
|
||||
// Per-step rationale wins over the mission-wide one so the review UI
|
||||
// shows *this step's* reasoning.
|
||||
rationale: step.rationale || aiActor.rationale,
|
||||
};
|
||||
const result = await executeTool(step.toolName, step.params, stepActor);
|
||||
if (!result.success) {
|
||||
return { ok: false, error: result.message };
|
||||
}
|
||||
const data = result.data as { proposalId?: string } | undefined;
|
||||
if (data?.proposalId) return { ok: true, proposalId: data.proposalId };
|
||||
// Policy resolved to 'auto' — no proposal row was created, the tool
|
||||
// ran directly. Return the payload so the reasoning loop can feed it
|
||||
// back into the next planner call.
|
||||
return { ok: true, proposalId: '', autoData: result.data, autoMessage: result.message };
|
||||
};
|
||||
|
||||
export interface RunMissionResult {
|
||||
readonly iteration: MissionIteration;
|
||||
readonly plannedSteps: number;
|
||||
|
|
@ -138,12 +102,10 @@ export interface RunMissionResult {
|
|||
* scope context. Queued runs wait until the previous one finishes. */
|
||||
let runMutex: Promise<void> = Promise.resolve();
|
||||
|
||||
/** Run one iteration of the given mission. */
|
||||
export async function runMission(
|
||||
missionId: string,
|
||||
deps: MissionRunnerDeps
|
||||
): Promise<RunMissionResult> {
|
||||
// Serialize mission runs so withAgentScope doesn't interleave.
|
||||
let release: () => void;
|
||||
const prev = runMutex;
|
||||
runMutex = new Promise((r) => (release = r));
|
||||
|
|
@ -155,6 +117,27 @@ export async function runMission(
|
|||
}
|
||||
}
|
||||
|
||||
/** Scan all active missions whose `nextRunAt` has passed and run them
|
||||
* once each. Drives the foreground tick wired in `+layout.svelte`. */
|
||||
export async function runDueMissions(
|
||||
now: Date,
|
||||
deps: MissionRunnerDeps
|
||||
): Promise<RunMissionResult[]> {
|
||||
const { listMissions } = await import('./store');
|
||||
const due = await listMissions({ dueBefore: now.toISOString() });
|
||||
const results: RunMissionResult[] = [];
|
||||
for (const m of due) {
|
||||
try {
|
||||
results.push(await runMission(m.id, deps));
|
||||
} catch (err) {
|
||||
console.error(`[MissionRunner] mission ${m.id} run threw:`, err);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// ─── Implementation ─────────────────────────────────────────────────
|
||||
|
||||
async function runMissionInner(
|
||||
missionId: string,
|
||||
deps: MissionRunnerDeps
|
||||
|
|
@ -165,15 +148,9 @@ async function runMissionInner(
|
|||
throw new Error(`Mission ${missionId} is ${mission.state}, cannot run`);
|
||||
}
|
||||
|
||||
// Start the iteration with an empty plan so it's visible in the UI as "running".
|
||||
// Use the id the store generates so finishIteration updates the same row.
|
||||
const startedIteration = await startIteration(mission.id, { plan: [] });
|
||||
const iterationId = startedIteration.id;
|
||||
|
||||
// Resolve the owning agent. Missions that pre-date the Multi-Agent
|
||||
// rollout or whose agent was deleted fall back to the legacy
|
||||
// principal + default name — runner still attributes cleanly, UI
|
||||
// renders the work as "Mana".
|
||||
const owningAgent = mission.agentId ? await getAgent(mission.agentId) : null;
|
||||
const aiActor = makeAgentActor({
|
||||
agentId: owningAgent?.id ?? LEGACY_AI_PRINCIPAL,
|
||||
|
|
@ -183,8 +160,6 @@ async function runMissionInner(
|
|||
rationale: mission.objective,
|
||||
});
|
||||
|
||||
// Hard timeout: any phase taking longer than ITERATION_TIMEOUT_MS aborts
|
||||
// the run. Wraps the whole pipeline in a Promise.race against a timer.
|
||||
const timeoutPromise = new Promise<never>((_, reject) =>
|
||||
setTimeout(
|
||||
() => reject(new CancelledError(`timeout after ${ITERATION_TIMEOUT_MS / 1000}s`)),
|
||||
|
|
@ -198,9 +173,6 @@ async function runMissionInner(
|
|||
}
|
||||
}
|
||||
|
||||
// Track the phase that was last active — so a catch handler can
|
||||
// attribute the error ("calling-llm" vs "parsing-response" is
|
||||
// enough context for most debugging without a stack trace).
|
||||
let lastPhase: import('@mana/shared-ai').IterationPhase | undefined;
|
||||
async function enterPhase(
|
||||
phase: import('@mana/shared-ai').IterationPhase,
|
||||
|
|
@ -210,6 +182,8 @@ async function runMissionInner(
|
|||
await setIterationPhase(mission!.id, iterationId, phase, detail);
|
||||
}
|
||||
|
||||
const runToolCall = deps.executeTool ?? executeTool;
|
||||
|
||||
async function runPipeline(): Promise<{
|
||||
recordedSteps: PlanStep[];
|
||||
stagedCount: number;
|
||||
|
|
@ -226,16 +200,7 @@ async function runMissionInner(
|
|||
const resolvedInputs: ResolvedInput[] = [...baseInputs];
|
||||
const preStep: AiDebugEntry['preStep'] = { kontextInjected: false };
|
||||
|
||||
// User context and agent kontext are available as explicit mission
|
||||
// inputs via the input picker — no auto-inject. The user decides
|
||||
// what context the AI sees.
|
||||
|
||||
// Pre-step web research: if the objective looks like research,
|
||||
// run the deep-research pipeline (mana-search + mana-llm) and
|
||||
// attach the summary + sources so the planner can decide which
|
||||
// to save via save_news_article. Failures are non-fatal — we
|
||||
// inject a synthetic "research failed" input instead so the
|
||||
// planner doesn't hallucinate that the search ran.
|
||||
// Pre-step web research (unchanged from pre-migration).
|
||||
if (RESEARCH_TRIGGER.test(mission!.objective)) {
|
||||
await enterPhase('resolving-inputs', 'Web-Recherche…');
|
||||
try {
|
||||
|
|
@ -274,222 +239,77 @@ async function runMissionInner(
|
|||
const availableTools = getAvailableToolsForAi(aiActor);
|
||||
await checkCancel();
|
||||
|
||||
// ── Reasoning loop ─────────────────────────────────────
|
||||
// Each pass: call planner → stage steps. Auto-tools run inline
|
||||
// and their outputs become new ResolvedInputs so the NEXT planner
|
||||
// call can reason over them (e.g. list_notes → see titles →
|
||||
// stage add_tag_to_note per note). Loop exits when:
|
||||
// • planner returns 0 steps → agent is done
|
||||
// • any step requires user approval (propose) → user in the loop
|
||||
// • budget exhausted (MAX_REASONING_LOOP_ITERATIONS)
|
||||
// • a step fails hard (not tool-error; executor error)
|
||||
const stage = deps.stageStep ?? defaultStageStep;
|
||||
const loopInputs: ResolvedInput[] = [...resolvedInputs];
|
||||
const recordedSteps: PlanStep[] = [];
|
||||
const plannerCalls: PlannerCallDebug[] = [];
|
||||
const loopStepLog: NonNullable<AiDebugEntry['loopSteps']> = [];
|
||||
let stagedCount = 0;
|
||||
let failedCount = 0;
|
||||
let lastPlanSummary = '';
|
||||
let totalStepCount = 0;
|
||||
let loopIndex = 0;
|
||||
let stepCounter = 0;
|
||||
let humanInLoop = false;
|
||||
|
||||
while (loopIndex < MAX_REASONING_LOOP_ITERATIONS) {
|
||||
// ── Phase: calling-llm ─────────────────────────────
|
||||
await enterPhase(
|
||||
'calling-llm',
|
||||
loopIndex === 0
|
||||
? 'frage Planner an'
|
||||
: `Planner Runde ${loopIndex + 1}/${MAX_REASONING_LOOP_ITERATIONS}`
|
||||
);
|
||||
let plan: AiPlanOutput;
|
||||
|
||||
// Streaming: show live token progress while waiting for the
|
||||
// planner response. Throttled to avoid Dexie write floods.
|
||||
let streamTokenCount = 0;
|
||||
let lastStreamWrite = 0;
|
||||
const roundLabel = loopIndex === 0 ? '' : ` (Runde ${loopIndex + 1})`;
|
||||
const onToken = (_delta: string) => {
|
||||
streamTokenCount++;
|
||||
const now = Date.now();
|
||||
if (now - lastStreamWrite < STREAMING_PHASE_THROTTLE_MS) return;
|
||||
lastStreamWrite = now;
|
||||
void setIterationPhase(
|
||||
mission!.id,
|
||||
iterationId,
|
||||
'calling-llm',
|
||||
`empfange Plan${roundLabel}… ${streamTokenCount} tokens`
|
||||
);
|
||||
};
|
||||
|
||||
// ── Guardrail: pre-plan ────────────────────────
|
||||
const planInput: AiPlanInput = {
|
||||
mission: mission!,
|
||||
resolvedInputs: loopInputs,
|
||||
availableTools,
|
||||
onToken,
|
||||
};
|
||||
const prePlanCheck = runPrePlanGuardrails(planInput);
|
||||
if (!prePlanCheck.passed) {
|
||||
throw new Error(`Guardrail blocked: ${prePlanCheck.blockReason}`);
|
||||
}
|
||||
|
||||
try {
|
||||
plan = await deps.plan(planInput);
|
||||
} catch (err) {
|
||||
if (isAiDebugEnabled()) {
|
||||
void recordAiDebug({
|
||||
iterationId,
|
||||
missionId: mission!.id,
|
||||
missionTitle: mission!.title,
|
||||
missionObjective: mission!.objective,
|
||||
capturedAt: new Date().toISOString(),
|
||||
resolvedInputs: loopInputs,
|
||||
preStep,
|
||||
plannerCalls,
|
||||
loopSteps: loopStepLog,
|
||||
plannerError: err instanceof Error ? err.message : String(err),
|
||||
});
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
await checkCancel();
|
||||
if (plan.debug) plannerCalls.push(plan.debug);
|
||||
lastPlanSummary = plan.summary;
|
||||
totalStepCount += plan.steps.length;
|
||||
|
||||
if (plan.steps.length === 0) {
|
||||
// Planner has nothing more to do — agent considers this done.
|
||||
break;
|
||||
}
|
||||
|
||||
// ── Guardrail: post-plan ──────────────────────────
|
||||
const postPlanCheck = runPostPlanGuardrails(planInput, plan);
|
||||
if (!postPlanCheck.passed) {
|
||||
throw new Error(`Guardrail blocked plan: ${postPlanCheck.blockReason}`);
|
||||
}
|
||||
|
||||
// ── Phase: parsing-response ────────────────────────
|
||||
await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`);
|
||||
await checkCancel();
|
||||
|
||||
// ── Phase: staging-proposals ───────────────────────
|
||||
const roundOutputs: Array<{ step: PlannedStep; message: string; data: unknown }> = [];
|
||||
for (const [i, ps] of plan.steps.entries()) {
|
||||
await enterPhase(
|
||||
'staging-proposals',
|
||||
`Runde ${loopIndex + 1} · Step ${i + 1}/${plan.steps.length}`
|
||||
);
|
||||
await checkCancel();
|
||||
|
||||
// ── Guardrail: pre-execute ─────────────────────
|
||||
const execCheck = runPreExecuteGuardrails(ps);
|
||||
if (!execCheck.passed) {
|
||||
failedCount++;
|
||||
const stepId = `${iterationId}-${stepCounter++}`;
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: `Guardrail: ${execCheck.blockReason}`,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
status: 'failed',
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
const stepId = `${iterationId}-${stepCounter++}`;
|
||||
let outcome: StageOutcome;
|
||||
try {
|
||||
outcome = await stage(ps, aiActor);
|
||||
} catch (err) {
|
||||
// Tool threw an unhandled exception (Dexie error, vault locked,
|
||||
// network timeout, etc.). Record the step as failed and continue
|
||||
// with the next step so one broken tool doesn't abort the entire
|
||||
// iteration. The error message surfaces in the iteration plan.
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
console.error(`[MissionRunner] step ${ps.toolName} threw:`, err);
|
||||
failedCount++;
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: `${ps.summary} (FEHLER: ${errMsg.slice(0, 100)})`,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
status: 'failed',
|
||||
});
|
||||
continue;
|
||||
}
|
||||
if (!outcome.ok) {
|
||||
failedCount++;
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: ps.summary,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
status: 'failed',
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
stagedCount++;
|
||||
if (outcome.proposalId) {
|
||||
// Propose-policy: human must approve. Exit the loop after
|
||||
// this round so we don't stage proposals for hypothetical
|
||||
// follow-up steps that depend on the approval outcome.
|
||||
humanInLoop = true;
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: ps.summary,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
proposalId: outcome.proposalId,
|
||||
status: 'staged',
|
||||
});
|
||||
} else {
|
||||
// Auto-policy: ran inline. Collect output for the next
|
||||
// planner call.
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: ps.summary,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
status: 'approved',
|
||||
});
|
||||
roundOutputs.push({
|
||||
step: ps,
|
||||
message: outcome.autoMessage ?? '(ohne message)',
|
||||
data: outcome.autoData,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Log loop outputs for debug-panel visibility.
|
||||
for (const o of roundOutputs) {
|
||||
loopStepLog.push({
|
||||
loopIndex,
|
||||
toolName: o.step.toolName,
|
||||
params: o.step.params,
|
||||
outputPreview: formatToolOutputPreview(o.message, o.data),
|
||||
});
|
||||
}
|
||||
|
||||
if (humanInLoop) break;
|
||||
if (roundOutputs.length === 0) {
|
||||
// Every step either failed or was proposed — nothing new to
|
||||
// reason over. Prevents an infinite loop when the planner
|
||||
// only suggests proposable tools that keep failing.
|
||||
break;
|
||||
}
|
||||
|
||||
// Feed tool outputs into the next planner call as a synthetic
|
||||
// ResolvedInput so the agent can chain its reasoning.
|
||||
loopInputs.push({
|
||||
id: `loop-outputs-${loopIndex}`,
|
||||
module: 'reasoning-loop',
|
||||
table: 'tool-outputs',
|
||||
title: `Zwischenergebnisse (Runde ${loopIndex + 1})`,
|
||||
content: formatToolOutputsForPrompt(roundOutputs),
|
||||
});
|
||||
|
||||
loopIndex++;
|
||||
// Pre-plan guardrail (kept — catches prompt-injection in resolved inputs etc.).
|
||||
const prePlanCheck = runPrePlanGuardrails({
|
||||
mission: mission!,
|
||||
resolvedInputs,
|
||||
availableTools,
|
||||
});
|
||||
if (!prePlanCheck.passed) {
|
||||
throw new Error(`Guardrail blocked: ${prePlanCheck.blockReason}`);
|
||||
}
|
||||
|
||||
// ── Phase: calling-llm / reasoning loop ────────────────
|
||||
await enterPhase('calling-llm', 'Planner…');
|
||||
const { systemPrompt, userPrompt } = buildSystemPrompt({
|
||||
mission: mission!,
|
||||
resolvedInputs,
|
||||
agentSystemPrompt: owningAgent?.systemPrompt ?? null,
|
||||
agentMemory: owningAgent?.memory ?? null,
|
||||
});
|
||||
|
||||
const loopResult = await runPlannerLoop({
|
||||
llm: deps.llm,
|
||||
input: {
|
||||
systemPrompt,
|
||||
userPrompt,
|
||||
tools: availableTools,
|
||||
model: deps.model ?? 'google/gemini-2.5-flash',
|
||||
maxRounds: MAX_PLANNER_ROUNDS,
|
||||
},
|
||||
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
|
||||
await checkCancel();
|
||||
await enterPhase('staging-proposals', call.name);
|
||||
|
||||
// Pre-execute guardrail per call. Failures come back as
|
||||
// tool-messages so the LLM can choose a different path.
|
||||
const execCheck = runPreExecuteGuardrails({
|
||||
summary: call.name,
|
||||
toolName: call.name,
|
||||
params: call.arguments,
|
||||
rationale: mission!.objective,
|
||||
});
|
||||
if (!execCheck.passed) {
|
||||
return {
|
||||
success: false,
|
||||
message: `Guardrail: ${execCheck.blockReason}`,
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
return await runToolCall(call.name, call.arguments, aiActor);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
console.error(`[MissionRunner] tool ${call.name} threw:`, err);
|
||||
return { success: false, message: `Tool execution failed: ${msg}` };
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
await checkCancel();
|
||||
|
||||
// Build the persisted plan from the loop's executed calls.
|
||||
const recordedSteps: PlanStep[] = loopResult.executedCalls.map((ec, i) => ({
|
||||
id: `${iterationId}-${i}`,
|
||||
summary: renderStepSummary(ec.call, ec.result),
|
||||
intent: {
|
||||
kind: 'toolCall',
|
||||
toolName: ec.call.name,
|
||||
params: ec.call.arguments,
|
||||
},
|
||||
status: ec.result.success ? 'approved' : 'failed',
|
||||
}));
|
||||
|
||||
if (isAiDebugEnabled()) {
|
||||
void recordAiDebug({
|
||||
iterationId,
|
||||
|
|
@ -497,25 +317,33 @@ async function runMissionInner(
|
|||
missionTitle: mission!.title,
|
||||
missionObjective: mission!.objective,
|
||||
capturedAt: new Date().toISOString(),
|
||||
resolvedInputs: loopInputs,
|
||||
resolvedInputs,
|
||||
preStep,
|
||||
plannerCalls,
|
||||
loopSteps: loopStepLog,
|
||||
rounds: loopResult.rounds,
|
||||
stopReason: loopResult.stopReason,
|
||||
messages: loopResult.messages as ChatMessage[],
|
||||
});
|
||||
}
|
||||
|
||||
await enterPhase('finalizing');
|
||||
|
||||
const failedCount = recordedSteps.filter((s) => s.status === 'failed').length;
|
||||
const planSummary =
|
||||
loopResult.summary ??
|
||||
(recordedSteps.length === 0
|
||||
? 'Keine Tool-Aufrufe — Mission hat nichts zu tun'
|
||||
: `${recordedSteps.length} Tool-Aufrufe ausgeführt (${failedCount} Fehler).`);
|
||||
|
||||
return {
|
||||
recordedSteps,
|
||||
stagedCount,
|
||||
stagedCount: recordedSteps.length,
|
||||
failedCount,
|
||||
planSummary: lastPlanSummary,
|
||||
planStepCount: totalStepCount,
|
||||
planSummary,
|
||||
planStepCount: recordedSteps.length,
|
||||
};
|
||||
}
|
||||
|
||||
let recordedSteps: PlanStep[] = [];
|
||||
let stagedCount = 0;
|
||||
let failedCount = 0;
|
||||
let planSummary = '';
|
||||
let planStepCount = 0;
|
||||
|
|
@ -525,7 +353,6 @@ async function runMissionInner(
|
|||
timeoutPromise,
|
||||
]);
|
||||
recordedSteps = result.recordedSteps;
|
||||
stagedCount = result.stagedCount;
|
||||
failedCount = result.failedCount;
|
||||
planSummary = result.planSummary;
|
||||
planStepCount = result.planStepCount;
|
||||
|
|
@ -545,14 +372,10 @@ async function runMissionInner(
|
|||
return emptyResult(mission, iterationId, 'failed', msg);
|
||||
}
|
||||
|
||||
// Status: everything executed → 'approved'. Some failures but not all → still 'approved'
|
||||
// (the user can revert). Only wholesale failure or zero progress is 'failed'.
|
||||
const overallStatus: MissionIteration['overallStatus'] =
|
||||
planStepCount === 0
|
||||
? 'approved' // nothing to do is a valid outcome
|
||||
: failedCount === planStepCount
|
||||
? 'failed'
|
||||
: stagedCount > 0
|
||||
? 'awaiting-review'
|
||||
: 'approved';
|
||||
planStepCount === 0 ? 'approved' : failedCount === planStepCount ? 'failed' : 'approved';
|
||||
|
||||
await finishIteration(mission.id, iterationId, {
|
||||
summary: planSummary,
|
||||
|
|
@ -569,11 +392,20 @@ async function runMissionInner(
|
|||
overallStatus,
|
||||
},
|
||||
plannedSteps: planStepCount,
|
||||
stagedSteps: stagedCount,
|
||||
stagedSteps: planStepCount,
|
||||
failedSteps: failedCount,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Helpers ────────────────────────────────────────────────────────
|
||||
|
||||
function renderStepSummary(call: ToolCallRequest, result: ToolResult): string {
|
||||
if (!result.success) {
|
||||
return `${call.name} (FEHLER: ${result.message.slice(0, 120)})`;
|
||||
}
|
||||
return result.message || call.name;
|
||||
}
|
||||
|
||||
function emptyResult(
|
||||
_mission: Mission,
|
||||
iterationId: string,
|
||||
|
|
@ -594,92 +426,6 @@ function emptyResult(
|
|||
};
|
||||
}
|
||||
|
||||
/** Read the kontext singleton + decrypt; returns null if empty/missing. */
|
||||
async function loadKontextAsResolvedInput(): Promise<ResolvedInput | null> {
|
||||
try {
|
||||
const local = await db
|
||||
.table<{ id: string; content?: string; deletedAt?: string }>('kontextDoc')
|
||||
.get(KONTEXT_SINGLETON_ID);
|
||||
if (!local || local.deletedAt) return null;
|
||||
const [decrypted] = await decryptRecords('kontextDoc', [local]);
|
||||
const content = decrypted?.content?.trim();
|
||||
if (!content) return null;
|
||||
return {
|
||||
id: KONTEXT_SINGLETON_ID,
|
||||
module: 'kontext',
|
||||
table: 'kontextDoc',
|
||||
title: 'Kontext (Standing)',
|
||||
content,
|
||||
};
|
||||
} catch (err) {
|
||||
console.warn('[MissionRunner] kontext auto-inject failed:', err);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Load the agent-specific kontext doc. Falls back to null (caller
|
||||
* may then fall back to the global singleton if desired). */
|
||||
/** Load the agent-specific kontext doc. Returns null when the agent
|
||||
* has no dedicated doc (does NOT fall back to the global singleton —
|
||||
* kontext injection is explicit via the input picker, not auto). */
|
||||
async function loadAgentKontextAsResolvedInput(agentId: string): Promise<ResolvedInput | null> {
|
||||
try {
|
||||
const doc = await getAgentKontext(agentId);
|
||||
if (!doc) return null;
|
||||
return {
|
||||
id: doc.id,
|
||||
module: 'kontext',
|
||||
table: 'agentKontextDocs',
|
||||
title: 'Agent-Kontext',
|
||||
content: doc.content,
|
||||
};
|
||||
} catch (err) {
|
||||
console.warn('[MissionRunner] agent kontext load failed:', err);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Run the deep-research pipeline against the mission objective and
|
||||
* collapse its summary + sources into one ResolvedInput formatted so
|
||||
* the planner can copy URLs into save_news_article calls. */
|
||||
/** Stringify a tool-output payload for the reasoning loop's next
|
||||
* prompt. Keeps the blob compact — LLM context windows are finite and
|
||||
* a raw JSON.stringify of a 200-row Dexie dump wastes tokens. */
|
||||
function formatToolOutputsForPrompt(
|
||||
outputs: Array<{ step: PlannedStep; message: string; data: unknown }>
|
||||
): string {
|
||||
const lines: string[] = [
|
||||
'Ausgaben der zuletzt ausgeführten Auto-Tools. Nutze diese Daten um die Mission weiterzuführen — z.B. für jede gelistete Notiz einen add_tag_to_note Aufruf pro Notiz.',
|
||||
'',
|
||||
];
|
||||
for (const o of outputs) {
|
||||
lines.push(`### ${o.step.toolName}(${JSON.stringify(o.step.params)})`);
|
||||
lines.push(o.message);
|
||||
if (o.data !== undefined && o.data !== null) {
|
||||
const json = safeStringify(o.data, 4000);
|
||||
lines.push('```json', json, '```');
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
/** Short form for the debug-panel loopSteps log. */
|
||||
function formatToolOutputPreview(message: string, data: unknown): string {
|
||||
if (data === undefined || data === null) return message;
|
||||
const json = safeStringify(data, 400);
|
||||
return `${message}\n${json}`;
|
||||
}
|
||||
|
||||
function safeStringify(value: unknown, limit: number): string {
|
||||
try {
|
||||
const s = JSON.stringify(value, null, 2);
|
||||
return s.length > limit ? s.slice(0, limit) + '\n… (truncated)' : s;
|
||||
} catch {
|
||||
return String(value);
|
||||
}
|
||||
}
|
||||
|
||||
interface WebResearchOutcome {
|
||||
input: ResolvedInput;
|
||||
sourceCount: number;
|
||||
|
|
@ -689,9 +435,6 @@ interface WebResearchOutcome {
|
|||
async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | null> {
|
||||
// RSS-based news research via news-research module: discoverByQuery
|
||||
// finds matching feeds, searchFeeds ranks recent articles by relevance.
|
||||
// Robust (own infra, no external SearXNG dependency), free (no credits),
|
||||
// and the documented happy-path for the AI companion's news flow.
|
||||
// Detect language hint from objective: German chars/words → de, else en.
|
||||
const objective = mission.objective;
|
||||
const isGerman = /[äöüß]|recherchier|aktuelle|neueste|finde|suche/i.test(objective);
|
||||
const language = isGerman ? 'de' : 'en';
|
||||
|
|
@ -699,9 +442,6 @@ async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | nu
|
|||
const discovered = await discoverByQuery(objective, language);
|
||||
const feedUrls = discovered.feeds.slice(0, 10).map((f) => f.url);
|
||||
if (feedUrls.length === 0) {
|
||||
// No feeds discovered — surface as failure so the planner doesn't
|
||||
// pretend it has data. Caller wraps this in a "research failed"
|
||||
// ResolvedInput.
|
||||
throw new Error(
|
||||
`news-research: keine RSS-Feeds für "${objective}" gefunden (${discovered.searched ?? 0} Quellen abgesucht).`
|
||||
);
|
||||
|
|
@ -732,8 +472,6 @@ async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | nu
|
|||
|
||||
return {
|
||||
input: {
|
||||
// Stable ID so re-running the same mission replaces the prior
|
||||
// research input instead of appending duplicates.
|
||||
id: `news-research-${mission.id}`,
|
||||
module: 'news-research',
|
||||
table: 'rssArticles',
|
||||
|
|
@ -744,25 +482,3 @@ async function runWebResearch(mission: Mission): Promise<WebResearchOutcome | nu
|
|||
summary: `${articles.length} Artikel aus ${feedUrls.length} Feeds.`,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan all active missions whose `nextRunAt` has passed and run them once
|
||||
* each. Used by the foreground tick that wires this into `+layout.svelte`.
|
||||
* Safe to call concurrently — each mission run is independent.
|
||||
*/
|
||||
export async function runDueMissions(
|
||||
now: Date,
|
||||
deps: MissionRunnerDeps
|
||||
): Promise<RunMissionResult[]> {
|
||||
const { listMissions } = await import('./store');
|
||||
const due = await listMissions({ dueBefore: now.toISOString() });
|
||||
const results: RunMissionResult[] = [];
|
||||
for (const m of due) {
|
||||
try {
|
||||
results.push(await runMission(m.id, deps));
|
||||
} catch (err) {
|
||||
console.error(`[MissionRunner] mission ${m.id} run threw:`, err);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/**
|
||||
* Production wiring for the Mission Runner.
|
||||
*
|
||||
* Connects the dependency-injected `runMission` to the real LlmOrchestrator
|
||||
* (via `aiPlanTask`) and drives `runDueMissions` on a foreground interval.
|
||||
* Connects the dependency-injected runner to the real mana-llm client
|
||||
* and drives `runDueMissions` on a foreground interval.
|
||||
*
|
||||
* Use pattern:
|
||||
*
|
||||
|
|
@ -10,14 +10,12 @@
|
|||
* import { startMissionTick } from '$lib/data/ai/missions/setup';
|
||||
* onMount(() => startMissionTick());
|
||||
*
|
||||
* The tick is intentionally foreground-only: the Runner requires the
|
||||
* LlmOrchestrator which needs WebGPU / network. A background service for
|
||||
* offline-of-tab execution is tracked as Phase 7 — see
|
||||
* COMPANION_BRAIN_ARCHITECTURE.md §20.5.
|
||||
* The tick is intentionally foreground-only for now — a background
|
||||
* service worker for offline-of-tab execution is tracked as Phase 7;
|
||||
* see COMPANION_BRAIN_ARCHITECTURE.md §20.5.
|
||||
*/
|
||||
|
||||
import { llmOrchestrator } from '@mana/shared-llm';
|
||||
import { aiPlanTask } from '$lib/llm-tasks/ai-plan';
|
||||
import { createManaLlmClient } from './llm-client';
|
||||
import { runDueMissions, type MissionRunnerDeps } from './runner';
|
||||
import { registerDefaultInputResolvers } from './default-resolvers';
|
||||
import { runAgentsBootstrap } from '../agents/bootstrap';
|
||||
|
|
@ -29,20 +27,13 @@ import { runAgentsBootstrap } from '../agents/bootstrap';
|
|||
import '$lib/modules/meditate/seed';
|
||||
import '$lib/modules/habits/seed';
|
||||
import '$lib/companion/goals/seed';
|
||||
import type { AiPlanInput, AiPlanOutput } from './planner/types';
|
||||
|
||||
/** Default interval between tick scans. One minute is fine for foreground use. */
|
||||
const DEFAULT_TICK_INTERVAL_MS = 60_000;
|
||||
|
||||
/** Swap-in planner that routes through the real LLM orchestrator. */
|
||||
const productionPlan = async (input: AiPlanInput): Promise<AiPlanOutput> => {
|
||||
const result = await llmOrchestrator.run(aiPlanTask, input);
|
||||
return result.value;
|
||||
};
|
||||
|
||||
export const productionDeps: MissionRunnerDeps = {
|
||||
plan: productionPlan,
|
||||
// stageStep defaults to the policy-gated executor — nothing to override here.
|
||||
llm: createManaLlmClient(),
|
||||
// model + executeTool defaults handled inside the runner.
|
||||
};
|
||||
|
||||
let tickHandle: ReturnType<typeof setInterval> | null = null;
|
||||
|
|
|
|||
|
|
@ -1,22 +1,21 @@
|
|||
/**
|
||||
* Tool Executor — validates parameters, resolves AI policy, and runs or
|
||||
* stages the tool by name.
|
||||
* Tool Executor — validates parameters, resolves AI policy, runs the tool.
|
||||
*
|
||||
* Call paths:
|
||||
* - User action from the UI: `executeTool(name, params)` with no actor
|
||||
* → ambient `USER_ACTOR`, policy returns `auto`, tool runs directly.
|
||||
* - AI in the companion orchestrator: `executeTool(name, params, aiActor)`
|
||||
* → policy resolves per-tool; `propose` writes a Proposal and returns
|
||||
* a success result carrying the proposal id, `auto` executes, `deny`
|
||||
* refuses.
|
||||
* - Approval path: proposal store calls `executeToolRaw(name, params)`
|
||||
* under `runAsAsync(aiActor, ...)` — same validation, but no policy.
|
||||
* Policy semantics post-migration to native function-calling:
|
||||
* - `auto` — execute directly under the actor's scope
|
||||
* - `deny` — refuse with a ToolResult error (the runner turns this into
|
||||
* a tool-message the LLM can react to)
|
||||
*
|
||||
* There is no proposal/approval gate in this pipeline anymore; the
|
||||
* Workbench Timeline plus per-iteration Revert is the user's review
|
||||
* surface. Tools flagged as `propose` in the catalog are treated as
|
||||
* `auto` here — the distinction only matters as legacy metadata that
|
||||
* higher layers (UI, analytics) may still read.
|
||||
*/
|
||||
|
||||
import { getTool } from './registry';
|
||||
import { runAsAsync, USER_ACTOR } from '../events/actor';
|
||||
import { resolvePolicy } from '../ai/policy';
|
||||
import { createProposal } from '../ai/proposals/store';
|
||||
import { getAgent } from '../ai/agents/store';
|
||||
import type { Actor } from '../events/actor';
|
||||
import type { AiPolicy } from '@mana/shared-ai';
|
||||
|
|
@ -37,11 +36,9 @@ export async function executeTool(
|
|||
|
||||
const effectiveActor: Actor = actor ?? USER_ACTOR;
|
||||
|
||||
// Multi-Agent Workbench (Phase 4): policy lives on the agent. When
|
||||
// the actor is AI, look up the owning agent and use its policy. If
|
||||
// the agent record is missing (legacy write, deleted agent, race),
|
||||
// resolvePolicy falls back to the user-level DEFAULT_AI_POLICY via
|
||||
// its optional-argument default.
|
||||
// Agent-scoped policy: the AI actor may have a per-agent policy
|
||||
// override. If the agent record is missing (deleted / legacy /
|
||||
// race), resolvePolicy falls back to the user-level default.
|
||||
let agentPolicy: AiPolicy | undefined;
|
||||
if (effectiveActor.kind === 'ai') {
|
||||
const agent = await getAgent(effectiveActor.principalId);
|
||||
|
|
@ -56,25 +53,7 @@ export async function executeTool(
|
|||
};
|
||||
}
|
||||
|
||||
if (decision === 'propose') {
|
||||
// Only ai actors can hit `propose` — resolvePolicy short-circuits
|
||||
// user/system to `auto`. Narrow defensively in case policy is swapped.
|
||||
if (effectiveActor.kind !== 'ai') {
|
||||
return { success: false, message: `propose policy requires an AI actor` };
|
||||
}
|
||||
const proposal = await createProposal({
|
||||
actor: effectiveActor,
|
||||
intent: { kind: 'toolCall', toolName: name, params },
|
||||
rationale: effectiveActor.rationale,
|
||||
});
|
||||
return {
|
||||
success: true,
|
||||
data: { proposalId: proposal.id, status: 'pending' },
|
||||
message: `Vorgeschlagen: "${name}" wartet auf Freigabe.`,
|
||||
};
|
||||
}
|
||||
|
||||
// decision === 'auto'
|
||||
// `auto` or `propose` both execute here — see file-level comment.
|
||||
return runAsAsync(effectiveActor, () => runValidatedTool(tool, params));
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue