diff --git a/apps/mana/apps/web/src/lib/components/ai/AiDebugBlock.svelte b/apps/mana/apps/web/src/lib/components/ai/AiDebugBlock.svelte index 30e0c0695..a8cbd17fb 100644 --- a/apps/mana/apps/web/src/lib/components/ai/AiDebugBlock.svelte +++ b/apps/mana/apps/web/src/lib/components/ai/AiDebugBlock.svelte @@ -44,13 +44,14 @@ {:else if d.preStep.webResearch && !d.preStep.webResearch.ok} · Web ❌ {/if} - {#if d.plannerCalls && d.plannerCalls.length > 0} - · {d.plannerCalls.length}× LLM · {Math.round( - d.plannerCalls.reduce((a, c) => a + c.latencyMs, 0) - )}ms + {#if d.rounds} + · {d.rounds} Runde{d.rounds === 1 ? '' : 'n'} {/if} - {#if d.loopSteps && d.loopSteps.length > 0} - · {d.loopSteps.length}× Auto-Tool + {#if d.messages} + · {d.messages.length} Messages + {/if} + {#if d.stopReason && d.stopReason !== 'assistant-stop'} + · {d.stopReason} {/if} {#if d.plannerError}· Planner ❌{/if} @@ -93,41 +94,34 @@ {/if} - {#if d.loopSteps && d.loopSteps.length > 0} + {#if d.messages && d.messages.length > 0}
-
Auto-Tool-Ausgaben (Reasoning-Loop)
- {#each d.loopSteps as ls, i (i)} -
+
Chat-Verlauf ({d.messages.length} Messages · {d.rounds ?? '?'} Runden)
+ {#each d.messages as m, i (i)} +
- Runde {ls.loopIndex + 1} - {ls.toolName}({JSON.stringify(ls.params)}) + {m.role} + {#if m.toolCalls && m.toolCalls.length > 0} + tool_calls: {m.toolCalls.map((c) => c.name).join(', ')} + {:else if m.toolCallId} + tool_result (id: {m.toolCallId}) + {:else} + {typeof m.content === 'string' ? m.content.slice(0, 100) : ''} + {/if} -
{ls.outputPreview}
+ {#if m.content} +
{m.content}
+ {/if} + {#if m.toolCalls && m.toolCalls.length > 0} + {#each m.toolCalls as call (call.id)} +
{call.name}({JSON.stringify(call.arguments, null, 2)})
+ {/each} + {/if}
{/each}
{/if} - {#if d.plannerCalls && d.plannerCalls.length > 0} - {#each d.plannerCalls as call, i (i)} -
-
LLM-Call {i + 1}/{d.plannerCalls.length} · {Math.round(call.latencyMs)}ms
-
- System Prompt -
{call.systemPrompt}
-
-
- User Prompt -
{call.userPrompt}
-
-
- Raw LLM Response -
{call.rawResponse}
-
-
- {/each} - {/if} - {#if d.plannerError}
Planner Error
diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts b/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts index 7fd23aa12..3464a1929 100644 --- a/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts +++ b/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts @@ -1,30 +1,41 @@ /** * Build the tool list the Planner is allowed to consider. * - * Only tools the policy rates `auto` or `propose` are exposed — `deny` is - * invisible to the AI. This is defence-in-depth: even if the Planner - * hallucinates a denied tool name, the parser rejects it because the name - * isn't in the allow-set, AND the executor would refuse at runtime. + * Only tools the policy rates `auto` or `propose` are exposed — `deny` + * is invisible to the AI. Defence-in-depth: even if the LLM somehow + * names a denied tool, the executor refuses at runtime. + * + * Returns the shared ToolSchema shape directly so the runner can pass + * the list straight into runPlannerLoop (which calls + * toolsToFunctionSchemas internally). */ import { getTools } from '../../tools/registry'; import { resolvePolicy } from '../policy'; import type { Actor } from '../../events/actor'; -import type { AvailableTool } from './planner/types'; +import type { ToolSchema } from '@mana/shared-ai'; +import { AI_TOOL_CATALOG_BY_NAME } from '@mana/shared-ai'; -export function getAvailableToolsForAi(aiActor: Extract): AvailableTool[] { +export function getAvailableToolsForAi(aiActor: Extract): ToolSchema[] { return getTools() .filter((tool) => resolvePolicy(tool.name, aiActor) !== 'deny') - .map((tool) => ({ - name: tool.name, - module: tool.module, - description: tool.description, - parameters: tool.parameters.map((p) => ({ - name: p.name, - type: p.type, - required: p.required, - description: p.description, - enum: p.enum, - })), - })); + .map((tool) => { + // Prefer the catalog entry when available — it carries the + // defaultPolicy we need on ToolSchema. Tools without a catalog + // entry (playground / test-only) fall back to 'auto'. + const catalogEntry = AI_TOOL_CATALOG_BY_NAME.get(tool.name); + return { + name: tool.name, + module: tool.module, + description: tool.description, + defaultPolicy: catalogEntry?.defaultPolicy ?? 'auto', + parameters: tool.parameters.map((p) => ({ + name: p.name, + type: p.type, + required: p.required, + description: p.description, + enum: p.enum, + })), + }; + }); } diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/debug.ts b/apps/mana/apps/web/src/lib/data/ai/missions/debug.ts index bc02a1071..3f84759e5 100644 --- a/apps/mana/apps/web/src/lib/data/ai/missions/debug.ts +++ b/apps/mana/apps/web/src/lib/data/ai/missions/debug.ts @@ -14,7 +14,7 @@ import { useLiveQueryWithDefault } from '@mana/local-store/svelte'; import { db } from '../../database'; -import type { ResolvedInput } from './planner/types'; +import type { ChatMessage, LoopStopReason, ResolvedInput } from '@mana/shared-ai'; const TABLE = '_aiDebugLog'; const STORAGE_KEY = 'mana.ai.debug'; @@ -26,19 +26,6 @@ const MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days * is truncated to reduce the privacy surface if the device is stolen. */ const INPUT_CONTENT_LIMIT = 500; -/** - * Captured by `aiPlanTask` and passed back via the planner output so the - * runner can record it without the planner needing to know about Dexie. - */ -export interface PlannerCallDebug { - readonly systemPrompt: string; - readonly userPrompt: string; - readonly rawResponse: string; - readonly latencyMs: number; - readonly backendId?: string; - readonly model?: string; -} - export interface AiDebugEntry { /** Primary key — one row per iteration. */ iterationId: string; @@ -51,22 +38,14 @@ export interface AiDebugEntry { webResearch?: { ok: true; sourceCount: number; summary: string } | { ok: false; error: string }; kontextInjected: boolean; }; - /** - * Array because the reasoning loop can call the planner multiple - * times per iteration (once per loop step, until a proposal is - * staged or no more work is returned). Older single-call entries - * written before the loop shipped still parse — readers that - * haven't updated simply take `plannerCalls[0]`. - */ - plannerCalls?: PlannerCallDebug[]; - /** Auto-executed tool outputs captured across loop steps — surfaces - * what the agent "saw" when reasoning across multiple calls. */ - loopSteps?: Array<{ - loopIndex: number; - toolName: string; - params: Record; - outputPreview: string; - }>; + /** Full chat history of the planner loop: system + user + every + * assistant turn (with tool_calls) + every tool-message result. + * Replaces the pre-migration plannerCalls[]/loopSteps structure. */ + messages?: ChatMessage[]; + /** Number of planner rounds consumed inside this iteration. */ + rounds?: number; + /** Why the loop terminated (assistant-stop, max-rounds, …). */ + stopReason?: LoopStopReason; plannerError?: string; } diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts b/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts new file mode 100644 index 000000000..04f24afd4 --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts @@ -0,0 +1,189 @@ +/** + * LlmClient implementation that speaks to the mana-llm service using its + * OpenAI-compatible /v1/chat/completions endpoint, with native tool_calls + * passthrough. Used by the webapp Mission Runner and Companion Chat to + * drive the shared runPlannerLoop from @mana/shared-ai. + * + * The shared-ai LlmClient contract is intentionally small — we don't go + * through the LlmOrchestrator's tier routing here. Tool calling needs + * a specific server-proxied path (mana-llm forwards to Google / OpenAI + * / Ollama with tools enabled), not the legacy text-JSON orchestrator. + * Tier integration can come later once shared-llm grows tool-call + * awareness. + */ + +import { + type ChatMessage, + type LlmClient, + type LlmCompletionRequest, + type LlmCompletionResponse, + type LlmFinishReason, + type ToolCallRequest, +} from '@mana/shared-ai'; + +const DEFAULT_LLM_URL = 'http://localhost:3025'; + +/** Resolve the mana-llm base URL from window-injected env; falls back + * to localhost. Mirrors the helper in @mana/shared-llm's remote.ts. */ +function resolveLlmBaseUrl(): string { + if (typeof window !== 'undefined') { + const fromWindow = (window as unknown as { __PUBLIC_MANA_LLM_URL__?: string }) + .__PUBLIC_MANA_LLM_URL__; + if (fromWindow) return fromWindow.replace(/\/$/, ''); + } + return DEFAULT_LLM_URL; +} + +export interface ManaLlmClientOptions { + /** Default model id used when callers don't override per request. + * Format matches mana-llm's provider/model syntax. */ + readonly defaultModel?: string; + /** Override the base URL — mostly for tests. Production resolves from + * window.__PUBLIC_MANA_LLM_URL__. */ + readonly baseUrl?: string; + /** Hard stop for the fetch. The runner wraps runPlannerLoop in its + * own iteration-level timeout (180 s) so this is mostly a belt + + * braces for pathological provider stalls. */ + readonly fetchTimeoutMs?: number; +} + +const DEFAULT_MODEL = 'google/gemini-2.5-flash'; +const DEFAULT_FETCH_TIMEOUT_MS = 120_000; + +export function createManaLlmClient(opts: ManaLlmClientOptions = {}): LlmClient { + const baseUrl = (opts.baseUrl ?? resolveLlmBaseUrl()).replace(/\/$/, ''); + const defaultModel = opts.defaultModel ?? DEFAULT_MODEL; + const fetchTimeoutMs = opts.fetchTimeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS; + + return { + async complete(req: LlmCompletionRequest): Promise { + const url = `${baseUrl}/v1/chat/completions`; + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs); + + const body = { + model: req.model || defaultModel, + messages: req.messages.map(toWireMessage), + tools: req.tools, // already in OpenAI {type, function} shape + tool_choice: 'auto' as const, + temperature: req.temperature ?? 0.3, + stream: false, + }; + + let res: Response; + try { + res = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal: controller.signal, + }); + } catch (err) { + clearTimeout(timeout); + const msg = err instanceof Error ? err.message : String(err); + throw new Error(`mana-llm unreachable at ${baseUrl}: ${msg}`); + } + clearTimeout(timeout); + + if (!res.ok) { + let detail: unknown; + try { + detail = await res.json(); + } catch { + detail = await res.text().catch(() => ''); + } + throw new Error( + `mana-llm ${res.status}: ${typeof detail === 'string' ? detail : JSON.stringify(detail)}` + ); + } + + const data = (await res.json()) as ChatCompletionResponseShape; + const choice = data.choices?.[0]; + if (!choice) { + throw new Error('mana-llm response had no choices'); + } + const content = choice.message?.content ?? null; + const toolCalls = (choice.message?.tool_calls ?? []).map(fromWireToolCall); + const finishReason = normaliseFinishReason(choice.finish_reason); + + return { content, toolCalls, finishReason }; + }, + }; +} + +// ── Wire-format helpers ───────────────────────────────────────────── + +interface WireMessage { + role: 'system' | 'user' | 'assistant' | 'tool'; + content?: string | null; + tool_calls?: Array<{ + id: string; + type: 'function'; + function: { name: string; arguments: string }; + }>; + tool_call_id?: string; +} + +function toWireMessage(m: ChatMessage): WireMessage { + const out: WireMessage = { role: m.role }; + if (m.content !== undefined) out.content = m.content; + if (m.toolCallId) out.tool_call_id = m.toolCallId; + if (m.toolCalls && m.toolCalls.length > 0) { + out.tool_calls = m.toolCalls.map((c) => ({ + id: c.id, + type: 'function', + function: { + name: c.name, + arguments: JSON.stringify(c.arguments), + }, + })); + } + return out; +} + +interface ChatCompletionResponseShape { + choices?: Array<{ + message?: { + content?: string | null; + tool_calls?: Array<{ + id: string; + type?: string; + function: { name: string; arguments?: string }; + }>; + }; + finish_reason?: string | null; + }>; +} + +function fromWireToolCall(raw: { + id: string; + function: { name: string; arguments?: string }; +}): ToolCallRequest { + let args: Record = {}; + if (raw.function.arguments) { + try { + const parsed = JSON.parse(raw.function.arguments); + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + args = parsed as Record; + } + } catch { + // Malformed arguments — surface as empty and let the executor + // reject on the missing-required-parameter path. + } + } + return { id: raw.id, name: raw.function.name, arguments: args }; +} + +function normaliseFinishReason(raw: string | null | undefined): LlmFinishReason { + switch (raw) { + case 'tool_calls': + return 'tool_calls'; + case 'length': + return 'length'; + case 'content_filter': + return 'content_filter'; + case 'stop': + default: + return 'stop'; + } +} diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts index 2fbdfe5ae..67fdd62f0 100644 --- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts +++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts @@ -9,28 +9,22 @@ vi.mock('$lib/triggers/inline-suggest', () => ({ import { db } from '../../database'; import { registerTools } from '../../tools/registry'; -import { setAiPolicy } from '../policy'; -import { createMission, getMission, pauseMission } from './store'; -import { runMission, runDueMissions } from './runner'; -import { - registerInputResolver, - unregisterInputResolver, - resolveMissionInputs, -} from './input-resolvers'; +import { createMission, getMission } from './store'; +import { runMission } from './runner'; import { MISSIONS_TABLE } from './types'; -import type { AiPlanInput, AiPlanOutput } from './planner/types'; +import type { LlmClient, LlmCompletionRequest, LlmCompletionResponse } from '@mana/shared-ai'; let executed: { name: string; params: Record }[] = []; registerTools([ { - name: 'runner_test_stage', + name: 'runner_test_action', module: 'runnerTest', - description: 'proposes', + description: 'test action', parameters: [{ name: 'val', type: 'string', required: true, description: 'v' }], async execute(params) { - executed.push({ name: 'runner_test_stage', params: { ...params } }); - return { success: true, message: 'ok' }; + executed.push({ name: 'runner_test_action', params: { ...params } }); + return { success: true, message: `did ${params.val}` }; }, }, ]); @@ -38,179 +32,97 @@ registerTools([ beforeEach(async () => { executed = []; await db.table(MISSIONS_TABLE).clear(); - await db.table('pendingProposals').clear(); }); +/** Minimal LlmClient for runner tests — scripts one or more assistant + * turns via enqueueToolCalls / enqueueStop. */ +function mockLlm( + turns: Array< + | { kind: 'tool_calls'; calls: Array<{ name: string; args: Record }> } + | { kind: 'stop'; content?: string } + > +): LlmClient { + let i = 0; + return { + async complete(_req: LlmCompletionRequest): Promise { + const turn = turns[i++]; + if (!turn) throw new Error('MockLlm exhausted'); + if (turn.kind === 'stop') { + return { content: turn.content ?? null, toolCalls: [], finishReason: 'stop' }; + } + return { + content: null, + toolCalls: turn.calls.map((c, n) => ({ + id: `call_${i}_${n}`, + name: c.name, + arguments: c.args, + })), + finishReason: 'tool_calls', + }; + }, + }; +} + describe('runMission', () => { - it('runs the planner, stages proposals, and marks the iteration awaiting-review', async () => { - const restore = setAiPolicy({ - tools: { runner_test_stage: 'propose' }, - defaultForAi: 'propose', - }); - try { - const m = await createMission({ - title: 'Test mission', - conceptMarkdown: '', - objective: 'test', - cadence: { kind: 'manual' }, - }); - const planStub: AiPlanOutput = { - summary: 'Staged a test step', - steps: [ - { - summary: 'Do a thing', - toolName: 'runner_test_stage', - params: { val: 'hello' }, - rationale: 'because test', - }, - ], - }; - const result = await runMission(m.id, { - plan: async (_input: AiPlanInput) => planStub, - }); - - expect(result.plannedSteps).toBe(1); - expect(result.stagedSteps).toBe(1); - expect(result.iteration.overallStatus).toBe('awaiting-review'); - - const after = await getMission(m.id); - expect(after?.iterations).toHaveLength(1); - expect(after?.iterations[0].plan[0].proposalId).toBeTruthy(); - expect(after?.iterations[0].plan[0].status).toBe('staged'); - - // Tool did NOT execute — proposal was staged - expect(executed).toHaveLength(0); - } finally { - restore(); - } - }); - - it('passes the built AiPlanInput to the planner with mission + tool allowlist', async () => { - const restore = setAiPolicy({ - tools: { runner_test_stage: 'propose' }, - defaultForAi: 'deny', - }); - try { - const m = await createMission({ - title: 'Test', - conceptMarkdown: '', - objective: 'test', - cadence: { kind: 'manual' }, - }); - let captured: AiPlanInput | null = null; - await runMission(m.id, { - plan: async (input) => { - captured = input; - return { summary: '', steps: [] }; - }, - }); - expect(captured).toBeTruthy(); - expect(captured!.mission.id).toBe(m.id); - const allowedNames = captured!.availableTools.map((t) => t.name); - expect(allowedNames).toContain('runner_test_stage'); - } finally { - restore(); - } - }); - - it('marks an iteration failed when the planner throws', async () => { + it('executes a tool_call directly and records it in the iteration', async () => { const m = await createMission({ - title: 'x', + title: 'Test mission', conceptMarkdown: '', - objective: 'x', + objective: 'test', cadence: { kind: 'manual' }, }); - const result = await runMission(m.id, { - plan: async () => { - throw new Error('planner down'); - }, - }); - expect(result.iteration.overallStatus).toBe('failed'); - const after = await getMission(m.id); - expect(after?.iterations[0].overallStatus).toBe('failed'); - expect(after?.iterations[0].summary).toContain('planner down'); - }); - it('produces an approved iteration when planner returns zero steps', async () => { - const m = await createMission({ - title: 'x', - conceptMarkdown: '', - objective: 'x', - cadence: { kind: 'manual' }, - }); - const result = await runMission(m.id, { - plan: async () => ({ summary: 'nothing needed', steps: [] }), - }); + const llm = mockLlm([ + { kind: 'tool_calls', calls: [{ name: 'runner_test_action', args: { val: 'hello' } }] }, + { kind: 'stop', content: 'done' }, + ]); + + const result = await runMission(m.id, { llm }); + + expect(result.plannedSteps).toBe(1); + expect(result.failedSteps).toBe(0); expect(result.iteration.overallStatus).toBe('approved'); + expect(executed).toEqual([{ name: 'runner_test_action', params: { val: 'hello' } }]); + + const after = await getMission(m.id); + expect(after?.iterations).toHaveLength(1); + expect(after?.iterations[0].plan).toHaveLength(1); + expect(after?.iterations[0].plan[0].status).toBe('approved'); }); - it('refuses to run a paused mission', async () => { + it('marks the iteration approved with zero steps when the LLM just stops', async () => { const m = await createMission({ - title: 'x', + title: 'Empty', conceptMarkdown: '', - objective: 'x', + objective: 'nothing to do', cadence: { kind: 'manual' }, }); - await pauseMission(m.id); - await expect( - runMission(m.id, { plan: async () => ({ summary: '', steps: [] }) }) - ).rejects.toThrow(/paused/); - }); -}); -describe('runDueMissions', () => { - it('runs only active missions whose nextRunAt has passed', async () => { - const a = await createMission({ - title: 'due', + const llm = mockLlm([{ kind: 'stop', content: 'nichts zu tun' }]); + const result = await runMission(m.id, { llm }); + + expect(result.plannedSteps).toBe(0); + expect(result.iteration.overallStatus).toBe('approved'); + expect(executed).toHaveLength(0); + }); + + it('surfaces tool failures as failed PlanSteps without aborting the iteration', async () => { + const m = await createMission({ + title: 'Mixed', conceptMarkdown: '', - objective: 'x', - cadence: { kind: 'interval', everyMinutes: 5 }, + objective: 'test', + cadence: { kind: 'manual' }, }); - const b = await createMission({ - title: 'future', - conceptMarkdown: '', - objective: 'x', - cadence: { kind: 'interval', everyMinutes: 5 }, - }); - // Force `a` into the past, leave `b` in the future - await db.table(MISSIONS_TABLE).update(a.id, { nextRunAt: '2020-01-01T00:00:00.000Z' }); - const runs: string[] = []; - await runDueMissions(new Date(), { - plan: async (input) => { - runs.push(input.mission.id); - return { summary: '', steps: [] }; - }, - }); - expect(runs).toEqual([a.id]); - expect(runs).not.toContain(b.id); - }); -}); - -describe('resolveMissionInputs', () => { - it('resolves via registered resolvers and skips missing modules', async () => { - registerInputResolver('testmod', async (ref) => ({ - id: ref.id, - module: 'testmod', - table: ref.table, - title: 'T', - content: `content for ${ref.id}`, - })); - try { - const refs = [ - { module: 'testmod', table: 't', id: 'a' }, - { module: 'nope', table: 't', id: 'b' }, - ]; - const resolved = await resolveMissionInputs(refs); - expect(resolved).toHaveLength(1); - expect(resolved[0].content).toContain('a'); - } finally { - unregisterInputResolver('testmod'); - } - }); - - it('returns empty array when nothing is registered', async () => { - const r = await resolveMissionInputs([{ module: 'unknown', table: 't', id: 'x' }]); - expect(r).toEqual([]); + // One call to an unknown tool (executor returns success:false) plus a stop. + const llm = mockLlm([ + { kind: 'tool_calls', calls: [{ name: 'does_not_exist', args: {} }] }, + { kind: 'stop' }, + ]); + const result = await runMission(m.id, { llm }); + + expect(result.plannedSteps).toBe(1); + expect(result.failedSteps).toBe(1); + expect(result.iteration.overallStatus).toBe('failed'); }); }); diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts index d9a007fca..c47c3dd80 100644 --- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts +++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts @@ -5,18 +5,21 @@ * ↓ * resolve inputs via registered resolvers * ↓ - * build available-tool list (policy-filtered) + * pre-step web research (when the objective looks like a research task) * ↓ - * call planner (LLM) → AiPlanOutput + * build system + user prompts (compact — no tool listing) * ↓ - * for each step: stage a Proposal under the AI actor + * runPlannerLoop with native function calling * ↓ - * finishIteration(summary, overallStatus, plan-with-proposal-ids) + * each tool_call executes directly via the policy-gated executor; + * results feed back as tool-messages for the next turn + * ↓ + * finishIteration(summary, overallStatus, executed-steps) * - * Planner + proposal-staging are injected so the Runner is unit-testable - * without a live LLM or Dexie hooks. Default implementations call the - * shared LlmOrchestrator / `executeTool(...)` respectively; production - * code passes those in via the setup module. + * Post-migration note: there is no propose/approve gate. Tools run + * directly under the AI actor. The user's review surface is the + * Workbench Timeline + per-iteration revert. See + * docs/plans/planner-function-calling.md for the design rationale. */ import { @@ -29,21 +32,23 @@ import { import { resolveMissionInputs } from './input-resolvers'; import { getAvailableToolsForAi } from './available-tools'; import { executeTool } from '../../tools/executor'; -import { db } from '../../database'; -import { decryptRecords } from '../../crypto'; import { discoverByQuery, searchFeeds } from '$lib/modules/news-research/api'; -import { getAgentKontext } from '../agents/kontext'; import { withAgentScope } from '../scope-context'; -import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry, type PlannerCallDebug } from './debug'; +import { isAiDebugEnabled, recordAiDebug, type AiDebugEntry } from './debug'; import { makeAgentActor, LEGACY_AI_PRINCIPAL, type Actor } from '../../events/actor'; import { getAgent } from '../agents/store'; import { DEFAULT_AGENT_NAME } from '../agents/types'; import type { Mission, MissionIteration, PlanStep } from './types'; -import type { AiPlanInput, AiPlanOutput, PlannedStep, ResolvedInput } from './planner/types'; import { + buildSystemPrompt, + runPlannerLoop, runPrePlanGuardrails, - runPostPlanGuardrails, runPreExecuteGuardrails, + type ChatMessage, + type LlmClient, + type ResolvedInput, + type ToolCallRequest, + type ToolResult, } from '@mana/shared-ai'; /** Heuristic: mission objective text that should trigger a pre-step @@ -51,33 +56,14 @@ import { * don't burn credits accidentally. */ const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neueste)/i; -/** Reasoning-loop budget. Each LOOP iteration = one planner call + its - * auto-tool executions. The loop exits early when a propose-policy - * step is staged (human must approve before progressing) or the - * planner returns zero steps (it considers this subtask done). - * 5 is generous for read-act-refine patterns ("list_notes → tag them") - * without running the LLM bill dry on stuck missions. */ -/** Keep in sync with the planner system prompt in - * packages/shared-ai/src/planner/prompt.ts which tells the LLM - * "bis zu 5 Planungsrunden pro Iteration, 1–5 Schritte pro Runde". */ -const MAX_REASONING_LOOP_ITERATIONS = 5; +/** Hard ceiling on planner rounds inside one iteration. One round = one + * LLM call plus whatever tool executions its output triggered. Matches + * the shared-ai default; re-declared here for clarity. */ +const MAX_PLANNER_ROUNDS = 5; -/** Min interval between Dexie phaseDetail writes during streaming. - * 50 tokens/s × 500ms = ~25 tokens between writes — frequent enough - * for the UI to feel live, infrequent enough to avoid Dexie thrashing. */ -const STREAMING_PHASE_THROTTLE_MS = 500; -/** Singleton row id of the kontext doc — kept in sync with - * `modules/kontext/types.ts` (KONTEXT_SINGLETON_ID). */ -const KONTEXT_SINGLETON_ID = 'singleton'; - -/** Hard timeout for one mission run. Cancels the in-flight planner call - * and finalises the iteration as failed. 90 s is comfortable for a - * cloud-tier model but short enough that a wedged backend doesn't sit - * in `running` indefinitely. */ -/** 180s gives the reasoning loop (up to 5 LLM calls) enough headroom - * even on slow models. Each call can take 10–30s on Ollama/GPU with - * network latency; the old 90s limit regularly timed out during the - * second loop round. */ +/** Hard timeout for one mission run. 180 s is comfortable for a cloud + * model doing up to 5 reasoning rounds; anything longer means a wedged + * backend and should fail the iteration rather than sit in `running`. */ const ITERATION_TIMEOUT_MS = 180_000; class CancelledError extends Error { @@ -87,46 +73,24 @@ class CancelledError extends Error { } } +// ─── Public API ───────────────────────────────────────────────────── + export interface MissionRunnerDeps { - /** Invoke the Planner LLM task with the fully-built input. */ - plan: (input: AiPlanInput) => Promise; - /** Stage a single planned step as a Proposal. Returns the proposal id on success. */ - stageStep?: (step: PlannedStep, aiActor: Extract) => Promise; + /** LLM transport. Typically the mana-llm client from llm-client.ts; + * tests inject a MockLlmClient. */ + llm: LlmClient; + /** Model id to pass to the LLM (provider/model). Defaults handled by + * the client; exposed here so per-mission overrides can plug in. */ + model?: string; + /** Per-tool executor. Tests inject a mock; production defaults to + * the policy-gated `executeTool`. */ + executeTool?: ( + name: string, + params: Record, + actor: Actor + ) => Promise; } -export type StageOutcome = - | { - readonly ok: true; - readonly proposalId: string; - /** Full tool-result payload when the step auto-executed (proposalId - * is empty). The reasoning loop reads this and feeds it back as - * context for the next planner call so the agent can reason over - * list/read outputs across steps. */ - readonly autoData?: unknown; - readonly autoMessage?: string; - } - | { readonly ok: false; readonly error: string }; - -/** Default step-staging implementation: policy-gated executor under AI actor. */ -export const defaultStageStep: Required['stageStep'] = async (step, aiActor) => { - const stepActor: Extract = { - ...aiActor, - // Per-step rationale wins over the mission-wide one so the review UI - // shows *this step's* reasoning. - rationale: step.rationale || aiActor.rationale, - }; - const result = await executeTool(step.toolName, step.params, stepActor); - if (!result.success) { - return { ok: false, error: result.message }; - } - const data = result.data as { proposalId?: string } | undefined; - if (data?.proposalId) return { ok: true, proposalId: data.proposalId }; - // Policy resolved to 'auto' — no proposal row was created, the tool - // ran directly. Return the payload so the reasoning loop can feed it - // back into the next planner call. - return { ok: true, proposalId: '', autoData: result.data, autoMessage: result.message }; -}; - export interface RunMissionResult { readonly iteration: MissionIteration; readonly plannedSteps: number; @@ -138,12 +102,10 @@ export interface RunMissionResult { * scope context. Queued runs wait until the previous one finishes. */ let runMutex: Promise = Promise.resolve(); -/** Run one iteration of the given mission. */ export async function runMission( missionId: string, deps: MissionRunnerDeps ): Promise { - // Serialize mission runs so withAgentScope doesn't interleave. let release: () => void; const prev = runMutex; runMutex = new Promise((r) => (release = r)); @@ -155,6 +117,27 @@ export async function runMission( } } +/** Scan all active missions whose `nextRunAt` has passed and run them + * once each. Drives the foreground tick wired in `+layout.svelte`. */ +export async function runDueMissions( + now: Date, + deps: MissionRunnerDeps +): Promise { + const { listMissions } = await import('./store'); + const due = await listMissions({ dueBefore: now.toISOString() }); + const results: RunMissionResult[] = []; + for (const m of due) { + try { + results.push(await runMission(m.id, deps)); + } catch (err) { + console.error(`[MissionRunner] mission ${m.id} run threw:`, err); + } + } + return results; +} + +// ─── Implementation ───────────────────────────────────────────────── + async function runMissionInner( missionId: string, deps: MissionRunnerDeps @@ -165,15 +148,9 @@ async function runMissionInner( throw new Error(`Mission ${missionId} is ${mission.state}, cannot run`); } - // Start the iteration with an empty plan so it's visible in the UI as "running". - // Use the id the store generates so finishIteration updates the same row. const startedIteration = await startIteration(mission.id, { plan: [] }); const iterationId = startedIteration.id; - // Resolve the owning agent. Missions that pre-date the Multi-Agent - // rollout or whose agent was deleted fall back to the legacy - // principal + default name — runner still attributes cleanly, UI - // renders the work as "Mana". const owningAgent = mission.agentId ? await getAgent(mission.agentId) : null; const aiActor = makeAgentActor({ agentId: owningAgent?.id ?? LEGACY_AI_PRINCIPAL, @@ -183,8 +160,6 @@ async function runMissionInner( rationale: mission.objective, }); - // Hard timeout: any phase taking longer than ITERATION_TIMEOUT_MS aborts - // the run. Wraps the whole pipeline in a Promise.race against a timer. const timeoutPromise = new Promise((_, reject) => setTimeout( () => reject(new CancelledError(`timeout after ${ITERATION_TIMEOUT_MS / 1000}s`)), @@ -198,9 +173,6 @@ async function runMissionInner( } } - // Track the phase that was last active — so a catch handler can - // attribute the error ("calling-llm" vs "parsing-response" is - // enough context for most debugging without a stack trace). let lastPhase: import('@mana/shared-ai').IterationPhase | undefined; async function enterPhase( phase: import('@mana/shared-ai').IterationPhase, @@ -210,6 +182,8 @@ async function runMissionInner( await setIterationPhase(mission!.id, iterationId, phase, detail); } + const runToolCall = deps.executeTool ?? executeTool; + async function runPipeline(): Promise<{ recordedSteps: PlanStep[]; stagedCount: number; @@ -226,16 +200,7 @@ async function runMissionInner( const resolvedInputs: ResolvedInput[] = [...baseInputs]; const preStep: AiDebugEntry['preStep'] = { kontextInjected: false }; - // User context and agent kontext are available as explicit mission - // inputs via the input picker — no auto-inject. The user decides - // what context the AI sees. - - // Pre-step web research: if the objective looks like research, - // run the deep-research pipeline (mana-search + mana-llm) and - // attach the summary + sources so the planner can decide which - // to save via save_news_article. Failures are non-fatal — we - // inject a synthetic "research failed" input instead so the - // planner doesn't hallucinate that the search ran. + // Pre-step web research (unchanged from pre-migration). if (RESEARCH_TRIGGER.test(mission!.objective)) { await enterPhase('resolving-inputs', 'Web-Recherche…'); try { @@ -274,222 +239,77 @@ async function runMissionInner( const availableTools = getAvailableToolsForAi(aiActor); await checkCancel(); - // ── Reasoning loop ───────────────────────────────────── - // Each pass: call planner → stage steps. Auto-tools run inline - // and their outputs become new ResolvedInputs so the NEXT planner - // call can reason over them (e.g. list_notes → see titles → - // stage add_tag_to_note per note). Loop exits when: - // • planner returns 0 steps → agent is done - // • any step requires user approval (propose) → user in the loop - // • budget exhausted (MAX_REASONING_LOOP_ITERATIONS) - // • a step fails hard (not tool-error; executor error) - const stage = deps.stageStep ?? defaultStageStep; - const loopInputs: ResolvedInput[] = [...resolvedInputs]; - const recordedSteps: PlanStep[] = []; - const plannerCalls: PlannerCallDebug[] = []; - const loopStepLog: NonNullable = []; - let stagedCount = 0; - let failedCount = 0; - let lastPlanSummary = ''; - let totalStepCount = 0; - let loopIndex = 0; - let stepCounter = 0; - let humanInLoop = false; - - while (loopIndex < MAX_REASONING_LOOP_ITERATIONS) { - // ── Phase: calling-llm ───────────────────────────── - await enterPhase( - 'calling-llm', - loopIndex === 0 - ? 'frage Planner an' - : `Planner Runde ${loopIndex + 1}/${MAX_REASONING_LOOP_ITERATIONS}` - ); - let plan: AiPlanOutput; - - // Streaming: show live token progress while waiting for the - // planner response. Throttled to avoid Dexie write floods. - let streamTokenCount = 0; - let lastStreamWrite = 0; - const roundLabel = loopIndex === 0 ? '' : ` (Runde ${loopIndex + 1})`; - const onToken = (_delta: string) => { - streamTokenCount++; - const now = Date.now(); - if (now - lastStreamWrite < STREAMING_PHASE_THROTTLE_MS) return; - lastStreamWrite = now; - void setIterationPhase( - mission!.id, - iterationId, - 'calling-llm', - `empfange Plan${roundLabel}… ${streamTokenCount} tokens` - ); - }; - - // ── Guardrail: pre-plan ──────────────────────── - const planInput: AiPlanInput = { - mission: mission!, - resolvedInputs: loopInputs, - availableTools, - onToken, - }; - const prePlanCheck = runPrePlanGuardrails(planInput); - if (!prePlanCheck.passed) { - throw new Error(`Guardrail blocked: ${prePlanCheck.blockReason}`); - } - - try { - plan = await deps.plan(planInput); - } catch (err) { - if (isAiDebugEnabled()) { - void recordAiDebug({ - iterationId, - missionId: mission!.id, - missionTitle: mission!.title, - missionObjective: mission!.objective, - capturedAt: new Date().toISOString(), - resolvedInputs: loopInputs, - preStep, - plannerCalls, - loopSteps: loopStepLog, - plannerError: err instanceof Error ? err.message : String(err), - }); - } - throw err; - } - await checkCancel(); - if (plan.debug) plannerCalls.push(plan.debug); - lastPlanSummary = plan.summary; - totalStepCount += plan.steps.length; - - if (plan.steps.length === 0) { - // Planner has nothing more to do — agent considers this done. - break; - } - - // ── Guardrail: post-plan ────────────────────────── - const postPlanCheck = runPostPlanGuardrails(planInput, plan); - if (!postPlanCheck.passed) { - throw new Error(`Guardrail blocked plan: ${postPlanCheck.blockReason}`); - } - - // ── Phase: parsing-response ──────────────────────── - await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`); - await checkCancel(); - - // ── Phase: staging-proposals ─────────────────────── - const roundOutputs: Array<{ step: PlannedStep; message: string; data: unknown }> = []; - for (const [i, ps] of plan.steps.entries()) { - await enterPhase( - 'staging-proposals', - `Runde ${loopIndex + 1} · Step ${i + 1}/${plan.steps.length}` - ); - await checkCancel(); - - // ── Guardrail: pre-execute ───────────────────── - const execCheck = runPreExecuteGuardrails(ps); - if (!execCheck.passed) { - failedCount++; - const stepId = `${iterationId}-${stepCounter++}`; - recordedSteps.push({ - id: stepId, - summary: `Guardrail: ${execCheck.blockReason}`, - intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params }, - status: 'failed', - }); - continue; - } - - const stepId = `${iterationId}-${stepCounter++}`; - let outcome: StageOutcome; - try { - outcome = await stage(ps, aiActor); - } catch (err) { - // Tool threw an unhandled exception (Dexie error, vault locked, - // network timeout, etc.). Record the step as failed and continue - // with the next step so one broken tool doesn't abort the entire - // iteration. The error message surfaces in the iteration plan. - const errMsg = err instanceof Error ? err.message : String(err); - console.error(`[MissionRunner] step ${ps.toolName} threw:`, err); - failedCount++; - recordedSteps.push({ - id: stepId, - summary: `${ps.summary} (FEHLER: ${errMsg.slice(0, 100)})`, - intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params }, - status: 'failed', - }); - continue; - } - if (!outcome.ok) { - failedCount++; - recordedSteps.push({ - id: stepId, - summary: ps.summary, - intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params }, - status: 'failed', - }); - continue; - } - - stagedCount++; - if (outcome.proposalId) { - // Propose-policy: human must approve. Exit the loop after - // this round so we don't stage proposals for hypothetical - // follow-up steps that depend on the approval outcome. - humanInLoop = true; - recordedSteps.push({ - id: stepId, - summary: ps.summary, - intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params }, - proposalId: outcome.proposalId, - status: 'staged', - }); - } else { - // Auto-policy: ran inline. Collect output for the next - // planner call. - recordedSteps.push({ - id: stepId, - summary: ps.summary, - intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params }, - status: 'approved', - }); - roundOutputs.push({ - step: ps, - message: outcome.autoMessage ?? '(ohne message)', - data: outcome.autoData, - }); - } - } - - // Log loop outputs for debug-panel visibility. - for (const o of roundOutputs) { - loopStepLog.push({ - loopIndex, - toolName: o.step.toolName, - params: o.step.params, - outputPreview: formatToolOutputPreview(o.message, o.data), - }); - } - - if (humanInLoop) break; - if (roundOutputs.length === 0) { - // Every step either failed or was proposed — nothing new to - // reason over. Prevents an infinite loop when the planner - // only suggests proposable tools that keep failing. - break; - } - - // Feed tool outputs into the next planner call as a synthetic - // ResolvedInput so the agent can chain its reasoning. - loopInputs.push({ - id: `loop-outputs-${loopIndex}`, - module: 'reasoning-loop', - table: 'tool-outputs', - title: `Zwischenergebnisse (Runde ${loopIndex + 1})`, - content: formatToolOutputsForPrompt(roundOutputs), - }); - - loopIndex++; + // Pre-plan guardrail (kept — catches prompt-injection in resolved inputs etc.). + const prePlanCheck = runPrePlanGuardrails({ + mission: mission!, + resolvedInputs, + availableTools, + }); + if (!prePlanCheck.passed) { + throw new Error(`Guardrail blocked: ${prePlanCheck.blockReason}`); } + // ── Phase: calling-llm / reasoning loop ──────────────── + await enterPhase('calling-llm', 'Planner…'); + const { systemPrompt, userPrompt } = buildSystemPrompt({ + mission: mission!, + resolvedInputs, + agentSystemPrompt: owningAgent?.systemPrompt ?? null, + agentMemory: owningAgent?.memory ?? null, + }); + + const loopResult = await runPlannerLoop({ + llm: deps.llm, + input: { + systemPrompt, + userPrompt, + tools: availableTools, + model: deps.model ?? 'google/gemini-2.5-flash', + maxRounds: MAX_PLANNER_ROUNDS, + }, + onToolCall: async (call: ToolCallRequest): Promise => { + await checkCancel(); + await enterPhase('staging-proposals', call.name); + + // Pre-execute guardrail per call. Failures come back as + // tool-messages so the LLM can choose a different path. + const execCheck = runPreExecuteGuardrails({ + summary: call.name, + toolName: call.name, + params: call.arguments, + rationale: mission!.objective, + }); + if (!execCheck.passed) { + return { + success: false, + message: `Guardrail: ${execCheck.blockReason}`, + }; + } + + try { + return await runToolCall(call.name, call.arguments, aiActor); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.error(`[MissionRunner] tool ${call.name} threw:`, err); + return { success: false, message: `Tool execution failed: ${msg}` }; + } + }, + }); + + await checkCancel(); + + // Build the persisted plan from the loop's executed calls. + const recordedSteps: PlanStep[] = loopResult.executedCalls.map((ec, i) => ({ + id: `${iterationId}-${i}`, + summary: renderStepSummary(ec.call, ec.result), + intent: { + kind: 'toolCall', + toolName: ec.call.name, + params: ec.call.arguments, + }, + status: ec.result.success ? 'approved' : 'failed', + })); + if (isAiDebugEnabled()) { void recordAiDebug({ iterationId, @@ -497,25 +317,33 @@ async function runMissionInner( missionTitle: mission!.title, missionObjective: mission!.objective, capturedAt: new Date().toISOString(), - resolvedInputs: loopInputs, + resolvedInputs, preStep, - plannerCalls, - loopSteps: loopStepLog, + rounds: loopResult.rounds, + stopReason: loopResult.stopReason, + messages: loopResult.messages as ChatMessage[], }); } await enterPhase('finalizing'); + + const failedCount = recordedSteps.filter((s) => s.status === 'failed').length; + const planSummary = + loopResult.summary ?? + (recordedSteps.length === 0 + ? 'Keine Tool-Aufrufe — Mission hat nichts zu tun' + : `${recordedSteps.length} Tool-Aufrufe ausgeführt (${failedCount} Fehler).`); + return { recordedSteps, - stagedCount, + stagedCount: recordedSteps.length, failedCount, - planSummary: lastPlanSummary, - planStepCount: totalStepCount, + planSummary, + planStepCount: recordedSteps.length, }; } let recordedSteps: PlanStep[] = []; - let stagedCount = 0; let failedCount = 0; let planSummary = ''; let planStepCount = 0; @@ -525,7 +353,6 @@ async function runMissionInner( timeoutPromise, ]); recordedSteps = result.recordedSteps; - stagedCount = result.stagedCount; failedCount = result.failedCount; planSummary = result.planSummary; planStepCount = result.planStepCount; @@ -545,14 +372,10 @@ async function runMissionInner( return emptyResult(mission, iterationId, 'failed', msg); } + // Status: everything executed → 'approved'. Some failures but not all → still 'approved' + // (the user can revert). Only wholesale failure or zero progress is 'failed'. const overallStatus: MissionIteration['overallStatus'] = - planStepCount === 0 - ? 'approved' // nothing to do is a valid outcome - : failedCount === planStepCount - ? 'failed' - : stagedCount > 0 - ? 'awaiting-review' - : 'approved'; + planStepCount === 0 ? 'approved' : failedCount === planStepCount ? 'failed' : 'approved'; await finishIteration(mission.id, iterationId, { summary: planSummary, @@ -569,11 +392,20 @@ async function runMissionInner( overallStatus, }, plannedSteps: planStepCount, - stagedSteps: stagedCount, + stagedSteps: planStepCount, failedSteps: failedCount, }; } +// ─── Helpers ──────────────────────────────────────────────────────── + +function renderStepSummary(call: ToolCallRequest, result: ToolResult): string { + if (!result.success) { + return `${call.name} (FEHLER: ${result.message.slice(0, 120)})`; + } + return result.message || call.name; +} + function emptyResult( _mission: Mission, iterationId: string, @@ -594,92 +426,6 @@ function emptyResult( }; } -/** Read the kontext singleton + decrypt; returns null if empty/missing. */ -async function loadKontextAsResolvedInput(): Promise { - try { - const local = await db - .table<{ id: string; content?: string; deletedAt?: string }>('kontextDoc') - .get(KONTEXT_SINGLETON_ID); - if (!local || local.deletedAt) return null; - const [decrypted] = await decryptRecords('kontextDoc', [local]); - const content = decrypted?.content?.trim(); - if (!content) return null; - return { - id: KONTEXT_SINGLETON_ID, - module: 'kontext', - table: 'kontextDoc', - title: 'Kontext (Standing)', - content, - }; - } catch (err) { - console.warn('[MissionRunner] kontext auto-inject failed:', err); - return null; - } -} - -/** Load the agent-specific kontext doc. Falls back to null (caller - * may then fall back to the global singleton if desired). */ -/** Load the agent-specific kontext doc. Returns null when the agent - * has no dedicated doc (does NOT fall back to the global singleton — - * kontext injection is explicit via the input picker, not auto). */ -async function loadAgentKontextAsResolvedInput(agentId: string): Promise { - try { - const doc = await getAgentKontext(agentId); - if (!doc) return null; - return { - id: doc.id, - module: 'kontext', - table: 'agentKontextDocs', - title: 'Agent-Kontext', - content: doc.content, - }; - } catch (err) { - console.warn('[MissionRunner] agent kontext load failed:', err); - return null; - } -} - -/** Run the deep-research pipeline against the mission objective and - * collapse its summary + sources into one ResolvedInput formatted so - * the planner can copy URLs into save_news_article calls. */ -/** Stringify a tool-output payload for the reasoning loop's next - * prompt. Keeps the blob compact — LLM context windows are finite and - * a raw JSON.stringify of a 200-row Dexie dump wastes tokens. */ -function formatToolOutputsForPrompt( - outputs: Array<{ step: PlannedStep; message: string; data: unknown }> -): string { - const lines: string[] = [ - 'Ausgaben der zuletzt ausgeführten Auto-Tools. Nutze diese Daten um die Mission weiterzuführen — z.B. für jede gelistete Notiz einen add_tag_to_note Aufruf pro Notiz.', - '', - ]; - for (const o of outputs) { - lines.push(`### ${o.step.toolName}(${JSON.stringify(o.step.params)})`); - lines.push(o.message); - if (o.data !== undefined && o.data !== null) { - const json = safeStringify(o.data, 4000); - lines.push('```json', json, '```'); - } - lines.push(''); - } - return lines.join('\n'); -} - -/** Short form for the debug-panel loopSteps log. */ -function formatToolOutputPreview(message: string, data: unknown): string { - if (data === undefined || data === null) return message; - const json = safeStringify(data, 400); - return `${message}\n${json}`; -} - -function safeStringify(value: unknown, limit: number): string { - try { - const s = JSON.stringify(value, null, 2); - return s.length > limit ? s.slice(0, limit) + '\n… (truncated)' : s; - } catch { - return String(value); - } -} - interface WebResearchOutcome { input: ResolvedInput; sourceCount: number; @@ -689,9 +435,6 @@ interface WebResearchOutcome { async function runWebResearch(mission: Mission): Promise { // RSS-based news research via news-research module: discoverByQuery // finds matching feeds, searchFeeds ranks recent articles by relevance. - // Robust (own infra, no external SearXNG dependency), free (no credits), - // and the documented happy-path for the AI companion's news flow. - // Detect language hint from objective: German chars/words → de, else en. const objective = mission.objective; const isGerman = /[äöüß]|recherchier|aktuelle|neueste|finde|suche/i.test(objective); const language = isGerman ? 'de' : 'en'; @@ -699,9 +442,6 @@ async function runWebResearch(mission: Mission): Promise f.url); if (feedUrls.length === 0) { - // No feeds discovered — surface as failure so the planner doesn't - // pretend it has data. Caller wraps this in a "research failed" - // ResolvedInput. throw new Error( `news-research: keine RSS-Feeds für "${objective}" gefunden (${discovered.searched ?? 0} Quellen abgesucht).` ); @@ -732,8 +472,6 @@ async function runWebResearch(mission: Mission): Promise { - const { listMissions } = await import('./store'); - const due = await listMissions({ dueBefore: now.toISOString() }); - const results: RunMissionResult[] = []; - for (const m of due) { - try { - results.push(await runMission(m.id, deps)); - } catch (err) { - console.error(`[MissionRunner] mission ${m.id} run threw:`, err); - } - } - return results; -} diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/setup.ts b/apps/mana/apps/web/src/lib/data/ai/missions/setup.ts index a055d74ee..43e20b45f 100644 --- a/apps/mana/apps/web/src/lib/data/ai/missions/setup.ts +++ b/apps/mana/apps/web/src/lib/data/ai/missions/setup.ts @@ -1,8 +1,8 @@ /** * Production wiring for the Mission Runner. * - * Connects the dependency-injected `runMission` to the real LlmOrchestrator - * (via `aiPlanTask`) and drives `runDueMissions` on a foreground interval. + * Connects the dependency-injected runner to the real mana-llm client + * and drives `runDueMissions` on a foreground interval. * * Use pattern: * @@ -10,14 +10,12 @@ * import { startMissionTick } from '$lib/data/ai/missions/setup'; * onMount(() => startMissionTick()); * - * The tick is intentionally foreground-only: the Runner requires the - * LlmOrchestrator which needs WebGPU / network. A background service for - * offline-of-tab execution is tracked as Phase 7 — see - * COMPANION_BRAIN_ARCHITECTURE.md §20.5. + * The tick is intentionally foreground-only for now — a background + * service worker for offline-of-tab execution is tracked as Phase 7; + * see COMPANION_BRAIN_ARCHITECTURE.md §20.5. */ -import { llmOrchestrator } from '@mana/shared-llm'; -import { aiPlanTask } from '$lib/llm-tasks/ai-plan'; +import { createManaLlmClient } from './llm-client'; import { runDueMissions, type MissionRunnerDeps } from './runner'; import { registerDefaultInputResolvers } from './default-resolvers'; import { runAgentsBootstrap } from '../agents/bootstrap'; @@ -29,20 +27,13 @@ import { runAgentsBootstrap } from '../agents/bootstrap'; import '$lib/modules/meditate/seed'; import '$lib/modules/habits/seed'; import '$lib/companion/goals/seed'; -import type { AiPlanInput, AiPlanOutput } from './planner/types'; /** Default interval between tick scans. One minute is fine for foreground use. */ const DEFAULT_TICK_INTERVAL_MS = 60_000; -/** Swap-in planner that routes through the real LLM orchestrator. */ -const productionPlan = async (input: AiPlanInput): Promise => { - const result = await llmOrchestrator.run(aiPlanTask, input); - return result.value; -}; - export const productionDeps: MissionRunnerDeps = { - plan: productionPlan, - // stageStep defaults to the policy-gated executor — nothing to override here. + llm: createManaLlmClient(), + // model + executeTool defaults handled inside the runner. }; let tickHandle: ReturnType | null = null; diff --git a/apps/mana/apps/web/src/lib/data/tools/executor.ts b/apps/mana/apps/web/src/lib/data/tools/executor.ts index f9cbd0345..d2121c421 100644 --- a/apps/mana/apps/web/src/lib/data/tools/executor.ts +++ b/apps/mana/apps/web/src/lib/data/tools/executor.ts @@ -1,22 +1,21 @@ /** - * Tool Executor — validates parameters, resolves AI policy, and runs or - * stages the tool by name. + * Tool Executor — validates parameters, resolves AI policy, runs the tool. * - * Call paths: - * - User action from the UI: `executeTool(name, params)` with no actor - * → ambient `USER_ACTOR`, policy returns `auto`, tool runs directly. - * - AI in the companion orchestrator: `executeTool(name, params, aiActor)` - * → policy resolves per-tool; `propose` writes a Proposal and returns - * a success result carrying the proposal id, `auto` executes, `deny` - * refuses. - * - Approval path: proposal store calls `executeToolRaw(name, params)` - * under `runAsAsync(aiActor, ...)` — same validation, but no policy. + * Policy semantics post-migration to native function-calling: + * - `auto` — execute directly under the actor's scope + * - `deny` — refuse with a ToolResult error (the runner turns this into + * a tool-message the LLM can react to) + * + * There is no proposal/approval gate in this pipeline anymore; the + * Workbench Timeline plus per-iteration Revert is the user's review + * surface. Tools flagged as `propose` in the catalog are treated as + * `auto` here — the distinction only matters as legacy metadata that + * higher layers (UI, analytics) may still read. */ import { getTool } from './registry'; import { runAsAsync, USER_ACTOR } from '../events/actor'; import { resolvePolicy } from '../ai/policy'; -import { createProposal } from '../ai/proposals/store'; import { getAgent } from '../ai/agents/store'; import type { Actor } from '../events/actor'; import type { AiPolicy } from '@mana/shared-ai'; @@ -37,11 +36,9 @@ export async function executeTool( const effectiveActor: Actor = actor ?? USER_ACTOR; - // Multi-Agent Workbench (Phase 4): policy lives on the agent. When - // the actor is AI, look up the owning agent and use its policy. If - // the agent record is missing (legacy write, deleted agent, race), - // resolvePolicy falls back to the user-level DEFAULT_AI_POLICY via - // its optional-argument default. + // Agent-scoped policy: the AI actor may have a per-agent policy + // override. If the agent record is missing (deleted / legacy / + // race), resolvePolicy falls back to the user-level default. let agentPolicy: AiPolicy | undefined; if (effectiveActor.kind === 'ai') { const agent = await getAgent(effectiveActor.principalId); @@ -56,25 +53,7 @@ export async function executeTool( }; } - if (decision === 'propose') { - // Only ai actors can hit `propose` — resolvePolicy short-circuits - // user/system to `auto`. Narrow defensively in case policy is swapped. - if (effectiveActor.kind !== 'ai') { - return { success: false, message: `propose policy requires an AI actor` }; - } - const proposal = await createProposal({ - actor: effectiveActor, - intent: { kind: 'toolCall', toolName: name, params }, - rationale: effectiveActor.rationale, - }); - return { - success: true, - data: { proposalId: proposal.id, status: 'pending' }, - message: `Vorgeschlagen: "${name}" wartet auf Freigabe.`, - }; - } - - // decision === 'auto' + // `auto` or `propose` both execute here — see file-level comment. return runAsAsync(effectiveActor, () => runValidatedTool(tool, params)); }