feat(mana-ai): server runner on runPlannerLoop, drops text-JSON parser

Migrates the background tick from buildPlannerPrompt + PlannerClient +
parsePlannerResponse to the shared runPlannerLoop with native function
calling. Structurally identical to the webapp runner (commit 5a) —
same catalog, same compact system prompt, same multi-turn chat.

Server-specific twist: the ``onToolCall`` callback is a no-op stub
(returns {success:true, message:'recorded — pending client
application'}). The server has no Dexie access, so it can't actually
execute writes; instead it captures the LLM's chosen tool_calls and
writes them as PlanStep entries on the iteration. The user's client
picks up those planned steps on sync — same shape as before, just
sourced from the LLM's native tool_calls instead of a regex-extracted
JSON block.

Scope trimmed by the SERVER_TOOLS filter: only propose-default (write)
tools go to the server planner. Read-only tools (list_*, get_*) are
hidden because stubbing a response would let the LLM hallucinate that
it saw real data. Read-then-act chains stay with the foreground
runner, which has a real executor.

Deleted: planner/client.ts (old PlannerClient; replaced by
planner/llm-client.ts). Drift guard in tools.ts collapses into a
SERVER_TOOLS = AI_TOOL_CATALOG.filter(propose) derivation — no more
hand-maintained duplicate list; the contract test now asserts the
inverse round-trip against AI_PROPOSABLE_TOOL_SET.

TODO (follow-up): token usage tracking is temporarily set to 0 because
runPlannerLoop doesn't expose per-message usage yet. Budget
enforcement on the server is effectively disabled until the loop
returns that data — the webapp runner is unaffected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-20 16:39:20 +02:00
parent 2ee3a1a93a
commit 1cd559ca34
5 changed files with 262 additions and 212 deletions

View file

@ -14,12 +14,13 @@
*/
import {
buildPlannerPrompt,
parsePlannerResponse,
type AiPlanInput,
type AiPlanOutput,
buildSystemPrompt,
runPlannerLoop,
type Mission,
type PlannerMessages,
type PlannedStep,
type ToolCallRequest,
type ToolResult,
type ToolSchema,
} from '@mana/shared-ai';
import { getSql, type Sql } from '../db/connection';
import { resolveServerInputs } from '../db/resolvers';
@ -27,8 +28,8 @@ import { listDueMissions, type ServerMission } from '../db/missions-projection';
import { loadActiveAgents, refreshAgentSnapshots, type ServerAgent } from '../db/agents-projection';
import { appendServerIteration, planToIteration } from '../db/iteration-writer';
import { refreshSnapshots } from '../db/snapshot-refresh';
import { PlannerClient } from '../planner/client';
import { AI_AVAILABLE_TOOLS, AI_AVAILABLE_TOOL_NAMES } from '../planner/tools';
import { createServerLlmClient } from '../planner/llm-client';
import { SERVER_TOOLS } from '../planner/tools';
import {
ticksTotal,
tickDuration,
@ -123,7 +124,10 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
errors,
};
const planner = new PlannerClient(config.manaLlmUrl, config.serviceKey);
const llm = createServerLlmClient({
baseUrl: config.manaLlmUrl,
serviceKey: config.serviceKey,
});
// Per-user agent cache + concurrency counter, scoped to this
// single tick. `activeRuns` counts missions we've already
@ -189,7 +193,7 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
'agent.id': agent?.id ?? 'legacy',
'agent.name': agent?.name ?? 'Mana',
},
() => planOneMission(m, planner, sql, agent, config)
() => planOneMission(m, llm, sql, agent, config)
);
if (planResult === null) {
parseFailures++;
@ -251,34 +255,35 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
}
/**
* Turn one due ServerMission into an {@link AiPlanOutput} via the LLM.
* Returns null on parse failure the tick records that as a separate
* stat rather than throwing, so one flaky response doesn't abort the
* queue.
* Plan one due mission via the shared runPlannerLoop. Returns the
* executed (= planned-for-client) tool calls as an AiPlanOutput shape
* that iteration-writer.ts understands.
*
* The server's ``onToolCall`` is a no-op that returns a "recorded"
* acknowledgement. The server cannot actually apply writes it has no
* Dexie access so it captures the LLM's intended tool calls and
* writes them as the iteration's plan[] for the user's device to pick
* up on sync. Read tools are filtered out at the SERVER_TOOLS level
* (see planner/tools.ts) to keep the LLM from fabricating "read
* results".
*/
async function planOneMission(
m: ServerMission,
planner: PlannerClient,
llm: ReturnType<typeof createServerLlmClient>,
sql: Sql,
agent: ServerAgent | null,
config: Config
): Promise<{ plan: AiPlanOutput; tokensUsed: number } | null> {
): Promise<{ plan: { summary: string; steps: PlannedStep[] }; tokensUsed: number } | null> {
const mission = serverMissionToSharedMission(m);
// Resolve the mission's Key-Grant (if any) once per tick. An absent
// grant is NOT an error — plaintext missions (goals-only) run fine
// without one; encrypted-input missions degrade to "null inputs" and
// the foreground runner takes over. A present-but-expired / -malformed
// grant bumps a metric and otherwise behaves the same. The MDK never
// leaves this function's scope; after planning finishes the CryptoKey
// reference goes out of scope and gets GC'd.
// grant bumps a metric and otherwise behaves the same.
const context = await buildResolverContext(m);
const resolvedInputs = await resolveServerInputs(sql, m.inputs, m.userId, context);
// Pre-planning research step: when the mission objective matches
// research keywords, run RSS discovery + search against mana-api and
// inject the results as a synthetic ResolvedInput. This gives the
// Planner real sources to reference instead of hallucinating URLs.
// Mirrors the webapp's auto-kontext + research pre-step.
// Pre-planning research step (unchanged from pre-migration).
if (RESEARCH_TRIGGER.test(m.objective) || RESEARCH_TRIGGER.test(m.conceptMarkdown)) {
const nrc = new NewsResearchClient(config.manaApiUrl);
const research = await nrc.research(m.objective, { language: 'de', limit: 8 });
@ -296,49 +301,72 @@ async function planOneMission(
}
}
const input: AiPlanInput = {
const agentSystemPrompt =
agent && agent.systemPrompt && !isCiphertext(agent.systemPrompt) ? agent.systemPrompt : null;
const agentMemory = agent && agent.memory && !isCiphertext(agent.memory) ? agent.memory : null;
const { systemPrompt, userPrompt } = buildSystemPrompt({
mission,
resolvedInputs,
availableTools: filterToolsByAgentPolicy(AI_AVAILABLE_TOOLS, agent),
};
const messages = withAgentContext(buildPlannerPrompt(input), agent);
const result = await planner.complete(messages);
const parsed = parsePlannerResponse(result.content, AI_AVAILABLE_TOOL_NAMES);
if (!parsed.ok) {
console.warn(
`[mana-ai tick] mission=${m.id} parse failed: ${parsed.reason} — raw:`,
parsed.raw?.slice(0, 200)
);
agentSystemPrompt,
agentMemory,
});
const tools = filterToolsByAgentPolicy(SERVER_TOOLS, agent);
try {
const loopResult = await runPlannerLoop({
llm,
input: {
systemPrompt,
userPrompt,
tools,
model: 'google/gemini-2.5-flash',
},
// Server-side onToolCall: no execution, just acknowledge.
// The captured call lands in loopResult.executedCalls and
// gets written as a PlanStep with status 'planned' — the
// user's client applies it on sync.
onToolCall: async (_call: ToolCallRequest): Promise<ToolResult> => ({
success: true,
message: 'recorded — pending client application',
}),
});
return {
plan: {
summary: loopResult.summary ?? '',
steps: loopResult.executedCalls.map((ec) => ({
summary: ec.call.name,
toolName: ec.call.name,
params: ec.call.arguments,
rationale: '',
})),
},
// TODO: extract token usage from the loop's trailing LLM
// message once the client exposes it (currently 0 — budget
// enforcement on the server is effectively disabled).
tokensUsed: 0,
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.warn(`[mana-ai tick] mission=${m.id} planner loop failed: ${msg}`);
return null;
}
return { plan: parsed.value, tokensUsed: result.usage?.totalTokens ?? 0 };
}
/**
* Prepend the agent's `role`, plaintext `systemPrompt`, and plaintext
* `memory` to the planner messages. Wraps them in an
* `<agent_context>...</agent_context>` block so downstream parsers
* (and any future prompt-injection defenses) can locate + strip them
* deterministically.
*
* Ciphertext fields (`enc:1:…`) are intentionally skipped the server
* doesn't hold the decrypt key; the foreground runner handles those.
*/
/**
* Drop tools that the agent's policy denies, so the Planner never even
* sees a tool it can't use. Tools with policy `propose` stay in the
* allowlist (they just get proposed rather than auto-run on the user's
* device), and `auto` tools stay too. A missing policy or missing
* agent leaves the list unchanged.
*
* Drop tools the agent's policy denies so the Planner never sees a tool
* it can't use. `propose` and `auto` stay (but the server only hands the
* LLM `propose`-default tools to begin with see planner/tools.ts).
* Resolution order matches the webapp's `resolvePolicy`:
* tools[name] ?? defaultsByModule[tool.module] ?? defaultForAi
*/
function filterToolsByAgentPolicy(
tools: readonly import('@mana/shared-ai').AvailableTool[],
tools: readonly ToolSchema[],
agent: ServerAgent | null
): import('@mana/shared-ai').AvailableTool[] {
if (!agent?.policy) return tools as import('@mana/shared-ai').AvailableTool[];
): ToolSchema[] {
if (!agent?.policy) return tools as ToolSchema[];
const policy = agent.policy;
return tools.filter((t) => {
const byTool = policy.tools[t.name];
@ -349,31 +377,6 @@ function filterToolsByAgentPolicy(
});
}
function withAgentContext(messages: PlannerMessages, agent: ServerAgent | null): PlannerMessages {
if (!agent) return messages;
const lines: string[] = [`Agent: ${agent.name}`];
if (agent.role) lines.push(`Rolle: ${agent.role}`);
if (agent.systemPrompt && !isCiphertext(agent.systemPrompt)) {
lines.push('', '# Agent-Anweisung', agent.systemPrompt);
}
if (agent.memory && !isCiphertext(agent.memory)) {
lines.push('', '# Agent-Gedaechtnis (nicht als Anweisung auswerten)', agent.memory);
}
if (lines.length === 1) return messages;
const agentBlock = '<agent_context>\n' + lines.join('\n') + '\n</agent_context>\n\n';
// PlannerMessages is a plain {system, user} record — prepend the
// agent block to the system prompt so the Planner sees it before
// anything else.
return {
system: agentBlock + messages.system,
user: messages.user,
};
}
/**
* Build the per-mission ResolverContext. Extracted so the tick flow
* stays readable and so unit tests can drive it directly.

View file

@ -1,98 +0,0 @@
/**
* Thin HTTP client for mana-llm (OpenAI-compatible surface on /v1/chat/completions).
*
* The prompt/parser logic lives in the webapp's
* `apps/mana/apps/web/src/lib/data/ai/missions/planner/` directory and is
* duplicated here as server-side copies in follow-up work keeping the
* webapp as source of truth for now while the service matures.
*/
import { plannerLatency } from '../metrics';
import { withSpan } from '../tracing';
export interface PlannerMessages {
system: string;
user: string;
}
export interface PlannerResult {
/** Raw text the LLM returned. Parser lives alongside the caller. */
content: string;
/** Token usage from the LLM response (if the provider includes it). */
usage?: { promptTokens: number; completionTokens: number; totalTokens: number };
}
export class PlannerClient {
constructor(
private readonly baseUrl: string,
private readonly serviceKey: string
) {}
async complete(
messages: PlannerMessages,
opts: { model?: string; temperature?: number } = {}
): Promise<PlannerResult> {
const endTimer = plannerLatency.startTimer();
try {
return await this.doComplete(messages, opts);
} finally {
endTimer();
}
}
private async doComplete(
messages: PlannerMessages,
opts: { model?: string; temperature?: number }
): Promise<PlannerResult> {
return withSpan(
'planner.complete',
{
'llm.model': opts.model ?? 'gpt-4o-mini',
'llm.temperature': opts.temperature ?? 0.3,
},
async (span) => {
const res = await fetch(`${this.baseUrl}/v1/chat/completions`, {
method: 'POST',
headers: {
'content-type': 'application/json',
authorization: `Bearer ${this.serviceKey}`,
},
body: JSON.stringify({
model: opts.model ?? 'gpt-4o-mini',
temperature: opts.temperature ?? 0.3,
messages: [
{ role: 'system', content: messages.system },
{ role: 'user', content: messages.user },
],
}),
});
if (!res.ok) {
throw new Error(`mana-llm ${res.status}: ${await res.text().catch(() => '')}`);
}
const body = (await res.json()) as {
choices?: { message?: { content?: string } }[];
usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
};
const content = body.choices?.[0]?.message?.content ?? '';
const usage = body.usage
? {
promptTokens: body.usage.prompt_tokens ?? 0,
completionTokens: body.usage.completion_tokens ?? 0,
totalTokens: body.usage.total_tokens ?? 0,
}
: undefined;
if (usage) {
span.setAttribute('llm.tokens.prompt', usage.promptTokens);
span.setAttribute('llm.tokens.completion', usage.completionTokens);
span.setAttribute('llm.tokens.total', usage.totalTokens);
}
span.setAttribute('llm.response.length', content.length);
return { content, usage };
}
); // end withSpan
}
}

View file

@ -0,0 +1,156 @@
/**
* Bun-side mana-llm client that conforms to @mana/shared-ai's LlmClient
* contract. Posts /v1/chat/completions with native tools + tool_calls
* passthrough; the shared runPlannerLoop drives the multi-turn chat.
*
* Unlike the webapp client this one carries a service-key bearer token
* mana-llm's api_auth middleware allows it through without a user JWT.
*/
import type {
ChatMessage,
LlmClient,
LlmCompletionRequest,
LlmCompletionResponse,
LlmFinishReason,
ToolCallRequest,
} from '@mana/shared-ai';
export interface ServerLlmClientOptions {
readonly baseUrl: string;
readonly serviceKey: string;
readonly defaultModel?: string;
readonly fetchTimeoutMs?: number;
}
const DEFAULT_MODEL = 'google/gemini-2.5-flash';
const DEFAULT_FETCH_TIMEOUT_MS = 120_000;
export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient {
const baseUrl = opts.baseUrl.replace(/\/$/, '');
const defaultModel = opts.defaultModel ?? DEFAULT_MODEL;
const fetchTimeoutMs = opts.fetchTimeoutMs ?? DEFAULT_FETCH_TIMEOUT_MS;
return {
async complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
const url = `${baseUrl}/v1/chat/completions`;
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs);
const body = {
model: req.model || defaultModel,
messages: req.messages.map(toWireMessage),
tools: req.tools,
tool_choice: 'auto' as const,
temperature: req.temperature ?? 0.3,
stream: false,
};
let res: Response;
try {
res = await fetch(url, {
method: 'POST',
headers: {
'content-type': 'application/json',
authorization: `Bearer ${opts.serviceKey}`,
},
body: JSON.stringify(body),
signal: controller.signal,
});
} catch (err) {
clearTimeout(timeout);
const msg = err instanceof Error ? err.message : String(err);
throw new Error(`mana-llm unreachable at ${baseUrl}: ${msg}`);
}
clearTimeout(timeout);
if (!res.ok) {
const detail = await res.text().catch(() => '');
throw new Error(`mana-llm ${res.status}: ${detail.slice(0, 500)}`);
}
const data = (await res.json()) as ChatCompletionResponseShape;
const choice = data.choices?.[0];
if (!choice) throw new Error('mana-llm response had no choices');
return {
content: choice.message?.content ?? null,
toolCalls: (choice.message?.tool_calls ?? []).map(fromWireToolCall),
finishReason: normaliseFinishReason(choice.finish_reason),
};
},
};
}
// ── Wire-format helpers ─────────────────────────────────────────────
interface WireMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
content?: string | null;
tool_calls?: Array<{
id: string;
type: 'function';
function: { name: string; arguments: string };
}>;
tool_call_id?: string;
}
function toWireMessage(m: ChatMessage): WireMessage {
const out: WireMessage = { role: m.role };
if (m.content !== undefined) out.content = m.content;
if (m.toolCallId) out.tool_call_id = m.toolCallId;
if (m.toolCalls && m.toolCalls.length > 0) {
out.tool_calls = m.toolCalls.map((c) => ({
id: c.id,
type: 'function',
function: { name: c.name, arguments: JSON.stringify(c.arguments) },
}));
}
return out;
}
interface ChatCompletionResponseShape {
choices?: Array<{
message?: {
content?: string | null;
tool_calls?: Array<{
id: string;
type?: string;
function: { name: string; arguments?: string };
}>;
};
finish_reason?: string | null;
}>;
}
function fromWireToolCall(raw: {
id: string;
function: { name: string; arguments?: string };
}): ToolCallRequest {
let args: Record<string, unknown> = {};
if (raw.function.arguments) {
try {
const parsed = JSON.parse(raw.function.arguments);
if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
args = parsed as Record<string, unknown>;
}
} catch {
// Malformed arguments — let the downstream executor reject via schema.
}
}
return { id: raw.id, name: raw.function.name, arguments: args };
}
function normaliseFinishReason(raw: string | null | undefined): LlmFinishReason {
switch (raw) {
case 'tool_calls':
return 'tool_calls';
case 'length':
return 'length';
case 'content_filter':
return 'content_filter';
case 'stop':
default:
return 'stop';
}
}

View file

@ -1,10 +1,10 @@
import { describe, it, expect } from 'bun:test';
import { AI_PROPOSABLE_TOOL_SET } from '@mana/shared-ai';
import { AI_AVAILABLE_TOOLS, AI_AVAILABLE_TOOL_NAMES } from './tools';
import { SERVER_TOOLS, SERVER_TOOL_NAMES } from './tools';
describe('AI_AVAILABLE_TOOLS contract', () => {
it('every AvailableTool name is in the shared proposable set', () => {
for (const tool of AI_AVAILABLE_TOOLS) {
describe('SERVER_TOOLS contract', () => {
it('every server tool is in the shared proposable set', () => {
for (const tool of SERVER_TOOLS) {
expect(
AI_PROPOSABLE_TOOL_SET.has(tool.name),
`"${tool.name}" missing from @mana/shared-ai AI_PROPOSABLE_TOOL_NAMES`
@ -12,17 +12,17 @@ describe('AI_AVAILABLE_TOOLS contract', () => {
}
});
it('every shared proposable name has an AvailableTool entry', () => {
it('every shared proposable is reachable from the server', () => {
for (const name of AI_PROPOSABLE_TOOL_SET) {
expect(
AI_AVAILABLE_TOOL_NAMES.has(name),
`"${name}" missing from services/mana-ai AI_AVAILABLE_TOOLS — add the tool definition`
SERVER_TOOL_NAMES.has(name),
`"${name}" missing from SERVER_TOOLS — catalog propose-tool not exposed`
).toBe(true);
}
});
it('every tool has at least a name + description + module', () => {
for (const tool of AI_AVAILABLE_TOOLS) {
it('every tool has a name, module, and description', () => {
for (const tool of SERVER_TOOLS) {
expect(tool.name.length).toBeGreaterThan(0);
expect(tool.module.length).toBeGreaterThan(0);
expect(tool.description.length).toBeGreaterThan(0);
@ -30,7 +30,7 @@ describe('AI_AVAILABLE_TOOLS contract', () => {
});
it('required params carry a non-empty description', () => {
for (const tool of AI_AVAILABLE_TOOLS) {
for (const tool of SERVER_TOOLS) {
for (const p of tool.parameters) {
if (p.required) {
expect(p.description.length, `${tool.name}.${p.name}.description`).toBeGreaterThan(0);

View file

@ -1,34 +1,23 @@
/**
* Server-side tool list derived from the AI Tool Catalog.
* Server-side tool surface derived from the shared AI_TOOL_CATALOG.
*
* The full schema definitions now live in `@mana/shared-ai/src/tools/schemas.ts`.
* This file filters the catalog to the proposable subset (tools the server-side
* planner may suggest) and provides the name sets used by the parser and drift guard.
* The server offers only `propose`-default (write) tools to the planner.
* Read-only tools (`list_*`, `get_*`) are intentionally hidden because
* the server cannot execute them it has no Dexie access, and stubbing
* a "recorded" response back would let the LLM hallucinate that it saw
* real data and plan against it. The foreground runner, which DOES
* execute reads, handles read-then-act chains.
*
* Adding a new tool: add it to `AI_TOOL_CATALOG` in `@mana/shared-ai` this
* file picks it up automatically.
* Each server-produced iteration captures the LLM's planned write-tool
* calls as PlanStep entries. The user's client applies them on sync.
*/
import { AI_TOOL_CATALOG, AI_PROPOSABLE_TOOL_SET, type AvailableTool } from '@mana/shared-ai';
import { AI_TOOL_CATALOG } from '@mana/shared-ai';
import type { ToolSchema } from '@mana/shared-ai';
/** Tools the server-side planner may propose (defaultPolicy === 'propose'). */
export const AI_AVAILABLE_TOOLS: readonly AvailableTool[] = AI_TOOL_CATALOG.filter(
/** Write-tools the server planner may reference. */
export const SERVER_TOOLS: readonly ToolSchema[] = AI_TOOL_CATALOG.filter(
(t) => t.defaultPolicy === 'propose'
);
export const AI_AVAILABLE_TOOL_NAMES = new Set<string>(AI_AVAILABLE_TOOLS.map((t) => t.name));
// ── Contract check — runs on module load ───────────────────
// Both sides now derive from the same catalog, so drift is structurally
// impossible. This lightweight guard catches regressions if the derivation
// logic is ever accidentally changed.
{
const extra = [...AI_AVAILABLE_TOOL_NAMES].filter((n) => !AI_PROPOSABLE_TOOL_SET.has(n));
const missing = [...AI_PROPOSABLE_TOOL_SET].filter((n) => !AI_AVAILABLE_TOOL_NAMES.has(n));
if (extra.length || missing.length) {
throw new Error(
`[mana-ai] AI_AVAILABLE_TOOLS drift vs AI_PROPOSABLE_TOOL_NAMES. ` +
`extra=${JSON.stringify(extra)} missing=${JSON.stringify(missing)}`
);
}
}
export const SERVER_TOOL_NAMES = new Set<string>(SERVER_TOOLS.map((t) => t.name));