feat(ai): guardrail layer — pre/post-plan + pre-execute checks

Add a guardrail system that runs alongside the Mission Runner pipeline
to catch obvious issues before they waste tokens or corrupt data.

Architecture (packages/shared-ai/src/guardrails/):
- types.ts: Guardrail, GuardrailResult, 4 phase interfaces
- builtin.ts: 4 built-in guardrails (always active):
  - input-size-limit: blocks >100K chars of resolved input
  - plan-step-limit: blocks plans with >25 steps (runaway planner)
  - duplicate-destructive-tool: warns if undo_drink is called twice
  - empty-required-params: blocks create_task without title
- runner.ts: runPrePlanGuardrails/runPostPlanGuardrails/runPreExecuteGuardrails

Wired into the Mission Runner's runner.ts (not guardrails/runner.ts) at 3 checkpoints:
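- Before deps.plan() — pre-plan check (throws an Error if blocked)
- After the plan is received — post-plan check (throws an Error if blocked)
- Before each stage() call — pre-execute check (a blocked step is recorded as failed and skipped)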

Guardrails are synchronous, never hit the network, and produce
clear error messages when they block.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-16 15:11:34 +02:00
parent f5392b8b63
commit fad7f4bea3
7 changed files with 301 additions and 9 deletions

View file

@ -40,6 +40,11 @@ import { getAgent } from '../agents/store';
import { DEFAULT_AGENT_NAME } from '../agents/types';
import type { Mission, MissionIteration, PlanStep } from './types';
import type { AiPlanInput, AiPlanOutput, PlannedStep, ResolvedInput } from './planner/types';
import {
runPrePlanGuardrails,
runPostPlanGuardrails,
runPreExecuteGuardrails,
} from '@mana/shared-ai';
/** Heuristic: mission objective text that should trigger a pre-step
* web-research call. Keeps the trigger explicit so unrelated missions
@ -292,13 +297,20 @@ export async function runMission(
);
};
// ── Guardrail: pre-plan ────────────────────────
const planInput: AiPlanInput = {
mission: mission!,
resolvedInputs: loopInputs,
availableTools,
onToken,
};
const prePlanCheck = runPrePlanGuardrails(planInput);
if (!prePlanCheck.passed) {
throw new Error(`Guardrail blocked: ${prePlanCheck.blockReason}`);
}
try {
plan = await deps.plan({
mission: mission!,
resolvedInputs: loopInputs,
availableTools,
onToken,
});
plan = await deps.plan(planInput);
} catch (err) {
if (isAiDebugEnabled()) {
void recordAiDebug({
@ -326,6 +338,12 @@ export async function runMission(
break;
}
// ── Guardrail: post-plan ──────────────────────────
const postPlanCheck = runPostPlanGuardrails(planInput, plan);
if (!postPlanCheck.passed) {
throw new Error(`Guardrail blocked plan: ${postPlanCheck.blockReason}`);
}
// ── Phase: parsing-response ────────────────────────
await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`);
await checkCancel();
@ -339,6 +357,20 @@ export async function runMission(
);
await checkCancel();
// ── Guardrail: pre-execute ─────────────────────
const execCheck = runPreExecuteGuardrails(ps);
if (!execCheck.passed) {
failedCount++;
const stepId = `${iterationId}-${stepCounter++}`;
recordedSteps.push({
id: stepId,
summary: `Guardrail: ${execCheck.blockReason}`,
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
status: 'failed',
});
continue;
}
const outcome = await stage(ps, aiActor);
const stepId = `${iterationId}-${stepCounter++}`;
if (!outcome.ok) {