mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-21 07:06:42 +02:00
feat(ai): guardrail layer — pre/post-plan + pre-execute checks
Add a guardrail system that runs alongside the Mission Runner pipeline to catch obvious issues before they waste tokens or corrupt data.

Architecture (packages/shared-ai/src/guardrails/):
- types.ts: Guardrail, GuardrailResult, 4 phase interfaces
- builtin.ts: 4 built-in guardrails (always active):
  - input-size-limit: blocks >100K chars of resolved input
  - plan-step-limit: blocks plans with >25 steps (runaway planner)
  - duplicate-destructive-tool: warns if undo_drink is called 2x
  - empty-required-params: blocks create_task without title
- runner.ts: runPrePlanGuardrails/runPostPlanGuardrails/runPreExecuteGuardrails

Wired into the Mission Runner's runner.ts at 3 checkpoints:
- Before deps.plan() — pre-plan check
- After plan received — post-plan check
- Before each stage() call — pre-execute check

Guardrails are synchronous, never hit the network, and produce clear error messages when they block.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f5392b8b63
commit
fad7f4bea3
7 changed files with 301 additions and 9 deletions
|
|
@ -40,6 +40,11 @@ import { getAgent } from '../agents/store';
|
|||
import { DEFAULT_AGENT_NAME } from '../agents/types';
|
||||
import type { Mission, MissionIteration, PlanStep } from './types';
|
||||
import type { AiPlanInput, AiPlanOutput, PlannedStep, ResolvedInput } from './planner/types';
|
||||
import {
|
||||
runPrePlanGuardrails,
|
||||
runPostPlanGuardrails,
|
||||
runPreExecuteGuardrails,
|
||||
} from '@mana/shared-ai';
|
||||
|
||||
/** Heuristic: mission objective text that should trigger a pre-step
|
||||
* web-research call. Keeps the trigger explicit so unrelated missions
|
||||
|
|
@ -292,13 +297,20 @@ export async function runMission(
|
|||
);
|
||||
};
|
||||
|
||||
// ── Guardrail: pre-plan ────────────────────────
|
||||
const planInput: AiPlanInput = {
|
||||
mission: mission!,
|
||||
resolvedInputs: loopInputs,
|
||||
availableTools,
|
||||
onToken,
|
||||
};
|
||||
const prePlanCheck = runPrePlanGuardrails(planInput);
|
||||
if (!prePlanCheck.passed) {
|
||||
throw new Error(`Guardrail blocked: ${prePlanCheck.blockReason}`);
|
||||
}
|
||||
|
||||
try {
|
||||
plan = await deps.plan({
|
||||
mission: mission!,
|
||||
resolvedInputs: loopInputs,
|
||||
availableTools,
|
||||
onToken,
|
||||
});
|
||||
plan = await deps.plan(planInput);
|
||||
} catch (err) {
|
||||
if (isAiDebugEnabled()) {
|
||||
void recordAiDebug({
|
||||
|
|
@ -326,6 +338,12 @@ export async function runMission(
|
|||
break;
|
||||
}
|
||||
|
||||
// ── Guardrail: post-plan ──────────────────────────
|
||||
const postPlanCheck = runPostPlanGuardrails(planInput, plan);
|
||||
if (!postPlanCheck.passed) {
|
||||
throw new Error(`Guardrail blocked plan: ${postPlanCheck.blockReason}`);
|
||||
}
|
||||
|
||||
// ── Phase: parsing-response ────────────────────────
|
||||
await enterPhase('parsing-response', `${plan.steps.length} Step(s) erhalten`);
|
||||
await checkCancel();
|
||||
|
|
@ -339,6 +357,20 @@ export async function runMission(
|
|||
);
|
||||
await checkCancel();
|
||||
|
||||
// ── Guardrail: pre-execute ─────────────────────
|
||||
const execCheck = runPreExecuteGuardrails(ps);
|
||||
if (!execCheck.passed) {
|
||||
failedCount++;
|
||||
const stepId = `${iterationId}-${stepCounter++}`;
|
||||
recordedSteps.push({
|
||||
id: stepId,
|
||||
summary: `Guardrail: ${execCheck.blockReason}`,
|
||||
intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params },
|
||||
status: 'failed',
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
const outcome = await stage(ps, aiActor);
|
||||
const stepId = `${iterationId}-${stepCounter++}`;
|
||||
if (!outcome.ok) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue