feat(shared-ai): canonical proposable-tool list + drift guard on mana-ai

Makes the webapp's AI policy and the server's tool allow-list physically
impossible to drift. Adds the missing entries the guard caught on first
run: `complete_tasks_by_title`, `visit_place`, `undo_drink` now have
parameter schemas server-side too.

- `packages/shared-ai/src/policy/proposable-tools.ts`
  - `AI_PROPOSABLE_TOOL_NAMES` as `const` array + literal union type
  - `AI_PROPOSABLE_TOOL_SET` for set-membership checks
- Webapp `DEFAULT_AI_POLICY` derives its `propose` entries from the
  shared list via `Object.fromEntries(...)` — adding a tool there is now
  a one-line change in `@mana/shared-ai`
- mana-ai `AI_AVAILABLE_TOOLS`: module-load assertion compares its
  hardcoded names against `AI_PROPOSABLE_TOOL_SET` and throws with a
  pointed error on drift (extras in one direction, missing in the
  other). Service refuses to start on mismatch — better than silent
  degradation.
- Bun test (`tools.test.ts`) runs the same contract plus sanity checks
  (non-empty description, required params carry docs). Vitest policy
  test adds the symmetric check on the webapp side.

All three runtimes now green: webapp 66/66, shared-ai 2/2,
mana-ai 9/9 Bun tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-15 00:52:38 +02:00
parent dccd9c5c4e
commit 4be5e29bd3
7 changed files with 164 additions and 37 deletions

View file

@ -1,6 +1,7 @@
import { describe, it, expect } from 'vitest';
import { resolvePolicy, setAiPolicy, DEFAULT_AI_POLICY } from './policy';
import { registerTools } from '../tools/registry';
import { AI_PROPOSABLE_TOOL_NAMES } from '@mana/shared-ai';
import type { Actor } from '../events/actor';
const AI: Actor = { kind: 'ai', missionId: 'm', iterationId: 'i', rationale: 'r' };
@ -57,4 +58,10 @@ describe('resolvePolicy', () => {
restore();
expect(resolvePolicy('create_task', AI)).toBe('propose');
});
// Symmetric webapp-side check: each name in the shared canonical list must
// resolve to 'propose' in the webapp's default policy map.
it('every shared-ai proposable tool maps to propose in DEFAULT_AI_POLICY', () => {
	AI_PROPOSABLE_TOOL_NAMES.forEach((toolName) => {
		const decision = DEFAULT_AI_POLICY.tools[toolName];
		expect(decision, `${toolName} should be 'propose'`).toBe('propose');
	});
});
});

View file

@ -21,6 +21,7 @@
import { getTool } from '../tools/registry';
import type { Actor } from '../events/actor';
import { AI_PROPOSABLE_TOOL_NAMES } from '@mana/shared-ai';
export type PolicyDecision = 'auto' | 'propose' | 'deny';
@ -33,33 +34,34 @@ export interface AiPolicy {
readonly defaultForAi: PolicyDecision;
}
// ── Auto-executed tools (read-only / append-only self-state) ──────────
// Kept here as the canonical local-only list — policies that don't mutate
// user-visible records are webapp-specific and don't need to travel
// through @mana/shared-ai.
// Built from a flat name list so it has the same shape as the derived
// PROPOSE_TOOLS map: every listed tool name maps to the 'auto' decision.
const AUTO_TOOLS: Record<string, 'auto'> = Object.fromEntries(
	[
		// Read-only queries.
		'get_task_stats',
		'list_tasks',
		'get_todays_events',
		'get_drink_progress',
		'nutrition_summary',
		'get_places',
		'location_log',
		// Append-only self-state logs: when the AI asks "did you drink water?"
		// and the user confirms, logging it should not need a second approval.
		'log_drink',
		'log_meal',
	].map((toolName) => [toolName, 'auto'] as const)
);
// ── Proposable tools derived from the shared canonical list ───────────
// Keeps the webapp policy and mana-ai's `AI_AVAILABLE_TOOLS` from drifting.
// Adding a new proposable tool → append to AI_PROPOSABLE_TOOL_NAMES in
// @mana/shared-ai and both sides pick it up automatically.
// One 'propose' entry per name in the shared canonical list, inserted in
// list order.
const PROPOSE_TOOLS: Record<string, 'propose'> = {};
for (const toolName of AI_PROPOSABLE_TOOL_NAMES) {
	PROPOSE_TOOLS[toolName] = 'propose';
}
export const DEFAULT_AI_POLICY: AiPolicy = {
tools: {
// ── Read-only / harmless → auto ───────────────────────
get_task_stats: 'auto',
list_tasks: 'auto',
get_todays_events: 'auto',
get_drink_progress: 'auto',
nutrition_summary: 'auto',
get_places: 'auto',
location_log: 'auto',
// ── Append-only self-state logs → auto ────────────────
// These are fast-feedback user-logged values (drink, meal). The AI
// proposing "did you drink water?" then the user confirming + AI
// logging it should not require a second approval step.
log_drink: 'auto',
log_meal: 'auto',
// ── Mutating user-visible records → propose ───────────
create_task: 'propose',
complete_task: 'propose',
complete_tasks_by_title: 'propose',
create_event: 'propose',
create_place: 'propose',
visit_place: 'propose',
undo_drink: 'propose',
},
tools: { ...AUTO_TOOLS, ...PROPOSE_TOOLS },
defaultForAi: 'propose',
};

View file

@ -29,3 +29,9 @@ export type {
ResolvedInput,
} from './planner';
export { buildPlannerPrompt, parsePlannerResponse } from './planner';
export {
AI_PROPOSABLE_TOOL_NAMES,
AI_PROPOSABLE_TOOL_SET,
type AiProposableToolName,
} from './policy';

View file

@ -0,0 +1,5 @@
export {
AI_PROPOSABLE_TOOL_NAMES,
AI_PROPOSABLE_TOOL_SET,
type AiProposableToolName,
} from './proposable-tools';

View file

@ -0,0 +1,31 @@
/**
* Canonical list of tool names the AI is allowed to *propose*.
*
* Both the webapp's `DEFAULT_AI_POLICY` and the server-side
* `AI_AVAILABLE_TOOLS` list in `services/mana-ai/` derive from here.
* Adding a new proposable tool:
*
* 1. Append its name to {@link AI_PROPOSABLE_TOOL_NAMES}
* 2. Add the tool with its params to `AI_AVAILABLE_TOOLS` in mana-ai
* (mana-ai's contract test and module-load assertion ensure step 2 isn't forgotten)
* 3. The webapp's `DEFAULT_AI_POLICY` picks it up automatically
*
* Tools NOT in this list default to `'propose'` only if the per-tool
* policy map lacks an explicit entry. Most `auto` / `deny` decisions
* stay hardcoded in the webapp policy — this shared list only covers
* the tools the *server-side* planner actively proposes.
*/
/**
 * Canonical tool names, declared `as const` so the literal union type
 * {@link AiProposableToolName} can be derived from the array's elements.
 */
export const AI_PROPOSABLE_TOOL_NAMES = [
'create_task',
'complete_task',
'complete_tasks_by_title',
'create_event',
'create_place',
'visit_place',
'undo_drink',
] as const;
/** Literal union of every name listed in {@link AI_PROPOSABLE_TOOL_NAMES}. */
export type AiProposableToolName = (typeof AI_PROPOSABLE_TOOL_NAMES)[number];
/**
 * Set view of {@link AI_PROPOSABLE_TOOL_NAMES} for membership checks.
 * Typed as `ReadonlySet<string>` so `has()` accepts arbitrary strings,
 * not just the known literals.
 */
export const AI_PROPOSABLE_TOOL_SET: ReadonlySet<string> = new Set(AI_PROPOSABLE_TOOL_NAMES);

View file

@ -0,0 +1,41 @@
import { describe, it, expect } from 'bun:test';
import { AI_PROPOSABLE_TOOL_SET } from '@mana/shared-ai';
import { AI_AVAILABLE_TOOLS, AI_AVAILABLE_TOOL_NAMES } from './tools';
// Contract suite: checks AI_AVAILABLE_TOOLS against the shared proposable
// name set in both directions, plus basic sanity checks on each definition.
describe('AI_AVAILABLE_TOOLS contract', () => {
	// Direction 1: no locally defined tool may be unknown to the shared list.
	it('every AvailableTool name is in the shared proposable set', () => {
		AI_AVAILABLE_TOOLS.forEach(({ name }) => {
			const isShared = AI_PROPOSABLE_TOOL_SET.has(name);
			expect(isShared, `"${name}" missing from @mana/shared-ai AI_PROPOSABLE_TOOL_NAMES`).toBe(
				true
			);
		});
	});

	// Direction 2: every shared name needs a local tool definition.
	it('every shared proposable name has an AvailableTool entry', () => {
		Array.from(AI_PROPOSABLE_TOOL_SET).forEach((sharedName) => {
			const isDefined = AI_AVAILABLE_TOOL_NAMES.has(sharedName);
			expect(
				isDefined,
				`"${sharedName}" missing from services/mana-ai AI_AVAILABLE_TOOLS — add the tool definition`
			).toBe(true);
		});
	});

	it('every tool has at least a name + description + module', () => {
		AI_AVAILABLE_TOOLS.forEach((entry) => {
			expect(entry.name.length).toBeGreaterThan(0);
			expect(entry.module.length).toBeGreaterThan(0);
			expect(entry.description.length).toBeGreaterThan(0);
		});
	});

	it('required params carry a non-empty description', () => {
		for (const entry of AI_AVAILABLE_TOOLS) {
			// Only required parameters are held to the documentation rule.
			const requiredParams = entry.parameters.filter((param) => param.required);
			for (const param of requiredParams) {
				expect(
					param.description.length,
					`${entry.name}.${param.name}.description`
				).toBeGreaterThan(0);
			}
		}
	});
});

View file

@ -1,19 +1,14 @@
/**
* Hardcoded allow-list of tools the server-side Planner may propose.
*
* The webapp owns the full tool registry (in
* `apps/mana/apps/web/src/lib/data/tools/registry.ts`) and the policy
* (`DEFAULT_AI_POLICY` in `data/ai/policy.ts`). This file mirrors the
* subset where policy === 'propose' so the mana-ai Bun service can
* build a valid prompt without importing Dexie-bound code.
*
* Drift risk: if the webapp adds a new proposable tool and this file
* isn't updated, the mana-ai Planner simply won't suggest it — graceful
* degradation. A contract test that compares both lists would be a
* sensible follow-up.
* Parameter shapes live here (the webapp owns the full Dexie-bound
* registry); the set of NAMES is shared via `@mana/shared-ai`'s
* `AI_PROPOSABLE_TOOL_NAMES`. The module-load assertion at the bottom
* guards against drift in either direction — if this file or the shared
* list falls out of sync, the service refuses to start.
*/
import type { AvailableTool } from '@mana/shared-ai';
import { AI_PROPOSABLE_TOOL_SET, type AvailableTool } from '@mana/shared-ai';
export const AI_AVAILABLE_TOOLS: readonly AvailableTool[] = [
{
@ -44,6 +39,19 @@ export const AI_AVAILABLE_TOOLS: readonly AvailableTool[] = [
description: 'Markiert einen Task als erledigt',
parameters: [{ name: 'taskId', type: 'string', description: 'ID des Tasks', required: true }],
},
{
name: 'complete_tasks_by_title',
module: 'todo',
description: 'Markiert alle Tasks deren Titel den Substring enthält (case-insensitive)',
parameters: [
{
name: 'titleSubstring',
type: 'string',
description: 'Teil des Task-Titels',
required: true,
},
],
},
{
name: 'create_event',
module: 'calendar',
@ -63,6 +71,33 @@ export const AI_AVAILABLE_TOOLS: readonly AvailableTool[] = [
{ name: 'category', type: 'string', description: 'Kategorie', required: false },
],
},
{
name: 'visit_place',
module: 'places',
description: 'Vermerkt einen Besuch an einem bereits erfassten Ort',
parameters: [{ name: 'placeId', type: 'string', description: 'ID des Ortes', required: true }],
},
{
name: 'undo_drink',
module: 'drink',
description: 'Macht den letzten Drink-Eintrag rückgängig',
parameters: [],
},
];
/** Names of all tools defined in AI_AVAILABLE_TOOLS, as a set for membership checks. */
export const AI_AVAILABLE_TOOL_NAMES = new Set<string>(
	Array.from(AI_AVAILABLE_TOOLS, (tool) => tool.name)
);
// ── Contract check — runs on module load ───────────────────
// Catches drift between this file and @mana/shared-ai's canonical
// proposable list. A mismatch means the webapp's policy + mana-ai are
// about to disagree; better fail fast than ship a silently-degraded AI.
{
	// Names defined here that the shared canonical list does not contain.
	const surplus: string[] = [];
	for (const localName of AI_AVAILABLE_TOOL_NAMES) {
		if (!AI_PROPOSABLE_TOOL_SET.has(localName)) {
			surplus.push(localName);
		}
	}

	// Names in the shared canonical list that have no definition here.
	const absent: string[] = [];
	for (const sharedName of AI_PROPOSABLE_TOOL_SET) {
		if (!AI_AVAILABLE_TOOL_NAMES.has(sharedName)) {
			absent.push(sharedName);
		}
	}

	const inSync = surplus.length === 0 && absent.length === 0;
	if (!inSync) {
		// Throwing at module scope makes importing this file fail on drift.
		const details = `extra=${JSON.stringify(surplus)} missing=${JSON.stringify(absent)}`;
		throw new Error(`[mana-ai] AI_AVAILABLE_TOOLS drift vs AI_PROPOSABLE_TOOL_NAMES. ` + details);
	}
}