feat(mana-ai): wire shared-ai planner + real mana-llm calls (v0.2)

Service now produces plans end-to-end for due missions. Takes the
shared prompt/parser from @mana/shared-ai, calls mana-llm's
OpenAI-compatible endpoint, parses + validates the response against a
server-side tool allow-list.

- `src/planner/tools.ts` — hardcoded subset of webapp tools where
  policy === 'propose'. Mirror of `DEFAULT_AI_POLICY` in the webapp;
  drift just means the server doesn't suggest newly-added tools
  (graceful degradation). Contract test between the two lists is a
  sensible follow-up.
- `src/cron/tick.ts`
  - Iterates due missions, builds the shared Planner prompt per mission,
    parses the LLM response, logs the resulting plan
  - Per-mission try/catch so one flaky LLM response doesn't abort the
    queue; stats now track `plansProduced` + `parseFailures`
  - `serverMissionToSharedMission()` converts the projection shape to
    the shared-ai Mission type at the boundary
- `resolvedInputs: []` today — the Planner sees concept + objective +
  iteration history only. Full resolvers (notes/kontext/goals via
  Postgres replay) land alongside write-back in the next PR.
- No write-back yet: the plan is logged but not persisted to
  `sync_changes`. Write-back needs an RLS-scoped helper mirroring
  mana-sync's `withUser` pattern — tracked explicitly as the remaining
  open piece in CLAUDE.md.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-15 00:06:22 +02:00
parent 0d90b12d1c
commit 203fe3ef05
4 changed files with 185 additions and 27 deletions

7
pnpm-lock.yaml generated
View file

@ -3285,6 +3285,9 @@ importers:
services/mana-ai:
dependencies:
'@mana/shared-ai':
specifier: workspace:*
version: link:../../packages/shared-ai
'@mana/shared-hono':
specifier: workspace:*
version: link:../../packages/shared-hono
@ -25836,7 +25839,7 @@ snapshots:
obug: 2.1.1
std-env: 4.0.0
tinyrainbow: 3.1.0
vitest: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@22.19.17)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3))
vitest: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@24.12.2)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@24.12.2)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3))
'@vitest/expect@4.1.3':
dependencies:
@ -25898,7 +25901,7 @@ snapshots:
sirv: 3.0.2
tinyglobby: 0.2.15
tinyrainbow: 3.1.0
vitest: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@22.19.17)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3))
vitest: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@24.12.2)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@24.12.2)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3))
'@vitest/utils@4.1.3':
dependencies:

View file

@ -9,6 +9,7 @@
"test": "bun test"
},
"dependencies": {
"@mana/shared-ai": "workspace:*",
"@mana/shared-hono": "workspace:*",
"hono": "^4.7.0",
"postgres": "^3.4.5"

View file

@ -1,25 +1,35 @@
/**
* Background tick scans Postgres for due Missions and (eventually) runs
* them through the Planner + writes the resulting plan back as a Mission
* iteration.
* Background tick scans Postgres for due Missions, calls mana-llm via
* the shared Planner prompt/parser, logs the resulting plan.
*
* Current state (v0.1): reads due missions, logs the intent, does NOT
* write back. Writing requires deciding how proposals materialize
* server-side — see `CLAUDE.md` "Open design questions" for the
* trade-offs. Shipping this as a scaffold unblocks:
* - deployability of the service
* - smoke-testing Postgres connectivity + mana-llm reachability
* - next PR wires the actual mission-execution flow
* Current state (v0.2): produces plans end-to-end, does NOT yet write
* them back as Mission iterations. The write-back requires RLS-scoped
* transactions on `mana_sync` (same pattern as the Go server's
* `withUser`) — tracked as the next PR in `CLAUDE.md`.
*
* Input-resolver wiring is also stubbed: `resolvedInputs: []` is handed
* to the Planner today, so the LLM sees only the mission's concept +
* objective + iteration history. Real resolvers land alongside write-back.
*/
import {
buildPlannerPrompt,
parsePlannerResponse,
type AiPlanInput,
type AiPlanOutput,
type Mission,
} from '@mana/shared-ai';
import { getSql } from '../db/connection';
import { listDueMissions } from '../db/missions-projection';
import { listDueMissions, type ServerMission } from '../db/missions-projection';
import { PlannerClient } from '../planner/client';
import { AI_AVAILABLE_TOOLS, AI_AVAILABLE_TOOL_NAMES } from '../planner/tools';
import type { Config } from '../config';
/**
 * Summary of one tick pass, as returned by `runTickOnce`.
 */
export interface TickStats {
/** ISO-8601 timestamp (`Date#toISOString`) taken at the start of the pass. */
scannedAt: string;
/** Number of due missions found by the scan; 0 when the pass was skipped. */
dueMissionCount: number;
/** Missions for which the LLM response was parsed into a plan. */
plansProduced: number;
/** Missions for which planning returned `null`, i.e. the LLM response failed to parse. */
parseFailures: number;
/**
 * Error messages collected during the pass. Per-mission failures are
 * prefixed with `mission=<id>: `; a pass that was skipped because a
 * previous one was still running reports `['overlap-skipped']`.
 */
errors: string[];
}
@ -28,11 +38,19 @@ let running = false;
/** One tick pass. Idempotent; overlap-guarded at module level. */
export async function runTickOnce(config: Config): Promise<TickStats> {
if (running) {
return { scannedAt: new Date().toISOString(), dueMissionCount: 0, errors: ['overlap-skipped'] };
return {
scannedAt: new Date().toISOString(),
dueMissionCount: 0,
plansProduced: 0,
parseFailures: 0,
errors: ['overlap-skipped'],
};
}
running = true;
const errors: string[] = [];
let dueMissionCount = 0;
let plansProduced = 0;
let parseFailures = 0;
const scannedAt = new Date().toISOString();
try {
@ -40,36 +58,104 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
const missions = await listDueMissions(sql, scannedAt);
dueMissionCount = missions.length;
if (missions.length === 0) return { scannedAt, dueMissionCount, errors };
if (missions.length === 0)
return { scannedAt, dueMissionCount, plansProduced, parseFailures, errors };
// Planner is instantiated here but not invoked yet — see CLAUDE.md.
// The constructor is cheap; holding onto it sets the shape for the
// next PR that actually calls `complete()` per mission.
void new PlannerClient(config.manaLlmUrl, config.serviceKey);
const planner = new PlannerClient(config.manaLlmUrl, config.serviceKey);
for (const m of missions) {
console.log(
`[mana-ai tick] would plan mission=${m.id} user=${m.userId} title=${JSON.stringify(
m.title
)}`
);
try {
const plan = await planOneMission(m, planner);
if (plan === null) {
parseFailures++;
continue;
}
plansProduced++;
console.log(
`[mana-ai tick] mission=${m.id} user=${m.userId} plan=${plan.steps.length}step(s) summary=${JSON.stringify(
plan.summary
)}`
);
// TODO: write plan back as `Mission.iterations[]` entry with
// `source: 'server'` so the webapp staging-effect can turn
// each PlannedStep into a local Proposal. Requires RLS-
// scoped write helper (see CLAUDE.md, design option A).
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
errors.push(`mission=${m.id}: ${msg}`);
console.error(`[mana-ai tick] mission=${m.id} plan failed:`, msg);
}
}
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
errors.push(msg);
console.error('[mana-ai tick] error:', msg);
console.error('[mana-ai tick] scan error:', msg);
} finally {
running = false;
}
return { scannedAt, dueMissionCount, errors };
return { scannedAt, dueMissionCount, plansProduced, parseFailures, errors };
}
/**
 * Plan a single due mission: build the shared Planner prompt, request a
 * completion through `planner`, and parse the reply, handing the parser
 * the server-side tool-name allow-list.
 *
 * A reply that fails to parse is reported as `null` instead of an
 * exception — the tick counts it as a separate stat, so one flaky
 * response doesn't abort the rest of the queue.
 *
 * @param m - Due mission as read from the server-side projection.
 * @param planner - Client used to request the completion.
 * @returns The parsed plan, or `null` when parsing failed (a warning
 *   with the reason and up to 200 characters of the raw reply is logged).
 */
async function planOneMission(
	m: ServerMission,
	planner: PlannerClient
): Promise<AiPlanOutput | null> {
	const input: AiPlanInput = {
		mission: serverMissionToSharedMission(m),
		// Resolvers are not wired yet, so the LLM only gets concept +
		// objective + iteration history — the same thing the webapp
		// sends for a mission that has no linked inputs.
		resolvedInputs: [],
		availableTools: AI_AVAILABLE_TOOLS,
	};
	const prompt = buildPlannerPrompt(input);
	const { content } = await planner.complete(prompt);
	const parsed = parsePlannerResponse(content, AI_AVAILABLE_TOOL_NAMES);

	// Happy path first; everything below handles the parse failure.
	if (parsed.ok) return parsed.value;

	console.warn(
		`[mana-ai tick] mission=${m.id} parse failed: ${parsed.reason} — raw:`,
		parsed.raw?.slice(0, 200)
	);
	return null;
}
/**
 * Convert a projection row into the shared-ai `Mission` shape.
 *
 * `cadence` and `iterations` are passed through with a type cast only;
 * they are not inspected or validated here. The original note on this
 * function says the server does not need to interpret them (cadence
 * math and iteration bookkeeping live in the webapp), so the cast is
 * done once at this boundary.
 *
 * `createdAt` / `updatedAt` are not read from the projection: both are
 * set to `nextRunAt`, or to the current time when `nextRunAt` is
 * null/undefined.
 *
 * @param m - Mission as read from the server-side projection.
 * @returns The same mission expressed as a shared-ai `Mission`.
 */
function serverMissionToSharedMission(m: ServerMission): Mission {
	// Read the clock at most once so the two fallback timestamps can never
	// disagree (two separate `new Date()` calls may straddle a millisecond).
	const timestamp = m.nextRunAt ?? new Date().toISOString();
	return {
		id: m.id,
		createdAt: timestamp,
		updatedAt: timestamp,
		title: m.title,
		conceptMarkdown: m.conceptMarkdown,
		objective: m.objective,
		inputs: m.inputs,
		cadence: m.cadence as Mission['cadence'],
		state: m.state,
		nextRunAt: m.nextRunAt,
		iterations: m.iterations as Mission['iterations'],
		userId: m.userId,
	};
}
let handle: ReturnType<typeof setInterval> | null = null;
export function startTick(config: Config): () => void {
if (!config.tickEnabled || handle !== null) return stopTick;
// Kick once immediately so a just-due mission doesn't wait a full interval.
void runTickOnce(config);
handle = setInterval(() => void runTickOnce(config), config.tickIntervalMs);
return stopTick;

View file

@ -0,0 +1,68 @@
/**
* Hardcoded allow-list of tools the server-side Planner may propose.
*
* The webapp owns the full tool registry (in
* `apps/mana/apps/web/src/lib/data/tools/registry.ts`) and the policy
* (`DEFAULT_AI_POLICY` in `data/ai/policy.ts`). This file mirrors the
* subset where policy === 'propose' so the mana-ai Bun service can
* build a valid prompt without importing Dexie-bound code.
*
* Drift risk: if the webapp adds a new proposable tool and this file
* isn't updated, the mana-ai Planner simply won't suggest it — graceful
* degradation. A contract test that compares both lists would be a
* sensible follow-up.
*/
import type { AvailableTool } from '@mana/shared-ai';
/**
 * Tools offered to the server-side Planner. Each entry gives the tool
 * name, the module it belongs to, a description and its parameter list.
 * The description strings are German and are kept verbatim; per the file
 * header they mirror the webapp registry.
 */
export const AI_AVAILABLE_TOOLS: readonly AvailableTool[] = [
// todo: create a task; only `title` is required.
{
name: 'create_task',
module: 'todo',
description: 'Erstellt einen neuen Task mit optionalem Faelligkeitsdatum und Prioritaet',
parameters: [
{ name: 'title', type: 'string', description: 'Titel des Tasks', required: true },
{
name: 'dueDate',
type: 'string',
description: 'Faelligkeitsdatum (YYYY-MM-DD)',
required: false,
},
{
name: 'priority',
type: 'string',
description: 'Prioritaet',
required: false,
enum: ['low', 'medium', 'high'],
},
{ name: 'description', type: 'string', description: 'Beschreibung', required: false },
],
},
// todo: mark an existing task as done, identified by `taskId`.
{
name: 'complete_task',
module: 'todo',
description: 'Markiert einen Task als erledigt',
parameters: [{ name: 'taskId', type: 'string', description: 'ID des Tasks', required: true }],
},
// calendar: create an event; `title` and `startIso` are required, `endIso` is optional.
{
name: 'create_event',
module: 'calendar',
description: 'Erstellt einen Kalender-Event',
parameters: [
{ name: 'title', type: 'string', description: 'Event-Titel', required: true },
{ name: 'startIso', type: 'string', description: 'Start (ISO)', required: true },
{ name: 'endIso', type: 'string', description: 'Ende (ISO)', required: false },
],
},
// places: add a place; `name` is required, `category` is optional.
{
name: 'create_place',
module: 'places',
description: 'Fügt einen neuen Ort hinzu',
parameters: [
{ name: 'name', type: 'string', description: 'Name des Ortes', required: true },
{ name: 'category', type: 'string', description: 'Kategorie', required: false },
],
},
];
/**
 * Names of all entries in `AI_AVAILABLE_TOOLS`, derived from the list so
 * the two cannot drift apart. The tick passes this set to
 * `parsePlannerResponse` together with the raw LLM reply.
 */
export const AI_AVAILABLE_TOOL_NAMES = new Set<string>(AI_AVAILABLE_TOOLS.map((t) => t.name));