diff --git a/.env.development b/.env.development index 323c01b73..1d7c28442 100644 --- a/.env.development +++ b/.env.development @@ -176,7 +176,10 @@ OLLAMA_URL=http://localhost:11434 # the GPU LLM proxy (gpu-llm.mana.how). MANA_LLM_URL=https://llm.mana.how MANA_LLM_API_KEY= -MANA_LLM_DEFAULT_MODEL=gemma3:4b +# Legacy: MANA_LLM_DEFAULT_MODEL / WRITING_MODEL / COMIC_STORYBOARD_MODEL +# / VISION_MODEL — removed in M5 of llm-fallback-aliases. Backend code +# now requests `mana/` aliases (see packages/shared-ai/src/llm- +# aliases.ts) which mana-llm resolves via services/mana-llm/aliases.yaml. # mana-research — unified research orchestration (port 3068). Fronts # search + extract + sync/async research agents behind one API. mana-ai @@ -523,9 +526,6 @@ GPU_API_KEY=sk-gpu-cf483ede1e05e28fba5e56c94cd3c24e7c245e57816d3e86 GPU_SERVER_URL=https://gpu.mana.how GPU_SERVER_LAN_URL=http://192.168.178.11 -# Vision Model for Food + Planta (local, replaces Google Gemini) -VISION_MODEL=ollama/gemma3:12b - # ============================================ # MANA-MAIL SERVICE (Port 3042) # ============================================ diff --git a/apps/api/src/lib/llm.ts b/apps/api/src/lib/llm.ts index 6744d99c7..0e5f2b1cf 100644 --- a/apps/api/src/lib/llm.ts +++ b/apps/api/src/lib/llm.ts @@ -10,8 +10,11 @@ * the full concatenated text at the end. Used for synthesis. * * mana-llm exposes an OpenAI-compatible /v1/chat/completions endpoint - * (see services/mana-llm). Models are namespaced as `provider/model`, e.g. - * `ollama/gemma3:4b`, `openrouter/meta-llama/llama-3.1-70b-instruct`. + * (see services/mana-llm). Callers should request models via the + * `MANA_LLM.` aliases from `./llm-aliases` — the gateway resolves + * them through `services/mana-llm/aliases.yaml` with health-aware + * fallback. Concrete provider/model strings are reserved for the + * registry itself. * * Internal service-to-service calls — no auth on the wire (private network). */ diff --git a/apps/api/src/modules/comic/routes.ts b/apps/api/src/modules/comic/routes.ts index e83253a61..0998b97c3 100644 --- a/apps/api/src/modules/comic/routes.ts +++ b/apps/api/src/modules/comic/routes.ts @@ -27,9 +27,10 @@ import { Hono } from 'hono'; import { llmJson, LlmError } from '../../lib/llm'; +import { MANA_LLM } from '@mana/shared-ai'; import { logger, type AuthVariables } from '@mana/shared-hono'; -const STORYBOARD_MODEL = process.env.COMIC_STORYBOARD_MODEL || 'ollama/gemma3:4b'; +const STORYBOARD_MODEL = MANA_LLM.STRUCTURED; type ComicStyle = 'comic' | 'manga' | 'cartoon' | 'graphic-novel' | 'webtoon'; diff --git a/apps/api/src/modules/context/routes.ts b/apps/api/src/modules/context/routes.ts index f5cd59414..875d7ac06 100644 --- a/apps/api/src/modules/context/routes.ts +++ b/apps/api/src/modules/context/routes.ts @@ -8,10 +8,11 @@ import { Hono } from 'hono'; import { consumeCredits, validateCredits } from '@mana/shared-hono/credits'; import type { AuthVariables } from '@mana/shared-hono'; +import { MANA_LLM } from '@mana/shared-ai'; const LLM_URL = process.env.MANA_LLM_URL || 'http://localhost:3025'; const CRAWLER_URL = process.env.MANA_CRAWLER_URL || 'http://localhost:3023'; -const DEFAULT_SUMMARY_MODEL = process.env.MANA_LLM_DEFAULT_MODEL || 'gemma3:4b'; +const DEFAULT_SUMMARY_MODEL = MANA_LLM.FAST_TEXT; const routes = new Hono<{ Variables: AuthVariables }>(); @@ -231,7 +232,7 @@ routes.post('/ai/generate', async (c) => { headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ messages, - model: model || 'gemma3:4b', + model: model || MANA_LLM.FAST_TEXT, max_tokens: maxTokens || 2000, }), }); @@ -245,7 +246,7 @@ routes.post('/ai/generate', async (c) => { // Consume credits await consumeCredits(userId, 'AI_CONTEXT_GENERATE', 5, `AI generation (${tokensUsed} tokens)`); - return c.json({ content, tokensUsed, model: model || 'gemma3:4b' }); + return c.json({ content, tokensUsed, model: model || MANA_LLM.FAST_TEXT }); } catch (_err) { return c.json({ error: 'Generation failed' }, 500); } diff --git a/apps/api/src/modules/food/routes.ts b/apps/api/src/modules/food/routes.ts index 8e02c359a..f23583d0f 100644 --- a/apps/api/src/modules/food/routes.ts +++ b/apps/api/src/modules/food/routes.ts @@ -30,14 +30,13 @@ import { type MealAnalysis, } from '@mana/shared-types'; import { logger, type AuthVariables } from '@mana/shared-hono'; +import { MANA_LLM } from '@mana/shared-ai'; const LLM_URL = process.env.MANA_LLM_URL || 'http://localhost:3025'; -// mana-llm parses model strings as `provider/model` (router.py:_parse_model). -// Default to Gemma 3 (4B, multimodal) on the local Ollama instance — it -// runs on the GPU server (192.168.178.11) via the gpu-proxy bridge and -// supports vision out of the box. Override with VISION_MODEL=google/gemini-2.0-flash -// (or similar) once mana-llm has GOOGLE_API_KEY configured. -const VISION_MODEL = process.env.VISION_MODEL || 'ollama/gemma3:4b'; +// mana-llm resolves this alias to a healthy vision model (chain in +// services/mana-llm/aliases.yaml). To swap the chain, edit the YAML +// and SIGHUP — no service redeploy here. +const VISION_MODEL = MANA_LLM.VISION; const llm = createOpenAICompatible({ name: 'mana-llm', diff --git a/apps/api/src/modules/plants/routes.ts b/apps/api/src/modules/plants/routes.ts index 0d7c19612..c6a7ce155 100644 --- a/apps/api/src/modules/plants/routes.ts +++ b/apps/api/src/modules/plants/routes.ts @@ -19,11 +19,10 @@ import { type PlantIdentification, } from '@mana/shared-types'; import { logger, type AuthVariables } from '@mana/shared-hono'; +import { MANA_LLM } from '@mana/shared-ai'; const LLM_URL = process.env.MANA_LLM_URL || 'http://localhost:3025'; -// See food/routes.ts for the rationale on the default model and -// the /v1 base URL. -const VISION_MODEL = process.env.VISION_MODEL || 'ollama/gemma3:4b'; +const VISION_MODEL = MANA_LLM.VISION; const llm = createOpenAICompatible({ name: 'mana-llm', diff --git a/apps/api/src/modules/research/orchestrator.ts b/apps/api/src/modules/research/orchestrator.ts index 676442723..080453acf 100644 --- a/apps/api/src/modules/research/orchestrator.ts +++ b/apps/api/src/modules/research/orchestrator.ts @@ -18,9 +18,15 @@ import { eq } from 'drizzle-orm'; import { db, researchResults, sources, type ResearchDepth } from './schema'; import { llmJson, llmStream, LlmError } from '../../lib/llm'; +import { MANA_LLM } from '@mana/shared-ai'; import { webSearch, bulkExtract, type SearchHit, SearchError } from '../../lib/search'; // ─── Depth configuration ──────────────────────────────────── +// +// `planModel` is always `STRUCTURED` (the planner emits JSON). +// `synthModel` varies by depth: `quick` runs through `FAST_TEXT` for a +// terse summary, `standard`/`deep` use `LONG_FORM` for richer prose. +// Concrete provider/model selection lives in services/mana-llm/aliases.yaml. interface DepthConfig { subQueryCount: number; @@ -39,8 +45,8 @@ const DEPTH_CONFIG: Record = { maxSources: 5, extract: false, categories: ['general'], - planModel: 'ollama/gemma3:4b', - synthModel: 'ollama/gemma3:4b', + planModel: MANA_LLM.STRUCTURED, + synthModel: MANA_LLM.FAST_TEXT, }, standard: { subQueryCount: 3, @@ -48,8 +54,8 @@ const DEPTH_CONFIG: Record = { maxSources: 15, extract: true, categories: ['general', 'news'], - planModel: 'ollama/gemma3:4b', - synthModel: 'ollama/gemma3:12b', + planModel: MANA_LLM.STRUCTURED, + synthModel: MANA_LLM.LONG_FORM, }, deep: { subQueryCount: 6, @@ -57,8 +63,8 @@ const DEPTH_CONFIG: Record = { maxSources: 30, extract: true, categories: ['general', 'news', 'science', 'it'], - planModel: 'ollama/gemma3:12b', - synthModel: 'ollama/gemma3:12b', + planModel: MANA_LLM.STRUCTURED, + synthModel: MANA_LLM.LONG_FORM, }, }; diff --git a/apps/api/src/modules/writing/routes.ts b/apps/api/src/modules/writing/routes.ts index c546eeb37..b5160731d 100644 --- a/apps/api/src/modules/writing/routes.ts +++ b/apps/api/src/modules/writing/routes.ts @@ -17,9 +17,10 @@ import { Hono } from 'hono'; import { llmText, LlmError } from '../../lib/llm'; +import { MANA_LLM } from '@mana/shared-ai'; import { logger, type AuthVariables } from '@mana/shared-hono'; -const DEFAULT_MODEL = process.env.WRITING_MODEL || 'ollama/gemma3:4b'; +const DEFAULT_MODEL = MANA_LLM.LONG_FORM; /** Hard cap so a runaway briefing can't burn unlimited tokens. */ const MAX_OUTPUT_TOKENS = 8000; diff --git a/apps/mana/apps/web/src/routes/api/v1/voice/parse-habit/+server.ts b/apps/mana/apps/web/src/routes/api/v1/voice/parse-habit/+server.ts index 753c2e9b7..edcce42bd 100644 --- a/apps/mana/apps/web/src/routes/api/v1/voice/parse-habit/+server.ts +++ b/apps/mana/apps/web/src/routes/api/v1/voice/parse-habit/+server.ts @@ -24,18 +24,18 @@ import { json } from '@sveltejs/kit'; import { env } from '$env/dynamic/private'; +import { MANA_LLM } from '@mana/shared-ai'; import type { RequestHandler } from './$types'; const MAX_TRANSCRIPT_CHARS = 500; const MAX_HABITS = 50; const LLM_TIMEOUT_MS = 8000; -// gemma3:12b is more consistent than 4b at the "pick from this list, -// don't paraphrase" instruction — 4b sometimes returns "Joggen" when -// "Laufen" was in the list, which the verbatim-validation in coerce -// then drops, costing an LLM round-trip for nothing. The accuracy -// win matters more here than for parse-task because parse-habit only -// runs at all when the cheap client-side substring fast path missed. -const DEFAULT_MODEL = 'ollama/gemma3:12b'; +// Voice → JSON intent: STRUCTURED is the right class. mana-llm's +// alias chain picks a model that consistently honours the +// "verbatim from this list" constraint that parse-habit needs (the +// coerce step still drops paraphrases, so accuracy here is direct +// round-trip savings). +const DEFAULT_MODEL = MANA_LLM.STRUCTURED; interface ParseResult { match: string | null; diff --git a/apps/mana/apps/web/src/routes/api/v1/voice/parse-task/+server.ts b/apps/mana/apps/web/src/routes/api/v1/voice/parse-task/+server.ts index 4c765c7c2..fbd6c1d7c 100644 --- a/apps/mana/apps/web/src/routes/api/v1/voice/parse-task/+server.ts +++ b/apps/mana/apps/web/src/routes/api/v1/voice/parse-task/+server.ts @@ -19,20 +19,16 @@ import { json } from '@sveltejs/kit'; import { env } from '$env/dynamic/private'; +import { MANA_LLM } from '@mana/shared-ai'; import type { RequestHandler } from './$types'; import { coerce, extractJson, fallback } from './coerce'; const MAX_TRANSCRIPT_CHARS = 1000; const LLM_TIMEOUT_MS = 8000; -// gemma3:12b consistently nails relative date math ("nächsten Montag" -// from a Wednesday → next Monday's date) and respects "null when -// absent" for both dueDate and priority. gemma3:4b gets weekday math -// off-by-one and stamps today's date on every bare task. The 12b -// model is only ~10% slower in practice on the GPU box (~1.1s vs -// ~1.0s for these tiny prompts) so the accuracy win is essentially -// free. The deterministic guards in coerce() are still kept as a -// safety net in case the GPU box swaps in a weaker model. -const DEFAULT_MODEL = 'ollama/gemma3:12b'; +// Voice → JSON intent (relative dates, priority, title cleanup): +// STRUCTURED. The deterministic guards in coerce() stay as a backstop +// in case the alias chain falls back to a model with weaker date math. +const DEFAULT_MODEL = MANA_LLM.STRUCTURED; function buildPrompt(transcript: string, language: string): string { const now = new Date(); diff --git a/apps/memoro/apps/server/src/lib/ai.ts b/apps/memoro/apps/server/src/lib/ai.ts index aab359640..bab8d2f55 100644 --- a/apps/memoro/apps/server/src/lib/ai.ts +++ b/apps/memoro/apps/server/src/lib/ai.ts @@ -7,9 +7,14 @@ * 3. Azure OpenAI (Microsoft Cloud) */ -// Self-hosted mana-llm service +// Self-hosted mana-llm service. Use the `mana/` alias system — +// see packages/shared-ai/src/llm-aliases.ts and +// services/mana-llm/aliases.yaml for the SSOT and the resolution chain. +// (memoro-server doesn't pull @mana/shared-ai as a workspace dep yet, +// so the alias string is inlined here. If memoro adds more LLM call +// sites, fold this into a workspace import.) const MANA_LLM_URL = process.env.MANA_LLM_URL || ''; -const MANA_LLM_MODEL = process.env.MANA_LLM_MODEL || 'ollama/gemma3:4b'; +const MANA_LLM_MODEL = process.env.MANA_LLM_MODEL || 'mana/fast-text'; // Gemini (cloud fallback) const GEMINI_ENDPOINT = 'https://generativelanguage.googleapis.com/v1beta/models'; diff --git a/package.json b/package.json index 8a55d5260..4f3ffea63 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,8 @@ "validate:i18n-parity": "node scripts/validate-i18n-parity.mjs", "validate:i18n-hardcoded": "node scripts/validate-no-hardcoded-strings.mjs", "validate:i18n-keys": "node scripts/validate-i18n-keys.mjs", - "validate:all": "pnpm run validate:turbo && pnpm run validate:pg-schema && pnpm run validate:theme-variables && pnpm run validate:theme-utilities && pnpm run validate:theme-parity && pnpm run validate:i18n-parity && pnpm run validate:i18n-hardcoded && pnpm run validate:i18n-keys && pnpm run check:crypto && pnpm run audit:encrypted-tools", + "validate:llm-strings": "node scripts/validate-llm-strings.mjs", + "validate:all": "pnpm run validate:turbo && pnpm run validate:pg-schema && pnpm run validate:theme-variables && pnpm run validate:theme-utilities && pnpm run validate:theme-parity && pnpm run validate:i18n-parity && pnpm run validate:i18n-hardcoded && pnpm run validate:i18n-keys && pnpm run validate:llm-strings && pnpm run check:crypto && pnpm run audit:encrypted-tools", "check:crypto": "node scripts/audit-crypto-registry.mjs", "check:crypto:seed": "node scripts/audit-crypto-registry.mjs --seed", "audit:encrypted-tools": "bun run scripts/audit-encrypted-tools.ts", diff --git a/packages/shared-ai/src/index.ts b/packages/shared-ai/src/index.ts index 93361efce..0ecefac08 100644 --- a/packages/shared-ai/src/index.ts +++ b/packages/shared-ai/src/index.ts @@ -37,6 +37,9 @@ export { isFromMissionRunner, } from './actor'; +export type { FieldMeta, FieldOrigin } from './field-meta'; +export { makeFieldMeta, isUserOriginatedField } from './field-meta'; + export type { IterationPhase, Mission, @@ -166,3 +169,5 @@ export type { WorkbenchTemplateCategory, } from './agents'; export { DEFAULT_AGENT_ID, DEFAULT_AGENT_NAME, ALL_TEMPLATES, getTemplateById } from './agents'; + +export { MANA_LLM, type ManaLlmAlias } from './llm-aliases'; diff --git a/packages/shared-ai/src/llm-aliases.ts b/packages/shared-ai/src/llm-aliases.ts new file mode 100644 index 000000000..dbbb168c7 --- /dev/null +++ b/packages/shared-ai/src/llm-aliases.ts @@ -0,0 +1,25 @@ +/** + * Mana LLM model aliases — single source of truth for which class of + * model each backend feature uses. + * + * Resolved server-side by mana-llm via `services/mana-llm/aliases.yaml`; + * consumers don't see the underlying provider/model unless they really + * need to (mainly for token-cost accounting via the + * `X-Mana-LLM-Resolved` response header). + * + * Plan: docs/plans/llm-fallback-aliases.md. + */ +export const MANA_LLM = { + /** Short answers, classification, single-shot Q&A. Cheap class. */ + FAST_TEXT: 'mana/fast-text', + /** Writing, essays, stories, longer prose. */ + LONG_FORM: 'mana/long-form', + /** JSON output (comic storyboards, research subqueries, voice-intent parsing). */ + STRUCTURED: 'mana/structured', + /** Agent missions, tool calls, multi-step plans. */ + REASONING: 'mana/reasoning', + /** Multimodal (image + text). */ + VISION: 'mana/vision', +} as const; + +export type ManaLlmAlias = (typeof MANA_LLM)[keyof typeof MANA_LLM]; diff --git a/scripts/validate-llm-strings.mjs b/scripts/validate-llm-strings.mjs new file mode 100644 index 000000000..858f34d66 --- /dev/null +++ b/scripts/validate-llm-strings.mjs @@ -0,0 +1,198 @@ +#!/usr/bin/env node + +/** + * Validate that no consumer code hardcodes provider/model strings. + * + * After M5 of `docs/plans/llm-fallback-aliases.md`, every backend caller + * should request models via the `mana/` aliases (resolved server- + * side by mana-llm). Bare `provider/model` strings (`ollama/gemma3:4b`, + * `groq/llama-3.3-70b-versatile`, …) are a sign someone bypassed the + * registry — that path skips fallback, the health-cache, and the alias- + * resolution metrics. + * + * The two legitimate places to keep concrete model strings are: + * + * - `services/mana-llm/aliases.yaml` — the registry itself + * - `services/mana-llm/**` — provider adapters, tests, fixtures + * + * Anything else fails the check. Add the file to ALLOWED_PATHS below + * with a comment if you have a justified reason. + * + * Usage: + * node scripts/validate-llm-strings.mjs + */ + +import { readdirSync, readFileSync, statSync } from 'fs'; +import { dirname, join, relative, sep } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const ROOT = join(__dirname, '..'); + +const RED = '\x1b[31m'; +const GREEN = '\x1b[32m'; +const YELLOW = '\x1b[33m'; +const DIM = '\x1b[2m'; +const BOLD = '\x1b[1m'; +const RESET = '\x1b[0m'; + +const SCAN_ROOTS = ['apps', 'services', 'packages']; +const SCAN_EXTS = new Set(['.ts', '.tsx', '.mts', '.cts']); +const SKIP_DIRS = new Set([ + 'node_modules', + 'dist', + 'build', + '.svelte-kit', + '.next', + '.turbo', + '.vercel', + '.vite', +]); + +/** + * Paths (POSIX-form, relative to repo root) that are allowed to contain + * concrete provider/model strings. Use slashes — the matcher normalises + * Windows separators. + */ +const ALLOWED_PATHS = [ + // The registry itself. + 'services/mana-llm/aliases.yaml', + + // mana-llm internals: provider adapters, router, config, tests. + 'services/mana-llm/', + + // Picture module routes route between IMAGE generators (gpt-image, + // gemini-3-pro-image-preview, …) — those are not LLM chat models + // and don't go through mana-llm's chat-completions endpoint. + 'apps/api/src/modules/picture/routes.ts', + + // generate-who-dossiers is an admin script with an explicit + // `--model ` flag; the operator deliberately picks a strong + // model for one-shot dossier authoring. Aliasing wouldn't help. + 'apps/api/scripts/generate-who-dossiers.ts', + + // Chat / who modules INSPECT user-supplied model strings to gate + // behaviour (prefix checks like `model.startsWith('ollama/')`); they + // don't pick a model themselves. + 'apps/api/src/modules/chat/routes.ts', + 'apps/api/src/modules/who/routes.ts', + + // Picture-Workbench client-side path (browser): the user picks a + // concrete image model from a dropdown. + 'apps/mana/apps/web/src/lib/modules/picture/', + + // llm-test playground in the web app — explicit model picker. + 'apps/mana/apps/web/src/routes/(app)/llm-test/', + + // Validators / scripts can reference example strings in their own + // docstrings. + 'scripts/', + + // SSOT alias-constant file. + 'packages/shared-ai/src/llm-aliases.ts', +]; + +/** + * Concrete provider strings the validator hunts for. The pattern is a + * literal `/...` token in source; matched by a regex that + * requires a quote / backtick before the slash so we don't fire on + * import paths like `from '../ollama/foo'`. + */ +const PROVIDERS = ['ollama', 'groq', 'openrouter', 'together']; +// `google/` is intentionally not in PROVIDERS — Google is the namespace +// for both Gemini text models AND Nano-Banana image models. Matching it +// would yield too many false positives in image code paths. + +const PROVIDER_RE = new RegExp(`(?<=['"\`])(?:${PROVIDERS.join('|')})/[a-zA-Z0-9_:.\\-]+`, 'g'); + +function walk(dir, hits = []) { + let entries; + try { + entries = readdirSync(dir); + } catch { + return hits; + } + for (const name of entries) { + if (SKIP_DIRS.has(name)) continue; + const path = join(dir, name); + let st; + try { + st = statSync(path); + } catch { + continue; + } + if (st.isDirectory()) walk(path, hits); + else if (st.isFile()) { + const dot = name.lastIndexOf('.'); + if (dot < 0) continue; + const ext = name.slice(dot); + if (SCAN_EXTS.has(ext)) hits.push(path); + } + } + return hits; +} + +function isAllowed(relPath) { + const norm = relPath.split(sep).join('/'); + return ALLOWED_PATHS.some((p) => (p.endsWith('/') ? norm.startsWith(p) : norm === p)); +} + +const violations = []; +let scanned = 0; + +for (const subdir of SCAN_ROOTS) { + const root = join(ROOT, subdir); + for (const file of walk(root)) { + scanned += 1; + const rel = relative(ROOT, file); + if (isAllowed(rel)) continue; + const src = readFileSync(file, 'utf8'); + PROVIDER_RE.lastIndex = 0; + let m; + while ((m = PROVIDER_RE.exec(src)) !== null) { + // Compute 1-based line number of the match. + const lineNo = src.slice(0, m.index).split('\n').length; + violations.push({ file: rel, line: lineNo, match: m[0] }); + } + } +} + +if (violations.length === 0) { + console.log( + `${GREEN}✓${RESET} LLM strings clean — scanned ${BOLD}${scanned}${RESET} files, ` + + `no hardcoded provider/model strings found outside the SSOT.` + ); + process.exit(0); +} + +console.log(); +console.log(`${RED}${BOLD}✗ Hardcoded provider/model strings found.${RESET}`); +console.log(); +console.log( + `${DIM}These should use ${BOLD}MANA_LLM.${RESET}${DIM} aliases instead. ` + + `The aliases resolve via ${BOLD}services/mana-llm/aliases.yaml${RESET}${DIM} ` + + `with health-aware fallback.${RESET}` +); +console.log( + `${DIM}If a site genuinely needs a concrete model, add the file path to ` + + `${BOLD}ALLOWED_PATHS${RESET}${DIM} in ` + + `${BOLD}scripts/validate-llm-strings.mjs${RESET}${DIM} with a comment.${RESET}` +); +console.log(); + +const grouped = new Map(); +for (const v of violations) { + if (!grouped.has(v.file)) grouped.set(v.file, []); + grouped.get(v.file).push(v); +} +for (const [file, list] of grouped) { + console.log(`${YELLOW}${file}${RESET}`); + for (const v of list) { + console.log(` ${DIM}:${v.line}${RESET} ${v.match}`); + } +} +console.log(); +console.log( + `${RED}Total: ${violations.length} violation(s) across ${grouped.size} file(s).${RESET}` +); +process.exit(1); diff --git a/services/mana-ai/src/cron/tick.ts b/services/mana-ai/src/cron/tick.ts index 94087c0c6..5d0a05c99 100644 --- a/services/mana-ai/src/cron/tick.ts +++ b/services/mana-ai/src/cron/tick.ts @@ -30,6 +30,7 @@ import { loadActiveAgents, refreshAgentSnapshots, type ServerAgent } from '../db import { appendServerIteration, planToIteration } from '../db/iteration-writer'; import { refreshSnapshots } from '../db/snapshot-refresh'; import { createServerLlmClient, ProviderCallError } from '../planner/llm-client'; +import { MANA_LLM } from '@mana/shared-ai'; import { SERVER_TOOLS } from '../planner/tools'; import { ticksTotal, @@ -393,7 +394,7 @@ async function planOneMission( pretickUsage24h, }); - const plannerModel = 'google/gemini-2.5-flash'; + const plannerModel = MANA_LLM.REASONING; // Claude-Code wU2 pattern: fold the middle of messages into a structured // summary once cumulative tokens cross 92% of maxContextTokens. @@ -493,7 +494,7 @@ async function planOneMission( } catch (err) { const msg = err instanceof Error ? err.message : String(err); if (err instanceof ProviderCallError) { - const provider = inferProviderFromModel('google/gemini-2.5-flash'); + const provider = inferProviderFromModel(MANA_LLM.REASONING); providerErrorsTotal.inc({ provider, kind: err.kind }); } console.warn(`[mana-ai tick] mission=${m.id} planner loop failed: ${msg}`); diff --git a/services/mana-ai/src/planner/llm-client.ts b/services/mana-ai/src/planner/llm-client.ts index 81f12bbbd..64c6a0a51 100644 --- a/services/mana-ai/src/planner/llm-client.ts +++ b/services/mana-ai/src/planner/llm-client.ts @@ -15,6 +15,7 @@ import type { LlmFinishReason, ToolCallRequest, } from '@mana/shared-ai'; +import { MANA_LLM } from '@mana/shared-ai'; /** Thrown when mana-llm returns a non-2xx status. ``kind`` mirrors the * structured ProviderError vocabulary (blocked / truncated / auth / @@ -37,7 +38,7 @@ export interface ServerLlmClientOptions { readonly fetchTimeoutMs?: number; } -const DEFAULT_MODEL = 'google/gemini-2.5-flash'; +const DEFAULT_MODEL = MANA_LLM.REASONING; const DEFAULT_FETCH_TIMEOUT_MS = 120_000; export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient { diff --git a/services/mana-events/src/discovery/website-extractor.ts b/services/mana-events/src/discovery/website-extractor.ts index f7101a80e..9f9ca6c03 100644 --- a/services/mana-events/src/discovery/website-extractor.ts +++ b/services/mana-events/src/discovery/website-extractor.ts @@ -121,7 +121,10 @@ async function llmExtractEvents( method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - model: 'ollama/gemma3:4b', + // JSON event extraction → STRUCTURED alias (resolved by mana-llm). + // SSOT: packages/shared-ai/src/llm-aliases.ts. Inlined because + // mana-events doesn't depend on @mana/shared-ai today. + model: 'mana/structured', messages: [ { role: 'system', content: buildExtractionPrompt() }, { role: 'user', content: `Extrahiere Events von dieser Seite:\n\n${pageContent}` }, diff --git a/services/mana-research/src/clients/mana-llm.ts b/services/mana-research/src/clients/mana-llm.ts index d3252b852..0e7dec03a 100644 --- a/services/mana-research/src/clients/mana-llm.ts +++ b/services/mana-research/src/clients/mana-llm.ts @@ -19,7 +19,11 @@ export class ManaLlmClient { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - model: opts.model ?? 'ollama/gemma3:4b', + // Default to the FAST_TEXT alias — mana-research uses mana-llm + // for query classification and short-form analysis. SSOT: + // packages/shared-ai/src/llm-aliases.ts. Inlined because + // mana-research doesn't depend on @mana/shared-ai today. + model: opts.model ?? 'mana/fast-text', messages, max_tokens: opts.maxTokens ?? 256, temperature: opts.temperature ?? 0.2,