mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-15 02:21:10 +02:00
Final milestone of docs/plans/llm-fallback-aliases.md. Every backend
caller now requests models via the `mana/<class>` alias system instead
of hardcoded `ollama/...` strings. mana-llm resolves aliases through
`services/mana-llm/aliases.yaml` with health-aware fallback (M3) and
emits resolved-model + fallback metrics (M4).
The single source of truth (SSOT) moved to `packages/shared-ai/src/llm-aliases.ts` so apps/api,
apps/mana/apps/web, and services/mana-ai all import the same
`MANA_LLM` constant via the existing `@mana/shared-ai` workspace
dependency. Three additional sites (memoro-server, mana-events,
mana-research) inline the alias string with a SSOT comment because
they don't pull @mana/shared-ai today.
Migrated 14 sites across 10 files:
- apps/api: writing(LONG_FORM), comic(STRUCTURED), context(FAST_TEXT),
food(VISION), plants(VISION), research orchestrator (3 tiers
collapsed to STRUCTURED+FAST_TEXT/LONG_FORM)
- apps/mana/apps/web: voice/parse-task + parse-habit (STRUCTURED)
- services/mana-ai: planner llm-client + tick.ts (REASONING)
- services/mana-events: website-extractor (STRUCTURED, inlined)
- services/mana-research: mana-llm client (FAST_TEXT, inlined)
- apps/memoro/apps/server: ai.ts (FAST_TEXT, inlined)
Legacy env-vars removed: WRITING_MODEL, COMIC_STORYBOARD_MODEL,
VISION_MODEL, MANA_LLM_DEFAULT_MODEL. The chain in aliases.yaml is
now the single tuning surface; SIGHUP reloads it without redeploys.
New `scripts/validate-llm-strings.mjs` regex-scans 2538 files for
hardcoded `<provider>/<model>` strings and fails the build if any
land outside the SSOT or the explicitly-allowed paths (image-gen
modules, model-inspector code, this validator itself, the registry).
Wired into `validate:all` next to the i18n + theme validators.
Verified: `pnpm validate:llm-strings` clean, `pnpm --filter @mana/api
type-check` clean, `pnpm --filter @mana/ai-service type-check`
clean. Web type-check has 2 pre-existing errors in
SettingsSidebar.svelte (i18n MessageFormatter type drift, last
touched in 988c17a67 — unrelated to this work).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
267 lines
8.1 KiB
TypeScript
267 lines
8.1 KiB
TypeScript
/**
|
|
* Context module — AI text generation + token estimation
|
|
* Ported from apps/context/apps/server
|
|
*
|
|
* CRUD for spaces/documents handled by mana-sync.
|
|
*/
|
|
|
|
import { Hono } from 'hono';
|
|
import { consumeCredits, validateCredits } from '@mana/shared-hono/credits';
|
|
import type { AuthVariables } from '@mana/shared-hono';
|
|
import { MANA_LLM } from '@mana/shared-ai';
|
|
|
|
// Base URLs of the mana-llm and crawler services. The localhost defaults
// apply whenever the corresponding env var is unset or empty.
const { MANA_LLM_URL: llmUrlOverride, MANA_CRAWLER_URL: crawlerUrlOverride } = process.env;
const LLM_URL = llmUrlOverride || 'http://localhost:3025';
const CRAWLER_URL = crawlerUrlOverride || 'http://localhost:3023';

// Model alias used for the optional summary step of the URL import.
const DEFAULT_SUMMARY_MODEL = MANA_LLM.FAST_TEXT;

const routes = new Hono<{ Variables: AuthVariables }>();

// ─── URL Import (crawler → optional LLM summary → document) ──

// Page cap for a deep crawl; also used as the page size when fetching results.
const DEEP_MAX_PAGES = 20;
// Delay between two crawl-status polls.
const CRAWL_POLL_INTERVAL_MS = 1500;
// Overall time budget for a crawl job before polling gives up.
const CRAWL_TIMEOUT_MS = 90_000;
|
|
|
|
/**
 * Clean up raw LLM output so it renders as plain Markdown inside a
 * Kontext document.
 *
 * Three normalisations are applied, in order:
 *  1. If the whole answer is wrapped in a ``` / ```md / ```markdown fence,
 *     keep only the fenced body.
 *  2. If the answer has more than two lines and the first one is a short
 *     non-heading line ending in a colon (typical "Hier ist die
 *     Zusammenfassung:" chatter), drop that line.
 *  3. Turn a leading H1 marker into an H2 so it does not clash with the
 *     section header the frontend prepends.
 *
 * @param raw - Completion text exactly as returned by the model.
 * @returns The trimmed, normalised Markdown.
 */
function sanitizeSummary(raw: string): string {
	const trimmed = raw.trim();

	// Step 1: unwrap a fence that spans the entire answer.
	const fenced = /^```(?:markdown|md)?\s*\n([\s\S]*?)\n?```\s*$/i.exec(trimmed);
	const unfenced = fenced ? fenced[1].trim() : trimmed;

	// Step 2: drop a one-line preamble, but only when real content follows it.
	const [firstLine, ...remaining] = unfenced.split('\n');
	const looksLikePreamble = remaining.length > 1 && /^[^#\n].{0,80}:\s*$/.test(firstLine.trim());
	const body = looksLikePreamble ? remaining.join('\n').trim() : unfenced;

	// Step 3: demote a leading "# " heading to "## ".
	return body.replace(/^#\s+/, '## ');
}
|
|
|
|
/**
 * Wait until the crawler reports the given job as finished.
 *
 * Polls `GET {CRAWLER_URL}/api/v1/crawl/{jobId}` every
 * CRAWL_POLL_INTERVAL_MS (sleeping before the first request) until the job
 * status is `completed`, and gives up once CRAWL_TIMEOUT_MS has elapsed.
 * Each status request is aborted when the overall deadline passes, so a
 * hung connection cannot stall the caller beyond the time budget.
 *
 * @param jobId - Identifier returned by the crawler when the job was started.
 * @throws Error `crawl status <code>` when the status endpoint answers with a
 *   non-2xx code, the job's own `error` (or `crawl failed`) when it reports
 *   `failed`, and `crawl timeout` when the deadline is exceeded.
 */
async function pollCrawlJob(jobId: string): Promise<void> {
	const deadline = Date.now() + CRAWL_TIMEOUT_MS;
	// Encode the id so unexpected characters cannot alter the request path.
	const statusUrl = `${CRAWLER_URL}/api/v1/crawl/${encodeURIComponent(jobId)}`;

	while (Date.now() < deadline) {
		await new Promise((resolve) => setTimeout(resolve, CRAWL_POLL_INTERVAL_MS));

		// Bound the request by the time left, but always grant at least one
		// poll interval so the final poll is not aborted immediately.
		const controller = new AbortController();
		const requestBudgetMs = Math.max(deadline - Date.now(), CRAWL_POLL_INTERVAL_MS);
		const abortTimer = setTimeout(() => controller.abort(), requestBudgetMs);

		let job: { status: string; error?: string };
		try {
			const res = await fetch(statusUrl, { signal: controller.signal });
			if (!res.ok) throw new Error(`crawl status ${res.status}`);
			job = (await res.json()) as { status: string; error?: string };
		} catch (err) {
			// Report our own abort with the same message as the loop deadline.
			if (controller.signal.aborted) throw new Error('crawl timeout');
			throw err;
		} finally {
			clearTimeout(abortTimer);
		}

		if (job.status === 'completed') return;
		if (job.status === 'failed') throw new Error(job.error || 'crawl failed');
	}
	throw new Error('crawl timeout');
}
|
|
|
|
/**
 * POST /import-url — crawl a URL and return its content as a document payload,
 * optionally condensed by the LLM.
 *
 * Request body: `{ url: string; mode?: 'single' | 'deep'; summarize?: boolean }`.
 * Any `mode` other than `'deep'` is treated as `'single'`.
 * Costs 1 credit, or 5 when `summarize` is truthy; credits are consumed only
 * after crawl (and summary, if requested) succeeded.
 *
 * Responses: 200 `{ title, content, sourceUrl, crawlMode, crawledAt, pageCount }`,
 * 400 for an unparsable body or invalid URL, 402 when credits are insufficient,
 * 422 when the crawl produced no content, 502 when the crawler or LLM call
 * fails, 500 for any other error (including crawl timeout).
 */
routes.post('/import-url', async (c) => {
	const userId = c.get('userId');

	// A malformed or non-object body is a client error, not a server fault.
	const body: unknown = await c.req.json().catch(() => null);
	if (typeof body !== 'object' || body === null) {
		return c.json({ error: 'valid JSON body required' }, 400);
	}
	const {
		url,
		mode: requestedMode,
		summarize: requestedSummarize,
	} = body as { url?: unknown; mode?: unknown; summarize?: unknown };

	// Require an explicit http(s) prefix AND a parsable URL, so strings such
	// as "http://" are rejected here instead of failing later with a 500.
	// NOTE(review): the URL is user-supplied and forwarded to the crawler;
	// presumably private-network filtering happens there — confirm.
	let sourceHost: string | null = null;
	if (typeof url === 'string' && /^https?:\/\//i.test(url)) {
		try {
			sourceHost = new URL(url).hostname;
		} catch {
			sourceHost = null;
		}
	}
	if (typeof url !== 'string' || sourceHost === null) {
		return c.json({ error: 'valid http(s) url required' }, 400);
	}
	const fallbackTitle: string = sourceHost;

	// Normalise the options so only known values reach the response and the
	// credit description.
	const mode: 'single' | 'deep' = requestedMode === 'deep' ? 'deep' : 'single';
	const summarize = Boolean(requestedSummarize);

	const creditCost = summarize ? 5 : 1;
	const validation = await validateCredits(userId, 'AI_CONTEXT_IMPORT_URL', creditCost);
	if (!validation.hasCredits) {
		return c.json(
			{
				error: 'Insufficient credits',
				required: creditCost,
				available: validation.availableCredits,
			},
			402
		);
	}

	try {
		const isDeep = mode === 'deep';
		const crawlBody = {
			startUrl: url,
			config: {
				maxDepth: isDeep ? 3 : 0,
				maxPages: isDeep ? DEEP_MAX_PAGES : 1,
				rateLimit: 2,
				respectRobots: true,
				outputFormat: 'markdown',
			},
		};

		const startRes = await fetch(`${CRAWLER_URL}/api/v1/crawl`, {
			method: 'POST',
			headers: { 'Content-Type': 'application/json' },
			body: JSON.stringify(crawlBody),
		});
		if (!startRes.ok) return c.json({ error: 'crawler unreachable' }, 502);

		// Without a job id there is nothing to poll.
		const started = (await startRes.json()) as { jobId?: string } | null;
		const jobId = started?.jobId;
		if (!jobId) return c.json({ error: 'crawler returned no job id' }, 502);

		await pollCrawlJob(jobId);

		const resultsRes = await fetch(
			`${CRAWLER_URL}/api/v1/crawl/${encodeURIComponent(jobId)}/results?page=1&limit=${DEEP_MAX_PAGES}`
		);
		if (!resultsRes.ok) return c.json({ error: 'crawl results failed' }, 502);

		type CrawledPage = {
			url: string;
			title?: string | null;
			markdown?: string | null;
			content?: string | null;
			depth: number;
		};
		const payload = (await resultsRes.json()) as { results?: CrawledPage[] | null } | null;
		const rawPages = payload?.results;
		// Keep only pages that actually carry text.
		const items = (Array.isArray(rawPages) ? rawPages : []).filter(
			(it) => it.markdown || it.content
		);
		if (items.length === 0) return c.json({ error: 'no content crawled' }, 422);

		// Shallowest page first: it supplies the title and the single-page content.
		items.sort((a, b) => a.depth - b.depth);
		const root = items[0];
		const pageTitle = root.title || fallbackTitle;

		let content: string;
		if (isDeep && items.length > 1) {
			// Deep crawl: one titled section per page, separated by rules.
			content = items
				.map((it) => `# ${it.title || it.url}\n\n_${it.url}_\n\n${it.markdown || it.content}`)
				.join('\n\n---\n\n');
		} else {
			content = root.markdown || root.content || '';
		}

		if (summarize) {
			const summaryRes = await fetch(`${LLM_URL}/api/v1/chat/completions`, {
				method: 'POST',
				headers: { 'Content-Type': 'application/json' },
				body: JSON.stringify({
					model: DEFAULT_SUMMARY_MODEL,
					max_tokens: 2000,
					messages: [
						{
							role: 'system',
							content:
								'Du bist ein Assistent, der Web-Inhalte in strukturierte Kontext-Dokumente zusammenfasst. ' +
								'Antworte ausschließlich in sauberem Markdown. Gliedere in H2-Abschnitte: ' +
								'"## Überblick", "## Kernaussagen", "## Details". Nutze die Sprache der Quelle. ' +
								'Schreibe die Antwort direkt, ohne Einleitung ("Hier ist…"), ohne Schlussformel, ' +
								'und OHNE Code-Fences (```) um die Antwort.',
						},
						{
							role: 'user',
							// Only the first 60,000 characters are sent to the model.
							content: `Quelle: ${url}\n\n${content.slice(0, 60_000)}`,
						},
					],
				}),
			});
			if (!summaryRes.ok) return c.json({ error: 'summary failed' }, 502);

			const data = (await summaryRes.json()) as {
				choices?: Array<{ message?: { content?: string } }>;
			} | null;
			const raw = data?.choices?.[0]?.message?.content?.trim();
			// NOTE(review): an empty completion silently falls back to the raw
			// crawl text while still charging the summary price — confirm intended.
			if (raw) {
				content = sanitizeSummary(raw);
			}
		}

		await consumeCredits(
			userId,
			'AI_CONTEXT_IMPORT_URL',
			creditCost,
			`URL import (${mode}${summarize ? ' + summary' : ''})`
		);

		return c.json({
			title: pageTitle,
			content,
			sourceUrl: url,
			crawlMode: mode,
			crawledAt: new Date().toISOString(),
			pageCount: items.length,
		});
	} catch (err) {
		const message = err instanceof Error ? err.message : 'import failed';
		return c.json({ error: message }, 500);
	}
});
|
|
|
|
// ─── AI Generation (server-only: mana-llm) ──────────────────
|
|
|
|
/**
 * POST /ai/generate — run a prompt through mana-llm, optionally with
 * documents supplied as context.
 *
 * Request body: `{ prompt: string; documents?: Array<{ title, content }>;
 * model?: string; maxTokens?: number }`. `model` defaults to
 * `MANA_LLM.FAST_TEXT`, `maxTokens` to 2000. Costs a flat 5 credits, consumed
 * only after the LLM call succeeded.
 *
 * Responses: 200 `{ content, tokensUsed, model }`, 400 when the body is
 * unparsable or `prompt` is missing, 402 when credits are insufficient,
 * 502 when the LLM call fails, 500 for any other error.
 */
routes.post('/ai/generate', async (c) => {
	const userId = c.get('userId');

	// A malformed or non-object body is a client error, not a server fault.
	const body: unknown = await c.req.json().catch(() => null);
	if (typeof body !== 'object' || body === null) {
		return c.json({ error: 'prompt required' }, 400);
	}
	const { prompt, documents, model, maxTokens } = body as {
		prompt?: unknown;
		documents?: unknown;
		model?: unknown;
		maxTokens?: unknown;
	};

	if (typeof prompt !== 'string' || !prompt) return c.json({ error: 'prompt required' }, 400);

	// Resolve optional parameters once so the request and the response agree.
	const resolvedModel = typeof model === 'string' && model ? model : MANA_LLM.FAST_TEXT;
	// NOTE(review): there is no upper bound on max_tokens although the credit
	// price is flat — confirm this is intended.
	const requestedTokens = Number(maxTokens);
	const resolvedMaxTokens =
		Number.isFinite(requestedTokens) && requestedTokens > 0 ? Math.floor(requestedTokens) : 2000;

	// Validate credits
	const validation = await validateCredits(userId, 'AI_CONTEXT_GENERATE', 5);
	if (!validation.hasCredits) {
		return c.json(
			{ error: 'Insufficient credits', required: 5, available: validation.availableCredits },
			402
		);
	}

	try {
		// Build messages with document context
		const messages: Array<{ role: string; content: string }> = [];

		// Ignore a non-array `documents` value and non-object entries instead of
		// crashing on them.
		const contextDocs = (Array.isArray(documents) ? documents : []).filter(
			(d) => typeof d === 'object' && d !== null
		) as Array<{ title?: unknown; content?: unknown }>;

		if (contextDocs.length) {
			const contextText = contextDocs
				.map((d) => `--- ${d.title} ---\n${d.content}`)
				.join('\n\n');
			messages.push({
				role: 'system',
				content: `Verwende diese Dokumente als Kontext:\n\n${contextText}`,
			});
		}

		messages.push({ role: 'user', content: prompt });

		const res = await fetch(`${LLM_URL}/api/v1/chat/completions`, {
			method: 'POST',
			headers: { 'Content-Type': 'application/json' },
			body: JSON.stringify({
				messages,
				model: resolvedModel,
				max_tokens: resolvedMaxTokens,
			}),
		});

		if (!res.ok) return c.json({ error: 'AI generation failed' }, 502);

		const data = (await res.json()) as {
			choices?: Array<{ message?: { content?: string } }>;
			usage?: { total_tokens?: number };
		} | null;
		const content = data?.choices?.[0]?.message?.content || '';
		const tokensUsed = data?.usage?.total_tokens || 0;

		// Consume credits
		await consumeCredits(userId, 'AI_CONTEXT_GENERATE', 5, `AI generation (${tokensUsed} tokens)`);

		return c.json({ content, tokensUsed, model: resolvedModel });
	} catch (_err) {
		// Deliberately generic: internal error details are not sent to the client.
		return c.json({ error: 'Generation failed' }, 500);
	}
});
|
|
|
|
/**
 * POST /ai/estimate — rough token estimate for a generation request.
 *
 * Request body: `{ prompt?: string; documents?: Array<{ content?: string }> }`.
 * The estimate is one token per four characters of prompt plus document
 * content, rounded up. `estimatedCost` is always 5.
 *
 * Responses: 200 `{ estimatedTokens, estimatedCost }`, 400 when the body is
 * not a JSON object.
 */
routes.post('/ai/estimate', async (c) => {
	// A malformed or non-object body is a client error, not a server fault.
	const body: unknown = await c.req.json().catch(() => null);
	if (typeof body !== 'object' || body === null) {
		return c.json({ error: 'valid JSON body required' }, 400);
	}
	const { prompt, documents } = body as { prompt?: unknown; documents?: unknown };

	// Only string values contribute; anything else counts as zero characters
	// rather than throwing or being miscounted.
	const promptChars = typeof prompt === 'string' ? prompt.length : 0;
	const documentChars = (Array.isArray(documents) ? documents : []).reduce(
		(sum: number, d: unknown) => {
			const content =
				typeof d === 'object' && d !== null ? (d as { content?: unknown }).content : undefined;
			return sum + (typeof content === 'string' ? content.length : 0);
		},
		0
	);

	const estimatedTokens = Math.ceil((promptChars + documentChars) / 4);
	return c.json({ estimatedTokens, estimatedCost: 5 });
});
|
|
|
|
export { routes as contextRoutes };
|