managarten/packages/shared-llm/src/utils/json-extractor.ts
Till JS e2f144962c feat: add unified @manacore/shared-llm package and migrate all backends
Create a shared LLM client package that provides a unified interface
to the mana-llm service, replacing 9 individual fetch-based integrations
with consistent error handling, retry logic, and JSON extraction.

Package (@manacore/shared-llm):
- LlmModule with forRoot/forRootAsync (NestJS dynamic module)
- LlmClientService: chat, json, vision, visionJson, embed, stream
- LlmClient standalone class for non-NestJS consumers
- extractJson utility (consolidates 3 markdown-stripping implementations)
- retryFetch with exponential backoff (429, 5xx, network errors)
- 44 unit tests (json-extractor, retry, llm-client)

Migrated backends:
- mana-core-auth: raw fetch → llm.json()
- planta: raw fetch + vision → llm.visionJson()
- nutriphi: raw fetch + regex → llm.visionJson() + llm.json()
- chat: custom OllamaService (175 LOC) → llm.chatMessages()
- context: raw fetch → llm.chat() (keeps token tracking)
- traces: 2x raw fetch → llm.chat()
- manadeck: @google/genai SDK → llm.json() + llm.visionJson()
- bot-services: raw Ollama API → LlmClient standalone
- matrix-ollama-bot: raw fetch → llm.chatMessages() + llm.vision()

New credit operations:
- AI_PLANT_ANALYSIS (2 credits, planta)
- AI_GUIDE_GENERATION (5 credits, traces)
- AI_CONTEXT_GENERATION (2 credits, context)
- AI_BOT_CHAT (0.1 credits, matrix)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-23 22:06:30 +01:00

94 lines
2.3 KiB
TypeScript

/**
 * Extract and parse JSON from LLM responses.
 *
 * LLMs often wrap JSON in markdown code fences or include extra text.
 * This utility handles all common patterns, in order:
 * 1. Direct JSON parse of the trimmed text
 * 2. Markdown ```json ... ``` fences (every fence is tried, not just the first)
 * 3. The earliest balanced { ... } or [ ... ] block in the text
 *
 * @param text     Raw LLM response text.
 * @param validate Optional validator (e.g. a zod schema's `parse`); if it
 *                 throws, the candidate is rejected and the next extraction
 *                 strategy is tried.
 * @returns The parsed (and optionally validated) value.
 * @throws Error when no strategy yields parseable JSON.
 */
export function extractJson<T = unknown>(text: string, validate?: (data: unknown) => T): T {
  const trimmed = text.trim();

  // Step 1: Try direct parse.
  const direct = tryParse<T>(trimmed, validate);
  if (direct !== undefined) return direct;

  // Step 2: Strip markdown code fences. Try every fence, not only the
  // first: models often emit an explanatory fence before the JSON payload.
  for (const fenceMatch of trimmed.matchAll(/```(?:json)?\s*([\s\S]*?)```/g)) {
    const fenced = tryParse<T>(fenceMatch[1].trim(), validate);
    if (fenced !== undefined) return fenced;
  }

  // Step 3/4: Find the first balanced JSON object or array. Candidates are
  // tried in order of appearance in the text — the previous implementation
  // always preferred '{' over '[', which picked the wrong block when an
  // array preceded a stray brace in prose. Both are still attempted.
  const candidates: Array<{ pos: number; open: string; close: string }> = [];
  const objectStart = trimmed.indexOf('{');
  if (objectStart !== -1) candidates.push({ pos: objectStart, open: '{', close: '}' });
  const arrayStart = trimmed.indexOf('[');
  if (arrayStart !== -1) candidates.push({ pos: arrayStart, open: '[', close: ']' });
  candidates.sort((a, b) => a.pos - b.pos);

  for (const { pos, open, close } of candidates) {
    const block = extractBalanced(trimmed, pos, open, close);
    if (block) {
      const parsed = tryParse<T>(block, validate);
      if (parsed !== undefined) return parsed;
    }
  }

  throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`);
}
/**
 * Attempt to JSON-parse `text`, optionally running a validator over the
 * result. Returns `undefined` when parsing or validation throws, so the
 * caller can fall through to its next extraction strategy.
 */
function tryParse<T>(text: string, validate?: (data: unknown) => T): T | undefined {
  let result: T | undefined;
  try {
    const raw: unknown = JSON.parse(text);
    result = validate === undefined ? (raw as T) : validate(raw);
  } catch {
    result = undefined;
  }
  return result;
}
/**
 * Extract a balanced block starting from the given position.
 *
 * Tracks nesting depth of `open`/`close` delimiters while ignoring any
 * delimiters that appear inside double-quoted JSON strings (including
 * escaped quotes such as `\"`).
 *
 * @param text  Source text to scan.
 * @param start Index of the opening delimiter; must point at `open`.
 * @param open  Opening delimiter character, e.g. '{' or '['.
 * @param close Closing delimiter character, e.g. '}' or ']'.
 * @returns The balanced substring including both delimiters, or null when
 *          `start` does not point at `open` or no matching close exists.
 */
function extractBalanced(text: string, start: number, open: string, close: string): string | null {
  // Robustness: previously a `start` not pointing at the delimiter produced
  // a meaningless one-character slice (depth never left 0); fail explicitly.
  if (text[start] !== open) return null;
  let depth = 0;
  let inString = false;
  let escape = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (escape) {
      escape = false;
      continue;
    }
    // Fix: backslash is an escape character only inside JSON strings. The
    // original set `escape` outside strings too, which made a stray `\` in
    // surrounding prose swallow the following character — possibly a real
    // delimiter — and return null for otherwise balanced input.
    if (inString && ch === '\\') {
      escape = true;
      continue;
    }
    if (ch === '"') {
      inString = !inString;
      continue;
    }
    if (inString) continue;
    if (ch === open) depth++;
    if (ch === close) depth--;
    if (depth === 0) {
      // Depth only returns to 0 at the close matching the delimiter at `start`.
      return text.slice(start, i + 1);
    }
  }
  return null; // unbalanced: ran off the end of the text
}