feat: add unified @manacore/shared-llm package and migrate all backends

Create a shared LLM client package that provides a unified interface to the mana-llm service, replacing 9 individual fetch-based integrations with consistent error handling, retry logic, and JSON extraction.

Package (@manacore/shared-llm):
- LlmModule with forRoot/forRootAsync (NestJS dynamic module)
- LlmClientService: chat, json, vision, visionJson, embed, stream
- LlmClient standalone class for non-NestJS consumers
- extractJson utility (consolidates 3 markdown-stripping implementations)
- retryFetch with exponential backoff (429, 5xx, network errors)
- 44 unit tests (json-extractor, retry, llm-client)

Migrated backends:
- mana-core-auth: raw fetch → llm.json()
- planta: raw fetch + vision → llm.visionJson()
- nutriphi: raw fetch + regex → llm.visionJson() + llm.json()
- chat: custom OllamaService (175 LOC) → llm.chatMessages()
- context: raw fetch → llm.chat() (keeps token tracking)
- traces: 2x raw fetch → llm.chat()
- manadeck: @google/genai SDK → llm.json() + llm.visionJson()
- bot-services: raw Ollama API → LlmClient standalone
- matrix-ollama-bot: raw fetch → llm.chatMessages() + llm.vision()

New credit operations:
- AI_PLANT_ANALYSIS (2 credits, planta)
- AI_GUIDE_GENERATION (5 credits, traces)
- AI_CONTEXT_GENERATION (2 credits, context)
- AI_BOT_CHAT (0.1 credits, matrix)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-24 00:16:42 +02:00 · 2026-03-23 22:06:30 +01:00 · 2026-03-23 22:06:30 +01:00 · e2f144962c
commit e2f144962c
parent e7bf58c5b6
48 changed files with 2476 additions and 1297 deletions
--- a/apps/context/apps/backend/package.json
+++ b/apps/context/apps/backend/package.json
@ -21,8 +21,9 @@
 		"db:seed": "tsx src/db/seed.ts"
 	},
 	"dependencies": {
-		"@manacore/shared-error-tracking": "workspace:*",
 		"@manacore/shared-drizzle-config": "workspace:*",
+		"@manacore/shared-error-tracking": "workspace:*",
+		"@manacore/shared-llm": "workspace:^",
 		"@manacore/shared-nestjs-auth": "workspace:*",
 		"@manacore/shared-nestjs-health": "workspace:*",
 		"@manacore/shared-nestjs-setup": "workspace:*",
--- a/apps/context/apps/backend/src/ai/ai.service.ts
+++ b/apps/context/apps/backend/src/ai/ai.service.ts
@ -1,5 +1,5 @@
 import { Injectable, BadRequestException, Logger } from '@nestjs/common';
-import { ConfigService } from '@nestjs/config';
+import { LlmClientService } from '@manacore/shared-llm';
 import { TokenService } from '../token/token.service';

 interface GenerateOptions {
@ -19,14 +19,11 @@ function estimateTokens(text: string): number {
@Injectable()
 export class AiService {
 	private readonly logger = new Logger(AiService.name);
-	private readonly manaLlmUrl: string;

 	constructor(
-		private configService: ConfigService,
+		private readonly llm: LlmClientService,
 		private tokenService: TokenService
-	) {
-		this.manaLlmUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
-	}
+	) {}

 	async generate(userId: string, options: GenerateOptions) {
 		const model = options.model || 'ollama/gemma3:4b';
@ -51,11 +48,16 @@ export class AiService {
 		}

 		// Generate text via mana-llm
-		const completionText = await this.generateWithManaLlm(fullPrompt, options, model);
+		const result = await this.llm.chat(fullPrompt, {
+			model,
+			systemPrompt: 'You are a helpful assistant.',
+			temperature: options.temperature || 0.7,
+			maxTokens: options.maxTokens || 2000,
+		});

-		// Calculate actual cost and log
-		const actualPromptTokens = estimateTokens(fullPrompt);
-		const completionTokens = estimateTokens(completionText);
+		// Use actual token counts from response when available, fall back to estimates
+		const actualPromptTokens = result.usage.prompt_tokens || estimateTokens(fullPrompt);
+		const completionTokens = result.usage.completion_tokens || estimateTokens(result.content);
 		const { tokensUsed, remainingBalance } = await this.tokenService.logUsage(
 			userId,
 			model,
@ -65,7 +67,7 @@ export class AiService {
 		);

 		return {
-			text: completionText,
+			text: result.content,
 			tokenInfo: {
 				promptTokens: actualPromptTokens,
 				completionTokens,
@ -110,34 +112,4 @@ export class AiService {
 			balance,
 		};
 	}
-
-	private async generateWithManaLlm(
-		prompt: string,
-		options: GenerateOptions,
-		model: string
-	): Promise<string> {
-		const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
-			method: 'POST',
-			headers: { 'Content-Type': 'application/json' },
-			body: JSON.stringify({
-				model,
-				messages: [
-					{ role: 'system', content: 'You are a helpful assistant.' },
-					{ role: 'user', content: prompt },
-				],
-				temperature: options.temperature || 0.7,
-				max_tokens: options.maxTokens || 2000,
-			}),
-			signal: AbortSignal.timeout(120000),
-		});
-
-		if (!response.ok) {
-			const errorText = await response.text();
-			this.logger.error(`mana-llm error: ${response.status} - ${errorText}`);
-			throw new BadRequestException(`LLM generation failed: ${response.status}`);
-		}
-
-		const data = await response.json();
-		return data.choices?.[0]?.message?.content || '';
-	}
 }
--- a/apps/context/apps/backend/src/app.module.ts
+++ b/apps/context/apps/backend/src/app.module.ts
@ -1,7 +1,8 @@
 import { Module } from '@nestjs/common';
 import { APP_FILTER } from '@nestjs/core';
-import { ConfigModule } from '@nestjs/config';
+import { ConfigModule, ConfigService } from '@nestjs/config';
 import { ThrottlerModule } from '@nestjs/throttler';
+import { LlmModule } from '@manacore/shared-llm';
 import { DatabaseModule } from './db/database.module';
 import { HealthModule } from '@manacore/shared-nestjs-health';
 import { SpaceModule } from './space/space.module';
@ -22,6 +23,14 @@ import { HttpExceptionFilter } from './common/http-exception.filter';
 				limit: 100,
 			},
 		]),
+		LlmModule.forRootAsync({
+			imports: [ConfigModule],
+			useFactory: (config: ConfigService) => ({
+				manaLlmUrl: config.get('MANA_LLM_URL'),
+				debug: config.get('NODE_ENV') === 'development',
+			}),
+			inject: [ConfigService],
+		}),
 		DatabaseModule,
 		HealthModule.forRoot({ serviceName: 'context-backend' }),
 		SpaceModule,