Mirror of https://github.com/Memo-2023/mana-monorepo.git (synced 2026-05-14 20:01:09 +02:00)
feat: add unified @manacore/shared-llm package and migrate all backends
Create a shared LLM client package that provides a unified interface to the mana-llm service, replacing 9 individual fetch-based integrations with consistent error handling, retry logic, and JSON extraction.

Package (@manacore/shared-llm):
- LlmModule with forRoot/forRootAsync (NestJS dynamic module)
- LlmClientService: chat, json, vision, visionJson, embed, stream
- LlmClient standalone class for non-NestJS consumers
- extractJson utility (consolidates 3 markdown-stripping implementations)
- retryFetch with exponential backoff (429, 5xx, network errors)
- 44 unit tests (json-extractor, retry, llm-client)

Migrated backends:
- mana-core-auth: raw fetch → llm.json()
- planta: raw fetch + vision → llm.visionJson()
- nutriphi: raw fetch + regex → llm.visionJson() + llm.json()
- chat: custom OllamaService (175 LOC) → llm.chatMessages()
- context: raw fetch → llm.chat() (keeps token tracking)
- traces: 2x raw fetch → llm.chat()
- manadeck: @google/genai SDK → llm.json() + llm.visionJson()
- bot-services: raw Ollama API → LlmClient standalone
- matrix-ollama-bot: raw fetch → llm.chatMessages() + llm.vision()

New credit operations:
- AI_PLANT_ANALYSIS (2 credits, planta)
- AI_GUIDE_GENERATION (5 credits, traces)
- AI_CONTEXT_GENERATION (2 credits, context)
- AI_BOT_CHAT (0.1 credits, matrix)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
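The retry behavior the message describes can be sketched as follows. Only the name retryFetch and the retried conditions (429, 5xx, network errors) come from the commit message; the signature, delay schedule, and defaults below are assumptions, not the package's actual implementation.

// Hypothetical sketch of retryFetch. Only the function name and the retried
// conditions (429, 5xx, network errors) come from the commit message; the
// backoff schedule, parameters, and defaults are illustrative assumptions.
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

export async function retryFetch(
  url: string,
  init: RequestInit,
  maxRetries = 3,
  baseDelayMs = 500
): Promise<Response> {
  for (let attempt = 0; ; attempt++) {
    try {
      const res = await fetch(url, init);
      // Retry on rate limiting and server errors; return everything else.
      if ((res.status === 429 || res.status >= 500) && attempt < maxRetries) {
        await sleep(baseDelayMs * 2 ** attempt); // exponential backoff
        continue;
      }
      return res;
    } catch (err) {
      // Network errors are retried on the same schedule.
      if (attempt >= maxRetries) throw err;
      await sleep(baseDelayMs * 2 ** attempt);
    }
  }
}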
This commit is contained in: parent e7bf58c5b6, commit e2f144962c
48 changed files with 2476 additions and 1297 deletions
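For orientation before the file diffs, a minimal consumer sketch assembled from the app.module.ts and service hunks below. LlmModule.forRootAsync, LlmClientService, and the option names (manaLlmUrl, timeout, debug, temperature, validate) appear verbatim in this commit; the module and service shells around them are illustrative only.

// Illustrative consumer; ExampleService and summarize() are hypothetical.
import { Injectable, Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule, LlmClientService } from '@manacore/shared-llm';

@Injectable()
export class ExampleService {
  constructor(private readonly llm: LlmClientService) {}

  // json() parses and optionally validates model output, replacing the
  // hand-rolled fetch plus markdown-stripping the migrated services used.
  async summarize(text: string): Promise<string> {
    const { data } = await this.llm.json<{ summary: string }>(
      `Summarize in one sentence: ${text}`,
      {
        temperature: 0.3,
        validate: (raw) => {
          const obj = raw as { summary: string };
          if (!obj.summary) throw new Error('Response must contain "summary"');
          return obj;
        },
      }
    );
    return data.summary;
  }
}

@Module({
  imports: [
    ConfigModule.forRoot({ isGlobal: true }),
    // Async registration, mirroring the pattern used by the migrated backends.
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        timeout: config.get<number>('LLM_TIMEOUT', 120000),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
  ],
  providers: [ExampleService],
})
export class AppModule {}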
@@ -29,19 +29,20 @@
    "test:cov": "jest --coverage"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/credit-operations": "workspace:*",
    "@manacore/nestjs-integration": "workspace:*",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-errors": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-metrics": "workspace:*",
    "@manacore/shared-nestjs-setup": "workspace:*",
    "@nestjs/common": "^10.4.15",
    "@nestjs/throttler": "^6.2.1",
    "@nestjs/config": "^3.3.0",
    "@nestjs/core": "^10.4.15",
    "@nestjs/platform-express": "^10.4.15",
    "@nestjs/throttler": "^6.2.1",
    "class-transformer": "^0.5.1",
    "class-validator": "^0.14.1",
    "dotenv": "^16.4.7",

@@ -56,15 +57,15 @@
  "devDependencies": {
    "@nestjs/cli": "^10.4.9",
    "@nestjs/schematics": "^10.2.3",
    "@nestjs/testing": "^10.4.15",
    "@types/express": "^5.0.0",
    "@types/jest": "^30.0.0",
    "@types/node": "^22.10.2",
    "@typescript-eslint/eslint-plugin": "^8.18.1",
    "@typescript-eslint/parser": "^8.18.1",
    "eslint": "^9.17.0",
    "eslint-config-prettier": "^9.1.0",
    "eslint-plugin-prettier": "^5.2.1",
    "@nestjs/testing": "^10.4.15",
    "@types/jest": "^30.0.0",
    "jest": "^30.2.0",
    "prettier": "^3.4.2",
    "source-map-support": "^0.5.21",
@@ -1,6 +1,7 @@
import { Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { ThrottlerModule } from '@nestjs/throttler';
import { LlmModule } from '@manacore/shared-llm';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { ManaCoreModule } from '@manacore/nestjs-integration';
import { DatabaseModule } from './db/database.module';

@@ -20,6 +21,15 @@ import { HealthModule } from '@manacore/shared-nestjs-health';
      envFilePath: '.env',
    }),
    ThrottlerModule.forRoot([{ ttl: 60000, limit: 100 }]),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (configService: ConfigService) => ({
        manaLlmUrl: configService.get('MANA_LLM_URL'),
        timeout: configService.get<number>('LLM_TIMEOUT', 120000),
        debug: configService.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    ManaCoreModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (configService: ConfigService) => ({
@@ -1,5 +1,5 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { LlmClientService } from '@manacore/shared-llm';
import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';
import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';

@@ -8,65 +8,33 @@ interface ChatMessage {
  content: string;
}

interface ChatCompletionResponse {
  id: string;
  model: string;
  choices: {
    message: { role: string; content: string };
    finish_reason: string;
  }[];
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

interface LlmModel {
  id: string;
  owned_by: string;
}

@Injectable()
export class OllamaService {
  private readonly logger = new Logger(OllamaService.name);
  private readonly baseUrl: string;
  private readonly timeout: number;
  private isConnected = false;

  constructor(private configService: ConfigService) {
    this.baseUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
    this.timeout = this.configService.get<number>('LLM_TIMEOUT') || 120000;

    // Check connection on startup
  constructor(private readonly llm: LlmClientService) {
    this.checkConnection();
  }

  async checkConnection(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/health`, {
        signal: AbortSignal.timeout(5000),
      });
      if (response.ok) {
        const data = await response.json();
        this.isConnected = data.status === 'healthy' || data.status === 'degraded';
        if (this.isConnected) {
          const providers = Object.keys(data.providers || {}).join(', ');
          this.logger.log(`mana-llm connected: ${data.status}, providers: ${providers}`);
        }
        return this.isConnected;
      const health = await this.llm.health();
      const isConnected = health.status === 'healthy' || health.status === 'degraded';
      if (isConnected) {
        const providers = Object.keys(health.providers || {}).join(', ');
        this.logger.log(`mana-llm connected: ${health.status}, providers: ${providers}`);
      }
      this.isConnected = false;
      return false;
    } catch (error) {
      this.isConnected = false;
      this.logger.warn(`mana-llm not available at ${this.baseUrl} - local models will not work`);
      return isConnected;
    } catch {
      this.logger.warn('mana-llm not available - local models will not work');
      return false;
    }
  }

  isAvailable(): boolean {
    return this.isConnected;
    // Perform a synchronous check based on last known state
    // The actual health is checked on-demand via checkConnection
    return true;
  }

  async createChatCompletion(

@@ -75,70 +43,33 @@ export class OllamaService {
    temperature?: number,
    maxTokens?: number
  ): AsyncResult<ChatCompletionResponseDto> {
    if (!this.isConnected) {
      // Try to reconnect
      await this.checkConnection();
      if (!this.isConnected) {
        return err(
          ServiceError.externalError('mana-llm', `mana-llm server not available at ${this.baseUrl}`)
        );
      }
    }

    // Normalize model name to include ollama/ prefix if it doesn't have a provider
    const normalizedModel = modelName.includes('/') ? modelName : `ollama/${modelName}`;
    this.logger.log(`Sending request to mana-llm model: ${normalizedModel}`);

    try {
      const requestBody: Record<string, unknown> = {
      const result = await this.llm.chatMessages(messages, {
        model: normalizedModel,
        messages,
        stream: false,
      };

      // Add optional parameters
      if (temperature !== undefined) {
        requestBody.temperature = temperature;
      }
      if (maxTokens !== undefined) {
        requestBody.max_tokens = maxTokens;
      }

      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(requestBody),
        signal: AbortSignal.timeout(this.timeout),
        temperature,
        maxTokens,
      });

      if (!response.ok) {
        const errorText = await response.text();
        this.logger.error(`mana-llm API error: ${response.status} - ${errorText}`);
        return err(ServiceError.externalError('mana-llm', `API error: ${response.status}`));
      }

      const data: ChatCompletionResponse = await response.json();

      if (!data.choices?.[0]?.message?.content) {
      if (!result.content) {
        this.logger.warn('No message content in mana-llm response');
        return err(ServiceError.generationFailed('mana-llm', 'No response generated'));
      }

      const usage = data.usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };

      // Log performance metrics
      if (usage.completion_tokens) {
      if (result.usage.completion_tokens) {
        this.logger.debug(
          `Generated ${usage.completion_tokens} tokens (total: ${usage.total_tokens})`
          `Generated ${result.usage.completion_tokens} tokens (total: ${result.usage.total_tokens})`
        );
      }

      return ok({
        content: data.choices[0].message.content,
        content: result.content,
        usage: {
          prompt_tokens: usage.prompt_tokens,
          completion_tokens: usage.completion_tokens,
          total_tokens: usage.total_tokens,
          prompt_tokens: result.usage.prompt_tokens,
          completion_tokens: result.usage.completion_tokens,
          total_tokens: result.usage.total_tokens,
        },
      });
    } catch (error) {

@@ -160,14 +91,8 @@ export class OllamaService {

  async listModels(): Promise<string[]> {
    try {
      const response = await fetch(`${this.baseUrl}/v1/models`, {
        signal: AbortSignal.timeout(5000),
      });
      if (!response.ok) {
        return [];
      }
      const data = await response.json();
      return (data.data || []).map((m: LlmModel) => m.id);
      const models = await this.llm.listModels();
      return models.map((m) => m.id);
    } catch {
      return [];
    }
@@ -21,8 +21,9 @@
    "db:seed": "tsx src/db/seed.ts"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-drizzle-config": "workspace:*",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-setup": "workspace:*",
@@ -1,5 +1,5 @@
import { Injectable, BadRequestException, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { LlmClientService } from '@manacore/shared-llm';
import { TokenService } from '../token/token.service';

interface GenerateOptions {

@@ -19,14 +19,11 @@ function estimateTokens(text: string): number {
@Injectable()
export class AiService {
  private readonly logger = new Logger(AiService.name);
  private readonly manaLlmUrl: string;

  constructor(
    private configService: ConfigService,
    private readonly llm: LlmClientService,
    private tokenService: TokenService
  ) {
    this.manaLlmUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
  }
  ) {}

  async generate(userId: string, options: GenerateOptions) {
    const model = options.model || 'ollama/gemma3:4b';

@@ -51,11 +48,16 @@ export class AiService {
    }

    // Generate text via mana-llm
    const completionText = await this.generateWithManaLlm(fullPrompt, options, model);
    const result = await this.llm.chat(fullPrompt, {
      model,
      systemPrompt: 'You are a helpful assistant.',
      temperature: options.temperature || 0.7,
      maxTokens: options.maxTokens || 2000,
    });

    // Calculate actual cost and log
    const actualPromptTokens = estimateTokens(fullPrompt);
    const completionTokens = estimateTokens(completionText);
    // Use actual token counts from response when available, fall back to estimates
    const actualPromptTokens = result.usage.prompt_tokens || estimateTokens(fullPrompt);
    const completionTokens = result.usage.completion_tokens || estimateTokens(result.content);
    const { tokensUsed, remainingBalance } = await this.tokenService.logUsage(
      userId,
      model,

@@ -65,7 +67,7 @@ export class AiService {
    );

    return {
      text: completionText,
      text: result.content,
      tokenInfo: {
        promptTokens: actualPromptTokens,
        completionTokens,

@@ -110,34 +112,4 @@ export class AiService {
      balance,
    };
  }

  private async generateWithManaLlm(
    prompt: string,
    options: GenerateOptions,
    model: string
  ): Promise<string> {
    const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: 'You are a helpful assistant.' },
          { role: 'user', content: prompt },
        ],
        temperature: options.temperature || 0.7,
        max_tokens: options.maxTokens || 2000,
      }),
      signal: AbortSignal.timeout(120000),
    });

    if (!response.ok) {
      const errorText = await response.text();
      this.logger.error(`mana-llm error: ${response.status} - ${errorText}`);
      throw new BadRequestException(`LLM generation failed: ${response.status}`);
    }

    const data = await response.json();
    return data.choices?.[0]?.message?.content || '';
  }
}
@@ -1,7 +1,8 @@
import { Module } from '@nestjs/common';
import { APP_FILTER } from '@nestjs/core';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { ThrottlerModule } from '@nestjs/throttler';
import { LlmModule } from '@manacore/shared-llm';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { SpaceModule } from './space/space.module';

@@ -22,6 +23,14 @@ import { HttpExceptionFilter } from './common/http-exception.filter';
        limit: 100,
      },
    ]),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    DatabaseModule,
    HealthModule.forRoot({ serviceName: 'context-backend' }),
    SpaceModule,
@@ -21,11 +21,12 @@
    "test:e2e": "jest --config ./test/jest-e2e.json"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/nestjs-integration": "workspace:*",
    "@manacore/shared-errors": "workspace:*",
    "@google/genai": "^1.14.0",
    "@manacore/manadeck-database": "workspace:*",
    "@manacore/nestjs-integration": "workspace:*",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-errors": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@nestjs/axios": "^4.0.1",
    "@nestjs/common": "^11.0.1",
    "@nestjs/config": "^4.0.2",
@@ -4,6 +4,7 @@ import { ClsModule } from 'nestjs-cls';
import { TerminusModule } from '@nestjs/terminus';
import { HttpModule } from '@nestjs/axios';
import { ManaCoreModule } from '@manacore/nestjs-integration';
import { LlmModule } from '@manacore/shared-llm';
import { AppController } from './app.controller';
import { AppService } from './app.service';
import { ApiController } from './controllers/api.controller';

@@ -50,6 +51,16 @@ import {
      inject: [ConfigService],
    }) as any,

    // LLM (via mana-llm service)
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),

    // Health checks
    TerminusModule,
    HttpModule,
@@ -1,6 +1,5 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { GoogleGenAI, Type } from '@google/genai';
import { LlmClientService } from '@manacore/shared-llm';
import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';

export type CardType = 'text' | 'flashcard' | 'quiz' | 'mixed';

@@ -50,32 +49,16 @@ export interface DeckGenerationData {
@Injectable()
export class AiService {
  private readonly logger = new Logger(AiService.name);
  private readonly ai: GoogleGenAI | null;
  private readonly model = 'gemini-2.0-flash';

  constructor(private readonly configService: ConfigService) {
    const apiKey = this.configService.get<string>('GOOGLE_GENAI_API_KEY');

    if (apiKey) {
      this.ai = new GoogleGenAI({ apiKey });
      this.logger.log('Google Gemini AI initialized successfully');
    } else {
      this.ai = null;
      this.logger.warn('Google Gemini API key not configured - AI features disabled');
    }
  }
  constructor(private readonly llm: LlmClientService) {}

  isAvailable(): boolean {
    return this.ai !== null;
    return true;
  }

  async generateDeck(request: DeckGenerationRequest): AsyncResult<DeckGenerationData> {
    const startTime = Date.now();

    if (!this.ai) {
      return err(ServiceError.unavailable('AI (Google Gemini not configured)'));
    }

    const {
      prompt,
      deckTitle,

@@ -96,28 +79,23 @@ export class AiService {
      cardTypes
    );

    const response = await this.ai.models.generateContent({
      model: this.model,
      contents: userPrompt,
      config: {
        systemInstruction: systemPrompt,
        responseMimeType: 'application/json',
        responseSchema: this.buildResponseSchema(cardTypes),
    const { data, usage } = await this.llm.json<{ cards: GeneratedCard[] }>(userPrompt, {
      systemPrompt,
      temperature: 0.7,
      validate: (raw) => {
        const obj = raw as { cards: GeneratedCard[] };
        if (!obj.cards || !Array.isArray(obj.cards)) {
          throw new Error('Response must contain a "cards" array');
        }
        return obj;
      },
    });

    const generationTime = Date.now() - startTime;
    const responseText = response.text?.trim();

    if (!responseText) {
      return err(ServiceError.generationFailed('Google Gemini', 'Empty response from AI'));
    }

    const parsed = JSON.parse(responseText);
    const cards: GeneratedCard[] = parsed.cards || [];
    const cards = data.cards;

    if (cards.length === 0) {
      return err(ServiceError.generationFailed('Google Gemini', 'No cards generated'));
      return err(ServiceError.generationFailed('mana-llm', 'No cards generated'));
    }

    this.logger.log(`Generated ${cards.length} cards in ${generationTime}ms`);

@@ -125,8 +103,8 @@ export class AiService {
    return ok({
      cards,
      metadata: {
        model: this.model,
        tokensUsed: response.usageMetadata?.totalTokenCount,
        model: 'mana-llm',
        tokensUsed: usage.total_tokens || undefined,
        generationTime,
      },
    });

@@ -135,7 +113,7 @@ export class AiService {

      return err(
        ServiceError.generationFailed(
          'Google Gemini',
          'mana-llm',
          error instanceof Error ? error.message : 'Unknown error occurred',
          error instanceof Error ? error : undefined
        )

@@ -176,7 +154,33 @@ QUALITY GUIDELINES:
4. For quiz: all 4 options should be plausible, avoid obviously wrong answers
5. Include helpful hints for difficult flashcards
6. Add explanations for quiz questions to reinforce learning
7. Progress from easier to harder cards when possible`;
7. Progress from easier to harder cards when possible

RESPONSE FORMAT:
You MUST respond with a valid JSON object containing a "cards" array. Each card has:
${this.buildJsonSchemaDescription(cardTypes)}`;
  }

  private buildJsonSchemaDescription(cardTypes: CardType[]): string {
    const schemas: string[] = [];

    if (cardTypes.includes('flashcard')) {
      schemas.push(
        `- Flashcard: { "cardType": "flashcard", "title": "optional title", "content": { "front": "question/term", "back": "answer/definition", "hint": "optional hint" } }`
      );
    }
    if (cardTypes.includes('quiz')) {
      schemas.push(
        `- Quiz: { "cardType": "quiz", "title": "optional title", "content": { "question": "the question", "options": ["A", "B", "C", "D"], "correctAnswer": 0, "explanation": "why this is correct" } }`
      );
    }
    if (cardTypes.includes('text')) {
      schemas.push(
        `- Text: { "cardType": "text", "title": "optional title", "content": { "text": "informational content" } }`
      );
    }

    return schemas.join('\n');
  }

  private buildUserPrompt(

@@ -200,7 +204,9 @@ CARD DISTRIBUTION:
${typeDistribution}

Generate exactly ${cardCount} cards that cover the topic comprehensively.
Ensure variety in the questions and good coverage of the subject matter.`;
Ensure variety in the questions and good coverage of the subject matter.

Respond ONLY with a JSON object: {"cards": [...]}`;
  }

  private suggestTypeDistribution(cardCount: number, cardTypes: CardType[]): string {

@@ -229,7 +235,7 @@ Ensure variety in the questions and good coverage of the subject matter.`;
  }

  /**
   * Generate cards from an image using Gemini Vision
   * Generate cards from an image using vision model
   */
  async generateFromImage(
    imageBase64: string,

@@ -238,59 +244,41 @@ Ensure variety in the questions and good coverage of the subject matter.`;
  ): AsyncResult<DeckGenerationData> {
    const startTime = Date.now();

    if (!this.ai) {
      return err(ServiceError.unavailable('AI (Google Gemini not configured)'));
    }

    try {
      const prompt = `Analyze this image and create ${cardCount} educational flashcards based on its content.
${context ? `Context: ${context}` : ''}

For each concept, term, or important element you identify in the image, create a flashcard or quiz question.

Return the cards as a JSON object with a "cards" array containing objects with:
Return ONLY a JSON object: {"cards": [...]} where each card has:
- cardType: "flashcard" or "quiz"
- title: short title
- content: { front, back, hint } for flashcards OR { question, options, correctAnswer, explanation } for quiz`;

      const response = await this.ai.models.generateContent({
        model: this.model,
        contents: [
          {
            role: 'user',
            parts: [
              { text: prompt },
              {
                inlineData: {
                  mimeType: 'image/jpeg',
                  data: imageBase64,
                },
              },
            ],
      const { data, usage } = await this.llm.visionJson<{ cards: GeneratedCard[] }>(
        prompt,
        imageBase64,
        'image/jpeg',
        {
          validate: (raw) => {
            const obj = raw as { cards: GeneratedCard[] };
            if (!obj.cards || !Array.isArray(obj.cards)) {
              throw new Error('Response must contain a "cards" array');
            }
            return obj;
          },
        ],
        config: {
          responseMimeType: 'application/json',
        },
      });
        }
      );

      const generationTime = Date.now() - startTime;
      const responseText = response.text?.trim();

      if (!responseText) {
        return err(ServiceError.generationFailed('Google Gemini', 'Empty response from AI'));
      }

      const parsed = JSON.parse(responseText);
      const cards: GeneratedCard[] = parsed.cards || [];

      this.logger.log(`Generated ${cards.length} cards from image in ${generationTime}ms`);
      this.logger.log(`Generated ${data.cards.length} cards from image in ${generationTime}ms`);

      return ok({
        cards,
        cards: data.cards,
        metadata: {
          model: this.model,
          tokensUsed: response.usageMetadata?.totalTokenCount,
          model: 'mana-llm',
          tokensUsed: usage.total_tokens || undefined,
          generationTime,
        },
      });

@@ -298,7 +286,7 @@ Return the cards as a JSON object with a "cards" array containing objects with:
      this.logger.error('AI image generation failed:', error);
      return err(
        ServiceError.generationFailed(
          'Google Gemini',
          'mana-llm',
          error instanceof Error ? error.message : 'Unknown error'
        )
      );

@@ -312,109 +300,24 @@ Return the cards as a JSON object with a "cards" array containing objects with:
    content: string,
    cardType: string
  ): AsyncResult<{ enhancedContent: string }> {
    if (!this.ai) {
      return err(ServiceError.unavailable('AI (Google Gemini not configured)'));
    }

    try {
      const prompt = `Improve and enhance this ${cardType} card content. Make it clearer, more educational, and engaging.
      const result = await this.llm.chat(
        `Improve and enhance this ${cardType} card content. Make it clearer, more educational, and engaging.

Original content:
${content}

Return the enhanced content in the same JSON format as the input, but improved.`;
Return the enhanced content in the same JSON format as the input, but improved.`
      );

      const response = await this.ai.models.generateContent({
        model: this.model,
        contents: prompt,
        config: {
          responseMimeType: 'application/json',
        },
      });

      const responseText = response.text?.trim();
      if (!responseText) {
      if (!result.content) {
        return ok({ enhancedContent: content });
      }

      return ok({ enhancedContent: responseText });
      return ok({ enhancedContent: result.content });
    } catch (error) {
      this.logger.error('AI content enhancement failed:', error);
      return ok({ enhancedContent: content }); // Return original on failure
      return ok({ enhancedContent: content });
    }
  }

  private buildResponseSchema(cardTypes: CardType[]): any {
    const cardSchemas: any[] = [];

    if (cardTypes.includes('flashcard')) {
      cardSchemas.push({
        type: Type.OBJECT,
        properties: {
          cardType: { type: Type.STRING, enum: ['flashcard'] },
          title: { type: Type.STRING },
          content: {
            type: Type.OBJECT,
            properties: {
              front: { type: Type.STRING },
              back: { type: Type.STRING },
              hint: { type: Type.STRING },
            },
            required: ['front', 'back'],
          },
        },
        required: ['cardType', 'content'],
      });
    }

    if (cardTypes.includes('quiz')) {
      cardSchemas.push({
        type: Type.OBJECT,
        properties: {
          cardType: { type: Type.STRING, enum: ['quiz'] },
          title: { type: Type.STRING },
          content: {
            type: Type.OBJECT,
            properties: {
              question: { type: Type.STRING },
              options: { type: Type.ARRAY, items: { type: Type.STRING } },
              correctAnswer: { type: Type.NUMBER },
              explanation: { type: Type.STRING },
            },
            required: ['question', 'options', 'correctAnswer'],
          },
        },
        required: ['cardType', 'content'],
      });
    }

    if (cardTypes.includes('text')) {
      cardSchemas.push({
        type: Type.OBJECT,
        properties: {
          cardType: { type: Type.STRING, enum: ['text'] },
          title: { type: Type.STRING },
          content: {
            type: Type.OBJECT,
            properties: {
              text: { type: Type.STRING },
            },
            required: ['text'],
          },
        },
        required: ['cardType', 'content'],
      });
    }

    return {
      type: Type.OBJECT,
      properties: {
        cards: {
          type: Type.ARRAY,
          items: cardSchemas.length === 1 ? cardSchemas[0] : { anyOf: cardSchemas },
        },
      },
      required: ['cards'],
    };
  }
}
@@ -23,17 +23,18 @@
    "db:seed": "tsx src/db/seed.ts"
  },
  "dependencies": {
    "@google/generative-ai": "^0.21.0",
    "@manacore/shared-error-tracking": "workspace:*",
    "@nutriphi/shared": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-metrics": "workspace:*",
    "@manacore/shared-nestjs-setup": "workspace:*",
    "@google/generative-ai": "^0.21.0",
    "@nestjs/common": "^10.4.15",
    "@nestjs/config": "^3.3.0",
    "@nestjs/core": "^10.4.15",
    "@nestjs/platform-express": "^10.4.15",
    "@nutriphi/shared": "workspace:*",
    "class-transformer": "^0.5.1",
    "class-validator": "^0.14.1",
    "dotenv": "^16.4.7",
@@ -1,5 +1,5 @@
import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { Injectable, Logger } from '@nestjs/common';
import { LlmClientService } from '@manacore/shared-llm';
import type { AIAnalysisResult } from '../types/nutrition.types';

const ANALYSIS_PROMPT = `Du bist ein Ernährungsexperte. Analysiere das Bild dieser Mahlzeit und liefere eine detaillierte Nährwertanalyse.

@@ -75,95 +75,28 @@ Antworte NUR mit einem validen JSON-Objekt im folgenden Format:
}`;

@Injectable()
export class GeminiService implements OnModuleInit {
export class GeminiService {
  private readonly logger = new Logger(GeminiService.name);
  private manaLlmUrl: string | null = null;
  private readonly visionModel = 'ollama/llava:7b';
  private readonly textModel = 'ollama/gemma3:4b';

  constructor(private configService: ConfigService) {}

  onModuleInit() {
    this.manaLlmUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
    this.logger.log(`NutriPhi AI using mana-llm at ${this.manaLlmUrl}`);
  }
  constructor(private readonly llm: LlmClientService) {}

  async analyzeImage(imageBase64: string, mimeType = 'image/jpeg'): Promise<AIAnalysisResult> {
    if (!this.manaLlmUrl) {
      throw new Error('mana-llm not configured');
    }

    const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.visionModel,
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: ANALYSIS_PROMPT },
              {
                type: 'image_url',
                image_url: { url: `data:${mimeType};base64,${imageBase64}` },
              },
            ],
          },
        ],
        temperature: 0.3,
      }),
      signal: AbortSignal.timeout(120000),
    });

    if (!response.ok) {
      const errorText = await response.text();
      this.logger.error(`mana-llm vision error: ${response.status} - ${errorText}`);
      throw new Error('Failed to analyze image');
    }

    const data = await response.json();
    const text = data.choices?.[0]?.message?.content || '';

    // Extract JSON from response
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      throw new Error('Failed to parse AI response');
    }

    return JSON.parse(jsonMatch[0]) as AIAnalysisResult;
    const { data } = await this.llm.visionJson<AIAnalysisResult>(
      ANALYSIS_PROMPT,
      imageBase64,
      mimeType,
      { temperature: 0.3 }
    );
    return data;
  }

  async analyzeText(description: string): Promise<AIAnalysisResult> {
    if (!this.manaLlmUrl) {
      throw new Error('mana-llm not configured');
    }

    const prompt = TEXT_ANALYSIS_PROMPT.replace('{INPUT}', description);

    const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.textModel,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.3,
      }),
      signal: AbortSignal.timeout(60000),
    const { data } = await this.llm.json<AIAnalysisResult>(prompt, {
      temperature: 0.3,
      timeout: 60_000,
    });

    if (!response.ok) {
      throw new Error(`mana-llm error: ${response.status}`);
    }

    const data = await response.json();
    const text = data.choices?.[0]?.message?.content || '';

    // Extract JSON from response
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      throw new Error('Failed to parse AI response');
    }

    return JSON.parse(jsonMatch[0]) as AIAnalysisResult;
    return data;
  }
}
@@ -1,5 +1,6 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';

@@ -16,6 +17,14 @@ import { RecommendationsModule } from './recommendations/recommendations.module'
      isGlobal: true,
      envFilePath: ['.env', '.env.development'],
    }),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    DatabaseModule,
    HealthModule.forRoot({ serviceName: 'nutriphi-backend' }),
    MetricsModule.register({
@@ -18,8 +18,9 @@
    "db:seed": "tsx src/db/seed.ts"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@google/generative-ai": "^0.21.0",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-metrics": "workspace:*",
@@ -1,5 +1,5 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { LlmClientService } from '@manacore/shared-llm';
import type { AnalysisResult } from '@planta/shared';

const PLANT_ANALYSIS_PROMPT = `Du bist ein erfahrener Botaniker und Pflanzenexperte. Analysiere dieses Pflanzenfoto und erstelle einen detaillierten Steckbrief.

@@ -43,70 +43,32 @@ Falls du die Pflanze nicht identifizieren kannst, setze confidence auf 0 und sci
@Injectable()
export class VisionService {
  private readonly logger = new Logger(VisionService.name);
  private readonly manaLlmUrl: string;
  private readonly visionModel = 'ollama/llava:7b';

  constructor(private configService: ConfigService) {
    this.manaLlmUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
    this.logger.log(`Planta Vision using mana-llm at ${this.manaLlmUrl}`);
  }
  constructor(private readonly llm: LlmClientService) {}

  async analyzePlantImage(imageBuffer: Buffer, mimeType: string): Promise<AnalysisResult | null> {
    try {
      const base64 = imageBuffer.toString('base64');

      const result = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: this.visionModel,
          messages: [
            {
              role: 'user',
              content: [
                { type: 'text', text: PLANT_ANALYSIS_PROMPT },
                {
                  type: 'image_url',
                  image_url: { url: `data:${mimeType};base64,${base64}` },
                },
              ],
            },
          ],
      const { data } = await this.llm.visionJson<AnalysisResult>(
        PLANT_ANALYSIS_PROMPT,
        base64,
        mimeType,
        {
          temperature: 0.3,
        }),
        signal: AbortSignal.timeout(120000),
      });

      if (!result.ok) {
        const errorText = await result.text();
        this.logger.error(`mana-llm vision error: ${result.status} - ${errorText}`);
        return null;
      }

      const data = await result.json();
      const response = (data.choices?.[0]?.message?.content || '').trim();

      this.logger.debug(`Gemini raw response: ${response}`);

      // Parse JSON response - handle potential markdown code blocks
      let jsonStr = response;
      if (response.includes('```')) {
        const match = response.match(/```(?:json)?\s*([\s\S]*?)```/);
        if (match) {
          jsonStr = match[1].trim();
          validate: (raw) => {
            const result = raw as AnalysisResult;
            this.validateAnalysisResult(result);
            return result;
          },
        }
      }

      const parsed = JSON.parse(jsonStr) as AnalysisResult;

      // Validate and sanitize response
      this.validateAnalysisResult(parsed);

      this.logger.log(
        `Plant identified: ${parsed.identification.scientificName} (${parsed.identification.confidence}% confidence)`
      );

      return parsed;
      this.logger.log(
        `Plant identified: ${data.identification.scientificName} (${data.identification.confidence}% confidence)`
      );

      return data;
    } catch (error) {
      this.logger.error(`Vision analysis failed: ${error}`);
      return null;

@@ -114,7 +76,6 @@ export class VisionService {
  }

  private validateAnalysisResult(result: AnalysisResult): void {
    // Validate identification
    if (!result.identification) {
      result.identification = {
        scientificName: 'Unbekannt',

@@ -123,13 +84,11 @@ export class VisionService {
      };
    }

    // Ensure confidence is within range
    if (typeof result.identification.confidence !== 'number') {
      result.identification.confidence = 0;
    }
    result.identification.confidence = Math.max(0, Math.min(100, result.identification.confidence));

    // Validate health
    if (!result.health) {
      result.health = {
        status: 'healthy',

@@ -143,7 +102,6 @@ export class VisionService {
      result.health.status = 'healthy';
    }

    // Validate care
    if (!result.care) {
      result.care = {
        light: 'medium',
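The markdown-fence stripping removed above, and the bare-object regex removed from the nutriphi service, are what the new extractJson utility consolidates. A minimal sketch, assuming behavior equivalent to the removed code; the actual utility in @manacore/shared-llm may differ in details:

// Illustrative extractJson; mirrors the regexes the migrated services used to
// carry (fenced ```json blocks, bare {...} spans). Not the package's actual code.
export function extractJson<T>(text: string): T {
  let jsonStr = text.trim();
  // Strip a markdown code fence if present.
  const fenced = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenced) {
    jsonStr = fenced[1].trim();
  } else {
    // Otherwise fall back to the first {...} span in the response.
    const bare = jsonStr.match(/\{[\s\S]*\}/);
    if (bare) jsonStr = bare[0];
  }
  return JSON.parse(jsonStr) as T;
}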
@@ -1,5 +1,6 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';

@@ -14,6 +15,14 @@ import { WateringModule } from './watering/watering.module';
      isGlobal: true,
      envFilePath: '.env',
    }),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    DatabaseModule,
    HealthModule.forRoot({ serviceName: 'planta-backend' }),
    MetricsModule.register({
@@ -17,8 +17,9 @@
    "db:studio": "drizzle-kit studio"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/nestjs-integration": "workspace:*",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-metrics": "workspace:*",
@@ -1,5 +1,6 @@
import { Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { ManaCoreModule } from '@manacore/nestjs-integration';
import { HealthModule } from '@manacore/shared-nestjs-health';

@@ -25,6 +26,14 @@ import { GuideModule } from './guide/guide.module';
      }),
      inject: [ConfigService],
    }),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    MetricsModule.register({
      prefix: 'traces_',
      excludePaths: ['/health'],
@@ -2,6 +2,7 @@ import { Injectable, Inject, NotFoundException, ForbiddenException, Logger } fro
import { ConfigService } from '@nestjs/config';
import { eq, and, desc } from 'drizzle-orm';
import { CreditClientService } from '@manacore/nestjs-integration';
import { LlmClientService } from '@manacore/shared-llm';
import { DATABASE_CONNECTION } from '../db/database.module';
import type { Database } from '../db/connection';
import { guides, guidePois, pois, cities } from '../db/schema';

@@ -18,7 +19,8 @@ export class GuideService {
    private readonly configService: ConfigService,
    private readonly cityService: CityService,
    private readonly poiService: PoiService,
    private readonly creditClient: CreditClientService
    private readonly creditClient: CreditClientService,
    private readonly llm: LlmClientService
  ) {}

  async generateGuide(userId: string, request: GenerateGuideRequest) {

@@ -135,35 +137,20 @@ export class GuideService {

    // Step 3: Enrich POIs with AI summaries
    this.logger.log(`[${guideId}] Step 3: Content enrichment`);
    if (manaLlmUrl) {
      for (const poi of nearbyPois) {
        if (!poi.aiSummary) {
          try {
            const prompt =
              language === 'de'
                ? `Schreibe eine 200-Wort-Zusammenfassung über "${poi.name}" in ${city.name}. Fokus auf Baugeschichte, Architekturstil und interessante Anekdoten.`
                : `Write a 200-word summary about "${poi.name}" in ${city.name}. Focus on architectural history, style, and interesting anecdotes.`;
    for (const poi of nearbyPois) {
      if (!poi.aiSummary) {
        try {
          const prompt =
            language === 'de'
              ? `Schreibe eine 200-Wort-Zusammenfassung über "${poi.name}" in ${city.name}. Fokus auf Baugeschichte, Architekturstil und interessante Anekdoten.`
              : `Write a 200-word summary about "${poi.name}" in ${city.name}. Focus on architectural history, style, and interesting anecdotes.`;

            const llmResponse = await fetch(`${manaLlmUrl}/api/v1/chat/completions`, {
              method: 'POST',
              headers: { 'Content-Type': 'application/json' },
              body: JSON.stringify({
                messages: [{ role: 'user', content: prompt }],
                model: 'default',
                max_tokens: 500,
              }),
            });

            if (llmResponse.ok) {
              const data = await llmResponse.json();
              const summary = data.choices?.[0]?.message?.content;
              if (summary) {
                await this.poiService.updateAiSummary(poi.id, summary, language);
              }
            }
          } catch (err) {
            this.logger.warn(`AI summary failed for POI ${poi.name}:`, err);
          const result = await this.llm.chat(prompt, { maxTokens: 500 });
          if (result.content) {
            await this.poiService.updateAiSummary(poi.id, result.content, language);
          }
        } catch (err) {
          this.logger.warn(`AI summary failed for POI ${poi.name}:`, err);
        }
      }
    }

@@ -197,43 +184,29 @@ export class GuideService {
      const poi = sortedPois[i];
      let narrative: string | null = null;

      if (manaLlmUrl) {
        try {
          const prevStation = i > 0 ? sortedPois[i - 1].name : 'Startpunkt';
          const distanceToPrev =
            i > 0
              ? Math.round(
                  this.haversineDistance(
                    sortedPois[i - 1].latitude,
                    sortedPois[i - 1].longitude,
                    poi.latitude,
                    poi.longitude
                  )
      try {
        const prevStation = i > 0 ? sortedPois[i - 1].name : 'Startpunkt';
        const distanceToPrev =
          i > 0
            ? Math.round(
                this.haversineDistance(
                  sortedPois[i - 1].latitude,
                  sortedPois[i - 1].longitude,
                  poi.latitude,
                  poi.longitude
                )
              : 0;
              )
            : 0;

          const prompt =
            language === 'de'
              ? `Du bist ein erfahrener Stadtführer in ${city.name}. Schreibe einen kurzen, lebendigen Stadtführer-Text (80-120 Wörter) über "${poi.name}" als Station ${i + 1} einer Stadtführung. ${i > 0 ? `Die vorherige Station war "${prevStation}" (${distanceToPrev}m entfernt).` : 'Dies ist die erste Station.'} Erwähne architektonische Details und eine interessante Anekdote.`
              : `You are an experienced city guide in ${city.name}. Write a short, vivid guide text (80-120 words) about "${poi.name}" as station ${i + 1} of a walking tour. ${i > 0 ? `The previous station was "${prevStation}" (${distanceToPrev}m away).` : 'This is the first station.'} Mention architectural details and an interesting anecdote.`;
        const prompt =
          language === 'de'
            ? `Du bist ein erfahrener Stadtführer in ${city.name}. Schreibe einen kurzen, lebendigen Stadtführer-Text (80-120 Wörter) über "${poi.name}" als Station ${i + 1} einer Stadtführung. ${i > 0 ? `Die vorherige Station war "${prevStation}" (${distanceToPrev}m entfernt).` : 'Dies ist die erste Station.'} Erwähne architektonische Details und eine interessante Anekdote.`
            : `You are an experienced city guide in ${city.name}. Write a short, vivid guide text (80-120 words) about "${poi.name}" as station ${i + 1} of a walking tour. ${i > 0 ? `The previous station was "${prevStation}" (${distanceToPrev}m away).` : 'This is the first station.'} Mention architectural details and an interesting anecdote.`;

          const llmResponse = await fetch(`${manaLlmUrl}/api/v1/chat/completions`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              messages: [{ role: 'user', content: prompt }],
              model: 'default',
              max_tokens: 300,
            }),
          });

          if (llmResponse.ok) {
            const data = await llmResponse.json();
            narrative = data.choices?.[0]?.message?.content || null;
          }
        } catch (err) {
          this.logger.warn(`Narrative generation failed for POI ${poi.name}:`, err);
        }
        const result = await this.llm.chat(prompt, { maxTokens: 300 });
        narrative = result.content || null;
      } catch (err) {
        this.logger.warn(`Narrative generation failed for POI ${poi.name}:`, err);
      }

      guidePoiRecords.push({
@@ -71,6 +71,7 @@
    "prepublishOnly": "pnpm build"
  },
  "dependencies": {
    "@manacore/shared-llm": "workspace:^",
    "@nestjs/common": "^11.0.20",
    "@nestjs/config": "^4.0.2",
    "date-fns": "^4.1.0",
@ -1,4 +1,6 @@
|
|||
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
|
||||
import { LlmClient, resolveOptions } from '@manacore/shared-llm';
|
||||
import type { ModelInfo } from '@manacore/shared-llm';
|
||||
import {
|
||||
OllamaModel,
|
||||
ChatMessage,
|
||||
|
|
@ -9,23 +11,34 @@ import {
|
|||
SYSTEM_PROMPTS,
|
||||
VISION_MODELS,
|
||||
NON_CHAT_MODELS,
|
||||
OllamaVersionResponse,
|
||||
OllamaTagsResponse,
|
||||
OllamaChatResponse,
|
||||
} from './types';
|
||||
|
||||
@Injectable()
|
||||
export class AiService implements OnModuleInit {
|
||||
private readonly logger = new Logger(AiService.name);
|
||||
private readonly config: AiServiceConfig;
|
||||
private readonly llm: LlmClient;
|
||||
private sessions: Map<string, UserAiSession> = new Map();
|
||||
|
||||
constructor(config?: Partial<AiServiceConfig>) {
|
||||
this.config = {
|
||||
baseUrl: config?.baseUrl ?? process.env.OLLAMA_URL ?? 'http://localhost:11434',
|
||||
baseUrl:
|
||||
config?.baseUrl ??
|
||||
process.env.MANA_LLM_URL ??
|
||||
process.env.OLLAMA_URL ??
|
||||
'http://localhost:3025',
|
||||
defaultModel: config?.defaultModel ?? process.env.OLLAMA_MODEL ?? 'gemma3:4b',
|
||||
timeout: config?.timeout ?? parseInt(process.env.OLLAMA_TIMEOUT ?? '120000'),
|
||||
};
|
||||
|
||||
this.llm = new LlmClient(
|
||||
resolveOptions({
|
||||
manaLlmUrl: this.config.baseUrl,
|
||||
defaultModel: this.normalizeModel(this.config.defaultModel),
|
||||
timeout: this.config.timeout,
|
||||
maxRetries: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
async onModuleInit() {
|
||||
|
|
@ -36,14 +49,15 @@ export class AiService implements OnModuleInit {
|
|||
|
||||
async checkConnection(): Promise<boolean> {
|
||||
try {
|
||||
const response = await fetch(`${this.config.baseUrl}/api/version`, {
|
||||
signal: AbortSignal.timeout(5000),
|
||||
});
|
||||
const data = (await response.json()) as OllamaVersionResponse;
|
||||
this.logger.log(`Ollama connected: v${data.version}`);
|
||||
return true;
|
||||
const health = await this.llm.health();
|
||||
const isConnected = health.status === 'healthy' || health.status === 'degraded';
|
||||
if (isConnected) {
|
||||
const providers = Object.keys(health.providers || {}).join(', ');
|
||||
this.logger.log(`mana-llm connected: ${health.status}, providers: ${providers}`);
|
||||
}
|
||||
return isConnected;
|
||||
} catch (error) {
|
||||
this.logger.error(`Failed to connect to Ollama at ${this.config.baseUrl}:`, error);
|
||||
this.logger.error(`Failed to connect to mana-llm at ${this.config.baseUrl}:`, error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -52,9 +66,12 @@ export class AiService implements OnModuleInit {
|
|||
|
||||
async listModels(): Promise<OllamaModel[]> {
|
||||
try {
|
||||
const response = await fetch(`${this.config.baseUrl}/api/tags`);
|
||||
const data = (await response.json()) as OllamaTagsResponse;
|
||||
return data.models || [];
|
||||
const models = await this.llm.listModels();
|
||||
return models.map((m: ModelInfo) => ({
|
||||
name: m.id,
|
||||
size: 0,
|
||||
modified_at: new Date(m.created * 1000).toISOString(),
|
||||
}));
|
||||
} catch (error) {
|
||||
this.logger.error('Failed to list models:', error);
|
||||
return [];
|
||||
|
|
@ -79,55 +96,38 @@ export class AiService implements OnModuleInit {
|
|||
|
||||
async chat(messages: ChatMessage[], options?: ChatOptions): Promise<ChatResult> {
|
||||
const model = options?.model ?? this.config.defaultModel;
|
||||
const normalizedModel = this.normalizeModel(model);
|
||||
|
||||
try {
|
||||
const response = await fetch(`${this.config.baseUrl}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
messages,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: options?.temperature,
|
||||
num_predict: options?.maxTokens,
|
||||
},
|
||||
}),
|
||||
signal: AbortSignal.timeout(this.config.timeout),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Ollama API error: ${response.status}`);
|
||||
const result = await this.llm.chatMessages(
|
||||
messages.map((m) => ({
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
})),
|
||||
{
|
||||
model: normalizedModel,
|
||||
temperature: options?.temperature,
|
||||
maxTokens: options?.maxTokens,
|
||||
}
|
||||
);
|
||||
|
||||
const data = (await response.json()) as OllamaChatResponse;
|
||||
const meta = {
|
||||
model,
|
||||
evalCount: result.usage.completion_tokens,
|
||||
evalDuration: undefined as number | undefined,
|
||||
tokensPerSecond: undefined as number | undefined,
|
||||
};
|
||||
|
||||
const meta = {
|
||||
model,
|
||||
evalCount: data.eval_count,
|
||||
evalDuration: data.eval_duration,
|
||||
tokensPerSecond:
|
||||
data.eval_count && data.eval_duration
|
||||
? (data.eval_count / data.eval_duration) * 1e9
|
||||
: undefined,
|
||||
};
|
||||
|
||||
if (meta.tokensPerSecond) {
|
||||
this.logger.debug(
|
||||
`Generated ${meta.evalCount} tokens at ${meta.tokensPerSecond.toFixed(1)} t/s`
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
content: data.message?.content || '',
|
||||
meta,
|
||||
};
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.name === 'TimeoutError') {
|
||||
throw new Error('Ollama Timeout - Antwort dauerte zu lange');
|
||||
}
|
||||
throw error;
|
||||
if (meta.evalCount && result.latencyMs > 0) {
|
||||
meta.tokensPerSecond = (meta.evalCount / result.latencyMs) * 1000;
|
||||
this.logger.debug(
|
||||
`Generated ${meta.evalCount} tokens at ${meta.tokensPerSecond.toFixed(1)} t/s`
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
content: result.content,
|
||||
meta,
|
||||
};
|
||||
}
|
||||
|
||||
async chatSimple(userId: string, message: string, options?: ChatOptions): Promise<string> {
|
||||
|
|
@@ -162,51 +162,27 @@ export class AiService implements OnModuleInit {

   async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<ChatResult> {
     const selectedModel = model ?? this.config.defaultModel;
+    const normalizedModel = this.normalizeModel(selectedModel);

-    try {
-      const response = await fetch(`${this.config.baseUrl}/api/chat`, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({
-          model: selectedModel,
-          messages: [
-            {
-              role: 'user',
-              content: prompt,
-              images: [imageBase64],
-            },
-          ],
-          stream: false,
-        }),
-        signal: AbortSignal.timeout(this.config.timeout),
-      });
-
-      if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
-      }
-
-      const data = (await response.json()) as OllamaChatResponse;
-
-      const meta = {
-        model: selectedModel,
-        evalCount: data.eval_count,
-        evalDuration: data.eval_duration,
-        tokensPerSecond:
-          data.eval_count && data.eval_duration
-            ? (data.eval_count / data.eval_duration) * 1e9
-            : undefined,
-      };
-
-      return {
-        content: data.message?.content || '',
-        meta,
-      };
-    } catch (error) {
-      if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Bildanalyse dauerte zu lange');
-      }
-      throw error;
-    }
+    const result = await this.llm.vision(prompt, imageBase64, 'image/png', {
+      model: normalizedModel,
+    });
+
+    const meta = {
+      model: selectedModel,
+      evalCount: result.usage.completion_tokens,
+      evalDuration: undefined as number | undefined,
+      tokensPerSecond: undefined as number | undefined,
+    };
+
+    if (meta.evalCount && result.latencyMs > 0) {
+      meta.tokensPerSecond = (meta.evalCount / result.latencyMs) * 1000;
+    }
+
+    return {
+      content: result.content,
+      meta,
+    };
   }

   // ===== Compare Models =====
@@ -256,7 +232,7 @@ export class AiService implements OnModuleInit {
   setSessionModel(userId: string, model: string): void {
     const session = this.getSession(userId);
     session.model = model;
-    session.history = []; // Clear history when switching models
+    session.history = [];
   }

   setSessionSystemPrompt(userId: string, prompt: string): void {
@@ -303,4 +279,9 @@ export class AiService implements OnModuleInit {
     const entry = Object.entries(SYSTEM_PROMPTS).find(([_, v]) => v === session.systemPrompt);
     return entry ? entry[0] : 'custom';
   }
+
+  private normalizeModel(model: string): string {
+    if (model.includes('/')) return model;
+    return `ollama/${model}`;
+  }
 }
@@ -44,6 +44,18 @@ export enum CreditOperationType {
   // Zitare - AI explanations
   AI_QUOTE_EXPLANATION = 'ai_quote_explanation',

+  // Planta - Plant analysis
+  AI_PLANT_ANALYSIS = 'ai_plant_analysis',
+
+  // Traces - City guide generation
+  AI_GUIDE_GENERATION = 'ai_guide_generation',
+
+  // Context - AI text generation
+  AI_CONTEXT_GENERATION = 'ai_context_generation',
+
+  // Matrix Bots - Bot chat
+  AI_BOT_CHAT = 'ai_bot_chat',
+
   // General AI features
   AI_SMART_SCHEDULING = 'ai_smart_scheduling',
   AI_SUGGESTIONS = 'ai_suggestions',
@@ -117,6 +129,11 @@ export const CREDIT_COSTS: Record<CreditOperationType, number> = {

   [CreditOperationType.AI_QUOTE_EXPLANATION]: 2,

+  [CreditOperationType.AI_PLANT_ANALYSIS]: 2,
+  [CreditOperationType.AI_GUIDE_GENERATION]: 5,
+  [CreditOperationType.AI_CONTEXT_GENERATION]: 2,
+  [CreditOperationType.AI_BOT_CHAT]: 0.1,
+
   [CreditOperationType.AI_SMART_SCHEDULING]: 2,
   [CreditOperationType.AI_SUGGESTIONS]: 2,
   [CreditOperationType.AI_ENRICHMENT]: 2,
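
The costs above compose additively per operation. As a sketch (assuming CREDIT_COSTS and the enum are the public exports of this credit-operations package), one plant analysis, one guide generation, and ten bot chat messages total 2 + 5 + 10 x 0.1 = 8 credits:

import { CREDIT_COSTS, CreditOperationType } from '@manacore/credit-operations';

// Hypothetical usage tally for one user session.
const session: Array<[CreditOperationType, number]> = [
  [CreditOperationType.AI_PLANT_ANALYSIS, 1],   // 2 credits
  [CreditOperationType.AI_GUIDE_GENERATION, 1], // 5 credits
  [CreditOperationType.AI_BOT_CHAT, 10],        // 10 x 0.1 = 1 credit
];

const total = session.reduce((sum, [op, n]) => sum + CREDIT_COSTS[op] * n, 0); // 8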
@@ -271,6 +288,38 @@ export const OPERATION_METADATA: Record<CreditOperationType, OperationMetadata>
     app: 'zitare',
   },

+  // Planta
+  [CreditOperationType.AI_PLANT_ANALYSIS]: {
+    name: 'Plant Analysis',
+    description: 'Identify and analyze a plant from a photo',
+    category: CreditCategory.AI,
+    app: 'planta',
+  },
+
+  // Traces
+  [CreditOperationType.AI_GUIDE_GENERATION]: {
+    name: 'City Guide Generation',
+    description: 'Generate an AI-powered city walking guide',
+    category: CreditCategory.AI,
+    app: 'traces',
+  },
+
+  // Context
+  [CreditOperationType.AI_CONTEXT_GENERATION]: {
+    name: 'AI Text Generation',
+    description: 'Generate or transform text with AI',
+    category: CreditCategory.AI,
+    app: 'context',
+  },
+
+  // Matrix Bots
+  [CreditOperationType.AI_BOT_CHAT]: {
+    name: 'Bot Chat Message',
+    description: 'Chat with AI via Matrix bot',
+    category: CreditCategory.AI,
+    app: 'matrix',
+  },
+
   // General AI
   [CreditOperationType.AI_SMART_SCHEDULING]: {
     name: 'Smart Scheduling',

packages/shared-llm/package.json (new file, 47 lines)
@@ -0,0 +1,47 @@
{
  "name": "@manacore/shared-llm",
  "version": "1.0.0",
  "private": true,
  "description": "Unified LLM client for all Mana backends via mana-llm service",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js",
      "require": "./dist/index.js"
    },
    "./standalone": {
      "types": "./dist/standalone.d.ts",
      "import": "./dist/standalone.js",
      "require": "./dist/standalone.js"
    }
  },
  "scripts": {
    "build": "tsc",
    "dev": "tsc --watch",
    "clean": "rm -rf dist",
    "type-check": "tsc --noEmit",
    "test": "vitest run"
  },
  "dependencies": {
    "@nestjs/common": "^10.0.0 || ^11.0.0",
    "@nestjs/config": "^3.0.0 || ^4.0.0",
    "@nestjs/core": "^10.0.0 || ^11.0.0",
    "reflect-metadata": "^0.1.13 || ^0.2.0",
    "rxjs": "^7.0.0"
  },
  "peerDependencies": {
    "@nestjs/common": "^10.0.0 || ^11.0.0",
    "@nestjs/config": "^3.0.0 || ^4.0.0",
    "@nestjs/core": "^10.0.0 || ^11.0.0"
  },
  "devDependencies": {
    "@types/node": "^20.0.0",
    "typescript": "^5.0.0",
    "vitest": "^2.0.0"
  },
  "files": [
    "dist"
  ]
}
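
The exports map above gives the package two entry points; a minimal sketch of the two import styles it enables:

// NestJS consumers use the root entry.
import { LlmModule, LlmClientService } from '@manacore/shared-llm';

// Non-NestJS consumers (e.g. bot-services) use the standalone subpath.
import { LlmClient } from '@manacore/shared-llm/standalone';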

packages/shared-llm/src/__tests__/json-extractor.spec.ts (new file, 119 lines)
@@ -0,0 +1,119 @@
import { describe, it, expect } from 'vitest';
import { extractJson } from '../utils/json-extractor';

describe('extractJson', () => {
  it('parses direct JSON object', () => {
    const result = extractJson('{"name": "test", "value": 42}');
    expect(result).toEqual({ name: 'test', value: 42 });
  });

  it('parses direct JSON array', () => {
    const result = extractJson('[1, 2, 3]');
    expect(result).toEqual([1, 2, 3]);
  });

  it('strips markdown json code fence', () => {
    const input = '```json\n{"category": "bug", "title": "Fix login"}\n```';
    const result = extractJson(input);
    expect(result).toEqual({ category: 'bug', title: 'Fix login' });
  });

  it('strips markdown code fence without json label', () => {
    const input = '```\n{"key": "value"}\n```';
    const result = extractJson(input);
    expect(result).toEqual({ key: 'value' });
  });

  it('extracts JSON from surrounding text', () => {
    const input =
      'Here is the analysis:\n{"confidence": 0.95, "species": "Rose"}\nHope this helps!';
    const result = extractJson(input);
    expect(result).toEqual({ confidence: 0.95, species: 'Rose' });
  });

  it('extracts JSON array from surrounding text', () => {
    const input = 'The items are: [1, 2, 3] as requested.';
    const result = extractJson(input);
    expect(result).toEqual([1, 2, 3]);
  });

  it('handles nested JSON objects', () => {
    const input = '{"outer": {"inner": {"deep": true}}, "list": [1, 2]}';
    const result = extractJson(input);
    expect(result).toEqual({ outer: { inner: { deep: true } }, list: [1, 2] });
  });

  it('handles JSON with escaped quotes in strings', () => {
    const input = '{"text": "He said \\"hello\\""}';
    const result = extractJson(input);
    expect(result).toEqual({ text: 'He said "hello"' });
  });

  it('handles JSON with braces inside strings', () => {
    const input = 'Result: {"code": "if (x) { return }"}';
    const result = extractJson(input);
    expect(result).toEqual({ code: 'if (x) { return }' });
  });

  it('trims whitespace before parsing', () => {
    const input = '  \n  {"key": "value"}  \n  ';
    const result = extractJson(input);
    expect(result).toEqual({ key: 'value' });
  });

  it('applies validation function on success', () => {
    const validate = (data: unknown) => {
      const obj = data as { name: string };
      if (!obj.name) throw new Error('missing name');
      return obj;
    };
    const result = extractJson('{"name": "test"}', validate);
    expect(result).toEqual({ name: 'test' });
  });

  it('throws when validation fails', () => {
    const validate = (data: unknown) => {
      const obj = data as { name?: string };
      if (!obj.name) throw new Error('missing name');
      return obj;
    };
    expect(() => extractJson('{"value": 123}', validate)).toThrow();
  });

  it('throws on completely invalid input', () => {
    expect(() => extractJson('This is just plain text with no JSON')).toThrow(
      'Failed to extract JSON'
    );
  });

  it('throws on empty input', () => {
    expect(() => extractJson('')).toThrow('Failed to extract JSON');
  });

  it('handles real-world LLM response with preamble', () => {
    const input = `Based on my analysis, here is the result:

\`\`\`json
{
  "foods": [
    {"name": "Apple", "calories": 95, "protein": 0.5}
  ],
  "totalCalories": 95,
  "confidence": 0.9
}
\`\`\`

This analysis is based on the image provided.`;

    const result = extractJson<{ foods: unknown[]; totalCalories: number }>(input);
    expect(result.totalCalories).toBe(95);
    expect(result.foods).toHaveLength(1);
  });

  it('prefers object over array when both exist', () => {
    // Direct parse fails, fence fails, tries object first
    const input = 'Some text {"key": "val"} and [1, 2, 3]';
    const result = extractJson(input);
    expect(result).toEqual({ key: 'val' });
  });
});

packages/shared-llm/src/__tests__/llm-client.spec.ts (new file, 277 lines)
@@ -0,0 +1,277 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { LlmClient } from '../llm-client';
import type { ResolvedLlmOptions } from '../interfaces/llm-options.interface';
import type { ChatCompletionResponse } from '../types/openai-compat.types';

const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);

const DEFAULT_OPTIONS: ResolvedLlmOptions = {
  manaLlmUrl: 'http://localhost:3025',
  defaultModel: 'ollama/gemma3:4b',
  defaultVisionModel: 'ollama/llava:7b',
  timeout: 30_000,
  maxRetries: 0, // No retries in tests for simplicity
  debug: false,
};

function mockCompletionResponse(
  content: string,
  model = 'ollama/gemma3:4b'
): ChatCompletionResponse {
  return {
    id: 'chatcmpl-test123',
    object: 'chat.completion',
    created: Date.now(),
    model,
    choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }],
    usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
  };
}

function mockFetchOk(body: unknown): void {
  mockFetch.mockResolvedValueOnce({
    ok: true,
    status: 200,
    json: () => Promise.resolve(body),
    text: () => Promise.resolve(JSON.stringify(body)),
  } as unknown as Response);
}

function mockFetchError(status: number, body = ''): void {
  mockFetch.mockResolvedValueOnce({
    ok: false,
    status,
    statusText: `Error ${status}`,
    json: () => Promise.resolve({}),
    text: () => Promise.resolve(body),
  } as unknown as Response);
}

describe('LlmClient', () => {
  let client: LlmClient;

  beforeEach(() => {
    vi.clearAllMocks();
    client = new LlmClient(DEFAULT_OPTIONS);
  });

  describe('chat', () => {
    it('sends correct request body', async () => {
      mockFetchOk(mockCompletionResponse('Hello!'));

      await client.chat('Hi there');

      expect(mockFetch).toHaveBeenCalledTimes(1);
      const [url, init] = mockFetch.mock.calls[0];
      expect(url).toBe('http://localhost:3025/v1/chat/completions');

      const body = JSON.parse(init.body);
      expect(body.model).toBe('ollama/gemma3:4b');
      expect(body.messages).toEqual([{ role: 'user', content: 'Hi there' }]);
      expect(body.stream).toBe(false);
    });

    it('includes system prompt when provided', async () => {
      mockFetchOk(mockCompletionResponse('Response'));

      await client.chat('Question', { systemPrompt: 'You are helpful.' });

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.messages).toEqual([
        { role: 'system', content: 'You are helpful.' },
        { role: 'user', content: 'Question' },
      ]);
    });

    it('uses custom model and temperature', async () => {
      mockFetchOk(mockCompletionResponse('Response'));

      await client.chat('Prompt', { model: 'openrouter/gpt-4o', temperature: 0.3 });

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.model).toBe('openrouter/gpt-4o');
      expect(body.temperature).toBe(0.3);
    });

    it('returns ChatResult with content and usage', async () => {
      mockFetchOk(mockCompletionResponse('Generated text'));

      const result = await client.chat('Prompt');

      expect(result.content).toBe('Generated text');
      expect(result.model).toBe('ollama/gemma3:4b');
      expect(result.usage.total_tokens).toBe(30);
      expect(result.latencyMs).toBeGreaterThanOrEqual(0);
    });

    it('throws on error response', async () => {
      mockFetchError(500, 'Internal Server Error');

      await expect(client.chat('Prompt')).rejects.toThrow('mana-llm error 500');
    });
  });

  describe('json', () => {
    it('extracts JSON from response', async () => {
      mockFetchOk(mockCompletionResponse('{"category": "bug", "title": "Fix it"}'));

      const result = await client.json<{ category: string; title: string }>('Analyze this');

      expect(result.data).toEqual({ category: 'bug', title: 'Fix it' });
      expect(result.content).toBe('{"category": "bug", "title": "Fix it"}');
    });

    it('extracts JSON from markdown-wrapped response', async () => {
      mockFetchOk(mockCompletionResponse('```json\n{"key": "value"}\n```'));

      const result = await client.json('Parse this');
      expect(result.data).toEqual({ key: 'value' });
    });

    it('applies validation function', async () => {
      mockFetchOk(mockCompletionResponse('{"name": "test"}'));

      const validate = (data: unknown) => {
        const obj = data as { name: string };
        if (typeof obj.name !== 'string') throw new Error('invalid');
        return obj;
      };

      const result = await client.json('Prompt', { validate });
      expect(result.data.name).toBe('test');
    });

    it('retries JSON extraction on parse failure', async () => {
      // First attempt returns bad JSON, second returns good
      mockFetchOk(mockCompletionResponse('not json at all'));
      mockFetchOk(mockCompletionResponse('{"valid": true}'));

      const result = await client.json('Prompt', { jsonRetries: 1 });
      expect(result.data).toEqual({ valid: true });
      expect(mockFetch).toHaveBeenCalledTimes(2);
    });
  });

  describe('vision', () => {
    it('builds multimodal message with base64 image', async () => {
      mockFetchOk(mockCompletionResponse('A rose'));

      await client.vision('What is this?', 'abc123base64', 'image/jpeg');

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.model).toBe('ollama/llava:7b');
      expect(body.messages[0].content).toEqual([
        { type: 'text', text: 'What is this?' },
        { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,abc123base64' } },
      ]);
    });

    it('uses data URL as-is if already formatted', async () => {
      mockFetchOk(mockCompletionResponse('A cat'));

      await client.vision('What?', 'data:image/png;base64,xyz');

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      const imageUrl = body.messages[0].content[1].image_url.url;
      expect(imageUrl).toBe('data:image/png;base64,xyz');
    });

    it('uses custom vision model when specified', async () => {
      mockFetchOk(mockCompletionResponse('Result'));

      await client.vision('Prompt', 'img', 'image/jpeg', {
        visionModel: 'ollama/qwen3-vl:4b',
      });

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.model).toBe('ollama/qwen3-vl:4b');
    });
  });

  describe('visionJson', () => {
    it('extracts JSON from vision response', async () => {
      mockFetchOk(mockCompletionResponse('```json\n{"species": "Rose", "confidence": 0.95}\n```'));

      const result = await client.visionJson<{ species: string }>(
        'Identify plant',
        'imgdata',
        'image/jpeg'
      );

      expect(result.data.species).toBe('Rose');
    });
  });

  describe('health', () => {
    it('returns health status', async () => {
      mockFetch.mockResolvedValueOnce({
        ok: true,
        status: 200,
        json: () =>
          Promise.resolve({
            status: 'healthy',
            providers: { ollama: { status: 'healthy' } },
          }),
      } as unknown as Response);

      const health = await client.health();
      expect(health.status).toBe('healthy');
    });

    it('returns unhealthy on network error', async () => {
      mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));

      const health = await client.health();
      expect(health.status).toBe('unhealthy');
    });
  });

  describe('listModels', () => {
    it('returns model list', async () => {
      mockFetch.mockResolvedValueOnce({
        ok: true,
        status: 200,
        json: () =>
          Promise.resolve({
            data: [{ id: 'ollama/gemma3:4b', object: 'model', created: 0, owned_by: 'ollama' }],
          }),
      } as unknown as Response);

      const models = await client.listModels();
      expect(models).toHaveLength(1);
      expect(models[0].id).toBe('ollama/gemma3:4b');
    });
  });

  describe('chatMessages', () => {
    it('sends full message history', async () => {
      mockFetchOk(mockCompletionResponse('Answer'));

      await client.chatMessages([
        { role: 'system', content: 'Be brief.' },
        { role: 'user', content: 'Hello' },
        { role: 'assistant', content: 'Hi!' },
        { role: 'user', content: 'How are you?' },
      ]);

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.messages).toHaveLength(4);
    });
  });

  describe('embed', () => {
    it('sends embedding request', async () => {
      mockFetchOk({
        object: 'list',
        data: [{ object: 'embedding', index: 0, embedding: [0.1, 0.2, 0.3] }],
        model: 'ollama/gemma3:4b',
        usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
      });

      const result = await client.embed('Hello world');
      expect(result.embeddings).toHaveLength(1);
      expect(result.embeddings[0]).toEqual([0.1, 0.2, 0.3]);
    });
  });
});

packages/shared-llm/src/__tests__/retry.spec.ts (new file, 118 lines)
@@ -0,0 +1,118 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { retryFetch } from '../utils/retry';

// Mock global fetch
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);

function mockResponse(status: number, body = ''): Response {
  return {
    ok: status >= 200 && status < 300,
    status,
    statusText: `Status ${status}`,
    text: () => Promise.resolve(body),
    json: () => Promise.resolve(JSON.parse(body || '{}')),
    headers: new Headers(),
  } as unknown as Response;
}

describe('retryFetch', () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('returns on first successful attempt', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(200, '{"ok": true}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('retries on 503 and succeeds', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('retries on 429 rate limit', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(429))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('retries on network error and succeeds', async () => {
    mockFetch
      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('does NOT retry on 400 client error', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(400, 'Bad Request'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(400);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('does NOT retry on 401 unauthorized', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(401));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(401);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('does NOT retry on 404 not found', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(404));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(404);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('throws after exhausting all retries', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(503));

    await expect(retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 })).rejects.toThrow(
      'HTTP 503'
    );

    expect(mockFetch).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
  });

  it('throws after exhausting retries on network errors', async () => {
    mockFetch
      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
      .mockRejectedValueOnce(new Error('ECONNREFUSED'));

    await expect(retryFetch('http://test', {}, { maxRetries: 1, baseDelay: 10 })).rejects.toThrow(
      'ECONNREFUSED'
    );

    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('works with maxRetries: 0 (no retries)', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(503));

    await expect(retryFetch('http://test', {}, { maxRetries: 0, baseDelay: 10 })).rejects.toThrow();

    expect(mockFetch).toHaveBeenCalledTimes(1);
  });
});

packages/shared-llm/src/index.ts (new file, 35 lines)
@@ -0,0 +1,35 @@
// Module
export { LlmModule } from './llm.module';
export { LlmClientService } from './llm-client.service';
export { LLM_MODULE_OPTIONS } from './llm.constants';

// Core client (for advanced use cases)
export { LlmClient } from './llm-client';

// Interfaces
export type {
  LlmModuleOptions,
  LlmModuleAsyncOptions,
  LlmOptionsFactory,
  ResolvedLlmOptions,
} from './interfaces';
export { resolveOptions } from './interfaces';

// Types
export type {
  ChatMessage,
  ContentPart,
  TextContentPart,
  ImageContentPart,
  ChatOptions,
  JsonOptions,
  VisionOptions,
  TokenUsage,
  ChatResult,
  JsonResult,
  ModelInfo,
  HealthStatus,
} from './types';

// Utilities
export { extractJson } from './utils';

packages/shared-llm/src/interfaces/index.ts (new file, 8 lines)
@@ -0,0 +1,8 @@
export type {
  LlmModuleOptions,
  LlmModuleAsyncOptions,
  LlmOptionsFactory,
  ResolvedLlmOptions,
} from './llm-options.interface';

export { resolveOptions } from './llm-options.interface';

packages/shared-llm/src/interfaces/llm-options.interface.ts (new file, 47 lines)
@@ -0,0 +1,47 @@
import type { ModuleMetadata, Type } from '@nestjs/common';

export interface LlmModuleOptions {
  /** mana-llm service URL (default: http://localhost:3025) */
  manaLlmUrl?: string;
  /** Default text model (default: ollama/gemma3:4b) */
  defaultModel?: string;
  /** Default vision model (default: ollama/llava:7b) */
  defaultVisionModel?: string;
  /** Request timeout in ms (default: 120000) */
  timeout?: number;
  /** Max retries on transient failures (default: 2) */
  maxRetries?: number;
  /** Enable debug logging (default: false) */
  debug?: boolean;
}

export interface LlmModuleAsyncOptions extends Pick<ModuleMetadata, 'imports'> {
  useExisting?: Type<LlmOptionsFactory>;
  useClass?: Type<LlmOptionsFactory>;
  useFactory?: (...args: any[]) => Promise<LlmModuleOptions> | LlmModuleOptions;
  inject?: any[];
}

export interface LlmOptionsFactory {
  createLlmOptions(): Promise<LlmModuleOptions> | LlmModuleOptions;
}

export interface ResolvedLlmOptions {
  manaLlmUrl: string;
  defaultModel: string;
  defaultVisionModel: string;
  timeout: number;
  maxRetries: number;
  debug: boolean;
}

export function resolveOptions(options: LlmModuleOptions): ResolvedLlmOptions {
  return {
    manaLlmUrl: options.manaLlmUrl ?? 'http://localhost:3025',
    defaultModel: options.defaultModel ?? 'ollama/gemma3:4b',
    defaultVisionModel: options.defaultVisionModel ?? 'ollama/llava:7b',
    timeout: options.timeout ?? 120_000,
    maxRetries: options.maxRetries ?? 2,
    debug: options.debug ?? false,
  };
}
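
A short sketch of how resolveOptions fills in the documented defaults; only overridden fields need to be passed (the service URL here is a hypothetical hostname):

import { resolveOptions } from '@manacore/shared-llm';

const resolved = resolveOptions({ manaLlmUrl: 'http://mana-llm:3025', debug: true });
// resolved.defaultModel       -> 'ollama/gemma3:4b'
// resolved.defaultVisionModel -> 'ollama/llava:7b'
// resolved.timeout            -> 120000, resolved.maxRetries -> 2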

packages/shared-llm/src/llm-client.service.ts (new file, 16 lines)
@@ -0,0 +1,16 @@
import { Inject, Injectable } from '@nestjs/common';
import { LlmClient } from './llm-client';
import { LLM_MODULE_OPTIONS } from './llm.constants';
import type { LlmModuleOptions } from './interfaces/llm-options.interface';
import { resolveOptions } from './interfaces/llm-options.interface';

/**
 * NestJS injectable wrapper around LlmClient.
 * All logic lives in the framework-agnostic LlmClient base class.
 */
@Injectable()
export class LlmClientService extends LlmClient {
  constructor(@Inject(LLM_MODULE_OPTIONS) options: LlmModuleOptions) {
    super(resolveOptions(options));
  }
}
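
Because LlmClientService extends LlmClient, consumers inject it and call the same surface directly. A minimal sketch; SummaryService is a hypothetical consumer, not part of this commit:

import { Injectable } from '@nestjs/common';
import { LlmClientService } from '@manacore/shared-llm';

@Injectable()
export class SummaryService {
  constructor(private readonly llm: LlmClientService) {}

  async summarize(text: string): Promise<string> {
    // chat() returns a ChatResult; only the generated text is needed here.
    const result = await this.llm.chat(`Summarize in one sentence:\n${text}`);
    return result.content;
  }
}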

packages/shared-llm/src/llm-client.ts (new file, 350 lines)
@@ -0,0 +1,350 @@
/**
 * Framework-agnostic LLM client that communicates with the mana-llm service.
 *
 * This is the core implementation shared between the NestJS LlmClientService
 * and the standalone LlmClient export (for non-NestJS consumers like bot-services).
 */

import type { ResolvedLlmOptions } from './interfaces/llm-options.interface';
import type {
  ChatMessage,
  ChatOptions,
  ChatResult,
  JsonOptions,
  JsonResult,
  VisionOptions,
  TokenUsage,
  ModelInfo,
  HealthStatus,
} from './types/chat.types';
import type {
  ChatCompletionRequest,
  ChatCompletionResponse,
  EmbeddingResponse,
} from './types/openai-compat.types';
import { extractJson } from './utils/json-extractor';
import { retryFetch } from './utils/retry';

function createTimeoutSignal(ms: number): any {
  const controller = new AbortController();
  setTimeout(() => controller.abort(), ms);
  return controller.signal;
}

export class LlmClient {
  private readonly baseUrl: string;
  private readonly options: ResolvedLlmOptions;

  constructor(options: ResolvedLlmOptions) {
    this.options = options;
    this.baseUrl = options.manaLlmUrl.replace(/\/+$/, '');
  }

  // ---------------------------------------------------------------------------
  // Text Chat
  // ---------------------------------------------------------------------------

  /** Simple chat with a single prompt string. */
  async chat(prompt: string, opts?: ChatOptions): Promise<ChatResult> {
    const messages = this.buildMessages(prompt, opts?.systemPrompt);
    return this.chatMessages(messages, opts);
  }

  /** Chat with full message history. */
  async chatMessages(messages: ChatMessage[], opts?: ChatOptions): Promise<ChatResult> {
    const body = this.buildRequest(messages, opts, false);
    const start = Date.now();
    const response = await this.fetchCompletion(body, opts?.timeout);
    const latencyMs = Date.now() - start;

    return {
      content: response.choices[0]?.message?.content ?? '',
      model: response.model,
      usage: response.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
      latencyMs,
    };
  }

  // ---------------------------------------------------------------------------
  // Streaming
  // ---------------------------------------------------------------------------

  /** Streaming chat - returns an async iterable of text tokens. */
  async *chatStream(prompt: string, opts?: ChatOptions): AsyncIterable<string> {
    const messages = this.buildMessages(prompt, opts?.systemPrompt);
    yield* this.chatStreamMessages(messages, opts);
  }

  /** Streaming chat with full message history. */
  async *chatStreamMessages(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable<string> {
    const body = this.buildRequest(messages, opts, true);
    const timeout = opts?.timeout ?? this.options.timeout;

    const response = await retryFetch(
      `${this.baseUrl}/v1/chat/completions`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
        signal: createTimeoutSignal(timeout),
      },
      { maxRetries: this.options.maxRetries }
    );

    if (!response.ok) {
      const text = await response.text().catch(() => '');
      throw new Error(`mana-llm stream error ${response.status}: ${text}`);
    }

    if (!response.body) {
      throw new Error('mana-llm returned no response body for stream');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? '';

        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed || !trimmed.startsWith('data: ')) continue;

          const data = trimmed.slice(6);
          if (data === '[DONE]') return;

          try {
            const chunk = JSON.parse(data);
            const content = chunk.choices?.[0]?.delta?.content;
            if (content) yield content;
          } catch {
            // Skip unparseable chunks
          }
        }
      }
    } finally {
      reader.releaseLock();
    }
  }

  // ---------------------------------------------------------------------------
  // Structured JSON Output
  // ---------------------------------------------------------------------------

  /** Chat that extracts and parses JSON from the response. */
  async json<T = unknown>(prompt: string, opts?: JsonOptions<T>): Promise<JsonResult<T>> {
    const messages = this.buildMessages(prompt, opts?.systemPrompt);
    return this.jsonMessages<T>(messages, opts);
  }

  /** JSON extraction from full message history. */
  async jsonMessages<T = unknown>(
    messages: ChatMessage[],
    opts?: JsonOptions<T>
  ): Promise<JsonResult<T>> {
    const maxAttempts = (opts?.jsonRetries ?? 1) + 1;
    let lastError: Error | undefined;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const result = await this.chatMessages(messages, opts);

      try {
        const data = extractJson<T>(result.content, opts?.validate);
        return { ...result, data };
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        if (this.options.debug) {
          console.warn(
            `[shared-llm] JSON extraction attempt ${attempt + 1}/${maxAttempts} failed:`,
            lastError.message
          );
        }
      }
    }

    throw lastError ?? new Error('JSON extraction failed');
  }

  // ---------------------------------------------------------------------------
  // Vision
  // ---------------------------------------------------------------------------

  /** Analyze an image with a text prompt. */
  async vision(
    prompt: string,
    imageBase64: string,
    mimeType?: string,
    opts?: VisionOptions
  ): Promise<ChatResult> {
    const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
    const model = opts?.visionModel ?? this.options.defaultVisionModel;
    return this.chatMessages(messages, { ...opts, model });
  }

  /** Vision + JSON extraction. */
  async visionJson<T = unknown>(
    prompt: string,
    imageBase64: string,
    mimeType?: string,
    opts?: VisionOptions & JsonOptions<T>
  ): Promise<JsonResult<T>> {
    const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
    const model = opts?.visionModel ?? this.options.defaultVisionModel;
    return this.jsonMessages<T>(messages, { ...opts, model });
  }

  // ---------------------------------------------------------------------------
  // Embeddings
  // ---------------------------------------------------------------------------

  /** Generate embeddings for text input. */
  async embed(
    input: string | string[],
    model?: string
  ): Promise<{ embeddings: number[][]; usage: TokenUsage }> {
    const response = await retryFetch(
      `${this.baseUrl}/v1/embeddings`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: model ?? this.options.defaultModel,
          input,
        }),
        signal: createTimeoutSignal(this.options.timeout),
      },
      { maxRetries: this.options.maxRetries }
    );

    if (!response.ok) {
      const text = await response.text().catch(() => '');
      throw new Error(`mana-llm embeddings error ${response.status}: ${text}`);
    }

    const data = (await response.json()) as EmbeddingResponse;
    return {
      embeddings: data.data.map((d) => d.embedding),
      usage: data.usage,
    };
  }

  // ---------------------------------------------------------------------------
  // Health & Models
  // ---------------------------------------------------------------------------

  /** Check mana-llm health and provider status. */
  async health(): Promise<HealthStatus> {
    try {
      const response = await fetch(`${this.baseUrl}/health`, {
        signal: createTimeoutSignal(5_000),
      });
      if (!response.ok) {
        return { status: 'unhealthy', providers: {} };
      }
      return (await response.json()) as HealthStatus;
    } catch {
      return { status: 'unhealthy', providers: {} };
    }
  }

  /** List available models from all providers. */
  async listModels(): Promise<ModelInfo[]> {
    const response = await fetch(`${this.baseUrl}/v1/models`, {
      signal: createTimeoutSignal(10_000),
    });

    if (!response.ok) {
      throw new Error(`mana-llm models error ${response.status}`);
    }

    const data = (await response.json()) as { data: ModelInfo[] };
    return data.data ?? [];
  }

  // ---------------------------------------------------------------------------
  // Private helpers
  // ---------------------------------------------------------------------------

  private buildMessages(prompt: string, systemPrompt?: string): ChatMessage[] {
    const messages: ChatMessage[] = [];
    if (systemPrompt) {
      messages.push({ role: 'system', content: systemPrompt });
    }
    messages.push({ role: 'user', content: prompt });
    return messages;
  }

  private buildVisionMessages(
    prompt: string,
    imageBase64: string,
    mimeType?: string,
    systemPrompt?: string
  ): ChatMessage[] {
    const mime = mimeType ?? 'image/jpeg';
    const dataUrl = imageBase64.startsWith('data:')
      ? imageBase64
      : `data:${mime};base64,${imageBase64}`;

    const messages: ChatMessage[] = [];
    if (systemPrompt) {
      messages.push({ role: 'system', content: systemPrompt });
    }
    messages.push({
      role: 'user',
      content: [
        { type: 'text', text: prompt },
        { type: 'image_url', image_url: { url: dataUrl } },
      ],
    });
    return messages;
  }

  private buildRequest(
    messages: ChatMessage[],
    opts: ChatOptions | undefined,
    stream: boolean
  ): ChatCompletionRequest {
    const request: ChatCompletionRequest = {
      model: opts?.model ?? this.options.defaultModel,
      messages,
      stream,
    };

    if (opts?.temperature !== undefined) request.temperature = opts.temperature;
    if (opts?.maxTokens !== undefined) request.max_tokens = opts.maxTokens;

    return request;
  }

  private async fetchCompletion(
    body: ChatCompletionRequest,
    timeoutOverride?: number
  ): Promise<ChatCompletionResponse> {
    const timeout = timeoutOverride ?? this.options.timeout;

    const response = await retryFetch(
      `${this.baseUrl}/v1/chat/completions`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
        signal: createTimeoutSignal(timeout),
      },
      { maxRetries: this.options.maxRetries }
    );

    if (!response.ok) {
      const text = await response.text().catch(() => '');
      throw new Error(`mana-llm error ${response.status}: ${text}`);
    }

    return (await response.json()) as ChatCompletionResponse;
  }
}
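
The SSE loop in chatStreamMessages yields plain text tokens, so consumption is a single for await loop. A sketch; printHaiku and the prompt are hypothetical:

import { LlmClient } from '@manacore/shared-llm';

async function printHaiku(llm: LlmClient): Promise<void> {
  // Tokens arrive as they are decoded from the `data:` SSE lines above.
  for await (const token of llm.chatStream('Write a haiku about monorepos')) {
    process.stdout.write(token);
  }
}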

packages/shared-llm/src/llm.constants.ts (new file, 1 line)
@@ -0,0 +1 @@
export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS';

packages/shared-llm/src/llm.module.ts (new file, 80 lines)
@@ -0,0 +1,80 @@
import { DynamicModule, Module, Global, Provider } from '@nestjs/common';
import type {
  LlmModuleOptions,
  LlmModuleAsyncOptions,
  LlmOptionsFactory,
} from './interfaces/llm-options.interface';
import { LlmClientService } from './llm-client.service';
import { LLM_MODULE_OPTIONS } from './llm.constants';

@Global()
@Module({})
export class LlmModule {
  static forRoot(options: LlmModuleOptions): DynamicModule {
    return {
      module: LlmModule,
      providers: [
        {
          provide: LLM_MODULE_OPTIONS,
          useValue: options,
        },
        LlmClientService,
      ],
      exports: [LLM_MODULE_OPTIONS, LlmClientService],
    };
  }

  static forRootAsync(options: LlmModuleAsyncOptions): DynamicModule {
    const asyncProviders = this.createAsyncProviders(options);

    return {
      module: LlmModule,
      imports: options.imports || [],
      providers: [...asyncProviders, LlmClientService],
      exports: [LLM_MODULE_OPTIONS, LlmClientService],
    };
  }

  private static createAsyncProviders(options: LlmModuleAsyncOptions): Provider[] {
    if (options.useFactory) {
      return [
        {
          provide: LLM_MODULE_OPTIONS,
          useFactory: options.useFactory,
          inject: options.inject || [],
        },
      ];
    }

    const useClass = options.useClass;
    const useExisting = options.useExisting;

    if (useClass) {
      return [
        {
          provide: LLM_MODULE_OPTIONS,
          useFactory: async (optionsFactory: LlmOptionsFactory) =>
            await optionsFactory.createLlmOptions(),
          inject: [useClass],
        },
        {
          provide: useClass,
          useClass,
        },
      ];
    }

    if (useExisting) {
      return [
        {
          provide: LLM_MODULE_OPTIONS,
          useFactory: async (optionsFactory: LlmOptionsFactory) =>
            await optionsFactory.createLlmOptions(),
          inject: [useExisting],
        },
      ];
    }

    return [];
  }
}
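
A sketch of wiring the module from environment config via forRootAsync. MANA_LLM_URL matches the variable the migrated services previously read; LLM_DEFAULT_MODEL is an assumed name for illustration:

import { Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';

@Module({
  imports: [
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      inject: [ConfigService],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get<string>('MANA_LLM_URL'),
        defaultModel: config.get<string>('LLM_DEFAULT_MODEL'), // assumed env var name
      }),
    }),
  ],
})
export class AppModule {}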

packages/shared-llm/src/standalone.ts (new file, 30 lines)
@@ -0,0 +1,30 @@
/**
 * Standalone exports for non-NestJS consumers (e.g. bot-services).
 *
 * Usage:
 *   import { LlmClient, resolveOptions } from '@manacore/shared-llm/standalone';
 *   const llm = new LlmClient(resolveOptions({ manaLlmUrl: 'http://localhost:3025' }));
 */

export { LlmClient } from './llm-client';
export { resolveOptions } from './interfaces/llm-options.interface';
export type { LlmModuleOptions, ResolvedLlmOptions } from './interfaces/llm-options.interface';

// Types
export type {
  ChatMessage,
  ContentPart,
  TextContentPart,
  ImageContentPart,
  ChatOptions,
  JsonOptions,
  VisionOptions,
  TokenUsage,
  ChatResult,
  JsonResult,
  ModelInfo,
  HealthStatus,
} from './types';

// Utilities
export { extractJson } from './utils';
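
A slightly fuller standalone sketch than the docstring, assuming a local mana-llm on the default port; note that the LlmClient constructor takes ResolvedLlmOptions, so partial options go through resolveOptions first:

import { LlmClient, resolveOptions } from '@manacore/shared-llm/standalone';

async function main(): Promise<void> {
  const llm = new LlmClient(resolveOptions({ manaLlmUrl: 'http://localhost:3025' }));

  // health() never throws; it reports 'unhealthy' on any failure.
  const health = await llm.health();
  if (health.status === 'unhealthy') return;

  const reply = await llm.chat('Hello from a plain Node process');
  console.log(`${reply.content} (${reply.latencyMs} ms, ${reply.usage.total_tokens} tokens)`);
}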

packages/shared-llm/src/types/chat.types.ts (new file, 100 lines)
@@ -0,0 +1,100 @@
/**
 * Core chat types for the LLM client.
 * These are the high-level types that consumers interact with.
 */

// ---------------------------------------------------------------------------
// Messages
// ---------------------------------------------------------------------------

export interface TextContentPart {
  type: 'text';
  text: string;
}

export interface ImageContentPart {
  type: 'image_url';
  image_url: { url: string };
}

export type ContentPart = TextContentPart | ImageContentPart;

export interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string | ContentPart[];
}

// ---------------------------------------------------------------------------
// Options
// ---------------------------------------------------------------------------

export interface ChatOptions {
  /** Model to use (default from module config, e.g. "ollama/gemma3:4b") */
  model?: string;
  /** Sampling temperature 0.0-2.0 */
  temperature?: number;
  /** Max tokens to generate */
  maxTokens?: number;
  /** System prompt prepended to messages */
  systemPrompt?: string;
  /** Request timeout in ms (overrides module default) */
  timeout?: number;
}

export interface JsonOptions<T = unknown> extends ChatOptions {
  /** Validation function applied to parsed JSON. Should throw on invalid data. */
  validate?: (data: unknown) => T;
  /** Number of extraction retries on parse failure (default: 1) */
  jsonRetries?: number;
}

export interface VisionOptions extends ChatOptions {
  /** Vision model override (default from module config, e.g. "ollama/llava:7b") */
  visionModel?: string;
}

// ---------------------------------------------------------------------------
// Results
// ---------------------------------------------------------------------------

export interface TokenUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
}

export interface ChatResult {
  /** Generated text content */
  content: string;
  /** Model that was actually used */
  model: string;
  /** Token usage statistics */
  usage: TokenUsage;
  /** Request latency in milliseconds */
  latencyMs: number;
}

export interface JsonResult<T = unknown> extends ChatResult {
  /** Parsed and optionally validated data */
  data: T;
}

// ---------------------------------------------------------------------------
// Models
// ---------------------------------------------------------------------------

export interface ModelInfo {
  id: string;
  object: 'model';
  created: number;
  owned_by: string;
}

// ---------------------------------------------------------------------------
// Health
// ---------------------------------------------------------------------------

export interface HealthStatus {
  status: 'healthy' | 'degraded' | 'unhealthy';
  providers: Record<string, unknown>;
}
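
One worked example of the JsonOptions contract: validate narrows unknown to T and throws on bad data, and jsonMessages() re-prompts up to jsonRetries extra times when parsing or validation throws. FeedbackShape is a hypothetical target type:

import type { JsonOptions } from '@manacore/shared-llm';

interface FeedbackShape {
  title: string;
  category: string;
}

const opts: JsonOptions<FeedbackShape> = {
  jsonRetries: 2, // up to 3 total attempts
  validate: (data) => {
    const obj = data as Partial<FeedbackShape>;
    if (typeof obj.title !== 'string' || typeof obj.category !== 'string') {
      throw new Error('invalid FeedbackShape');
    }
    return obj as FeedbackShape;
  },
};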

packages/shared-llm/src/types/index.ts (new file, 26 lines)
@@ -0,0 +1,26 @@
export type {
  ChatMessage,
  ContentPart,
  TextContentPart,
  ImageContentPart,
  ChatOptions,
  JsonOptions,
  VisionOptions,
  TokenUsage,
  ChatResult,
  JsonResult,
  ModelInfo,
  HealthStatus,
} from './chat.types';

export type {
  ChatCompletionRequest,
  ChatCompletionResponse,
  ChatCompletionChoice,
  ChatCompletionStreamChunk,
  StreamChoice,
  EmbeddingRequest,
  EmbeddingResponse,
  EmbeddingData,
  ModelsListResponse,
} from './openai-compat.types';

packages/shared-llm/src/types/openai-compat.types.ts (new file, 97 lines)
@@ -0,0 +1,97 @@
/**
 * OpenAI-compatible wire format types matching the mana-llm API contract.
 * These are internal types used for HTTP communication - consumers should
 * use the high-level types from chat.types.ts instead.
 */

import type { ChatMessage, TokenUsage } from './chat.types';

// ---------------------------------------------------------------------------
// Request (POST /v1/chat/completions)
// ---------------------------------------------------------------------------

export interface ChatCompletionRequest {
  model: string;
  messages: ChatMessage[];
  stream?: boolean;
  temperature?: number;
  max_tokens?: number;
  top_p?: number;
  frequency_penalty?: number;
  presence_penalty?: number;
  stop?: string | string[];
}

// ---------------------------------------------------------------------------
// Response (non-streaming)
// ---------------------------------------------------------------------------

export interface ChatCompletionResponse {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string;
  choices: ChatCompletionChoice[];
  usage: TokenUsage;
}

export interface ChatCompletionChoice {
  index: number;
  message: { role: 'assistant'; content: string };
  finish_reason: 'stop' | 'length' | 'content_filter' | null;
}

// ---------------------------------------------------------------------------
// Response (streaming)
// ---------------------------------------------------------------------------

export interface ChatCompletionStreamChunk {
  id: string;
  object: 'chat.completion.chunk';
  created: number;
  model: string;
  choices: StreamChoice[];
}

export interface StreamChoice {
  index: number;
  delta: { role?: 'assistant'; content?: string };
  finish_reason: string | null;
}

// ---------------------------------------------------------------------------
// Embeddings
// ---------------------------------------------------------------------------

export interface EmbeddingRequest {
  model: string;
  input: string | string[];
  encoding_format?: 'float' | 'base64';
}

export interface EmbeddingResponse {
  object: 'list';
  data: EmbeddingData[];
  model: string;
  usage: TokenUsage;
}

export interface EmbeddingData {
  object: 'embedding';
  index: number;
  embedding: number[];
}

// ---------------------------------------------------------------------------
// Models (GET /v1/models)
// ---------------------------------------------------------------------------

export interface ModelsListResponse {
  object: 'list';
  data: Array<{
    id: string;
    object: 'model';
    created: number;
    owned_by: string;
  }>;
}

packages/shared-llm/src/utils/index.ts (new file, 3 lines)
@@ -0,0 +1,3 @@
export { extractJson } from './json-extractor';
export { retryFetch } from './retry';
export type { RetryOptions } from './retry';

packages/shared-llm/src/utils/json-extractor.ts (new file, 94 lines)
@@ -0,0 +1,94 @@
/**
 * Extract and parse JSON from LLM responses.
 *
 * LLMs often wrap JSON in markdown code fences or include extra text.
 * This utility handles all common patterns:
 *   1. Direct JSON parse
 *   2. Markdown ```json ... ``` fences
 *   3. First { ... } or [ ... ] block in text
 */
export function extractJson<T = unknown>(text: string, validate?: (data: unknown) => T): T {
  const trimmed = text.trim();

  // Step 1: Try direct parse
  const direct = tryParse<T>(trimmed, validate);
  if (direct !== undefined) return direct;

  // Step 2: Strip markdown code fences
  const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenceMatch) {
    const fenced = tryParse<T>(fenceMatch[1].trim(), validate);
    if (fenced !== undefined) return fenced;
  }

  // Step 3: Find first JSON object
  const objectStart = trimmed.indexOf('{');
  if (objectStart !== -1) {
    const objectStr = extractBalanced(trimmed, objectStart, '{', '}');
    if (objectStr) {
      const obj = tryParse<T>(objectStr, validate);
      if (obj !== undefined) return obj;
    }
  }

  // Step 4: Find first JSON array
  const arrayStart = trimmed.indexOf('[');
  if (arrayStart !== -1) {
    const arrayStr = extractBalanced(trimmed, arrayStart, '[', ']');
    if (arrayStr) {
      const arr = tryParse<T>(arrayStr, validate);
      if (arr !== undefined) return arr;
    }
  }

  throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`);
}

function tryParse<T>(text: string, validate?: (data: unknown) => T): T | undefined {
  try {
    const parsed = JSON.parse(text);
    return validate ? validate(parsed) : parsed;
  } catch {
    return undefined;
  }
}

/**
 * Extract a balanced block starting from the given position.
 * Tracks string literals and escape sequences so braces/brackets inside
 * strings (including escaped quotes) do not affect the nesting depth.
 */
function extractBalanced(text: string, start: number, open: string, close: string): string | null {
  let depth = 0;
  let inString = false;
  let escape = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (escape) {
      escape = false;
      continue;
    }

    if (ch === '\\') {
      escape = true;
      continue;
    }

    if (ch === '"') {
      inString = !inString;
      continue;
    }

    if (inString) continue;

    if (ch === open) depth++;
    if (ch === close) depth--;

    if (depth === 0) {
      return text.slice(start, i + 1);
    }
  }

  return null;
}
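
The four-step fallback above means inputs in any of these shapes resolve to the same value; a sketch:

import { extractJson } from '@manacore/shared-llm';

extractJson('{"ok": true}');                         // step 1: direct parse
extractJson('```json\n{"ok": true}\n```');           // step 2: fenced block
extractJson('Sure, here it is: {"ok": true} Done.'); // step 3: first balanced object
extractJson('Values: [1, 2, 3]');                    // step 4: first balanced array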
51
packages/shared-llm/src/utils/retry.ts
Normal file
51
packages/shared-llm/src/utils/retry.ts
Normal file
|
|
@ -0,0 +1,51 @@
/**
 * Fetch wrapper with exponential backoff retry for transient failures.
 *
 * Retries on: 429 (rate limit), 502, 503, 504 (server errors), network errors.
 * Does NOT retry on: 400, 401, 403, 404 (client errors).
 */

const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);

export interface RetryOptions {
  maxRetries: number;
  /** Base delay in ms (doubles each retry). Default: 200 */
  baseDelay?: number;
}

export async function retryFetch(
  url: string,
  init: RequestInit,
  options: RetryOptions
): Promise<Response> {
  const { maxRetries, baseDelay = 200 } = options;
  let lastError: Error | undefined;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      const response = await fetch(url, init);

      if (response.ok || !RETRYABLE_STATUS_CODES.has(response.status)) {
        return response;
      }

      // Retryable status code
      lastError = new Error(`HTTP ${response.status}: ${response.statusText}`);
    } catch (error) {
      // Network error (connection refused, timeout, etc.)
      lastError = error instanceof Error ? error : new Error(String(error));
    }

    // Don't sleep after the last attempt
    if (attempt < maxRetries) {
      const delay = baseDelay * Math.pow(2, attempt);
      await sleep(delay);
    }
  }

  throw lastError ?? new Error('retryFetch exhausted all retries');
}

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
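As a usage sketch (not part of this commit; the export path is assumed): with maxRetries: 3 and the default baseDelay of 200 ms, a failing request is attempted four times in total, sleeping 200 ms, 400 ms, then 800 ms between attempts (baseDelay * 2^attempt).

import { retryFetch } from '@manacore/shared-llm';

async function probeHealth(): Promise<Response> {
  // Retries transparently on 429/502/503/504 and network errors;
  // a 400/401/403/404 response is returned immediately without retrying.
  return retryFetch('http://localhost:3025/health', { method: 'GET' }, { maxRetries: 3 });
}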
21 packages/shared-llm/tsconfig.json Normal file
@ -0,0 +1,21 @@
{
  "compilerOptions": {
    "target": "ES2021",
    "module": "commonjs",
    "lib": ["ES2021"],
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "outDir": "./dist",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "moduleResolution": "node",
    "experimentalDecorators": true,
    "emitDecoratorMetadata": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
}
972 pnpm-lock.yaml generated
File diff suppressed because it is too large
@ -26,6 +26,7 @@
  },
  "dependencies": {
    "@google/generative-ai": "^0.24.1",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-storage": "workspace:*",
    "@nestjs/axios": "^4.0.1",
    "@nestjs/common": "^10.4.15",
@ -1,10 +1,8 @@
import { Module, Global } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { AiService } from './ai.service';

@Global()
@Module({
  imports: [ConfigModule],
  providers: [AiService],
  exports: [AiService],
})
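Because AiModule is @Global(), feature modules can inject AiService without importing AiModule themselves. A hypothetical consumer (names invented for illustration):

import { Injectable } from '@nestjs/common';
import { AiService } from '../ai/ai.service';

@Injectable()
export class FeedbackTriageService {
  constructor(private readonly ai: AiService) {}

  async triage(text: string) {
    return this.ai.analyzeFeedback(text);
  }
}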
@ -1,32 +1,20 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { LlmClientService } from '@manacore/shared-llm';

export interface FeedbackAnalysis {
  title: string;
  category: 'bug' | 'feature' | 'improvement' | 'question' | 'other';
}

const VALID_CATEGORIES = ['bug', 'feature', 'improvement', 'question', 'other'] as const;

@Injectable()
export class AiService {
  private readonly logger = new Logger(AiService.name);
  private readonly manaLlmUrl: string | null = null;

  constructor(private configService: ConfigService) {
    const url = this.configService.get<string>('MANA_LLM_URL');
    if (url) {
      this.manaLlmUrl = url;
      this.logger.log(`AI service using mana-llm at ${url}`);
    } else {
      this.logger.warn('MANA_LLM_URL not configured - AI features disabled');
    }
  }
  constructor(private readonly llm: LlmClientService) {}

  async analyzeFeedback(feedbackText: string): Promise<FeedbackAnalysis> {
    // Fallback if AI not available
    if (!this.manaLlmUrl) {
      return this.fallbackAnalysis(feedbackText);
    }

    try {
      const prompt = `Analysiere dieses User-Feedback und generiere:
1. Einen kurzen, prägnanten deutschen Titel (max 60 Zeichen) der den Kern des Feedbacks zusammenfasst

@ -37,48 +25,24 @@ Feedback: "${feedbackText}"
Antworte NUR mit validem JSON in diesem Format (keine Markdown-Codeblocks, kein anderer Text):
{"title": "...", "category": "..."}`;

      const result = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: 'ollama/gemma3:4b',
          messages: [{ role: 'user', content: prompt }],
          temperature: 0.3,
        }),
        signal: AbortSignal.timeout(30000),
      const { data } = await this.llm.json<FeedbackAnalysis>(prompt, {
        temperature: 0.3,
        timeout: 30_000,
        validate: (raw) => {
          const obj = raw as FeedbackAnalysis;
          if (!obj.title || !obj.category) throw new Error('missing fields');
          if (!VALID_CATEGORIES.includes(obj.category as any)) {
            obj.category = 'other';
          }
          if (obj.title.length > 60) {
            obj.title = obj.title.substring(0, 57) + '...';
          }
          return obj;
        },
      });

      if (!result.ok) {
        throw new Error(`mana-llm error: ${result.status}`);
      }

      const data = await result.json();
      const response = (data.choices?.[0]?.message?.content || '').trim();

      // Parse JSON response - handle potential markdown code blocks
      let jsonStr = response;
      if (response.includes('```')) {
        const match = response.match(/```(?:json)?\s*([\s\S]*?)```/);
        if (match) {
          jsonStr = match[1].trim();
        }
      }

      const parsed = JSON.parse(jsonStr) as FeedbackAnalysis;

      // Validate category
      const validCategories = ['bug', 'feature', 'improvement', 'question', 'other'];
      if (!validCategories.includes(parsed.category)) {
        parsed.category = 'other';
      }

      // Ensure title is not too long
      if (parsed.title.length > 60) {
        parsed.title = parsed.title.substring(0, 57) + '...';
      }

      this.logger.debug(`AI analyzed feedback: ${JSON.stringify(parsed)}`);
      return parsed;
      this.logger.debug(`AI analyzed feedback: ${JSON.stringify(data)}`);
      return data;
    } catch (error) {
      this.logger.error(`AI analysis failed: ${error}`);
      return this.fallbackAnalysis(feedbackText);
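The validate callback shown above doubles as a normalizer: it coerces out-of-range values in place and throws to reject a parse candidate (assuming llm.json delegates to the extractJson utility, a throw makes extraction fall through to the next strategy). The same pattern applied to a different payload, as a hypothetical sketch:

import { LlmClientService } from '@manacore/shared-llm';

interface Sentiment {
  score: number; // expected in [-1, 1]
}

async function scoreSentiment(llm: LlmClientService, text: string): Promise<Sentiment> {
  const { data } = await llm.json<Sentiment>(`Rate the sentiment of: "${text}"`, {
    temperature: 0,
    validate: (raw) => {
      const obj = raw as Sentiment;
      if (typeof obj.score !== 'number') throw new Error('missing score');
      obj.score = Math.max(-1, Math.min(1, obj.score)); // clamp instead of rejecting
      return obj;
    },
  });
  return data;
}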
@ -1,7 +1,8 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { ThrottlerModule } from '@nestjs/throttler';
import { APP_FILTER } from '@nestjs/core';
import { LlmModule } from '@manacore/shared-llm';
import configuration from './config/configuration';
import { AdminModule } from './admin/admin.module';
import { AiModule } from './ai/ai.module';

@ -35,6 +36,14 @@ import { SecurityModule } from './security';
        limit: 100, // 100 requests per minute
      },
    ]),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    LoggerModule,
    SecurityModule,
    MetricsModule,
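If the module also exposes a synchronous forRoot taking the same options object (assumed here, not shown in this diff), services that read their environment directly could skip the async factory:

import { Module } from '@nestjs/common';
import { LlmModule } from '@manacore/shared-llm';

@Module({
  imports: [
    LlmModule.forRoot({
      manaLlmUrl: process.env.MANA_LLM_URL ?? 'http://localhost:3025',
      debug: process.env.NODE_ENV === 'development',
    }),
  ],
})
export class AppModule {}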
@ -29,6 +29,7 @@
  "dependencies": {
    "@manacore/bot-services": "workspace:*",
    "@manacore/matrix-bot-common": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@nestjs/common": "^10.4.15",
    "@nestjs/config": "^3.3.0",
    "@nestjs/core": "^10.4.15",
@ -1,5 +1,6 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';
import { HealthController, createHealthProvider } from '@manacore/matrix-bot-common';
import { BotModule } from './bot/bot.module';
import configuration from './config/configuration';

@ -10,6 +11,15 @@ import configuration from './config/configuration';
      isGlobal: true,
      load: [configuration],
    }),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('llm.url') || 'http://localhost:3025',
        defaultModel: config.get('llm.model') || 'ollama/gemma3:4b',
        timeout: config.get<number>('llm.timeout') || 120000,
      }),
      inject: [ConfigService],
    }),
    BotModule,
  ],
  controllers: [HealthController],
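For the config.get('llm.*') lookups above to resolve, config/configuration.ts must expose a matching llm section. A sketch of that shape (the key names follow the factory above; the environment variable names are assumptions):

export default () => ({
  llm: {
    url: process.env.MANA_LLM_URL,
    model: process.env.LLM_MODEL,
    timeout: process.env.LLM_TIMEOUT ? parseInt(process.env.LLM_TIMEOUT, 10) : undefined,
  },
});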
@ -1,49 +1,17 @@
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
import { LlmClientService } from '@manacore/shared-llm';
import { ConfigService } from '@nestjs/config';

interface LlmModel {
  id: string;
  name: string;
  size: number;
  owned_by: string;
}

interface ChatMessage {
  role: 'user' | 'assistant' | 'system';
  content: string | ContentPart[];
}

interface ContentPart {
  type: 'text' | 'image_url';
  text?: string;
  image_url?: { url: string };
}

interface ChatCompletionResponse {
  id: string;
  model: string;
  choices: {
    message: { role: string; content: string };
    finish_reason: string;
  }[];
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

@Injectable()
export class OllamaService implements OnModuleInit {
  private readonly logger = new Logger(OllamaService.name);
  private readonly baseUrl: string;
  private readonly defaultModel: string;
  private readonly timeout: number;

  constructor(private configService: ConfigService) {
    this.baseUrl = this.configService.get<string>('llm.url') || 'http://localhost:3025';
  constructor(
    private readonly llm: LlmClientService,
    private configService: ConfigService
  ) {
    this.defaultModel = this.configService.get<string>('llm.model') || 'ollama/gemma3:4b';
    this.timeout = this.configService.get<number>('llm.timeout') || 120000;
  }

  async onModuleInit() {
@ -52,27 +20,23 @@ export class OllamaService implements OnModuleInit {

  async checkConnection(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/health`, {
        signal: AbortSignal.timeout(5000),
      });
      const data = await response.json();
      this.logger.log(`mana-llm connected: ${data.status}, providers: ${Object.keys(data.providers || {}).join(', ')}`);
      return data.status === 'healthy' || data.status === 'degraded';
      const health = await this.llm.health();
      this.logger.log(
        `mana-llm connected: ${health.status}, providers: ${Object.keys(health.providers || {}).join(', ')}`
      );
      return health.status === 'healthy' || health.status === 'degraded';
    } catch (error) {
      this.logger.error(`Failed to connect to mana-llm at ${this.baseUrl}:`, error);
      this.logger.error('Failed to connect to mana-llm:', error);
      return false;
    }
  }

  async listModels(): Promise<{ name: string; size: number; modified_at: string }[]> {
    try {
      const response = await fetch(`${this.baseUrl}/v1/models`);
      const data = await response.json();

      // Convert OpenAI format to legacy Ollama format for compatibility
      return (data.data || []).map((m: LlmModel) => ({
      const models = await this.llm.listModels();
      return models.map((m) => ({
        name: m.id,
        size: 0, // mana-llm doesn't provide size
        size: 0,
        modified_at: new Date().toISOString(),
      }));
    } catch (error) {
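health() is assumed to surface the mana-llm /health payload unchanged, so a startup gate can apply the same policy as checkConnection(). A hypothetical helper:

import { LlmClientService } from '@manacore/shared-llm';

async function assertLlmReachable(llm: LlmClientService): Promise<void> {
  const health = await llm.health();
  // 'degraded' still counts as usable, matching checkConnection() above.
  if (health.status !== 'healthy' && health.status !== 'degraded') {
    throw new Error(`mana-llm unavailable: ${health.status}`);
  }
}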
@ -87,39 +51,15 @@ export class OllamaService implements OnModuleInit {
  ): Promise<string> {
    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;

    try {
      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: selectedModel,
          messages,
          stream: false,
        }),
        signal: AbortSignal.timeout(this.timeout),
      });
      const result = await this.llm.chatMessages(messages, { model: selectedModel });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
      }

      const data: ChatCompletionResponse = await response.json();

      // Log performance metrics
      if (data.usage) {
        this.logger.debug(
          `Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
        );
      }

      return data.choices[0]?.message?.content || '';
    } catch (error) {
      if (error instanceof Error && error.name === 'TimeoutError') {
        throw new Error('LLM Timeout - Antwort dauerte zu lange');
      }
      throw error;
    if (result.usage.completion_tokens) {
      this.logger.debug(
        `Generated ${result.usage.completion_tokens} tokens (total: ${result.usage.total_tokens})`
      );
    }

    return result.content;
  }

  getDefaultModel(): string {
@ -129,59 +69,19 @@ export class OllamaService implements OnModuleInit {
  async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<string> {
    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;

    try {
      // Use OpenAI vision format
      const messages: ChatMessage[] = [
        {
          role: 'user',
          content: [
            { type: 'text', text: prompt },
            {
              type: 'image_url',
              image_url: { url: `data:image/png;base64,${imageBase64}` },
            },
          ],
        },
      ];
    const result = await this.llm.vision(prompt, imageBase64, 'image/png', {
      model: selectedModel,
    });

      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: selectedModel,
          messages,
          stream: false,
        }),
        signal: AbortSignal.timeout(this.timeout),
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
      }

      const data: ChatCompletionResponse = await response.json();

      // Log performance metrics
      if (data.usage) {
        this.logger.debug(
          `Vision: Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
        );
      }

      return data.choices[0]?.message?.content || '';
    } catch (error) {
      if (error instanceof Error && error.name === 'TimeoutError') {
        throw new Error('LLM Timeout - Bildanalyse dauerte zu lange');
      }
      throw error;
    if (result.usage.completion_tokens) {
      this.logger.debug(
        `Vision: Generated ${result.usage.completion_tokens} tokens (total: ${result.usage.total_tokens})`
      );
    }

    return result.content;
  }

  /**
   * Normalize model name to include provider prefix if missing.
   * e.g., "gemma3:4b" -> "ollama/gemma3:4b"
   */
  private normalizeModel(model: string): string {
    if (model.includes('/')) {
      return model;
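The same chat surface is also available without NestJS via the standalone LlmClient used by the non-NestJS consumers; a minimal sketch (constructor option names are assumed to mirror the module options above):

import { LlmClient } from '@manacore/shared-llm';

async function main(): Promise<void> {
  const llm = new LlmClient({
    manaLlmUrl: process.env.MANA_LLM_URL ?? 'http://localhost:3025',
    defaultModel: 'ollama/gemma3:4b',
  });

  // normalizeModel() above adds the provider prefix for bare names;
  // here the prefix is passed explicitly.
  const result = await llm.chatMessages(
    [{ role: 'user', content: 'Say hello in one word.' }],
    { model: 'ollama/gemma3:4b' }
  );
  console.log(result.content, result.usage.total_tokens);
}

main().catch(console.error);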