mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:01:09 +02:00
feat(chat): add Google Gemini API support with Flash as default model
- Add @google/generative-ai package for Gemini integration
- Update ChatService to support both Azure OpenAI and Gemini providers
- Add Gemini 2.5 Flash, Flash-Lite, and Pro models to seed
- Set Gemini 2.5 Flash as the default model for fast responses
- Add DEV_BYPASS_AUTH mode for local development
- Make /api/models endpoint public (no auth required)
- Add port 5174 to CORS allowed origins
- Add isDefault field to models schema

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
7deb5b9a1e
commit
95bba8ef6e
9 changed files with 426 additions and 211 deletions
|
|
@ -24,6 +24,7 @@
|
|||
"docker:clean": "docker compose down -v --rmi local"
|
||||
},
|
||||
"dependencies": {
|
||||
"@google/generative-ai": "^0.24.1",
|
||||
"@manacore/shared-errors": "workspace:*",
|
||||
"@nestjs/common": "^10.4.15",
|
||||
"@nestjs/config": "^3.3.0",
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { Injectable, Inject, Logger } from '@nestjs/common';
|
|||
import { ConfigService } from '@nestjs/config';
|
||||
import { eq } from 'drizzle-orm';
|
||||
import { type AsyncResult, ok, err, ValidationError, ServiceError } from '@manacore/shared-errors';
|
||||
import { GoogleGenerativeAI } from '@google/generative-ai';
|
||||
import { DATABASE_CONNECTION } from '../db/database.module';
|
||||
import { type Database } from '../db/connection';
|
||||
import { models, type Model } from '../db/schema/models.schema';
|
||||
|
|
@ -10,23 +11,36 @@ import { ChatCompletionDto, ChatCompletionResponseDto } from './dto/chat-complet
|
|||
@Injectable()
|
||||
export class ChatService {
|
||||
private readonly logger = new Logger(ChatService.name);
|
||||
private readonly apiKey: string;
|
||||
private readonly endpoint: string;
|
||||
private readonly apiVersion: string;
|
||||
// Azure OpenAI config
|
||||
private readonly azureApiKey: string;
|
||||
private readonly azureEndpoint: string;
|
||||
private readonly azureApiVersion: string;
|
||||
// Google Gemini config
|
||||
private readonly geminiClient: GoogleGenerativeAI | null = null;
|
||||
|
||||
constructor(
|
||||
private configService: ConfigService,
|
||||
@Inject(DATABASE_CONNECTION) private readonly db: Database
|
||||
) {
|
||||
this.apiKey = this.configService.get<string>('AZURE_OPENAI_API_KEY') || '';
|
||||
this.endpoint =
|
||||
// Azure OpenAI setup
|
||||
this.azureApiKey = this.configService.get<string>('AZURE_OPENAI_API_KEY') || '';
|
||||
this.azureEndpoint =
|
||||
this.configService.get<string>('AZURE_OPENAI_ENDPOINT') ||
|
||||
'https://memoroseopenai.openai.azure.com';
|
||||
this.apiVersion =
|
||||
this.azureApiVersion =
|
||||
this.configService.get<string>('AZURE_OPENAI_API_VERSION') || '2024-12-01-preview';
|
||||
|
||||
if (!this.apiKey) {
|
||||
this.logger.warn('AZURE_OPENAI_API_KEY is not set!');
|
||||
// Google Gemini setup
|
||||
const geminiApiKey = this.configService.get<string>('GOOGLE_GENAI_API_KEY');
|
||||
if (geminiApiKey) {
|
||||
this.geminiClient = new GoogleGenerativeAI(geminiApiKey);
|
||||
this.logger.log('Google Gemini client initialized');
|
||||
} else {
|
||||
this.logger.warn('GOOGLE_GENAI_API_KEY is not set - Gemini models unavailable');
|
||||
}
|
||||
|
||||
if (!this.azureApiKey) {
|
||||
this.logger.warn('AZURE_OPENAI_API_KEY is not set - Azure models unavailable');
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -65,6 +79,101 @@ export class ChatService {
|
|||
this.logger.log(`User ${userId} creating chat completion with model ${dto.modelId}`);
|
||||
}
|
||||
|
||||
// Route to appropriate provider
|
||||
if (model.provider === 'gemini') {
|
||||
return this.createGeminiCompletion(model, dto);
|
||||
} else {
|
||||
return this.createAzureCompletion(model, dto);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Generates a chat completion through Google Gemini.
 *
 * Generation settings are resolved in this order: request DTO, then the
 * model's stored `parameters`, then built-in defaults.
 *
 * Message mapping:
 * - `system` messages are joined with newlines and passed as the model's
 *   `systemInstruction` rather than being spliced into the user turn.
 * - Every other message except the last becomes chat history
 *   (`user` stays `user`, anything else becomes `model`).
 * - The last non-system message is sent as the prompt.
 *
 * @param model - Model row; `parameters.model` selects the Gemini model name.
 * @param dto - Request carrying the messages and optional overrides.
 * @returns `ok` with the generated text and token usage, or `err` when the
 *   client is not configured, there is nothing to send, Gemini returns no
 *   text, or the SDK call throws.
 */
private async createGeminiCompletion(
	model: Model,
	dto: ChatCompletionDto
): AsyncResult<ChatCompletionResponseDto> {
	if (!this.geminiClient) {
		return err(ServiceError.externalError('Google Gemini', 'Gemini client not configured'));
	}

	const params = model.parameters as {
		model?: string;
		temperature?: number;
		max_tokens?: number;
	} | null;

	const modelName = params?.model || 'gemini-2.5-flash';
	const temperature = dto.temperature ?? params?.temperature ?? 0.7;
	const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 8192;

	const systemPrompt = dto.messages
		.filter((m) => m.role === 'system')
		.map((m) => m.content)
		.join('\n');
	const chatMessages = dto.messages.filter((m) => m.role !== 'system');

	// The last non-system message is the prompt; everything before it is history.
	let userPrompt = chatMessages[chatMessages.length - 1]?.content || '';
	let systemInstruction: string | undefined = systemPrompt || undefined;

	if (!userPrompt) {
		if (!systemPrompt) {
			// Nothing to send at all: fail fast instead of paying for a
			// round trip that the API would reject anyway.
			this.logger.warn('No message content to send to Gemini');
			return err(ServiceError.generationFailed('Google Gemini', 'No message content provided'));
		}
		// Only system text is available: keep the previous behaviour of
		// sending it as the prompt itself.
		userPrompt = systemPrompt;
		systemInstruction = undefined;
	}

	const history = chatMessages.slice(0, -1).map((msg) => ({
		role: msg.role === 'user' ? 'user' : 'model',
		parts: [{ text: msg.content }],
	}));

	// The SDK rejects a history whose first entry is not a user turn, so
	// leading assistant turns (e.g. a greeting) are dropped.
	const firstUserIndex = history.findIndex((entry) => entry.role === 'user');
	const validHistory = firstUserIndex === -1 ? [] : history.slice(firstUserIndex);
	if (validHistory.length !== history.length) {
		this.logger.warn(
			`Dropped ${history.length - validHistory.length} leading non-user message(s) from Gemini history`
		);
	}

	this.logger.log(`Sending request to Google Gemini model: ${modelName}`);

	try {
		const genModel = this.geminiClient.getGenerativeModel({
			model: modelName,
			generationConfig: {
				temperature,
				maxOutputTokens: maxTokens,
			},
			...(systemInstruction ? { systemInstruction } : {}),
		});

		const chat = genModel.startChat({ history: validHistory });
		const result = await chat.sendMessage(userPrompt);
		const response = result.response;
		const messageContent = response.text();

		if (!messageContent) {
			this.logger.warn('No message content in Gemini response');
			return err(ServiceError.generationFailed('Google Gemini', 'No response generated'));
		}

		// Usage metadata may be missing, so each count falls back to 0.
		const usageMetadata = response.usageMetadata;

		return ok({
			content: messageContent,
			usage: {
				prompt_tokens: usageMetadata?.promptTokenCount ?? 0,
				completion_tokens: usageMetadata?.candidatesTokenCount ?? 0,
				total_tokens: usageMetadata?.totalTokenCount ?? 0,
			},
		});
	} catch (error) {
		this.logger.error('Error calling Google Gemini API', error);
		return err(
			ServiceError.generationFailed(
				'Google Gemini',
				error instanceof Error ? error.message : 'Unknown error',
				error instanceof Error ? error : undefined
			)
		);
	}
}
|
||||
|
||||
private async createAzureCompletion(
|
||||
model: Model,
|
||||
dto: ChatCompletionDto
|
||||
): AsyncResult<ChatCompletionResponseDto> {
|
||||
const params = model.parameters as {
|
||||
deployment?: string;
|
||||
temperature?: number;
|
||||
|
|
@ -91,16 +200,16 @@ export class ChatService {
|
|||
requestBody.temperature = temperature;
|
||||
}
|
||||
|
||||
const url = `${this.endpoint}/openai/deployments/${deployment}/chat/completions?api-version=${this.apiVersion}`;
|
||||
const url = `${this.azureEndpoint}/openai/deployments/${deployment}/chat/completions?api-version=${this.azureApiVersion}`;
|
||||
|
||||
this.logger.log(`Sending request to: ${url}`);
|
||||
this.logger.log(`Sending request to Azure OpenAI: ${url}`);
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'api-key': this.apiKey,
|
||||
'api-key': this.azureApiKey,
|
||||
},
|
||||
body: JSON.stringify(requestBody),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,12 +1,31 @@
|
|||
import { Injectable, CanActivate, ExecutionContext, UnauthorizedException } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
|
||||
// Development test user ID - used when DEV_BYPASS_AUTH=true
|
||||
const DEV_USER_ID = '17cb0be7-058a-4964-9e18-1fe7055fd014';
|
||||
|
||||
@Injectable()
|
||||
export class JwtAuthGuard implements CanActivate {
|
||||
constructor(private configService: ConfigService) {}
|
||||
|
||||
async canActivate(context: ExecutionContext): Promise<boolean> {
|
||||
const request = context.switchToHttp().getRequest();
|
||||
|
||||
// Development mode: bypass auth if DEV_BYPASS_AUTH is set
|
||||
const isDev = this.configService.get<string>('NODE_ENV') === 'development';
|
||||
const bypassAuth = this.configService.get<string>('DEV_BYPASS_AUTH') === 'true';
|
||||
|
||||
if (isDev && bypassAuth) {
|
||||
// Use test user for development
|
||||
request.user = {
|
||||
userId: DEV_USER_ID,
|
||||
email: 'test@example.com',
|
||||
role: 'user',
|
||||
sessionId: 'dev-session',
|
||||
};
|
||||
return true;
|
||||
}
|
||||
|
||||
const token = this.extractTokenFromHeader(request);
|
||||
|
||||
if (!token) {
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ export const models = pgTable('models', {
|
|||
top_p?: number;
|
||||
}>(),
|
||||
isActive: boolean('is_active').default(true).notNull(),
|
||||
isDefault: boolean('is_default').default(false).notNull(),
|
||||
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
updatedAt: timestamp('updated_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -32,41 +32,142 @@ async function seed() {
|
|||
console.log('Seeding AI models...');
|
||||
|
||||
const modelData = [
|
||||
// ============================================
|
||||
// Google Gemini Models (Primary - fast & cost-effective)
|
||||
// ============================================
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440000',
|
||||
name: 'GPT-O3-Mini',
|
||||
description: 'Fast, efficient responses for everyday tasks',
|
||||
id: '550e8400-e29b-41d4-a716-446655440101',
|
||||
name: 'Gemini 2.5 Flash',
|
||||
description: 'Fastest & most cost-effective - ideal for everyday tasks',
|
||||
provider: 'gemini',
|
||||
parameters: {
|
||||
model: 'gemini-2.5-flash-preview-05-20',
|
||||
temperature: 0.7,
|
||||
max_tokens: 8192,
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: true, // Default model
|
||||
},
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440102',
|
||||
name: 'Gemini 2.5 Flash-Lite',
|
||||
description: 'Ultra-fast lightweight model - minimal latency',
|
||||
provider: 'gemini',
|
||||
parameters: {
|
||||
model: 'gemini-2.5-flash-lite-preview-06-17',
|
||||
temperature: 0.7,
|
||||
max_tokens: 4096,
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440103',
|
||||
name: 'Gemini 2.5 Pro',
|
||||
description: 'Most powerful Gemini - complex reasoning & analysis',
|
||||
provider: 'gemini',
|
||||
parameters: {
|
||||
model: 'gemini-2.5-pro-preview-06-05',
|
||||
temperature: 0.7,
|
||||
max_tokens: 16384,
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
// ============================================
|
||||
// Azure OpenAI GPT-5 Family (Alternative)
|
||||
// ============================================
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440001',
|
||||
name: 'GPT-5 Mini',
|
||||
description: 'Fast & cost-effective - best for everyday tasks',
|
||||
provider: 'azure',
|
||||
parameters: {
|
||||
temperature: 0.7,
|
||||
max_tokens: 800,
|
||||
deployment: 'gpt-o3-mini-se',
|
||||
max_tokens: 8192,
|
||||
deployment: 'gpt-5-mini',
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440002',
|
||||
name: 'GPT-5 Nano',
|
||||
description: 'Ultra-fast responses with low latency',
|
||||
provider: 'azure',
|
||||
parameters: {
|
||||
temperature: 0.7,
|
||||
max_tokens: 4096,
|
||||
deployment: 'gpt-5-nano',
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440003',
|
||||
name: 'GPT-5 Chat',
|
||||
description: 'Advanced multimodal conversations with emotional intelligence',
|
||||
provider: 'azure',
|
||||
parameters: {
|
||||
temperature: 0.7,
|
||||
max_tokens: 16384,
|
||||
deployment: 'gpt-5-chat',
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440004',
|
||||
name: 'GPT-4o-Mini',
|
||||
description: 'Compact and powerful for complex tasks',
|
||||
name: 'GPT-5',
|
||||
description: 'Most powerful LLM - logic-heavy & multi-step tasks',
|
||||
provider: 'azure',
|
||||
parameters: {
|
||||
temperature: 0.7,
|
||||
max_tokens: 1000,
|
||||
deployment: 'gpt-4o-mini-se',
|
||||
max_tokens: 32768,
|
||||
deployment: 'gpt-5',
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440005',
|
||||
name: 'GPT-4o',
|
||||
description: 'Most advanced model for demanding tasks',
|
||||
name: 'GPT-5 Codex',
|
||||
description: 'Optimized for coding & front-end development',
|
||||
provider: 'azure',
|
||||
parameters: {
|
||||
temperature: 0.7,
|
||||
max_tokens: 2000,
|
||||
deployment: 'gpt-4o-se',
|
||||
max_tokens: 32768,
|
||||
deployment: 'gpt-5-codex',
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
// O-Series Reasoning Models
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440006',
|
||||
name: 'o4-mini',
|
||||
description: 'Latest reasoning model - best for STEM & code',
|
||||
provider: 'azure',
|
||||
parameters: {
|
||||
temperature: 1, // Reasoning models work best with temp=1
|
||||
max_tokens: 16384,
|
||||
deployment: 'o4-mini',
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
{
|
||||
id: '550e8400-e29b-41d4-a716-446655440007',
|
||||
name: 'o3',
|
||||
description: 'Advanced reasoning - 20% fewer errors than o1',
|
||||
provider: 'azure',
|
||||
parameters: {
|
||||
temperature: 1,
|
||||
max_tokens: 32768,
|
||||
deployment: 'o3',
|
||||
},
|
||||
isActive: true,
|
||||
isDefault: false,
|
||||
},
|
||||
];
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ async function bootstrap() {
|
|||
origin: [
|
||||
'http://localhost:3000',
|
||||
'http://localhost:5173',
|
||||
'http://localhost:5178', // Chat web app
|
||||
'http://localhost:5174', // Chat web app (dev server port)
|
||||
'http://localhost:5178', // Chat web app (alternative)
|
||||
'http://localhost:8081',
|
||||
'exp://localhost:8081',
|
||||
'http://localhost:3001', // Mana Core Auth
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
import { Controller, Get, Param, UseGuards } from '@nestjs/common';
|
||||
import { Controller, Get, Param } from '@nestjs/common';
|
||||
import { isOk } from '@manacore/shared-errors';
|
||||
import { ModelService } from './model.service';
|
||||
import { type Model } from '../db/schema/models.schema';
|
||||
import { JwtAuthGuard } from '../common/guards/jwt-auth.guard';
|
||||
|
||||
// Models are publicly accessible - no auth required to list available models
|
||||
@Controller('models')
|
||||
@UseGuards(JwtAuthGuard)
|
||||
export class ModelController {
|
||||
constructor(private readonly modelService: ModelService) {}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue