feat(chat): add Google Gemini API support with Flash as default model

- Add @google/generative-ai package for Gemini integration
- Update ChatService to support both Azure OpenAI and Gemini providers
- Add Gemini 2.5 Flash, Flash-Lite, and Pro models to seed
- Set Gemini 2.5 Flash as the default model for fast responses
- Add DEV_BYPASS_AUTH mode for local development
- Make /api/models endpoint public (no auth required)
- Add port 5174 to CORS allowed origins
- Add isDefault field to models schema

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Till-JS 2025-11-29 13:22:58 +01:00
parent 7deb5b9a1e
commit 95bba8ef6e
9 changed files with 426 additions and 211 deletions

View file

@ -24,6 +24,7 @@
"docker:clean": "docker compose down -v --rmi local"
},
"dependencies": {
"@google/generative-ai": "^0.24.1",
"@manacore/shared-errors": "workspace:*",
"@nestjs/common": "^10.4.15",
"@nestjs/config": "^3.3.0",

View file

@ -2,6 +2,7 @@ import { Injectable, Inject, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { eq } from 'drizzle-orm';
import { type AsyncResult, ok, err, ValidationError, ServiceError } from '@manacore/shared-errors';
import { GoogleGenerativeAI } from '@google/generative-ai';
import { DATABASE_CONNECTION } from '../db/database.module';
import { type Database } from '../db/connection';
import { models, type Model } from '../db/schema/models.schema';
@ -10,23 +11,36 @@ import { ChatCompletionDto, ChatCompletionResponseDto } from './dto/chat-complet
@Injectable()
export class ChatService {
private readonly logger = new Logger(ChatService.name);
private readonly apiKey: string;
private readonly endpoint: string;
private readonly apiVersion: string;
// Azure OpenAI config
private readonly azureApiKey: string;
private readonly azureEndpoint: string;
private readonly azureApiVersion: string;
// Google Gemini config
private readonly geminiClient: GoogleGenerativeAI | null = null;
constructor(
private configService: ConfigService,
@Inject(DATABASE_CONNECTION) private readonly db: Database
) {
this.apiKey = this.configService.get<string>('AZURE_OPENAI_API_KEY') || '';
this.endpoint =
// Azure OpenAI setup
this.azureApiKey = this.configService.get<string>('AZURE_OPENAI_API_KEY') || '';
this.azureEndpoint =
this.configService.get<string>('AZURE_OPENAI_ENDPOINT') ||
'https://memoroseopenai.openai.azure.com';
this.apiVersion =
this.azureApiVersion =
this.configService.get<string>('AZURE_OPENAI_API_VERSION') || '2024-12-01-preview';
if (!this.apiKey) {
this.logger.warn('AZURE_OPENAI_API_KEY is not set!');
// Google Gemini setup
const geminiApiKey = this.configService.get<string>('GOOGLE_GENAI_API_KEY');
if (geminiApiKey) {
this.geminiClient = new GoogleGenerativeAI(geminiApiKey);
this.logger.log('Google Gemini client initialized');
} else {
this.logger.warn('GOOGLE_GENAI_API_KEY is not set - Gemini models unavailable');
}
if (!this.azureApiKey) {
this.logger.warn('AZURE_OPENAI_API_KEY is not set - Azure models unavailable');
}
}
@ -65,6 +79,101 @@ export class ChatService {
this.logger.log(`User ${userId} creating chat completion with model ${dto.modelId}`);
}
// Route to appropriate provider
if (model.provider === 'gemini') {
return this.createGeminiCompletion(model, dto);
} else {
return this.createAzureCompletion(model, dto);
}
}
/**
 * Sends a chat completion request to the Google Gemini API.
 *
 * Converts OpenAI-style messages into Gemini's chat format: system
 * messages are concatenated and prepended to the final user prompt
 * (Gemini's chat history has no 'system' role), all earlier non-system
 * messages become chat history, and the last message is sent as the
 * new prompt via `sendMessage`.
 *
 * @param model - DB model row; `parameters` (loosely-typed JSON) may
 *                carry a Gemini model name, temperature, and max_tokens.
 * @param dto   - Request payload: messages plus optional per-request
 *                temperature / maxTokens overrides.
 * @returns ok() with the generated content and token usage, or err()
 *          with a ServiceError when the client is unconfigured, the
 *          request is empty, or the API call fails.
 */
private async createGeminiCompletion(
  model: Model,
  dto: ChatCompletionDto
): AsyncResult<ChatCompletionResponseDto> {
  if (!this.geminiClient) {
    return err(ServiceError.externalError('Google Gemini', 'Gemini client not configured'));
  }

  // Model parameters are stored as loosely-typed JSON in the DB.
  const params = model.parameters as {
    model?: string;
    temperature?: number;
    max_tokens?: number;
  } | null;

  const modelName = params?.model || 'gemini-2.5-flash';
  // Precedence: per-request override > model defaults > hard-coded fallback.
  const temperature = dto.temperature ?? params?.temperature ?? 0.7;
  const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 8192;

  this.logger.log(`Sending request to Google Gemini model: ${modelName}`);

  try {
    const genModel = this.geminiClient.getGenerativeModel({
      model: modelName,
      generationConfig: {
        temperature,
        maxOutputTokens: maxTokens,
      },
    });

    // Convert messages to Gemini format.
    // Gemini expects alternating user/model messages, with system as first user message.
    const systemMessages = dto.messages.filter((m) => m.role === 'system');
    const chatMessages = dto.messages.filter((m) => m.role !== 'system');

    // FIX: guard against a conversation with no user/assistant messages —
    // previously sendMessage('') would be issued with no content at all.
    if (chatMessages.length === 0) {
      return err(ServiceError.generationFailed('Google Gemini', 'No user message to send'));
    }

    // Build history for chat (all but the last message).
    let history = chatMessages.slice(0, -1).map((msg) => ({
      role: msg.role === 'user' ? 'user' : 'model',
      parts: [{ text: msg.content }],
    }));
    // FIX: the Gemini SDK requires history to begin with a 'user' turn;
    // drop any leading assistant ('model') entries so a conversation that
    // starts with an assistant message does not crash the request.
    const firstUserIdx = history.findIndex((h) => h.role === 'user');
    history = firstUserIdx === -1 ? [] : history.slice(firstUserIdx);

    // Last message is sent as the new prompt.
    const lastMessage = chatMessages[chatMessages.length - 1];
    let userPrompt = lastMessage?.content || '';

    // Prepend system instruction(s) if present.
    if (systemMessages.length > 0) {
      const systemPrompt = systemMessages.map((m) => m.content).join('\n');
      userPrompt = `${systemPrompt}\n\n${userPrompt}`;
    }

    const chat = genModel.startChat({ history });
    const result = await chat.sendMessage(userPrompt);
    const response = result.response;
    const messageContent = response.text();

    if (!messageContent) {
      this.logger.warn('No message content in Gemini response');
      return err(ServiceError.generationFailed('Google Gemini', 'No response generated'));
    }

    // Gemini reports token counts in usageMetadata (may be absent; default to 0).
    const usageMetadata = response.usageMetadata;

    return ok({
      content: messageContent,
      usage: {
        prompt_tokens: usageMetadata?.promptTokenCount || 0,
        completion_tokens: usageMetadata?.candidatesTokenCount || 0,
        total_tokens: usageMetadata?.totalTokenCount || 0,
      },
    });
  } catch (error) {
    this.logger.error('Error calling Google Gemini API', error);
    return err(
      ServiceError.generationFailed(
        'Google Gemini',
        error instanceof Error ? error.message : 'Unknown error',
        error instanceof Error ? error : undefined
      )
    );
  }
}
private async createAzureCompletion(
model: Model,
dto: ChatCompletionDto
): AsyncResult<ChatCompletionResponseDto> {
const params = model.parameters as {
deployment?: string;
temperature?: number;
@ -91,16 +200,16 @@ export class ChatService {
requestBody.temperature = temperature;
}
const url = `${this.endpoint}/openai/deployments/${deployment}/chat/completions?api-version=${this.apiVersion}`;
const url = `${this.azureEndpoint}/openai/deployments/${deployment}/chat/completions?api-version=${this.azureApiVersion}`;
this.logger.log(`Sending request to: ${url}`);
this.logger.log(`Sending request to Azure OpenAI: ${url}`);
try {
const response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'api-key': this.apiKey,
'api-key': this.azureApiKey,
},
body: JSON.stringify(requestBody),
});

View file

@ -1,12 +1,31 @@
import { Injectable, CanActivate, ExecutionContext, UnauthorizedException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
// Development test user ID - used when DEV_BYPASS_AUTH=true
const DEV_USER_ID = '17cb0be7-058a-4964-9e18-1fe7055fd014';
@Injectable()
export class JwtAuthGuard implements CanActivate {
constructor(private configService: ConfigService) {}
async canActivate(context: ExecutionContext): Promise<boolean> {
const request = context.switchToHttp().getRequest();
// Development mode: bypass auth if DEV_BYPASS_AUTH is set
const isDev = this.configService.get<string>('NODE_ENV') === 'development';
const bypassAuth = this.configService.get<string>('DEV_BYPASS_AUTH') === 'true';
if (isDev && bypassAuth) {
// Use test user for development
request.user = {
userId: DEV_USER_ID,
email: 'test@example.com',
role: 'user',
sessionId: 'dev-session',
};
return true;
}
const token = this.extractTokenFromHeader(request);
if (!token) {

View file

@ -12,6 +12,7 @@ export const models = pgTable('models', {
top_p?: number;
}>(),
isActive: boolean('is_active').default(true).notNull(),
isDefault: boolean('is_default').default(false).notNull(),
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
updatedAt: timestamp('updated_at', { withTimezone: true }).defaultNow().notNull(),
});

View file

@ -32,41 +32,142 @@ async function seed() {
console.log('Seeding AI models...');
const modelData = [
// ============================================
// Google Gemini Models (Primary - fast & cost-effective)
// ============================================
{
id: '550e8400-e29b-41d4-a716-446655440000',
name: 'GPT-O3-Mini',
description: 'Fast, efficient responses for everyday tasks',
id: '550e8400-e29b-41d4-a716-446655440101',
name: 'Gemini 2.5 Flash',
description: 'Fastest & most cost-effective - ideal for everyday tasks',
provider: 'gemini',
parameters: {
model: 'gemini-2.5-flash-preview-05-20',
temperature: 0.7,
max_tokens: 8192,
},
isActive: true,
isDefault: true, // Default model
},
{
id: '550e8400-e29b-41d4-a716-446655440102',
name: 'Gemini 2.5 Flash-Lite',
description: 'Ultra-fast lightweight model - minimal latency',
provider: 'gemini',
parameters: {
model: 'gemini-2.5-flash-lite-preview-06-17',
temperature: 0.7,
max_tokens: 4096,
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440103',
name: 'Gemini 2.5 Pro',
description: 'Most powerful Gemini - complex reasoning & analysis',
provider: 'gemini',
parameters: {
model: 'gemini-2.5-pro-preview-06-05',
temperature: 0.7,
max_tokens: 16384,
},
isActive: true,
isDefault: false,
},
// ============================================
// Azure OpenAI GPT-5 Family (Alternative)
// ============================================
{
id: '550e8400-e29b-41d4-a716-446655440001',
name: 'GPT-5 Mini',
description: 'Fast & cost-effective - best for everyday tasks',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 800,
deployment: 'gpt-o3-mini-se',
max_tokens: 8192,
deployment: 'gpt-5-mini',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440002',
name: 'GPT-5 Nano',
description: 'Ultra-fast responses with low latency',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 4096,
deployment: 'gpt-5-nano',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440003',
name: 'GPT-5 Chat',
description: 'Advanced multimodal conversations with emotional intelligence',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 16384,
deployment: 'gpt-5-chat',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440004',
name: 'GPT-4o-Mini',
description: 'Compact and powerful for complex tasks',
name: 'GPT-5',
description: 'Most powerful LLM - logic-heavy & multi-step tasks',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 1000,
deployment: 'gpt-4o-mini-se',
max_tokens: 32768,
deployment: 'gpt-5',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440005',
name: 'GPT-4o',
description: 'Most advanced model for demanding tasks',
name: 'GPT-5 Codex',
description: 'Optimized for coding & front-end development',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 2000,
deployment: 'gpt-4o-se',
max_tokens: 32768,
deployment: 'gpt-5-codex',
},
isActive: true,
isDefault: false,
},
// O-Series Reasoning Models
{
id: '550e8400-e29b-41d4-a716-446655440006',
name: 'o4-mini',
description: 'Latest reasoning model - best for STEM & code',
provider: 'azure',
parameters: {
temperature: 1, // Reasoning models work best with temp=1
max_tokens: 16384,
deployment: 'o4-mini',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440007',
name: 'o3',
description: 'Advanced reasoning - 20% fewer errors than o1',
provider: 'azure',
parameters: {
temperature: 1,
max_tokens: 32768,
deployment: 'o3',
},
isActive: true,
isDefault: false,
},
];

View file

@ -10,7 +10,8 @@ async function bootstrap() {
origin: [
'http://localhost:3000',
'http://localhost:5173',
'http://localhost:5178', // Chat web app
'http://localhost:5174', // Chat web app (dev server port)
'http://localhost:5178', // Chat web app (alternative)
'http://localhost:8081',
'exp://localhost:8081',
'http://localhost:3001', // Mana Core Auth

View file

@ -1,11 +1,10 @@
import { Controller, Get, Param, UseGuards } from '@nestjs/common';
import { Controller, Get, Param } from '@nestjs/common';
import { isOk } from '@manacore/shared-errors';
import { ModelService } from './model.service';
import { type Model } from '../db/schema/models.schema';
import { JwtAuthGuard } from '../common/guards/jwt-auth.guard';
// Models are publicly accessible - no auth required to list available models
@Controller('models')
@UseGuards(JwtAuthGuard)
export class ModelController {
constructor(private readonly modelService: ModelService) {}