feat(chat): add Google Gemini API support with Flash as default model

- Add @google/generative-ai package for Gemini integration
- Update ChatService to support both Azure OpenAI and Gemini providers
- Add Gemini 2.5 Flash, Flash-Lite, and Pro models to seed
- Set Gemini 2.5 Flash as the default model for fast responses
- Add DEV_BYPASS_AUTH mode for local development
- Make /api/models endpoint public (no auth required)
- Add port 5174 to CORS allowed origins
- Add isDefault field to models schema

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Till-JS 2025-11-29 13:22:58 +01:00
parent 7deb5b9a1e
commit 95bba8ef6e
9 changed files with 426 additions and 211 deletions

View file

@ -24,6 +24,7 @@
"docker:clean": "docker compose down -v --rmi local"
},
"dependencies": {
"@google/generative-ai": "^0.24.1",
"@manacore/shared-errors": "workspace:*",
"@nestjs/common": "^10.4.15",
"@nestjs/config": "^3.3.0",

View file

@ -2,6 +2,7 @@ import { Injectable, Inject, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { eq } from 'drizzle-orm';
import { type AsyncResult, ok, err, ValidationError, ServiceError } from '@manacore/shared-errors';
import { GoogleGenerativeAI } from '@google/generative-ai';
import { DATABASE_CONNECTION } from '../db/database.module';
import { type Database } from '../db/connection';
import { models, type Model } from '../db/schema/models.schema';
@ -10,23 +11,36 @@ import { ChatCompletionDto, ChatCompletionResponseDto } from './dto/chat-complet
@Injectable()
export class ChatService {
private readonly logger = new Logger(ChatService.name);
private readonly apiKey: string;
private readonly endpoint: string;
private readonly apiVersion: string;
// Azure OpenAI config
private readonly azureApiKey: string;
private readonly azureEndpoint: string;
private readonly azureApiVersion: string;
// Google Gemini config
private readonly geminiClient: GoogleGenerativeAI | null = null;
constructor(
private configService: ConfigService,
@Inject(DATABASE_CONNECTION) private readonly db: Database
) {
this.apiKey = this.configService.get<string>('AZURE_OPENAI_API_KEY') || '';
this.endpoint =
// Azure OpenAI setup
this.azureApiKey = this.configService.get<string>('AZURE_OPENAI_API_KEY') || '';
this.azureEndpoint =
this.configService.get<string>('AZURE_OPENAI_ENDPOINT') ||
'https://memoroseopenai.openai.azure.com';
this.apiVersion =
this.azureApiVersion =
this.configService.get<string>('AZURE_OPENAI_API_VERSION') || '2024-12-01-preview';
if (!this.apiKey) {
this.logger.warn('AZURE_OPENAI_API_KEY is not set!');
// Google Gemini setup
const geminiApiKey = this.configService.get<string>('GOOGLE_GENAI_API_KEY');
if (geminiApiKey) {
this.geminiClient = new GoogleGenerativeAI(geminiApiKey);
this.logger.log('Google Gemini client initialized');
} else {
this.logger.warn('GOOGLE_GENAI_API_KEY is not set - Gemini models unavailable');
}
if (!this.azureApiKey) {
this.logger.warn('AZURE_OPENAI_API_KEY is not set - Azure models unavailable');
}
}
@ -65,6 +79,101 @@ export class ChatService {
this.logger.log(`User ${userId} creating chat completion with model ${dto.modelId}`);
}
// Route to appropriate provider
if (model.provider === 'gemini') {
return this.createGeminiCompletion(model, dto);
} else {
return this.createAzureCompletion(model, dto);
}
}
/**
 * Sends a chat completion request to the Google Gemini API.
 *
 * Converts OpenAI-style messages into Gemini's chat format: system
 * messages are concatenated and prepended to the final user prompt
 * (Gemini's chat history has no 'system' role), all earlier non-system
 * messages become chat history, and the last message is sent as the
 * new prompt via `sendMessage`.
 *
 * @param model - DB model row; `parameters` (loosely-typed JSON) may
 *                carry a Gemini model name, temperature, and max_tokens.
 * @param dto   - Request payload: messages plus optional per-request
 *                temperature / maxTokens overrides.
 * @returns ok() with the generated content and token usage, or err()
 *          with a ServiceError when the client is unconfigured, the
 *          request is empty, or the API call fails.
 */
private async createGeminiCompletion(
  model: Model,
  dto: ChatCompletionDto
): AsyncResult<ChatCompletionResponseDto> {
  if (!this.geminiClient) {
    return err(ServiceError.externalError('Google Gemini', 'Gemini client not configured'));
  }

  // Model parameters are stored as loosely-typed JSON in the DB.
  const params = model.parameters as {
    model?: string;
    temperature?: number;
    max_tokens?: number;
  } | null;

  const modelName = params?.model || 'gemini-2.5-flash';
  // Precedence: per-request override > model defaults > hard-coded fallback.
  const temperature = dto.temperature ?? params?.temperature ?? 0.7;
  const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 8192;

  this.logger.log(`Sending request to Google Gemini model: ${modelName}`);

  try {
    const genModel = this.geminiClient.getGenerativeModel({
      model: modelName,
      generationConfig: {
        temperature,
        maxOutputTokens: maxTokens,
      },
    });

    // Convert messages to Gemini format.
    // Gemini expects alternating user/model messages, with system as first user message.
    const systemMessages = dto.messages.filter((m) => m.role === 'system');
    const chatMessages = dto.messages.filter((m) => m.role !== 'system');

    // FIX: guard against a conversation with no user/assistant messages —
    // previously sendMessage('') would be issued with no content at all.
    if (chatMessages.length === 0) {
      return err(ServiceError.generationFailed('Google Gemini', 'No user message to send'));
    }

    // Build history for chat (all but the last message).
    let history = chatMessages.slice(0, -1).map((msg) => ({
      role: msg.role === 'user' ? 'user' : 'model',
      parts: [{ text: msg.content }],
    }));
    // FIX: the Gemini SDK requires history to begin with a 'user' turn;
    // drop any leading assistant ('model') entries so a conversation that
    // starts with an assistant message does not crash the request.
    const firstUserIdx = history.findIndex((h) => h.role === 'user');
    history = firstUserIdx === -1 ? [] : history.slice(firstUserIdx);

    // Last message is sent as the new prompt.
    const lastMessage = chatMessages[chatMessages.length - 1];
    let userPrompt = lastMessage?.content || '';

    // Prepend system instruction(s) if present.
    if (systemMessages.length > 0) {
      const systemPrompt = systemMessages.map((m) => m.content).join('\n');
      userPrompt = `${systemPrompt}\n\n${userPrompt}`;
    }

    const chat = genModel.startChat({ history });
    const result = await chat.sendMessage(userPrompt);
    const response = result.response;
    const messageContent = response.text();

    if (!messageContent) {
      this.logger.warn('No message content in Gemini response');
      return err(ServiceError.generationFailed('Google Gemini', 'No response generated'));
    }

    // Gemini reports token counts in usageMetadata (may be absent; default to 0).
    const usageMetadata = response.usageMetadata;

    return ok({
      content: messageContent,
      usage: {
        prompt_tokens: usageMetadata?.promptTokenCount || 0,
        completion_tokens: usageMetadata?.candidatesTokenCount || 0,
        total_tokens: usageMetadata?.totalTokenCount || 0,
      },
    });
  } catch (error) {
    this.logger.error('Error calling Google Gemini API', error);
    return err(
      ServiceError.generationFailed(
        'Google Gemini',
        error instanceof Error ? error.message : 'Unknown error',
        error instanceof Error ? error : undefined
      )
    );
  }
}
private async createAzureCompletion(
model: Model,
dto: ChatCompletionDto
): AsyncResult<ChatCompletionResponseDto> {
const params = model.parameters as {
deployment?: string;
temperature?: number;
@ -91,16 +200,16 @@ export class ChatService {
requestBody.temperature = temperature;
}
const url = `${this.endpoint}/openai/deployments/${deployment}/chat/completions?api-version=${this.apiVersion}`;
const url = `${this.azureEndpoint}/openai/deployments/${deployment}/chat/completions?api-version=${this.azureApiVersion}`;
this.logger.log(`Sending request to: ${url}`);
this.logger.log(`Sending request to Azure OpenAI: ${url}`);
try {
const response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'api-key': this.apiKey,
'api-key': this.azureApiKey,
},
body: JSON.stringify(requestBody),
});

View file

@ -1,12 +1,31 @@
import { Injectable, CanActivate, ExecutionContext, UnauthorizedException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
// Development test user ID - used when DEV_BYPASS_AUTH=true
const DEV_USER_ID = '17cb0be7-058a-4964-9e18-1fe7055fd014';
@Injectable()
export class JwtAuthGuard implements CanActivate {
constructor(private configService: ConfigService) {}
async canActivate(context: ExecutionContext): Promise<boolean> {
const request = context.switchToHttp().getRequest();
// Development mode: bypass auth if DEV_BYPASS_AUTH is set
const isDev = this.configService.get<string>('NODE_ENV') === 'development';
const bypassAuth = this.configService.get<string>('DEV_BYPASS_AUTH') === 'true';
if (isDev && bypassAuth) {
// Use test user for development
request.user = {
userId: DEV_USER_ID,
email: 'test@example.com',
role: 'user',
sessionId: 'dev-session',
};
return true;
}
const token = this.extractTokenFromHeader(request);
if (!token) {

View file

@ -12,6 +12,7 @@ export const models = pgTable('models', {
top_p?: number;
}>(),
isActive: boolean('is_active').default(true).notNull(),
isDefault: boolean('is_default').default(false).notNull(),
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
updatedAt: timestamp('updated_at', { withTimezone: true }).defaultNow().notNull(),
});

View file

@ -32,41 +32,142 @@ async function seed() {
console.log('Seeding AI models...');
const modelData = [
// ============================================
// Google Gemini Models (Primary - fast & cost-effective)
// ============================================
{
id: '550e8400-e29b-41d4-a716-446655440000',
name: 'GPT-O3-Mini',
description: 'Fast, efficient responses for everyday tasks',
id: '550e8400-e29b-41d4-a716-446655440101',
name: 'Gemini 2.5 Flash',
description: 'Fastest & most cost-effective - ideal for everyday tasks',
provider: 'gemini',
parameters: {
model: 'gemini-2.5-flash-preview-05-20',
temperature: 0.7,
max_tokens: 8192,
},
isActive: true,
isDefault: true, // Default model
},
{
id: '550e8400-e29b-41d4-a716-446655440102',
name: 'Gemini 2.5 Flash-Lite',
description: 'Ultra-fast lightweight model - minimal latency',
provider: 'gemini',
parameters: {
model: 'gemini-2.5-flash-lite-preview-06-17',
temperature: 0.7,
max_tokens: 4096,
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440103',
name: 'Gemini 2.5 Pro',
description: 'Most powerful Gemini - complex reasoning & analysis',
provider: 'gemini',
parameters: {
model: 'gemini-2.5-pro-preview-06-05',
temperature: 0.7,
max_tokens: 16384,
},
isActive: true,
isDefault: false,
},
// ============================================
// Azure OpenAI GPT-5 Family (Alternative)
// ============================================
{
id: '550e8400-e29b-41d4-a716-446655440001',
name: 'GPT-5 Mini',
description: 'Fast & cost-effective - best for everyday tasks',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 800,
deployment: 'gpt-o3-mini-se',
max_tokens: 8192,
deployment: 'gpt-5-mini',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440002',
name: 'GPT-5 Nano',
description: 'Ultra-fast responses with low latency',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 4096,
deployment: 'gpt-5-nano',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440003',
name: 'GPT-5 Chat',
description: 'Advanced multimodal conversations with emotional intelligence',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 16384,
deployment: 'gpt-5-chat',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440004',
name: 'GPT-4o-Mini',
description: 'Compact and powerful for complex tasks',
name: 'GPT-5',
description: 'Most powerful LLM - logic-heavy & multi-step tasks',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 1000,
deployment: 'gpt-4o-mini-se',
max_tokens: 32768,
deployment: 'gpt-5',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440005',
name: 'GPT-4o',
description: 'Most advanced model for demanding tasks',
name: 'GPT-5 Codex',
description: 'Optimized for coding & front-end development',
provider: 'azure',
parameters: {
temperature: 0.7,
max_tokens: 2000,
deployment: 'gpt-4o-se',
max_tokens: 32768,
deployment: 'gpt-5-codex',
},
isActive: true,
isDefault: false,
},
// O-Series Reasoning Models
{
id: '550e8400-e29b-41d4-a716-446655440006',
name: 'o4-mini',
description: 'Latest reasoning model - best for STEM & code',
provider: 'azure',
parameters: {
temperature: 1, // Reasoning models work best with temp=1
max_tokens: 16384,
deployment: 'o4-mini',
},
isActive: true,
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440007',
name: 'o3',
description: 'Advanced reasoning - 20% fewer errors than o1',
provider: 'azure',
parameters: {
temperature: 1,
max_tokens: 32768,
deployment: 'o3',
},
isActive: true,
isDefault: false,
},
];

View file

@ -10,7 +10,8 @@ async function bootstrap() {
origin: [
'http://localhost:3000',
'http://localhost:5173',
'http://localhost:5178', // Chat web app
'http://localhost:5174', // Chat web app (dev server port)
'http://localhost:5178', // Chat web app (alternative)
'http://localhost:8081',
'exp://localhost:8081',
'http://localhost:3001', // Mana Core Auth

View file

@ -1,11 +1,10 @@
import { Controller, Get, Param, UseGuards } from '@nestjs/common';
import { Controller, Get, Param } from '@nestjs/common';
import { isOk } from '@manacore/shared-errors';
import { ModelService } from './model.service';
import { type Model } from '../db/schema/models.schema';
import { JwtAuthGuard } from '../common/guards/jwt-auth.guard';
// Models are publicly accessible - no auth required to list available models
@Controller('models')
@UseGuards(JwtAuthGuard)
export class ModelController {
constructor(private readonly modelService: ModelService) {}