From 3f9bc5761b09644e51e94d2f326760252d86bbfe Mon Sep 17 00:00:00 2001 From: Wuesteon Date: Fri, 12 Dec 2025 20:48:10 +0100 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor(chat):=20simplify?= =?UTF-8?q?=20chat=20service=20and=20update=20seed=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove Gemini and Azure provider code from chat.service.ts (OpenRouter is now the only provider) - Simplify seed.ts to the OpenRouter models only - Update CLAUDE.md documentation - Update .env.example - Remove unused @google/generative-ai dependency - Fix effect loop in chat page (non-reactive load tracking, skip reloading the same conversation) --- apps/chat/CLAUDE.md | 98 +++++---- apps/chat/apps/backend/.env.example | 14 +- apps/chat/apps/backend/package.json | 1 - .../apps/backend/src/chat/chat.service.ts | 206 +----------------- apps/chat/apps/backend/src/db/seed.ts | 141 +----------- .../routes/(protected)/chat/[id]/+page.svelte | 8 +- 6 files changed, 75 insertions(+), 393 deletions(-) diff --git a/apps/chat/CLAUDE.md b/apps/chat/CLAUDE.md index 107216de4..0632271fd 100644 --- a/apps/chat/CLAUDE.md +++ b/apps/chat/CLAUDE.md @@ -24,6 +24,7 @@ pnpm dev:chat:mobile # Start mobile app pnpm dev:chat:web # Start web app pnpm dev:chat:landing # Start landing page pnpm dev:chat:backend # Start backend server +pnpm dev:chat:full # Start backend + web + auth together ``` ### Mobile App (chat/apps/mobile) @@ -43,6 +44,9 @@ pnpm build:prod # Build production version pnpm start:dev # Start with hot reload pnpm build # Build for production pnpm start:prod # Start production server +pnpm db:push # Push schema to database +pnpm db:seed # Seed AI models +pnpm db:studio # Open Drizzle Studio ``` ### Web App (chat/apps/web) @@ -66,7 +70,8 @@ pnpm preview # Preview production build - **Mobile**: React Native 0.76.7 + Expo SDK 52, NativeWind, Expo Router - **Web**: SvelteKit 2.x, Svelte 5, Tailwind CSS 4 - **Landing**: Astro 5.16, Tailwind CSS -- **Backend**: NestJS 10, OpenRouter/Gemini AI, Supabase +- **Backend**: NestJS 10, OpenRouter AI, Drizzle ORM, PostgreSQL +- **Auth**: 
Mana Core Auth (JWT) - **Types**: TypeScript 5.x ## Architecture @@ -75,34 +80,45 @@ pnpm preview # Preview production build | Endpoint | Method | Description | | --------------------------------- | ------ | --------------------------- | -| `/api/health` | GET | Health check | -| `/api/chat/models` | GET | List available AI models | -| `/api/chat/completions` | POST | Create chat completion | -| `/api/conversations` | GET | List user conversations | -| `/api/conversations/:id` | GET | Get conversation details | -| `/api/conversations/:id/messages` | GET | Get conversation messages | -| `/api/conversations` | POST | Create new conversation | -| `/api/conversations/:id/messages` | POST | Add message to conversation | +| `/api/v1/health` | GET | Health check | +| `/api/v1/chat/models` | GET | List available AI models | +| `/api/v1/chat/completions` | POST | Create chat completion | +| `/api/v1/conversations` | GET | List user conversations | +| `/api/v1/conversations/:id` | GET | Get conversation details | +| `/api/v1/conversations/:id/messages` | GET | Get conversation messages | +| `/api/v1/conversations` | POST | Create new conversation | +| `/api/v1/conversations/:id/messages` | POST | Add message to conversation | ### Environment Variables #### Backend (.env) -``` -OPENROUTER_API_KEY=... # Get at https://openrouter.ai/keys -GOOGLE_GENAI_API_KEY=... # Optional: For Gemini models -SUPABASE_URL=https://... -SUPABASE_SERVICE_KEY=... +```env +# Required - All AI models via OpenRouter +OPENROUTER_API_KEY=sk-or-v1-xxx # Get at https://openrouter.ai/keys + +# Database (uses shared Docker PostgreSQL) +DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat + +# Auth +MANA_CORE_AUTH_URL=http://localhost:3001 + +# Server PORT=3002 -DEV_BYPASS_AUTH=true # Optional: Skip auth in development ``` #### Mobile (.env) +```env +EXPO_PUBLIC_MANA_CORE_AUTH_URL=http://localhost:3001 +EXPO_PUBLIC_BACKEND_URL=http://localhost:3002 ``` -EXPO_PUBLIC_SUPABASE_URL=https://... 
-EXPO_PUBLIC_SUPABASE_ANON_KEY=... -EXPO_PUBLIC_BACKEND_URL=http://localhost:3001 + +#### Web (.env) + +```env +PUBLIC_MANA_CORE_AUTH_URL=http://localhost:3001 +PUBLIC_BACKEND_URL=http://localhost:3002 ``` ## Code Style Guidelines @@ -113,39 +129,43 @@ EXPO_PUBLIC_BACKEND_URL=http://localhost:3001 - **Styling**: Tailwind CSS everywhere - **Formatting**: 100 char line limit, 2 space tabs, single quotes -## AI Models Available +## AI Models Available (via OpenRouter) -### OpenRouter Models (Recommended) +All models are accessed through OpenRouter, providing access to 100+ models with a single API key. | Model ID | Name | Price | Best For | | -------- | ---- | ----- | -------- | -| ...440201 | Llama 3.1 8B | $0.05/M | Everyday tasks, cheap | +| ...440201 | Llama 3.1 8B | $0.05/M | Everyday tasks (default) | | ...440202 | Llama 3.1 70B | $0.35/M | Complex reasoning | | ...440203 | DeepSeek V3 | $0.14/M | Reasoning at low cost | | ...440204 | Mistral Small | $0.10/M | General tasks | | ...440205 | Claude 3.5 Sonnet | $3/M | Best quality | | ...440206 | GPT-4o Mini | $0.15/M | Balanced performance | -### Google Gemini Models +## Quick Start -| Model ID | Name | Description | Default | -| -------- | ---- | ----------- | ------- | -| ...440101 | Gemini 2.5 Flash | Fast, efficient responses | Yes | -| ...440102 | Gemini 2.0 Flash-Lite | Ultra-lightweight model | No | -| ...440103 | Gemini 2.5 Pro | Most capable model | No | - -## OpenRouter Setup - -To enable OpenRouter models: - -- [ ] Get API key at https://openrouter.ai/keys -- [ ] Add `OPENROUTER_API_KEY=sk-or-v1-xxx` to `apps/chat/apps/backend/.env` -- [ ] Re-seed database: `pnpm --filter @chat/backend db:seed` -- [ ] Test: `pnpm dev:chat:backend` +1. **Get OpenRouter API key** at https://openrouter.ai/keys +2. 
**Create `.env`** in `apps/chat/apps/backend/`: + ```env + OPENROUTER_API_KEY=sk-or-v1-xxx + DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat + MANA_CORE_AUTH_URL=http://localhost:3001 + PORT=3002 + ``` +3. **Start services**: + ```bash + pnpm docker:up # Start PostgreSQL + pnpm dev:chat:full # Start auth + backend + web + ``` +4. **Seed database** (first time only): + ```bash + pnpm --filter @chat/backend db:push + pnpm --filter @chat/backend db:seed + ``` ## Important Notes 1. **Security**: API keys are stored in the backend only - never in client apps -2. **Authentication**: Uses Supabase Auth, shared with Mana Core ecosystem -3. **Database**: Supabase PostgreSQL with RLS policies -4. **Deployment**: Backend runs on port 3001 by default +2. **Authentication**: Uses Mana Core Auth (JWT tokens) +3. **Database**: PostgreSQL with Drizzle ORM (uses shared Docker container) +4. **Deployment**: Backend runs on port 3002 diff --git a/apps/chat/apps/backend/.env.example b/apps/chat/apps/backend/.env.example index f011962ae..e94bb4010 100644 --- a/apps/chat/apps/backend/.env.example +++ b/apps/chat/apps/backend/.env.example @@ -1,20 +1,14 @@ -# OpenRouter Configuration (Recommended - multi-model access) +# OpenRouter Configuration (Required) # Get your API key at https://openrouter.ai/keys +# All AI models are accessed through OpenRouter OPENROUTER_API_KEY=your-openrouter-api-key -# Google Gemini Configuration -GOOGLE_GENAI_API_KEY=your-google-api-key - -# Azure OpenAI Configuration (Optional) -AZURE_OPENAI_ENDPOINT=https://your-azure-openai-endpoint.openai.azure.com -AZURE_OPENAI_API_KEY=your-api-key-here -AZURE_OPENAI_API_VERSION=2024-12-01-preview - # Mana Core Auth Configuration MANA_CORE_AUTH_URL=http://localhost:3001 # PostgreSQL Database Configuration -DATABASE_URL=postgresql://chat:password@localhost:5432/chat +# Uses shared Docker PostgreSQL with separate 'chat' database +DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat # 
Server Configuration PORT=3002 diff --git a/apps/chat/apps/backend/package.json b/apps/chat/apps/backend/package.json index 6ce38d5e5..c4b909062 100644 --- a/apps/chat/apps/backend/package.json +++ b/apps/chat/apps/backend/package.json @@ -24,7 +24,6 @@ "docker:clean": "docker compose down -v --rmi local" }, "dependencies": { - "@google/generative-ai": "^0.24.1", "@manacore/shared-errors": "workspace:*", "@manacore/shared-nestjs-auth": "workspace:*", "@nestjs/common": "^10.4.15", diff --git a/apps/chat/apps/backend/src/chat/chat.service.ts b/apps/chat/apps/backend/src/chat/chat.service.ts index edc00e97a..20a569d59 100644 --- a/apps/chat/apps/backend/src/chat/chat.service.ts +++ b/apps/chat/apps/backend/src/chat/chat.service.ts @@ -2,7 +2,6 @@ import { Injectable, Inject, Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { eq } from 'drizzle-orm'; import { AsyncResult, ok, err, ValidationError, ServiceError } from '@manacore/shared-errors'; -import { GoogleGenerativeAI } from '@google/generative-ai'; import OpenAI from 'openai'; import { DATABASE_CONNECTION } from '../db/database.module'; import { Database } from '../db/connection'; @@ -14,37 +13,14 @@ import type { ChatCompletionResponseDto } from './dto/chat-completion.dto'; @Injectable() export class ChatService { private readonly logger = new Logger(ChatService.name); - // Azure OpenAI config - private readonly azureApiKey: string; - private readonly azureEndpoint: string; - private readonly azureApiVersion: string; - // Google Gemini config - private readonly geminiClient: GoogleGenerativeAI | null = null; - // OpenRouter config + // OpenRouter config (primary provider) private readonly openRouterClient: OpenAI | null = null; constructor( private configService: ConfigService, @Inject(DATABASE_CONNECTION) private readonly db: Database ) { - // Azure OpenAI setup - this.azureApiKey = this.configService.get('AZURE_OPENAI_API_KEY') || ''; - this.azureEndpoint = - 
this.configService.get('AZURE_OPENAI_ENDPOINT') || - 'https://memoroseopenai.openai.azure.com'; - this.azureApiVersion = - this.configService.get('AZURE_OPENAI_API_VERSION') || '2024-12-01-preview'; - - // Google Gemini setup - const geminiApiKey = this.configService.get('GOOGLE_GENAI_API_KEY'); - if (geminiApiKey) { - this.geminiClient = new GoogleGenerativeAI(geminiApiKey); - this.logger.log('Google Gemini client initialized'); - } else { - this.logger.warn('GOOGLE_GENAI_API_KEY is not set - Gemini models unavailable'); - } - - // OpenRouter setup + // OpenRouter setup (primary and only provider) const openRouterApiKey = this.configService.get('OPENROUTER_API_KEY'); if (openRouterApiKey) { this.openRouterClient = new OpenAI({ @@ -57,11 +33,7 @@ export class ChatService { }); this.logger.log('OpenRouter client initialized'); } else { - this.logger.warn('OPENROUTER_API_KEY is not set - OpenRouter models unavailable'); - } - - if (!this.azureApiKey) { - this.logger.warn('AZURE_OPENAI_API_KEY is not set - Azure models unavailable'); + this.logger.error('OPENROUTER_API_KEY is not set - Chat will not work!'); } } @@ -100,176 +72,8 @@ export class ChatService { this.logger.log(`User ${userId} creating chat completion with model ${dto.modelId}`); } - // Route to appropriate provider - if (model.provider === 'gemini') { - return this.createGeminiCompletion(model, dto); - } else if (model.provider === 'openrouter') { - return this.createOpenRouterCompletion(model, dto); - } else { - return this.createAzureCompletion(model, dto); - } - } - - private async createGeminiCompletion( - model: Model, - dto: ChatCompletionDto - ): AsyncResult { - if (!this.geminiClient) { - return err(ServiceError.externalError('Google Gemini', 'Gemini client not configured')); - } - - const params = model.parameters as { - model?: string; - temperature?: number; - max_tokens?: number; - } | null; - - const modelName = params?.model || 'gemini-2.5-flash'; - const temperature = dto.temperature ?? 
params?.temperature ?? 0.7; - const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 8192; - - this.logger.log(`Sending request to Google Gemini model: ${modelName}`); - - try { - const genModel = this.geminiClient.getGenerativeModel({ - model: modelName, - generationConfig: { - temperature, - maxOutputTokens: maxTokens, - }, - }); - - // Convert messages to Gemini format - // Gemini expects alternating user/model messages, with system as first user message - const systemMessages = dto.messages.filter((m) => m.role === 'system'); - const chatMessages = dto.messages.filter((m) => m.role !== 'system'); - - // Build history for chat (all but last message) - const history = chatMessages.slice(0, -1).map((msg) => ({ - role: msg.role === 'user' ? 'user' : 'model', - parts: [{ text: msg.content }], - })); - - // Last message to send - const lastMessage = chatMessages[chatMessages.length - 1]; - let userPrompt = lastMessage?.content || ''; - - // Prepend system instruction if present - if (systemMessages.length > 0) { - const systemPrompt = systemMessages.map((m) => m.content).join('\n'); - userPrompt = `${systemPrompt}\n\n${userPrompt}`; - } - - const chat = genModel.startChat({ history }); - const result = await chat.sendMessage(userPrompt); - const response = result.response; - const messageContent = response.text(); - - if (!messageContent) { - this.logger.warn('No message content in Gemini response'); - return err(ServiceError.generationFailed('Google Gemini', 'No response generated')); - } - - // Gemini provides usage metadata - const usageMetadata = response.usageMetadata; - - return ok({ - content: messageContent, - usage: { - prompt_tokens: usageMetadata?.promptTokenCount || 0, - completion_tokens: usageMetadata?.candidatesTokenCount || 0, - total_tokens: usageMetadata?.totalTokenCount || 0, - }, - }); - } catch (error) { - this.logger.error('Error calling Google Gemini API', error); - return err( - ServiceError.generationFailed( - 'Google Gemini', - error 
instanceof Error ? error.message : 'Unknown error', - error instanceof Error ? error : undefined - ) - ); - } - } - - private async createAzureCompletion( - model: Model, - dto: ChatCompletionDto - ): AsyncResult { - const params = model.parameters as { - deployment?: string; - temperature?: number; - max_tokens?: number; - } | null; - - const deployment = params?.deployment || 'gpt-4o-mini-se'; - const temperature = dto.temperature ?? params?.temperature ?? 0.7; - const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 1000; - - // Prepare request body - const requestBody: Record = { - messages: dto.messages.map((msg) => ({ - role: msg.role, - content: msg.content, - })), - }; - - // Model-specific parameters - const isGPTOModel = deployment.includes('gpt-o') || deployment.includes('gpt-4o'); - - if (!isGPTOModel) { - requestBody.max_tokens = maxTokens; - requestBody.temperature = temperature; - } - - const url = `${this.azureEndpoint}/openai/deployments/${deployment}/chat/completions?api-version=${this.azureApiVersion}`; - - this.logger.log(`Sending request to Azure OpenAI: ${url}`); - - try { - const response = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'api-key': this.azureApiKey, - }, - body: JSON.stringify(requestBody), - }); - - if (!response.ok) { - const errorText = await response.text(); - this.logger.error(`API error: ${response.status} - ${errorText}`); - return err(ServiceError.externalError('Azure OpenAI', `API error: ${response.status}`)); - } - - const data = await response.json(); - - const messageContent = data.choices?.[0]?.message?.content; - - if (!messageContent) { - this.logger.warn('No message content in response'); - return err(ServiceError.generationFailed('Azure OpenAI', 'No response generated')); - } - - return ok({ - content: messageContent, - usage: { - prompt_tokens: data.usage?.prompt_tokens || 0, - completion_tokens: data.usage?.completion_tokens || 0, - total_tokens: 
data.usage?.total_tokens || 0, - }, - }); - } catch (error) { - this.logger.error('Error calling Azure OpenAI API', error); - return err( - ServiceError.generationFailed( - 'Azure OpenAI', - error instanceof Error ? error.message : 'Unknown error', - error instanceof Error ? error : undefined - ) - ); - } + // All models go through OpenRouter + return this.createOpenRouterCompletion(model, dto); } private async createOpenRouterCompletion( diff --git a/apps/chat/apps/backend/src/db/seed.ts b/apps/chat/apps/backend/src/db/seed.ts index c478cac10..109c9b3f3 100644 --- a/apps/chat/apps/backend/src/db/seed.ts +++ b/apps/chat/apps/backend/src/db/seed.ts @@ -33,49 +33,7 @@ async function seed() { const modelData = [ // ============================================ - // Google Gemini Models (Primary - fast & cost-effective) - // ============================================ - { - id: '550e8400-e29b-41d4-a716-446655440101', - name: 'Gemini 2.5 Flash', - description: 'Fastest & most cost-effective - ideal for everyday tasks', - provider: 'gemini', - parameters: { - model: 'gemini-2.5-flash', - temperature: 0.7, - max_tokens: 8192, - }, - isActive: true, - isDefault: true, // Default model - }, - { - id: '550e8400-e29b-41d4-a716-446655440102', - name: 'Gemini 2.0 Flash-Lite', - description: 'Ultra-fast lightweight model - minimal latency', - provider: 'gemini', - parameters: { - model: 'gemini-2.0-flash-lite', - temperature: 0.7, - max_tokens: 4096, - }, - isActive: true, - isDefault: false, - }, - { - id: '550e8400-e29b-41d4-a716-446655440103', - name: 'Gemini 2.5 Pro', - description: 'Most powerful Gemini - complex reasoning & analysis', - provider: 'gemini', - parameters: { - model: 'gemini-2.5-pro', - temperature: 0.7, - max_tokens: 16384, - }, - isActive: true, - isDefault: false, - }, - // ============================================ - // OpenRouter Models (Multi-provider, cost-effective) + // OpenRouter Models (All models via OpenRouter) // 
============================================ { id: '550e8400-e29b-41d4-a716-446655440201', @@ -88,7 +46,7 @@ async function seed() { max_tokens: 4096, }, isActive: true, - isDefault: false, + isDefault: true, // Default model - fast and cost-effective }, { id: '550e8400-e29b-41d4-a716-446655440202', @@ -155,101 +113,6 @@ async function seed() { isActive: true, isDefault: false, }, - // ============================================ - // Azure OpenAI GPT-5 Family (Inactive - no deployment) - // ============================================ - { - id: '550e8400-e29b-41d4-a716-446655440001', - name: 'GPT-5 Mini', - description: 'Fast & cost-effective - best for everyday tasks', - provider: 'azure', - parameters: { - temperature: 0.7, - max_tokens: 8192, - deployment: 'gpt-5-mini', - }, - isActive: false, - isDefault: false, - }, - { - id: '550e8400-e29b-41d4-a716-446655440002', - name: 'GPT-5 Nano', - description: 'Ultra-fast responses with low latency', - provider: 'azure', - parameters: { - temperature: 0.7, - max_tokens: 4096, - deployment: 'gpt-5-nano', - }, - isActive: false, - isDefault: false, - }, - { - id: '550e8400-e29b-41d4-a716-446655440003', - name: 'GPT-5 Chat', - description: 'Advanced multimodal conversations with emotional intelligence', - provider: 'azure', - parameters: { - temperature: 0.7, - max_tokens: 16384, - deployment: 'gpt-5-chat', - }, - isActive: false, - isDefault: false, - }, - { - id: '550e8400-e29b-41d4-a716-446655440004', - name: 'GPT-5', - description: 'Most powerful LLM - logic-heavy & multi-step tasks', - provider: 'azure', - parameters: { - temperature: 0.7, - max_tokens: 32768, - deployment: 'gpt-5', - }, - isActive: false, - isDefault: false, - }, - { - id: '550e8400-e29b-41d4-a716-446655440005', - name: 'GPT-5 Codex', - description: 'Optimized for coding & front-end development', - provider: 'azure', - parameters: { - temperature: 0.7, - max_tokens: 32768, - deployment: 'gpt-5-codex', - }, - isActive: false, - isDefault: false, - 
}, - // O-Series Reasoning Models (Inactive - no deployment) - { - id: '550e8400-e29b-41d4-a716-446655440006', - name: 'o4-mini', - description: 'Latest reasoning model - best for STEM & code', - provider: 'azure', - parameters: { - temperature: 1, // Reasoning models work best with temp=1 - max_tokens: 16384, - deployment: 'o4-mini', - }, - isActive: false, - isDefault: false, - }, - { - id: '550e8400-e29b-41d4-a716-446655440007', - name: 'o3', - description: 'Advanced reasoning - 20% fewer errors than o1', - provider: 'azure', - parameters: { - temperature: 1, - max_tokens: 32768, - deployment: 'o3', - }, - isActive: false, - isDefault: false, - }, ]; await db.insert(models).values(modelData); diff --git a/apps/chat/apps/web/src/routes/(protected)/chat/[id]/+page.svelte b/apps/chat/apps/web/src/routes/(protected)/chat/[id]/+page.svelte index a9b58b72e..5262047bf 100644 --- a/apps/chat/apps/web/src/routes/(protected)/chat/[id]/+page.svelte +++ b/apps/chat/apps/web/src/routes/(protected)/chat/[id]/+page.svelte @@ -28,15 +28,17 @@ let showVersionsModal = $state(false); let showDocumentPanel = $state(true); - // Track current request to prevent race conditions - let currentLoadId = $state(0); + // Track current request to prevent race conditions (not reactive to avoid effect loops) + let currentLoadId = 0; + let lastLoadedConversationId = ''; const conversationId = $derived($page.params.id ?? ''); const isDocumentMode = $derived(conversation?.documentMode ?? false); // React to conversationId changes with race condition protection $effect(() => { - if (conversationId) { + if (conversationId && conversationId !== lastLoadedConversationId) { + lastLoadedConversationId = conversationId; loadData(conversationId); } });