feat: GPU offload, signup limit, load tests & capacity planning

- Route all AI workloads (Ollama, STT, TTS, Image Gen) to GPU server (192.168.178.11) via LAN instead of host.docker.internal
- Upgrade default model to gemma3:12b and max concurrent to 5
- Add daily signup limit service (MAX_DAILY_SIGNUPS env var)
- Add GET /api/v1/auth/signup-status public endpoint
- Add k6 load test suite (web-apps, auth, sync-websocket, ollama)
- Add capacity planning documentation
- Fix: add eslint-config to sveltekit-base and calendar Dockerfiles

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-14 18:01:09 +02:00 · 2026-03-28 21:14:24 +01:00 · 2026-03-28 21:14:24 +01:00 · 9276d9a212
commit 9276d9a212
parent 16367384c7
12 changed files with 683 additions and 14 deletions
--- a/load-tests/README.md
+++ b/load-tests/README.md
@ -0,0 +1,58 @@
+# Load Tests
+
+k6-basierte Load Tests fuer die Mana-Infrastruktur.
+
+## Setup
+
+```bash
+# k6 installieren (macOS)
+brew install k6
+
+# Keine zusaetzliche WebSocket-Extension noetig (fuer Sync-Tests):
+# k6 hat WebSocket-Support eingebaut
+```
+
+## Tests ausfuehren
+
+```bash
+# Gegen lokale Umgebung
+k6 run load-tests/web-apps.js
+k6 run load-tests/auth-api.js
+k6 run load-tests/sync-websocket.js
+k6 run load-tests/llm-ollama.js
+
+# Gegen Produktion (vorsichtig!)
+k6 run -e BASE_URL=https://mana.how load-tests/web-apps.js
+
+# Mit mehr/weniger Last
+k6 run --vus 100 --duration 5m load-tests/web-apps.js
+
+# JSON-Output fuer Grafana
+k6 run --out json=results.json load-tests/web-apps.js
+```
+
+## Test-Szenarien
+
+| Script | Ziel | Default VUs | Dauer |
+|--------|------|-------------|-------|
+| `web-apps.js` | SvelteKit Frontends (HTML-Responses) | 10→50→0 | 4 min |
+| `auth-api.js` | Login, Register, Token Validation | 5→20→0 | 3 min |
+| `sync-websocket.js` | mana-sync WebSocket Connections | 10→30→0 | 4 min |
+| `llm-ollama.js` | Ollama Chat Completions | 1→3→1 | 3 min |
+
+## Metriken interpretieren
+
+| Metrik | Gut | Akzeptabel | Schlecht |
+|--------|-----|-----------|---------|
+| http_req_duration (p95) | < 200ms | < 1s | > 2s |
+| http_req_failed | 0% | < 1% | > 5% |
+| ws_connecting (p95) | < 100ms | < 500ms | > 1s |
+| iterations | Steigend | Stabil | Fallend |
+
+## Monitoring waehrend Tests
+
+Grafana-Dashboard auf http://localhost:8080 (oder https://grafana.mana.how) beobachten:
+- Container CPU/RAM (cAdvisor)
+- PostgreSQL Connections
+- Redis Commands/sec
+- Netzwerk-Throughput
--- a/load-tests/auth-api.js
+++ b/load-tests/auth-api.js
@ -0,0 +1,81 @@
+/* eslint-disable no-undef */
+import http from 'k6/http';
+import { check, sleep, group } from 'k6';
+import { Rate, Counter } from 'k6/metrics';
+
+// Custom metrics: rate of iterations whose checks failed, and a count of
+// registration requests that were answered with HTTP 429.
+const errorRate = new Rate('errors');
+const signupBlocked = new Counter('signup_blocked');
+
+// Base URL of the auth service; override with `-e AUTH_URL=...`.
+const AUTH_URL = __ENV.AUTH_URL || 'http://localhost:3001';
+
+// Load profile: ramp to 5 VUs (30s), ramp to 20 VUs (2m), ramp down to 0 (30s).
+// Total run time is 3 minutes.
+export const options = {
+	stages: [
+		{ duration: '30s', target: 5 },
+		{ duration: '2m', target: 20 },
+		{ duration: '30s', target: 0 },
+	],
+	thresholds: {
+		http_req_duration: ['p(95)<3000'],
+		errors: ['rate<0.10'],
+	},
+};
+
+/**
+ * Builds a throwaway e-mail address for one registration attempt.
+ *
+ * The address combines the VU id, the iteration number and the current
+ * millisecond timestamp, and uses the reserved `.invalid` TLD.
+ *
+ * @param {number} vuId - id of the virtual user issuing the request
+ * @param {number} iter - iteration counter of that virtual user
+ * @returns {string} address of the form `loadtest_vu<vu>_<iter>_<ms>@test.invalid`
+ */
+function testEmail(vuId, iter) {
+	const prefix = `loadtest_vu${vuId}_${iter}`;
+	const localPart = `${prefix}_${Date.now()}`;
+	return `${localPart}@test.invalid`;
+}
+
+/**
+ * One VU iteration against the auth service.
+ *
+ * A single random draw selects the scenario: 70% health check, 20% login
+ * with invalid credentials, 10% registration. Each scenario records into the
+ * `errors` rate whether its check failed. The iteration then pauses for a
+ * random 0.5–2s.
+ */
+export default function () {
+	const jsonParams = { headers: { 'Content-Type': 'application/json' } };
+	const dice = Math.random();
+
+	if (dice >= 0.9) {
+		// Registration: the service may accept (200) or refuse with 429;
+		// refusals are counted separately in `signup_blocked`.
+		group('register', () => {
+			const payload = JSON.stringify({
+				email: testEmail(__VU, __ITER),
+				password: 'TestPassword123!',
+				name: `Load Test ${__VU}`,
+			});
+			const response = http.post(`${AUTH_URL}/api/v1/auth/register`, payload, jsonParams);
+			const passed = check(response, {
+				'register returns 200 or 429': (r) => [200, 429].includes(r.status),
+			});
+			if (response.status === 429) {
+				signupBlocked.add(1);
+			}
+			errorRate.add(!passed);
+		});
+	} else if (dice >= 0.7) {
+		// Login with credentials that are expected to be rejected (401) or
+		// throttled (429).
+		group('login', () => {
+			const payload = JSON.stringify({
+				email: 'loadtest@nonexistent.invalid',
+				password: 'wrongpassword',
+			});
+			const response = http.post(`${AUTH_URL}/api/v1/auth/login`, payload, jsonParams);
+			const passed = check(response, {
+				'login returns 401 or 429': (r) => [401, 429].includes(r.status),
+			});
+			errorRate.add(!passed);
+		});
+	} else {
+		// Health check: the lightweight baseline request.
+		group('health', () => {
+			const response = http.get(`${AUTH_URL}/health`);
+			const passed = check(response, {
+				'health 200': (r) => r.status === 200,
+			});
+			errorRate.add(!passed);
+		});
+	}
+
+	sleep(0.5 + Math.random() * 1.5);
+}
--- a/load-tests/llm-ollama.js
+++ b/load-tests/llm-ollama.js
@ -0,0 +1,82 @@
+/* eslint-disable no-undef, @typescript-eslint/no-unused-vars */
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend, Counter } from 'k6/metrics';
+
+// Custom metrics. `tokens_per_second` is a throughput value, not a duration,
+// so its Trend is created without the `isTime` flag; with the flag set, k6
+// would format the values as time in its summary output.
+const errorRate = new Rate('errors');
+const tokensPerSec = new Trend('tokens_per_second');
+const totalTokens = new Counter('total_tokens_generated');
+
+// Ollama endpoint and model under test; override with `-e OLLAMA_URL=...`
+// and `-e MODEL=...`.
+const OLLAMA_URL = __ENV.OLLAMA_URL || 'http://localhost:11434';
+const MODEL = __ENV.MODEL || 'gemma3:4b';
+
+// Load profile: ramp to 1 VU (30s), ramp to 3 VUs (2m), ramp back to 1 VU (30s).
+// Total run time is 3 minutes.
+// NOTE(review): the default MODEL in this script is 'gemma3:4b', while the commit
+// message mentions upgrading the default model to gemma3:12b — confirm which
+// model this test is meant to exercise by default.
+export const options = {
+	// LLM is single-threaded effectively — test with few VUs
+	stages: [
+		{ duration: '30s', target: 1 },
+		{ duration: '2m', target: 3 },
+		{ duration: '30s', target: 1 },
+	],
+	thresholds: {
+		http_req_duration: ['p(95)<30000'], // LLM responses can be slow
+		errors: ['rate<0.10'],
+	},
+};
+
+// Short German prompts; one is picked at random per iteration.
+const prompts = [
+	'Was ist die Hauptstadt von Deutschland? Antworte in einem Satz.',
+	'Erklaere Photosynthese in 2 Saetzen.',
+	'Schreibe ein kurzes Haiku ueber Programmierung.',
+	'Was ist der Unterschied zwischen TCP und UDP? Kurz.',
+	'Nenne 3 Vorteile von Self-Hosting.',
+];
+
+/**
+ * One VU iteration against Ollama's `/api/generate` endpoint.
+ *
+ * Sends a randomly chosen prompt as a non-streaming request capped at 100
+ * predicted tokens. Checks that the status is 200 and that the JSON body
+ * carries a non-empty `response`. When both hold and the body reports
+ * `eval_count` and `eval_duration`, records tokens per second and the token
+ * total. The iteration then pauses for a random 3–8s.
+ */
+export default function () {
+	const pick = Math.floor(Math.random() * prompts.length);
+
+	// Non-streaming keeps the whole answer in one JSON document.
+	const requestBody = JSON.stringify({
+		model: MODEL,
+		prompt: prompts[pick],
+		stream: false,
+		options: {
+			num_predict: 100, // Cap tokens to keep tests fast
+		},
+	});
+	const res = http.post(`${OLLAMA_URL}/api/generate`, requestBody, {
+		headers: { 'Content-Type': 'application/json' },
+		timeout: '60s',
+	});
+
+	// Decode the body once; a body that is not valid JSON counts as "no response text".
+	let payload = null;
+	try {
+		payload = JSON.parse(res.body);
+	} catch {
+		payload = null;
+	}
+
+	const passed = check(res, {
+		'status is 200': (r) => r.status === 200,
+		'has response text': () =>
+			Boolean(payload && payload.response && payload.response.length > 0),
+	});
+
+	if (passed) {
+		try {
+			// eval_duration is divided by 1e9 to convert it to seconds.
+			if (payload.eval_count && payload.eval_duration) {
+				const seconds = payload.eval_duration / 1e9;
+				tokensPerSec.add(payload.eval_count / seconds);
+				totalTokens.add(payload.eval_count);
+			}
+		} catch (_) {}
+	}
+
+	errorRate.add(!passed);
+
+	// Longer pause between LLM requests than in the other scripts.
+	sleep(3 + Math.random() * 5);
+}
--- a/load-tests/sync-websocket.js
+++ b/load-tests/sync-websocket.js
@ -0,0 +1,84 @@
+/* eslint-disable no-undef, no-console, @typescript-eslint/no-unused-vars */
+import ws from 'k6/ws';
+import { check, sleep } from 'k6';
+import { Rate, Counter, Trend } from 'k6/metrics';
+
+// Custom metrics: failed-connection/error rate, message counters for both
+// directions, and the time measured between calling ws.connect and entering
+// its callback (flagged as a time value).
+const errorRate = new Rate('errors');
+const messagesReceived = new Counter('ws_messages_received');
+const messagesSent = new Counter('ws_messages_sent');
+const connectTime = new Trend('ws_connect_time', true);
+
+// Base WebSocket URL of the sync service; override with `-e SYNC_URL=...`.
+const SYNC_URL = __ENV.SYNC_URL || 'ws://localhost:3050';
+
+// Load profile: ramp to 10 VUs (30s), ramp to 30 VUs (3m), ramp down to 0 (30s).
+// Total run time is 4 minutes.
+export const options = {
+	stages: [
+		{ duration: '30s', target: 10 },
+		{ duration: '3m', target: 30 },
+		{ duration: '30s', target: 0 },
+	],
+	thresholds: {
+		errors: ['rate<0.10'],
+		ws_connect_time: ['p(95)<1000'],
+	},
+};
+
+/**
+ * One VU iteration against the sync WebSocket endpoint (`<SYNC_URL>/ws`).
+ *
+ * Opens a connection, sends a subscribe handshake on `open`, sends a
+ * `{ type: 'ping' }` message every 5 seconds, and closes the socket after a
+ * random 10–30s session. Afterwards it checks that the upgrade returned
+ * status 101, then pauses for a random 1–3s.
+ *
+ * Timers are registered on the socket (`socket.setInterval` /
+ * `socket.setTimeout`) rather than calling `sleep()` inside the connect
+ * callback. `sleep()` blocks the VU, so while it runs no socket events or
+ * ping timers can fire, and the socket would be closed right after the sleep.
+ */
+export default function () {
+	const url = `${SYNC_URL}/ws`;
+
+	const startTime = Date.now();
+	const res = ws.connect(url, {}, function (socket) {
+		connectTime.add(Date.now() - startTime);
+
+		socket.on('open', () => {
+			// Send a sync handshake (collection subscription)
+			const handshake = JSON.stringify({
+				type: 'subscribe',
+				collections: ['tasks', 'events', 'contacts'],
+				userId: `loadtest-vu-${__VU}`,
+				lastSyncTimestamp: new Date(Date.now() - 60000).toISOString(),
+			});
+			socket.send(handshake);
+			messagesSent.add(1);
+
+			// Periodic sync pings; the socket-bound interval only runs while
+			// the connection is open, so no explicit clearInterval is needed.
+			socket.setInterval(() => {
+				socket.send(JSON.stringify({ type: 'ping' }));
+				messagesSent.add(1);
+			}, 5000);
+		});
+
+		socket.on('message', (data) => {
+			messagesReceived.add(1);
+
+			// Parse and validate sync messages
+			try {
+				const msg = JSON.parse(data);
+				check(msg, {
+					'has type field': (m) => m.type !== undefined,
+				});
+			} catch (_) {
+				// Binary or non-JSON message
+			}
+		});
+
+		socket.on('error', (e) => {
+			errorRate.add(true);
+			console.error(`WS error VU ${__VU}: ${e.error()}`);
+		});
+
+		// Keep the connection alive for 10-30 seconds, then close it.
+		// ws.connect returns once the socket is closed.
+		const sessionMs = (Math.random() * 20 + 10) * 1000;
+		socket.setTimeout(() => {
+			socket.close();
+		}, sessionMs);
+	});
+
+	const ok = check(res, {
+		'WS connection status is 101': (r) => r && r.status === 101,
+	});
+	errorRate.add(!ok);
+
+	sleep(Math.random() * 2 + 1);
+}
--- a/load-tests/web-apps.js
+++ b/load-tests/web-apps.js
@ -0,0 +1,72 @@
+/* eslint-disable no-undef */
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend } from 'k6/metrics';
+
+// Custom metrics: rate of iterations with a failed check, and per-request
+// latency (flagged as a time value) tagged with the app name.
+const errorRate = new Rate('errors');
+const appLatency = new Trend('app_latency', true);
+
+// Scheme + host for local targets; each app's port is appended below.
+// Override with `-e BASE_URL=...`.
+const BASE = __ENV.BASE_URL || 'http://localhost';
+
+// Local targets: one entry per SvelteKit web app, addressed by port on BASE.
+const apps = [
+	{ name: 'dashboard', url: `${BASE}:5173` },
+	{ name: 'chat', url: `${BASE}:3000` },
+	{ name: 'todo', url: `${BASE}:5188` },
+	{ name: 'zitare', url: `${BASE}:5185` },
+	{ name: 'calendar', url: `${BASE}:5186` },
+	{ name: 'clock', url: `${BASE}:5187` },
+	{ name: 'contacts', url: `${BASE}:5176` },
+	{ name: 'storage', url: `${BASE}:5178` },
+	{ name: 'presi', url: `${BASE}:5180` },
+	{ name: 'manadeck', url: `${BASE}:5181` },
+	{ name: 'nutriphi', url: `${BASE}:5182` },
+	{ name: 'skilltree', url: `${BASE}:5183` },
+	{ name: 'photos', url: `${BASE}:5184` },
+	{ name: 'mukke', url: `${BASE}:5189` },
+	{ name: 'citycorners', url: `${BASE}:5190` },
+	{ name: 'picture', url: `${BASE}:5174` },
+	{ name: 'inventar', url: `${BASE}:5191` },
+];
+
+// Production targets, addressed by subdomain. This fixed list is used instead
+// of `apps` whenever BASE_URL starts with "https"; the BASE_URL value itself
+// is not used to build these URLs.
+const prodApps = [
+	{ name: 'dashboard', url: 'https://mana.how' },
+	{ name: 'chat', url: 'https://chat.mana.how' },
+	{ name: 'todo', url: 'https://todo.mana.how' },
+	{ name: 'calendar', url: 'https://calendar.mana.how' },
+	{ name: 'clock', url: 'https://clock.mana.how' },
+];
+
+// Load profile, 4 minutes in total.
+export const options = {
+	stages: [
+		{ duration: '30s', target: 10 }, // Ramp up to 10 VUs
+		{ duration: '3m', target: 50 }, // Ramp from 10 to 50 VUs over 3 minutes
+		{ duration: '30s', target: 0 }, // Ramp down
+	],
+	thresholds: {
+		http_req_duration: ['p(95)<2000'], // 95% under 2s
+		errors: ['rate<0.05'], // <5% errors
+	},
+};
+
+/**
+ * One VU iteration: requests the landing page of one randomly chosen app.
+ *
+ * Uses the production list when BASE_URL starts with "https", otherwise the
+ * local port-based list. The request is tagged with the app name and times
+ * out after 10s. Records whether any check failed and the request duration,
+ * then pauses for a random 0.5–2.5s.
+ */
+export default function () {
+	const againstProd = __ENV.BASE_URL?.startsWith('https');
+	const pool = againstProd ? prodApps : apps;
+	const index = Math.floor(Math.random() * pool.length);
+	const { name, url } = pool[index];
+
+	const response = http.get(url, {
+		tags: { app: name },
+		timeout: '10s',
+	});
+	const elapsedMs = response.timings.duration;
+
+	const passed = check(response, {
+		'status is 200': (r) => r.status === 200,
+		'response has body': (r) => r.body && r.body.length > 0,
+		'response time < 2s': (r) => r.timings.duration < 2000,
+	});
+
+	errorRate.add(!passed);
+	appLatency.add(elapsedMs, { app: name });
+
+	// 0.5-2.5s between requests
+	sleep(0.5 + Math.random() * 2);
+}