From 9276d9a2129c07fca1fc91c8625ed3b5bbc661eb Mon Sep 17 00:00:00 2001
From: Till JS <tills95@gmail.com>
Date: Sat, 28 Mar 2026 21:14:24 +0100
Subject: [PATCH] feat: GPU offload, signup limit, load tests & capacity
 planning

- Route all AI workloads (Ollama, STT, TTS, Image Gen) to GPU server
  (192.168.178.11) via LAN instead of host.docker.internal
- Upgrade default model to gemma3:12b and max concurrent to 5
- Add daily signup limit service (MAX_DAILY_SIGNUPS env var)
- Add GET /api/v1/auth/signup-status public endpoint
- Add k6 load test suite (web-apps, auth, sync-websocket, ollama)
- Add capacity planning documentation
- Fix: add eslint-config to sveltekit-base and calendar Dockerfiles

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 apps/calendar/apps/web/Dockerfile             |   1 +
 docker-compose.macmini.yml                    |  25 +--
 docker/Dockerfile.sveltekit-base              |   1 +
 docs/CAPACITY_PLANNING.md                     | 171 ++++++++++++++++++
 load-tests/README.md                          |  58 ++++++
 load-tests/auth-api.js                        |  81 +++++++++
 load-tests/llm-ollama.js                      |  82 +++++++++
 load-tests/sync-websocket.js                  |  84 +++++++++
 load-tests/web-apps.js                        |  72 ++++++++
 services/mana-auth/src/index.ts               |   4 +-
 services/mana-auth/src/routes/auth.ts         |  25 ++-
 .../mana-auth/src/services/signup-limit.ts    |  93 ++++++++++
 12 files changed, 683 insertions(+), 14 deletions(-)
 create mode 100644 docs/CAPACITY_PLANNING.md
 create mode 100644 load-tests/README.md
 create mode 100644 load-tests/auth-api.js
 create mode 100644 load-tests/llm-ollama.js
 create mode 100644 load-tests/sync-websocket.js
 create mode 100644 load-tests/web-apps.js
 create mode 100644 services/mana-auth/src/services/signup-limit.ts

diff --git a/apps/calendar/apps/web/Dockerfile b/apps/calendar/apps/web/Dockerfile
index 780fe81f8..1608b687c 100644
--- a/apps/calendar/apps/web/Dockerfile
+++ b/apps/calendar/apps/web/Dockerfile
@@ -23,6 +23,7 @@ COPY pnpm-lock.yaml ./
 # --- AUTO-GENERATED COPY STATEMENTS (do not edit manually) ---
 COPY patches/ ./patches/
 COPY packages/feedback ./packages/feedback
+COPY packages/eslint-config ./packages/eslint-config
 COPY packages/help ./packages/help
 COPY packages/local-store ./packages/local-store
 COPY packages/shared-api-client ./packages/shared-api-client
diff --git a/docker-compose.macmini.yml b/docker-compose.macmini.yml
index cdbdaf4df..3fcda5c5d 100644
--- a/docker-compose.macmini.yml
+++ b/docker-compose.macmini.yml
@@ -268,6 +268,7 @@ services:
       SMTP_USER: ${SMTP_USER:-94cde5002@smtp-brevo.com}
       SMTP_PASS: ${SMTP_PASSWORD}
       SYNAPSE_OIDC_CLIENT_SECRET: ${SYNAPSE_OIDC_CLIENT_SECRET:-}
+      MAX_DAILY_SIGNUPS: ${MAX_DAILY_SIGNUPS:-0}
       CORS_ORIGINS: https://mana.how,https://calendar.mana.how,https://chat.mana.how,https://clock.mana.how,https://contacts.mana.how,https://context.mana.how,https://docs.mana.how,https://element.mana.how,https://inventar.mana.how,https://link.mana.how,https://manadeck.mana.how,https://matrix.mana.how,https://mukke.mana.how,https://nutriphi.mana.how,https://photos.mana.how,https://picture.mana.how,https://planta.mana.how,https://playground.mana.how,https://presi.mana.how,https://questions.mana.how,https://skilltree.mana.how,https://storage.mana.how,https://todo.mana.how,https://traces.mana.how,https://zitare.mana.how
     ports:
       - "3001:3001"
@@ -419,8 +420,8 @@ services:
       REDIS_PASSWORD: ${REDIS_PASSWORD:-redis123}
       MANA_CORE_AUTH_URL: http://mana-auth:3001
       SEARCH_SERVICE_URL: http://mana-search:3012
-      STT_SERVICE_URL: http://host.docker.internal:3026
-      TTS_SERVICE_URL: http://host.docker.internal:3022
+      STT_SERVICE_URL: ${STT_SERVICE_URL:-http://192.168.178.11:3020}
+      TTS_SERVICE_URL: ${TTS_SERVICE_URL:-http://192.168.178.11:3022}
       CORS_ORIGINS: https://api.mana.how,https://mana.how
       ADMIN_USER_IDS: ${ADMIN_USER_IDS:-}
     ports:
@@ -755,12 +756,12 @@ services:
       REDIS_HOST: redis
       REDIS_PORT: 6379
       REDIS_PASSWORD: ${REDIS_PASSWORD:-redis123}
-      # Voice services
-      STT_URL: http://host.docker.internal:3026
-      TTS_URL: http://host.docker.internal:3022
-      # AI
-      OLLAMA_URL: http://host.docker.internal:11434
-      OLLAMA_MODEL: ${OLLAMA_MODEL:-gemma3:4b}
+      # Voice services (GPU server via LAN)
+      STT_URL: ${STT_SERVICE_URL:-http://192.168.178.11:3020}
+      TTS_URL: ${TTS_SERVICE_URL:-http://192.168.178.11:3022}
+      # AI (GPU server via LAN)
+      OLLAMA_URL: ${OLLAMA_URL:-http://192.168.178.11:11434}
+      OLLAMA_MODEL: ${OLLAMA_MODEL:-gemma3:12b}
       # Plugin tokens (all 21 bot identities)
       MATRIX_MANA_BOT_TOKEN: ${MATRIX_MANA_BOT_TOKEN}
       MATRIX_MANA_BOT_ROOMS: ${MATRIX_MANA_BOT_ROOMS:-}
@@ -1214,7 +1215,7 @@ services:
       DB_USER: postgres
       MANA_CORE_AUTH_URL: http://mana-auth:3001
       REPLICATE_API_TOKEN: ${REPLICATE_API_TOKEN}
-      IMAGE_GEN_SERVICE_URL: http://host.docker.internal:3025
+      IMAGE_GEN_SERVICE_URL: ${IMAGE_GEN_SERVICE_URL:-http://192.168.178.11:3023}
       APP_ID: picture-app
       MANA_CORE_SERVICE_KEY: ${MANA_CORE_SERVICE_KEY}
       S3_ENDPOINT: http://minio:9000
@@ -1298,8 +1299,8 @@ services:
     environment:
       PORT: 3020
       LOG_LEVEL: info
-      OLLAMA_URL: http://host.docker.internal:11434
-      OLLAMA_DEFAULT_MODEL: gemma3:4b
+      OLLAMA_URL: ${OLLAMA_URL:-http://192.168.178.11:11434}
+      OLLAMA_DEFAULT_MODEL: ${OLLAMA_MODEL:-gemma3:12b}
       OLLAMA_TIMEOUT: 120
       REDIS_URL: redis://redis:6379
       OPENROUTER_API_KEY: ${OPENROUTER_API_KEY:-}
@@ -1308,7 +1309,7 @@ services:
       GOOGLE_API_KEY: ${GOOGLE_API_KEY:-}
       GOOGLE_DEFAULT_MODEL: gemini-2.0-flash
       AUTO_FALLBACK_ENABLED: "true"
-      OLLAMA_MAX_CONCURRENT: 3
+      OLLAMA_MAX_CONCURRENT: 5
       CORS_ORIGINS: https://playground.mana.how,https://mana.how,https://chat.mana.how
     extra_hosts:
       - "host.docker.internal:host-gateway"
diff --git a/docker/Dockerfile.sveltekit-base b/docker/Dockerfile.sveltekit-base
index da8614729..0f2ec5cbe 100644
--- a/docker/Dockerfile.sveltekit-base
+++ b/docker/Dockerfile.sveltekit-base
@@ -54,6 +54,7 @@ COPY packages/shared-types ./packages/shared-types
 COPY packages/shared-ui ./packages/shared-ui
 COPY packages/shared-utils ./packages/shared-utils
 COPY packages/shared-vite-config ./packages/shared-vite-config
+COPY packages/eslint-config ./packages/eslint-config
 COPY packages/shared-hono ./packages/shared-hono
 COPY packages/shared-storage ./packages/shared-storage
 COPY packages/shared-landing-ui ./packages/shared-landing-ui
diff --git a/docs/CAPACITY_PLANNING.md b/docs/CAPACITY_PLANNING.md
new file mode 100644
index 000000000..5cece1da4
--- /dev/null
+++ b/docs/CAPACITY_PLANNING.md
@@ -0,0 +1,171 @@
+# Kapazitaetsplanung & Hardware-Ressourcen
+
+Stand: 2026-03-28
+
+## Hardware-Uebersicht
+
+### Mac Mini M4 (Produktionsserver)
+
+| Ressource | Wert |
+|-----------|------|
+| **CPU** | Apple M4, 10 Cores (4P + 6E) |
+| **RAM** | 16 GB Unified Memory |
+| **GPU** | 10-Core Metal (geteilt mit CPU/RAM) |
+| **Storage intern** | 228 GB SSD |
+| **Storage extern** | 4 TB SSD (USB-C, ~1 GB/s) |
+| **Netzwerk** | Cloudflare Tunnel (kein direktes Port-Forwarding) |
+
+### Windows GPU Server (AI-Workloads)
+
+| Ressource | Wert |
+|-----------|------|
+| **GPU** | NVIDIA RTX 3090, 24 GB VRAM |
+| **Netzwerk** | LAN (192.168.178.11), Tunnel-Subdomains unter gpu-*.mana.how |
+| **Services** | Ollama (11434), STT (3020), TTS (3022), Image Gen (3023) |
+| **Status** | **Aktiv** — alle AI-Services vom Mac Mini hierher verlagert |
+
+## Aktuelle Auslastung (Mac Mini)
+
+### Container-Uebersicht (61 Container)
+
+| Kategorie | Anzahl | Geschaetzter RAM |
+|-----------|--------|------------------|
+| Infrastruktur (Postgres, Redis, MinIO, Forgejo) | 7 | ~2.5 GB |
+| Core Services (Auth, Credits, User, Subs, Analytics) | 5 | ~0.8 GB |
+| Go Services (Gateway, Sync, Search, Notify, Crawler, Media) | 6 | ~0.3 GB |
+| Web Apps (SvelteKit, 19 Apps) | 19 | ~3.0 GB |
+| Backends (NestJS/Hono) | 3 | ~0.5 GB |
+| Matrix (Synapse, Element, Bots) | 4 | ~1.0 GB |
+| Monitoring (Grafana, Victoria, Loki, cAdvisor, etc.) | 13 | ~2.0 GB |
+| Sonstiges (Watchtower, Landing Builder, LLM) | 4 | ~0.5 GB |
+| **Gesamt** | **61** | **~10.6 GB** |
+
+### Native Services
+
+| Service | RAM (idle) | RAM (aktiv) |
+|---------|-----------|-------------|
+| Ollama (Gemma 3 4B) | ~0 MB (nach 5min entladen) | ~3.3 GB |
+| Ollama (Gemma 3 27B) | ~0 MB | ~16 GB (gesamter RAM!) |
+| FLUX.2 klein | ~0.5 GB | ~2 GB |
+
+### RAM-Budget
+
+```
+Verfuegbar:           16.0 GB
+Docker Container:    -10.6 GB
+macOS Overhead:       -1.5 GB
+─────────────────────────────
+Frei:                  3.9 GB  ← fuer Ollama, Builds, Peaks
+```
+
+**Kritisch:** Bei aktivem Ollama (3.3 GB fuer 4B-Modell) bleiben nur ~0.6 GB fuer Peaks.
+Build-Script stoppt deshalb 13 Monitoring-Container (~2 GB) vor dem Bauen.
+
+## Kapazitaetsschaetzung nach Workload-Typ
+
+### Tier 1: Statische/Local-First Apps (wenig Server-Last)
+
+Apps wie Todo, Calendar, Clock, Zitare, Contacts, etc.
+
+| Metrik | Wert | Begruendung |
+|--------|------|-------------|
+| Gleichzeitige User | **100-200** | Local-first: Reads aus IndexedDB, Server nur fuer Sync |
+| Sync-Connections (WebSocket) | **~50 aktiv** | mana-sync (Go) ist sehr effizient, ~10 KB/Connection |
+| Bottleneck | Cloudflare Tunnel Latenz (~4s first byte) | Nicht die App selbst |
+
+### Tier 2: API-lastige Apps (Chat, Questions, Context)
+
+| Metrik | Wert | Begruendung |
+|--------|------|-------------|
+| Gleichzeitige User | **20-50** | Abhaengig von Postgres-Connections (max 20 pro Service) |
+| API Requests/sec | **~100-200** | NestJS/Hono koennen mehr, DB ist Limit |
+| Bottleneck | PostgreSQL Connections + RAM | |
+
+### Tier 3: AI-Workloads (Ollama, FLUX.2)
+
+| Metrik | Wert | Begruendung |
+|--------|------|-------------|
+| LLM gleichzeitig | **1** | OLLAMA_NUM_PARALLEL=1, Modell belegt 3-16 GB |
+| LLM Durchsatz | **~53 tokens/sec** (4B) | ~260 tokens/sec Prompt Processing |
+| Bildgenerierung | **1 gleichzeitig** | ~1.5s pro 1024x1024 Bild |
+| Bottleneck | **RAM** (Ollama + Container konkurrieren) | |
+
+### Gesamtschaetzung
+
+| Szenario | Max. gleichzeitige User |
+|----------|------------------------|
+| Nur Local-First Apps | ~200 |
+| Mixed (Local-First + API) | ~50-100 |
+| Mit aktiver LLM-Nutzung | ~20-30 |
+| Peak (alle Services + LLM + Bildgen) | **~10-20** |
+
+## Bottleneck-Analyse
+
+| Rang | Bottleneck | Auswirkung | Loesung |
+|------|-----------|------------|---------|
+| 1 | **RAM (16 GB)** | Ollama + Container kaempfen um Speicher | RAM-Upgrade (neuer Mac Mini) oder GPU-Server fuer LLM |
+| 2 | **Cloudflare Tunnel Latenz** | ~4s TTFB fuer erste Requests | CDN/Workers fuer statische Assets |
+| 3 | **PostgreSQL Connections** | Max 20 pro Service, shared DB | Connection Pooling (PgBouncer) |
+| 4 | **Single Server** | Kein Failover, kein horizontales Scaling | Zweiter Mac Mini oder Cloud-Burst |
+
+## Scaling-Roadmap
+
+### Phase 1: Optimierung (0 EUR)
+
+- [x] GPU-Server ueber LAN anbinden → alle AI-Last vom Mac Mini verlagert
+- [x] Registrierungslimit implementiert (MAX_DAILY_SIGNUPS, default: unlimitiert)
+- [ ] PgBouncer fuer Connection Pooling einrichten
+- [ ] Cloudflare Cache Rules fuer statische Assets
+- [ ] Registrierungslimit aktivieren (5/Tag) in .env auf Server
+
+### Phase 2: RAM-Upgrade (~700 EUR)
+
+- [ ] Neuer Mac Mini M4 mit 32 GB → doppelte Kapazitaet
+- [ ] Oder: gebrauchter Mac Mini M2 als zweiter Server
+- [ ] Registrierungslimit auf 15/Tag erhoehen
+
+### Phase 3: Horizontales Scaling (~50 EUR/Monat)
+
+- [ ] Hetzner VPS fuer statische Web-Apps (CAX21: 8 GB, 4 vCPU, ~8 EUR/Monat)
+- [ ] Oder: Coolify/Kamal auf dediziertem Server
+- [ ] Registrierungslimit auf 50/Tag erhoehen
+
+### Phase 4: Production-Grade (~200 EUR/Monat)
+
+- [ ] Managed PostgreSQL (z.B. Supabase, Neon)
+- [ ] CDN fuer alle Web-Apps
+- [ ] Multi-Server mit Load Balancing
+- [ ] Registrierungslimit entfernen oder auf 500/Tag
+
+## Registrierungslimit ("Organic Growth Gate")
+
+Siehe Implementierung in `services/mana-auth/src/services/signup-limit.ts`.
+
+### Konzept
+
+Pro Tag koennen sich maximal X neue Nutzer registrieren. Das Limit ist konfigurierbar und waechst mit der Hardware.
+
+### Vorteile
+
+1. **Infrastruktur-Schutz:** Hardware waechst mit der Community
+2. **Exklusivitaet:** "Heute noch 2 Plaetze frei" erzeugt Nachfrage
+3. **Qualitaet:** Fruehe User geben besseres Feedback
+4. **Kostenlos:** Kein Over-Provisioning noetig
+
+### Geplante Limits
+
+| Phase | Limit | Kumuliert/30 Tage | Hardware |
+|-------|-------|-------------------|----------|
+| Start | 5/Tag | ~150 User | Mac Mini 16 GB |
+| Phase 2 | 15/Tag | ~450 User | Mac Mini 32 GB |
+| Phase 3 | 50/Tag | ~1500 User | Multi-Server |
+
+## Load Testing
+
+Load Tests liegen in `load-tests/`. Siehe `load-tests/README.md` fuer Ausfuehrung.
+
+### Empfohlene Test-Zyklen
+
+1. **Vor jedem Hardware-Upgrade:** Baseline messen
+2. **Nach Limit-Erhoehung:** Verifizieren dass Hardware haelt
+3. **Monatlich:** Regression erkennen
diff --git a/load-tests/README.md b/load-tests/README.md
new file mode 100644
index 000000000..287a72eee
--- /dev/null
+++ b/load-tests/README.md
@@ -0,0 +1,58 @@
+# Load Tests
+
+k6-basierte Load Tests fuer die Mana-Infrastruktur.
+
+## Setup
+
+```bash
+# k6 installieren (macOS)
+brew install k6
+
+# Keine zusaetzliche Extension fuer die Sync-Tests noetig:
+# k6 hat WebSocket-Support (Modul k6/ws) eingebaut
+```
+
+## Tests ausfuehren
+
+```bash
+# Gegen lokale Umgebung
+k6 run load-tests/web-apps.js
+k6 run load-tests/auth-api.js
+k6 run load-tests/sync-websocket.js
+k6 run load-tests/llm-ollama.js
+
+# Gegen Produktion (vorsichtig!)
+k6 run -e BASE_URL=https://mana.how load-tests/web-apps.js
+
+# Mit mehr/weniger Last
+k6 run --vus 100 --duration 5m load-tests/web-apps.js
+
+# JSON-Output fuer Grafana
+k6 run --out json=results.json load-tests/web-apps.js
+```
+
+## Test-Szenarien
+
+| Script | Ziel | Default VUs | Dauer |
+|--------|------|-------------|-------|
+| `web-apps.js` | SvelteKit Frontends (HTML-Responses) | 10→50→0 | 4 min |
+| `auth-api.js` | Health, Login, Register (Signup-Limit) | 5→20→0 | 3 min |
+| `sync-websocket.js` | mana-sync WebSocket Connections | 10→30→0 | 4 min |
+| `llm-ollama.js` | Ollama Chat Completions | 1→3→1 | 3 min |
+
+## Metriken interpretieren
+
+| Metrik | Gut | Akzeptabel | Schlecht |
+|--------|-----|-----------|---------|
+| http_req_duration (p95) | < 200ms | < 1s | > 2s |
+| http_req_failed | 0% | < 1% | > 5% |
+| ws_connecting (p95) | < 100ms | < 500ms | > 1s |
+| iterations | Steigend | Stabil | Fallend |
+
+## Monitoring waehrend Tests
+
+Grafana-Dashboard auf http://localhost:8080 (oder https://grafana.mana.how) beobachten:
+- Container CPU/RAM (cAdvisor)
+- PostgreSQL Connections
+- Redis Commands/sec
+- Netzwerk-Throughput
diff --git a/load-tests/auth-api.js b/load-tests/auth-api.js
new file mode 100644
index 000000000..5dfeae759
--- /dev/null
+++ b/load-tests/auth-api.js
@@ -0,0 +1,81 @@
+/* eslint-disable no-undef */
+import http from 'k6/http';
+import { check, sleep, group } from 'k6';
+import { Rate, Counter } from 'k6/metrics';
+
+const errorRate = new Rate('errors');
+const signupBlocked = new Counter('signup_blocked');
+
+const AUTH_URL = __ENV.AUTH_URL || 'http://localhost:3001';
+
+export const options = {
+	stages: [
+		{ duration: '30s', target: 5 },
+		{ duration: '2m', target: 20 },
+		{ duration: '30s', target: 0 },
+	],
+	thresholds: {
+		http_req_duration: ['p(95)<3000'],
+		errors: ['rate<0.10'],
+	},
+};
+
+// Generate unique test emails
+function testEmail(vuId, iter) {
+	return `loadtest_vu${vuId}_${iter}_${Date.now()}@test.invalid`;
+}
+
+export default function () {
+	// Weighted random: 70% health, 20% login attempts, 10% register
+	const roll = Math.random();
+
+	if (roll < 0.7) {
+		// Health check — lightweight, tests baseline
+		group('health', () => {
+			const res = http.get(`${AUTH_URL}/health`);
+			const ok = check(res, {
+				'health 200': (r) => r.status === 200,
+			});
+			errorRate.add(!ok);
+		});
+	} else if (roll < 0.9) {
+		// Login attempt with invalid credentials — tests lockout + DB
+		group('login', () => {
+			const res = http.post(
+				`${AUTH_URL}/api/v1/auth/login`,
+				JSON.stringify({
+					email: 'loadtest@nonexistent.invalid',
+					password: 'wrongpassword',
+				}),
+				{ headers: { 'Content-Type': 'application/json' } }
+			);
+			const ok = check(res, {
+				'login returns 401 or 429': (r) => r.status === 401 || r.status === 429,
+			});
+			errorRate.add(!ok);
+		});
+	} else {
+		// Registration — tests signup limit
+		group('register', () => {
+			const email = testEmail(__VU, __ITER);
+			const res = http.post(
+				`${AUTH_URL}/api/v1/auth/register`,
+				JSON.stringify({
+					email: email,
+					password: 'TestPassword123!',
+					name: `Load Test ${__VU}`,
+				}),
+				{ headers: { 'Content-Type': 'application/json' } }
+			);
+			const ok = check(res, {
+				'register returns 200 or 429': (r) => r.status === 200 || r.status === 429,
+			});
+			if (res.status === 429) {
+				signupBlocked.add(1);
+			}
+			errorRate.add(!ok);
+		});
+	}
+
+	sleep(Math.random() * 1.5 + 0.5);
+}
diff --git a/load-tests/llm-ollama.js b/load-tests/llm-ollama.js
new file mode 100644
index 000000000..a9a5f08ee
--- /dev/null
+++ b/load-tests/llm-ollama.js
@@ -0,0 +1,82 @@
+/* eslint-disable no-undef, @typescript-eslint/no-unused-vars */
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend, Counter } from 'k6/metrics';
+
+const errorRate = new Rate('errors');
+const tokensPerSec = new Trend('tokens_per_second', true);
+const totalTokens = new Counter('total_tokens_generated');
+
+const OLLAMA_URL = __ENV.OLLAMA_URL || 'http://localhost:11434';
+const MODEL = __ENV.MODEL || 'gemma3:4b';
+
+export const options = {
+	// LLM is single-threaded effectively — test with few VUs
+	stages: [
+		{ duration: '30s', target: 1 },
+		{ duration: '2m', target: 3 },
+		{ duration: '30s', target: 1 },
+	],
+	thresholds: {
+		http_req_duration: ['p(95)<30000'], // LLM responses can be slow
+		errors: ['rate<0.10'],
+	},
+};
+
+const prompts = [
+	'Was ist die Hauptstadt von Deutschland? Antworte in einem Satz.',
+	'Erklaere Photosynthese in 2 Saetzen.',
+	'Schreibe ein kurzes Haiku ueber Programmierung.',
+	'Was ist der Unterschied zwischen TCP und UDP? Kurz.',
+	'Nenne 3 Vorteile von Self-Hosting.',
+];
+
+export default function () {
+	const prompt = prompts[Math.floor(Math.random() * prompts.length)];
+
+	// Non-streaming request for easier metrics
+	const res = http.post(
+		`${OLLAMA_URL}/api/generate`,
+		JSON.stringify({
+			model: MODEL,
+			prompt: prompt,
+			stream: false,
+			options: {
+				num_predict: 100, // Cap tokens to keep tests fast
+			},
+		}),
+		{
+			headers: { 'Content-Type': 'application/json' },
+			timeout: '60s',
+		}
+	);
+
+	const ok = check(res, {
+		'status is 200': (r) => r.status === 200,
+		'has response text': (r) => {
+			try {
+				const body = JSON.parse(r.body);
+				return body.response && body.response.length > 0;
+			} catch {
+				return false;
+			}
+		},
+	});
+
+	if (ok && res.status === 200) {
+		try {
+			const body = JSON.parse(res.body);
+			// Ollama returns eval_count and eval_duration
+			if (body.eval_count && body.eval_duration) {
+				const tps = body.eval_count / (body.eval_duration / 1e9);
+				tokensPerSec.add(tps);
+				totalTokens.add(body.eval_count);
+			}
+		} catch (_) {}
+	}
+
+	errorRate.add(!ok);
+
+	// Longer pause between LLM requests — realistic usage
+	sleep(Math.random() * 5 + 3);
+}
diff --git a/load-tests/sync-websocket.js b/load-tests/sync-websocket.js
new file mode 100644
index 000000000..495e1c705
--- /dev/null
+++ b/load-tests/sync-websocket.js
@@ -0,0 +1,84 @@
+/* eslint-disable no-undef, no-console, @typescript-eslint/no-unused-vars */
+import ws from 'k6/ws';
+import { check, sleep } from 'k6';
+import { Rate, Counter, Trend } from 'k6/metrics';
+
+const errorRate = new Rate('errors');
+const messagesReceived = new Counter('ws_messages_received');
+const messagesSent = new Counter('ws_messages_sent');
+const connectTime = new Trend('ws_connect_time', true);
+
+const SYNC_URL = __ENV.SYNC_URL || 'ws://localhost:3050';
+
+export const options = {
+	stages: [
+		{ duration: '30s', target: 10 },
+		{ duration: '3m', target: 30 },
+		{ duration: '30s', target: 0 },
+	],
+	thresholds: {
+		errors: ['rate<0.10'],
+		ws_connect_time: ['p(95)<1000'],
+	},
+};
+
+export default function () {
+	const url = `${SYNC_URL}/ws`;
+
+	const startTime = Date.now();
+	const res = ws.connect(url, {}, function (socket) {
+		const connected = Date.now() - startTime;
+		connectTime.add(connected);
+
+		socket.on('open', () => {
+			// Send a sync handshake (collection subscription)
+			const handshake = JSON.stringify({
+				type: 'subscribe',
+				collections: ['tasks', 'events', 'contacts'],
+				userId: `loadtest-vu-${__VU}`,
+				lastSyncTimestamp: new Date(Date.now() - 60000).toISOString(),
+			});
+			socket.send(handshake);
+			messagesSent.add(1);
+		});
+
+		socket.on('message', (data) => {
+			messagesReceived.add(1);
+
+			// Parse and validate sync messages
+			try {
+				const msg = JSON.parse(data);
+				check(msg, {
+					'has type field': (m) => m.type !== undefined,
+				});
+			} catch (_) {
+				// Binary or non-JSON message
+			}
+		});
+
+		socket.on('error', (e) => {
+			errorRate.add(true);
+			console.error(`WS error VU ${__VU}: ${e.error()}`);
+		});
+
+		// Keep connection alive for 10-30 seconds (simulates real user session)
+		const sessionDuration = Math.random() * 20 + 10;
+
+		// Send periodic sync pings
+		const pingInterval = setInterval(() => {
+			socket.send(JSON.stringify({ type: 'ping' }));
+			messagesSent.add(1);
+		}, 5000);
+
+		sleep(sessionDuration);
+		clearInterval(pingInterval);
+		socket.close();
+	});
+
+	const ok = check(res, {
+		'WS connection status is 101': (r) => r && r.status === 101,
+	});
+	errorRate.add(!ok);
+
+	sleep(Math.random() * 2 + 1);
+}
diff --git a/load-tests/web-apps.js b/load-tests/web-apps.js
new file mode 100644
index 000000000..7305a4ea5
--- /dev/null
+++ b/load-tests/web-apps.js
@@ -0,0 +1,72 @@
+/* eslint-disable no-undef */
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend } from 'k6/metrics';
+
+const errorRate = new Rate('errors');
+const appLatency = new Trend('app_latency', true);
+
+const BASE = __ENV.BASE_URL || 'http://localhost';
+
+// All deployed SvelteKit web apps with their ports
+const apps = [
+	{ name: 'dashboard', url: `${BASE}:5173` },
+	{ name: 'chat', url: `${BASE}:3000` },
+	{ name: 'todo', url: `${BASE}:5188` },
+	{ name: 'zitare', url: `${BASE}:5185` },
+	{ name: 'calendar', url: `${BASE}:5186` },
+	{ name: 'clock', url: `${BASE}:5187` },
+	{ name: 'contacts', url: `${BASE}:5176` },
+	{ name: 'storage', url: `${BASE}:5178` },
+	{ name: 'presi', url: `${BASE}:5180` },
+	{ name: 'manadeck', url: `${BASE}:5181` },
+	{ name: 'nutriphi', url: `${BASE}:5182` },
+	{ name: 'skilltree', url: `${BASE}:5183` },
+	{ name: 'photos', url: `${BASE}:5184` },
+	{ name: 'mukke', url: `${BASE}:5189` },
+	{ name: 'citycorners', url: `${BASE}:5190` },
+	{ name: 'picture', url: `${BASE}:5174` },
+	{ name: 'inventar', url: `${BASE}:5191` },
+];
+
+// When testing against production, use subdomains
+const prodApps = [
+	{ name: 'dashboard', url: 'https://mana.how' },
+	{ name: 'chat', url: 'https://chat.mana.how' },
+	{ name: 'todo', url: 'https://todo.mana.how' },
+	{ name: 'calendar', url: 'https://calendar.mana.how' },
+	{ name: 'clock', url: 'https://clock.mana.how' },
+];
+
+export const options = {
+	stages: [
+		{ duration: '30s', target: 10 }, // Ramp up to 10 VUs
+		{ duration: '3m', target: 50 }, // Ramp linearly from 10 to 50 VUs (no plateau)
+		{ duration: '30s', target: 0 }, // Ramp down
+	],
+	thresholds: {
+		http_req_duration: ['p(95)<2000'], // 95% under 2s
+		errors: ['rate<0.05'], // <5% errors
+	},
+};
+
+export default function () {
+	const targets = __ENV.BASE_URL?.startsWith('https') ? prodApps : apps;
+	const app = targets[Math.floor(Math.random() * targets.length)];
+
+	const res = http.get(app.url, {
+		tags: { app: app.name },
+		timeout: '10s',
+	});
+
+	const success = check(res, {
+		'status is 200': (r) => r.status === 200,
+		'response has body': (r) => r.body && r.body.length > 0,
+		'response time < 2s': (r) => r.timings.duration < 2000,
+	});
+
+	errorRate.add(!success);
+	appLatency.add(res.timings.duration, { app: app.name });
+
+	sleep(Math.random() * 2 + 0.5); // 0.5-2.5s between requests
+}
diff --git a/services/mana-auth/src/index.ts b/services/mana-auth/src/index.ts
index 0ab14b7c7..3384d138f 100644
--- a/services/mana-auth/src/index.ts
+++ b/services/mana-auth/src/index.ts
@@ -15,6 +15,7 @@ import { jwtAuth } from './middleware/jwt-auth';
 import { serviceAuth } from './middleware/service-auth';
 import { initializeEmail } from './email/send';
 import { SecurityEventsService, AccountLockoutService } from './services/security';
+import { SignupLimitService } from './services/signup-limit';
 import { ApiKeysService } from './services/api-keys';
 import { createAuthRoutes } from './routes/auth';
 import { createGuildRoutes } from './routes/guilds';
@@ -31,6 +32,7 @@ const auth = createBetterAuth(config.databaseUrl);
 initializeEmail(config.smtp);
 const security = new SecurityEventsService(db);
 const lockout = new AccountLockoutService(db);
+const signupLimit = new SignupLimitService(db);
 const apiKeysService = new ApiKeysService(db);
 
 // ─── App ────────────────────────────────────────────────────
@@ -61,7 +63,7 @@ app.get('/.well-known/openid-configuration', async (c) => auth.handler(c.req.raw
 
 // ─── Custom Auth Endpoints ──────────────────────────────────
 
-app.route('/api/v1/auth', createAuthRoutes(auth, config, security, lockout));
+app.route('/api/v1/auth', createAuthRoutes(auth, config, security, lockout, signupLimit));
 
 // ─── Guilds ─────────────────────────────────────────────────
 
diff --git a/services/mana-auth/src/routes/auth.ts b/services/mana-auth/src/routes/auth.ts
index 6487ab7a0..7ba155b83 100644
--- a/services/mana-auth/src/routes/auth.ts
+++ b/services/mana-auth/src/routes/auth.ts
@@ -9,6 +9,7 @@ import { Hono } from 'hono';
 import type { AuthUser } from '../middleware/jwt-auth';
 import type { BetterAuthInstance } from '../auth/better-auth.config';
 import type { SecurityEventsService, AccountLockoutService } from '../services/security';
+import type { SignupLimitService } from '../services/signup-limit';
 import type { Config } from '../config';
 import { sourceAppStore, passwordResetRedirectStore } from '../auth/stores';
 
@@ -16,15 +17,37 @@ export function createAuthRoutes(
 	auth: BetterAuthInstance,
 	config: Config,
 	security: SecurityEventsService,
-	lockout: AccountLockoutService
+	lockout: AccountLockoutService,
+	signupLimit: SignupLimitService
 ) {
 	const app = new Hono<{ Variables: { user: AuthUser } }>();
 
 	// ─── Registration ────────────────────────────────────────
 
+	// ─── Signup Status (public) ─────────────────────────────
+
+	app.get('/signup-status', async (c) => {
+		const status = await signupLimit.getStatus();
+		return c.json(status);
+	});
+
 	app.post('/register', async (c) => {
 		const body = await c.req.json();
 
+		// Check daily signup limit
+		const limitCheck = await signupLimit.checkLimit();
+		if (!limitCheck.allowed) {
+			return c.json(
+				{
+					error: 'Registration limit reached',
+					message: 'Das tägliche Registrierungslimit ist erreicht. Versuche es morgen wieder.',
+					spotsRemaining: 0,
+					resetsAt: limitCheck.resetsAt,
+				},
+				429
+			);
+		}
+
 		// Store source app URL for email verification redirect
 		if (body.sourceAppUrl && body.email) {
 			sourceAppStore.set(body.email, body.sourceAppUrl);
diff --git a/services/mana-auth/src/services/signup-limit.ts b/services/mana-auth/src/services/signup-limit.ts
new file mode 100644
index 000000000..365e7cdf9
--- /dev/null
+++ b/services/mana-auth/src/services/signup-limit.ts
@@ -0,0 +1,93 @@
+/**
+ * Signup Limit — Daily registration cap ("Organic Growth Gate")
+ *
+ * Limits new registrations per day to protect hardware and
+ * enable organic growth. Uses PostgreSQL security_events table
+ * (no Redis dependency needed).
+ *
+ * Configure via MAX_DAILY_SIGNUPS env var (default: 0 = unlimited).
+ */
+
+import { sql } from 'drizzle-orm';
+import type { Database } from '../db/connection';
+
+export class SignupLimitService {
+	/** Maximum registrations per day; 0 means the limit is disabled. */
+	private maxDaily: number;
+
+	constructor(private db: Database) {
+		// Non-numeric or negative values disable the limit; a NaN limit would
+		// make every `count < limit` check false and block all registrations.
+		const parsed = Number.parseInt(process.env.MAX_DAILY_SIGNUPS || '0', 10);
+		this.maxDaily = Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
+	}
+
+	/** Check if registration is allowed right now (always allowed when disabled). */
+	async checkLimit(): Promise<{
+		allowed: boolean;
+		current: number;
+		limit: number;
+		resetsAt: string;
+	}> {
+		// 0 = unlimited (feature disabled)
+		if (this.maxDaily <= 0) {
+			return { allowed: true, current: 0, limit: 0, resetsAt: '' };
+		}
+
+		const todayCount = await this.getTodayCount();
+		return {
+			allowed: todayCount < this.maxDaily,
+			current: todayCount,
+			limit: this.maxDaily,
+			resetsAt: this.nextMidnight(),
+		};
+	}
+
+	/** ISO timestamp of the next midnight in the server process's local time zone. */
+	private nextMidnight(): string {
+		const midnight = new Date();
+		midnight.setHours(24, 0, 0, 0);
+		return midnight.toISOString();
+	}
+
+	/**
+	 * Count today's REGISTER events, where "today" is the database's CURRENT_DATE.
+	 * NOTE(review): CURRENT_DATE follows the DB session time zone — confirm it matches nextMidnight().
+	 */
+	private async getTodayCount(): Promise<number> {
+		try {
+			const result = await this.db.execute(
+				sql`SELECT COUNT(*) as count
+				FROM auth.security_events
+				WHERE event_type = 'REGISTER'
+				AND created_at >= CURRENT_DATE
+				AND created_at < CURRENT_DATE + INTERVAL '1 day'`
+			);
+			const row = (result as any)[0];
+			return row ? Number(row.count) : 0;
+		} catch (err) {
+			// eslint-disable-next-line no-console -- fail open, but leave a trace
+			console.error('[signup-limit] count query failed; allowing registration', err);
+			return 0;
+		}
+	}
+
+	/** Public status for the signup page; spotsRemaining is null when unlimited. */
+	async getStatus(): Promise<{
+		registrationOpen: boolean;
+		spotsRemaining: number | null;
+		totalToday: number;
+		limit: number;
+		resetsAt: string;
+	}> {
+		// Derive from checkLimit() so both endpoints always report the same state.
+		const { allowed, current, limit, resetsAt } = await this.checkLimit();
+		return {
+			registrationOpen: allowed,
+			spotsRemaining: limit > 0 ? Math.max(0, limit - current) : null,
+			totalToday: current,
+			limit,
+			resetsAt,
+		};
+	}
+}