From 029c7973ef07758cb28b476227d44b8ec2190e1e Mon Sep 17 00:00:00 2001 From: Till JS Date: Wed, 8 Apr 2026 16:40:26 +0200 Subject: [PATCH] feat(mana/web): pass MANA_LLM_API_KEY from voice parse proxies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /api/v1/voice/parse-task and /api/v1/voice/parse-habit endpoints forwarded transcripts to mana-llm without an X-API-Key header. This worked against the local mana-llm container (no auth) but silently fell back to the no-LLM path when pointed at gpu-llm.mana.how, which requires an API key — voice quick-add would look like it was running in degraded mode forever with no signal that auth was the cause. Now both endpoints read MANA_LLM_API_KEY from the server-side env and attach it as X-API-Key when present, mirroring the pattern already used by /api/v1/voice/transcribe for mana-stt. When the var is empty the header is omitted, so local Docker setups without auth still work. Plumbing: generate-env.mjs writes MANA_LLM_URL + MANA_LLM_API_KEY into apps/mana/apps/web/.env, .env.development gets the new keys with empty defaults, ENVIRONMENT_VARIABLES.md documents the gateway and where to get a key. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .env.development | 6 +++++ .../api/v1/voice/parse-habit/+server.ts | 5 ++++- .../routes/api/v1/voice/parse-task/+server.ts | 5 ++++- docs/ENVIRONMENT_VARIABLES.md | 22 +++++++++++++++++++ scripts/generate-env.mjs | 5 +++++ 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/.env.development b/.env.development index 0593305a3..8f4e69db1 100644 --- a/.env.development +++ b/.env.development @@ -167,6 +167,12 @@ OPENROUTER_API_KEY=sk-or-v1-5bcd6de8d88ed9b7211230892df44764b2013d57d4d3c14ec302 # Or set to direct URL if Ollama is exposed (e.g., https://ollama.mana.how) OLLAMA_URL=http://localhost:11434 +# mana-llm (OpenAI-compatible gateway, port 3025 locally / llm.mana.how prod) +# Used by server-side voice quick-add proxies (parse-task, parse-habit). +# API key is required when pointing at the GPU LLM proxy (gpu-llm.mana.how). +MANA_LLM_URL=http://localhost:3025 +MANA_LLM_API_KEY= + # ============================================ # MAERCHENZAUBER PROJECT # ============================================ diff --git a/apps/mana/apps/web/src/routes/api/v1/voice/parse-habit/+server.ts b/apps/mana/apps/web/src/routes/api/v1/voice/parse-habit/+server.ts index a47134777..d0c4c3217 100644 --- a/apps/mana/apps/web/src/routes/api/v1/voice/parse-habit/+server.ts +++ b/apps/mana/apps/web/src/routes/api/v1/voice/parse-habit/+server.ts @@ -107,14 +107,17 @@ export const POST: RequestHandler = async ({ request }) => { if (!transcript || habits.length === 0) return json(fallback()); const llmUrl = env.MANA_LLM_URL || env.PUBLIC_MANA_LLM_URL || 'http://localhost:3025'; + const apiKey = env.MANA_LLM_API_KEY; let response: Response; const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), LLM_TIMEOUT_MS); try { + const headers: Record<string, string> = { 'Content-Type': 'application/json' }; + if (apiKey) headers['X-API-Key'] = apiKey; response = await fetch(`${llmUrl.replace(/\/$/, '')}/v1/chat/completions`, { 
method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers, signal: controller.signal, body: JSON.stringify({ model: DEFAULT_MODEL, diff --git a/apps/mana/apps/web/src/routes/api/v1/voice/parse-task/+server.ts b/apps/mana/apps/web/src/routes/api/v1/voice/parse-task/+server.ts index 93a9ec659..aa5a15483 100644 --- a/apps/mana/apps/web/src/routes/api/v1/voice/parse-task/+server.ts +++ b/apps/mana/apps/web/src/routes/api/v1/voice/parse-task/+server.ts @@ -101,14 +101,17 @@ export const POST: RequestHandler = async ({ request }) => { if (!transcript) return json(fallback('')); const llmUrl = env.MANA_LLM_URL || env.PUBLIC_MANA_LLM_URL || 'http://localhost:3025'; + const apiKey = env.MANA_LLM_API_KEY; let response: Response; const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), LLM_TIMEOUT_MS); try { + const headers: Record<string, string> = { 'Content-Type': 'application/json' }; + if (apiKey) headers['X-API-Key'] = apiKey; response = await fetch(`${llmUrl.replace(/\/$/, '')}/v1/chat/completions`, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers, signal: controller.signal, body: JSON.stringify({ model: DEFAULT_MODEL, diff --git a/docs/ENVIRONMENT_VARIABLES.md b/docs/ENVIRONMENT_VARIABLES.md index 71ffc4bd1..e46f41367 100644 --- a/docs/ENVIRONMENT_VARIABLES.md +++ b/docs/ENVIRONMENT_VARIABLES.md @@ -153,6 +153,28 @@ curl https://gpu-stt.mana.how/health If this returns 502, see "GPU Tunnel" in `docs/MAC_MINI_SERVER.md` for the standard debug ladder. +### LLM gateway (mana-llm) + +Used by the unified Mana web app's voice quick-add features to turn transcripts into structured +data: `/api/v1/voice/parse-task` (todo titles + due dates + priorities) and `/api/v1/voice/parse-habit` +(habit picker for voice logging). 
Both proxies live server-side and degrade gracefully — if +mana-llm is unreachable or unauthorized, the endpoints return a fallback shape and voice quick-add +still works, just without LLM enrichment. + +| Variable | Description | Default | +|----------|-------------|---------| +| `MANA_LLM_URL` | mana-llm gateway URL (server-side, never exposed) | `http://localhost:3025` | +| `MANA_LLM_API_KEY` | API key — required when pointing at the GPU LLM proxy. **Never commit a real value.** | _(empty)_ | +| `PUBLIC_MANA_LLM_URL` | Same URL exposed to the browser for direct use (status page, playground) | mirrors `MANA_LLM_URL` | + +**Local dev**: leave `MANA_LLM_URL=http://localhost:3025` and run mana-llm in Docker. If your local +mana-llm has no models loaded (`curl http://localhost:3025/v1/models` returns `{"data":[]}`), point +at the public proxy with `MANA_LLM_URL=https://gpu-llm.mana.how` and set `MANA_LLM_API_KEY` to a key +from `services/mana-llm/.env` on the GPU box. + +**Endpoints:** `http://localhost:3025` (Docker), `https://llm.mana.how` (Mac Mini, no auth), +`https://gpu-llm.mana.how` (GPU server, X-API-Key required). + ## Adding New Variables ### Step 1: Add to `.env.development` diff --git a/scripts/generate-env.mjs b/scripts/generate-env.mjs index 2d1a02244..e236ff0d0 100644 --- a/scripts/generate-env.mjs +++ b/scripts/generate-env.mjs @@ -129,6 +129,11 @@ const APP_CONFIGS = [ // Speech-to-Text proxy (server-side only, never exposed to the client) MANA_STT_URL: (env) => env.STT_URL || 'http://localhost:3020', MANA_STT_API_KEY: (env) => env.MANA_STT_API_KEY || '', + // LLM proxy for /api/v1/voice/parse-task and /api/v1/voice/parse-habit + // (server-side only). The fallback path inside those endpoints + // keeps voice quick-add usable when this is unset. + MANA_LLM_URL: (env) => env.MANA_LLM_URL || 'http://localhost:3025', + MANA_LLM_API_KEY: (env) => env.MANA_LLM_API_KEY || '', }, },