feat(writing): M3 — one-shot prose generation via mana-llm

Server:
- New llmText() helper in apps/api/src/lib/llm.ts for plain-text
  (non-streaming) completions with token-usage reporting.
- POST /api/v1/writing/generations (Hono + requireTier('beta'))
  accepts system+user prompts, forwards to mana-llm (default model
  ollama/gemma3:4b), returns raw output + model + tokenUsage. The
  endpoint is stateless — draft/version bookkeeping is entirely
  client-side so the same route serves refinement calls later.

Client:
- writing/api.ts — Bearer-authed fetch client (follows the food/
  news-research pattern).
- writing/utils/prompt-builder.ts — pure builder turning a briefing
  (+ optional style preset / extracted principles) into a system+user
  pair. Forbids preamble / sign-off / meta commentary so the output is
  ready to paste into a version.
- writing/stores/generations.svelte.ts — orchestrates the full flow:
  queued → running → call → new LocalDraftVersion → pointer flip →
  succeeded. On failure leaves the current version untouched with the
  error on the generation record. Emits WritingDraftGenerationStarted /
  WritingDraftVersionCreated / WritingDraftGenerationFailed events.

UI:
- Generate button in DetailView.svelte (label flips "Generate" / "Neu
  generieren" based on whether the draft already has content).
- GenerationStatus.svelte strip surfaces queued / running / failed with
  model + duration badges; succeeded generations auto-disappear because
  the new version is already live via the currentVersionId pointer.

M3 is synchronous and non-streaming by design. M7 adds mission-based
long-form with streaming + outline stage + reference injection. M6 will
reuse the same /generations endpoint for selection-refinement prompts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-24 15:11:48 +02:00
parent 3c3b2ebbc7
commit d725a8df8b
9 changed files with 814 additions and 11 deletions

View file

@ -42,6 +42,7 @@ import { newsRoutes } from './modules/news/routes';
import { newsResearchRoutes } from './modules/news-research/routes';
import { articlesRoutes } from './modules/articles/routes';
import { tracesRoutes } from './modules/traces/routes';
import { writingRoutes } from './modules/writing/routes';
import { presiRoutes } from './modules/presi/routes';
import { researchRoutes } from './modules/research/routes';
import { whoRoutes } from './modules/who/routes';
@ -96,6 +97,7 @@ const RESOURCE_MODULES = [
'research',
'traces',
'who',
'writing',
] as const;
for (const mod of RESOURCE_MODULES) {
app.use(`/api/v1/${mod}/*`, requireTier('beta'));
@ -131,6 +133,7 @@ app.route('/api/v1/presi', presiRoutes);
app.route('/api/v1/research', researchRoutes);
app.route('/api/v1/website', websiteRoutes);
app.route('/api/v1/who', whoRoutes);
app.route('/api/v1/writing', writingRoutes);
// ─── Server Info ────────────────────────────────────────────
console.log(`mana-api starting on port ${PORT}...`);

View file

@ -31,6 +31,15 @@ export interface LlmJsonOptions {
maxTokens?: number;
}
/** Options for a one-shot, non-streaming text completion (see llmText). */
export interface LlmTextOptions {
  /** Provider/model id, e.g. 'ollama/gemma3:4b'. */
  model: string;
  /** Optional system prompt (passed through buildMessages). */
  system?: string;
  /** The main user prompt. */
  user: string;
  /** Sampling temperature; llmText defaults to 0.7 when unset. */
  temperature?: number;
  /** Completion token ceiling; llmText defaults to 2000 when unset. */
  maxTokens?: number;
  /** Aborts the underlying fetch (caller-side cancellation). */
  signal?: AbortSignal;
}
export interface LlmStreamOptions {
model: string;
system?: string;
@ -101,6 +110,56 @@ export async function llmJson<T = unknown>(opts: LlmJsonOptions): Promise<T> {
}
}
/**
 * Call the LLM and return the raw text content — no JSON parsing, no
 * streaming. Used when you want a finished prose artifact (a generated
 * draft, a summary, a translation) as one string. Includes token usage
 * when the provider reports it so generation records can store it.
 */
export interface LlmTextResult {
  /** Trimmed completion text. */
  text: string;
  /** Prompt/completion token counts, when the provider reported usage. */
  tokenUsage?: { input: number; output: number };
  /** Model the provider actually ran (falls back to the requested model). */
  model: string;
}

/**
 * One-shot, non-streaming chat completion against mana-llm.
 *
 * @throws LlmError when the HTTP call fails or the response has no content.
 */
export async function llmText(opts: LlmTextOptions): Promise<LlmTextResult> {
  const res = await fetch(`${LLM_URL}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: opts.model,
      messages: buildMessages(opts.system, opts.user),
      temperature: opts.temperature ?? 0.7,
      max_tokens: opts.maxTokens ?? 2000,
    }),
    signal: opts.signal,
  });
  if (!res.ok) {
    // Keep the provider's error body for debugging; ignore read failures.
    const body = await res.text().catch(() => '');
    throw new LlmError(`mana-llm returned ${res.status}`, res.status, body);
  }
  const data = (await res.json()) as {
    choices?: Array<{ message?: { content?: string } }>;
    usage?: { prompt_tokens?: number; completion_tokens?: number };
    model?: string;
  };
  const text = data.choices?.[0]?.message?.content;
  if (!text) throw new LlmError('mana-llm response missing content');
  // Fix: accept usage when EITHER count is reported — the previous check
  // keyed only on prompt_tokens and silently dropped completion_tokens
  // whenever the provider omitted the prompt count.
  const usage = data.usage;
  const tokenUsage =
    usage &&
    (typeof usage.prompt_tokens === 'number' || typeof usage.completion_tokens === 'number')
      ? {
          input: usage.prompt_tokens ?? 0,
          output: usage.completion_tokens ?? 0,
        }
      : undefined;
  return {
    text: text.trim(),
    tokenUsage,
    model: data.model ?? opts.model,
  };
}
/**
* Call the LLM in streaming mode. Invokes onToken() for each delta and
* returns the full concatenated text once the stream completes.

View file

@ -0,0 +1,94 @@
/**
 * Writing module — one-shot prose generation against mana-llm.
 *
 * M3 scope: the client sends a fully-built prompt (system + user), we
 * round-trip to mana-llm and return the raw completion text. Draft +
 * version bookkeeping stays entirely client-side — the browser writes
 * the returned text into a new LocalDraftVersion via the generations
 * store. This keeps the server stateless and lets the same endpoint
 * serve refinement calls later (shorten / expand / tone).
 *
 * Later milestones:
 * - M6: selection-refinement tools will call this same endpoint with
 *   different system/user prompts (shorten, expand, change tone).
 * - M7: long-form drafts flip to mana-ai missions with streaming; the
 *   sync endpoint here stays for short-form as a fast path.
 */
import { Hono } from 'hono';
import { llmText, LlmError } from '../../lib/llm';
import { logger, type AuthVariables } from '@mana/shared-hono';
/** Default model; overridable per-request or via the WRITING_MODEL env var. */
const DEFAULT_MODEL = process.env.WRITING_MODEL || 'ollama/gemma3:4b';

/** Hard cap so a runaway briefing can't burn unlimited tokens. */
const MAX_OUTPUT_TOKENS = 8000;

/** Request body for POST /generations. */
interface GenerationRequest {
  /** Optional system prompt, forwarded verbatim to the LLM. */
  systemPrompt?: string;
  /** Required main prompt — the endpoint answers 400 when missing. */
  userPrompt: string;
  /** Kind discriminator — logged for observability, not used for routing. */
  kind?: string;
  /** Ghostwriter default 0.7; selection-refinements might want 0.3. */
  temperature?: number;
  /** Token ceiling. Server clamps to MAX_OUTPUT_TOKENS. */
  maxTokens?: number;
  /** Optional model override — most callers leave this unset. */
  model?: string;
}
const routes = new Hono<{ Variables: AuthVariables }>();

/**
 * POST /generations — one-shot prose generation.
 *
 * Accepts a GenerationRequest (userPrompt required), forwards it to
 * mana-llm, and returns the raw completion plus model, token usage and
 * duration. Stateless by design: draft/version bookkeeping is client-side.
 */
routes.post('/generations', async (c) => {
  const userId = c.get('userId');

  // Fix: a malformed JSON body previously threw out of c.req.json() and
  // surfaced as a 500 — it's a client error, so answer 400.
  let body: Partial<GenerationRequest>;
  try {
    body = (await c.req.json()) as Partial<GenerationRequest>;
  } catch {
    return c.json({ error: 'invalid JSON body' }, 400);
  }

  if (!body.userPrompt || typeof body.userPrompt !== 'string') {
    return c.json({ error: 'userPrompt required' }, 400);
  }

  // Fix: guard maxTokens with typeof like temperature already is — a
  // non-numeric value would otherwise turn the clamp into NaN and reach
  // the LLM unclamped.
  const requestedMaxTokens = typeof body.maxTokens === 'number' ? body.maxTokens : 2000;
  const maxTokens = Math.min(MAX_OUTPUT_TOKENS, Math.max(64, requestedMaxTokens));
  const temperature =
    typeof body.temperature === 'number' ? Math.max(0, Math.min(1.2, body.temperature)) : 0.7;
  const model = typeof body.model === 'string' && body.model ? body.model : DEFAULT_MODEL;

  const startedAt = Date.now();
  try {
    const result = await llmText({
      model,
      system: body.systemPrompt,
      user: body.userPrompt,
      temperature,
      maxTokens,
    });
    const durationMs = Date.now() - startedAt;
    logger.info('writing.generation_ok', {
      userId,
      kind: body.kind,
      model: result.model,
      outputChars: result.text.length,
      tokenUsage: result.tokenUsage,
      durationMs,
    });
    return c.json({
      output: result.text,
      model: result.model,
      tokenUsage: result.tokenUsage,
      durationMs,
    });
  } catch (err) {
    const durationMs = Date.now() - startedAt;
    const message = err instanceof Error ? err.message : String(err);
    logger.error('writing.generation_failed', {
      userId,
      kind: body.kind,
      model,
      error: message,
      // Upstream HTTP status when the failure came from mana-llm itself.
      status: err instanceof LlmError ? err.status : undefined,
      durationMs,
    });
    return c.json({ error: 'Generation failed', detail: message, durationMs }, 500);
  }
});

export { routes as writingRoutes };

View file

@ -0,0 +1,54 @@
/**
 * Writing — server-only API client. Browser → mana-api → mana-llm.
 *
 * CRUD of drafts/versions/generations stays local-first (IndexedDB
 * + sync). This module talks to mana-api for the one operation that
 * needs a server-side LLM round-trip: generating the prose itself.
 * Everything before and after the fetch — briefing storage, prompt
 * composition, version bookkeeping — lives on the client.
 */
import { authStore } from '$lib/stores/auth.svelte';
import { getManaApiUrl } from '$lib/api/config';
/** Request payload for POST /api/v1/writing/generations (mirrors the server's GenerationRequest). */
export interface GenerateDraftRequest {
  /** Optional system prompt sent alongside the user prompt. */
  systemPrompt?: string;
  /** Main prompt — the server answers 400 when it is missing. */
  userPrompt: string;
  /** Plaintext kind discriminator forwarded for server-side logging. */
  kind?: string;
  /** Sampling temperature override. */
  temperature?: number;
  /** Output token ceiling override (the server clamps it). */
  maxTokens?: number;
  /** Model override; unset uses the server default. */
  model?: string;
}

/** Server response: the finished prose plus generation metadata. */
export interface GenerateDraftResponse {
  /** Raw completion text, ready to store as a draft version. */
  output: string;
  /** Model that actually ran the completion. */
  model: string;
  /** Prompt/completion token counts, when the provider reported usage. */
  tokenUsage?: { input: number; output: number };
  /** Server-measured wall-clock duration of the LLM call. */
  durationMs: number;
}
/**
 * Build the Authorization header for the current session. Resolves to an
 * empty object when no valid token is available (signed-out state).
 */
async function authHeader(): Promise<Record<string, string>> {
  const token = await authStore.getValidToken();
  if (!token) return {};
  return { Authorization: `Bearer ${token}` };
}
/**
 * POST the prompt pair to mana-api's writing generation endpoint.
 *
 * @param req   Fully-built prompt + generation parameters.
 * @param signal Optional AbortSignal to cancel the fetch.
 * @returns The generated prose plus model/usage/duration metadata.
 * @throws Error with status + body detail on HTTP failure, or when the
 *   success response is not the expected JSON shape.
 */
export async function callWritingGeneration(
  req: GenerateDraftRequest,
  signal?: AbortSignal
): Promise<GenerateDraftResponse> {
  const res = await fetch(`${getManaApiUrl()}/api/v1/writing/generations`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      ...(await authHeader()),
    },
    body: JSON.stringify(req),
    signal,
  });
  if (!res.ok) {
    const body = await res.text().catch(() => '');
    throw new Error(`Generation failed (${res.status}): ${body || res.statusText}`);
  }
  // Fix: the response was previously cast without any validation — a
  // non-JSON or malformed 200 would surface as a confusing downstream
  // error. Validate the one field every caller depends on.
  const data = (await res.json().catch(() => null)) as GenerateDraftResponse | null;
  if (!data || typeof data.output !== 'string') {
    throw new Error('Generation failed: malformed response from mana-api');
  }
  return data;
}

View file

@ -0,0 +1,111 @@
<!--
  GenerationStatus — compact status strip for the currently-running (or
  most-recent) generation. In M3 we only render when a generation is
  queued/running/failed: on success the new version auto-replaces the
  editor content via the currentVersionId pointer, so there's nothing
  to show here.
-->
<script lang="ts">
  import { GENERATION_STATUS_LABELS } from '../constants';
  import type { Generation } from '../types';

  let {
    generation,
    ondismiss,
  }: {
    /** Generation record to display (any status). */
    generation: Generation;
    /** Optional; when set, a × button appears once the run is done or failed. */
    ondismiss?: () => void;
  } = $props();

  // 'failed' gets the red treatment; succeeded/cancelled stop the pulse dot.
  const isError = $derived(generation.status === 'failed');
  const isDone = $derived(generation.status === 'succeeded' || generation.status === 'cancelled');
  // German display label for the current status.
  const label = $derived(GENERATION_STATUS_LABELS[generation.status].de);
</script>
<aside class="status" class:error={isError} class:running={!isDone && !isError}>
<div class="row">
<span class="dot" aria-hidden="true"></span>
<strong>{label}</strong>
{#if generation.model}
<span class="meta">· {generation.model}</span>
{/if}
{#if generation.durationMs}
<span class="meta">· {(generation.durationMs / 1000).toFixed(1)}s</span>
{/if}
{#if ondismiss && (isDone || isError)}
<button type="button" class="dismiss" onclick={ondismiss}>×</button>
{/if}
</div>
{#if generation.error}
<p class="err-msg">{generation.error}</p>
{/if}
</aside>
<style>
.status {
display: flex;
flex-direction: column;
gap: 0.25rem;
padding: 0.5rem 0.75rem;
border-radius: 0.55rem;
border: 1px solid var(--color-border, rgba(0, 0, 0, 0.08));
background: var(--color-surface, rgba(255, 255, 255, 0.04));
font-size: 0.85rem;
}
.status.running {
border-color: color-mix(in srgb, #0ea5e9 40%, transparent);
background: color-mix(in srgb, #0ea5e9 6%, transparent);
}
.status.error {
border-color: color-mix(in srgb, #ef4444 50%, transparent);
background: color-mix(in srgb, #ef4444 6%, transparent);
}
.row {
display: flex;
align-items: center;
gap: 0.4rem;
}
.dot {
width: 0.55rem;
height: 0.55rem;
border-radius: 999px;
background: #94a3b8;
}
.status.running .dot {
background: #0ea5e9;
animation: pulse 1.1s ease-in-out infinite;
}
.status.error .dot {
background: #ef4444;
}
@keyframes pulse {
0%,
100% {
transform: scale(1);
opacity: 1;
}
50% {
transform: scale(0.7);
opacity: 0.5;
}
}
.meta {
color: var(--color-text-muted, rgba(0, 0, 0, 0.55));
font-size: 0.8rem;
}
.err-msg {
margin: 0;
color: #ef4444;
font-size: 0.8rem;
line-height: 1.35;
}
.dismiss {
margin-left: auto;
padding: 0 0.4rem;
background: transparent;
border: none;
font-size: 1.1rem;
cursor: pointer;
color: inherit;
line-height: 1;
}
</style>

View file

@ -8,6 +8,15 @@ export type { CreateDraftInput, UpdateDraftPatch } from './stores/drafts.svelte'
export { stylesStore } from './stores/styles.svelte';
export type { CreateStyleInput, UpdateStylePatch } from './stores/styles.svelte';
export { generationsStore } from './stores/generations.svelte';
export type { StartDraftGenerationOptions } from './stores/generations.svelte';
export { callWritingGeneration } from './api';
export type { GenerateDraftRequest, GenerateDraftResponse } from './api';
export { buildDraftPrompt, estimateMaxTokens } from './utils/prompt-builder';
export type { PromptPair, BuildDraftPromptInput } from './utils/prompt-builder';
export {
useAllDrafts,
useDraft,

View file

@ -0,0 +1,231 @@
/**
 * Writing generations store — orchestrates the "Generate" button end-to-end.
 *
 * startDraftGeneration flow:
 * 1. Write a LocalGeneration with status='queued' — UI shows pending.
 * 2. Build the prompt from the draft's briefing + any attached style.
 * 3. Flip to status='running', call the mana-api /generations endpoint.
 * 4. On success: create a new LocalDraftVersion with the output, point
 *    the draft at it (currentVersionId flip), mark the generation
 *    succeeded and link it to the version.
 * 5. On failure: mark the generation failed with the error message so the
 *    UI can surface it; leave the current version untouched.
 *
 * Selection-refinements (M6) will add a second entrypoint that writes
 * back into the same current version in-place.
 */
import { encryptRecord } from '$lib/data/crypto';
import { emitDomainEvent } from '$lib/data/events';
import { generationTable, draftTable, draftVersionTable, writingStyleTable } from '../collections';
import { callWritingGeneration } from '../api';
import { buildDraftPrompt, estimateMaxTokens } from '../utils/prompt-builder';
import { getStylePreset } from '../presets/styles';
import type {
LocalDraftVersion,
LocalGeneration,
LocalWritingStyle,
GenerationKind,
GenerationProvider,
} from '../types';
const PROVIDER: GenerationProvider = 'mana-llm';
/** Count whitespace-separated words in `text`; blank/empty input counts 0. */
function wordCountOf(text: string): number {
  const tokens = text.trim().split(/\s+/).filter(Boolean);
  return tokens.length;
}
/**
 * Resolve a writing-style row by id. Returns null when no id is given,
 * the row does not exist, or it has been soft-deleted.
 */
async function loadStyle(styleId: string | null | undefined): Promise<LocalWritingStyle | null> {
  if (!styleId) return null;
  const found = await writingStyleTable.get(styleId);
  if (!found || found.deletedAt) return null;
  return found;
}
/** Next sequential version number for a draft; 1 when it has no versions yet. */
async function nextVersionNumber(draftId: string): Promise<number> {
  const versions = await draftVersionTable.where('draftId').equals(draftId).toArray();
  const highest = versions.reduce((max, v) => (v.versionNumber > max ? v.versionNumber : max), 0);
  return highest + 1;
}
/** Caller-tunable knobs for startDraftGeneration; all optional. */
export interface StartDraftGenerationOptions {
  /** Override the default ghostwriter temperature (0.7). */
  temperature?: number;
  /** Override the auto-computed max-token ceiling. */
  maxTokens?: number;
  /** Override the default model. Leave unset to use the server default. */
  model?: string;
}
export const generationsStore = {
  /**
   * Generate a fresh draft from the briefing attached to the draft.
   * Writes a new LocalDraftVersion and points the draft at it on success.
   * Returns the generation id so the caller can subscribe for UI status.
   *
   * @throws when the draft id is unknown, or rethrows the server error
   *   after persisting the failure on the generation record.
   */
  async startDraftGeneration(
    draftId: string,
    opts: StartDraftGenerationOptions = {}
  ): Promise<string> {
    const draft = await draftTable.get(draftId);
    if (!draft) throw new Error(`Draft ${draftId} not found`);
    const generationId = crypto.randomUUID();
    // Regenerate vs first draft: decided by whether the current version
    // holds any non-whitespace content.
    const kind: GenerationKind =
      draft.currentVersionId &&
      (await draftVersionTable.get(draft.currentVersionId))?.content?.trim()
        ? 'full-regenerate'
        : 'draft-from-brief';
    const style = await loadStyle(draft.styleId);
    // Only preset-sourced styles resolve a StylePreset; extracted
    // principles are passed separately below.
    const stylePreset =
      style?.source === 'preset' && style.presetId ? getStylePreset(style.presetId) : undefined;
    const { system, user } = buildDraftPrompt({
      kind: draft.kind,
      title: draft.title,
      briefing: draft.briefing,
      stylePreset,
      styleExtracted: style?.extractedPrinciples ?? undefined,
    });
    const maxTokens = opts.maxTokens ?? estimateMaxTokens(draft.briefing);
    const temperature = opts.temperature ?? 0.7;
    // 1. Queued record. Prompt is stored so a later audit knows exactly
    //    what went to the model; it's encrypted alongside the output.
    const now = new Date().toISOString();
    const queued: LocalGeneration = {
      id: generationId,
      draftId,
      kind,
      status: 'queued',
      prompt: `SYSTEM:\n${system}\n\nUSER:\n${user}`,
      provider: PROVIDER,
      model: opts.model ?? null,
      params: { temperature, maxTokens },
      inputSelection: null,
      output: null,
      outputVersionId: null,
      startedAt: null,
      completedAt: null,
      durationMs: null,
      tokenUsage: null,
      error: null,
      missionId: null,
    };
    await encryptRecord('writingGenerations', queued);
    await generationTable.add(queued);
    emitDomainEvent(
      'WritingDraftGenerationStarted',
      'writing',
      'writingGenerations',
      generationId,
      { generationId, draftId, kind }
    );
    // 2. Flip to running before the fetch so the UI gets a progress tick.
    // NOTE(review): startedAt reuses `now`, captured before the queued
    // insert — close enough in practice, and failure duration below is
    // anchored to the same timestamp; confirm that is intentional.
    await generationTable.update(generationId, {
      status: 'running',
      startedAt: now,
      updatedAt: new Date().toISOString(),
    });
    await draftTable.update(draftId, {
      status: 'refining',
      updatedAt: new Date().toISOString(),
    });
    try {
      const result = await callWritingGeneration({
        systemPrompt: system,
        userPrompt: user,
        kind: draft.kind,
        temperature,
        maxTokens,
        model: opts.model,
      });
      const versionId = crypto.randomUUID();
      const versionNumber = await nextVersionNumber(draftId);
      // New version linked back to this generation; parent is whatever
      // version was current before the generate.
      const newVersion: LocalDraftVersion = {
        id: versionId,
        draftId,
        versionNumber,
        content: result.output,
        wordCount: wordCountOf(result.output),
        generationId,
        isAiGenerated: true,
        parentVersionId: draft.currentVersionId ?? null,
        summary: null,
      };
      await encryptRecord('writingDraftVersions', newVersion);
      await draftVersionTable.add(newVersion);
      const completedAt = new Date().toISOString();
      const successPatch: Record<string, unknown> = {
        status: 'succeeded',
        output: result.output,
        outputVersionId: versionId,
        model: result.model,
        tokenUsage: result.tokenUsage ?? null,
        completedAt,
        durationMs: result.durationMs,
        updatedAt: completedAt,
      };
      // NOTE(review): encryptRecord is invoked on a partial patch here but
      // on full records elsewhere — confirm it handles partial objects.
      await encryptRecord('writingGenerations', successPatch);
      await generationTable.update(generationId, successPatch);
      // Point the draft at the new version. Keep status='refining'
      // because the user typically reviews + tweaks after a generate.
      await draftTable.update(draftId, {
        currentVersionId: versionId,
        updatedAt: completedAt,
      });
      emitDomainEvent('WritingDraftVersionCreated', 'writing', 'writingDraftVersions', versionId, {
        draftId,
        versionId,
        versionNumber,
        isAiGenerated: true,
        generationId,
      });
      return generationId;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      const completedAt = new Date().toISOString();
      // NOTE(review): the failure patch is not encrypted and the draft is
      // left in status='refining' — presumably intentional (the current
      // version stays untouched); confirm.
      await generationTable.update(generationId, {
        status: 'failed',
        error: message,
        completedAt,
        durationMs: Date.now() - new Date(now).getTime(),
        updatedAt: completedAt,
      });
      emitDomainEvent(
        'WritingDraftGenerationFailed',
        'writing',
        'writingGenerations',
        generationId,
        { generationId, draftId, error: message }
      );
      throw err;
    }
  },
  /**
   * Mark a generation as cancelled client-side. We don't abort the
   * server call in M3 (the fetch runs to completion and the result is
   * just ignored); a proper AbortSignal pass-through can come with the
   * streaming path in M7.
   */
  async cancelGeneration(generationId: string) {
    const existing = await generationTable.get(generationId);
    if (!existing) return;
    // Terminal states stay as they are — only queued/running can cancel.
    if (existing.status === 'succeeded' || existing.status === 'failed') return;
    await generationTable.update(generationId, {
      status: 'cancelled',
      completedAt: new Date().toISOString(),
      updatedAt: new Date().toISOString(),
    });
  },
};

View file

@ -0,0 +1,133 @@
/**
* Prompt builder turns a briefing (+ optional style) into a system/user
* prompt pair for mana-llm. Deliberately pure and client-side: everything
* that goes into the prompt is already decrypted in the store and the
* API endpoint is a thin passthrough, so building the prompt here keeps
* all the taste-and-tone decisions visible to the user.
*
* References (M5+) will extend `buildDraftPrompt` with resolved input
* snippets. Selection-refinement prompts (M6) live as their own builders
* alongside this one.
*/
import { KIND_LABELS } from '../constants';
import type { DraftBriefing, DraftKind, StyleExtractedPrinciples } from '../types';
import type { StylePreset } from '../presets/styles';
/** A system + user message pair ready to send to the completions endpoint. */
export interface PromptPair {
  /** System prompt: role, output constraints, optional style block. */
  system: string;
  /** User prompt: the briefing rendered as labelled lines. */
  user: string;
}
/** Display names for the language codes we special-case in prompts. */
const LANGUAGE_LABELS: Record<string, string> = {
  de: 'Deutsch',
  en: 'English',
  fr: 'Français',
  es: 'Español',
  it: 'Italiano',
};

/** Map an ISO language code to its display name; unknown codes pass through unchanged. */
function languageLabel(code: string): string {
  const known = LANGUAGE_LABELS[code];
  return known === undefined ? code : known;
}
/** German singular prompt forms for draft kinds (the KIND_LABELS entries are UI labels). */
const PROMPT_KIND_SINGULAR: Partial<Record<DraftKind, string>> = {
  blog: 'Blogpost',
  essay: 'Essay',
  email: 'E-Mail',
  social: 'Social-Media-Post',
  story: 'Kurzgeschichte',
  letter: 'Brief',
  speech: 'Rede',
  'cover-letter': 'Bewerbungsanschreiben',
  'product-description': 'Produktbeschreibung',
  'press-release': 'Pressemitteilung',
  bio: 'Bio / Kurzvita',
};

/** Singular German label for the prompt; falls back to the generic UI label. */
function kindLabel(kind: DraftKind): string {
  // Keep the eager KIND_LABELS access so an unknown kind fails identically
  // to the original implementation.
  const fallback = KIND_LABELS[kind].de;
  return PROMPT_KIND_SINGULAR[kind] ?? fallback;
}
/**
 * Render the style section of the system prompt. A preset wins over
 * extracted principles; returns null when there is nothing to say.
 */
function renderStyle(
  preset: StylePreset | undefined,
  principles: StyleExtractedPrinciples | undefined
): string | null {
  if (preset) {
    const parts = [`Stil: ${preset.name.de}. ${preset.description.de}`];
    if (preset.principles.rawAnalysis) {
      parts.push(`Stil-Richtlinien: ${preset.principles.rawAnalysis}`);
    }
    if (preset.principles.toneTraits.length > 0) {
      parts.push(`Ton: ${preset.principles.toneTraits.join(', ')}.`);
    }
    return parts.join('\n');
  }
  if (!principles) return null;
  const parts: string[] = [];
  if (principles.rawAnalysis) parts.push(`Stil: ${principles.rawAnalysis}`);
  if (principles.toneTraits.length > 0) parts.push(`Ton: ${principles.toneTraits.join(', ')}.`);
  if (principles.vocabulary?.length) {
    parts.push(`Bevorzugtes Vokabular: ${principles.vocabulary.join(', ')}.`);
  }
  return parts.length > 0 ? parts.join('\n') : null;
}
export interface BuildDraftPromptInput {
  kind: DraftKind;
  title: string;
  briefing: DraftBriefing;
  stylePreset?: StylePreset;
  styleExtracted?: StyleExtractedPrinciples;
}

/**
 * Build a system + user prompt for a fresh draft. M3 shape — referenced
 * inputs (articles / notes / library) are NOT injected yet; that's M5.
 * The system prompt forbids preamble / sign-off / meta commentary so
 * the returned text is ready to paste into a version.
 */
export function buildDraftPrompt(input: BuildDraftPromptInput): PromptPair {
  const briefing = input.briefing;
  const lang = languageLabel(briefing.language);
  const kindLbl = kindLabel(input.kind);

  // System: role + hard output constraints, then the optional style block.
  const systemParts = [
    `Du bist ein professioneller Ghostwriter. Deine Aufgabe: Schreibe einen fertigen ${kindLbl} auf ${lang} basierend auf dem Briefing des Nutzers.`,
    `Gib ausschließlich den fertigen Text zurück. Keine Einleitung, keine Metakommentare, kein "Hier ist dein Text", keine Abschlussphrase nach dem Text. Markdown ist erlaubt, aber nicht erzwungen.`,
  ];
  const styleBlock = renderStyle(input.stylePreset, input.styleExtracted);
  if (styleBlock) systemParts.push(styleBlock);

  // User: one labelled line per briefing field, optional fields skipped.
  const userParts = [`Titel: ${input.title}`, `Thema: ${briefing.topic}`];
  if (briefing.audience) userParts.push(`Zielgruppe: ${briefing.audience}`);
  if (briefing.tone) userParts.push(`Ton: ${briefing.tone}`);
  const target = briefing.targetLength;
  if (target) {
    let unitLabel: string;
    if (target.type === 'words') unitLabel = 'Wörter';
    else if (target.type === 'chars') unitLabel = 'Zeichen';
    else unitLabel = 'Minuten';
    userParts.push(`Ziel-Länge: ca. ${target.value} ${unitLabel}`);
  }
  if (briefing.extraInstructions) {
    userParts.push(`Zusätzliche Hinweise: ${briefing.extraInstructions}`);
  }
  userParts.push('', `Schreibe den ${kindLbl} jetzt.`);

  return {
    system: systemParts.join('\n\n'),
    user: userParts.join('\n'),
  };
}
/**
 * Rough max-tokens heuristic — 2x target words + buffer, clamped to 8000
 * (matches the server-side hard cap). Words-to-tokens ratio is ~1.5 for
 * German and English, so 2x leaves headroom.
 */
export function estimateMaxTokens(briefing: DraftBriefing): number {
  const target = briefing.targetLength;
  const value = target?.value ?? 500;
  let words: number;
  // Convert the target into an approximate word count:
  // chars ≈ 5 per word, minutes ≈ 150 spoken words per minute.
  switch (target?.type ?? 'words') {
    case 'chars':
      words = value / 5;
      break;
    case 'minutes':
      words = value * 150;
      break;
    default:
      words = value;
  }
  return Math.min(8000, Math.max(256, Math.round(words * 2 + 200)));
}

View file

@ -12,8 +12,15 @@
import StatusBadge from '../components/StatusBadge.svelte';
import VersionEditor from '../components/VersionEditor.svelte';
import VersionHistory from '../components/VersionHistory.svelte';
import GenerationStatus from '../components/GenerationStatus.svelte';
import { draftsStore } from '../stores/drafts.svelte';
import { useDraft, useVersionsForDraft, useCurrentVersionForDraft } from '../queries';
import { generationsStore } from '../stores/generations.svelte';
import {
useDraft,
useVersionsForDraft,
useCurrentVersionForDraft,
useGenerationsForDraft,
} from '../queries';
import { KIND_LABELS, STATUS_LABELS } from '../constants';
import type { DraftStatus } from '../types';
@ -28,12 +35,30 @@
const versions$ = useVersionsForDraft(id);
/* svelte-ignore state_referenced_locally */
const currentVersion$ = useCurrentVersionForDraft(id);
/* svelte-ignore state_referenced_locally */
const generations$ = useGenerationsForDraft(id);
const draft = $derived(draft$.value);
const versions = $derived(versions$.value);
const currentVersion = $derived(currentVersion$.value);
const generations = $derived(generations$.value);
// Surface the freshest running generation, or the most recent failure
// so the user can dismiss it. On success we hide — the new version is
// already live in the editor via the currentVersionId pointer.
const latestGeneration = $derived(
generations.find((g) => g.status === 'queued' || g.status === 'running') ??
generations.find((g) => g.status === 'failed') ??
null
);
let dismissedGenerationIds = $state<Set<string>>(new Set());
const visibleGeneration = $derived(
latestGeneration && !dismissedGenerationIds.has(latestGeneration.id) ? latestGeneration : null
);
let briefingOpen = $state(false);
let saving = $state(false);
let generating = $state(false);
let generateError = $state<string | null>(null);
async function setStatus(next: DraftStatus) {
if (!draft) return;
@ -62,6 +87,25 @@
goto('/writing');
}
/** Kick off a full-draft generation for the open draft; surfaces errors inline. */
async function generate() {
  if (generating || !draft) return;
  generateError = null;
  generating = true;
  try {
    await generationsStore.startDraftGeneration(draft.id);
  } catch (e) {
    generateError = e instanceof Error ? e.message : String(e);
  } finally {
    generating = false;
  }
}
/** Hide a finished/failed generation strip. Reassigns the Set so $state picks up the change. */
function dismissGeneration(id: string) {
  const next = new Set(dismissedGenerationIds);
  next.add(id);
  dismissedGenerationIds = next;
}
const hasDraftContent = $derived((currentVersion?.content ?? '').trim().length > 0);
const kind = $derived(draft ? KIND_LABELS[draft.kind] : null);
const targetWords = $derived(draft?.briefing.targetLength?.value ?? null);
const STATUS_ORDER: DraftStatus[] = ['draft', 'refining', 'complete', 'published'];
@ -130,22 +174,50 @@
<section class="editor-column">
{#if currentVersion}
<div class="editor-head">
<div>
<div class="version-label">
<strong>Version {currentVersion.versionNumber}</strong>
{#if currentVersion.isAiGenerated}
<span class="ai-tag">KI</span>
{/if}
</div>
<button
type="button"
class="checkpoint"
onclick={saveCheckpoint}
disabled={saving}
title="Aktuellen Text als neue Version einfrieren"
>
{saving ? 'Speichert…' : ' Als Checkpoint speichern'}
</button>
<div class="editor-actions">
<button
type="button"
class="generate"
onclick={generate}
disabled={generating}
title={hasDraftContent
? 'Kompletten Text neu generieren (überschreibt nicht — neue Version)'
: 'Ersten Entwurf aus dem Briefing generieren'}
>
{#if generating}
Schreibt…
{:else if hasDraftContent}
⟳ Neu generieren
{:else}
✨ Generate
{/if}
</button>
<button
type="button"
class="checkpoint"
onclick={saveCheckpoint}
disabled={saving}
title="Aktuellen Text als neue Version einfrieren"
>
{saving ? 'Speichert…' : ' Checkpoint'}
</button>
</div>
</div>
{#if visibleGeneration}
<GenerationStatus
generation={visibleGeneration}
ondismiss={() => dismissGeneration(visibleGeneration.id)}
/>
{/if}
{#if generateError}
<p class="error">{generateError}</p>
{/if}
<VersionEditor version={currentVersion} {targetWords} />
{:else}
<p class="muted">Diese Version existiert nicht mehr.</p>
@ -321,6 +393,34 @@
text-transform: uppercase;
letter-spacing: 0.04em;
}
.version-label {
display: flex;
align-items: center;
gap: 0.3rem;
}
.editor-actions {
display: inline-flex;
gap: 0.4rem;
}
.generate {
padding: 0.4rem 0.9rem;
border-radius: 0.5rem;
border: 1px solid #0ea5e9;
background: #0ea5e9;
color: white;
cursor: pointer;
font: inherit;
font-weight: 500;
font-size: 0.85rem;
}
.generate:hover:not(:disabled) {
background: #0284c7;
border-color: #0284c7;
}
.generate:disabled {
opacity: 0.6;
cursor: not-allowed;
}
.checkpoint {
padding: 0.4rem 0.8rem;
border-radius: 0.5rem;
@ -338,6 +438,15 @@
opacity: 0.5;
cursor: not-allowed;
}
.error {
margin: 0;
padding: 0.5rem 0.75rem;
border-radius: 0.5rem;
color: #ef4444;
background: color-mix(in srgb, #ef4444 6%, transparent);
border: 1px solid color-mix(in srgb, #ef4444 40%, transparent);
font-size: 0.85rem;
}
.history-column h2 {
font-size: 0.8rem;
margin: 0 0 0.5rem;