fix(events): production wiring + polling resilience (quick wins)

Five small follow-ups on Phase 1b:

- docker-compose.macmini.yml: add the mana-events container with the
  same shape as mana-credits, expose port 3065, add a Traefik route
  for events.mana.how, and inject PUBLIC_MANA_EVENTS_URL into the
  mana-web container so the SvelteKit SSR + browser both reach it.
- mana-events: background sweeper that deletes rsvp_rate_buckets
  rows older than 2h every hour. Without it, long-published events
  accumulate one row per traffic-hour forever (FK cascade only fires
  on snapshot delete).
- PublicRsvpList: track consecutiveFailures and only show the error
  banner after two failures in a row, so a single mid-poll network
  hiccup doesn't flash a 30s error the user can't act on.
- apps/mana/apps/web: declare postgres as a devDep (already imported
  by the e2e spec via pnpm hoisting, now explicit).
This commit is contained in:
Till JS 2026-04-07 18:53:29 +02:00
parent 354cbcb176
commit 640242500e
5 changed files with 116 additions and 5 deletions

View file

@ -31,6 +31,7 @@
"autoprefixer": "^10.4.20",
"fake-indexeddb": "^6.2.5",
"postcss": "^8.4.49",
"postgres": "^3.4.9",
"prettier": "^3.4.2",
"prettier-plugin-svelte": "^3.3.2",
"prettier-plugin-tailwindcss": "^0.6.10",

View file

@ -11,19 +11,26 @@
let rsvps = $state<PublicRsvpRecord[]>([]);
let loading = $state(false);
let lastError = $state<string | null>(null);
let lastErrorMessage = $state<string | null>(null);
let consecutiveFailures = $state(0);
let lastFetchedAt = $state<Date | null>(null);
// Surface the error only after two failures in a row so a single network
// hiccup mid-poll doesn't flash a scary banner the user can't act on.
const showError = $derived(consecutiveFailures >= 2 && lastErrorMessage !== null);
async function fetchRsvps() {
if (!isPublished) return;
loading = true;
try {
const res = await eventsApi.getRsvps(eventId);
rsvps = res.rsvps;
lastError = null;
lastErrorMessage = null;
consecutiveFailures = 0;
lastFetchedAt = new Date();
} catch (e) {
lastError = e instanceof Error ? e.message : 'Fehler beim Laden';
lastErrorMessage = e instanceof Error ? e.message : 'Fehler beim Laden';
consecutiveFailures++;
} finally {
loading = false;
}
@ -62,8 +69,8 @@
</button>
</div>
{#if lastError}
<p class="error">{lastError}</p>
{#if showError}
<p class="error">{lastErrorMessage}</p>
{:else if rsvps.length === 0 && !loading}
<p class="empty">Noch keine Antworten via Share-Link.</p>
{:else}

View file

@ -323,6 +323,39 @@ services:
- "traefik.http.routers.mana-credits.tls=true"
- "traefik.http.services.mana-credits.loadbalancer.server.port=3002"
mana-events:
build:
context: services/mana-events
dockerfile: Dockerfile
image: mana-events:local
container_name: mana-events
restart: always
mem_limit: 128m
depends_on:
postgres: { condition: service_healthy }
environment:
TZ: Europe/Berlin
PORT: 3065
DATABASE_URL: postgresql://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/mana_platform
MANA_AUTH_URL: http://mana-auth:3001
# The public RSVP endpoints accept requests from anywhere — they're
# designed to be hit by guests who only have the share link.
# The host endpoints sit behind JWT auth so CORS is still scoped.
CORS_ORIGINS: https://mana.how
ports:
- "3065:3065"
healthcheck:
test: ["CMD", "bun", "-e", "fetch('http://127.0.0.1:3065/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
interval: 120s
timeout: 10s
retries: 3
start_period: 15s
labels:
- "traefik.enable=true"
- "traefik.http.routers.mana-events.rule=Host(`events.mana.how`)"
- "traefik.http.routers.mana-events.tls=true"
- "traefik.http.services.mana-events.loadbalancer.server.port=3065"
mana-user:
build:
context: services/mana-user
@ -909,6 +942,8 @@ services:
PUBLIC_MANA_MEDIA_URL_CLIENT: https://media.mana.how
PUBLIC_MANA_LLM_URL: http://mana-llm:3025
PUBLIC_MANA_LLM_URL_CLIENT: https://llm.mana.how
PUBLIC_MANA_EVENTS_URL: http://mana-events:3065
PUBLIC_MANA_EVENTS_URL_CLIENT: https://events.mana.how
# Analytics & Error Tracking
PUBLIC_UMAMI_WEBSITE_ID: 32777167-e026-4618-933a-3429120b479b
PUBLIC_GLITCHTIP_DSN: ${GLITCHTIP_DSN_MANA_WEB:-}

View file

@ -16,10 +16,15 @@ import { jwtAuth } from './middleware/jwt-auth';
import { healthRoutes } from './routes/health';
import { createEventsRoutes } from './routes/events';
import { createRsvpRoutes } from './routes/rsvp';
import { startRateBucketSweeper } from './lib/cleanup';
const config = loadConfig();
const db = getDb(config.databaseUrl);
// Background cleanup of stale rate-limit buckets so they don't accumulate
// for the lifetime of long-published events.
startRateBucketSweeper(db);
const app = new Hono();
app.onError(errorHandler);

View file

@ -0,0 +1,63 @@
/**
* Periodic cleanup of stale rate-limit buckets.
*
 * Each (token, hour-bucket) row is only useful for the hour it represents;
 * once that hour is over, the row is just dead weight in Postgres.
* The FK cascade only fires when an event snapshot is deleted; long-lived
* snapshots therefore accumulate one bucket row per traffic-hour forever.
*
* This sweeper deletes any bucket whose hour is more than KEEP_HOURS old.
* Conservative window so we don't delete a row another request could still
* read for the same hour boundary in flight.
*/
import { lt, sql } from 'drizzle-orm';
import type { Database } from '../db/connection';
import { rsvpRateBuckets } from '../db/schema/events';
const KEEP_HOURS = 2;

/**
 * Hour-bucket key (`YYYY-MM-DDTHH`, UTC) for the oldest hour we still keep.
 * Any bucket lexicographically below this value is considered stale.
 */
function cutoffBucket(): string {
  // toISOString() is always UTC and zero-padded, so its first 13 characters
  // are exactly the `YYYY-MM-DDTHH` shape the manual formatting produced.
  return new Date(Date.now() - KEEP_HOURS * 60 * 60 * 1000).toISOString().slice(0, 13);
}
/**
 * Delete every rate-limit bucket row whose hour is older than the keep
 * window.
 *
 * @param db - drizzle database handle.
 * @returns number of rows removed in this sweep.
 */
export async function sweepRateBuckets(db: Database): Promise<number> {
  const deleted = await db
    .delete(rsvpRateBuckets)
    .where(lt(rsvpRateBuckets.hourBucket, cutoffBucket()))
    .returning({ token: rsvpRateBuckets.token });
  return deleted.length;
}
/**
* Start a periodic sweep. Returns a stop function for tests.
* Runs once on boot, then on the configured interval.
*/
export function startRateBucketSweeper(
db: Database,
intervalMs = 60 * 60 * 1000 // 1h
): () => void {
const tick = async () => {
try {
const removed = await sweepRateBuckets(db);
if (removed > 0) {
console.log(`[mana-events] swept ${removed} stale rate buckets`);
}
} catch (err) {
console.error('[mana-events] rate bucket sweep failed:', err);
}
};
// Fire once shortly after boot so we don't wait a full hour for the
// first cleanup, but defer slightly so startup logs aren't interleaved.
const bootTimer = setTimeout(tick, 5_000);
const intervalTimer = setInterval(tick, intervalMs);
return () => {
clearTimeout(bootTimer);
clearInterval(intervalTimer);
};
}