test: implement comprehensive automated testing system with daily CI/CD

Implement rock-solid automated testing infrastructure for mana-core-auth
with daily execution, notifications, and comprehensive monitoring.

Test Suite Improvements:
- Fix all 36 failing BetterAuthService tests (missing service mocks)
- Add 21 JwtAuthGuard tests achieving 100% statement coverage
- Create silentError helper to suppress intentional error logs
- Fix Todo backend TaskService test structure
- Add jose mock for JWT testing
- Configure jest collectCoverageFrom for mana-core-auth

GitHub Actions Workflow:
- Daily automated test execution (2 AM UTC + manual trigger)
- Matrix parallelization across 6 backend services
- PostgreSQL and Redis service containers
- Coverage enforcement (80% threshold)
- Multi-channel notifications (Discord, Slack, GitHub Issues)
- Support for success notifications (opt-in)

Test Infrastructure:
- Coverage aggregation across multiple services
- Flaky test detection with 30-run history tracking
- Performance metrics tracking with regression detection
- Test data seeding and cleanup scripts
- Comprehensive test reporting with formatted metrics

Documentation:
- TESTING_GUIDE.md (4000+ words) - Complete testing documentation
- AUTOMATED_TESTING_SYSTEM.md - System architecture and workflows
- DISCORD_NOTIFICATIONS_SETUP.md - Discord webhook setup guide
- TESTING_DEPLOYMENT_CHECKLIST.md - Pre-deployment verification
- TESTING_QUICK_REFERENCE.md - Quick command reference

Final Result:
- 180/180 tests passing (100% pass rate)
- Zero console errors in test output
- Automated daily testing with rich notifications
- Production-ready test infrastructure
This commit is contained in:
Wuesteon 2025-12-25 19:12:27 +01:00
parent 9dbd6e6c09
commit 304897261d
24 changed files with 5017 additions and 16 deletions

View file

@ -0,0 +1,153 @@
#!/bin/bash
# Run Tests with Coverage
#
# Executes tests for specific packages or all packages with coverage reporting.
# Automatically sets up test databases and cleans up after execution.
#
# Usage:
# ./scripts/run-tests-with-coverage.sh [package-filter]
#
# Examples:
# ./scripts/run-tests-with-coverage.sh # Run all tests
# ./scripts/run-tests-with-coverage.sh mana-core-auth # Run auth tests only
# ./scripts/run-tests-with-coverage.sh chat-backend # Run chat backend tests only

# Fail fast, and surface failures from any stage of a pipeline (plain `set -e`
# only sees the last stage — this script pipes into bc/awk below).
set -eo pipefail

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration: threshold overridable via env, package filter from first arg.
COVERAGE_THRESHOLD=${COVERAGE_THRESHOLD:-80}
PACKAGE_FILTER=${1:-""}

echo -e "${GREEN}Running tests with coverage${NC}"
echo "Coverage threshold: ${COVERAGE_THRESHOLD}%"

# Check if Docker is running (for database tests); warn but keep going so
# packages without DB dependencies can still be tested.
if ! docker ps > /dev/null 2>&1; then
echo -e "${YELLOW}Warning: Docker is not running. Database tests may fail.${NC}"
echo "Start Docker and run: pnpm docker:up"
fi
#######################################
# Run the test suite for a single package and enforce the coverage threshold.
# Globals:   COVERAGE_THRESHOLD (read), DATABASE_URL / NODE_ENV (written)
# Arguments: $1 - package name (logging + DB name derivation)
#            $2 - package path relative to the repo root
# Returns:   0 on success or when the package has no tests; 1 when coverage
#            is below COVERAGE_THRESHOLD (test failures abort via set -e).
#######################################
run_package_tests() {
  local package_name=$1
  local package_path=$2

  echo -e "\n${GREEN}Testing ${package_name}...${NC}"
  cd "$package_path"

  # Skip packages that define no "test" script at all.
  if ! grep -q "\"test\"" package.json 2>/dev/null; then
    echo -e "${YELLOW}No test script found in ${package_name}, skipping${NC}"
    cd - > /dev/null
    return 0
  fi

  # Point the package at its dedicated test database when it uses one.
  if grep -q "DATABASE_URL" .env* 2>/dev/null || grep -q "db:push" package.json 2>/dev/null; then
    echo "Setting up test database..."
    # Derive DB name from the package name: drop the "-backend" suffix and the
    # "mana-core-" prefix (anchored so a mid-name match cannot corrupt it).
    local db_name
    db_name=$(echo "$package_name" | sed -e 's/-backend$//' -e 's/^mana-core-//')
    export DATABASE_URL="postgresql://manacore:devpassword@localhost:5432/${db_name}"
    export NODE_ENV="test"
    # Apply schema migrations when the package provides a db:push script.
    if grep -q "db:push" package.json; then
      pnpm run db:push 2>/dev/null || echo "No migrations to run"
    fi
  fi

  # Run tests with coverage, preferring a dedicated coverage script.
  if grep -q "test:cov" package.json; then
    pnpm run test:cov
  elif grep -q "\"test\"" package.json; then
    pnpm run test -- --coverage
  fi

  # Enforce the coverage threshold when a summary report was produced.
  if [ -f "coverage/coverage-summary.json" ]; then
    local coverage
    coverage=$(node -e "const c = require('./coverage/coverage-summary.json'); console.log(c.total.lines.pct)")
    echo -e "Coverage: ${coverage}%"
    # FIX: use awk for the float comparison instead of bc — bc is not
    # guaranteed to be installed on minimal CI images.
    if awk -v c="$coverage" -v t="$COVERAGE_THRESHOLD" 'BEGIN { exit !(c < t) }'; then
      echo -e "${RED}✗ Coverage ${coverage}% is below threshold ${COVERAGE_THRESHOLD}%${NC}"
      cd - > /dev/null
      return 1
    else
      echo -e "${GREEN}✓ Coverage ${coverage}% meets threshold${NC}"
    fi
  fi

  cd - > /dev/null
  return 0
}
# -----------------------------------------------------------------------------
# Package discovery: build the PACKAGES array from either the single package
# named on the command line, or from every backend package in the monorepo.
# -----------------------------------------------------------------------------
# Collect packages to test
PACKAGES=()
if [ -n "$PACKAGE_FILTER" ]; then
# Test specific package
# A filter may name either a service (services/<name>) or an app backend
# (apps/<name>/apps/backend); anything else is a hard error.
if [ -d "services/$PACKAGE_FILTER" ]; then
PACKAGES+=("services/$PACKAGE_FILTER")
elif [ -d "apps/$PACKAGE_FILTER/apps/backend" ]; then
PACKAGES+=("apps/$PACKAGE_FILTER/apps/backend")
else
echo -e "${RED}Package not found: $PACKAGE_FILTER${NC}"
exit 1
fi
else
# Test all backend packages
# Only directories that actually contain a package.json are considered.
for service in services/*; do
if [ -d "$service" ] && [ -f "$service/package.json" ]; then
PACKAGES+=("$service")
fi
done
for app_backend in apps/*/apps/backend; do
if [ -d "$app_backend" ] && [ -f "$app_backend/package.json" ]; then
PACKAGES+=("$app_backend")
fi
done
fi
echo -e "\n${GREEN}Found ${#PACKAGES[@]} package(s) to test${NC}\n"
# Run tests for each package
# Each package runs independently: a failure is recorded in FAILED_PACKAGES
# but does not stop the remaining packages from being tested.
FAILED_PACKAGES=()
PASSED_PACKAGES=()
for pkg in "${PACKAGES[@]}"; do
pkg_name=$(basename "$pkg")
if run_package_tests "$pkg_name" "$pkg"; then
PASSED_PACKAGES+=("$pkg_name")
else
FAILED_PACKAGES+=("$pkg_name")
fi
done
# Summary
echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN}Test Summary${NC}"
echo -e "${GREEN}========================================${NC}"
echo -e "Passed: ${GREEN}${#PASSED_PACKAGES[@]}${NC}"
echo -e "Failed: ${RED}${#FAILED_PACKAGES[@]}${NC}"
# Any failure makes the whole script exit non-zero so CI marks the run failed.
if [ ${#FAILED_PACKAGES[@]} -gt 0 ]; then
echo -e "\n${RED}Failed packages:${NC}"
for pkg in "${FAILED_PACKAGES[@]}"; do
echo -e " - ${RED}${pkg}${NC}"
done
exit 1
fi
echo -e "\n${GREEN}✓ All tests passed!${NC}"
exit 0

View file

@ -0,0 +1,79 @@
#!/bin/bash
# Cleanup Test Data
#
# Removes test data from databases after test execution.
# Can be used to reset databases to a clean state.
#
# Usage:
# ./scripts/test-data/cleanup-test-data.sh [service]
#
# Examples:
# ./scripts/test-data/cleanup-test-data.sh # Clean all services
# ./scripts/test-data/cleanup-test-data.sh auth # Clean auth only

# Fail fast; also surface failures from any stage of a pipeline.
set -eo pipefail

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Service selector from the first CLI argument; defaults to every service.
SERVICE_FILTER=${1:-"all"}

echo -e "${YELLOW}Cleaning up test data...${NC}"

# Configuration shared by every cleanup step.
export NODE_ENV="test"
export DATABASE_URL_TEMPLATE="postgresql://manacore:devpassword@localhost:5432"
#######################################
# Reset one database to a pristine state by dropping and recreating it.
# psql errors (e.g. server unreachable) are deliberately swallowed so a
# partially-running environment does not abort the cleanup pass.
# Globals:   DATABASE_URL_TEMPLATE (read), DATABASE_URL (written)
# Arguments: $1 - database name
#######################################
cleanup_database() {
  local target_db=$1
  echo -e "\n${YELLOW}Cleaning database: ${target_db}${NC}"
  export DATABASE_URL="${DATABASE_URL_TEMPLATE}/${target_db}"
  local statement
  for statement in "DROP DATABASE IF EXISTS ${target_db};" "CREATE DATABASE ${target_db};"; do
    psql -U manacore -h localhost -c "$statement" postgres 2>/dev/null || true
  done
  echo -e "${GREEN}✓ Cleaned ${target_db}${NC}"
}
# Execute cleanup based on filter
# Dispatch on the requested service: "all" resets every known database, a
# single service name resets only its database, anything else is an error.
case "$SERVICE_FILTER" in
"all")
cleanup_database "manacore"
cleanup_database "chat"
cleanup_database "todo"
cleanup_database "calendar"
cleanup_database "contacts"
cleanup_database "picture"
;;
"auth")
# The auth service stores its data in the shared "manacore" database.
cleanup_database "manacore"
;;
"chat")
cleanup_database "chat"
;;
"todo")
cleanup_database "todo"
;;
"calendar")
cleanup_database "calendar"
;;
"contacts")
cleanup_database "contacts"
;;
"picture")
cleanup_database "picture"
;;
*)
echo -e "${RED}Unknown service: $SERVICE_FILTER${NC}"
echo "Available services: all, auth, chat, todo, calendar, contacts, picture"
exit 1
;;
esac
echo -e "\n${GREEN}✓ Test data cleaned up successfully!${NC}"

View file

@ -0,0 +1,237 @@
#!/bin/bash
# Seed Test Data
#
# Seeds databases with consistent test data for integration and E2E tests.
# Uses predetermined UUIDs and data to ensure reproducible tests.
#
# Usage:
# ./scripts/test-data/seed-test-data.sh [service]
#
# Examples:
# ./scripts/test-data/seed-test-data.sh # Seed all services
# ./scripts/test-data/seed-test-data.sh auth # Seed auth only
# ./scripts/test-data/seed-test-data.sh chat # Seed chat only

# Fail fast; also surface failures from any stage of a pipeline.
set -eo pipefail

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Service selector from the first CLI argument; defaults to every service.
SERVICE_FILTER=${1:-"all"}

echo -e "${GREEN}Seeding test data...${NC}"

# Configuration shared by all seeders.
export NODE_ENV="test"
export DATABASE_URL_TEMPLATE="postgresql://manacore:devpassword@localhost:5432"
#######################################
# Seed the auth service: push the schema for services/mana-core-auth, then
# create a deterministic set of test users (fixed UUIDs) with credential
# accounts and initial credit balances via an inline Node script.
# Globals:   DATABASE_URL_TEMPLATE (read), DATABASE_URL (written)
#######################################
seed_auth() {
  echo -e "\n${GREEN}Seeding mana-core-auth...${NC}"
  export DATABASE_URL="${DATABASE_URL_TEMPLATE}/manacore"
  cd services/mana-core-auth

  # Run migrations
  pnpm run db:push

  # Create test users using Node.js script
  node -e "
  const { db } = require('./src/db/connection');
  const { users, accounts, creditBalances } = require('./src/db/schema/auth.schema');
  // FIX: eq() is used in the existence check below but was never imported, so
  // every iteration threw a ReferenceError that the per-user catch silently
  // logged — no test users were ever created.
  const { eq } = require('drizzle-orm');
  const bcrypt = require('bcrypt');

  async function seedTestUsers() {
    console.log('Creating test users...');
    // Deterministic test user IDs keep fixtures reproducible across runs.
    const testUsers = [
      {
        id: '00000000-0000-0000-0000-000000000001',
        email: 'test-user-1@example.com',
        name: 'Test User 1',
        password: 'TestPassword123!',
      },
      {
        id: '00000000-0000-0000-0000-000000000002',
        email: 'test-user-2@example.com',
        name: 'Test User 2',
        password: 'TestPassword123!',
      },
      {
        id: '00000000-0000-0000-0000-000000000003',
        email: 'admin@example.com',
        name: 'Admin User',
        password: 'AdminPassword123!',
        role: 'admin',
      },
    ];

    for (const user of testUsers) {
      try {
        // Skip users that already exist so re-seeding stays idempotent.
        const existing = await db.select().from(users).where(eq(users.email, user.email)).limit(1);
        if (existing.length > 0) {
          console.log(\`User \${user.email} already exists, skipping\`);
          continue;
        }
        // Hash password
        const hashedPassword = await bcrypt.hash(user.password, 10);
        // Insert user
        await db.insert(users).values({
          id: user.id,
          email: user.email,
          name: user.name,
          emailVerified: true,
          role: user.role || 'user',
        });
        // Insert credential (email/password) account for the user
        await db.insert(accounts).values({
          id: \`\${user.id}-credential\`,
          userId: user.id,
          accountId: user.id,
          providerId: 'credential',
          password: hashedPassword,
        });
        // Initialize credit balance
        await db.insert(creditBalances).values({
          userId: user.id,
          balance: 0,
          freeCreditsRemaining: 150,
          dailyFreeCredits: 5,
        });
        console.log(\`Created test user: \${user.email}\`);
      } catch (error) {
        console.error(\`Error creating user \${user.email}:\`, error);
      }
    }
    console.log('Test users seeded successfully');
    process.exit(0);
  }

  seedTestUsers().catch((error) => {
    // FIX: propagate top-level failures instead of logging and exiting 0,
    // which let CI continue with an unseeded database.
    console.error(error);
    process.exit(1);
  });
  "
  cd ../..
}
# Seed the chat backend: point DATABASE_URL at the chat database, then run the
# db:push (migrations) and db:seed (AI models) scripts when package.json
# defines them.
seed_chat() {
  echo -e "\n${GREEN}Seeding chat...${NC}"
  export DATABASE_URL="${DATABASE_URL_TEMPLATE}/chat"
  cd apps/chat/apps/backend
  local script
  for script in db:push db:seed; do
    if grep -q "$script" package.json; then
      pnpm run "$script"
    fi
  done
  cd ../../../..
}
# Seed the todo backend: point DATABASE_URL at the todo database, then run
# db:push and db:seed when package.json defines them.
seed_todo() {
  echo -e "\n${GREEN}Seeding todo...${NC}"
  export DATABASE_URL="${DATABASE_URL_TEMPLATE}/todo"
  cd apps/todo/apps/backend
  local script
  for script in db:push db:seed; do
    if grep -q "$script" package.json; then
      pnpm run "$script"
    fi
  done
  cd ../../../..
}
# Seed the calendar backend: point DATABASE_URL at the calendar database, then
# run db:push and db:seed when package.json defines them.
seed_calendar() {
  echo -e "\n${GREEN}Seeding calendar...${NC}"
  export DATABASE_URL="${DATABASE_URL_TEMPLATE}/calendar"
  cd apps/calendar/apps/backend
  local script
  for script in db:push db:seed; do
    if grep -q "$script" package.json; then
      pnpm run "$script"
    fi
  done
  cd ../../../..
}
# Seed the contacts backend: point DATABASE_URL at the contacts database, then
# run db:push and db:seed when package.json defines them.
seed_contacts() {
  echo -e "\n${GREEN}Seeding contacts...${NC}"
  export DATABASE_URL="${DATABASE_URL_TEMPLATE}/contacts"
  cd apps/contacts/apps/backend
  local script
  for script in db:push db:seed; do
    if grep -q "$script" package.json; then
      pnpm run "$script"
    fi
  done
  cd ../../../..
}
# Execute seeding based on filter
# Dispatch by service name; "all" seeds every service that has a seeder and
# unknown names abort with the list of valid choices.
# NOTE(review): unlike cleanup-test-data.sh there is no "picture" seeder here
# — confirm the picture service intentionally has no seed data.
case "$SERVICE_FILTER" in
"all")
seed_auth
seed_chat
seed_todo
seed_calendar
seed_contacts
;;
"auth")
seed_auth
;;
"chat")
seed_chat
;;
"todo")
seed_todo
;;
"calendar")
seed_calendar
;;
"contacts")
seed_contacts
;;
*)
echo -e "${RED}Unknown service: $SERVICE_FILTER${NC}"
echo "Available services: all, auth, chat, todo, calendar, contacts"
exit 1
;;
esac
echo -e "\n${GREEN}✓ Test data seeded successfully!${NC}"

View file

@ -0,0 +1,258 @@
# Test Reporting Scripts
Collection of Node.js scripts for aggregating, analyzing, and reporting on test results in the ManaCore monorepo.
## Scripts Overview
| Script | Purpose | Used By |
|--------|---------|---------|
| `aggregate-coverage.js` | Merge coverage reports from multiple test suites | Daily Tests workflow |
| `generate-summary.js` | Create GitHub Actions summary from test results | Daily Tests workflow |
| `detect-flaky-tests.js` | Identify tests that fail intermittently | Daily Tests workflow |
| `track-metrics.js` | Record and track test performance over time | Daily Tests workflow |
| `format-metrics.js` | Format metrics for GitHub summary display | Daily Tests workflow |
## Usage
### Aggregate Coverage
Merges multiple `coverage-summary.json` files into a single aggregated report.
```bash
node aggregate-coverage.js <input-dir> <output-dir>
# Example
node aggregate-coverage.js ./coverage-reports ./aggregated-coverage
```
**Inputs**:
- `input-dir`: Directory containing coverage artifacts (searches recursively)
**Outputs**:
- `total-coverage.json`: Aggregated coverage data
- `summary.md`: Markdown summary of coverage
**Exit Codes**:
- `0`: Success and coverage meets the 80% threshold (also returned when no coverage files are found — aggregation is skipped)
- `1`: Coverage below 80% threshold or error
### Generate Summary
Creates a formatted test summary for GitHub Actions.
```bash
node generate-summary.js <test-results-dir>
# Example
node generate-summary.js ./coverage-reports
```
**Inputs**:
- `test-results-dir`: Directory with test coverage reports
**Outputs**:
- Markdown summary to stdout (captured by GitHub Actions)
### Detect Flaky Tests
Analyzes test results over time to identify flaky tests.
```bash
node detect-flaky-tests.js <test-results-dir>
# Example
node detect-flaky-tests.js ./test-results
```
**Inputs**:
- `test-results-dir`: Directory with test result files
- `test-history.json`: Historical test data (auto-created)
**Outputs**:
- `flaky-tests.json`: List of flaky tests (if any found)
- `test-history.json`: Updated historical data
**Configuration**:
- `FLAKY_THRESHOLD`: 0.1 (test fails 10%+ = flaky)
- `MIN_RUNS`: 3 (minimum runs to detect flakiness)
### Track Metrics
Records test execution time and performance metrics.
```bash
node track-metrics.js <test-results-dir>
# Example
node track-metrics.js ./test-results
```
**Inputs**:
- `test-results-dir`: Directory with test result files
**Outputs**:
- `metrics.json`: Current test metrics
- `metrics-report.md`: Formatted metrics report
- `metrics-history.json`: Historical metrics (90 days)
**Exit Codes**:
- `0`: Success, no performance regressions
- `1`: Performance regression detected
### Format Metrics
Formats metrics.json for display in GitHub Actions summary.
```bash
node format-metrics.js <metrics-file>
# Example
node format-metrics.js ./test-results/metrics.json
```
**Inputs**:
- `metrics-file`: Path to metrics.json
**Outputs**:
- Formatted markdown to stdout
## Data Formats
### Coverage Summary Format
```json
{
"total": {
"lines": { "total": 1000, "covered": 850, "pct": 85 },
"statements": { "total": 1200, "covered": 980, "pct": 81.67 },
"functions": { "total": 150, "covered": 135, "pct": 90 },
"branches": { "total": 400, "covered": 340, "pct": 85 }
},
"suites": {
"mana-core-auth": { /* same structure */ },
"chat-backend": { /* same structure */ }
}
}
```
### Test History Format
```json
{
"suite::testName": {
"name": "should validate JWT tokens",
"suite": "AuthService",
"runs": [
{ "timestamp": "2025-12-25T00:00:00Z", "status": "passed", "duration": 150 },
{ "timestamp": "2025-12-24T00:00:00Z", "status": "failed", "duration": 200 }
]
}
}
```
### Metrics Format
```json
{
"timestamp": "2025-12-25T02:00:00Z",
"totalTests": 500,
"totalDuration": 45000,
"averageDuration": 90,
"slowestTest": {
"name": "should complete full auth flow",
"duration": 2500,
"suite": "integration/auth-flow.spec.ts"
},
"suiteMetrics": {
"mana-core-auth": {
"tests": 120,
"duration": 15000,
"slowestTest": { /* ... */ }
}
}
}
```
## Development
### Adding New Metrics
To track additional metrics:
1. Modify `track-metrics.js` to collect new data
2. Update `format-metrics.js` to display new metrics
3. Update this README with new data format
### Testing Scripts Locally
```bash
# Create mock test results
mkdir -p test-data/coverage-mana-core-auth
echo '{"total":{"lines":{"total":100,"covered":85,"pct":85}}}' > test-data/coverage-mana-core-auth/coverage-summary.json
# Run aggregation
node aggregate-coverage.js test-data aggregated-output
# View output
cat aggregated-output/summary.md
```
## Integration with CI/CD
These scripts are used in `.github/workflows/daily-tests.yml`:
```yaml
- name: Aggregate coverage reports
run: |
node scripts/test-reporting/aggregate-coverage.js coverage-reports aggregated-coverage
- name: Generate test summary
run: |
node scripts/test-reporting/generate-summary.js coverage-reports > $GITHUB_STEP_SUMMARY
- name: Detect flaky tests
run: |
node scripts/test-reporting/detect-flaky-tests.js test-results
- name: Track metrics
run: |
node scripts/test-reporting/track-metrics.js test-results
```
## Troubleshooting
### No coverage files found
**Problem**: `Found 0 coverage files`
**Solutions**:
- Ensure tests ran with coverage: `pnpm test:cov`
- Check coverage output directory exists
- Verify `coverage-summary.json` is generated
### Flaky test detection not working
**Problem**: Known flaky tests not detected
**Solutions**:
- Need minimum 3 test runs for detection
- Check `test-history.json` has data
- Verify test names are consistent across runs
### Performance regression false positive
**Problem**: Script reports regression when none exists
**Solutions**:
- Check if test suite changed (more/fewer tests)
- Review `metrics-history.json` for anomalies
- Adjust regression threshold if needed
## Dependencies
All scripts use Node.js built-in modules only:
- `fs`: File system operations
- `path`: Path manipulation
- No external npm packages required
This keeps the scripts lightweight and reduces dependency risks.

View file

@ -0,0 +1,158 @@
#!/usr/bin/env node
/* eslint-disable @typescript-eslint/no-require-imports, no-console */
/**
* Aggregate Coverage Reports
*
* Merges multiple coverage reports from different test suites into a single
* aggregated report for overall project coverage analysis.
*
* Usage:
* node aggregate-coverage.js <input-dir> <output-dir>
*/
const fs = require('fs');
const path = require('path');
function findCoverageFiles(dir) {
const coverageFiles = [];
function walk(currentDir) {
const entries = fs.readdirSync(currentDir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(currentDir, entry.name);
if (entry.isDirectory()) {
walk(fullPath);
} else if (entry.name === 'coverage-summary.json') {
coverageFiles.push(fullPath);
}
}
}
walk(dir);
return coverageFiles;
}
function mergeCoverage(coverageFiles) {
const merged = {
total: {
lines: { total: 0, covered: 0, skipped: 0, pct: 0 },
statements: { total: 0, covered: 0, skipped: 0, pct: 0 },
functions: { total: 0, covered: 0, skipped: 0, pct: 0 },
branches: { total: 0, covered: 0, skipped: 0, pct: 0 },
},
suites: {},
};
for (const file of coverageFiles) {
const content = JSON.parse(fs.readFileSync(file, 'utf8'));
const suiteName = path.basename(path.dirname(path.dirname(file)));
// Store individual suite data
merged.suites[suiteName] = content.total;
// Aggregate totals
if (content.total) {
['lines', 'statements', 'functions', 'branches'].forEach((metric) => {
merged.total[metric].total += content.total[metric].total || 0;
merged.total[metric].covered += content.total[metric].covered || 0;
merged.total[metric].skipped += content.total[metric].skipped || 0;
});
}
}
// Calculate percentages
['lines', 'statements', 'functions', 'branches'].forEach((metric) => {
if (merged.total[metric].total > 0) {
merged.total[metric].pct = (merged.total[metric].covered / merged.total[metric].total) * 100;
}
});
return merged;
}
/**
 * Render the aggregated coverage object as a markdown report with an overall
 * table (icon per threshold band) and a per-suite breakdown table.
 * @param {{total: object, suites: object}} coverage - output of mergeCoverage
 * @returns {string} markdown text
 */
function generateMarkdownSummary(coverage) {
  const metrics = ['lines', 'statements', 'functions', 'branches'];
  const capitalize = (word) => word.charAt(0).toUpperCase() + word.slice(1);
  const statusIcon = (pct) => (pct >= 80 ? '✅' : pct >= 60 ? '⚠️' : '❌');

  const parts = [
    '# Test Coverage Summary\n\n',
    '## Overall Coverage\n\n',
    '| Metric | Coverage | Total | Covered |\n',
    '|--------|----------|-------|--------|\n',
  ];
  metrics.forEach((metric) => {
    const data = coverage.total[metric];
    parts.push(
      `| ${capitalize(metric)} | ${statusIcon(data.pct)} ${data.pct.toFixed(2)}% | ${data.total} | ${data.covered} |\n`
    );
  });

  parts.push('\n## Coverage by Test Suite\n\n');
  parts.push('| Suite | Lines | Statements | Functions | Branches |\n');
  parts.push('|-------|-------|------------|-----------|----------|\n');
  Object.entries(coverage.suites).forEach(([suite, data]) => {
    const cells = metrics.map((metric) => `${data[metric].pct.toFixed(1)}%`);
    parts.push(`| ${suite} | ${cells.join(' | ')} |\n`);
  });
  return parts.join('');
}
/**
 * CLI entry point. Usage: node aggregate-coverage.js <input-dir> <output-dir>.
 * Writes total-coverage.json and summary.md into <output-dir>; exits 1 when
 * aggregate line coverage is below the hard-coded 80% threshold, 0 otherwise
 * (including when no coverage files were found at all).
 */
function main() {
const inputDir = process.argv[2];
const outputDir = process.argv[3];
if (!inputDir || !outputDir) {
console.error('Usage: node aggregate-coverage.js <input-dir> <output-dir>');
process.exit(1);
}
// Ensure output directory exists
if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
}
// Find all coverage files
console.log(`Searching for coverage files in ${inputDir}...`);
const coverageFiles = findCoverageFiles(inputDir);
console.log(`Found ${coverageFiles.length} coverage files`);
// Nothing to do is a success: the workflow may run before any suite emits coverage.
if (coverageFiles.length === 0) {
console.log('No coverage files found. Skipping aggregation.');
process.exit(0);
}
// Merge coverage data
console.log('Merging coverage data...');
const merged = mergeCoverage(coverageFiles);
// Write aggregated coverage
const outputFile = path.join(outputDir, 'total-coverage.json');
fs.writeFileSync(outputFile, JSON.stringify(merged, null, 2));
console.log(`Wrote aggregated coverage to ${outputFile}`);
// Generate markdown summary
const summary = generateMarkdownSummary(merged);
const summaryFile = path.join(outputDir, 'summary.md');
fs.writeFileSync(summaryFile, summary);
console.log(`Wrote summary to ${summaryFile}`);
// Output summary to console
console.log('\n' + summary);
// Exit with error if coverage is too low (line coverage is the gating metric).
if (merged.total.lines.pct < 80) {
console.error(`\n❌ Coverage ${merged.total.lines.pct.toFixed(2)}% is below 80% threshold`);
process.exit(1);
}
console.log(`\n✅ Coverage ${merged.total.lines.pct.toFixed(2)}% meets 80% threshold`);
}
main();

View file

@ -0,0 +1,235 @@
#!/usr/bin/env node
/* eslint-disable @typescript-eslint/no-require-imports, no-console */
/**
* Detect Flaky Tests
*
* Analyzes test results over time to identify tests that fail intermittently.
* A test is considered flaky if it fails sometimes but not always.
*
* Uses historical data from previous runs stored in GitHub Actions artifacts.
*
* Usage:
* node detect-flaky-tests.js <test-results-dir>
*/
const fs = require('fs');
const path = require('path');
// Configuration
const FLAKY_THRESHOLD = 0.1; // Test fails 10%+ of the time = flaky
const MIN_RUNS = 3; // Need at least 3 runs to detect flakiness
function loadTestHistory(resultsDir) {
const historyFile = path.join(resultsDir, 'test-history.json');
if (!fs.existsSync(historyFile)) {
return {};
}
return JSON.parse(fs.readFileSync(historyFile, 'utf8'));
}
function saveTestHistory(resultsDir, history) {
const historyFile = path.join(resultsDir, 'test-history.json');
fs.writeFileSync(historyFile, JSON.stringify(history, null, 2));
}
function findTestResultFiles(dir) {
const results = [];
function walk(currentDir) {
if (!fs.existsSync(currentDir)) {
return;
}
const entries = fs.readdirSync(currentDir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(currentDir, entry.name);
if (entry.isDirectory()) {
walk(fullPath);
} else if (entry.name.match(/test.*results.*\.json$/i)) {
results.push(fullPath);
}
}
}
walk(dir);
return results;
}
function parseTestResults(files) {
const allTests = [];
for (const file of files) {
try {
const content = JSON.parse(fs.readFileSync(file, 'utf8'));
// Handle different test result formats (Jest, Vitest, etc.)
if (content.testResults) {
// Jest format
content.testResults.forEach((suite) => {
suite.assertionResults?.forEach((test) => {
allTests.push({
name: test.fullName || test.title,
status: test.status,
duration: test.duration,
suite: suite.name,
});
});
});
} else if (content.tests) {
// Generic format
content.tests.forEach((test) => {
allTests.push({
name: test.name || test.title,
status: test.status || (test.pass ? 'passed' : 'failed'),
duration: test.duration,
suite: test.suite || 'unknown',
});
});
}
} catch (error) {
console.error(`Error parsing ${file}:`, error.message);
}
}
return allTests;
}
/**
 * Fold the current run's test records into the history map (mutated in
 * place), stamping each record with a shared ISO timestamp and capping each
 * test's history at the 30 most recent runs.
 */
function updateHistory(history, currentTests) {
  const now = new Date().toISOString();
  currentTests.forEach((test) => {
    const key = `${test.suite}::${test.name}`;
    if (!history[key]) {
      history[key] = { name: test.name, suite: test.suite, runs: [] };
    }
    const entry = history[key];
    entry.runs.push({ timestamp: now, status: test.status, duration: test.duration });
    // Keep only last 30 runs
    if (entry.runs.length > 30) {
      entry.runs = entry.runs.slice(-30);
    }
  });
  return history;
}
/**
 * Identify flaky tests: tests with enough recorded runs whose failure rate is
 * above the flaky threshold but below 100% (always-failing tests are broken,
 * not flaky).
 *
 * FIX: `lastFailure` now matches both 'failed' and 'fail' statuses — failure
 * counting accepted both, but the lastFailure lookup only matched 'failed',
 * so suites reporting 'fail' showed `lastFailure: undefined`.
 * Thresholds are exposed as defaulted parameters (backward-compatible).
 *
 * @param {object} history - map of "suite::name" -> {name, suite, runs}
 * @param {number} [flakyThreshold] - minimum failure rate to flag (0..1)
 * @param {number} [minRuns] - minimum runs required before judging a test
 * @returns {Array} flaky tests sorted by failure rate, descending
 */
function detectFlakyTests(history, flakyThreshold = FLAKY_THRESHOLD, minRuns = MIN_RUNS) {
  const isFailure = (run) => run.status === 'failed' || run.status === 'fail';
  const flakyTests = [];
  for (const data of Object.values(history)) {
    if (data.runs.length < minRuns) {
      continue;
    }
    const totalRuns = data.runs.length;
    const failures = data.runs.filter(isFailure).length;
    const failureRate = failures / totalRuns;
    // Flaky: sometimes passes, sometimes fails, above the noise threshold.
    if (failureRate > 0 && failureRate < 1 && failureRate >= flakyThreshold) {
      flakyTests.push({
        name: data.name,
        suite: data.suite,
        totalRuns,
        failures,
        failureRate: (failureRate * 100).toFixed(1),
        lastFailure: data.runs
          .slice()
          .reverse()
          .find(isFailure)?.timestamp,
      });
    }
  }
  // Sort by failure rate (descending). failureRate is a formatted string, so
  // compare numerically rather than relying on implicit coercion.
  flakyTests.sort((a, b) => parseFloat(b.failureRate) - parseFloat(a.failureRate));
  return flakyTests;
}
/**
 * Build a human-readable report object {summary, tests} from the flaky-test
 * list produced by detectFlakyTests.
 */
function generateFlakyReport(flakyTests) {
  if (flakyTests.length === 0) {
    return { summary: 'No flaky tests detected. ✅', tests: [] };
  }
  const details = flakyTests
    .map(
      (t) =>
        `- **${t.name}**\n - Suite: ${t.suite}\n - Failure rate: ${t.failureRate}%\n - Last failure: ${t.lastFailure}`
    )
    .join('\n\n');
  return {
    summary:
      `Found ${flakyTests.length} flaky test(s). ⚠️\n\n` +
      'These tests fail intermittently and should be investigated:\n\n' +
      details,
    tests: flakyTests,
  };
}
/**
 * CLI entry point. Usage: node detect-flaky-tests.js <test-results-dir>.
 * Loads history, folds in fresh result files found under the directory,
 * persists the updated history, and writes flaky-tests.json when flaky
 * tests are detected.
 */
function main() {
const resultsDir = process.argv[2];
if (!resultsDir) {
console.error('Usage: node detect-flaky-tests.js <test-results-dir>');
process.exit(1);
}
console.log('Detecting flaky tests...');
// Ensure results directory exists
if (!fs.existsSync(resultsDir)) {
fs.mkdirSync(resultsDir, { recursive: true });
}
// Load historical data
const history = loadTestHistory(resultsDir);
console.log(`Loaded history for ${Object.keys(history).length} tests`);
// Find and parse current test results
const resultFiles = findTestResultFiles(resultsDir);
console.log(`Found ${resultFiles.length} test result files`);
if (resultFiles.length > 0) {
const currentTests = parseTestResults(resultFiles);
console.log(`Parsed ${currentTests.length} test results`);
// Update history
const updatedHistory = updateHistory(history, currentTests);
saveTestHistory(resultsDir, updatedHistory);
}
// Detect flaky tests. Note: updateHistory mutates `history` in place, so the
// freshly-parsed runs above are included in this detection pass as well.
const flakyTests = detectFlakyTests(history);
const report = generateFlakyReport(flakyTests);
// Save flaky tests report (only written when something was flagged)
if (flakyTests.length > 0) {
const flakyFile = path.join(resultsDir, 'flaky-tests.json');
fs.writeFileSync(flakyFile, JSON.stringify(flakyTests, null, 2));
console.log(`\n${report.summary}`);
console.log(`\nFlaky tests report saved to ${flakyFile}`);
} else {
console.log('\n✅ No flaky tests detected!');
}
}
main();

View file

@ -0,0 +1,64 @@
#!/usr/bin/env node
/* eslint-disable @typescript-eslint/no-require-imports, no-console */
/**
* Format Metrics for GitHub Summary
*
* Formats test performance metrics for display in GitHub Actions summary.
*
* Usage:
* node format-metrics.js <metrics-file>
*/
const fs = require('fs');
/**
 * Render a millisecond count as a compact human-readable duration:
 * sub-second values stay in ms, everything else becomes seconds with two
 * decimal places.
 */
function formatDuration(ms) {
  return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(2)}s`;
}

/**
 * Render a metrics object as markdown: headline totals, the slowest test (if
 * recorded), and a per-suite performance table.
 */
function formatMetrics(metrics) {
  const parts = [
    `\n**Total Tests:** ${metrics.totalTests}\n`,
    `**Total Duration:** ${formatDuration(metrics.totalDuration)}\n`,
    `**Average Duration:** ${formatDuration(metrics.averageDuration)}\n\n`,
  ];
  if (metrics.slowestTest) {
    parts.push(`**Slowest Test:** ${metrics.slowestTest.name} (${formatDuration(metrics.slowestTest.duration)})\n\n`);
  }
  parts.push('### Suite Performance\n\n');
  parts.push('| Suite | Tests | Duration | Avg/Test |\n');
  parts.push('|-------|-------|----------|----------|\n');
  for (const [suite, data] of Object.entries(metrics.suiteMetrics)) {
    const avgPerTest = data.tests > 0 ? Math.round(data.duration / data.tests) : 0;
    parts.push(`| ${suite} | ${data.tests} | ${formatDuration(data.duration)} | ${formatDuration(avgPerTest)} |\n`);
  }
  return parts.join('');
}
/**
 * CLI entry point. Usage: node format-metrics.js <metrics-file>.
 * Prints the formatted markdown to stdout for capture by GitHub Actions.
 */
function main() {
const metricsFile = process.argv[2];
if (!metricsFile) {
console.error('Usage: node format-metrics.js <metrics-file>');
process.exit(1);
}
// A missing metrics file is not an error: the workflow may simply have
// produced no metrics for this run, so exit 0 with a note.
if (!fs.existsSync(metricsFile)) {
console.log('No metrics file found.');
return;
}
const metrics = JSON.parse(fs.readFileSync(metricsFile, 'utf8'));
const formatted = formatMetrics(metrics);
console.log(formatted);
}
main();

View file

@ -0,0 +1,130 @@
#!/usr/bin/env node
/* eslint-disable @typescript-eslint/no-require-imports, no-console */
/**
* Generate Test Summary
*
* Creates a GitHub Actions summary with test results, coverage, and trends.
*
* Usage:
* node generate-summary.js <test-results-dir>
*/
const fs = require('fs');
const path = require('path');
function findTestResults(dir) {
const results = {
coverage: [],
testResults: [],
};
function walk(currentDir) {
if (!fs.existsSync(currentDir)) {
return;
}
const entries = fs.readdirSync(currentDir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(currentDir, entry.name);
if (entry.isDirectory()) {
walk(fullPath);
} else if (entry.name === 'coverage-summary.json') {
results.coverage.push(fullPath);
} else if (entry.name.includes('test-results.json')) {
results.testResults.push(fullPath);
}
}
}
walk(dir);
return results;
}
/**
 * Build a markdown summary of per-suite coverage for the GitHub Actions step
 * summary: a coverage table, overall pass/fail counts, and recommendations
 * for suites whose average coverage falls below 80%.
 *
 * FIX 1: a single malformed coverage file no longer aborts the whole summary
 * (it is logged and skipped).
 * FIX 2: guard the success-rate division — previously, when reports existed
 * but none had a "total" section, suites.length was 0 and the summary
 * printed "Success Rate: NaN%".
 */
function generateSummary(resultsDir) {
  const { coverage } = findTestResults(resultsDir);
  let summary = '# 🧪 Daily Test Suite Results\n\n';
  summary += `**Date:** ${new Date().toISOString().split('T')[0]}\n\n`;
  if (coverage.length === 0) {
    summary += '⚠️ No coverage reports found.\n';
    return summary;
  }
  // Aggregate coverage stats
  const suites = [];
  let totalPassed = 0;
  let totalFailed = 0;
  coverage.forEach((file) => {
    let content;
    try {
      content = JSON.parse(fs.readFileSync(file, 'utf8'));
    } catch (error) {
      console.error(`Skipping unreadable coverage file ${file}:`, error.message);
      return;
    }
    // Suite name comes from the grandparent directory of the report file.
    const suiteName = path.basename(path.dirname(path.dirname(file)));
    if (content.total) {
      suites.push({
        name: suiteName,
        lines: content.total.lines.pct,
        statements: content.total.statements.pct,
        functions: content.total.functions.pct,
        branches: content.total.branches.pct,
      });
    }
  });
  // Coverage table
  summary += '## Coverage by Suite\n\n';
  summary += '| Suite | Lines | Statements | Functions | Branches | Status |\n';
  summary += '|-------|-------|------------|-----------|----------|--------|\n';
  suites.forEach((suite) => {
    const avgCoverage = (suite.lines + suite.statements + suite.functions + suite.branches) / 4;
    const status = avgCoverage >= 80 ? '✅ Pass' : avgCoverage >= 60 ? '⚠️ Warning' : '❌ Fail';
    summary += `| ${suite.name} | ${suite.lines.toFixed(1)}% | ${suite.statements.toFixed(1)}% | ${suite.functions.toFixed(1)}% | ${suite.branches.toFixed(1)}% | ${status} |\n`;
    if (avgCoverage >= 80) {
      totalPassed++;
    } else {
      totalFailed++;
    }
  });
  // Overall stats
  summary += '\n## Overall Statistics\n\n';
  summary += `- **Total Test Suites:** ${suites.length}\n`;
  summary += `- **Passed:** ${totalPassed}\n`;
  summary += `- **Failed:** ${totalFailed}\n`;
  const successRate = suites.length > 0 ? ((totalPassed / suites.length) * 100).toFixed(1) : '0.0';
  summary += `- **Success Rate:** ${successRate}%\n`;
  // Recommendations for suites below the 80% average-coverage bar
  if (totalFailed > 0) {
    summary += '\n## ⚠️ Recommendations\n\n';
    summary += 'The following test suites need attention:\n\n';
    suites
      .filter((s) => (s.lines + s.statements + s.functions + s.branches) / 4 < 80)
      .forEach((suite) => {
        summary += `- **${suite.name}**: Improve coverage (currently ${((suite.lines + suite.statements + suite.functions + suite.branches) / 4).toFixed(1)}%)\n`;
      });
  }
  return summary;
}
// CLI entry point.
//
// Usage: node generate-summary.js <test-results-dir>
// Prints the generated Markdown summary to stdout (the caller can redirect
// it into a GitHub Actions step summary); exits 1 with a usage message when
// the results-directory argument is missing.
function main() {
  const resultsDir = process.argv[2];
  if (!resultsDir) {
    console.error('Usage: node generate-summary.js <test-results-dir>');
    process.exit(1);
  }
  const summary = generateSummary(resultsDir);
  console.log(summary);
}
main();

View file

@ -0,0 +1,265 @@
#!/usr/bin/env node
/* eslint-disable @typescript-eslint/no-require-imports, no-console */
/**
* Track Test Performance Metrics
*
* Records test execution time, memory usage, and other performance metrics
* to track trends over time and identify performance regressions.
*
* Usage:
* node track-metrics.js <test-results-dir>
*/
const fs = require('fs');
const path = require('path');
function loadMetricsHistory(resultsDir) {
const historyFile = path.join(resultsDir, 'metrics-history.json');
if (!fs.existsSync(historyFile)) {
return [];
}
return JSON.parse(fs.readFileSync(historyFile, 'utf8'));
}
function saveMetricsHistory(resultsDir, history) {
const historyFile = path.join(resultsDir, 'metrics-history.json');
fs.writeFileSync(historyFile, JSON.stringify(history, null, 2));
}
function findTestResultFiles(dir) {
const results = [];
function walk(currentDir) {
if (!fs.existsSync(currentDir)) {
return;
}
const entries = fs.readdirSync(currentDir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(currentDir, entry.name);
if (entry.isDirectory()) {
walk(fullPath);
} else if (entry.name.match(/test.*results.*\.json$/i)) {
results.push(fullPath);
}
}
}
walk(dir);
return results;
}
function calculateMetrics(resultFiles) {
let totalTests = 0;
let totalDuration = 0;
let slowestTest = null;
const suiteMetrics = {};
for (const file of resultFiles) {
try {
const content = JSON.parse(fs.readFileSync(file, 'utf8'));
const suiteName = path.basename(path.dirname(file));
if (!suiteMetrics[suiteName]) {
suiteMetrics[suiteName] = {
tests: 0,
duration: 0,
slowestTest: null,
};
}
// Jest format
if (content.testResults) {
content.testResults.forEach((suite) => {
const suiteTests = suite.assertionResults || [];
totalTests += suiteTests.length;
suiteMetrics[suiteName].tests += suiteTests.length;
suiteTests.forEach((test) => {
const duration = test.duration || 0;
totalDuration += duration;
suiteMetrics[suiteName].duration += duration;
if (!slowestTest || duration > slowestTest.duration) {
slowestTest = {
name: test.fullName || test.title,
duration,
suite: suite.name,
};
}
if (
!suiteMetrics[suiteName].slowestTest ||
duration > suiteMetrics[suiteName].slowestTest.duration
) {
suiteMetrics[suiteName].slowestTest = {
name: test.fullName || test.title,
duration,
};
}
});
});
}
// Handle other formats...
} catch (error) {
console.error(`Error parsing ${file}:`, error.message);
}
}
return {
timestamp: new Date().toISOString(),
totalTests,
totalDuration: Math.round(totalDuration),
averageDuration: totalTests > 0 ? Math.round(totalDuration / totalTests) : 0,
slowestTest,
suiteMetrics,
};
}
/**
 * Compare the current run against the most recent history entry and flag
 * significant slowdowns.
 *
 * Thresholds: > 20% increase in total duration, > 30% increase per suite.
 * Comparisons are only made against a non-zero baseline — dividing by a
 * zero previous duration would yield Infinity/NaN percentages and either
 * bogus "Infinity%" regressions or silently broken checks.
 *
 * @param {object} currentMetrics - Snapshot from calculateMetrics().
 * @param {Array<object>} history - Prior snapshots, oldest first.
 * @returns {Array<object>} Regression descriptors (empty when none found).
 */
function detectRegressions(currentMetrics, history) {
  if (history.length === 0) {
    return [];
  }
  const regressions = [];
  const lastRun = history[history.length - 1];
  // Check total duration increase (guard against a zero baseline)
  if (lastRun.totalDuration > 0) {
    const durationIncrease =
      ((currentMetrics.totalDuration - lastRun.totalDuration) / lastRun.totalDuration) * 100;
    if (durationIncrease > 20) {
      regressions.push({
        type: 'duration',
        message: `Total test duration increased by ${durationIncrease.toFixed(1)}%`,
        previous: lastRun.totalDuration,
        current: currentMetrics.totalDuration,
      });
    }
  }
  // Check per-suite regressions (same zero-baseline guard per suite)
  for (const [suite, metrics] of Object.entries(currentMetrics.suiteMetrics)) {
    const previousSuite = lastRun.suiteMetrics?.[suite];
    if (previousSuite && previousSuite.duration > 0) {
      const suiteIncrease =
        ((metrics.duration - previousSuite.duration) / previousSuite.duration) * 100;
      if (suiteIncrease > 30) {
        regressions.push({
          type: 'suite',
          suite,
          message: `${suite} duration increased by ${suiteIncrease.toFixed(1)}%`,
          previous: previousSuite.duration,
          current: metrics.duration,
        });
      }
    }
  }
  return regressions;
}
/**
 * Render a Markdown performance report from a metrics snapshot.
 *
 * Emits a summary header, an optional "Performance Regressions Detected"
 * section when `regressions` is non-empty, and a per-suite table with
 * test counts, durations, and each suite's slowest test.
 *
 * @param {object} metrics - Snapshot from calculateMetrics().
 * @param {Array<object>} regressions - Output of detectRegressions().
 * @returns {string} Markdown report text.
 */
function generateMetricsReport(metrics, regressions) {
  const dateOnly = new Date(metrics.timestamp).toISOString().split('T')[0];
  let out = '# Test Performance Metrics\n\n';
  // Summary
  out += `**Date:** ${dateOnly}\n\n`;
  out += `- **Total Tests:** ${metrics.totalTests}\n`;
  out += `- **Total Duration:** ${(metrics.totalDuration / 1000).toFixed(2)}s\n`;
  out += `- **Average Duration:** ${metrics.averageDuration}ms per test\n`;
  if (metrics.slowestTest) {
    out += `- **Slowest Test:** ${metrics.slowestTest.name} (${metrics.slowestTest.duration}ms)\n`;
  }
  // Performance regressions
  if (regressions.length > 0) {
    out += '\n## ⚠️ Performance Regressions Detected\n\n';
    for (const regression of regressions) {
      out += `- ${regression.message}\n`;
      out += `  - Previous: ${regression.previous}ms\n`;
      out += `  - Current: ${regression.current}ms\n`;
    }
  }
  // Suite breakdown
  out += '\n## Suite Performance\n\n';
  out += '| Suite | Tests | Duration | Avg/Test | Slowest |\n';
  out += '|-------|-------|----------|----------|--------|\n';
  for (const [suiteName, stats] of Object.entries(metrics.suiteMetrics)) {
    const perTest = stats.tests > 0 ? Math.round(stats.duration / stats.tests) : 0;
    const slowestCell = stats.slowestTest ? `${stats.slowestTest.duration}ms` : 'N/A';
    out += `| ${suiteName} | ${stats.tests} | ${stats.duration}ms | ${perTest}ms | ${slowestCell} |\n`;
  }
  return out;
}
// CLI entry point.
//
// Usage: node track-metrics.js <test-results-dir>
// Pipeline: find result files → aggregate metrics → compare against stored
// history → persist trimmed history, current metrics, and a Markdown report.
// Exits 1 on missing argument or when any performance regression is
// detected, so a CI step running this script fails visibly.
function main() {
  const resultsDir = process.argv[2];
  if (!resultsDir) {
    console.error('Usage: node track-metrics.js <test-results-dir>');
    process.exit(1);
  }
  console.log('Tracking test performance metrics...');
  // Ensure results directory exists
  if (!fs.existsSync(resultsDir)) {
    fs.mkdirSync(resultsDir, { recursive: true });
  }
  // Find test result files
  const resultFiles = findTestResultFiles(resultsDir);
  console.log(`Found ${resultFiles.length} test result files`);
  if (resultFiles.length === 0) {
    console.log('No test results to analyze.');
    return;
  }
  // Calculate current metrics
  const currentMetrics = calculateMetrics(resultFiles);
  console.log(`Analyzed ${currentMetrics.totalTests} tests`);
  // Load history and detect regressions (baseline is the last stored run)
  const history = loadMetricsHistory(resultsDir);
  const regressions = detectRegressions(currentMetrics, history);
  // Update history
  history.push(currentMetrics);
  // Keep only last 90 days
  const ninetyDaysAgo = Date.now() - 90 * 24 * 60 * 60 * 1000;
  const filteredHistory = history.filter((m) => new Date(m.timestamp).getTime() > ninetyDaysAgo);
  saveMetricsHistory(resultsDir, filteredHistory);
  // Save current metrics
  const metricsFile = path.join(resultsDir, 'metrics.json');
  fs.writeFileSync(metricsFile, JSON.stringify(currentMetrics, null, 2));
  // Generate and save report
  const report = generateMetricsReport(currentMetrics, regressions);
  const reportFile = path.join(resultsDir, 'metrics-report.md');
  fs.writeFileSync(reportFile, report);
  console.log(`\n${report}`);
  console.log(`\nMetrics saved to ${metricsFile}`);
  // Non-zero exit makes the CI job report the regression as a failure
  if (regressions.length > 0) {
    console.error(`\n⚠️ ${regressions.length} performance regression(s) detected!`);
    process.exit(1);
  }
}
main();