refactor(services): rename Go services, remove -go suffix

mana-search-go → mana-search mana-notify-go → mana-notify mana-crawler-go → mana-crawler mana-api-gateway-go → mana-api-gateway Legacy NestJS versions are deleted, suffix no longer needed. Updated all references in docker-compose, CLAUDE.md, package.json, Forgejo workflows, and service package.json files. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-22 13:26:42 +02:00 · 2026-03-28 10:18:40 +01:00 · 2026-03-28 10:18:40 +01:00 · 7e931b1c6d
commit 7e931b1c6d
parent 79080d6654
90 changed files with 41 additions and 38 deletions
--- a/services/mana-crawler/internal/handler/handler.go
+++ b/services/mana-crawler/internal/handler/handler.go
@ -0,0 +1,291 @@
+package handler
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"net/http"
+	"net/url"
+	"strconv"
+	"time"
+
+	"github.com/jackc/pgx/v5/pgxpool"
+	"github.com/manacore/mana-crawler/internal/crawler"
+)
+
+// Handler serves the crawler HTTP API.
+type Handler struct {
+	pool    *pgxpool.Pool
+	crawler *crawler.Crawler
+}
+
+// NewHandler creates a new handler.
+func NewHandler(pool *pgxpool.Pool, c *crawler.Crawler) *Handler {
+	return &Handler{pool: pool, crawler: c}
+}
+
+// StartCrawl handles POST /api/v1/crawl
+func (h *Handler) StartCrawl(w http.ResponseWriter, r *http.Request) {
+	var body struct {
+		StartURL   string               `json:"startUrl"`
+		Config     *crawler.CrawlConfig `json:"config"`
+		WebhookURL string               `json:"webhookUrl"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request"})
+		return
+	}
+
+	if body.StartURL == "" {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "startUrl is required"})
+		return
+	}
+
+	parsed, err := url.Parse(body.StartURL)
+	if err != nil || parsed.Host == "" {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid URL"})
+		return
+	}
+
+	// Defaults
+	cfg := crawler.CrawlConfig{
+		MaxDepth:      3,
+		MaxPages:      100,
+		RateLimit:     2,
+		RespectRobots: true,
+		OutputFormat:  "markdown",
+	}
+	if body.Config != nil {
+		if body.Config.MaxDepth > 0 {
+			cfg.MaxDepth = body.Config.MaxDepth
+		}
+		if body.Config.MaxPages > 0 {
+			cfg.MaxPages = body.Config.MaxPages
+		}
+		if body.Config.RateLimit > 0 {
+			cfg.RateLimit = body.Config.RateLimit
+		}
+		cfg.RespectRobots = body.Config.RespectRobots
+		cfg.IncludePatterns = body.Config.IncludePatterns
+		cfg.ExcludePatterns = body.Config.ExcludePatterns
+		cfg.Selectors = body.Config.Selectors
+		if body.Config.OutputFormat != "" {
+			cfg.OutputFormat = body.Config.OutputFormat
+		}
+	}
+
+	// Insert job
+	var jobID string
+	configJSON, _ := json.Marshal(cfg)
+	err = h.pool.QueryRow(r.Context(), `
+		INSERT INTO crawler.crawl_jobs (start_url, domain, max_depth, max_pages, rate_limit, respect_robots, selectors, output, status)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'pending')
+		RETURNING id
+	`, body.StartURL, parsed.Host, cfg.MaxDepth, cfg.MaxPages, cfg.RateLimit, cfg.RespectRobots,
+		string(configJSON), fmt.Sprintf(`{"format":"%s"}`, cfg.OutputFormat)).Scan(&jobID)
+	if err != nil {
+		slog.Error("create job failed", "error", err)
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to create job"})
+		return
+	}
+
+	// Start crawl (use background context so it outlives the HTTP request)
+	if err := h.crawler.StartJob(context.Background(), jobID, body.StartURL, cfg); err != nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to start crawl"})
+		return
+	}
+
+	writeJSON(w, http.StatusCreated, map[string]any{
+		"jobId":    jobID,
+		"status":   "running",
+		"startUrl": body.StartURL,
+		"domain":   parsed.Host,
+		"config":   cfg,
+	})
+}
+
+// GetJob handles GET /api/v1/crawl/{jobId}
+func (h *Handler) GetJob(w http.ResponseWriter, r *http.Request) {
+	jobID := r.PathValue("jobId")
+
+	var job struct {
+		ID          string     `json:"jobId"`
+		StartURL    string     `json:"startUrl"`
+		Domain      string     `json:"domain"`
+		Status      string     `json:"status"`
+		Progress    string     `json:"progress"`
+		Error       *string    `json:"error"`
+		StartedAt   *time.Time `json:"startedAt"`
+		CompletedAt *time.Time `json:"completedAt"`
+		CreatedAt   time.Time  `json:"createdAt"`
+	}
+
+	err := h.pool.QueryRow(r.Context(), `
+		SELECT id, start_url, domain, status, COALESCE(progress::text, '{}'), error, started_at, completed_at, created_at
+		FROM crawler.crawl_jobs WHERE id = $1
+	`, jobID).Scan(&job.ID, &job.StartURL, &job.Domain, &job.Status, &job.Progress, &job.Error, &job.StartedAt, &job.CompletedAt, &job.CreatedAt)
+	if err != nil {
+		writeJSON(w, http.StatusNotFound, map[string]string{"error": "job not found"})
+		return
+	}
+
+	writeJSON(w, http.StatusOK, job)
+}
+
+// GetJobResults handles GET /api/v1/crawl/{jobId}/results
+func (h *Handler) GetJobResults(w http.ResponseWriter, r *http.Request) {
+	jobID := r.PathValue("jobId")
+	page, _ := strconv.Atoi(r.URL.Query().Get("page"))
+	limit, _ := strconv.Atoi(r.URL.Query().Get("limit"))
+	if page < 1 {
+		page = 1
+	}
+	if limit < 1 || limit > 100 {
+		limit = 50
+	}
+	offset := (page - 1) * limit
+
+	// Count total
+	var total int
+	h.pool.QueryRow(r.Context(), `SELECT COUNT(*) FROM crawler.crawl_results WHERE job_id = $1`, jobID).Scan(&total)
+
+	rows, err := h.pool.Query(r.Context(), `
+		SELECT id, url, parent_url, depth, title, content, markdown, links, metadata, status_code, error, created_at
+		FROM crawler.crawl_results WHERE job_id = $1 ORDER BY created_at LIMIT $2 OFFSET $3
+	`, jobID, limit, offset)
+	if err != nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
+		return
+	}
+	defer rows.Close()
+
+	var results []map[string]any
+	for rows.Next() {
+		var id, u string
+		var parentURL, title, content, markdown, links, metadata, errMsg *string
+		var depth, statusCode int
+		var createdAt time.Time
+
+		rows.Scan(&id, &u, &parentURL, &depth, &title, &content, &markdown, &links, &metadata, &statusCode, &errMsg, &createdAt)
+		results = append(results, map[string]any{
+			"id": id, "url": u, "parentUrl": parentURL, "depth": depth,
+			"title": title, "content": content, "markdown": markdown,
+			"links": links, "metadata": metadata,
+			"statusCode": statusCode, "error": errMsg, "createdAt": createdAt,
+		})
+	}
+
+	if results == nil {
+		results = []map[string]any{}
+	}
+
+	writeJSON(w, http.StatusOK, map[string]any{
+		"results": results,
+		"pagination": map[string]any{
+			"page": page, "limit": limit, "total": total,
+			"totalPages": (total + limit - 1) / limit,
+		},
+	})
+}
+
+// ListJobs handles GET /api/v1/crawl
+func (h *Handler) ListJobs(w http.ResponseWriter, r *http.Request) {
+	page, _ := strconv.Atoi(r.URL.Query().Get("page"))
+	limit, _ := strconv.Atoi(r.URL.Query().Get("limit"))
+	if page < 1 {
+		page = 1
+	}
+	if limit < 1 || limit > 100 {
+		limit = 20
+	}
+	offset := (page - 1) * limit
+	status := r.URL.Query().Get("status")
+
+	var total int
+	if status != "" {
+		h.pool.QueryRow(r.Context(), `SELECT COUNT(*) FROM crawler.crawl_jobs WHERE status=$1`, status).Scan(&total)
+	} else {
+		h.pool.QueryRow(r.Context(), `SELECT COUNT(*) FROM crawler.crawl_jobs`).Scan(&total)
+	}
+
+	query := `SELECT id, start_url, domain, status, COALESCE(progress::text,'{}'), created_at FROM crawler.crawl_jobs ORDER BY created_at DESC LIMIT $1 OFFSET $2`
+	args := []any{limit, offset}
+	if status != "" {
+		query = `SELECT id, start_url, domain, status, COALESCE(progress::text,'{}'), created_at FROM crawler.crawl_jobs WHERE status=$3 ORDER BY created_at DESC LIMIT $1 OFFSET $2`
+		args = append(args, status)
+	}
+
+	rows, err := h.pool.Query(r.Context(), query, args...)
+	if err != nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
+		return
+	}
+	defer rows.Close()
+
+	var jobs []map[string]any
+	for rows.Next() {
+		var id, startURL, domain, st, progress string
+		var createdAt time.Time
+		rows.Scan(&id, &startURL, &domain, &st, &progress, &createdAt)
+		jobs = append(jobs, map[string]any{
+			"jobId": id, "startUrl": startURL, "domain": domain,
+			"status": st, "progress": progress, "createdAt": createdAt,
+		})
+	}
+	if jobs == nil {
+		jobs = []map[string]any{}
+	}
+
+	writeJSON(w, http.StatusOK, map[string]any{
+		"results": jobs,
+		"pagination": map[string]any{
+			"page": page, "limit": limit, "total": total,
+		},
+	})
+}
+
+// CancelJob handles DELETE /api/v1/crawl/{jobId}
+func (h *Handler) CancelJob(w http.ResponseWriter, r *http.Request) {
+	jobID := r.PathValue("jobId")
+	h.crawler.CancelJob(jobID)
+	h.pool.Exec(r.Context(), `UPDATE crawler.crawl_jobs SET status='cancelled', updated_at=NOW() WHERE id=$1`, jobID)
+	w.WriteHeader(http.StatusNoContent)
+}
+
+// Health handles GET /health
+func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
+	dbOK := "ok"
+	if err := h.pool.Ping(r.Context()); err != nil {
+		dbOK = "error"
+	}
+	status := "ok"
+	if dbOK != "ok" {
+		status = "degraded"
+	}
+	writeJSON(w, http.StatusOK, map[string]any{
+		"status": status, "service": "mana-crawler", "database": dbOK,
+		"timestamp": time.Now().UTC().Format(time.RFC3339),
+	})
+}
+
+// Metrics handles GET /metrics
+func (h *Handler) Metrics(w http.ResponseWriter, r *http.Request) {
+	var running, completed, failed int
+	h.pool.QueryRow(r.Context(), `SELECT COUNT(*) FROM crawler.crawl_jobs WHERE status='running'`).Scan(&running)
+	h.pool.QueryRow(r.Context(), `SELECT COUNT(*) FROM crawler.crawl_jobs WHERE status='completed'`).Scan(&completed)
+	h.pool.QueryRow(r.Context(), `SELECT COUNT(*) FROM crawler.crawl_jobs WHERE status='failed'`).Scan(&failed)
+
+	w.Header().Set("Content-Type", "text/plain")
+	fmt.Fprintf(w, "# HELP mana_crawler_jobs Crawl jobs by status\n")
+	fmt.Fprintf(w, "# TYPE mana_crawler_jobs gauge\n")
+	fmt.Fprintf(w, "mana_crawler_jobs{status=\"running\"} %d\n", running)
+	fmt.Fprintf(w, "mana_crawler_jobs{status=\"completed\"} %d\n", completed)
+	fmt.Fprintf(w, "mana_crawler_jobs{status=\"failed\"} %d\n", failed)
+}
+
+func writeJSON(w http.ResponseWriter, status int, data any) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	json.NewEncoder(w).Encode(data)
+}