refactor(go-services): integrate shared-go into crawler + gateway, fix Dockerfiles

- mana-crawler: config → envutil, handler → httputil.WriteJSON
- mana-api-gateway: config → envutil, handlers → httputil.WriteJSON
- Fix Dockerfile COPY paths (remove stale -go suffix in all 4 services)
- All services now use packages/shared-go via replace directive

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-03-28 16:25:30 +01:00
parent ba6dbf16c4
commit bf4d9cb9aa
39 changed files with 1313 additions and 1379 deletions

View file

@ -2,10 +2,10 @@
FROM golang:1.25-alpine AS builder
WORKDIR /app
COPY services/mana-crawler-go/go.mod services/mana-crawler-go/go.sum ./
COPY services/mana-crawler/go.mod services/mana-crawler/go.sum ./
RUN go mod download
COPY services/mana-crawler-go/ .
COPY services/mana-crawler/ .
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /mana-crawler ./cmd/server
# Runtime stage

View file

@ -3,6 +3,7 @@ module github.com/manacore/mana-crawler
go 1.25.0
require (
github.com/manacore/shared-go v0.0.0
github.com/PuerkitoBio/goquery v1.12.0
github.com/jackc/pgx/v5 v5.9.1
github.com/rs/cors v1.11.1
@ -18,3 +19,5 @@ require (
golang.org/x/sync v0.20.0 // indirect
golang.org/x/text v0.35.0 // indirect
)
replace github.com/manacore/shared-go => ../../packages/shared-go

View file

@ -1,9 +1,9 @@
package config
import (
"os"
"strconv"
"strings"
"github.com/manacore/shared-go/envutil"
)
type Config struct {
@ -14,51 +14,31 @@ type Config struct {
RedisPort int
RedisPassword string
UserAgent string
UserAgent string
DefaultRateLimit float64
DefaultMaxDepth int
DefaultMaxPages int
Timeout int // ms
Concurrency int
DefaultMaxDepth int
DefaultMaxPages int
Timeout int // ms
Concurrency int
CORSOrigins []string
}
// Load assembles the crawler Config from environment variables, supplying
// local-development defaults for any variable that is not set.
//
// NOTE(review): this span looks like a rendered diff in which removed and
// added lines are interleaved without markers — the getEnv/strconv lines
// appear to be the previous implementation and the envutil lines its
// replacement (rateLimit is declared twice and each Config key is listed
// twice). Confirm against the real file before relying on either set.
func Load() *Config {
// Numeric settings are read as strings and parsed; parse errors are
// discarded via the blank identifier rather than reported.
port, _ := strconv.Atoi(getEnv("PORT", "3023"))
redisPort, _ := strconv.Atoi(getEnv("REDIS_PORT", "6379"))
rateLimit, _ := strconv.ParseFloat(getEnv("CRAWLER_DEFAULT_RATE_LIMIT", "2"), 64)
maxDepth, _ := strconv.Atoi(getEnv("CRAWLER_DEFAULT_MAX_DEPTH", "3"))
maxPages, _ := strconv.Atoi(getEnv("CRAWLER_DEFAULT_MAX_PAGES", "100"))
timeout, _ := strconv.Atoi(getEnv("CRAWLER_TIMEOUT", "30000"))
concurrency, _ := strconv.Atoi(getEnv("QUEUE_CONCURRENCY", "5"))
// CORS_ORIGINS is split on commas as-is (entries are not trimmed); when it
// is unset or empty the two localhost origins below are used instead.
var origins []string
if o := os.Getenv("CORS_ORIGINS"); o != "" {
origins = strings.Split(o, ",")
} else {
origins = []string{"http://localhost:3000", "http://localhost:5173"}
}
// The rate limit is a float, so it is still parsed by hand here; only the
// string lookup goes through envutil.Get.
rateLimit, _ := strconv.ParseFloat(envutil.Get("CRAWLER_DEFAULT_RATE_LIMIT", "2"), 64)
return &Config{
Port: port,
DatabaseURL: getEnv("DATABASE_URL", "postgresql://manacore:devpassword@localhost:5432/manacore"),
RedisHost: getEnv("REDIS_HOST", "localhost"),
RedisPort: redisPort,
RedisPassword: getEnv("REDIS_PASSWORD", ""),
UserAgent: getEnv("CRAWLER_USER_AGENT", "ManaCoreCrawler/1.0 (+https://manacore.io/bot)"),
// envutil-based lookups: the second argument is passed as the default —
// presumably returned when the variable is unset; confirm in shared-go.
Port: envutil.GetInt("PORT", 3023),
DatabaseURL: envutil.Get("DATABASE_URL", "postgresql://manacore:devpassword@localhost:5432/manacore"),
RedisHost: envutil.Get("REDIS_HOST", "localhost"),
RedisPort: envutil.GetInt("REDIS_PORT", 6379),
RedisPassword: envutil.Get("REDIS_PASSWORD", ""),
UserAgent: envutil.Get("CRAWLER_USER_AGENT", "ManaCoreCrawler/1.0 (+https://manacore.io/bot)"),
DefaultRateLimit: rateLimit,
DefaultMaxDepth: maxDepth,
DefaultMaxPages: maxPages,
Timeout: timeout,
Concurrency: concurrency,
CORSOrigins: origins,
DefaultMaxDepth: envutil.GetInt("CRAWLER_DEFAULT_MAX_DEPTH", 3),
DefaultMaxPages: envutil.GetInt("CRAWLER_DEFAULT_MAX_PAGES", 100),
// Timeout is in milliseconds, per the comment on the Config field.
Timeout: envutil.GetInt("CRAWLER_TIMEOUT", 30000),
Concurrency: envutil.GetInt("QUEUE_CONCURRENCY", 5),
// NOTE(review): presumably GetSlice splits a comma-separated value like
// the strings.Split code above did — verify against shared-go/envutil.
CORSOrigins: envutil.GetSlice("CORS_ORIGINS", []string{"http://localhost:3000", "http://localhost:5173"}),
}
}
// getEnv looks up the environment variable named key and returns its value.
// An unset variable and one set to the empty string are treated alike: in
// both cases fallback is returned instead.
func getEnv(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}

View file

@ -6,6 +6,8 @@ import (
"fmt"
"log/slog"
"net/http"
"github.com/manacore/shared-go/httputil"
"net/url"
"strconv"
"time"
@ -33,18 +35,18 @@ func (h *Handler) StartCrawl(w http.ResponseWriter, r *http.Request) {
WebhookURL string `json:"webhookUrl"`
}
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request"})
httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request"})
return
}
if body.StartURL == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "startUrl is required"})
httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "startUrl is required"})
return
}
parsed, err := url.Parse(body.StartURL)
if err != nil || parsed.Host == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid URL"})
httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid URL"})
return
}
@ -86,17 +88,17 @@ func (h *Handler) StartCrawl(w http.ResponseWriter, r *http.Request) {
string(configJSON), fmt.Sprintf(`{"format":"%s"}`, cfg.OutputFormat)).Scan(&jobID)
if err != nil {
slog.Error("create job failed", "error", err)
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to create job"})
httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to create job"})
return
}
// Start crawl (use background context so it outlives the HTTP request)
if err := h.crawler.StartJob(context.Background(), jobID, body.StartURL, cfg); err != nil {
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to start crawl"})
httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to start crawl"})
return
}
writeJSON(w, http.StatusCreated, map[string]any{
httputil.WriteJSON(w, http.StatusCreated, map[string]any{
"jobId": jobID,
"status": "running",
"startUrl": body.StartURL,
@ -126,11 +128,11 @@ func (h *Handler) GetJob(w http.ResponseWriter, r *http.Request) {
FROM crawler.crawl_jobs WHERE id = $1
`, jobID).Scan(&job.ID, &job.StartURL, &job.Domain, &job.Status, &job.Progress, &job.Error, &job.StartedAt, &job.CompletedAt, &job.CreatedAt)
if err != nil {
writeJSON(w, http.StatusNotFound, map[string]string{"error": "job not found"})
httputil.WriteJSON(w, http.StatusNotFound, map[string]string{"error": "job not found"})
return
}
writeJSON(w, http.StatusOK, job)
httputil.WriteJSON(w, http.StatusOK, job)
}
// GetJobResults handles GET /api/v1/crawl/{jobId}/results
@ -155,7 +157,7 @@ func (h *Handler) GetJobResults(w http.ResponseWriter, r *http.Request) {
FROM crawler.crawl_results WHERE job_id = $1 ORDER BY created_at LIMIT $2 OFFSET $3
`, jobID, limit, offset)
if err != nil {
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
return
}
defer rows.Close()
@ -180,7 +182,7 @@ func (h *Handler) GetJobResults(w http.ResponseWriter, r *http.Request) {
results = []map[string]any{}
}
writeJSON(w, http.StatusOK, map[string]any{
httputil.WriteJSON(w, http.StatusOK, map[string]any{
"results": results,
"pagination": map[string]any{
"page": page, "limit": limit, "total": total,
@ -218,7 +220,7 @@ func (h *Handler) ListJobs(w http.ResponseWriter, r *http.Request) {
rows, err := h.pool.Query(r.Context(), query, args...)
if err != nil {
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
return
}
defer rows.Close()
@ -237,7 +239,7 @@ func (h *Handler) ListJobs(w http.ResponseWriter, r *http.Request) {
jobs = []map[string]any{}
}
writeJSON(w, http.StatusOK, map[string]any{
httputil.WriteJSON(w, http.StatusOK, map[string]any{
"results": jobs,
"pagination": map[string]any{
"page": page, "limit": limit, "total": total,
@ -263,7 +265,7 @@ func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
if dbOK != "ok" {
status = "degraded"
}
writeJSON(w, http.StatusOK, map[string]any{
httputil.WriteJSON(w, http.StatusOK, map[string]any{
"status": status, "service": "mana-crawler", "database": dbOK,
"timestamp": time.Now().UTC().Format(time.RFC3339),
})
@ -284,8 +286,3 @@ func (h *Handler) Metrics(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "mana_crawler_jobs{status=\"failed\"} %d\n", failed)
}
// writeJSON sends data to the client as a JSON document with the given HTTP
// status code.
//
// The Content-Type header is set before WriteHeader because headers cannot
// be changed once the status line has been written. If encoding fails, the
// status has already gone out and cannot be replaced, so the failure is
// logged instead of being silently dropped.
func writeJSON(w http.ResponseWriter, status int, data any) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	if err := json.NewEncoder(w).Encode(data); err != nil {
		slog.Error("write JSON response failed", "error", err)
	}
}