refactor(go-services): integrate shared-go into crawler + gateway, fix Dockerfiles
- mana-crawler: config → envutil, handler → httputil.WriteJSON
- mana-api-gateway: config → envutil, handlers → httputil.WriteJSON
- Fix Dockerfile COPY paths (remove stale -go suffix in all 4 services)
- All services now use packages/shared-go via replace directive

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent ba6dbf16c4
commit bf4d9cb9aa
39 changed files with 1313 additions and 1379 deletions
@@ -2,10 +2,10 @@
 FROM golang:1.25-alpine AS builder
 
 WORKDIR /app
-COPY services/mana-crawler-go/go.mod services/mana-crawler-go/go.sum ./
+COPY services/mana-crawler/go.mod services/mana-crawler/go.sum ./
 RUN go mod download
 
-COPY services/mana-crawler-go/ .
+COPY services/mana-crawler/ .
 RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /mana-crawler ./cmd/server
 
 # Runtime stage
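The hunk stops at the runtime stage, which this commit leaves untouched. For orientation, a minimal sketch of a matching runtime stage in this multi-stage pattern (the base image and exposed port are assumptions; only the /mana-crawler binary path and the PORT default of 3023 come from this diff):

# Runtime stage (sketch only, not part of this commit)
FROM alpine:3.21
COPY --from=builder /mana-crawler /mana-crawler
EXPOSE 3023
ENTRYPOINT ["/mana-crawler"]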
@@ -3,6 +3,7 @@ module github.com/manacore/mana-crawler
 go 1.25.0
 
 require (
+	github.com/manacore/shared-go v0.0.0
 	github.com/PuerkitoBio/goquery v1.12.0
 	github.com/jackc/pgx/v5 v5.9.1
 	github.com/rs/cors v1.11.1

@@ -18,3 +19,5 @@ require (
 	golang.org/x/sync v0.20.0 // indirect
 	golang.org/x/text v0.35.0 // indirect
 )
+
+replace github.com/manacore/shared-go => ../../packages/shared-go
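The replace directive makes the Go toolchain resolve github.com/manacore/shared-go from the monorepo's packages/shared-go directory instead of trying to fetch the v0.0.0 placeholder version. For that to work, the shared package presumably declares the matching module path; a minimal sketch of packages/shared-go/go.mod (assumed, not shown in this diff):

module github.com/manacore/shared-go

go 1.25.0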
@@ -1,9 +1,9 @@
 package config
 
 import (
-	"os"
 	"strconv"
-	"strings"
+
+	"github.com/manacore/shared-go/envutil"
 )
 
 type Config struct {

@@ -14,51 +14,31 @@ type Config struct {
 	RedisPort     int
 	RedisPassword string
 
-	UserAgent string
+	UserAgent        string
 	DefaultRateLimit float64
-	DefaultMaxDepth int
-	DefaultMaxPages int
-	Timeout int // ms
-	Concurrency int
+	DefaultMaxDepth  int
+	DefaultMaxPages  int
+	Timeout          int // ms
+	Concurrency      int
 
 	CORSOrigins []string
 }
 
 func Load() *Config {
-	port, _ := strconv.Atoi(getEnv("PORT", "3023"))
-	redisPort, _ := strconv.Atoi(getEnv("REDIS_PORT", "6379"))
-	rateLimit, _ := strconv.ParseFloat(getEnv("CRAWLER_DEFAULT_RATE_LIMIT", "2"), 64)
-	maxDepth, _ := strconv.Atoi(getEnv("CRAWLER_DEFAULT_MAX_DEPTH", "3"))
-	maxPages, _ := strconv.Atoi(getEnv("CRAWLER_DEFAULT_MAX_PAGES", "100"))
-	timeout, _ := strconv.Atoi(getEnv("CRAWLER_TIMEOUT", "30000"))
-	concurrency, _ := strconv.Atoi(getEnv("QUEUE_CONCURRENCY", "5"))
-
-	var origins []string
-	if o := os.Getenv("CORS_ORIGINS"); o != "" {
-		origins = strings.Split(o, ",")
-	} else {
-		origins = []string{"http://localhost:3000", "http://localhost:5173"}
-	}
+	rateLimit, _ := strconv.ParseFloat(envutil.Get("CRAWLER_DEFAULT_RATE_LIMIT", "2"), 64)
 
 	return &Config{
-		Port:             port,
-		DatabaseURL:      getEnv("DATABASE_URL", "postgresql://manacore:devpassword@localhost:5432/manacore"),
-		RedisHost:        getEnv("REDIS_HOST", "localhost"),
-		RedisPort:        redisPort,
-		RedisPassword:    getEnv("REDIS_PASSWORD", ""),
-		UserAgent:        getEnv("CRAWLER_USER_AGENT", "ManaCoreCrawler/1.0 (+https://manacore.io/bot)"),
+		Port:             envutil.GetInt("PORT", 3023),
+		DatabaseURL:      envutil.Get("DATABASE_URL", "postgresql://manacore:devpassword@localhost:5432/manacore"),
+		RedisHost:        envutil.Get("REDIS_HOST", "localhost"),
+		RedisPort:        envutil.GetInt("REDIS_PORT", 6379),
+		RedisPassword:    envutil.Get("REDIS_PASSWORD", ""),
+		UserAgent:        envutil.Get("CRAWLER_USER_AGENT", "ManaCoreCrawler/1.0 (+https://manacore.io/bot)"),
 		DefaultRateLimit: rateLimit,
-		DefaultMaxDepth:  maxDepth,
-		DefaultMaxPages:  maxPages,
-		Timeout:          timeout,
-		Concurrency:      concurrency,
-		CORSOrigins:      origins,
+		DefaultMaxDepth:  envutil.GetInt("CRAWLER_DEFAULT_MAX_DEPTH", 3),
+		DefaultMaxPages:  envutil.GetInt("CRAWLER_DEFAULT_MAX_PAGES", 100),
+		Timeout:          envutil.GetInt("CRAWLER_TIMEOUT", 30000),
+		Concurrency:      envutil.GetInt("QUEUE_CONCURRENCY", 5),
+		CORSOrigins:      envutil.GetSlice("CORS_ORIGINS", []string{"http://localhost:3000", "http://localhost:5173"}),
 	}
 }
-
-func getEnv(key, fallback string) string {
-	if v := os.Getenv(key); v != "" {
-		return v
-	}
-	return fallback
-}
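The envutil implementation is not part of this diff, but its shape follows from the call sites above: Get returns a string with a fallback, GetInt parses an int, and GetSlice splits a comma-separated variable. A minimal sketch under those assumptions; the real code in packages/shared-go may differ:

// Package envutil is a sketch of the shared helpers, with signatures
// inferred from the config.go call sites above. Assumed, not confirmed.
package envutil

import (
	"os"
	"strconv"
	"strings"
)

// Get returns the value of the environment variable key,
// or fallback when it is unset or empty.
func Get(key, fallback string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return fallback
}

// GetInt parses key as a decimal integer, returning fallback
// when the variable is unset, empty, or unparsable.
func GetInt(key string, fallback int) int {
	if v := os.Getenv(key); v != "" {
		if n, err := strconv.Atoi(v); err == nil {
			return n
		}
	}
	return fallback
}

// GetSlice splits a comma-separated variable into a slice,
// returning fallback when the variable is unset or empty.
func GetSlice(key string, fallback []string) []string {
	if v := os.Getenv(key); v != "" {
		return strings.Split(v, ",")
	}
	return fallback
}

Notably, DefaultRateLimit still goes through strconv.ParseFloat on top of envutil.Get, which suggests the shared package has no float helper yet (an inference from this diff, not a confirmed fact).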
@@ -6,6 +6,8 @@ import (
 	"fmt"
 	"log/slog"
 	"net/http"
+
+	"github.com/manacore/shared-go/httputil"
 	"net/url"
 	"strconv"
 	"time"
@@ -33,18 +35,18 @@ func (h *Handler) StartCrawl(w http.ResponseWriter, r *http.Request) {
 		WebhookURL string `json:"webhookUrl"`
 	}
 	if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
-		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request"})
+		httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request"})
 		return
 	}
 
 	if body.StartURL == "" {
-		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "startUrl is required"})
+		httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "startUrl is required"})
 		return
 	}
 
 	parsed, err := url.Parse(body.StartURL)
 	if err != nil || parsed.Host == "" {
-		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid URL"})
+		httputil.WriteJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid URL"})
 		return
 	}
@@ -86,17 +88,17 @@ func (h *Handler) StartCrawl(w http.ResponseWriter, r *http.Request) {
 		string(configJSON), fmt.Sprintf(`{"format":"%s"}`, cfg.OutputFormat)).Scan(&jobID)
 	if err != nil {
 		slog.Error("create job failed", "error", err)
-		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to create job"})
+		httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to create job"})
 		return
 	}
 
 	// Start crawl (use background context so it outlives the HTTP request)
 	if err := h.crawler.StartJob(context.Background(), jobID, body.StartURL, cfg); err != nil {
-		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to start crawl"})
+		httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to start crawl"})
 		return
 	}
 
-	writeJSON(w, http.StatusCreated, map[string]any{
+	httputil.WriteJSON(w, http.StatusCreated, map[string]any{
 		"jobId":    jobID,
 		"status":   "running",
 		"startUrl": body.StartURL,
@@ -126,11 +128,11 @@ func (h *Handler) GetJob(w http.ResponseWriter, r *http.Request) {
 		FROM crawler.crawl_jobs WHERE id = $1
 	`, jobID).Scan(&job.ID, &job.StartURL, &job.Domain, &job.Status, &job.Progress, &job.Error, &job.StartedAt, &job.CompletedAt, &job.CreatedAt)
 	if err != nil {
-		writeJSON(w, http.StatusNotFound, map[string]string{"error": "job not found"})
+		httputil.WriteJSON(w, http.StatusNotFound, map[string]string{"error": "job not found"})
 		return
 	}
 
-	writeJSON(w, http.StatusOK, job)
+	httputil.WriteJSON(w, http.StatusOK, job)
 }
 
 // GetJobResults handles GET /api/v1/crawl/{jobId}/results
@@ -155,7 +157,7 @@ func (h *Handler) GetJobResults(w http.ResponseWriter, r *http.Request) {
 		FROM crawler.crawl_results WHERE job_id = $1 ORDER BY created_at LIMIT $2 OFFSET $3
 	`, jobID, limit, offset)
 	if err != nil {
-		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
+		httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
 		return
 	}
 	defer rows.Close()
@@ -180,7 +182,7 @@ func (h *Handler) GetJobResults(w http.ResponseWriter, r *http.Request) {
 		results = []map[string]any{}
 	}
 
-	writeJSON(w, http.StatusOK, map[string]any{
+	httputil.WriteJSON(w, http.StatusOK, map[string]any{
 		"results": results,
 		"pagination": map[string]any{
 			"page": page, "limit": limit, "total": total,
@@ -218,7 +220,7 @@ func (h *Handler) ListJobs(w http.ResponseWriter, r *http.Request) {
 
 	rows, err := h.pool.Query(r.Context(), query, args...)
 	if err != nil {
-		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
+		httputil.WriteJSON(w, http.StatusInternalServerError, map[string]string{"error": "query failed"})
 		return
 	}
 	defer rows.Close()
@@ -237,7 +239,7 @@ func (h *Handler) ListJobs(w http.ResponseWriter, r *http.Request) {
 		jobs = []map[string]any{}
 	}
 
-	writeJSON(w, http.StatusOK, map[string]any{
+	httputil.WriteJSON(w, http.StatusOK, map[string]any{
 		"results": jobs,
 		"pagination": map[string]any{
 			"page": page, "limit": limit, "total": total,
@@ -263,7 +265,7 @@ func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
 	if dbOK != "ok" {
 		status = "degraded"
 	}
-	writeJSON(w, http.StatusOK, map[string]any{
+	httputil.WriteJSON(w, http.StatusOK, map[string]any{
 		"status": status, "service": "mana-crawler", "database": dbOK,
 		"timestamp": time.Now().UTC().Format(time.RFC3339),
 	})
@@ -284,8 +286,3 @@ func (h *Handler) Metrics(w http.ResponseWriter, r *http.Request) {
 	fmt.Fprintf(w, "mana_crawler_jobs{status=\"failed\"} %d\n", failed)
 }
 
-func writeJSON(w http.ResponseWriter, status int, data any) {
-	w.Header().Set("Content-Type", "application/json")
-	w.WriteHeader(status)
-	json.NewEncoder(w).Encode(data)
-}
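Every call site in this file changes only the function name, and the local writeJSON helper deleted above shows exactly what the replacement must do, so the shared httputil.WriteJSON is presumably a drop-in. A minimal sketch assuming it mirrors the removed helper:

// Package httputil is a sketch assuming the shared WriteJSON mirrors the
// local helper this commit deletes; the actual shared code is not in this diff.
package httputil

import (
	"encoding/json"
	"net/http"
)

// WriteJSON writes data as a JSON response body with the given status code.
func WriteJSON(w http.ResponseWriter, status int, data any) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	json.NewEncoder(w).Encode(data)
}

Like the removed helper, the sketch discards the Encode error: the status line has already been written by that point, so there is little left to do beyond logging.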