mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-23 19:46:42 +02:00
fix(mana-search): Go best practices hardening
- Fix response body leak in SearXNG HealthCheck (defer resp.Body.Close) - Handle ignored errors in HTTP request creation - Add panic recovery in BulkExtract goroutines - Add request body size limit (1 MB) via http.MaxBytesReader - Add MaxHeaderBytes to HTTP server - Sort engine list for deterministic responses - Fix variable shadowing (r → res in loop) - Run as non-root user in Docker container - Log shutdown errors Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
15deaf4e0a
commit
28bbd7bbb0
6 changed files with 42 additions and 20 deletions
|
|
@ -9,10 +9,12 @@ RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /mana-search ./cmd/ser
|
||||||
|
|
||||||
FROM alpine:3.21
|
FROM alpine:3.21
|
||||||
|
|
||||||
RUN apk --no-cache add ca-certificates tzdata
|
RUN apk --no-cache add ca-certificates tzdata && \
|
||||||
|
addgroup -g 1000 mana && adduser -u 1000 -G mana -s /sbin/nologin -D mana
|
||||||
|
|
||||||
COPY --from=builder /mana-search /usr/local/bin/mana-search
|
COPY --from=builder /mana-search /usr/local/bin/mana-search
|
||||||
|
|
||||||
|
USER mana
|
||||||
EXPOSE 3021
|
EXPOSE 3021
|
||||||
|
|
||||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
|
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ func main() {
|
||||||
mux.HandleFunc("POST /api/v1/extract", extractHandler.Extract)
|
mux.HandleFunc("POST /api/v1/extract", extractHandler.Extract)
|
||||||
mux.HandleFunc("POST /api/v1/extract/bulk", extractHandler.BulkExtract)
|
mux.HandleFunc("POST /api/v1/extract/bulk", extractHandler.BulkExtract)
|
||||||
|
|
||||||
c_handler := cors.New(cors.Options{
|
corsHandler := cors.New(cors.Options{
|
||||||
AllowedOrigins: cfg.CORSOrigins,
|
AllowedOrigins: cfg.CORSOrigins,
|
||||||
AllowedMethods: []string{"GET", "POST", "DELETE", "OPTIONS"},
|
AllowedMethods: []string{"GET", "POST", "DELETE", "OPTIONS"},
|
||||||
AllowedHeaders: []string{"Content-Type", "Authorization"},
|
AllowedHeaders: []string{"Content-Type", "Authorization"},
|
||||||
|
|
@ -63,11 +63,12 @@ func main() {
|
||||||
}).Handler(mux)
|
}).Handler(mux)
|
||||||
|
|
||||||
server := &http.Server{
|
server := &http.Server{
|
||||||
Addr: fmt.Sprintf(":%d", cfg.Port),
|
Addr: fmt.Sprintf(":%d", cfg.Port),
|
||||||
Handler: c_handler,
|
Handler: corsHandler,
|
||||||
ReadTimeout: 30 * time.Second,
|
ReadTimeout: 30 * time.Second,
|
||||||
WriteTimeout: 60 * time.Second,
|
WriteTimeout: 60 * time.Second,
|
||||||
IdleTimeout: 120 * time.Second,
|
IdleTimeout: 120 * time.Second,
|
||||||
|
MaxHeaderBytes: 1 << 20, // 1 MB
|
||||||
}
|
}
|
||||||
|
|
||||||
// Graceful shutdown
|
// Graceful shutdown
|
||||||
|
|
@ -87,7 +88,9 @@ func main() {
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
server.Shutdown(ctx)
|
if err := server.Shutdown(ctx); err != nil {
|
||||||
|
slog.Error("shutdown error", "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
slog.Info("server stopped")
|
slog.Info("server stopped")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,7 @@ func (e *Extractor) Extract(ctx context.Context, req *ExtractRequest) *ExtractRe
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("extraction failed", "url", req.URL, "error", err)
|
slog.Warn("extraction failed", "url", req.URL, "error", err)
|
||||||
return errorResponse(req.URL, fmt.Sprintf("extraction failed: %v", err), start)
|
return errorResponse(req.URL, fmt.Sprintf("extraction failed: %s", err), start)
|
||||||
}
|
}
|
||||||
|
|
||||||
text := cleanText(article.TextContent)
|
text := cleanText(article.TextContent)
|
||||||
|
|
@ -196,6 +196,12 @@ func (e *Extractor) BulkExtract(ctx context.Context, req *BulkExtractRequest) *B
|
||||||
ch := make(chan indexedResult, end-i)
|
ch := make(chan indexedResult, end-i)
|
||||||
for j := i; j < end; j++ {
|
for j := i; j < end; j++ {
|
||||||
go func(idx int, u string) {
|
go func(idx int, u string) {
|
||||||
|
defer func() {
|
||||||
|
if p := recover(); p != nil {
|
||||||
|
slog.Error("extract panic", "url", u, "panic", p)
|
||||||
|
ch <- indexedResult{index: idx, result: errorResponse(u, "extraction panicked", start)}
|
||||||
|
}
|
||||||
|
}()
|
||||||
r := e.Extract(ctx, &ExtractRequest{URL: u, Options: req.Options})
|
r := e.Extract(ctx, &ExtractRequest{URL: u, Options: req.Options})
|
||||||
ch <- indexedResult{index: idx, result: r}
|
ch <- indexedResult{index: idx, result: r}
|
||||||
}(j, req.URLs[j])
|
}(j, req.URLs[j])
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ func (h *ExtractHandler) Extract(w http.ResponseWriter, r *http.Request) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
var req extract.ExtractRequest
|
var req extract.ExtractRequest
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil {
|
||||||
writeError(w, http.StatusBadRequest, "invalid request body")
|
writeError(w, http.StatusBadRequest, "invalid request body")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
@ -97,7 +97,7 @@ func (h *ExtractHandler) BulkExtract(w http.ResponseWriter, r *http.Request) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
var req extract.BulkExtractRequest
|
var req extract.BulkExtractRequest
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil {
|
||||||
writeError(w, http.StatusBadRequest, "invalid request body")
|
writeError(w, http.StatusBadRequest, "invalid request body")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"sort"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/manacore/mana-search/internal/cache"
|
"github.com/manacore/mana-search/internal/cache"
|
||||||
|
|
@ -35,7 +36,7 @@ func (h *SearchHandler) Search(w http.ResponseWriter, r *http.Request) {
|
||||||
defer h.metrics.ActiveSearches.Dec()
|
defer h.metrics.ActiveSearches.Dec()
|
||||||
|
|
||||||
var req search.SearchRequest
|
var req search.SearchRequest
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil {
|
||||||
writeError(w, http.StatusBadRequest, "invalid request body")
|
writeError(w, http.StatusBadRequest, "invalid request body")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
@ -84,15 +85,16 @@ func (h *SearchHandler) Search(w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect unique engines
|
// Collect unique engines (sorted for deterministic cache keys)
|
||||||
engineSet := make(map[string]bool)
|
engineSet := make(map[string]bool)
|
||||||
for _, r := range results {
|
for _, res := range results {
|
||||||
engineSet[r.Engine] = true
|
engineSet[res.Engine] = true
|
||||||
}
|
}
|
||||||
engines := make([]string, 0, len(engineSet))
|
engines := make([]string, 0, len(engineSet))
|
||||||
for e := range engineSet {
|
for e := range engineSet {
|
||||||
engines = append(engines, e)
|
engines = append(engines, e)
|
||||||
}
|
}
|
||||||
|
sort.Strings(engines)
|
||||||
|
|
||||||
resp := search.SearchResponse{
|
resp := search.SearchResponse{
|
||||||
Results: results,
|
Results: results,
|
||||||
|
|
|
||||||
|
|
@ -160,14 +160,20 @@ func (p *SearxngProvider) HealthCheck(ctx context.Context) (string, int64) {
|
||||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
req, _ := http.NewRequestWithContext(ctx, http.MethodGet, p.baseURL+"/healthz", nil)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, p.baseURL+"/healthz", nil)
|
||||||
|
if err != nil {
|
||||||
|
return "error", time.Since(start).Milliseconds()
|
||||||
|
}
|
||||||
resp, err := p.client.Do(req)
|
resp, err := p.client.Do(req)
|
||||||
latency := time.Since(start).Milliseconds()
|
latency := time.Since(start).Milliseconds()
|
||||||
|
if err != nil {
|
||||||
if err != nil || resp.StatusCode != http.StatusOK {
|
return "error", latency
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
return "error", latency
|
return "error", latency
|
||||||
}
|
}
|
||||||
resp.Body.Close()
|
|
||||||
return "ok", latency
|
return "ok", latency
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -176,7 +182,10 @@ func (p *SearxngProvider) GetEngines(ctx context.Context) []string {
|
||||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
req, _ := http.NewRequestWithContext(ctx, http.MethodGet, p.baseURL+"/config", nil)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, p.baseURL+"/config", nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
resp, err := p.client.Do(req)
|
resp, err := p.client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("failed to fetch searxng engines", "error", err)
|
slog.Warn("failed to fetch searxng engines", "error", err)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue