managarten/services/mana-search/internal/handler/extract.go
Till JS d3d11e661d feat(apps): create Hono compute servers for Traces, Planta, NutriPhi
Add lightweight Hono + Bun servers for server-only compute endpoints.
CRUD is handled by mana-sync, these handle AI + file upload only.

Traces: AI guide generation, location sync (Port 3026)
Planta: Photo upload (S3), AI plant analysis (Port 3022)
NutriPhi: AI meal analysis (photo+text), recommendations (Port 3023)

Each uses @manacore/shared-hono for auth/health/errors. ~100-200 LOC.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 16:16:57 +01:00

129 lines
3.4 KiB
Go

package handler
import (
"encoding/json"
"net/http"
"github.com/manacore/shared-go/httputil"
"net/url"
"time"
"github.com/manacore/mana-search/internal/cache"
"github.com/manacore/mana-search/internal/config"
"github.com/manacore/mana-search/internal/extract"
"github.com/manacore/mana-search/internal/metrics"
)
// ExtractHandler serves the content-extraction HTTP endpoints
// (single and bulk). It delegates the actual work to extract.Extractor
// and memoizes successful single-URL responses in the cache.
type ExtractHandler struct {
	extractor *extract.Extractor // performs the actual content extraction
	cache     *cache.Cache       // stores successful single-URL responses
	metrics   *metrics.Metrics   // records per-endpoint status/latency samples
	cfg       *config.Config     // supplies CacheExtractTTL (seconds)
}
// NewExtractHandler wires up an ExtractHandler with its extractor,
// response cache, metrics recorder, and configuration.
func NewExtractHandler(extractor *extract.Extractor, c *cache.Cache, m *metrics.Metrics, cfg *config.Config) *ExtractHandler {
	h := &ExtractHandler{
		cfg:       cfg,
		metrics:   m,
		cache:     c,
		extractor: extractor,
	}
	return h
}
// Extract handles POST /api/v1/extract
func (h *ExtractHandler) Extract(w http.ResponseWriter, r *http.Request) {
start := time.Now()
var req extract.ExtractRequest
if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil {
httputil.WriteError(w, http.StatusBadRequest, "invalid request body")
return
}
if req.URL == "" {
httputil.WriteError(w, http.StatusBadRequest, "url is required")
return
}
if _, err := url.ParseRequestURI(req.URL); err != nil {
httputil.WriteError(w, http.StatusBadRequest, "url must be a valid URL")
return
}
// Validate options
if req.Options != nil {
if req.Options.MaxLength > 0 && (req.Options.MaxLength < 100 || req.Options.MaxLength > 100000) {
httputil.WriteError(w, http.StatusBadRequest, "maxLength must be between 100 and 100000")
return
}
if req.Options.Timeout > 0 && (req.Options.Timeout < 1000 || req.Options.Timeout > 30000) {
httputil.WriteError(w, http.StatusBadRequest, "timeout must be between 1000 and 30000")
return
}
}
cacheKey := extract.BuildCacheKey(req.URL)
// Check cache
if data, ok := h.cache.Get(r.Context(), cacheKey); ok {
var cached extract.ExtractResponse
if err := json.Unmarshal(data, &cached); err == nil {
cached.Meta.Cached = true
duration := time.Since(start).Seconds()
h.metrics.RecordRequest("extract", "200", duration)
httputil.WriteJSON(w, http.StatusOK, cached)
return
}
}
// Extract content
resp := h.extractor.Extract(r.Context(), &req)
// Cache successful results
if resp.Success {
ttl := time.Duration(h.cfg.CacheExtractTTL) * time.Second
h.cache.Set(r.Context(), cacheKey, resp, ttl)
}
status := "200"
if !resp.Success {
status = "500"
}
duration := time.Since(start).Seconds()
h.metrics.RecordRequest("extract", status, duration)
httputil.WriteJSON(w, http.StatusOK, resp)
}
// BulkExtract handles POST /api/v1/extract/bulk
func (h *ExtractHandler) BulkExtract(w http.ResponseWriter, r *http.Request) {
start := time.Now()
var req extract.BulkExtractRequest
if err := json.NewDecoder(http.MaxBytesReader(w, r.Body, 1<<20)).Decode(&req); err != nil {
httputil.WriteError(w, http.StatusBadRequest, "invalid request body")
return
}
if len(req.URLs) == 0 {
httputil.WriteError(w, http.StatusBadRequest, "urls is required")
return
}
if len(req.URLs) > 20 {
httputil.WriteError(w, http.StatusBadRequest, "maximum 20 URLs allowed")
return
}
for _, u := range req.URLs {
if _, err := url.ParseRequestURI(u); err != nil {
httputil.WriteError(w, http.StatusBadRequest, "invalid URL: "+u)
return
}
}
resp := h.extractor.BulkExtract(r.Context(), &req)
duration := time.Since(start).Seconds()
h.metrics.RecordRequest("extract_bulk", "200", duration)
httputil.WriteJSON(w, http.StatusOK, resp)
}