managarten/services/mana-crawler/internal/db/db.go
Till JS 7e931b1c6d refactor(services): rename Go services, remove -go suffix
mana-search-go → mana-search
mana-notify-go → mana-notify
mana-crawler-go → mana-crawler
mana-api-gateway-go → mana-api-gateway

Legacy NestJS versions are deleted, suffix no longer needed.
Updated all references in docker-compose, CLAUDE.md, package.json,
Forgejo workflows, and service package.json files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 10:18:40 +01:00

97 lines
2.4 KiB
Go

package db
import (
"context"
"fmt"
"log/slog"
"time"
"github.com/jackc/pgx/v5/pgxpool"
)
type DB struct {
Pool *pgxpool.Pool
}
func New(ctx context.Context, databaseURL string) (*DB, error) {
config, err := pgxpool.ParseConfig(databaseURL)
if err != nil {
return nil, fmt.Errorf("parse config: %w", err)
}
config.MaxConns = 20
config.MinConns = 2
config.MaxConnLifetime = 30 * time.Minute
pool, err := pgxpool.NewWithConfig(ctx, config)
if err != nil {
return nil, fmt.Errorf("create pool: %w", err)
}
if err := pool.Ping(ctx); err != nil {
return nil, fmt.Errorf("ping: %w", err)
}
slog.Info("database connected")
return &DB{Pool: pool}, nil
}
func (d *DB) Migrate(ctx context.Context) error {
sql := `
CREATE SCHEMA IF NOT EXISTS crawler;
CREATE TABLE IF NOT EXISTS crawler.crawl_jobs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
start_url TEXT NOT NULL,
domain TEXT NOT NULL,
max_depth INT NOT NULL DEFAULT 3,
max_pages INT NOT NULL DEFAULT 100,
rate_limit INT NOT NULL DEFAULT 2,
include_patterns JSONB,
exclude_patterns JSONB,
selectors JSONB,
output JSONB,
respect_robots BOOLEAN NOT NULL DEFAULT true,
status TEXT NOT NULL DEFAULT 'pending',
progress JSONB DEFAULT '{"discovered":0,"crawled":0,"failed":0,"queued":0}',
error TEXT,
user_id TEXT,
webhook_url TEXT,
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON crawler.crawl_jobs(status);
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_domain ON crawler.crawl_jobs(domain);
CREATE TABLE IF NOT EXISTS crawler.crawl_results (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
job_id UUID NOT NULL REFERENCES crawler.crawl_jobs(id) ON DELETE CASCADE,
url TEXT NOT NULL,
parent_url TEXT,
depth INT NOT NULL,
title TEXT,
content TEXT,
markdown TEXT,
html TEXT,
metadata JSONB,
links JSONB,
status_code INT,
error TEXT,
fetch_duration_ms INT,
parse_duration_ms INT,
content_length INT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_crawl_results_job ON crawler.crawl_results(job_id);
CREATE INDEX IF NOT EXISTS idx_crawl_results_url ON crawler.crawl_results(url);
`
_, err := d.Pool.Exec(ctx, sql)
if err != nil {
return fmt.Errorf("migrate: %w", err)
}
slog.Info("database migrated")
return nil
}
func (d *DB) Close() { d.Pool.Close() }