mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-15 16:39:39 +02:00
mana-search-go → mana-search; mana-notify-go → mana-notify; mana-crawler-go → mana-crawler; mana-api-gateway-go → mana-api-gateway. Legacy NestJS versions are deleted, so the suffix is no longer needed. Updated all references in docker-compose, CLAUDE.md, package.json, Forgejo workflows, and service package.json files. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
97 lines
2.4 KiB
Go
97 lines
2.4 KiB
Go
package db
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"time"
|
|
|
|
"github.com/jackc/pgx/v5/pgxpool"
|
|
)
|
|
|
|
type DB struct {
|
|
Pool *pgxpool.Pool
|
|
}
|
|
|
|
func New(ctx context.Context, databaseURL string) (*DB, error) {
|
|
config, err := pgxpool.ParseConfig(databaseURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parse config: %w", err)
|
|
}
|
|
config.MaxConns = 20
|
|
config.MinConns = 2
|
|
config.MaxConnLifetime = 30 * time.Minute
|
|
|
|
pool, err := pgxpool.NewWithConfig(ctx, config)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("create pool: %w", err)
|
|
}
|
|
if err := pool.Ping(ctx); err != nil {
|
|
return nil, fmt.Errorf("ping: %w", err)
|
|
}
|
|
slog.Info("database connected")
|
|
return &DB{Pool: pool}, nil
|
|
}
|
|
|
|
func (d *DB) Migrate(ctx context.Context) error {
|
|
sql := `
|
|
CREATE SCHEMA IF NOT EXISTS crawler;
|
|
|
|
CREATE TABLE IF NOT EXISTS crawler.crawl_jobs (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
start_url TEXT NOT NULL,
|
|
domain TEXT NOT NULL,
|
|
max_depth INT NOT NULL DEFAULT 3,
|
|
max_pages INT NOT NULL DEFAULT 100,
|
|
rate_limit INT NOT NULL DEFAULT 2,
|
|
include_patterns JSONB,
|
|
exclude_patterns JSONB,
|
|
selectors JSONB,
|
|
output JSONB,
|
|
respect_robots BOOLEAN NOT NULL DEFAULT true,
|
|
status TEXT NOT NULL DEFAULT 'pending',
|
|
progress JSONB DEFAULT '{"discovered":0,"crawled":0,"failed":0,"queued":0}',
|
|
error TEXT,
|
|
user_id TEXT,
|
|
webhook_url TEXT,
|
|
started_at TIMESTAMPTZ,
|
|
completed_at TIMESTAMPTZ,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_status ON crawler.crawl_jobs(status);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_jobs_domain ON crawler.crawl_jobs(domain);
|
|
|
|
CREATE TABLE IF NOT EXISTS crawler.crawl_results (
|
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
job_id UUID NOT NULL REFERENCES crawler.crawl_jobs(id) ON DELETE CASCADE,
|
|
url TEXT NOT NULL,
|
|
parent_url TEXT,
|
|
depth INT NOT NULL,
|
|
title TEXT,
|
|
content TEXT,
|
|
markdown TEXT,
|
|
html TEXT,
|
|
metadata JSONB,
|
|
links JSONB,
|
|
status_code INT,
|
|
error TEXT,
|
|
fetch_duration_ms INT,
|
|
parse_duration_ms INT,
|
|
content_length INT,
|
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_results_job ON crawler.crawl_results(job_id);
|
|
CREATE INDEX IF NOT EXISTS idx_crawl_results_url ON crawler.crawl_results(url);
|
|
`
|
|
_, err := d.Pool.Exec(ctx, sql)
|
|
if err != nil {
|
|
return fmt.Errorf("migrate: %w", err)
|
|
}
|
|
slog.Info("database migrated")
|
|
return nil
|
|
}
|
|
|
|
// Close shuts down the underlying connection pool by calling Pool.Close.
func (d *DB) Close() {
	d.Pool.Close()
}
|