managarten/services/mana-crawler/src/cache/cache.service.ts
Till-JS 4a3295d1d0 feat(mana-crawler): add web crawler service
NestJS-based web crawler service for structured content extraction.

Features:
- Depth-controlled crawling with URL pattern filtering
- robots.txt compliance
- HTML/PDF/Markdown content extraction
- BullMQ job queue for async processing
- Redis caching layer
- Prometheus metrics
2026-01-29 22:00:36 +01:00

152 lines
3.7 KiB
TypeScript

import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import Redis from 'ioredis';
import { MetricsService } from '../metrics/metrics.service';
/**
 * Best-effort Redis cache.
 *
 * Every operation degrades to a no-op (or a `null` / `0` result) when no Redis
 * client is available or when a Redis command fails, so callers never have to
 * handle cache errors themselves. Values are stored as JSON strings under
 * `keyPrefix + key`.
 */
@Injectable()
export class CacheService implements OnModuleInit, OnModuleDestroy {
  /** COUNT hint passed to SCAN in `clear()`; also bounds the size of each DEL call. */
  private static readonly SCAN_BATCH_SIZE = 500;

  private readonly logger = new Logger(CacheService.name);

  /** Active Redis client, or `null` when the cache is disabled. */
  private client: Redis | null = null;

  /** Prefix prepended to every key this service reads or writes. */
  private readonly keyPrefix: string;

  /** In-process hit/miss counters reported by `getStats()`. */
  private stats = {
    hits: 0,
    misses: 0,
  };

  constructor(
    private readonly configService: ConfigService,
    private readonly metricsService: MetricsService,
  ) {
    this.keyPrefix = this.configService.get<string>('redis.keyPrefix', 'mana-crawler:');
  }

  /**
   * Creates the Redis client and verifies the connection with a PING.
   *
   * On any failure the client is torn down and the service keeps running with
   * the cache disabled (`client === null`).
   */
  async onModuleInit(): Promise<void> {
    const host = this.configService.get<string>('redis.host', 'localhost');
    const port = this.configService.get<number>('redis.port', 6379);
    const password = this.configService.get<string>('redis.password');

    try {
      this.client = new Redis({
        host,
        port,
        password,
        // Back off 200ms per attempt (capped at 2s) and give up after 3 reconnects.
        retryStrategy: (times) => {
          if (times > 3) {
            this.logger.warn('Redis connection failed, running without cache');
            return null;
          }
          return Math.min(times * 200, 2000);
        },
        maxRetriesPerRequest: 1,
      });

      this.client.on('error', (err) => {
        this.logger.error(`Redis error: ${err.message}`);
      });

      this.client.on('connect', () => {
        this.logger.log(`Connected to Redis at ${host}:${port}`);
      });

      await this.client.ping();
    } catch (error) {
      this.logger.warn(`Could not connect to Redis: ${error}. Running without cache.`);
      // Close the half-initialised client before dropping the reference;
      // otherwise it keeps its socket and retry timers alive in the background
      // and may still connect later even though the cache is reported disabled.
      this.client?.disconnect();
      this.client = null;
    }
  }

  /** Closes the Redis connection, if any, when the module shuts down. */
  async onModuleDestroy(): Promise<void> {
    const client = this.client;
    if (!client) return;

    // Drop the reference first so no further cache calls use a closing client.
    this.client = null;

    try {
      await client.quit();
    } catch (error) {
      // QUIT is rejected when the connection has already ended (for example
      // after the retry strategy gave up); that must not abort shutdown.
      this.logger.warn(`Redis quit failed: ${error}`);
      client.disconnect();
    }
  }

  /** Returns `key` with the configured prefix applied. */
  private buildKey(key: string): string {
    return `${this.keyPrefix}${key}`;
  }

  /**
   * Reads and JSON-decodes a cached value.
   *
   * @param key Cache key without the prefix.
   * @returns The decoded value, or `null` on a miss, when the cache is
   *   disabled, or when the read or the JSON decoding fails. The result is not
   *   validated against `T`.
   */
  async get<T>(key: string): Promise<T | null> {
    if (!this.client) return null;

    try {
      const data = await this.client.get(this.buildKey(key));

      // A missing key (null) and an empty payload are both counted as a miss.
      if (data === null || data === '') {
        this.stats.misses++;
        this.metricsService.recordCacheMiss();
        return null;
      }

      this.stats.hits++;
      this.metricsService.recordCacheHit();
      return JSON.parse(data) as T;
    } catch (error) {
      this.logger.error(`Cache get error: ${error}`);
      return null;
    }
  }

  /**
   * Stores `value` as JSON with an expiry.
   *
   * @param key Cache key without the prefix.
   * @param value Value to serialise with `JSON.stringify`.
   * @param ttlSeconds Time to live in seconds, passed to SETEX.
   */
  async set(key: string, value: unknown, ttlSeconds: number): Promise<void> {
    if (!this.client) return;

    try {
      await this.client.setex(this.buildKey(key), ttlSeconds, JSON.stringify(value));
    } catch (error) {
      this.logger.error(`Cache set error: ${error}`);
    }
  }

  /**
   * Removes a single cached entry. Failures are logged and swallowed.
   *
   * @param key Cache key without the prefix.
   */
  async delete(key: string): Promise<void> {
    if (!this.client) return;

    try {
      await this.client.del(this.buildKey(key));
    } catch (error) {
      this.logger.error(`Cache delete error: ${error}`);
    }
  }

  /**
   * Deletes every key matching `keyPrefix + pattern` (or all prefixed keys when
   * no pattern is given).
   *
   * The keyspace is walked incrementally with SCAN rather than KEYS so the
   * Redis server is not blocked on large databases, and keys are deleted one
   * batch at a time so the argument list of a single DEL stays bounded.
   *
   * @param pattern Optional glob pattern, relative to the key prefix.
   * @returns Number of keys actually deleted; if an error interrupts the scan
   *   this is the count removed before the failure (0 when the cache is disabled).
   */
  async clear(pattern?: string): Promise<number> {
    const client = this.client;
    if (!client) return 0;

    const searchPattern = pattern
      ? `${this.keyPrefix}${pattern}`
      : `${this.keyPrefix}*`;

    let deleted = 0;

    try {
      let cursor = '0';
      do {
        const [nextCursor, keys] = await client.scan(
          cursor,
          'MATCH',
          searchPattern,
          'COUNT',
          CacheService.SCAN_BATCH_SIZE,
        );
        cursor = nextCursor;

        if (keys.length > 0) {
          // SCAN may report a key more than once; DEL's reply counts only the
          // keys that were really removed.
          deleted += await client.del(...keys);
        }
      } while (cursor !== '0'); // SCAN signals completion with cursor "0".
    } catch (error) {
      this.logger.error(`Cache clear error: ${error}`);
    }

    return deleted;
  }

  /**
   * Returns the hit/miss counters collected by `get()` since this instance was
   * created, plus the hit rate (0 when nothing has been counted yet).
   */
  getStats() {
    const total = this.stats.hits + this.stats.misses;
    return {
      hits: this.stats.hits,
      misses: this.stats.misses,
      hitRate: total > 0 ? this.stats.hits / total : 0,
    };
  }

  /**
   * Pings Redis and reports the round-trip time.
   *
   * @returns `status` is `'disabled'` when there is no client, `'ok'` when the
   *   PING succeeded and `'error'` when it failed; `latency` is in milliseconds.
   */
  async healthCheck(): Promise<{ status: string; latency: number }> {
    if (!this.client) {
      return { status: 'disabled', latency: 0 };
    }

    const start = Date.now();
    try {
      await this.client.ping();
      return { status: 'ok', latency: Date.now() - start };
    } catch {
      return { status: 'error', latency: Date.now() - start };
    }
  }

  /** True when a client exists and its connection status is `'ready'`. */
  isConnected(): boolean {
    return this.client !== null && this.client.status === 'ready';
  }
}