mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-20 02:01:25 +02:00
NestJS-based web crawler service for structured content extraction.
Features:
- Depth-controlled crawling with URL pattern filtering
- robots.txt compliance
- HTML/PDF/Markdown content extraction
- BullMQ job queue for async processing
- Redis caching layer
- Prometheus metrics
20 lines
618 B
TypeScript
20 lines
618 B
TypeScript
import { Module, forwardRef } from '@nestjs/common';
|
|
import { CrawlProcessor } from './processors/crawl.processor';
|
|
import { ParserModule } from '../parser/parser.module';
|
|
import { RobotsModule } from '../robots/robots.module';
|
|
import { CacheModule } from '../cache/cache.module';
|
|
import { MetricsModule } from '../metrics/metrics.module';
|
|
import { QueueModule } from './queue.module';
|
|
import { CRAWL_QUEUE } from './constants';
|
|
|
|
/**
 * Nest module that registers `CrawlProcessor` as a provider.
 *
 * It imports the parser, robots, cache and metrics modules, plus
 * `QueueModule`. No providers are exported from this module.
 */
@Module({
  imports: [
    // QueueModule is referenced lazily through forwardRef.
    // NOTE(review): presumably QueueModule imports ProcessorModule back,
    // forming a circular module dependency — confirm against queue.module.
    forwardRef(() => QueueModule),
    ParserModule,
    RobotsModule,
    CacheModule,
    MetricsModule,
  ],
  providers: [CrawlProcessor],
})
export class ProcessorModule {}
|