mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-27 20:17:43 +02:00
✨ feat(mana-crawler): add web crawler service
NestJS-based web crawler service for structured content extraction.

Features:
- Depth-controlled crawling with URL pattern filtering
- robots.txt compliance
- HTML/PDF/Markdown content extraction
- BullMQ job queue for async processing
- Redis caching layer
- Prometheus metrics
This commit is contained in:
parent
c64b4d6ac9
commit
4a3295d1d0
39 changed files with 2795 additions and 0 deletions
24
services/mana-crawler/src/db/database.module.ts
Normal file
24
services/mana-crawler/src/db/database.module.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import { Global, Module } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { getDb } from './connection';
|
||||
|
||||
/**
 * Injection token under which `DatabaseModule` registers and exports its
 * database connection provider.
 */
export const DATABASE_CONNECTION = 'DATABASE_CONNECTION';
|
||||
|
||||
/**
 * Factory provider bound to the `DATABASE_CONNECTION` token.
 *
 * The factory reads the `database.url` configuration key and passes it to
 * `getDb`; whatever `getDb` returns becomes the injected value.
 *
 * @throws Error when `database.url` is missing or empty.
 */
const databaseConnectionProvider = {
  provide: DATABASE_CONNECTION,
  inject: [ConfigService],
  useFactory: (config: ConfigService) => {
    const url = config.get<string>('database.url');
    if (url) {
      return getDb(url);
    }
    // Fail fast at provider construction rather than handing out an unusable connection.
    throw new Error('DATABASE_URL is not configured');
  },
};

/**
 * Global module that registers the database connection provider and exports
 * its `DATABASE_CONNECTION` token.
 *
 * Because the module is decorated with `@Global()`, the exported token is
 * available for injection without importing this module in every consumer.
 */
@Global()
@Module({
  providers: [databaseConnectionProvider],
  exports: [DATABASE_CONNECTION],
})
export class DatabaseModule {}
|
||||
Loading…
Add table
Add a link
Reference in a new issue