mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 20:01:09 +02:00
Merge pull request #27 from Memo-2023/claude/plan-questions-app-UKqD5
Add Mana Search Service design document and architecture
This commit is contained in:
commit
a930e285b2
125 changed files with 10849 additions and 2 deletions
28
services/mana-search/.env.example
Normal file
28
services/mana-search/.env.example
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# Mana Search Service Environment Variables
|
||||
|
||||
# Server
|
||||
PORT=3021
|
||||
NODE_ENV=development
|
||||
|
||||
# SearXNG
|
||||
SEARXNG_URL=http://localhost:8080
|
||||
SEARXNG_TIMEOUT=15000
|
||||
SEARXNG_DEFAULT_LANGUAGE=de-DE
|
||||
SEARXNG_SECRET=change-me-in-production
|
||||
|
||||
# Redis
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6380
|
||||
REDIS_PASSWORD=
|
||||
|
||||
# Cache TTL (seconds)
|
||||
CACHE_SEARCH_TTL=3600
|
||||
CACHE_EXTRACT_TTL=86400
|
||||
|
||||
# Content Extraction
|
||||
EXTRACT_TIMEOUT=10000
|
||||
EXTRACT_MAX_LENGTH=50000
|
||||
EXTRACT_USER_AGENT=Mozilla/5.0 (compatible; ManaSearchBot/1.0; +https://manacore.app)
|
||||
|
||||
# CORS (comma-separated origins)
|
||||
CORS_ORIGINS=http://localhost:3000,http://localhost:5173,http://localhost:8081
|
||||
250
services/mana-search/CLAUDE.md
Normal file
250
services/mana-search/CLAUDE.md
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
# Mana Search Service
|
||||
|
||||
Central search microservice providing web search and content extraction for all ManaCore apps.
|
||||
|
||||
## Overview
|
||||
|
||||
- **Port**: 3021
|
||||
- **Technology**: NestJS + SearXNG + Redis
|
||||
- **Purpose**: Unified search and extraction API
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Consumer Apps │
|
||||
│ Questions │ Chat │ Project Doc Bot │ Future Apps │
|
||||
└─────────────────────────┬───────────────────────────────────┘
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ mana-search (Port 3021) │
|
||||
│ Search API │ Extract API │ Redis Cache │
|
||||
└─────────────────────────┬───────────────────────────────────┘
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ SearXNG (Port 8080, internal) │
|
||||
│ Google │ Bing │ DuckDuckGo │ Wikipedia │ arXiv │ ... │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Development (Local NestJS + Docker SearXNG/Redis)
|
||||
|
||||
```bash
|
||||
# 1. Start SearXNG and Redis
|
||||
docker-compose -f docker-compose.dev.yml up -d
|
||||
|
||||
# 2. Install dependencies
|
||||
pnpm install
|
||||
|
||||
# 3. Start NestJS in watch mode
|
||||
pnpm dev
|
||||
```
|
||||
|
||||
### Production (Full Docker)
|
||||
|
||||
```bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Search
|
||||
|
||||
```bash
|
||||
# Web search
|
||||
POST /api/v1/search
|
||||
{
|
||||
"query": "quantum computing",
|
||||
"options": {
|
||||
"categories": ["general", "science"],
|
||||
"engines": ["google", "wikipedia"],
|
||||
"language": "de-DE",
|
||||
"limit": 10
|
||||
}
|
||||
}
|
||||
|
||||
# Get available engines
|
||||
GET /api/v1/search/engines
|
||||
|
||||
# Search health check
|
||||
GET /api/v1/search/health
|
||||
|
||||
# Clear search cache
|
||||
DELETE /api/v1/search/cache
|
||||
```
|
||||
|
||||
### Extract
|
||||
|
||||
```bash
|
||||
# Extract content from URL
|
||||
POST /api/v1/extract
|
||||
{
|
||||
"url": "https://example.com/article",
|
||||
"options": {
|
||||
"includeMarkdown": true,
|
||||
"maxLength": 5000
|
||||
}
|
||||
}
|
||||
|
||||
# Bulk extract (max 20 URLs)
|
||||
POST /api/v1/extract/bulk
|
||||
{
|
||||
"urls": ["https://...", "https://..."],
|
||||
"options": { "includeMarkdown": true },
|
||||
"concurrency": 5
|
||||
}
|
||||
```
|
||||
|
||||
### Health & Metrics
|
||||
|
||||
```bash
|
||||
# Health check
|
||||
GET /health
|
||||
|
||||
# Prometheus metrics
|
||||
GET /metrics
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `PORT` | 3021 | API port |
|
||||
| `SEARXNG_URL` | http://localhost:8080 | SearXNG URL |
|
||||
| `SEARXNG_TIMEOUT` | 15000 | Search timeout (ms) |
|
||||
| `SEARXNG_DEFAULT_LANGUAGE` | de-DE | Default language |
|
||||
| `REDIS_HOST` | localhost | Redis host |
|
||||
| `REDIS_PORT` | 6379 | Redis port (set to 6380 when using `docker-compose.dev.yml`, which maps Redis to host port 6380) |
|
||||
| `CACHE_SEARCH_TTL` | 3600 | Search cache TTL (seconds) |
|
||||
| `CACHE_EXTRACT_TTL` | 86400 | Extract cache TTL (seconds) |
|
||||
| `EXTRACT_TIMEOUT` | 10000 | Extraction timeout (ms) |
|
||||
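| `EXTRACT_MAX_LENGTH` | 50000 | Max extracted text length |
| `EXTRACT_USER_AGENT` | Mozilla/5.0 (compatible; ManaSearchBot/1.0; +https://manacore.app) | User agent for content extraction |
| `REDIS_PASSWORD` | _(unset)_ | Redis password |
| `CORS_ORIGINS` | http://localhost:3000,http://localhost:5173,http://localhost:8081 | Allowed CORS origins (comma-separated) |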
|
||||
|
||||
### SearXNG Configuration
|
||||
|
||||
Edit `searxng/settings.yml` to:
|
||||
- Enable/disable search engines
|
||||
- Configure rate limits
|
||||
- Set default language
|
||||
- Adjust timeouts
|
||||
|
||||
## Development Commands
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
pnpm install
|
||||
|
||||
# Start development server
|
||||
pnpm dev
|
||||
|
||||
# Build for production
|
||||
pnpm build
|
||||
|
||||
# Start production server
|
||||
pnpm start
|
||||
|
||||
# Type checking
|
||||
pnpm type-check
|
||||
|
||||
# Linting
|
||||
pnpm lint
|
||||
|
||||
# Run tests
|
||||
pnpm test
|
||||
```
|
||||
|
||||
## Docker Commands
|
||||
|
||||
```bash
|
||||
# Start all services (production)
|
||||
docker-compose up -d
|
||||
|
||||
# Start SearXNG + Redis only (development)
|
||||
docker-compose -f docker-compose.dev.yml up -d
|
||||
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# Stop services
|
||||
docker-compose down
|
||||
|
||||
# Rebuild
|
||||
docker-compose build --no-cache
|
||||
```
|
||||
|
||||
## Testing the API
|
||||
|
||||
```bash
|
||||
# Search test
|
||||
curl -X POST http://localhost:3021/api/v1/search \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"query": "typescript tutorial"}'
|
||||
|
||||
# Extract test
|
||||
curl -X POST http://localhost:3021/api/v1/extract \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"url": "https://en.wikipedia.org/wiki/TypeScript", "options": {"includeMarkdown": true}}'
|
||||
|
||||
# Health check
|
||||
curl http://localhost:3021/health
|
||||
```
|
||||
|
||||
## Search Categories
|
||||
|
||||
| Category | Engines |
|
||||
|----------|---------|
|
||||
| `general` | Google, Bing, DuckDuckGo, Brave, Wikipedia |
|
||||
| `news` | Google News, Bing News |
|
||||
| `science` | arXiv, Google Scholar, PubMed, Semantic Scholar |
|
||||
| `it` | GitHub, StackOverflow, NPM, MDN |
|
||||
| `images` | Google Images, Bing Images, Unsplash |
|
||||
| `videos` | YouTube, Vimeo, PeerTube |
|
||||
|
||||
## Integration Example
|
||||
|
||||
```typescript
|
||||
// In another service
|
||||
const response = await fetch('http://mana-search:3021/api/v1/search', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
query: 'machine learning basics',
|
||||
options: {
|
||||
categories: ['general', 'science'],
|
||||
limit: 5
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
const { results, meta } = await response.json();
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### SearXNG not responding
|
||||
|
||||
```bash
|
||||
# Check SearXNG health
|
||||
curl http://localhost:8080/healthz
|
||||
|
||||
# Check logs
|
||||
docker logs mana-searxng-dev
|
||||
```
|
||||
|
||||
### Redis connection issues
|
||||
|
||||
```bash
|
||||
# Check Redis
|
||||
docker exec mana-search-redis-dev redis-cli ping
|
||||
|
||||
# Clear Redis data
|
||||
docker exec mana-search-redis-dev redis-cli FLUSHALL
|
||||
```
|
||||
|
||||
### High memory usage
|
||||
|
||||
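SearXNG can use significant memory. Note that the `maxmemory` setting in the docker-compose files only caps the Redis cache (64mb in development, 128mb in production); adjust it there if needed.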
|
||||
60
services/mana-search/Dockerfile
Normal file
60
services/mana-search/Dockerfile
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
# ================================
|
||||
# Build Stage
|
||||
# ================================
|
||||
FROM node:20-slim AS builder
|
||||
|
||||
# Install pnpm
|
||||
RUN npm install -g pnpm@9.15.0
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package files
|
||||
COPY package.json pnpm-lock.yaml* ./
|
||||
|
||||
# Install dependencies
|
||||
RUN pnpm install --frozen-lockfile
|
||||
|
||||
# Copy source code
|
||||
COPY tsconfig.json nest-cli.json ./
|
||||
COPY src ./src
|
||||
|
||||
# Build the application
|
||||
RUN pnpm build
|
||||
|
||||
# ================================
|
||||
# Production Stage
|
||||
# ================================
|
||||
FROM node:20-slim AS production
|
||||
|
||||
# Install pnpm
|
||||
RUN npm install -g pnpm@9.15.0
|
||||
|
||||
# Create non-root user
|
||||
RUN groupadd -r nestjs && useradd -r -g nestjs nestjs
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package files
|
||||
COPY package.json pnpm-lock.yaml* ./
|
||||
|
||||
# Install production dependencies only
|
||||
RUN pnpm install --prod --frozen-lockfile
|
||||
|
||||
# Copy built application
|
||||
COPY --from=builder /app/dist ./dist
|
||||
|
||||
# Set ownership
|
||||
RUN chown -R nestjs:nestjs /app
|
||||
|
||||
# Switch to non-root user
|
||||
USER nestjs
|
||||
|
||||
# Expose port
|
||||
EXPOSE 3021
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD node -e "fetch('http://localhost:3021/health').then(r => r.ok ? process.exit(0) : process.exit(1)).catch(() => process.exit(1))"
|
||||
|
||||
# Start the application
|
||||
CMD ["node", "dist/main"]
|
||||
58
services/mana-search/docker-compose.dev.yml
Normal file
58
services/mana-search/docker-compose.dev.yml
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
version: '3.8'
|
||||
|
||||
# Development setup - SearXNG and Redis only
|
||||
# Run mana-search with `pnpm dev` locally
|
||||
|
||||
services:
|
||||
# ================================
|
||||
# SearXNG Meta Search Engine
|
||||
# ================================
|
||||
searxng:
|
||||
image: searxng/searxng:latest
|
||||
container_name: mana-searxng-dev
|
||||
ports:
|
||||
- "8080:8080" # Exposed for development
|
||||
volumes:
|
||||
- ./searxng/settings.yml:/etc/searxng/settings.yml:ro
|
||||
- ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro
|
||||
environment:
|
||||
SEARXNG_BASE_URL: http://localhost:8080
|
||||
SEARXNG_SECRET: dev-secret-change-in-production
|
||||
networks:
|
||||
- mana-search-dev
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/healthz"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 15s
|
||||
|
||||
# ================================
|
||||
# Redis Cache
|
||||
# ================================
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: mana-search-redis-dev
|
||||
ports:
|
||||
- "6380:6379" # Different port to avoid conflicts
|
||||
command: redis-server --appendonly yes --maxmemory 64mb --maxmemory-policy allkeys-lru
|
||||
volumes:
|
||||
- redis-dev-data:/data
|
||||
networks:
|
||||
- mana-search-dev
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
volumes:
|
||||
redis-dev-data:
|
||||
name: mana-search-redis-dev-data
|
||||
|
||||
networks:
|
||||
mana-search-dev:
|
||||
name: mana-search-dev
|
||||
driver: bridge
|
||||
96
services/mana-search/docker-compose.yml
Normal file
96
services/mana-search/docker-compose.yml
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
# ================================
|
||||
# NestJS API Service
|
||||
# ================================
|
||||
mana-search:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: mana-search
|
||||
ports:
|
||||
- "3021:3021"
|
||||
environment:
|
||||
NODE_ENV: ${NODE_ENV:-development}
|
||||
PORT: 3021
|
||||
SEARXNG_URL: http://searxng:8080
|
||||
SEARXNG_TIMEOUT: 15000
|
||||
SEARXNG_DEFAULT_LANGUAGE: de-DE
|
||||
REDIS_HOST: redis
|
||||
REDIS_PORT: 6379
|
||||
CACHE_SEARCH_TTL: 3600
|
||||
CACHE_EXTRACT_TTL: 86400
|
||||
EXTRACT_TIMEOUT: 10000
|
||||
EXTRACT_MAX_LENGTH: 50000
|
||||
depends_on:
|
||||
searxng:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- mana-search-network
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "fetch('http://localhost:3021/health').then(r => r.ok ? process.exit(0) : process.exit(1)).catch(() => process.exit(1))"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
# ================================
|
||||
# SearXNG Meta Search Engine
|
||||
# ================================
|
||||
searxng:
|
||||
image: searxng/searxng:latest
|
||||
container_name: mana-searxng
|
||||
volumes:
|
||||
- ./searxng/settings.yml:/etc/searxng/settings.yml:ro
|
||||
- ./searxng/limiter.toml:/etc/searxng/limiter.toml:ro
|
||||
environment:
|
||||
SEARXNG_BASE_URL: http://localhost:8080
|
||||
SEARXNG_SECRET: ${SEARXNG_SECRET:-change-me-in-production-please}
|
||||
networks:
|
||||
- mana-search-network
|
||||
# Internal only - no external port mapping in production
|
||||
# Uncomment for debugging:
|
||||
# ports:
|
||||
# - "8080:8080"
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/healthz"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 15s
|
||||
|
||||
# ================================
|
||||
# Redis Cache
|
||||
# ================================
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: mana-search-redis
|
||||
command: redis-server --appendonly yes --maxmemory 128mb --maxmemory-policy allkeys-lru
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
networks:
|
||||
- mana-search-network
|
||||
# Internal only - no external port mapping
|
||||
# Uncomment for debugging:
|
||||
# ports:
|
||||
# - "6380:6379"
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
volumes:
|
||||
redis-data:
|
||||
name: mana-search-redis-data
|
||||
|
||||
networks:
|
||||
mana-search-network:
|
||||
name: mana-search-network
|
||||
driver: bridge
|
||||
8
services/mana-search/nest-cli.json
Normal file
8
services/mana-search/nest-cli.json
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"$schema": "https://json.schemastore.org/nest-cli",
|
||||
"collection": "@nestjs/schematics",
|
||||
"sourceRoot": "src",
|
||||
"compilerOptions": {
|
||||
"deleteOutDir": true
|
||||
}
|
||||
}
|
||||
47
services/mana-search/package.json
Normal file
47
services/mana-search/package.json
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
{
|
||||
"name": "@manacore/mana-search",
|
||||
"version": "1.0.0",
|
||||
"description": "Central search microservice with SearXNG and content extraction",
|
||||
"private": true,
|
||||
"license": "UNLICENSED",
|
||||
"scripts": {
|
||||
"build": "nest build",
|
||||
"dev": "nest start --watch",
|
||||
"start": "node dist/main",
|
||||
"start:dev": "nest start --watch",
|
||||
"start:debug": "nest start --debug --watch",
|
||||
"start:prod": "node dist/main",
|
||||
"lint": "eslint \"{src,test}/**/*.ts\" --fix",
|
||||
"type-check": "tsc --noEmit",
|
||||
"test": "jest",
|
||||
"test:watch": "jest --watch",
|
||||
"test:cov": "jest --coverage"
|
||||
},
|
||||
"dependencies": {
|
||||
"@extractus/article-extractor": "^8.0.18",
|
||||
"@nestjs/common": "^10.4.15",
|
||||
"@nestjs/config": "^3.3.0",
|
||||
"@nestjs/core": "^10.4.15",
|
||||
"@nestjs/platform-express": "^10.4.15",
|
||||
"class-transformer": "^0.5.1",
|
||||
"class-validator": "^0.14.1",
|
||||
"ioredis": "^5.4.2",
|
||||
"prom-client": "^15.1.3",
|
||||
"reflect-metadata": "^0.2.2",
|
||||
"rxjs": "^7.8.1",
|
||||
"turndown": "^7.2.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@nestjs/cli": "^10.4.9",
|
||||
"@nestjs/schematics": "^10.2.3",
|
||||
"@nestjs/testing": "^10.4.15",
|
||||
"@types/express": "^5.0.0",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/node": "^22.10.5",
|
||||
"@types/turndown": "^5.0.5",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.2.5",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.7.2"
|
||||
}
|
||||
}
|
||||
27
services/mana-search/searxng/limiter.toml
Normal file
27
services/mana-search/searxng/limiter.toml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# SearXNG Rate Limiter Configuration
|
||||
# Documentation: https://docs.searxng.org/admin/settings/limiter.html
|
||||
|
||||
[botdetection.ip_limit]
|
||||
# Enable link token for bot detection
|
||||
link_token = true
|
||||
|
||||
# Maximum searches per minute per IP
|
||||
limit = 60
|
||||
|
||||
# Burst limit (requests before rate limiting kicks in)
|
||||
burst = 20
|
||||
|
||||
[botdetection.ip_lists]
|
||||
# Allow internal Docker network IPs (no rate limiting for internal services)
|
||||
pass_ip = [
|
||||
# Docker internal networks
|
||||
"172.16.0.0/12",
|
||||
"192.168.0.0/16",
|
||||
"10.0.0.0/8",
|
||||
# Localhost
|
||||
"127.0.0.1",
|
||||
"::1",
|
||||
]
|
||||
|
||||
# Block known bad actors (add IPs as needed)
|
||||
block_ip = []
|
||||
242
services/mana-search/searxng/settings.yml
Normal file
242
services/mana-search/searxng/settings.yml
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
use_default_settings: true
|
||||
|
||||
general:
|
||||
instance_name: "ManaCore Search"
|
||||
debug: false
|
||||
privacypolicy_url: false
|
||||
donation_url: false
|
||||
contact_url: false
|
||||
enable_metrics: true
|
||||
|
||||
search:
|
||||
safe_search: 0
|
||||
autocomplete: "google"
|
||||
default_lang: "de-DE"
|
||||
formats:
|
||||
- html
|
||||
- json
|
||||
|
||||
server:
|
||||
secret_key: "${SEARXNG_SECRET}"
|
||||
limiter: true
|
||||
image_proxy: false
|
||||
method: "GET"
|
||||
bind_address: "0.0.0.0"
|
||||
port: 8080
|
||||
|
||||
ui:
|
||||
static_use_hash: true
|
||||
default_theme: simple
|
||||
theme_args:
|
||||
simple_style: dark
|
||||
|
||||
outgoing:
|
||||
request_timeout: 5.0
|
||||
max_request_timeout: 15.0
|
||||
useragent_suffix: ""
|
||||
|
||||
# Search engine configuration
|
||||
engines:
|
||||
# =====================================
|
||||
# WEB SEARCH (General)
|
||||
# =====================================
|
||||
- name: google
|
||||
engine: google
|
||||
shortcut: g
|
||||
disabled: false
|
||||
weight: 1.2
|
||||
|
||||
- name: bing
|
||||
engine: bing
|
||||
shortcut: b
|
||||
disabled: false
|
||||
weight: 1.0
|
||||
|
||||
- name: duckduckgo
|
||||
engine: duckduckgo
|
||||
shortcut: d
|
||||
disabled: false
|
||||
weight: 0.9
|
||||
|
||||
- name: brave
|
||||
engine: brave
|
||||
shortcut: br
|
||||
disabled: false
|
||||
weight: 1.0
|
||||
|
||||
- name: qwant
|
||||
engine: qwant
|
||||
shortcut: q
|
||||
disabled: false
|
||||
weight: 0.8
|
||||
|
||||
- name: startpage
|
||||
engine: startpage
|
||||
shortcut: sp
|
||||
disabled: false
|
||||
weight: 0.8
|
||||
|
||||
# =====================================
|
||||
# WIKIPEDIA
|
||||
# =====================================
|
||||
- name: wikipedia
|
||||
engine: wikipedia
|
||||
shortcut: w
|
||||
disabled: false
|
||||
weight: 1.1
|
||||
|
||||
- name: wikidata
|
||||
engine: wikidata
|
||||
shortcut: wd
|
||||
disabled: false
|
||||
weight: 0.8
|
||||
|
||||
# =====================================
|
||||
# IT / DEVELOPER
|
||||
# =====================================
|
||||
- name: github
|
||||
engine: github
|
||||
shortcut: gh
|
||||
disabled: false
|
||||
categories: [it]
|
||||
|
||||
- name: stackoverflow
|
||||
engine: stackoverflow
|
||||
shortcut: so
|
||||
disabled: false
|
||||
categories: [it]
|
||||
|
||||
- name: npm
|
||||
engine: npm
|
||||
shortcut: npm
|
||||
disabled: false
|
||||
categories: [it, packages]
|
||||
|
||||
- name: pypi
|
||||
engine: pypi
|
||||
shortcut: pip
|
||||
disabled: false
|
||||
categories: [it, packages]
|
||||
|
||||
- name: crates.io
|
||||
engine: crates
|
||||
shortcut: crates
|
||||
disabled: false
|
||||
categories: [it, packages]
|
||||
|
||||
- name: dockerhub
|
||||
engine: dockerhub
|
||||
shortcut: dh
|
||||
disabled: false
|
||||
categories: [it]
|
||||
|
||||
- name: mdn
|
||||
engine: mdn
|
||||
shortcut: mdn
|
||||
disabled: false
|
||||
categories: [it]
|
||||
|
||||
# =====================================
|
||||
# SCIENCE / ACADEMIC
|
||||
# =====================================
|
||||
- name: arxiv
|
||||
engine: arxiv
|
||||
shortcut: ar
|
||||
disabled: false
|
||||
categories: [science]
|
||||
|
||||
- name: google scholar
|
||||
engine: google_scholar
|
||||
shortcut: gs
|
||||
disabled: false
|
||||
categories: [science]
|
||||
|
||||
- name: semantic scholar
|
||||
engine: semantic_scholar
|
||||
shortcut: ss
|
||||
disabled: false
|
||||
categories: [science]
|
||||
|
||||
- name: pubmed
|
||||
engine: pubmed
|
||||
shortcut: pm
|
||||
disabled: false
|
||||
categories: [science, health]
|
||||
|
||||
- name: crossref
|
||||
engine: crossref
|
||||
shortcut: cr
|
||||
disabled: false
|
||||
categories: [science]
|
||||
|
||||
# =====================================
|
||||
# NEWS
|
||||
# =====================================
|
||||
- name: google news
|
||||
engine: google_news
|
||||
shortcut: gn
|
||||
disabled: false
|
||||
categories: [news]
|
||||
|
||||
- name: bing news
|
||||
engine: bing_news
|
||||
shortcut: bn
|
||||
disabled: false
|
||||
categories: [news]
|
||||
|
||||
- name: duckduckgo news
|
||||
engine: duckduckgo
|
||||
shortcut: ddn
|
||||
disabled: false
|
||||
categories: [news]
|
||||
|
||||
# =====================================
|
||||
# IMAGES
|
||||
# =====================================
|
||||
- name: google images
|
||||
engine: google_images
|
||||
shortcut: gi
|
||||
disabled: false
|
||||
categories: [images]
|
||||
|
||||
- name: bing images
|
||||
engine: bing_images
|
||||
shortcut: bi
|
||||
disabled: false
|
||||
categories: [images]
|
||||
|
||||
- name: unsplash
|
||||
engine: unsplash
|
||||
shortcut: us
|
||||
disabled: false
|
||||
categories: [images]
|
||||
|
||||
# =====================================
|
||||
# VIDEOS
|
||||
# =====================================
|
||||
- name: youtube
|
||||
engine: youtube_noapi
|
||||
shortcut: yt
|
||||
disabled: false
|
||||
categories: [videos]
|
||||
|
||||
- name: vimeo
|
||||
engine: vimeo
|
||||
shortcut: vim
|
||||
disabled: false
|
||||
categories: [videos]
|
||||
|
||||
- name: peertube
|
||||
engine: peertube
|
||||
shortcut: pt
|
||||
disabled: false
|
||||
categories: [videos]
|
||||
|
||||
# Category tabs
|
||||
categories_as_tabs:
|
||||
general:
|
||||
images:
|
||||
videos:
|
||||
news:
|
||||
science:
|
||||
it:
|
||||
23
services/mana-search/src/app.module.ts
Normal file
23
services/mana-search/src/app.module.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { ConfigModule } from '@nestjs/config';
|
||||
import configuration from './config/configuration';
|
||||
import { HealthModule } from './health/health.module';
|
||||
import { MetricsModule } from './metrics/metrics.module';
|
||||
import { CacheModule } from './cache/cache.module';
|
||||
import { SearchModule } from './search/search.module';
|
||||
import { ExtractModule } from './extract/extract.module';
|
||||
|
||||
/**
 * Root module of the mana-search service.
 *
 * Registers the configuration factory globally and pulls in the feature
 * modules: health, metrics, cache, search and extract.
 */
@Module({
  imports: [
    // `isGlobal` makes the loaded configuration available without re-importing ConfigModule.
    ConfigModule.forRoot({ load: [configuration], isGlobal: true }),
    HealthModule,
    MetricsModule,
    CacheModule,
    SearchModule,
    ExtractModule,
  ],
})
export class AppModule {}
|
||||
9
services/mana-search/src/cache/cache.module.ts
vendored
Normal file
9
services/mana-search/src/cache/cache.module.ts
vendored
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
import { Module, Global } from '@nestjs/common';
|
||||
import { CacheService } from './cache.service';
|
||||
|
||||
/**
 * Global module that provides and exports {@link CacheService}.
 */
@Global()
@Module({ exports: [CacheService], providers: [CacheService] })
export class CacheModule {}
|
||||
150
services/mana-search/src/cache/cache.service.ts
vendored
Normal file
150
services/mana-search/src/cache/cache.service.ts
vendored
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import Redis from 'ioredis';
|
||||
import { MetricsService } from '../metrics/metrics.service';
|
||||
|
||||
/**
 * Redis-backed JSON cache that degrades gracefully.
 *
 * When Redis cannot be reached at startup the service keeps running with the
 * cache disabled: reads return `null` and writes are no-ops. Every key is
 * namespaced with the configured key prefix.
 */
@Injectable()
export class CacheService implements OnModuleInit, OnModuleDestroy {
  /** COUNT hint passed to each SCAN iteration in {@link clear}. */
  private static readonly SCAN_BATCH_SIZE = 500;

  private readonly logger = new Logger(CacheService.name);
  private client: Redis | null = null;
  private readonly keyPrefix: string;

  /** In-process hit/miss counters, reported by {@link getStats}. */
  private stats = {
    hits: 0,
    misses: 0,
  };

  constructor(
    private readonly configService: ConfigService,
    private readonly metricsService: MetricsService,
  ) {
    this.keyPrefix = this.configService.get<string>('redis.keyPrefix', 'mana-search:');
  }

  /**
   * Opens the Redis connection and verifies it with a PING.
   * On failure the half-open client is torn down and the cache stays disabled.
   */
  async onModuleInit() {
    const host = this.configService.get<string>('redis.host', 'localhost');
    const port = this.configService.get<number>('redis.port', 6379);
    const password = this.configService.get<string>('redis.password');

    let client: Redis | null = null;

    try {
      client = new Redis({
        host,
        port,
        password,
        retryStrategy: (times) => {
          if (times > 3) {
            this.logger.warn('Redis connection failed, running without cache');
            return null; // Stop retrying
          }
          return Math.min(times * 200, 2000);
        },
        maxRetriesPerRequest: 1,
      });

      client.on('error', (err) => {
        this.logger.error(`Redis error: ${err.message}`);
      });

      client.on('connect', () => {
        this.logger.log(`Connected to Redis at ${host}:${port}`);
      });

      // Test connection before exposing the client to the rest of the service.
      await client.ping();
      this.client = client;
    } catch (error) {
      this.logger.warn(`Could not connect to Redis: ${error}. Running without cache.`);
      // Release the socket: the PING can fail on a connection that is
      // otherwise open (e.g. wrong password), which would leak the client.
      client?.disconnect();
      this.client = null;
    }
  }

  /** Closes the Redis connection, if one was established. */
  async onModuleDestroy() {
    if (!this.client) return;

    try {
      await this.client.quit();
    } catch (error) {
      // QUIT rejects when the connection is already closed; do not fail shutdown.
      this.logger.warn(`Redis quit failed: ${error}`);
      this.client.disconnect();
    }
  }

  /** Prepends the service key prefix to a logical cache key. */
  private buildKey(key: string): string {
    return `${this.keyPrefix}${key}`;
  }

  /**
   * Reads and JSON-decodes a cached value.
   *
   * @param key Logical key (without prefix).
   * @returns The decoded value, or `null` on a miss, when the cache is
   *   disabled, or when Redis/JSON decoding fails (the error is logged).
   */
  async get<T>(key: string): Promise<T | null> {
    if (!this.client) return null;

    try {
      const data = await this.client.get(this.buildKey(key));
      if (!data) {
        this.stats.misses++;
        this.metricsService.recordCacheMiss();
        return null;
      }

      this.stats.hits++;
      this.metricsService.recordCacheHit();
      return JSON.parse(data) as T;
    } catch (error) {
      this.logger.error(`Cache get error: ${error}`);
      return null;
    }
  }

  /**
   * Stores a JSON-encoded value with an expiry.
   *
   * @param key Logical key (without prefix).
   * @param value Value to serialise with `JSON.stringify`.
   * @param ttlSeconds Time to live in seconds.
   */
  async set(key: string, value: unknown, ttlSeconds: number): Promise<void> {
    if (!this.client) return;

    try {
      await this.client.setex(this.buildKey(key), ttlSeconds, JSON.stringify(value));
    } catch (error) {
      this.logger.error(`Cache set error: ${error}`);
    }
  }

  /** Removes a single entry; errors are logged, not thrown. */
  async delete(key: string): Promise<void> {
    if (!this.client) return;

    try {
      await this.client.del(this.buildKey(key));
    } catch (error) {
      this.logger.error(`Cache delete error: ${error}`);
    }
  }

  /**
   * Deletes every key under this service's prefix.
   *
   * Iterates with SCAN instead of KEYS so the Redis server is not blocked
   * while the keyspace is walked, and deletes per batch so the DEL argument
   * list stays bounded.
   *
   * @returns Number of keys Redis reported as deleted; `0` when the cache is
   *   disabled or an error occurred (the error is logged).
   */
  async clear(): Promise<number> {
    const client = this.client;
    if (!client) return 0;

    try {
      const pattern = `${this.keyPrefix}*`;
      let cursor = '0';
      let deleted = 0;

      do {
        const [nextCursor, keys] = await client.scan(
          cursor,
          'MATCH',
          pattern,
          'COUNT',
          CacheService.SCAN_BATCH_SIZE,
        );
        cursor = nextCursor;
        if (keys.length > 0) {
          deleted += await client.del(...keys);
        }
      } while (cursor !== '0'); // SCAN signals completion with cursor "0"

      return deleted;
    } catch (error) {
      this.logger.error(`Cache clear error: ${error}`);
      return 0;
    }
  }

  /** Returns hit/miss counters and the hit rate (0 when nothing was looked up). */
  getStats() {
    const total = this.stats.hits + this.stats.misses;
    return {
      hits: this.stats.hits,
      misses: this.stats.misses,
      hitRate: total > 0 ? this.stats.hits / total : 0,
    };
  }

  /**
   * Pings Redis and measures the round trip.
   *
   * @returns `status` is `'disabled'` without a client, `'ok'` on a successful
   *   PING, `'error'` otherwise; `latency` is in milliseconds.
   */
  async healthCheck(): Promise<{ status: string; latency: number }> {
    if (!this.client) {
      return { status: 'disabled', latency: 0 };
    }

    const start = Date.now();
    try {
      await this.client.ping();
      return { status: 'ok', latency: Date.now() - start };
    } catch {
      return { status: 'error', latency: Date.now() - start };
    }
  }

  /** True only when a client exists and its connection status is `'ready'`. */
  isConnected(): boolean {
    return this.client !== null && this.client.status === 'ready';
  }
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
import {
|
||||
ExceptionFilter,
|
||||
Catch,
|
||||
ArgumentsHost,
|
||||
HttpException,
|
||||
HttpStatus,
|
||||
Logger,
|
||||
} from '@nestjs/common';
|
||||
import { Request, Response } from 'express';
|
||||
|
||||
/**
 * Catch-all exception filter that renders every error as a uniform JSON body:
 * `{ success: false, error: { statusCode, message, timestamp, path } }`.
 */
@Catch()
export class HttpExceptionFilter implements ExceptionFilter {
  private readonly logger = new Logger(HttpExceptionFilter.name);

  /**
   * Maps the thrown value to an HTTP status and message and writes the response.
   *
   * - `HttpException`: its own status and response message are used.
   * - other `Error`: 500, logged with stack.
   * - anything else: 500 with a generic message, logged.
   */
  catch(exception: unknown, host: ArgumentsHost) {
    const ctx = host.switchToHttp();
    const response = ctx.getResponse<Response>();
    const request = ctx.getRequest<Request>();

    let status = HttpStatus.INTERNAL_SERVER_ERROR;
    // `unknown` because an HttpException response may carry a string or an array of messages.
    let message: unknown = 'Internal server error';

    if (exception instanceof HttpException) {
      status = exception.getStatus();
      message = this.resolveHttpMessage(exception);
    } else if (exception instanceof Error) {
      // NOTE(review): the raw error message is returned to the client, which
      // may expose internal details — consider keeping the generic message.
      message = exception.message;
      this.logger.error(`Unhandled error: ${exception.message}`, exception.stack);
    } else {
      // Thrown strings/objects previously produced a 500 without any log entry.
      this.logger.error(`Unhandled non-Error exception: ${String(exception)}`);
    }

    response.status(status).json({
      success: false,
      error: {
        statusCode: status,
        message,
        timestamp: new Date().toISOString(),
        path: request.url,
      },
    });
  }

  /** Picks the message from an HttpException response, falling back to `exception.message`. */
  private resolveHttpMessage(exception: HttpException): unknown {
    const body = exception.getResponse();
    if (typeof body === 'string') {
      return body;
    }
    const candidate = (body as { message?: unknown }).message;
    return candidate || exception.message;
  }
}
|
||||
38
services/mana-search/src/config/configuration.ts
Normal file
38
services/mana-search/src/config/configuration.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
/**
 * Parses a base-10 integer from an environment value.
 * Falls back when the value is unset, empty, or not numeric (instead of yielding NaN).
 */
const parseIntEnv = (raw: string | undefined, fallback: number): number => {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
};

/**
 * Splits a comma-separated list, trimming whitespace and dropping empty entries.
 * Returns the fallback only when the variable is not set at all.
 */
const parseListEnv = (raw: string | undefined, fallback: string[]): string[] => {
  if (raw === undefined) {
    return fallback;
  }
  return raw
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
};

/**
 * Configuration factory: reads environment variables and applies defaults.
 *
 * @returns Nested configuration with `port`, `nodeEnv`, `cors`, `searxng`,
 *   `redis`, `cache` and `extract` sections.
 */
const configuration = () => ({
  port: parseIntEnv(process.env.PORT, 3021),
  nodeEnv: process.env.NODE_ENV || 'development',

  cors: {
    origins: parseListEnv(process.env.CORS_ORIGINS, [
      'http://localhost:3000',
      'http://localhost:5173',
      'http://localhost:8081',
    ]),
  },

  searxng: {
    url: process.env.SEARXNG_URL || 'http://localhost:8080',
    timeout: parseIntEnv(process.env.SEARXNG_TIMEOUT, 15000),
    defaultLanguage: process.env.SEARXNG_DEFAULT_LANGUAGE || 'de-DE',
  },

  redis: {
    host: process.env.REDIS_HOST || 'localhost',
    port: parseIntEnv(process.env.REDIS_PORT, 6379),
    password: process.env.REDIS_PASSWORD,
    keyPrefix: 'mana-search:',
  },

  cache: {
    searchTtl: parseIntEnv(process.env.CACHE_SEARCH_TTL, 3600), // 1 hour
    extractTtl: parseIntEnv(process.env.CACHE_EXTRACT_TTL, 86400), // 24 hours
  },

  extract: {
    timeout: parseIntEnv(process.env.EXTRACT_TIMEOUT, 10000),
    maxLength: parseIntEnv(process.env.EXTRACT_MAX_LENGTH, 50000),
    userAgent:
      process.env.EXTRACT_USER_AGENT ||
      'Mozilla/5.0 (compatible; ManaSearchBot/1.0; +https://manacore.app)',
  },
});

export default configuration;
|
||||
60
services/mana-search/src/extract/dto/extract-request.dto.ts
Normal file
60
services/mana-search/src/extract/dto/extract-request.dto.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import { IsString, IsOptional, IsBoolean, IsInt, Min, Max, IsUrl, ValidateNested, IsArray } from 'class-validator';
|
||||
import { Type } from 'class-transformer';
|
||||
|
||||
/**
 * Optional tuning knobs for a content-extraction request.
 *
 * All fields are optional; the extraction service substitutes configured
 * defaults for `maxLength` and `timeout` when they are omitted.
 */
export class ExtractOptionsDto {
  /** When true, the extracted article HTML is returned alongside the plain text. */
  @IsOptional()
  @IsBoolean()
  includeHtml?: boolean;

  /** When true, a Markdown rendering of the article is returned alongside the plain text. */
  @IsOptional()
  @IsBoolean()
  includeMarkdown?: boolean;

  /** Maximum length of the returned plain text, in characters (100 to 100000). */
  @IsOptional()
  @IsInt()
  @Min(100)
  @Max(100000)
  maxLength?: number;

  /** Requests image extraction. NOTE(review): not read by the extraction service shown alongside this DTO. */
  @IsOptional()
  @IsBoolean()
  extractImages?: boolean;

  /** Requests link extraction. NOTE(review): not read by the extraction service shown alongside this DTO. */
  @IsOptional()
  @IsBoolean()
  extractLinks?: boolean;

  /** Upper bound for the extraction, in milliseconds (1000 to 30000). */
  @IsOptional()
  @IsInt()
  @Min(1000)
  @Max(30000)
  timeout?: number;
}
|
||||
|
||||
/**
 * Request body for extracting content from a single URL.
 */
export class ExtractRequestDto {
  /**
   * Absolute http(s) URL of the page to extract.
   *
   * The protocol is required and limited to http/https: the value is handed
   * to an HTTP-based extractor, so `ftp://…` or protocol-less input such as
   * `example.com` must be rejected at the validation layer.
   */
  @IsString()
  @IsUrl({ protocols: ['http', 'https'], require_protocol: true })
  url: string;

  /** Optional extraction settings; validated as a nested {@link ExtractOptionsDto}. */
  @IsOptional()
  @ValidateNested()
  @Type(() => ExtractOptionsDto)
  options?: ExtractOptionsDto;
}
|
||||
|
||||
/**
 * Request body for extracting content from several URLs in one call.
 */
export class BulkExtractRequestDto {
  /**
   * Absolute http(s) URLs to extract; each element is validated individually.
   *
   * NOTE(review): the array length is not bounded here — consider an upper
   * limit so a single request cannot queue an arbitrary number of fetches.
   */
  @IsArray()
  @IsUrl({ protocols: ['http', 'https'], require_protocol: true }, { each: true })
  urls: string[];

  /** Extraction settings applied to every URL in the request. */
  @IsOptional()
  @ValidateNested()
  @Type(() => ExtractOptionsDto)
  options?: ExtractOptionsDto;

  /** Number of URLs processed in parallel per batch (1 to 10). */
  @IsOptional()
  @IsInt()
  @Min(1)
  @Max(10)
  concurrency?: number;
}
|
||||
67
services/mana-search/src/extract/dto/extract-response.dto.ts
Normal file
67
services/mana-search/src/extract/dto/extract-response.dto.ts
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
/** An image reference found in an extracted page. */
export interface ExtractedImage {
  /** Location of the image. */
  url: string;
  /** Alternative text, when the page provided one. */
  alt?: string;
}
|
||||
|
||||
/** A hyperlink found in an extracted page. */
export interface ExtractedLink {
  /** Link target. */
  url: string;
  /** Visible link text. */
  text: string;
  /** Whether the link is flagged as external (presumably: points away from the extracted page's site). */
  isExternal: boolean;
}
|
||||
|
||||
/** Article content and metadata produced by a successful extraction. */
export interface ExtractedContent {
  title: string;
  description?: string;
  author?: string;
  publishedDate?: string;
  siteName?: string;

  // Content
  /** Plain-text body with markup removed. */
  text: string;
  /** Markdown rendering; only present when requested. */
  markdown?: string;
  /** Article HTML; only present when requested. */
  html?: string;

  // Stats
  /** Number of whitespace-separated words in `text`. */
  wordCount: number;
  /** Reading-time estimate derived from `wordCount`. */
  readingTime: number;

  // Media
  images?: ExtractedImage[];
  links?: ExtractedLink[];

  // Meta
  ogImage?: string;
  ogType?: string;
  language?: string;
}
|
||||
|
||||
/** Bookkeeping data attached to every extraction response. */
export interface ExtractMeta {
  /** URL the extraction was requested for. */
  url: string;
  /** Elapsed time for handling the request. */
  duration: number;
  /** True when the response was served from the cache. */
  cached: boolean;
  /** Content type label of the extracted resource. */
  contentType: string;
}
|
||||
|
||||
/**
 * Result of a single-URL extraction.
 *
 * `content` is set on success and `error` on failure; `meta` is always present.
 */
export interface ExtractResponse {
  success: boolean;
  content?: ExtractedContent;
  error?: string;
  meta: ExtractMeta;
}
|
||||
|
||||
/** Per-URL outcome inside a bulk extraction response. */
export interface BulkExtractResult {
  /** URL this entry belongs to. */
  url: string;
  success: boolean;
  /** Extracted content; set when `success` is true. */
  content?: ExtractedContent;
  /** Failure description; set when `success` is false. */
  error?: string;
}
|
||||
|
||||
/** Response of a bulk extraction: one result per requested URL plus totals. */
export interface BulkExtractResponse {
  results: BulkExtractResult[];
  meta: {
    /** Number of results returned. */
    total: number;
    /** Number of results with `success === true`. */
    successful: number;
    /** Number of results with `success === false`. */
    failed: number;
    /** Elapsed time for the whole bulk request. */
    duration: number;
  };
}
|
||||
2
services/mana-search/src/extract/dto/index.ts
Normal file
2
services/mana-search/src/extract/dto/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export * from './extract-request.dto';
|
||||
export * from './extract-response.dto';
|
||||
31
services/mana-search/src/extract/extract.controller.ts
Normal file
31
services/mana-search/src/extract/extract.controller.ts
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import { Controller, Post, Body, Logger } from '@nestjs/common';
|
||||
import { ExtractService } from './extract.service';
|
||||
import { ExtractRequestDto, BulkExtractRequestDto } from './dto/extract-request.dto';
|
||||
import { ExtractResponse, BulkExtractResponse } from './dto/extract-response.dto';
|
||||
|
||||
/**
 * HTTP entry points for content extraction, mounted under the `extract` route.
 */
@Controller('extract')
export class ExtractController {
  private readonly logger = new Logger(ExtractController.name);

  constructor(private readonly extractService: ExtractService) {}

  /**
   * Extract content from a URL
   * POST /api/v1/extract
   *
   * @param request Validated request body carrying the URL and optional settings.
   * @returns The extraction result as produced by the extraction service.
   */
  @Post()
  async extract(@Body() request: ExtractRequestDto): Promise<ExtractResponse> {
    this.logger.log(`Extract request: ${request.url}`);
    return this.extractService.extract(request);
  }

  /**
   * Extract content from multiple URLs
   * POST /api/v1/extract/bulk
   *
   * @param request Validated request body carrying the URL list and optional settings.
   * @returns One result per URL plus aggregate counters.
   */
  @Post('bulk')
  async bulkExtract(@Body() request: BulkExtractRequestDto): Promise<BulkExtractResponse> {
    this.logger.log(`Bulk extract request: ${request.urls.length} URLs`);
    return this.extractService.bulkExtract(request);
  }
}
|
||||
10
services/mana-search/src/extract/extract.module.ts
Normal file
10
services/mana-search/src/extract/extract.module.ts
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { ExtractController } from './extract.controller';
|
||||
import { ExtractService } from './extract.service';
|
||||
|
||||
/**
 * Wires the extraction controller and service together and exports the
 * service so other modules can reuse it.
 */
@Module({
  controllers: [ExtractController],
  providers: [ExtractService],
  exports: [ExtractService],
})
export class ExtractModule {}
|
||||
228
services/mana-search/src/extract/extract.service.ts
Normal file
228
services/mana-search/src/extract/extract.service.ts
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { extract } from '@extractus/article-extractor';
|
||||
import TurndownService from 'turndown';
|
||||
import { CacheService } from '../cache/cache.service';
|
||||
import { MetricsService } from '../metrics/metrics.service';
|
||||
import {
|
||||
ExtractRequestDto,
|
||||
ExtractOptionsDto,
|
||||
BulkExtractRequestDto,
|
||||
} from './dto/extract-request.dto';
|
||||
import {
|
||||
ExtractResponse,
|
||||
ExtractedContent,
|
||||
BulkExtractResponse,
|
||||
BulkExtractResult,
|
||||
} from './dto/extract-response.dto';
|
||||
|
||||
/**
 * Fetches web pages, extracts their main article and returns it as plain
 * text (optionally also Markdown / HTML). Successful results are cached.
 *
 * NOTE(review): URLs come from request bodies and are fetched server-side;
 * nothing in this class prevents requests to internal addresses — confirm
 * that is acceptable for the deployment.
 */
@Injectable()
export class ExtractService {
  /** Named HTML entities decoded by {@link cleanText}. */
  private static readonly HTML_ENTITIES: Readonly<Record<string, string>> = {
    '&nbsp;': ' ',
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
    '&quot;': '"',
  };

  private readonly logger = new Logger(ExtractService.name);
  private readonly turndown: TurndownService;
  private readonly defaultTimeout: number;
  private readonly defaultMaxLength: number;
  private readonly userAgent: string;

  constructor(
    private readonly configService: ConfigService,
    private readonly cacheService: CacheService,
    private readonly metricsService: MetricsService,
  ) {
    this.defaultTimeout = this.configService.get<number>('extract.timeout', 10000);
    this.defaultMaxLength = this.configService.get<number>('extract.maxLength', 50000);
    this.userAgent = this.configService.get<string>(
      'extract.userAgent',
      'Mozilla/5.0 (compatible; ManaSearchBot/1.0)',
    );

    // Configure Turndown for Markdown conversion
    this.turndown = new TurndownService({
      headingStyle: 'atx',
      codeBlockStyle: 'fenced',
      bulletListMarker: '-',
    });

    // Custom rules for better Markdown output.
    // <pre> uses the node's raw text: the already-converted child content
    // would carry the backticks added by the inline-code rule below.
    this.turndown.addRule('codeBlocks', {
      filter: ['pre'],
      replacement: (_content, node) => `\n\`\`\`\n${node.textContent ?? ''}\n\`\`\`\n`,
    });

    this.turndown.addRule('inlineCode', {
      filter: ['code'],
      replacement: (content: string) => `\`${content}\``,
    });
  }

  /**
   * Extracts the main content of `request.url`.
   *
   * Never throws for extraction problems: failures are reported as
   * `{ success: false, error }`. Only successful results are cached.
   *
   * @param request URL plus optional settings (`maxLength`, `timeout`,
   *   `includeMarkdown`, `includeHtml`).
   * @returns The extraction result; `meta.cached` tells whether it came from the cache.
   */
  async extract(request: ExtractRequestDto): Promise<ExtractResponse> {
    const startTime = Date.now();
    const maxLength = request.options?.maxLength || this.defaultMaxLength;
    const cacheKey = this.buildCacheKey(request.url, request.options, maxLength);

    // Check cache
    const cached = await this.cacheService.get<ExtractResponse>(cacheKey);
    if (cached) {
      this.logger.debug(`Cache hit for: ${request.url}`);
      return {
        ...cached,
        meta: { ...cached.meta, cached: true },
      };
    }

    try {
      const timeout = request.options?.timeout || this.defaultTimeout;
      const article = await this.fetchArticle(request.url, timeout);

      if (!article) {
        return this.buildErrorResponse(
          request.url,
          'Could not extract content from URL',
          startTime,
        );
      }

      // Process content
      let text = this.cleanText(article.content || '');
      if (text.length > maxLength) {
        text = text.substring(0, maxLength) + '...';
      }
      const wordCount = this.countWords(text);

      const content: ExtractedContent = {
        title: article.title || '',
        description: article.description,
        author: article.author,
        publishedDate: article.published,
        siteName: article.source,

        text,
        wordCount,
        readingTime: Math.ceil(wordCount / 200),

        ogImage: article.image,
      };

      // Optional: Markdown conversion
      if (request.options?.includeMarkdown && article.content) {
        content.markdown = this.turndown.turndown(article.content);
      }

      // Optional: Include raw HTML
      if (request.options?.includeHtml && article.content) {
        content.html = article.content;
      }

      const response: ExtractResponse = {
        success: true,
        content,
        meta: {
          url: request.url,
          duration: Date.now() - startTime,
          cached: false,
          contentType: 'text/html',
        },
      };

      // Cache the result
      const ttl = this.configService.get<number>('cache.extractTtl', 86400);
      await this.cacheService.set(cacheKey, response, ttl);

      this.metricsService.recordRequest('extract', 200, Date.now() - startTime);
      return response;
    } catch (error) {
      this.logger.error(`Extraction failed for ${request.url}: ${error}`);
      this.metricsService.recordRequest('extract', 500, Date.now() - startTime);

      return this.buildErrorResponse(
        request.url,
        error instanceof Error ? error.message : 'Extraction failed',
        startTime,
      );
    }
  }

  /**
   * Extracts several URLs, `concurrency` at a time (default 5).
   *
   * Batches run one after another; URLs within a batch run in parallel.
   * Results keep the order of `request.urls`.
   */
  async bulkExtract(request: BulkExtractRequestDto): Promise<BulkExtractResponse> {
    const startTime = Date.now();
    const concurrency = request.concurrency || 5;

    // Process URLs in batches
    const results: BulkExtractResult[] = [];

    for (let i = 0; i < request.urls.length; i += concurrency) {
      const batch = request.urls.slice(i, i + concurrency);
      const batchResults = await Promise.all(
        batch.map(async (url) => {
          const response = await this.extract({
            url,
            options: request.options,
          });

          return {
            url,
            success: response.success,
            content: response.content,
            error: response.error,
          };
        }),
      );

      results.push(...batchResults);
    }

    const successful = results.filter((r) => r.success).length;

    return {
      results,
      meta: {
        total: results.length,
        successful,
        failed: results.length - successful,
        duration: Date.now() - startTime,
      },
    };
  }

  /**
   * Builds the cache key for a request.
   *
   * The key covers every option that changes the cached payload (Markdown /
   * HTML inclusion and the text length limit); otherwise a response cached
   * for one variant would be served to a request asking for another.
   */
  private buildCacheKey(
    url: string,
    options: ExtractOptionsDto | undefined,
    maxLength: number,
  ): string {
    const variant = [
      options?.includeMarkdown ? 'md' : '',
      options?.includeHtml ? 'html' : '',
    ]
      .filter((part) => part.length > 0)
      .join('+');
    return `extract:${variant || 'text'}:${maxLength}:${url}`;
  }

  /**
   * Runs the article extractor with an upper time bound.
   *
   * The extractor call is raced against a timer; the timer is always cleared
   * afterwards so finished requests do not leave pending timeouts behind.
   * The configured user agent is sent with the page request.
   */
  private async fetchArticle(url: string, timeoutMs: number) {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('Extraction timeout')), timeoutMs);
    });

    try {
      return await Promise.race([
        extract(url, {}, { headers: { 'user-agent': this.userAgent } }),
        timeoutPromise,
      ]);
    } finally {
      clearTimeout(timer);
    }
  }

  /** Builds a failure response for `url` with the elapsed time since `startTime`. */
  private buildErrorResponse(
    url: string,
    error: string,
    startTime: number,
  ): ExtractResponse {
    return {
      success: false,
      error,
      meta: {
        url,
        duration: Date.now() - startTime,
        cached: false,
        contentType: 'unknown',
      },
    };
  }

  /**
   * Reduces an HTML fragment to plain text: drops script/style blocks,
   * replaces remaining tags with spaces, decodes the common named entities
   * and collapses whitespace.
   *
   * Entities are decoded in a single pass so already-escaped text such as
   * `&amp;lt;` becomes `&lt;` rather than being decoded twice.
   */
  private cleanText(html: string): string {
    return html
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
      .replace(/<[^>]+>/g, ' ')
      .replace(
        /&(?:nbsp|amp|lt|gt|quot);/g,
        (entity) => ExtractService.HTML_ENTITIES[entity] ?? entity,
      )
      .replace(/\s+/g, ' ')
      .trim();
  }

  /** Counts whitespace-separated, non-empty tokens in `text`. */
  private countWords(text: string): number {
    return text.split(/\s+/).filter((word) => word.length > 0).length;
  }
}
|
||||
57
services/mana-search/src/health/health.controller.ts
Normal file
57
services/mana-search/src/health/health.controller.ts
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
import { Controller, Get } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
|
||||
/**
 * Aggregated health endpoint for the service and its dependencies.
 */
@Controller()
export class HealthController {
  constructor(private readonly configService: ConfigService) {}

  /**
   * Reports the service status together with per-component details.
   *
   * Overall status is `ok` when every component is ok, `error` when every
   * component failed, and `degraded` otherwise.
   */
  @Get('/health')
  async health() {
    const searxng = await this.checkSearxng();

    // NOTE(review): Redis is not actually probed here — the component is
    // always reported as ok. A real check needs access to the cache client.
    const redis = { status: 'ok', latency: 0 };

    const components = [searxng, redis];
    let overallStatus = 'degraded';
    if (components.every((component) => component.status === 'ok')) {
      overallStatus = 'ok';
    } else if (components.every((component) => component.status === 'error')) {
      overallStatus = 'error';
    }

    return {
      status: overallStatus,
      service: 'mana-search',
      version: '1.0.0',
      timestamp: new Date().toISOString(),
      components: {
        searxng,
        redis,
      },
    };
  }

  /**
   * Calls SearXNG's `/healthz` endpoint with a 5 second limit.
   *
   * @returns `ok` with the measured latency for a 2xx answer, `error`
   *   otherwise (latency 0 when the request itself failed).
   */
  private async checkSearxng(): Promise<{ status: string; latency: number }> {
    const searxngUrl = this.configService.get<string>('searxng.url');

    try {
      const start = Date.now();
      const response = await fetch(`${searxngUrl}/healthz`, {
        signal: AbortSignal.timeout(5000),
      });
      return {
        status: response.ok ? 'ok' : 'error',
        latency: Date.now() - start,
      };
    } catch {
      return { status: 'error', latency: 0 };
    }
  }
}
|
||||
7
services/mana-search/src/health/health.module.ts
Normal file
7
services/mana-search/src/health/health.module.ts
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { HealthController } from './health.controller';
|
||||
|
||||
/** Registers the health-check controller. */
@Module({
  controllers: [HealthController],
})
export class HealthModule {}
|
||||
42
services/mana-search/src/main.ts
Normal file
42
services/mana-search/src/main.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import { NestFactory } from '@nestjs/core';
|
||||
import { ValidationPipe, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { AppModule } from './app.module';
|
||||
import { HttpExceptionFilter } from './common/filters/http-exception.filter';
|
||||
|
||||
/**
 * Creates the Nest application, applies global settings (route prefix, CORS,
 * validation, exception filter) and starts listening on the configured port.
 */
async function bootstrap(): Promise<void> {
  const logger = new Logger('Bootstrap');

  const app = await NestFactory.create(AppModule);

  const configService = app.get(ConfigService);
  const port = configService.get<number>('port', 3021);

  // Global prefix — applies to every route, including health and metrics.
  const globalPrefix = 'api/v1';
  app.setGlobalPrefix(globalPrefix);

  // CORS - the service is internal, but this is useful during development.
  // The fallback lists explicit origins: a string entry is compared literally,
  // so a pattern such as 'http://localhost:*' would never match.
  app.enableCors({
    origin: configService.get<string[]>('cors.origins', [
      'http://localhost:3000',
      'http://localhost:5173',
      'http://localhost:8081',
    ]),
    credentials: true,
  });

  // Global pipes: strip unknown properties, reject them, and coerce payloads
  // into DTO instances.
  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      transform: true,
      forbidNonWhitelisted: true,
    }),
  );

  // Global filters
  app.useGlobalFilters(new HttpExceptionFilter());

  await app.listen(port);
  logger.log(`Mana Search Service running on port ${port}`);
  // Logged URLs include the global prefix so they match the routes actually served.
  logger.log(`Health check: http://localhost:${port}/${globalPrefix}/health`);
  logger.log(`Metrics: http://localhost:${port}/${globalPrefix}/metrics`);
}

// Surface startup failures explicitly instead of leaving a floating promise.
bootstrap().catch((error: unknown) => {
  const detail = error instanceof Error ? (error.stack ?? error.message) : String(error);
  new Logger('Bootstrap').error(`Failed to start Mana Search Service: ${detail}`);
  process.exit(1);
});
|
||||
17
services/mana-search/src/metrics/metrics.controller.ts
Normal file
17
services/mana-search/src/metrics/metrics.controller.ts
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import { Controller, Get, Header, Res } from '@nestjs/common';
|
||||
import { Response } from 'express';
|
||||
import { MetricsService } from './metrics.service';
|
||||
|
||||
/**
 * Exposes the metrics registry over HTTP.
 */
@Controller()
export class MetricsController {
  constructor(private readonly metricsService: MetricsService) {}

  /**
   * Writes the serialized metrics to the response using the content type
   * reported by the registry.
   *
   * The raw response object is used so the body is sent verbatim.
   */
  @Get('/metrics')
  async metrics(@Res() res: Response) {
    const contentType = await this.metricsService.getContentType();
    const metrics = await this.metricsService.getMetrics();

    res.setHeader('Content-Type', contentType);
    res.send(metrics);
  }
}
|
||||
11
services/mana-search/src/metrics/metrics.module.ts
Normal file
11
services/mana-search/src/metrics/metrics.module.ts
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
import { Module, Global } from '@nestjs/common';
|
||||
import { MetricsService } from './metrics.service';
|
||||
import { MetricsController } from './metrics.controller';
|
||||
|
||||
/**
 * Global metrics module: `MetricsService` is exported and, because the module
 * is global, injectable everywhere without importing this module again.
 */
@Global()
@Module({
  providers: [MetricsService],
  controllers: [MetricsController],
  exports: [MetricsService],
})
export class MetricsModule {}
|
||||
101
services/mana-search/src/metrics/metrics.service.ts
Normal file
101
services/mana-search/src/metrics/metrics.service.ts
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import { Injectable } from '@nestjs/common';
|
||||
import { Counter, Histogram, Gauge, Registry, collectDefaultMetrics } from 'prom-client';
|
||||
|
||||
/**
 * Owns a dedicated prom-client registry and the service's custom metrics.
 *
 * Default Node.js process metrics are collected into the same registry.
 */
@Injectable()
export class MetricsService {
  private readonly registry = new Registry();

  // Request Counter
  private readonly requestsTotal: Counter<string>;

  // Latency Histogram
  private readonly latency: Histogram<string>;

  // Cache Metrics
  private readonly cacheHits: Counter<string>;
  private readonly cacheMisses: Counter<string>;

  // SearXNG Engine Status
  private readonly engineStatus: Gauge<string>;

  // Active searches
  private readonly activeSearches: Gauge<string>;

  constructor() {
    // Collect default Node.js metrics
    collectDefaultMetrics({ register: this.registry });

    this.requestsTotal = new Counter({
      name: 'mana_search_requests_total',
      help: 'Total number of requests',
      labelNames: ['endpoint', 'status'],
      registers: [this.registry],
    });

    this.latency = new Histogram({
      name: 'mana_search_latency_seconds',
      help: 'Request latency in seconds',
      labelNames: ['endpoint'],
      buckets: [0.1, 0.25, 0.5, 1, 2, 5, 10],
      registers: [this.registry],
    });

    this.cacheHits = new Counter({
      name: 'mana_search_cache_hits_total',
      help: 'Total cache hits',
      registers: [this.registry],
    });

    this.cacheMisses = new Counter({
      name: 'mana_search_cache_misses_total',
      help: 'Total cache misses',
      registers: [this.registry],
    });

    this.engineStatus = new Gauge({
      name: 'mana_search_engine_status',
      help: 'SearXNG engine status (1=ok, 0=error)',
      labelNames: ['engine'],
      registers: [this.registry],
    });

    this.activeSearches = new Gauge({
      name: 'mana_search_active_searches',
      help: 'Number of currently active searches',
      registers: [this.registry],
    });
  }

  /**
   * Counts one request and records its latency.
   *
   * @param endpoint Label identifying the endpoint (e.g. `search`, `extract`).
   * @param status Status code, stored as a string label.
   * @param durationMs Duration in milliseconds; converted to seconds for the histogram.
   */
  recordRequest(endpoint: string, status: number, durationMs: number) {
    this.requestsTotal.inc({ endpoint, status: String(status) });
    this.latency.observe({ endpoint }, durationMs / 1000);
  }

  /** Increments the cache-hit counter. */
  recordCacheHit() {
    this.cacheHits.inc();
  }

  /** Increments the cache-miss counter. */
  recordCacheMiss() {
    this.cacheMisses.inc();
  }

  /** Sets the status gauge of `engine` to 1 (ok) or 0 (error). */
  setEngineStatus(engine: string, isOk: boolean) {
    this.engineStatus.set({ engine }, isOk ? 1 : 0);
  }

  /** Marks one more search as in flight. */
  incrementActiveSearches() {
    this.activeSearches.inc();
  }

  /** Marks one in-flight search as finished. */
  decrementActiveSearches() {
    this.activeSearches.dec();
  }

  /** Serializes every metric in the registry. */
  async getMetrics(): Promise<string> {
    return this.registry.metrics();
  }

  /** Content type matching the output of {@link getMetrics}. */
  async getContentType(): Promise<string> {
    return this.registry.contentType;
  }
}
|
||||
2
services/mana-search/src/search/dto/index.ts
Normal file
2
services/mana-search/src/search/dto/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export * from './search-request.dto';
|
||||
export * from './search-response.dto';
|
||||
87
services/mana-search/src/search/dto/search-request.dto.ts
Normal file
87
services/mana-search/src/search/dto/search-request.dto.ts
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
import {
  IsString,
  IsNotEmpty,
  IsOptional,
  IsArray,
  IsEnum,
  IsInt,
  Min,
  Max,
  IsBoolean,
  ValidateNested,
} from 'class-validator';
import { Type } from 'class-transformer';
|
||||
|
||||
/** Result categories a search request may be restricted to. */
export enum SearchCategory {
  GENERAL = 'general',
  NEWS = 'news',
  SCIENCE = 'science',
  IT = 'it',
  IMAGES = 'images',
  VIDEOS = 'videos',
}
|
||||
|
||||
/** Recency windows a search request may be limited to. */
export enum TimeRange {
  DAY = 'day',
  WEEK = 'week',
  MONTH = 'month',
  YEAR = 'year',
}
|
||||
|
||||
/**
 * Optional filters and limits for a search request. Every field is optional.
 */
export class SearchOptionsDto {
  /** Categories to search in; each entry must be a {@link SearchCategory} value. */
  @IsOptional()
  @IsArray()
  @IsEnum(SearchCategory, { each: true })
  categories?: SearchCategory[];

  /** Names of the search engines to query. */
  @IsOptional()
  @IsArray()
  @IsString({ each: true })
  engines?: string[];

  /** Language for the search; a configured default is used when omitted. */
  @IsOptional()
  @IsString()
  language?: string;

  /** Restricts results to a recency window. */
  @IsOptional()
  @IsEnum(TimeRange)
  timeRange?: TimeRange;

  /** Safe-search level, an integer from 0 to 2. */
  @IsOptional()
  @IsInt()
  @Min(0)
  @Max(2)
  safeSearch?: number;

  /** Maximum number of results to return (1 to 50). */
  @IsOptional()
  @IsInt()
  @Min(1)
  @Max(50)
  limit?: number;
}
|
||||
|
||||
/** Per-request cache controls for a search. */
export class CacheOptionsDto {
  /** Set to false to bypass the cache for this request. */
  @IsOptional()
  @IsBoolean()
  enabled?: boolean;

  /** Time-to-live for the cached result (60 to 86400). */
  @IsOptional()
  @IsInt()
  @Min(60)
  @Max(86400)
  ttl?: number;
}
|
||||
|
||||
/**
 * Request body for a web search.
 */
export class SearchRequestDto {
  /**
   * Search terms. Must not be empty: an empty string would be rejected
   * upstream and surface as a gateway error instead of a validation error.
   */
  @IsString()
  @IsNotEmpty()
  query: string;

  /** Optional filters and limits; validated as a nested {@link SearchOptionsDto}. */
  @IsOptional()
  @ValidateNested()
  @Type(() => SearchOptionsDto)
  options?: SearchOptionsDto;

  /** Optional cache controls; validated as a nested {@link CacheOptionsDto}. */
  @IsOptional()
  @ValidateNested()
  @Type(() => CacheOptionsDto)
  cache?: CacheOptionsDto;
}
|
||||
24
services/mana-search/src/search/dto/search-response.dto.ts
Normal file
24
services/mana-search/src/search/dto/search-response.dto.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
/** A single normalized search hit. */
export interface SearchResult {
  url: string;
  title: string;
  /** Short text excerpt describing the hit. */
  snippet: string;
  /** Name of the engine that produced the hit. */
  engine: string;
  /** Relevance score used for ranking. */
  score: number;
  publishedDate?: string;
  thumbnail?: string;
  category: string;
}
|
||||
|
||||
/** Bookkeeping data attached to every search response. */
export interface SearchMeta {
  /** The query as received. */
  query: string;
  /** Number of results in the response. */
  totalResults: number;
  /** Distinct engines that contributed results. */
  engines: string[];
  /** Elapsed time for handling the request. */
  duration: number;
  /** True when the response was served from the cache. */
  cached: boolean;
  /** Cache key under which the response is stored. */
  cacheKey?: string;
}
|
||||
|
||||
/** Response of a search: the ranked hits plus request metadata. */
export interface SearchResponse {
  results: SearchResult[];
  meta: SearchMeta;
}
|
||||
133
services/mana-search/src/search/providers/searxng.provider.ts
Normal file
133
services/mana-search/src/search/providers/searxng.provider.ts
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
import { Injectable, Logger, HttpException, HttpStatus } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
|
||||
/**
 * Query parameters sent to SearXNG's `/search` endpoint.
 *
 * Property names match the query-string keys; list-valued parameters are
 * passed as a single string.
 */
export interface SearxngQuery {
  /** Search terms. */
  q: string;
  categories?: string;
  engines?: string;
  language?: string;
  time_range?: string;
  safesearch?: number;
  /** Response format; always JSON. */
  format: 'json';
}
|
||||
|
||||
/** A raw result entry as returned by SearXNG. */
export interface SearxngResult {
  url: string;
  title: string;
  /** Text excerpt of the hit. */
  content?: string;
  engine: string;
  score?: number;
  category?: string;
  publishedDate?: string;
  thumbnail?: string;
  parsed_url?: string[];
  engines?: string[];
  positions?: number[];
}
|
||||
|
||||
/** Shape of the JSON document returned by SearXNG's `/search` endpoint. */
interface SearxngResponse {
  query: string;
  results: SearxngResult[];
  suggestions: string[];
  infoboxes: unknown[];
  number_of_results: number;
}
|
||||
|
||||
/**
 * Thin HTTP client for a SearXNG instance: search, health probe and engine
 * listing. The base URL and search timeout come from configuration.
 */
@Injectable()
export class SearxngProvider {
  private readonly logger = new Logger(SearxngProvider.name);
  private readonly baseUrl: string;
  private readonly timeout: number;

  constructor(private readonly configService: ConfigService) {
    // Trailing slashes are removed once so every endpoint can be built as
    // `${baseUrl}/<path>`; this also keeps a path prefix in the base URL intact.
    this.baseUrl = this.configService
      .get<string>('searxng.url', 'http://searxng:8080')
      .replace(/\/+$/, '');
    this.timeout = this.configService.get<number>('searxng.timeout', 15000);
  }

  /**
   * Runs a search against SearXNG and returns its raw result list.
   *
   * @param query Query parameters; undefined, null and empty-string values are not sent.
   * @returns The `results` array of the response (empty when the field is missing).
   * @throws HttpException 502 when SearXNG answers with a non-2xx status,
   *   504 when the request times out, 503 for any other failure.
   */
  async search(query: SearxngQuery): Promise<SearxngResult[]> {
    const url = new URL(`${this.baseUrl}/search`);

    // Set query parameters
    for (const [key, value] of Object.entries(query)) {
      if (value !== undefined && value !== null && value !== '') {
        url.searchParams.set(key, String(value));
      }
    }

    this.logger.debug(`SearXNG request: ${url.toString()}`);

    try {
      const response = await fetch(url.toString(), {
        method: 'GET',
        headers: {
          Accept: 'application/json',
        },
        signal: AbortSignal.timeout(this.timeout),
      });

      if (!response.ok) {
        const text = await response.text();
        this.logger.error(`SearXNG error ${response.status}: ${text}`);
        throw new HttpException(
          `Search engine error: ${response.status}`,
          HttpStatus.BAD_GATEWAY,
        );
      }

      const data: SearxngResponse = await response.json();
      // Guard against a payload without a results array.
      const results = Array.isArray(data?.results) ? data.results : [];

      this.logger.debug(
        `SearXNG returned ${results.length} results for "${query.q}"`,
      );

      return results;
    } catch (error) {
      if (error instanceof HttpException) {
        throw error;
      }

      if (error instanceof Error && error.name === 'TimeoutError') {
        this.logger.error(`SearXNG timeout for query: ${query.q}`);
        throw new HttpException('Search timeout', HttpStatus.GATEWAY_TIMEOUT);
      }

      this.logger.error(`SearXNG search failed: ${error}`);
      throw new HttpException(
        'Search service unavailable',
        HttpStatus.SERVICE_UNAVAILABLE,
      );
    }
  }

  /**
   * Probes SearXNG's `/healthz` endpoint with a 5 second limit.
   *
   * @returns `ok` for a 2xx answer, `error` otherwise, plus the elapsed time. Never throws.
   */
  async healthCheck(): Promise<{ status: string; latency: number }> {
    const start = Date.now();
    try {
      const response = await fetch(`${this.baseUrl}/healthz`, {
        signal: AbortSignal.timeout(5000),
      });
      return {
        status: response.ok ? 'ok' : 'error',
        latency: Date.now() - start,
      };
    } catch {
      return { status: 'error', latency: Date.now() - start };
    }
  }

  /**
   * Lists the engine names reported by SearXNG's `/config` endpoint.
   *
   * Both payload shapes are handled: an array of engines (strings or objects
   * with a `name`) and an object keyed by engine name. NOTE(review): SearXNG
   * is believed to return the array form — confirm against the deployed version.
   *
   * @returns Engine names, or an empty list on any failure. Never throws.
   */
  async getEngines(): Promise<string[]> {
    try {
      const response = await fetch(`${this.baseUrl}/config`, {
        signal: AbortSignal.timeout(5000),
      });

      if (!response.ok) {
        return [];
      }

      const config: unknown = await response.json();
      const engines = (config as { engines?: unknown } | null)?.engines;

      if (Array.isArray(engines)) {
        return engines
          .map((engine: unknown) =>
            typeof engine === 'string'
              ? engine
              : (engine as { name?: unknown } | null)?.name,
          )
          .filter((name): name is string => typeof name === 'string');
      }

      return engines !== null && typeof engines === 'object'
        ? Object.keys(engines)
        : [];
    } catch {
      return [];
    }
  }
}
|
||||
64
services/mana-search/src/search/search.controller.ts
Normal file
64
services/mana-search/src/search/search.controller.ts
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
import { Controller, Post, Get, Body, Delete, Logger } from '@nestjs/common';
|
||||
import { SearchService } from './search.service';
|
||||
import { CacheService } from '../cache/cache.service';
|
||||
import { SearchRequestDto } from './dto/search-request.dto';
|
||||
import { SearchResponse } from './dto/search-response.dto';
|
||||
|
||||
/**
 * HTTP entry points for web search, mounted under the `search` route.
 */
@Controller('search')
export class SearchController {
  private readonly logger = new Logger(SearchController.name);

  constructor(
    private readonly searchService: SearchService,
    private readonly cacheService: CacheService,
  ) {}

  /**
   * Perform a web search
   * POST /api/v1/search
   *
   * @param request Validated request body with the query and optional settings.
   */
  @Post()
  async search(@Body() request: SearchRequestDto): Promise<SearchResponse> {
    this.logger.log(`Search request: "${request.query}"`);
    return this.searchService.search(request);
  }

  /**
   * Get available search engines
   * GET /api/v1/search/engines
   */
  @Get('engines')
  async getEngines(): Promise<{ engines: string[] }> {
    const engines = await this.searchService.getEngines();
    return { engines };
  }

  /**
   * Get search service health
   * GET /api/v1/search/health
   *
   * The SearXNG and cache probes are independent, so they run in parallel.
   */
  @Get('health')
  async health() {
    const [searxng, cache] = await Promise.all([
      this.searchService.healthCheck(),
      this.cacheService.healthCheck(),
    ]);
    const cacheStats = this.cacheService.getStats();

    return {
      searxng,
      cache: {
        ...cache,
        stats: cacheStats,
      },
    };
  }

  /**
   * Clear search cache
   * DELETE /api/v1/search/cache
   *
   * NOTE(review): no guard is applied on this route in this controller —
   * confirm the service is only reachable from trusted callers.
   */
  @Delete('cache')
  async clearCache(): Promise<{ cleared: boolean; keysRemoved: number }> {
    const keysRemoved = await this.cacheService.clear();
    return { cleared: true, keysRemoved };
  }
}
|
||||
11
services/mana-search/src/search/search.module.ts
Normal file
11
services/mana-search/src/search/search.module.ts
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { SearchController } from './search.controller';
|
||||
import { SearchService } from './search.service';
|
||||
import { SearxngProvider } from './providers/searxng.provider';
|
||||
|
||||
/**
 * Wires the search controller, the search service and the SearXNG provider
 * together; only the search service is exported.
 */
@Module({
  controllers: [SearchController],
  providers: [SearchService, SearxngProvider],
  exports: [SearchService],
})
export class SearchModule {}
|
||||
155
services/mana-search/src/search/search.service.ts
Normal file
155
services/mana-search/src/search/search.service.ts
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { CacheService } from '../cache/cache.service';
|
||||
import { MetricsService } from '../metrics/metrics.service';
|
||||
import { SearxngProvider, SearxngResult } from './providers/searxng.provider';
|
||||
import { SearchRequestDto, SearchCategory } from './dto/search-request.dto';
|
||||
import { SearchResponse, SearchResult } from './dto/search-response.dto';
|
||||
|
||||
/**
 * Orchestrates a web search: cache lookup, SearXNG query, result
 * normalisation/ranking, and cache write-back.
 */
@Injectable()
export class SearchService {
	private readonly logger = new Logger(SearchService.name);

	/** Number of results returned when the request does not specify a usable limit. */
	private readonly defaultLimit = 10;

	/** Hard upper bound on results per response, regardless of the requested limit. */
	private readonly maxResults = 50;

	/** Registrable domains whose results receive a ranking boost. */
	private readonly trustedDomains: readonly string[] = [
		'wikipedia.org',
		'github.com',
		'stackoverflow.com',
	];

	constructor(
		private readonly configService: ConfigService,
		private readonly cacheService: CacheService,
		private readonly metricsService: MetricsService,
		private readonly searxngProvider: SearxngProvider,
	) {}

	/**
	 * Runs a search, serving from cache when allowed.
	 *
	 * @param request - Query plus optional search options and cache settings.
	 *   Caching is on unless `request.cache.enabled` is explicitly `false`.
	 * @returns Ranked, de-duplicated results with metadata. `meta.cached` is
	 *   `true` when the response came from the cache.
	 * @throws Whatever the provider or cache service throws; the active-search
	 *   gauge is decremented in all cases.
	 */
	async search(request: SearchRequestDto): Promise<SearchResponse> {
		const startTime = Date.now();
		this.metricsService.incrementActiveSearches();

		try {
			const cacheEnabled = request.cache?.enabled !== false;

			// Resolve effective values once so the cache key and the upstream
			// query can never disagree about what was actually searched.
			const language =
				request.options?.language ||
				this.configService.get<string>('searxng.defaultLanguage', 'de-DE');
			const limit = this.resolveLimit(request.options?.limit);

			// 1. Build cache key
			const cacheKey = this.buildCacheKey(request, language, limit);

			// 2. Check cache
			if (cacheEnabled) {
				const cached = await this.cacheService.get<SearchResponse>(cacheKey);
				if (cached) {
					this.logger.debug(`Cache hit for: ${request.query}`);
					return {
						...cached,
						meta: { ...cached.meta, cached: true },
					};
				}
			}

			// 3. Query SearXNG
			const rawResults = await this.searxngProvider.search({
				q: request.query,
				categories: request.options?.categories?.join(','),
				engines: request.options?.engines?.join(','),
				language,
				time_range: request.options?.timeRange,
				safesearch: request.options?.safeSearch ?? 0,
				format: 'json',
			});

			// 4. Normalize and rank results
			const results = this.normalizeResults(rawResults, limit);

			// 5. Build response
			const response: SearchResponse = {
				results,
				meta: {
					query: request.query,
					totalResults: results.length,
					engines: [...new Set(results.map((r) => r.engine))],
					duration: Date.now() - startTime,
					cached: false,
					cacheKey,
				},
			};

			// 6. Cache result (a falsy TTL, including 0, falls back to the configured default)
			if (cacheEnabled) {
				const ttl =
					request.cache?.ttl || this.configService.get<number>('cache.searchTtl', 3600);
				await this.cacheService.set(cacheKey, response, ttl);
			}

			this.metricsService.recordRequest('search', 200, Date.now() - startTime);
			return response;
		} finally {
			this.metricsService.decrementActiveSearches();
		}
	}

	/**
	 * Turns the caller-supplied limit into a safe slice length.
	 * Missing, non-finite, or < 1 values fall back to the default; everything
	 * else is floored and capped at `maxResults`.
	 */
	private resolveLimit(requested: number | undefined): number {
		if (requested === undefined || !Number.isFinite(requested) || requested < 1) {
			return this.defaultLimit;
		}
		return Math.min(Math.floor(requested), this.maxResults);
	}

	/**
	 * Builds the cache key from every input that influences the cached payload.
	 *
	 * The limit is part of the key because the stored response is already
	 * truncated to it; the resolved language is used so a change of the
	 * configured default does not serve stale results.
	 */
	private buildCacheKey(request: SearchRequestDto, language: string, limit: number): string {
		// Copy before sorting: Array.prototype.sort mutates in place and would
		// otherwise reorder the caller's arrays (and what is sent upstream).
		const categories = [...(request.options?.categories ?? [])].sort().join('-') || 'all';
		const engines = [...(request.options?.engines ?? [])].sort().join('-') || 'all';

		return [
			'search',
			request.query.toLowerCase().trim(),
			categories,
			engines,
			language || 'default',
			request.options?.timeRange || 'any',
			String(request.options?.safeSearch ?? 0),
			String(limit),
		].join(':');
	}

	/**
	 * De-duplicates raw provider results by URL, maps them to the public
	 * result shape, sorts by descending score and truncates to `limit`
	 * (never more than `maxResults`).
	 */
	private normalizeResults(rawResults: SearxngResult[], limit: number): SearchResult[] {
		// Deduplicate by URL (case-insensitive, ignoring one trailing slash);
		// the first occurrence wins.
		const seen = new Set<string>();
		const unique = rawResults.filter((r) => {
			const key = r.url.toLowerCase().replace(/\/$/, '');
			if (seen.has(key)) return false;
			seen.add(key);
			return true;
		});

		return unique
			.map((r) => ({
				url: r.url,
				title: r.title || 'Untitled',
				snippet: r.content || '',
				engine: r.engine,
				score: this.calculateScore(r),
				publishedDate: r.publishedDate,
				thumbnail: r.thumbnail,
				category: r.category || 'general',
			}))
			.sort((a, b) => b.score - a.score)
			.slice(0, Math.min(limit, this.maxResults));
	}

	/**
	 * Computes a ranking score clamped to [0, 1]: the provider score (0.5 when
	 * absent or zero), plus boosts for a substantial snippet and a trusted
	 * host, minus a small penalty for very long URLs.
	 */
	private calculateScore(result: SearxngResult): number {
		// Base score from SearXNG
		let score = result.score || 0.5;

		// Boost for having content
		if (result.content && result.content.length > 100) {
			score += 0.1;
		}

		// Boost for trusted domains
		if (this.isTrustedHost(result.url)) {
			score += 0.15;
		}

		// Slight penalty for very long URLs (often less useful)
		if (result.url.length > 200) {
			score -= 0.05;
		}

		return Math.min(1, Math.max(0, score));
	}

	/**
	 * True when the URL's hostname is a trusted domain or a subdomain of one.
	 * Matching on the parsed hostname (not a substring of the whole URL)
	 * prevents look-alikes such as `notgithub.com` or a trusted name appearing
	 * in a path/query from earning the boost. Unparseable URLs are untrusted.
	 */
	private isTrustedHost(rawUrl: string): boolean {
		let host: string;
		try {
			host = new URL(rawUrl).hostname.toLowerCase();
		} catch {
			return false;
		}
		return this.trustedDomains.some((domain) => host === domain || host.endsWith(`.${domain}`));
	}

	/** Returns the engine names reported by the SearXNG provider. */
	async getEngines(): Promise<string[]> {
		return this.searxngProvider.getEngines();
	}

	/** Delegates to the SearXNG provider's health check. */
	async healthCheck(): Promise<{ status: string; latency: number }> {
		return this.searxngProvider.healthCheck();
	}
}
|
||||
25
services/mana-search/tsconfig.json
Normal file
25
services/mana-search/tsconfig.json
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"module": "commonjs",
|
||||
"declaration": true,
|
||||
"removeComments": true,
|
||||
"emitDecoratorMetadata": true,
|
||||
"experimentalDecorators": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"target": "ES2022",
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist",
|
||||
"baseUrl": "./",
|
||||
"incremental": true,
|
||||
"skipLibCheck": true,
|
||||
"strictNullChecks": true,
|
||||
"noImplicitAny": true,
|
||||
"strictBindCallApply": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"esModuleInterop": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue