diff --git a/.env.development b/.env.development index 9b95deea9..1bac82b17 100644 --- a/.env.development +++ b/.env.development @@ -178,6 +178,19 @@ MANA_LLM_URL=https://llm.mana.how MANA_LLM_API_KEY= MANA_LLM_DEFAULT_MODEL=gemma3:4b +# mana-research — unified research orchestration (port 3068). Fronts +# search + extract + sync/async research agents behind one API. mana-ai +# calls the service-to-service /api/v1/internal/research/async path +# for cross-tick Deep Research Max jobs. +MANA_RESEARCH_URL=http://localhost:3068 + +# mana-ai deep-research opt-in. When true AND a mission's objective +# matches DEEP_RESEARCH_TRIGGER (see services/mana-ai/src/cron/tick.ts), +# mana-ai submits a gemini-deep-research-max task (~$3–7 / 1500 credits +# per run) and polls across ticks instead of the shallow RSS path. +# Keep this off in dev unless you're actively testing the feature. +MANA_AI_DEEP_RESEARCH_ENABLED=false + # mana-crawler — Go service. Default binary port is 3023 (local dev); # the macmini docker-compose overrides to 3014 internally. 
Used by the # Kontext URL import endpoint (POST /api/v1/context/import-url) to diff --git a/docker-compose.macmini.yml b/docker-compose.macmini.yml index 6cf2e7a25..a9976d014 100644 --- a/docker-compose.macmini.yml +++ b/docker-compose.macmini.yml @@ -318,6 +318,8 @@ services: condition: service_started mana-api: condition: service_healthy + mana-research: + condition: service_started environment: TZ: Europe/Berlin NODE_ENV: production @@ -325,6 +327,8 @@ services: SYNC_DATABASE_URL: postgresql://postgres:${POSTGRES_PASSWORD:-mana123}@postgres:5432/mana_sync MANA_LLM_URL: http://mana-llm:3020 MANA_API_URL: http://mana-api:3060 + MANA_RESEARCH_URL: http://mana-research:3068 + MANA_AI_DEEP_RESEARCH_ENABLED: ${MANA_AI_DEEP_RESEARCH_ENABLED:-false} MANA_SERVICE_KEY: ${MANA_SERVICE_KEY} TICK_INTERVAL_MS: ${MANA_AI_TICK_INTERVAL_MS:-60000} TICK_ENABLED: ${MANA_AI_TICK_ENABLED:-true} @@ -1666,6 +1670,11 @@ services: MANA_CRAWLER_URL: http://mana-crawler:3014 MANA_LLM_DEFAULT_MODEL: ${MANA_LLM_DEFAULT_MODEL:-gemma3:4b} MANA_SERVICE_KEY: ${MANA_SERVICE_KEY} + # OpenAI — picture module gpt-image-2 path. Optional: without it, + # /api/v1/picture/generate falls through to Replicate/local Flux. + OPENAI_API_KEY: ${OPENAI_API_KEY:-} + # Replicate — fallback for Flux-schnell image generation + REPLICATE_API_TOKEN: ${REPLICATE_API_TOKEN:-} APP_ID: mana-api # Database (used by modules that have server-side state — research, # presi share-links, traces guides). Same Postgres + schema split diff --git a/docs/reports/gemini-deep-research.md b/docs/reports/gemini-deep-research.md new file mode 100644 index 000000000..a5f6e87ea --- /dev/null +++ b/docs/reports/gemini-deep-research.md @@ -0,0 +1,431 @@ +# Gemini 3.1 Pro Deep Research & Deep Research Max + +**Datum:** 2026-04-22 +**Anlass:** Googles Launch am 2026-04-21 — zwei autonome Research-Agenten auf Basis von Gemini 3.1 Pro, verfügbar als Public Preview über die Gemini API. +**Status:** Schritt 1 + 2 geliefert. 
Schritt 3 (MCP-Server) geplant, nicht implementiert. + +## TL;DR + +Google hat zwei neue Research-Agenten veröffentlicht, die bei uns direkt in die Provider-Landschaft von `mana-research` passen und die Phase-3b-Lücke (`openai-deep-research` als einziger async Agent) auf natürliche Weise ergänzen. Besonderheit: **beide Agenten sprechen Model Context Protocol (MCP)**, wodurch sie potenziell an *unsere* Daten andocken könnten — was in Kombination mit dem existierenden AI-Mission-Key-Grant-System (RSA-wrapped MDK, audit-logged) ein strategisch interessanter Vektor ist. + +Schritt 1 (async Provider) und Schritt 2 (`mana-ai` Cross-Tick-Pre-Research) sind geliefert und hinter einem Opt-in-Flag (`MANA_AI_DEEP_RESEARCH_ENABLED=true`) aktivierbar. Schritt 3 (MCP-Server für unsere verschlüsselten Kontextdaten) ist spezifiziert, aber Security-Review erforderlich vor Rollout. + +## 1. Die Modelle im Detail + +### 1.1 Varianten und Positionierung + +| | **Deep Research** | **Deep Research Max** | +|---|---|---| +| **Model-ID** | `deep-research-preview-04-2026` | `deep-research-max-preview-04-2026` | +| **Zielbild** | Interaktiv, niedrige Latenz, eingebettet in User-Surfaces | Asynchron, nächtliche Cron-Jobs, maximale Tiefe | +| **Typ. Laufzeit** | Minutenbereich (eingebaut in Chat-UIs) | Bis zu 60 min (typ. ~20 min) | +| **Typ. Volumen** | ~80 Suchen, ~250k Input-Tokens, ~60k Output | ~160 Suchen, ~900k Input-Tokens, ~80k Output | +| **Preis (geschätzt)** | $1.00–3.00 pro Task | $3.00–7.00 pro Task | +| **DeepSearchQA** | — | 93.3 % (von 66.1 % im Dez 2025) | +| **Humanity's Last Exam** | — | 54.6 % (von 46.4 %) | + +Beide laufen ausschließlich **async** (`background=true` ist Pflicht, `store=true` erforderlich). Es gibt keinen synchronen Request-Response-Modus wie bei `gemini-grounding`. 
+ +### 1.2 API-Shape — submit + +``` +POST https://generativelanguage.googleapis.com/v1beta/interactions +x-goog-api-key: +Content-Type: application/json + +{ + "agent": "deep-research-preview-04-2026", + "input": "query", + "background": true, + "store": true, + "agent_config": { + "type": "deep-research", + "thinking_summaries": "auto", // Live-Gedanken streamen + "visualization": "auto", // Charts/Infographics inline + "collaborative_planning": false + }, + "tools": [ + { "type": "google_search" }, + { "type": "url_context" }, + { "type": "code_execution" }, + { "type": "file_search" }, + { + "type": "mcp_server", + "name": "mana-kontext", + "url": "https://mcp.mana.how/...", + "headers": { "Authorization": "Bearer …" }, + "allowed_tools": ["search_notes", "search_journal"] + } + ] +} + +→ 200 +{ + "id": "v1_Chd...", + "status": "in_progress", + "role": "agent", + "created": "...", + "agent": "deep-research-preview-04-2026" +} +``` + +### 1.3 API-Shape — poll (completed) + +Die tatsächlich beobachtete Response-Shape (Smoke-Test am 2026-04-22) weicht von der OpenAI-Responses-API deutlich ab. Wir hatten initial OpenAI-Style (`output: [{type:'message', content:[...]}]`) erwartet — die echte Shape sieht so aus: + +``` +GET https://generativelanguage.googleapis.com/v1beta/interactions/{id} + +→ 200 +{ + "id": "v1_Chd...", + "status": "completed", + "outputs": [ // Plural! Flaches Array. + { "type": "thought", + "signature": "...", + "summary": [ // Thought-Summaries als Liste + { "type": "text", "text": "..." } + ] + }, + {}, // Gelegentlich leer → ignorieren + { "type": "text", + "text": "# Hono and Bun...", + "annotations": [ + { "type": "url_citation", + "url": "https://...", + "start_index": 268, + "end_index": 283 + } + ] + }, + { "type": "image", + "mime_type": "image/png", + "data": "" // Charts/Infographics + }, + { "type": "text", "text": "**Sources:** ..." 
} + ], + "usage": { + "total_tokens": 145268, + "total_input_tokens": 93025, + "total_output_tokens": 7770, + "total_cached_tokens": 16384, // Prompt-Cache + "total_tool_use_tokens": 28371, + "total_thought_tokens": 16102, + "input_tokens_by_modality": [...], + "output_tokens_by_modality": [...] + }, + "role": "agent", + "object": "interaction", + "agent": "deep-research-preview-04-2026" +} +``` + +Wichtig für den Parser: +- **`outputs` (nicht `output`)** ist ein flaches Array mit `type: 'thought' | 'text' | 'image' | null` +- `text` items haben `annotations[]` mit `type: 'url_citation'`, `url`, `start_index`/`end_index` — **kein `title`-Feld**, wir ziehen den Hostname aus der URL +- `image` items tragen base64-codierte Bilder (PNG) als `data` mit `mime_type` +- `thought` items sind die live-gestreamten Reasoning-Summaries — wenn wir nur den finalen Report brauchen, skip +- `usage` nutzt `total_input_tokens` / `total_output_tokens` (nicht `input_tokens` / `output_tokens`) + +### 1.4 Was die Agenten können + +- **Autonom planen** und iterativ nachsuchen, bis sie eine Antwort haben +- **Google Search + URL Context + Code Execution + File Search** parallel oder einzeln; Web-Zugriff lässt sich auch komplett deaktivieren (Pure-MCP-Mode) +- **MCP-Server** als Brücke zu proprietären Datenquellen — Enterprise-Partner sind FactSet, S&P Global, PitchBook +- **Native Visualisierung**: Charts und Infographics inline als HTML oder Nano-Banana +- **Thought-Summaries** als Live-Stream — brauchbar für "Agent denkt gerade…"-UIs +- **Keine** Structured Outputs, **keine** custom Function-Calls (dafür gibt es MCP) + +## 2. Einordnung in unsere Landschaft + +### 2.1 `mana-research` (port 3068) + +Phase 3a liefert vier **synchrone** Agents (`perplexity-sonar`, `gemini-grounding`, `openai-responses`, `claude-web-search`). 
Phase 3b war ursprünglich nur für `openai-deep-research` (1000 credits) vorgesehen; mit Schritt 1 kommen die beiden Gemini-Async-Provider dazu und verdoppeln die Abdeckung des Max-Tiers. + +### 2.2 `mana-ai` (port 3067) + +v0.6 hatte den RSS-basierten `NewsResearchClient` und den **Pre-Planning-Research-Step** geshipped. Mit Schritt 2 (v0.7) kommt daneben ein zweiter Pfad: wenn eine Mission explizit nach deep research fragt und der Service die entsprechende ENV-Flag gesetzt hat, wird statt RSS eine async Gemini Deep Research Max Task submittet und über Ticks hinweg gepollt. + +## 3. Integrationsplan — Status + +| Schritt | Status | Datum | Details | +|---|---|---|---| +| 1 — Gemini als async Provider in mana-research | ✅ Geliefert | 2026-04-22 | §3.1 | +| 2 — Cross-Tick Deep-Research in mana-ai | ✅ Geliefert | 2026-04-22 | §3.2 | +| 3 — MCP-Server für unsere Daten | ⏳ Spezifiziert | — | §3.3 | + +### 3.1 Schritt 1 — `gemini-deep-research[-max]` als async Provider — ✅ GELIEFERT + +Zwei neue Provider-IDs neben `openai-deep-research` in `mana-research`: + +**Geänderte/neue Dateien:** +- `packages/shared-research/src/ids.ts` — `AGENT_PROVIDER_IDS` erweitert um `gemini-deep-research` + `gemini-deep-research-max` +- `services/mana-research/src/providers/agent/gemini-deep-research.ts` — neuer Provider, submit/poll-Split tier-parametrisiert (`standard`/`max`). Parser nutzt die echte Response-Shape aus §1.3. +- `services/mana-research/src/routes/research.ts` — `/async` POST + `/async/:id` GET dispatchen via `dispatchAsync(providerId, config)`. Default: `openai-deep-research` (backward compatible). +- `services/mana-research/src/routes/internal-research.ts` — **neu** — service-to-service Pendant unter `/api/v1/internal/research/async`, gated durch `X-Service-Key` + `X-User-Id` Header für Credit-Accounting. 
+- `src/lib/pricing.ts` — 300 / 1500 credits (Standard / Max) +- `src/executor/env-map.ts` + `src/router/auto-route.ts` — beide neuen IDs auf `googleGenai`, explizit **nicht** in `AGENT_DEFAULT_ORDER` (sync-Auto-Route überspringt sie; nur `/async` erreichbar) +- `src/routes/providers.ts` — Health-keyMap ergänzt +- `src/index.ts` — internal route gemountet unter `/api/v1/internal/research/*` +- `services/mana-research/API_KEYS.md` + `CLAUDE.md` — dokumentiert + +**Use (user-facing):** +``` +POST /api/v1/research/async +{ "query": "…", "provider": "gemini-deep-research-max" } +→ { taskId, status: "queued", providerId, costCredits: 1500 } + +GET /api/v1/research/async/:taskId +``` + +**Use (service-to-service):** +``` +POST /api/v1/internal/research/async +X-Service-Key: $MANA_SERVICE_KEY +X-User-Id: $USER_ID +{ "query": "…", "provider": "gemini-deep-research-max" } +``` + +**Verifiziert** mit echtem `GOOGLE_GENAI_API_KEY` am 2026-04-22: submit + poll über Googles Preview-API → HTTP 200 in beiden Richtungen, completed Response korrekt geparst. + +### 3.2 Schritt 2 — `mana-ai` v0.7 Cross-Tick Deep Research — ✅ GELIEFERT + +**Problem:** Max-Tasks laufen bis 60 min. Der mana-ai Tick-Loop läuft alle 60 s. Wir brauchen Cross-Tick-State, um genau einen pending Research-Job pro Mission zu tracken und über mehrere Ticks zu pollen, ohne neu zu submitten. + +**Geänderte/neue Dateien:** +- `services/mana-ai/src/clients/mana-research.ts` — **neu** — HTTP-Client für die internen Async-Endpoints. Graceful-null bei Fehler, damit eine kaputte mana-research den Tick nicht crasht. +- `services/mana-ai/src/db/migrate.ts` — neue Tabelle `mana_ai.mission_research_jobs (user_id, mission_id, task_id, provider_id, submitted_at, last_polled_at)` mit PK `(user_id, mission_id)`. Ein Row = es läuft ein Job. +- `services/mana-ai/src/db/research-jobs.ts` — **neu** — `get/insert/touch/delete`. Nach `completed`/`failed`/`cancelled`: DELETE. 
+- `services/mana-ai/src/cron/tick.ts`: + - neuer `handleDeepResearch(m, sql, config)` mit State-Machine: + - Wenn pending Job existiert → poll + - `queued`/`running` → `'pending'` (skip tick) + - `failed`/`cancelled` → delete, fall through zu shallow + - `completed` → delete + return ResolvedInput + - Kein Job aber `DEEP_RESEARCH_TRIGGER` + `config.deepResearchEnabled` → submit + insert → `'pending'` + - `planOneMission` Rückgabetyp auf Discriminated Union erweitert: `{outcome:'planned'|'skipped'|'failed'}` statt `T | null`, damit skipped-wegen-pending-research nicht als parse-failure gezählt wird + - Shallow RSS-Pfad läuft nur noch, wenn deep weder ein Ergebnis geliefert hat, noch pending ist +- `services/mana-ai/src/config.ts` — `manaResearchUrl` + `deepResearchEnabled` (`MANA_AI_DEEP_RESEARCH_ENABLED`) +- `services/mana-ai/src/metrics.ts` — vier neue Counter: `mana_ai_research_jobs_submitted_total{provider}`, `_completed_total{provider}`, `_failed_total{provider}`, `_pending_skips_total` +- `services/mana-ai/package.json` — `@mana/shared-research` als workspace-dep + `type-check` script +- `docker-compose.macmini.yml` — mana-ai bekommt `MANA_RESEARCH_URL`, `MANA_AI_DEEP_RESEARCH_ENABLED` (default `false`), `depends_on: mana-research` + +**Opt-in-Trigger (deutlich enger gefasst als der Shallow-Trigger):** +``` +DEEP_RESEARCH_TRIGGER = /\b(deep research|tiefe recherche|umfassende recherche|hintergrundrecherche|deep dive)\b/i +``` +Zusätzlich per ENV gegated. In Prod default off; ein expliziter Flip erlaubt den Rollout zunächst nur an uns selbst bzw. den Founder-Tier. 
+ +**Flow:** + +``` + tick N-1: Mission X, objective: "deep research zu Thema Y" + ├ RESEARCH_TRIGGER matched UND DEEP_RESEARCH_TRIGGER matched + ├ config.deepResearchEnabled = true + ├ client.submit(userId, "deep research zu…", "gemini-deep-research-max") + │ → { taskId, status: "queued", costCredits: 1500 } + ├ INSERT INTO mana_ai.mission_research_jobs (task_id, …) + └ skip planner this tick + + tick N, N+1, … während Google den Task ausarbeitet (~20 min): + ├ SELECT * FROM mana_ai.mission_research_jobs WHERE (user_id, mission_id) = … + ├ client.poll(userId, taskId) → { status: "running" } + ├ touchPendingResearchJob() bumpt last_polled_at + └ skip planner this tick + + tick N+k: Max ist fertig + ├ client.poll(userId, taskId) → { status: "completed", result: { answer: {...} } } + ├ DELETE FROM mana_ai.mission_research_jobs WHERE … + ├ resolvedInputs.push({id:"__web-research__", content: formatDeepResearchContext(...)}) + └ Planner läuft mit Deep-Research-Kontext als Input +``` + +**Graceful Degradation:** Wenn `mana-research` down ist oder der Gemini-Submit mit einem 500er fehlschlägt, fällt die Mission auf den Shallow-RSS-Pfad zurück. Wenn ein bereits submittierter Job nicht mehr gepollt werden kann, bleibt der Row bestehen (`touchPendingResearchJob` aktualisiert lediglich `last_polled_at`), bis ein späterer Poll Erfolg hat oder manuell eingegriffen wird. + +### 3.3 Schritt 3 — MCP-Server für verschlüsselte Kontextdaten — ⏳ SPEZIFIZIERT + +**Warum der Aufwand:** Der Grund, warum dieses Feature gerade *für uns* strategisch ist: wir haben ein Zero-Knowledge-Crypto-Setup mit per-mission Key-Grants (RSA-OAEP-2048 wrapped MDK, HKDF-Scope-Binding, audit-logged). Das MCP-Pattern kreuzt exakt unsere Stärke (lokale, verschlüsselte Daten) mit Googles Stärke (Deep-Research-Synthese). Positioning: **"Deep Research, das deine Kontextdaten kennt — ohne sie in Google's Trainings-Pipeline zu schicken"**. 
+ +#### 3.3.1 Architektur + +``` + ┌──────────────────┐ ┌──────────────────┐ + │ Gemini DR Max │◀──────│ mana-mcp-server │ (Cloudflare-tunneled, + │ (bei Google) │ MCP │ (neu, port 3069)│ öffentlich erreichbar) + └──────────────────┘ └──────┬───────────┘ + │ Bearer + │ (Mission-ID, TTL-clamped) + ▼ + ┌──────────────────┐ + │ mana-auth │ verifiziert token, + │ │ lädt Mission-Grant, + │ /api/v1/mcp- │ unwrappt MDK + │ token/verify │ + └──────┬───────────┘ + │ + ▼ + ┌──────────────────┐ ┌─────────────────┐ + │ Encrypted-Data │──────▶│ mana_sync DB │ + │ Resolver │ │ (+ RLS) │ + │ (re-use of │ └─────────────────┘ + │ mana-ai's │ + │ encrypted.ts) │ + └──────────────────┘ +``` + +Der neue Service `mana-mcp` übernimmt die Rolle des MCP-Servers für Gemini. Er: + +1. **hört auf eingehende MCP-Requests** von Google (z. B. `tools/call` mit `search_notes`), authentifiziert per **Bearer-Token pro Mission**. Der Token ist kein JWT, sondern eine opaque ID, die in mana-auth auf `{missionId, userId, allowedTools, expiresAt}` aufgelöst wird — analog zum existierenden Mission-Key-Grant. +2. **verifiziert das Token** gegen mana-auth's neue `/api/v1/mcp-token/verify` Route (returned `{userId, missionId, mdk-wrapped, allowedTools}` oder 401). +3. **unwrappt den Mission-Grant** (MDK) via den existierenden `crypto/unwrap-grant.ts`-Code aus mana-ai — wird in ein geteiltes Paket `@mana/mission-grant` gehoben. +4. **liest und entschlüsselt** die relevanten Records aus `mana_sync` via den existierenden `encrypted.ts` Resolver-Pattern (ebenfalls ins gleiche Paket heben). +5. **antwortet in MCP-Shape** mit Titel + Body + URL pro gefundenem Record. 
+ +#### 3.3.2 Tool-Set (Minimal) + +Start mit drei readonly Tools, alle audit-logged nach `mana_ai.decrypt_audit`: + +| Tool | Signatur | Was es macht | +|---|---|---| +| `search_notes` | `(query: string, limit?: number)` | Volltext über entschlüsselte Notizen | +| `search_journal` | `(date_range?: {from,to}, query?: string, limit?: number)` | Journal-Einträge nach Datum + Query | +| `search_kontext` | `(scope: string)` | Kontext-Felder aus dem Interview (bereits strukturiert) | + +Alle drei sind scoped auf die Mission — sie sehen nur Records, die der Mission-Grant in der Allowlist hat. Schreibende Tools gibt es bewusst nicht. + +#### 3.3.3 Token-Lifecycle + +1. **Mission-Erstellung (Webapp):** Wenn eine Mission für Deep-Research-Max konfiguriert wird, erzeugt die Webapp zusätzlich zum existierenden Key-Grant einen **MCP-Token** via `POST /api/v1/me/ai-mission-mcp-token` auf mana-auth. Der Endpoint: + - erzeugt eine opaque Bearer-ID (crypto random, 32 bytes) + - speichert sie in `mana_auth.mcp_tokens` mit `{tokenHash, userId, missionId, allowedTools, expiresAt}` + - TTL-clamped [1h, 7d] — kürzer als Key-Grants, weil das Token exakt einen Research-Run abdeckt + +2. **Submit (mana-ai):** Beim Submit der Max-Task wird der MCP-Token als `headers.Authorization` in den `tools.mcp_server`-Block eingebaut, passend zum Request-Shape aus §1.2. + +3. **MCP-Call (mana-mcp):** Jeder eingehende Request wird via mana-auth verifiziert. Bei abgelaufenem oder unbekanntem Token → 401. Bei OK → Resolver-Aufruf + audit-row. + +4. **Teardown:** Nach Poll-Result `completed`/`failed` markiert mana-ai den Token als verbraucht (mana-auth DELETE). Auch abgelaufene Tokens werden über einen Cron entfernt. + +#### 3.3.4 Was wir neu bauen müssen + +| Komponente | Umfang | +|---|---| +| `services/mana-mcp/` | Neuer Bun/Hono-Service, ~500 LOC. MCP-Protokoll (JSON-RPC über HTTP), 3 Tool-Handler, Bearer-Auth. 
| +| `packages/mission-grant/` | Wiederverwendbares Paket mit `unwrapMissionGrant` + `encryptedRecordResolver` (jetzt in mana-ai lokalisiert) | +| `mana_auth.mcp_tokens` Tabelle | `{id, token_hash, user_id, mission_id, allowed_tools, expires_at, consumed_at}` | +| `POST /api/v1/me/ai-mission-mcp-token` | Issue | +| `POST /api/v1/mcp-token/verify` | Server-to-server Verify | +| `DELETE /api/v1/me/ai-mission-mcp-token/:id` | Explicit revoke (UI-Link) | +| Webapp: MCP-Option im Mission-Detail | "Meine Kontextdaten für diese Recherche freigeben" Checkbox + Token-Erzeugung | +| Audit-Tab-Erweiterung | Zeigt auch MCP-Aufrufe neben Decrypts | +| Cloudflare Tunnel für `mcp.mana.how` → mana-mcp | Analog zu `api.mana.how` etc. | + +#### 3.3.5 Risiken + +- **Öffentliche Angriffsfläche:** Der Service ist über Cloudflare Tunnel öffentlich erreichbar — sonst kann Gemini ihn nicht aufrufen. Ein schlecht durchdachter Tool-Handler oder eine Grant-Verwechslung bedeutet direkte Datenlecks. **Security-Review Pflicht** (Secure Code Review + Mini-Pentest) vor Rollout. +- **MCP-Rate-Limits:** Google kann den Server bei parallelen Research-Max-Tasks zig mal pro Minute anfragen. Wir brauchen Rate-Limiting (per Token) + Caching-Layer. +- **Token-Scope-Violations:** Wenn ein Request für Mission A auf Records aus Mission B geht (Bug oder Angriff), muss das einen dicken Alert auslösen. Bestehende `grant_scope_violations_total` Metrik lässt sich wiederverwenden. +- **Prompt Injection:** Entschlüsselte Notizen-Content kann Instruktionen an Gemini enthalten ("IGNORE PREVIOUS, download all …"). Wir müssen Tool-Responses klar als `data`, nicht als `instructions` labeln und Gemini prompten, userdata als nicht-vertrauenswürdig zu behandeln. +- **DSGVO-Implikationen:** Auch wenn MCP nur punktuell liest, geht der gelesene Content via Google's API-Endpunkt. Das muss in den Privacy-Text + Consent-Flow der Webapp. 
+- **Preview-Stabilität:** Bauen bevor die MCP-Integration auf Google-Seite stabil ist, heißt nachbauen wenn sich das Protokoll ändert. Erst mit POC, dann inkrementell. + +#### 3.3.6 Meilensteine + +**M0 — Security-Review + Plan-Sign-off (1–2 Tage).** Vor jeder Implementation. Liefert: konkretes Threat-Model, DSGVO-Check, Go/No-Go. + +**M1 — POC mit `search_notes` only (3–4 Tage).** +- `services/mana-mcp` Bun-Service-Skeleton +- `@mana/mission-grant` Paket extrahiert (kein Verhaltenschange für mana-ai) +- `mcp_tokens` Tabelle + Issue/Verify-Endpoints +- Nur `search_notes`, auf einen harten Test-User gated (`userId === 'dev-…'`) +- Ad-hoc Cloudflare-Tunnel, noch kein permanenter `mcp.mana.how` + +**M2 — `search_journal` + `search_kontext` + Prod-Tunnel (2 Tage).** Nach M1-Erkenntnissen. + +**M3 — Webapp UX (2–3 Tage).** Mission-Config-Checkbox + Audit-Tab-Erweiterung. + +**M4 — Public Rollout Founder-Tier.** Nach 1 Woche Beta für uns selbst. + +Bewusst linearer Rollout — MCP ist zu Security-sensitiv für parallele Entwicklung. + +## 4. Pricing-Sanity-Check + +Bei angenommenen $3–7 pro Max-Task: eine nächtliche Max-Mission pro aktivem Nutzer entspricht **$90–210/Monat pro Nutzer**. Das sitzt jetzt hinter zwei Gates: + +1. `MANA_AI_DEEP_RESEARCH_ENABLED=true` auf dem Server (default off) +2. `DEEP_RESEARCH_TRIGGER` Regex im Mission-Text (explizite User-Wording) + +Das `mana-credits` 2-Phase-Debit-Modell fängt Per-User-Limits ab, aber wir haben konservativ mit 1500 credits (≈ $15) für Max gepreist statt 700 (≈ $7) — als Puffer. Nach ersten echten Runs nachjustieren in `services/mana-research/src/lib/pricing.ts`. + +## 5. Risiken & offene Fragen (Stand nach Schritt 1+2) + +- **Preview-Status:** Model-IDs enden auf `-preview-04-2026`. Google deprecated solche Varianten typischerweise innerhalb von 6–12 Monaten. Aktuelle Strategie: `modelVersion` liegt als Konstante in `gemini-deep-research.ts` — Upgrade auf GA ist ein 1-Zeilen-Change, kein Refactor. 
+- **Charts/Infographics (`image`-Items):** Wir parsen sie derzeit nur in `providerRaw` — nicht im `AgentAnswer.answer` String. Follow-up: neues optionales Feld `AgentAnswer.visualizations: Array<{mime, data}>` plus MinIO-Upload für große Bilder. +- **Thought-Summaries:** Gehen heute verloren. Für eine zukünftige Research-Lab-UI wäre Streaming (`stream=true`) + Live-Anzeige der Gedanken ein differenzierendes Feature. +- **Quota:** Public Preview heißt niedrige Rate-Limits. In den ersten 2 Wochen nur Founder-Tier. Monitoring via `mana_ai_research_jobs_failed_total{provider}` — sobald Spikes auftreten, Rate-Limit-Retry-Logic nachziehen. +- **`collaborative_planning: true`**: Ignoriert. Wäre ein interessanter UX-Modus für eine zukünftige **Webapp**-Research-Lab-UI (Agent fragt vor Start zurück, ob der Plan passt). Irrelevant für den Background-Runner. +- **MCP-Seite (Schritt 3):** siehe §3.3.5 — eigener Risk-Catalog. + +## 6. Betrieb + +### 6.1 Metriken + +``` +# mana-research +research.async_jobs — Tabelle, ein Row pro submit (inkl. finalem result) + +# mana-ai +mana_ai_research_jobs_submitted_total{provider} — Pro Tick submittet +mana_ai_research_jobs_completed_total{provider} — Pro Tick Ergebnisse verfüttert +mana_ai_research_jobs_failed_total{provider} — Pro Tick failed/cancelled +mana_ai_research_jobs_pending_skips_total — Pro Tick skipped (Job läuft noch) +mana_ai.mission_research_jobs — Tabelle, ein Row pro pending Job pro Mission +``` + +### 6.2 Debug-Workflow + +```bash +# Welche Missions haben aktuell einen pending Deep-Research-Job? +docker exec mana-postgres psql -U mana -d mana_sync -c " + SELECT user_id, mission_id, provider_id, + age(now(), submitted_at) AS running_for, + last_polled_at + FROM mana_ai.mission_research_jobs + ORDER BY submitted_at DESC;" + +# Was ist der aktuelle Status upstream? 
+docker exec mana-postgres psql -U mana -d mana_platform -c " + SELECT id, user_id, provider_id, status, cost_credits, age(now(), created_at) AS age + FROM research.async_jobs + ORDER BY created_at DESC LIMIT 20;" +``` + +### 6.3 Notfall-Kill-Switch + +`MANA_AI_DEEP_RESEARCH_ENABLED=false` + Service-Restart → es werden keine neuen Jobs mehr submittet. Bereits pending Jobs laufen zu Ende (sie werden weiter gepollt, bis sie fertig sind). + +Härter: direkte DB-Bereinigung — + +```sql +DELETE FROM mana_ai.mission_research_jobs WHERE submitted_at < now() - interval '2 hours'; +``` + +Die Upstream-Tasks laufen bei Google weiter und werden trotzdem berechnet — das sind Sunk Costs, kein Hebel; wir lassen sie einfach auslaufen. Wichtig: die DB-Bereinigung nur bei deaktiviertem Flag ausführen, sonst submittet der nächste Tick für Missions mit passendem `DEEP_RESEARCH_TRIGGER` einen neuen Job. + +## 7. Empfehlung + +1. **Jetzt (erledigt, 2026-04-22):** Schritt 1 + 2 umgesetzt. Ein Pilot mit uns selbst (einem Test-User mit Founder-Tier und expliziter "deep research" Mission) kann ab sofort laufen. `MANA_AI_DEEP_RESEARCH_ENABLED=true` auf dem Mac-Mini setzen und eine Nightly-Mission anlegen. +2. **Nach 1 Woche Pilot:** Wenn Response-Qualität + Latenz überzeugen und kein größerer Parser-Fail auftritt → Founder-Tier öffnen (ENV-Flag bleibt, Opt-in per Mission-Wording). +3. **Nach 2 Wochen Pilot + Beta-Tier-Öffnung:** Schritt 3 (MCP-Server) M0-M1 starten — POC mit `search_notes` only + hartem User-Gate. +4. **Nach Erfolg von M1:** M2-M4 iterativ. Public Rollout erst mit kompletter Audit-UX + dokumentiertem Privacy-Flow. 
+ +## Quellen + +- [Google Blog — Deep Research Max: a step change for autonomous research agents](https://blog.google/innovation-and-ai/models-and-research/gemini-models/next-generation-gemini-deep-research/) +- [Google AI for Developers — Gemini Deep Research Agent docs](https://ai.google.dev/gemini-api/docs/deep-research) +- [Google AI for Developers — Gemini API changelog](https://ai.google.dev/gemini-api/docs/changelog) +- [VentureBeat — Google's new Deep Research and Deep Research Max agents can search the web and your private data](https://venturebeat.com/technology/googles-new-deep-research-and-deep-research-max-agents-can-search-the-web-and-your-private-data) +- [The Decoder — Google launches Deep Research and Deep Research Max agents to automate complex research](https://the-decoder.com/google-launches-deep-research-and-deep-research-max-agents-to-automate-complex-research/) +- [Testing Catalog — Google debuts Deep Research agents on AI Studio and APIs](https://www.testingcatalog.com/google-debuts-deep-research-agents-on-ai-studio-and-apis/) +- [H2S Media — Google Launches Deep Research Max, Its Most Powerful Autonomous Research Agent](https://www.how2shout.com/news/google-deep-research-max-gemini-api-autonomous-agent.html) +- [Model Context Protocol Specification (für Schritt 3)](https://modelcontextprotocol.io/specification) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 65283fe2c..73b0fd234 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -2538,6 +2538,9 @@ importers: '@mana/shared-hono': specifier: workspace:* version: link:../../packages/shared-hono + '@mana/shared-research': + specifier: workspace:* + version: link:../../packages/shared-research '@opentelemetry/api': specifier: ^1.9.0 version: 1.9.1 diff --git a/services/mana-ai/CLAUDE.md b/services/mana-ai/CLAUDE.md index 1180aa9bc..3675bca56 100644 --- a/services/mana-ai/CLAUDE.md +++ b/services/mana-ai/CLAUDE.md @@ -75,6 +75,28 @@ Der Runner wird agent-bewusst — Missionen gehoeren einem 
benannten Agent, Poli - [x] `filterToolsByAgentPolicy` schneidet `deny`-Tools raus bevor der Planner sie sieht. - [x] Metrik `mana_ai_agent_decisions_total{decision}`. +## Status: v0.7 (Cross-Tick Deep Research, 2026-04-22) + +Opt-in asynchroner Deep-Research-Pfad für Missions, die explizit tiefe Recherche wollen. Ruft `mana-research`'s neue Gemini-Deep-Research-Max-Provider (`gemini-deep-research` / `gemini-deep-research-max`) über den internen Service-to-Service-Endpunkt `/api/v1/internal/research/async` auf. Weil Max bis zu 60 min läuft und unser Tick 60 s, läuft das über Ticks hinweg. + +- [x] `ManaResearchClient` (`clients/mana-research.ts`) — HTTP-Client für mana-research's interne async-Endpoints. `X-Service-Key` + `X-User-Id`. Graceful-null bei Fehler. +- [x] `mana_ai.mission_research_jobs` Tabelle — ein Row pro pending Job pro Mission, PK `(user_id, mission_id)`. Präsenz = "läuft gerade". Nach `completed`/`failed` wird gelöscht. +- [x] Cross-Tick State-Machine in `cron/tick.ts` (`handleDeepResearch`): + - Pending Job → poll → `queued`/`running` skip, `completed` inject Result, `failed` fall-through zu Shallow + - Kein Job + `DEEP_RESEARCH_TRIGGER` + `config.deepResearchEnabled` → submit + insert → skip +- [x] Neuer Trigger `DEEP_RESEARCH_TRIGGER` ist **strenger** als der heutige `RESEARCH_TRIGGER` — matcht nur "deep research", "tiefe recherche", "umfassende recherche", "hintergrundrecherche", "deep dive". Zusätzlich per ENV gegated (`MANA_AI_DEEP_RESEARCH_ENABLED=true`, default off). +- [x] `planOneMission` Rückgabetyp ist jetzt eine Discriminated Union `{outcome:'planned'|'skipped'|'failed'}`. `'skipped'` (= research pending) wird **nicht** als parse-failure gezählt. +- [x] Metriken: `mana_ai_research_jobs_submitted_total{provider}`, `_completed_total{provider}`, `_failed_total{provider}`, `_pending_skips_total`. +- [x] Docker-Compose: `MANA_RESEARCH_URL`, `MANA_AI_DEEP_RESEARCH_ENABLED`, `depends_on: mana-research`. 
+- [x] `@mana/shared-research` als workspace-dep + `type-check` script in `package.json`. + +Bewusst nicht gemacht (offen): +- Mission-Config-Flag in der Webapp. Trigger ist heute Regex-basiert, nicht explizit konfigurierbar. Das reicht für den Pilot; wenn wir öffnen, brauchen wir eine UI-Checkbox im Mission-Detail. +- Image-Output (`charts`, Nano-Banana). Steckt in `providerRaw`, wird nicht im Answer-Text gerendert. +- Streaming-Thought-Summaries. Würde eine eigene SSE-Brücke zum Frontend brauchen. + +Details zum Deep-Research-Flow: [`docs/reports/gemini-deep-research.md`](../../docs/reports/gemini-deep-research.md) §3.2. + ## Status: v0.6 (Server-side Web-Research + erweiterte Tools) Der Runner kann jetzt vor dem Planner-Call eigenstaendig Web-Recherche ausfuehren (ohne Browser). Serverseitig werden 31 propose-Tools ueber 16 Module vom Planner vorgeschlagen (auto-Tools laufen ausschliesslich in der Webapp-Reasoning-Loop — der Server sieht nur propose). @@ -130,6 +152,9 @@ curl -X POST -H "X-Service-Key: dev-service-key" http://localhost:3067/internal/ PORT=3067 SYNC_DATABASE_URL=postgresql://mana:devpassword@localhost:5432/mana_sync MANA_LLM_URL=http://localhost:3020 +MANA_API_URL=http://localhost:3060 # news-research (RSS, shallow) +MANA_RESEARCH_URL=http://localhost:3068 # gemini-deep-research (deep, v0.7+) +MANA_AI_DEEP_RESEARCH_ENABLED=false # opt-in gate for Max tasks MANA_SERVICE_KEY=dev-service-key TICK_INTERVAL_MS=60000 TICK_ENABLED=true # flip to false to boot HTTP-only (for Docker health-check) @@ -217,11 +242,23 @@ services/mana-ai/ ├── src/ │ ├── index.ts — Hono bootstrap + tick scheduler wiring │ ├── config.ts — Env loading -│ ├── cron/tick.ts — Scan loop, overlap-guarded +│ ├── cron/tick.ts — Scan loop, overlap-guarded. 
v0.7: cross-tick +│ │ deep-research state machine in +│ │ handleDeepResearch() +│ ├── clients/ +│ │ └── mana-research.ts — v0.7: HTTP client for mana-research's +│ │ internal /research/async endpoints │ ├── db/ │ │ ├── connection.ts — postgres.js pool -│ │ └── missions-projection.ts — sync_changes → Mission LWW replay -│ ├── planner/client.ts — mana-llm HTTP client (OpenAI-compatible) +│ │ ├── migrate.ts — schema bootstrap (mission_snapshots, +│ │ │ decrypt_audit, agent_snapshots, +│ │ │ token_usage, mission_research_jobs) +│ │ ├── missions-projection.ts — sync_changes → Mission LWW replay +│ │ └── research-jobs.ts — v0.7: CRUD for mission_research_jobs +│ ├── planner/ +│ │ ├── llm-client.ts — mana-llm HTTP client (OpenAI-compatible) +│ │ └── news-research-client.ts — mana-api RSS-based news-research +│ │ (shallow pre-planning step) │ └── middleware/service-auth.ts — X-Service-Key gate for /internal/* ├── Dockerfile ├── package.json diff --git a/services/mana-ai/package.json b/services/mana-ai/package.json index a2eab0600..5b7df1e99 100644 --- a/services/mana-ai/package.json +++ b/services/mana-ai/package.json @@ -6,11 +6,13 @@ "scripts": { "dev": "bun run --watch src/index.ts", "start": "bun run src/index.ts", - "test": "bun test" + "test": "bun test", + "type-check": "tsc --noEmit" }, "dependencies": { "@mana/shared-ai": "workspace:*", "@mana/shared-hono": "workspace:*", + "@mana/shared-research": "workspace:*", "@opentelemetry/api": "^1.9.0", "@opentelemetry/exporter-trace-otlp-http": "^0.57.0", "@opentelemetry/resources": "^1.30.0", diff --git a/services/mana-ai/src/clients/mana-research.ts b/services/mana-ai/src/clients/mana-research.ts new file mode 100644 index 000000000..c2d5c46af --- /dev/null +++ b/services/mana-ai/src/clients/mana-research.ts @@ -0,0 +1,110 @@ +/** + * HTTP client for mana-research's internal service-to-service endpoints. + * + * Used by the deep-research pre-planning step in the tick loop. 
We + * submit the long-running research task on behalf of the mission's + * owner, then poll on the next tick until the job is complete. Credits + * are reserved / committed against the user by mana-research — this + * client is a thin HTTP wrapper. + * + * Endpoints: + * POST /api/v1/internal/research/async — submit { query, provider } + * GET /api/v1/internal/research/async/:id — poll + * + * Auth: X-Service-Key on every call; X-User-Id identifies the owner of + * the credit wallet the reservation + commit hit. + * + * All methods return `null` on transport/parse errors rather than + * throwing — a broken mana-research must not crash the tick loop. + */ + +import type { AgentAnswer } from '@mana/shared-research'; + +export type DeepResearchProvider = + | 'openai-deep-research' + | 'gemini-deep-research' + | 'gemini-deep-research-max'; + +export interface SubmitResult { + taskId: string; + status: 'queued' | 'running'; + providerId: DeepResearchProvider; + costCredits: number; +} + +export type PollStatus = 'queued' | 'running' | 'completed' | 'failed' | 'cancelled'; + +export interface PollResult { + taskId: string; + status: PollStatus; + providerId: DeepResearchProvider; + result?: { answer: AgentAnswer }; + error?: string; +} + +export class ManaResearchClient { + constructor( + private baseUrl: string, + private serviceKey: string + ) {} + + async submit( + userId: string, + query: string, + provider: DeepResearchProvider + ): Promise { + try { + const res = await fetch(`${this.baseUrl}/api/v1/internal/research/async`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Service-Key': this.serviceKey, + 'X-User-Id': userId, + 'X-App-Id': 'mana-ai', + }, + body: JSON.stringify({ query, provider }), + signal: AbortSignal.timeout(30_000), + }); + if (!res.ok) { + const body = await res.text().catch(() => ''); + console.warn(`[mana-research-client] submit ${res.status}: ${body.slice(0, 200)}`); + return null; + } + return (await res.json()) 
as SubmitResult; + } catch (err) { + console.warn( + '[mana-research-client] submit error:', + err instanceof Error ? err.message : String(err) + ); + return null; + } + } + + async poll(userId: string, taskId: string): Promise { + try { + const res = await fetch( + `${this.baseUrl}/api/v1/internal/research/async/${encodeURIComponent(taskId)}`, + { + headers: { + 'X-Service-Key': this.serviceKey, + 'X-User-Id': userId, + 'X-App-Id': 'mana-ai', + }, + signal: AbortSignal.timeout(15_000), + } + ); + if (!res.ok) { + const body = await res.text().catch(() => ''); + console.warn(`[mana-research-client] poll ${res.status}: ${body.slice(0, 200)}`); + return null; + } + return (await res.json()) as PollResult; + } catch (err) { + console.warn( + '[mana-research-client] poll error:', + err instanceof Error ? err.message : String(err) + ); + return null; + } + } +} diff --git a/services/mana-ai/src/config.ts b/services/mana-ai/src/config.ts index e4ee3830a..4f3189cd4 100644 --- a/services/mana-ai/src/config.ts +++ b/services/mana-ai/src/config.ts @@ -17,6 +17,14 @@ export interface Config { * research step to feed web-research context into the planner prompt * before it produces plan steps. */ manaApiUrl: string; + /** mana-research HTTP endpoint (Hono/Bun, port 3068). Hosts the + * async-research submit/poll endpoints that the deep-research pre- + * planning path delegates to for multi-minute Gemini tasks. */ + manaResearchUrl: string; + /** Opt-in gate for the deep-research pre-planning path. Default off + * — deep runs cost $1–7 per mission, so we only want them triggered + * when explicitly enabled on the server. */ + deepResearchEnabled: boolean; /** Shared key for service-to-service calls. */ serviceKey: string; /** How often the background tick scans for due Missions, in ms. 
*/ @@ -55,6 +63,8 @@ export function loadConfig(): Config { ), manaLlmUrl: requireEnv('MANA_LLM_URL', 'http://localhost:3020'), manaApiUrl: requireEnv('MANA_API_URL', 'http://localhost:3060'), + manaResearchUrl: requireEnv('MANA_RESEARCH_URL', 'http://localhost:3068'), + deepResearchEnabled: process.env.MANA_AI_DEEP_RESEARCH_ENABLED === 'true', serviceKey: requireEnv('MANA_SERVICE_KEY', 'dev-service-key'), tickIntervalMs: parseInt(process.env.TICK_INTERVAL_MS ?? '60000', 10), tickEnabled: process.env.TICK_ENABLED !== 'false', diff --git a/services/mana-ai/src/cron/tick.ts b/services/mana-ai/src/cron/tick.ts index d62ffa24f..3a3564c3d 100644 --- a/services/mana-ai/src/cron/tick.ts +++ b/services/mana-ai/src/cron/tick.ts @@ -49,6 +49,19 @@ import { } from '../metrics'; import { unwrapMissionGrant } from '../crypto/unwrap-grant'; import { NewsResearchClient } from '../planner/news-research-client'; +import { ManaResearchClient, type DeepResearchProvider } from '../clients/mana-research'; +import { + deletePendingResearchJob, + getPendingResearchJob, + insertPendingResearchJob, + touchPendingResearchJob, +} from '../db/research-jobs'; +import { + researchJobsSubmittedTotal, + researchJobsCompletedTotal, + researchJobsFailedTotal, + researchJobsPendingSkipsTotal, +} from '../metrics'; import type { ResolverContext } from '../db/resolvers/types'; import type { Config } from '../config'; import { withSpan } from '../tracing'; @@ -61,6 +74,15 @@ const ENC_PREFIX = 'enc:1:'; const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neueste|today|history|historisch|on this day)/i; +/** Strict opt-in for the expensive async deep-research path (Gemini + * Deep Research Max, ~$3–7 per task). Only matches explicit wording + * so users must deliberately ask for it in the mission objective. + * Gated further by `config.deepResearchEnabled` at the tick level. 
*/ +const DEEP_RESEARCH_TRIGGER = + /\b(deep research|tiefe recherche|umfassende recherche|hintergrundrecherche|deep dive)\b/i; + +const DEEP_RESEARCH_PROVIDER: DeepResearchProvider = 'gemini-deep-research-max'; + /** True when the value looks like the webapp's AES-GCM wire format. */ function isCiphertext(value: string | undefined): value is string { return typeof value === 'string' && value.startsWith(ENC_PREFIX); @@ -198,7 +220,13 @@ export async function runTickOnce(config: Config): Promise { }, () => planOneMission(m, llm, sql, agent, config) ); - if (planResult === null) { + if (planResult.outcome === 'skipped') { + // Deep-research job still running — pick this mission + // back up on the next tick. No plan produced, no + // parse-failure accounting. + continue; + } + if (planResult.outcome === 'failed') { parseFailures++; parseFailuresTotal.inc(); continue; @@ -270,13 +298,18 @@ export async function runTickOnce(config: Config): Promise { * (see planner/tools.ts) to keep the LLM from fabricating "read * results". */ +type PlanMissionOutcome = + | { outcome: 'planned'; plan: { summary: string; steps: PlannedStep[] }; tokensUsed: number } + | { outcome: 'skipped'; reason: 'research-pending' } + | { outcome: 'failed' }; + async function planOneMission( m: ServerMission, llm: ReturnType, sql: Sql, agent: ServerAgent | null, config: Config -): Promise<{ plan: { summary: string; steps: PlannedStep[] }; tokensUsed: number } | null> { +): Promise { const mission = serverMissionToSharedMission(m); // Resolve the mission's Key-Grant (if any) once per tick. An absent // grant is NOT an error — plaintext missions (goals-only) run fine @@ -286,8 +319,28 @@ async function planOneMission( const context = await buildResolverContext(m); const resolvedInputs = await resolveServerInputs(sql, m.inputs, m.userId, context); - // Pre-planning research step (unchanged from pre-migration). 
- if (RESEARCH_TRIGGER.test(m.objective) || RESEARCH_TRIGGER.test(m.conceptMarkdown)) { + // ─── Deep research pre-planning (opt-in, cross-tick) ───────── + // A pending job means a previous tick submitted an async research + // task; we poll here. A completed result is injected as a + // ResolvedInput and the plan proceeds normally; queued/running + // means we bail this tick and try again next time. No pending job + // + the opt-in trigger fires → we submit and bail. + const deepInput = await handleDeepResearch(m, sql, config); + if (deepInput === 'pending') { + return { outcome: 'skipped', reason: 'research-pending' }; + } + if (deepInput) { + resolvedInputs.push(deepInput); + } + + // Shallow pre-planning research step (RSS-based, synchronous). We + // still run this when deep research didn't fire — same behaviour + // as before. Skipped when deep research already supplied a + // __web-research__ block so we don't double-feed the planner. + if ( + !deepInput && + (RESEARCH_TRIGGER.test(m.objective) || RESEARCH_TRIGGER.test(m.conceptMarkdown)) + ) { const nrc = new NewsResearchClient(config.manaApiUrl); const research = await nrc.research(m.objective, { language: 'de', limit: 8 }); if (research) { @@ -352,6 +405,7 @@ async function planOneMission( } return { + outcome: 'planned', plan: { summary: loopResult.summary ?? '', steps: loopResult.executedCalls.map((ec) => ({ @@ -370,8 +424,122 @@ async function planOneMission( providerErrorsTotal.inc({ provider, kind: err.kind }); } console.warn(`[mana-ai tick] mission=${m.id} planner loop failed: ${msg}`); + return { outcome: 'failed' }; + } +} + +/** + * Cross-tick state machine for the deep-research pre-planning path. + * + * Return value: + * - `'pending'`: a job is currently queued/running upstream; caller + * must skip this mission for this tick. + * - a ResolvedInput: a job just completed, feed it into the planner. + * - `null`: no deep-research involvement — fall through to the + * existing shallow path. 
+ */ +async function handleDeepResearch( + m: ServerMission, + sql: Sql, + config: Config +): Promise< + 'pending' | { id: string; module: string; table: string; title: string; content: string } | null +> { + const client = new ManaResearchClient(config.manaResearchUrl, config.serviceKey); + const existing = await getPendingResearchJob(sql, m.userId, m.id); + + if (existing) { + const poll = await client.poll(m.userId, existing.taskId); + if (!poll) { + // Transport failure — keep the job around, try again next tick. + await touchPendingResearchJob(sql, m.userId, m.id); + researchJobsPendingSkipsTotal.inc(); + return 'pending'; + } + + if (poll.status === 'queued' || poll.status === 'running') { + await touchPendingResearchJob(sql, m.userId, m.id); + researchJobsPendingSkipsTotal.inc(); + return 'pending'; + } + + if (poll.status === 'failed' || poll.status === 'cancelled') { + await deletePendingResearchJob(sql, m.userId, m.id); + researchJobsFailedTotal.inc({ provider: existing.providerId }); + console.warn( + `[mana-ai tick] mission=${m.id} deep-research failed (${existing.providerId}): ${poll.error ?? poll.status}` + ); + // Fall through to shallow pre-planning this tick. + return null; + } + + // completed + await deletePendingResearchJob(sql, m.userId, m.id); + researchJobsCompletedTotal.inc({ provider: existing.providerId }); + const answer = poll.result?.answer; + if (!answer || !answer.answer) { + console.warn(`[mana-ai tick] mission=${m.id} deep-research completed without body`); + return null; + } + console.log( + `[mana-ai tick] mission=${m.id} deep-research done (${existing.providerId}): ` + + `${answer.citations.length} citations, ${answer.answer.length} chars` + ); + return { + id: '__web-research__', + module: 'news-research', + table: 'web', + title: `Deep Research: "${m.objective.slice(0, 60)}"`, + content: formatDeepResearchContext(m.objective, answer), + }; + } + + // No existing job. Do we want to submit one? 
+ if (!config.deepResearchEnabled) return null; + if (!DEEP_RESEARCH_TRIGGER.test(m.objective) && !DEEP_RESEARCH_TRIGGER.test(m.conceptMarkdown)) { return null; } + + const submission = await client.submit(m.userId, m.objective, DEEP_RESEARCH_PROVIDER); + if (!submission) { + // Submit failed — fall through to shallow so the mission still runs. + console.warn( + `[mana-ai tick] mission=${m.id} deep-research submit failed, falling back to shallow` + ); + return null; + } + await insertPendingResearchJob(sql, m.userId, m.id, submission.taskId, submission.providerId); + researchJobsSubmittedTotal.inc({ provider: submission.providerId }); + researchJobsPendingSkipsTotal.inc(); + console.log( + `[mana-ai tick] mission=${m.id} deep-research submitted ` + + `(${submission.providerId}, task=${submission.taskId.slice(0, 16)}…, ${submission.costCredits}c)` + ); + return 'pending'; +} + +/** + * Render the deep-research answer into the same markdown-shape the + * shallow pre-research step produces, so downstream planner prompts + * don't need to distinguish the two sources. + */ +function formatDeepResearchContext( + query: string, + answer: import('@mana/shared-research').AgentAnswer +): string { + const lines: string[] = [`# Deep-Research: "${query}"`, '', answer.answer.trim(), '']; + if (answer.citations.length > 0) { + lines.push('## Quellen'); + for (const c of answer.citations) { + lines.push(`- [${c.title}](${c.url})${c.snippet ? ` — ${c.snippet}` : ''}`); + } + lines.push(''); + } + lines.push( + '---', + 'Nutze diese Quellen fuer deinen Plan. Verwende nur URLs die oben stehen; erfinde keine.' + ); + return lines.join('\n'); } /** Parse provider name off a `provider/model` string. 
Used purely for diff --git a/services/mana-ai/src/db/migrate.ts b/services/mana-ai/src/db/migrate.ts index 7d38a63af..9b290d039 100644 --- a/services/mana-ai/src/db/migrate.ts +++ b/services/mana-ai/src/db/migrate.ts @@ -119,6 +119,30 @@ export async function migrate(sql: Sql): Promise { WHERE record->>'state' = 'active' `; + // ─── Pending deep-research jobs ────────────────────────────── + // When a mission's pre-planning step kicks off a long-running + // research task on mana-research (gemini-deep-research[-max] or + // openai-deep-research), we record it here so the NEXT tick knows + // to poll instead of re-submitting. One row per (user, mission) + // while the job is outstanding; the row is DELETED as soon as the + // job completes / fails, so presence == "a job is pending". + await sql` + CREATE TABLE IF NOT EXISTS mana_ai.mission_research_jobs ( + user_id TEXT NOT NULL, + mission_id TEXT NOT NULL, + task_id TEXT NOT NULL, + provider_id TEXT NOT NULL, + submitted_at TIMESTAMPTZ NOT NULL DEFAULT now(), + last_polled_at TIMESTAMPTZ, + PRIMARY KEY (user_id, mission_id) + ) + `; + + await sql` + CREATE INDEX IF NOT EXISTS idx_mission_research_jobs_user + ON mana_ai.mission_research_jobs (user_id, submitted_at DESC) + `; + // ─── Token usage tracking (Budget Enforcement) ────────────── // Append-only log of token consumption per planner call. The tick // loop queries the rolling 24h window to enforce Agent.maxTokensPerDay. diff --git a/services/mana-ai/src/db/research-jobs.ts b/services/mana-ai/src/db/research-jobs.ts new file mode 100644 index 000000000..5bfde87ad --- /dev/null +++ b/services/mana-ai/src/db/research-jobs.ts @@ -0,0 +1,103 @@ +/** + * Pending deep-research jobs — bookkeeping for the cross-tick state + * machine in the mission planner. 
+ * + * Lifecycle: + * tick N: mission triggers deep research → submit via mana-research + * → insert row { taskId, providerId, submitted_at } + * → skip planner this tick (result not ready) + * tick N+k: row present → poll via mana-research + * → if still running: touch last_polled_at, skip planner + * → if completed: read result, DELETE row, feed planner + * → if failed: DELETE row (mission goes through normal + * shallow path next tick) + * + * Storage only tracks the pending phase. Finished results are consumed + * immediately by the tick that sees them — no persistence beyond the + * resulting iteration written back to sync_changes. + */ + +import type { Sql } from './connection'; + +export interface PendingResearchJob { + userId: string; + missionId: string; + taskId: string; + providerId: string; + submittedAt: Date; + lastPolledAt: Date | null; +} + +export async function getPendingResearchJob( + sql: Sql, + userId: string, + missionId: string +): Promise { + const rows = await sql< + { + user_id: string; + mission_id: string; + task_id: string; + provider_id: string; + submitted_at: Date; + last_polled_at: Date | null; + }[] + >` + SELECT user_id, mission_id, task_id, provider_id, submitted_at, last_polled_at + FROM mana_ai.mission_research_jobs + WHERE user_id = ${userId} AND mission_id = ${missionId} + LIMIT 1 + `; + if (rows.length === 0) return null; + const r = rows[0]; + return { + userId: r.user_id, + missionId: r.mission_id, + taskId: r.task_id, + providerId: r.provider_id, + submittedAt: r.submitted_at, + lastPolledAt: r.last_polled_at, + }; +} + +export async function insertPendingResearchJob( + sql: Sql, + userId: string, + missionId: string, + taskId: string, + providerId: string +): Promise { + await sql` + INSERT INTO mana_ai.mission_research_jobs + (user_id, mission_id, task_id, provider_id) + VALUES (${userId}, ${missionId}, ${taskId}, ${providerId}) + ON CONFLICT (user_id, mission_id) DO UPDATE SET + task_id = EXCLUDED.task_id, + 
provider_id = EXCLUDED.provider_id, + submitted_at = now(), + last_polled_at = NULL + `; +} + +export async function touchPendingResearchJob( + sql: Sql, + userId: string, + missionId: string +): Promise { + await sql` + UPDATE mana_ai.mission_research_jobs + SET last_polled_at = now() + WHERE user_id = ${userId} AND mission_id = ${missionId} + `; +} + +export async function deletePendingResearchJob( + sql: Sql, + userId: string, + missionId: string +): Promise { + await sql` + DELETE FROM mana_ai.mission_research_jobs + WHERE user_id = ${userId} AND mission_id = ${missionId} + `; +} diff --git a/services/mana-ai/src/metrics.ts b/services/mana-ai/src/metrics.ts index 5520e38da..7ed3fe84f 100644 --- a/services/mana-ai/src/metrics.ts +++ b/services/mana-ai/src/metrics.ts @@ -72,6 +72,35 @@ export const missionErrorsTotal = new Counter({ registers: [register], }); +// ── Deep research (async cross-tick pre-planning) ───────── + +export const researchJobsSubmittedTotal = new Counter({ + name: 'mana_ai_research_jobs_submitted_total', + help: 'Deep-research jobs submitted to mana-research (per tick).', + labelNames: ['provider'] as const, + registers: [register], +}); + +export const researchJobsCompletedTotal = new Counter({ + name: 'mana_ai_research_jobs_completed_total', + help: 'Deep-research jobs that returned completed results to the planner.', + labelNames: ['provider'] as const, + registers: [register], +}); + +export const researchJobsFailedTotal = new Counter({ + name: 'mana_ai_research_jobs_failed_total', + help: 'Deep-research jobs that returned failed/cancelled/timeout.', + labelNames: ['provider'] as const, + registers: [register], +}); + +export const researchJobsPendingSkipsTotal = new Counter({ + name: 'mana_ai_research_jobs_pending_skips_total', + help: 'Tick iterations skipped because a deep-research job is still running.', + registers: [register], +}); + export const plannerLatency = new Histogram({ name: 'mana_ai_planner_request_duration_seconds', 
help: 'Latency of calls to the mana-llm backend.',