mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-19 06:01:24 +02:00
Add distributed tracing to the mana-ai background runner so mission
execution can be visualized end-to-end in Grafana.
Instrumentation (services/mana-ai/):
- tracing.ts: OTel provider setup with OTLP/HTTP exporter, withSpan() helper
- tick.ts: tick.planMission span with mission/agent/user attributes
- client.ts: planner.complete span with LLM model, tokens, latency
Infrastructure:
- docker/tempo/tempo.yaml: Grafana Tempo config (OTLP HTTP on 4318)
- docker-compose: tempo service + tempo_data volume + mana-ai env var
- docker/grafana/provisioning/datasources/tempo.yml: auto-provisioned
Trace flow:
tick.planMission (root span)
└── planner.complete (child span)
├── llm.model = "gpt-4o-mini"
├── llm.tokens.total = 1234
└── llm.response.length = 567
Enable: set OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
View: Grafana → Explore → Tempo datasource
Also fixes: removed broken @mana/subscriptions workspace ref from arcade.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
96 lines
2.9 KiB
TypeScript
96 lines
2.9 KiB
TypeScript
/**
|
|
* OpenTelemetry tracing setup for mana-ai.
|
|
*
|
|
* Exports a tracer and initializes the trace provider on first import.
|
|
* Traces are exported via OTLP/HTTP to Grafana Tempo (or any
|
|
* OTLP-compatible backend). When no backend is configured
|
|
* (OTEL_EXPORTER_OTLP_ENDPOINT not set), tracing is a no-op.
|
|
*
|
|
* Usage in service code:
|
|
* import { tracer } from '../tracing';
|
|
 *   const span = tracer.startSpan('tick.planMission');
 *   try { ... } finally { span.end(); }
 *
 * Or with the helper:
 *   import { withSpan } from '../tracing';
 *   const result = await withSpan('tick.planMission', { missionId }, async (span) => {
|
|
* // ... your code
|
|
* });
|
|
*/
|
|
|
|
import {
|
|
trace,
|
|
SpanStatusCode,
|
|
type Span,
|
|
type Tracer,
|
|
type SpanOptions,
|
|
} from '@opentelemetry/api';
|
|
import {
|
|
BasicTracerProvider,
|
|
SimpleSpanProcessor,
|
|
BatchSpanProcessor,
|
|
} from '@opentelemetry/sdk-trace-base';
|
|
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
|
|
import { Resource } from '@opentelemetry/resources';
|
|
import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from '@opentelemetry/semantic-conventions';
|
|
|
|
// Base URL of the OTLP/HTTP backend, read from the environment. Surrounding
// whitespace and trailing slashes are stripped so that appending the signal
// path below never produces a malformed URL such as
// "http://localhost:4318//v1/traces".
const OTEL_ENDPOINT = process.env.OTEL_EXPORTER_OTLP_ENDPOINT?.trim().replace(/\/+$/, '');

// Initialize the provider once on module load. When the endpoint is unset
// (or empty after normalization) no provider is registered at all.
if (OTEL_ENDPOINT) {
  // Built once so the exporter and the startup log always agree.
  const tracesUrl = `${OTEL_ENDPOINT}/v1/traces`;

  const resource = new Resource({
    [ATTR_SERVICE_NAME]: 'mana-ai',
    [ATTR_SERVICE_VERSION]: '0.6.0',
  });

  const exporter = new OTLPTraceExporter({
    url: tracesUrl,
  });

  const provider = new BasicTracerProvider({ resource });
  // Use batch in production (less overhead), simple in dev (immediate export)
  const isDev = process.env.NODE_ENV === 'development';
  provider.addSpanProcessor(
    isDev ? new SimpleSpanProcessor(exporter) : new BatchSpanProcessor(exporter)
  );
  provider.register();

  console.log(`[mana-ai] OTel tracing enabled → ${tracesUrl}`);
} else {
  console.log('[mana-ai] OTel tracing disabled (set OTEL_EXPORTER_OTLP_ENDPOINT to enable)');
}
|
|
|
|
/**
 * Shared tracer for the mana-ai service, obtained from the global
 * OpenTelemetry API under the instrumentation name 'mana-ai' (version
 * '0.6.0'). If no provider was registered (OTel not configured), the API
 * guarantees that every operation on this tracer is a no-op.
 */
export const tracer: Tracer = trace.getTracer(
  'mana-ai',
  '0.6.0'
);
|
|
|
|
/**
 * Execute an async function within a traced, active span. Automatically:
 * - Sets span attributes from the provided record
 * - On throw: records the exception on the span, marks the span as ERROR,
 *   and rethrows the original error unchanged
 * - Ends the span in all cases
 *
 * @param name - Span name.
 * @param attributes - Attributes applied to the span right after it starts.
 * @param fn - Work to run inside the span; receives the span so it can add
 *   further attributes or events.
 * @param options - Optional span options forwarded to `startActiveSpan`.
 * @returns Whatever `fn` resolves to.
 * @throws Whatever `fn` throws or rejects with.
 */
export async function withSpan<T>(
  name: string,
  attributes: Record<string, string | number | boolean>,
  fn: (span: Span) => Promise<T>,
  options?: SpanOptions
): Promise<T> {
  return tracer.startActiveSpan(name, options ?? {}, async (span) => {
    span.setAttributes(attributes);
    try {
      return await fn(span);
    } catch (err) {
      // Attach the exception itself (type, message, stack) as a span event;
      // the status message alone loses the stack trace.
      span.recordException(err instanceof Error ? err : String(err));
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: err instanceof Error ? err.message : String(err),
      });
      throw err;
    } finally {
      span.end();
    }
  });
}
|