mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
feat(shared-ai): route compactor to Haiku-tier model by default (M2.5)
compactHistory() now defaults to DEFAULT_COMPACT_MODEL = 'google/gemini-2.5-flash-lite' when the caller doesn't override. Lite is ~3–5x cheaper than gemini-2.5-flash with near-identical summarisation quality — summarisation doesn't need the same tier as reasoning + tool-calling, and the compactor fires exactly when token spend is highest, so the cheaper route saves exactly where it matters.

CompactHistoryOptions.model is now optional. All three consumers (mana-ai tick, webapp Companion, webapp Mission runner) drop their explicit gemini-2.5-flash override and let the default apply.

This is the pragmatic M2.5: no mana-llm changes. The "tier" abstraction (X-Model-Tier header, env-routed aliases) from the Claude-Code report makes sense only once multiple utility tasks need cheaper routing — topic-detection, classification, command-injection checks. Today only the compactor wants it, and a model constant is the simplest contract that works.

2 new tests (default applied + override honoured). 79 shared-ai tests green, all three consumers type-check clean. One pre-existing unrelated type error in apps/mana/apps/web/src/lib/modules/wardrobe/queries.ts (not touched by this commit).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2769241de3
commit
f7536bc0b9
7 changed files with 83 additions and 16 deletions
|
|
@ -88,6 +88,7 @@ export {
|
|||
compactHistory,
|
||||
COMPACT_SYSTEM_PROMPT,
|
||||
DEFAULT_COMPACT_KEEP_RECENT,
|
||||
DEFAULT_COMPACT_MODEL,
|
||||
DEFAULT_COMPACT_THRESHOLD,
|
||||
MockLlmClient,
|
||||
parseCompactSummary,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { describe, expect, it } from 'vitest';
|
|||
import {
|
||||
COMPACT_SYSTEM_PROMPT,
|
||||
DEFAULT_COMPACT_KEEP_RECENT,
|
||||
DEFAULT_COMPACT_MODEL,
|
||||
DEFAULT_COMPACT_THRESHOLD,
|
||||
compactHistory,
|
||||
parseCompactSummary,
|
||||
|
|
@ -191,6 +192,46 @@ describe('compactHistory', () => {
|
|||
expect(res.usage).toEqual({ promptTokens: 100, completionTokens: 30 });
|
||||
});
|
||||
|
||||
it('defaults to DEFAULT_COMPACT_MODEL when model is omitted (fast-tier routing)', async () => {
|
||||
const history = buildHistory(3, 4);
|
||||
const seenModels: string[] = [];
|
||||
const capturingLlm = {
|
||||
async complete(req: { model: string }) {
|
||||
seenModels.push(req.model);
|
||||
return {
|
||||
content: '## Goal\n\n## Decisions\n\n## Tools Called\n\n## Current Progress\n',
|
||||
toolCalls: [],
|
||||
finishReason: 'stop' as const,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
await compactHistory(history, { llm: capturingLlm }); // no explicit model
|
||||
|
||||
expect(seenModels).toHaveLength(1);
|
||||
expect(seenModels[0]).toBe(DEFAULT_COMPACT_MODEL);
|
||||
expect(DEFAULT_COMPACT_MODEL).toBe('google/gemini-2.5-flash-lite');
|
||||
});
|
||||
|
||||
it('honours an explicit model override', async () => {
|
||||
const history = buildHistory(3, 4);
|
||||
const seenModels: string[] = [];
|
||||
const capturingLlm = {
|
||||
async complete(req: { model: string }) {
|
||||
seenModels.push(req.model);
|
||||
return {
|
||||
content: '## Goal\n\n## Decisions\n\n## Tools Called\n\n## Current Progress\n',
|
||||
toolCalls: [],
|
||||
finishReason: 'stop' as const,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
await compactHistory(history, { llm: capturingLlm, model: 'custom/override-model' });
|
||||
|
||||
expect(seenModels[0]).toBe('custom/override-model');
|
||||
});
|
||||
|
||||
it('respects a custom keepRecent value', async () => {
|
||||
const history = buildHistory(5, 6);
|
||||
const llm = new MockLlmClient().enqueueStop('## Goal\n\n## Decisions\n');
|
||||
|
|
|
|||
|
|
@ -37,6 +37,21 @@ export const DEFAULT_COMPACT_THRESHOLD = 0.92;
|
|||
* should stay intact for coherence. */
|
||||
export const DEFAULT_COMPACT_KEEP_RECENT = 4;
|
||||
|
||||
/**
|
||||
* Cheap "fast-tier" model the compactor runs on by default. Matches
|
||||
* Claude Code's pattern of routing utility tasks (summarisation,
|
||||
* topic detection, session-summary) to Haiku instead of burning the
|
||||
* primary-tier budget on them.
|
||||
*
|
||||
* google/gemini-2.5-flash-lite is ~3–5x cheaper than gemini-2.5-flash
|
||||
* with near-identical summarisation quality. Consumers that need
|
||||
* something different (cost policy, offline fallback to Ollama) can
|
||||
* override per-call via `CompactHistoryOptions.model`.
|
||||
*
|
||||
* Format follows mana-llm's `provider/model` convention.
|
||||
*/
|
||||
export const DEFAULT_COMPACT_MODEL = 'google/gemini-2.5-flash-lite';
|
||||
|
||||
/**
|
||||
* Decide whether to compact based on token usage against a ceiling.
|
||||
* Returns false on missing inputs so the caller can skip silently when
|
||||
|
|
@ -122,7 +137,11 @@ export function renderCompactSummary(s: CompactSummary): string {
|
|||
|
||||
export interface CompactHistoryOptions {
|
||||
readonly llm: LlmClient;
|
||||
readonly model: string;
|
||||
/** Model to summarise with. Defaults to `DEFAULT_COMPACT_MODEL`
|
||||
* (gemini-2.5-flash-lite) — cheaper than the primary planner
|
||||
* model, which is the whole point: summarisation doesn't need
|
||||
* the same tier as reasoning + tool-calling. */
|
||||
readonly model?: string;
|
||||
/** How many most-recent turns to preserve verbatim. Default 4. */
|
||||
readonly keepRecent?: number;
|
||||
/** Upper bound on compactor-LLM temperature — we want summarisation,
|
||||
|
|
@ -222,7 +241,7 @@ export async function compactHistory(
|
|||
const response = await opts.llm.complete({
|
||||
messages: compactRequestMessages,
|
||||
tools: [],
|
||||
model: opts.model,
|
||||
model: opts.model ?? DEFAULT_COMPACT_MODEL,
|
||||
temperature: opts.temperature ?? 0.2,
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ export { runPlannerLoop, LOOP_STATE_RECENT_CALLS_WINDOW, PARALLEL_TOOL_BATCH_SIZ
|
|||
export {
|
||||
COMPACT_SYSTEM_PROMPT,
|
||||
DEFAULT_COMPACT_KEEP_RECENT,
|
||||
DEFAULT_COMPACT_MODEL,
|
||||
DEFAULT_COMPACT_THRESHOLD,
|
||||
compactHistory,
|
||||
parseCompactSummary,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue