mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 21:21:10 +02:00
fix(memoro): generateTitle empty-result fallback + watcher diagnostics
User test surfaced the actual auto-title bug: the entire pipeline
(enqueue → process → watcher) works correctly, but the task result
itself is empty:
[memoro] enqueued title task { taskId, memoId }
[memoro-llm-watcher] saw 1 done title task(s)
[memoro-llm-watcher] writing title to memo XXX: ""
[memoro-llm-watcher] applied + cleared row YYY
The watcher faithfully wrote "" to memo.title, the input placeholder
showed "Titel..." again, and the UI looked stuck. This commit adds three
layered fixes so this can't bite us in any execution path going forward.
1. generate-title.ts: extract rules logic + use it as runLlm fallback
Pulled the deterministic first-sentence heuristic into a private
`rulesImpl()` function so both `runLlm` and `runRules` can call
it. runLlm now invokes rulesImpl as a fallback when the cleaned
LLM output is empty. This catches the case where the model emits
only punctuation, only special tokens, or only whitespace — all
of which collapse to "" after the cleanup chain (`.trim()` → strip
surrounding quotes/markdown → strip trailing dots → `.trim()`).
The most likely real-world trigger: Gemma 4 occasionally emits a
single `.` for short prompts that hit its over-strict
"answer with ONLY the title" instruction. The cleanup turns
"." into "" and we lose the result.
2. llm-watcher.svelte.ts: date-based backstop for any empty result
Belt-and-suspenders: even if a future task implementation forgets
the rules fallback, the watcher itself now guarantees a non-empty
title. When `row.result.trim()` is empty, synthesize a label like
"Memo vom 9. April 2026" from the memo's createdAt (or from the
current date, in the default short format, if createdAt is missing). The user always sees a
real title — never an empty placeholder.
Same write path otherwise (encryptRecord + memoTable.update +
delete queue row), just with the guaranteed-non-empty value.
3. llm-watcher.svelte.ts: enhanced diagnostic logging
The "writing title" log now includes `row.source` (which tier
actually executed) and `row.attempts`, so the next time we see
weird behavior we can tell at a glance whether it was the
browser tier, the rules tier, or the server. The empty-result
path logs `console.warn` (not info) with the raw result via
JSON.stringify so we see exactly what came back ("", ".", " ",
undefined-coerced-to-string, etc.).
After this commit lands:
- Tier 0 user: runRules returns at minimum "Ohne Titel" (its
own fallback). The watcher writes that.
- Browser tier with empty Gemma output: runLlm now falls through
to rulesImpl which also can't return empty. The watcher writes
the rules-tier output.
- Any other freak case where the result is still empty: the
watcher's date-based backstop kicks in. "Memo vom <date>".
So the user-visible "stuck on empty title" symptom is now guarded
against by all three layers.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0987b08185
commit
9760269e9f
2 changed files with 64 additions and 19 deletions
|
|
@ -21,6 +21,29 @@ export interface GenerateTitleInput {
|
|||
|
||||
export type GenerateTitleOutput = string;
|
||||
|
||||
/**
 * Deterministic first-sentence heuristic for deriving a title from text.
 *
 * Kept at module scope so that both `runRules` and `runLlm` can call it;
 * `runLlm` uses it as a fallback when the cleaned LLM output is empty.
 *
 * @param input - Title request; `input.text` is the source text and
 *   `input.maxWords` optionally caps the number of words (default 7).
 * @returns A non-empty title: the first non-empty sentence of the text,
 *   limited to `maxWords` words and roughly 60 characters, or
 *   `'Ohne Titel'` when no usable text remains.
 */
function rulesImpl(input: GenerateTitleInput): string {
  const text = input.text.trim();
  if (!text) return 'Ohne Titel';

  // Split on .!? or newline and take the first segment that still has
  // content. Taking segment [0] blindly yields '' for text that starts with
  // a delimiter (e.g. "...Hallo"), which would discard usable text.
  const firstSentence =
    text
      .split(/[.!?\n]/)
      .map((segment) => segment.trim())
      .find((segment) => segment.length > 0) ?? '';

  // Cap at ~60 chars / maxWords words, whichever comes first.
  const maxWords = input.maxWords ?? 7;
  const words = firstSentence.split(/\s+/).slice(0, maxWords);
  let candidate = words.join(' ');

  if (candidate.length > 60) {
    // 57 chars plus the ellipsis keeps the result within the cap.
    candidate = candidate.slice(0, 57).trimEnd() + '…';
  }

  // Covers delimiter-only text and a non-positive maxWords.
  return candidate || 'Ohne Titel';
}
|
||||
|
||||
export const generateTitleTask: LlmTask<GenerateTitleInput, GenerateTitleOutput> = {
|
||||
name: 'common.generateTitle',
|
||||
minTier: 'none', // works on Tier 0 via the first-sentence heuristic
|
||||
|
|
@ -49,29 +72,26 @@ export const generateTitleTask: LlmTask<GenerateTitleInput, GenerateTitleOutput>
|
|||
|
||||
// Defensive: strip surrounding quotes / markdown / trailing dots in
|
||||
// case the model didn't fully respect the system prompt.
|
||||
return result.content
|
||||
const cleaned = result.content
|
||||
.trim()
|
||||
.replace(/^["'`*_]+|["'`*_]+$/g, '')
|
||||
.replace(/\.+$/, '')
|
||||
.trim();
|
||||
|
||||
// LLM produced nothing usable (empty content, only punctuation,
|
||||
// only special tokens that got stripped, etc.) — fall back to the
|
||||
// deterministic rules implementation so the user gets *something*.
|
||||
// Without this fallback the watcher writes "" to memo.title and the
|
||||
// user sees an empty placeholder forever.
|
||||
if (!cleaned) {
|
||||
console.info('[generateTitle] LLM returned empty after cleanup, falling back to rules');
|
||||
return rulesImpl(input);
|
||||
}
|
||||
|
||||
return cleaned;
|
||||
},
|
||||
|
||||
async runRules(input): Promise<GenerateTitleOutput> {
|
||||
const text = input.text.trim();
|
||||
if (!text) return 'Ohne Titel';
|
||||
|
||||
// Take the first sentence — split on .!? or newline.
|
||||
const firstSentence = text.split(/[.!?\n]/)[0]?.trim() ?? text;
|
||||
|
||||
// Cap at ~60 chars / maxWords words, whichever comes first.
|
||||
const maxWords = input.maxWords ?? 7;
|
||||
const words = firstSentence.split(/\s+/).slice(0, maxWords);
|
||||
let candidate = words.join(' ');
|
||||
|
||||
if (candidate.length > 60) {
|
||||
candidate = candidate.slice(0, 57).trimEnd() + '…';
|
||||
}
|
||||
|
||||
return candidate || 'Ohne Titel';
|
||||
return rulesImpl(input);
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -132,9 +132,34 @@ async function applyRow(row: QueuedTask): Promise<void> {
|
|||
return;
|
||||
}
|
||||
|
||||
console.info(`[memoro-llm-watcher] writing title to memo ${row.refId}: "${row.result}"`);
|
||||
// Backstop: if the task result somehow came back empty/whitespace
|
||||
// (LLM emitted only special tokens, runRules got an empty input,
|
||||
// any other edge case), synthesize a date-based fallback so the
|
||||
// user always gets *some* title rather than a stuck empty input.
|
||||
let titleToWrite = row.result.trim();
|
||||
if (!titleToWrite) {
|
||||
const created = (memo as { createdAt?: string }).createdAt;
|
||||
const dateLabel = created
|
||||
? new Date(created).toLocaleDateString('de', {
|
||||
day: 'numeric',
|
||||
month: 'long',
|
||||
year: 'numeric',
|
||||
})
|
||||
: new Date().toLocaleDateString('de');
|
||||
titleToWrite = `Memo vom ${dateLabel}`;
|
||||
console.warn(
|
||||
`[memoro-llm-watcher] row ${row.id} returned empty title — using date fallback "${titleToWrite}"`,
|
||||
{ source: row.source, attempts: row.attempts, rawResult: JSON.stringify(row.result) }
|
||||
);
|
||||
} else {
|
||||
console.info(`[memoro-llm-watcher] writing title to memo ${row.refId}: "${titleToWrite}"`, {
|
||||
source: row.source,
|
||||
attempts: row.attempts,
|
||||
});
|
||||
}
|
||||
|
||||
const diff: Partial<LocalMemo> = {
|
||||
title: row.result,
|
||||
title: titleToWrite,
|
||||
updatedAt: new Date().toISOString(),
|
||||
};
|
||||
await encryptRecord('memos', diff);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue