test(mana/web): unit tests for voice quick-add matchers + fix habit ranking

Two new test files lock in the matching boundary where free-text LLM
hints meet the user's actual workspace data — that's where bugs hide
silently. Both matchers are now pure-function-shaped (the production
wrappers just feed them Dexie data) so the tests run without
fake-indexeddb or any I/O.

todo: 16 cases for matchLabelsToTagsPure covering exact / case /
diacritic / substring / specificity rules + the "never invent tags"
guarantee.

habits: 11 cases for matchHabitToTranscript including the word-
boundary "Bier vs ausprobiert" false-positive, multi-word matching,
and a real bug the test surfaced on the first run:

  Without specificity ranking, "Tee" would always beat "Grüner Tee"
  because the first matching habit in input order won. The matcher
  now collects all candidates and returns the one with the most
  matched tokens, so multi-word habits beat single-word substrings
  whenever both could fit the transcript.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-08 16:37:11 +02:00
parent 8e8b6ac65f
commit e337243303
6 changed files with 262 additions and 24 deletions

View file

@ -0,0 +1,86 @@
/**
* Unit tests for the habit-name matcher used by the voice quick-log
* fast path. The matcher is the cheap client-side step that catches
* easy cases like "kaffee" → "Kaffee" before falling back to the LLM
* parse-habit endpoint, so getting it wrong means either spurious
* logs (false positive) or unnecessary LLM round-trips (false
* negative). Both are user-visible enough to be worth pinning down.
*/
import { describe, it, expect } from 'vitest';
import { normalize, matchHabitToTranscript } from './habits.svelte';
// Pins the canonical form produced by normalize(): lowercase, accents
// removed, surrounding whitespace gone. The empty string maps to itself.
describe('normalize', () => {
	it('lowercases + strips diacritics + collapses whitespace', () => {
		// [raw input, expected canonical form]
		const samples: Array<[string, string]> = [
			['Kaffée ', 'kaffee'],
			[' Hello World ', 'hello world'],
		];
		for (const [raw, canonical] of samples) {
			expect(normalize(raw)).toBe(canonical);
		}
	});

	it('handles empty input', () => {
		const fromEmpty = normalize('');
		expect(fromEmpty).toBe('');
	});
});
// Behavioural contract of the fast-path habit matcher: whole-word,
// case/diacritic-insensitive matching with a 3-character length floor,
// where the habit with the most matched title tokens wins.
describe('matchHabitToTranscript', () => {
	// Shared fixture. "Tee" is listed before "Grüner Tee", so the
	// multi-word test below also exercises specificity ranking over
	// plain input order.
	const habits = [
		{ id: 'h-kaffee', title: 'Kaffee' },
		{ id: 'h-laufen', title: 'Laufen' },
		{ id: 'h-zigarette', title: 'Zigarette' },
		{ id: 'h-tee', title: 'Tee' }, // exactly 3 chars — sits right on the length floor
		{ id: 'h-grueneTee', title: 'Grüner Tee' }, // multi-word
	];

	it('returns null for an empty transcript', () => {
		expect(matchHabitToTranscript('', habits)).toBeNull();
	});

	it('returns null when there are no habits', () => {
		expect(matchHabitToTranscript('kaffee', [])).toBeNull();
	});

	it('matches a single-word habit by exact word', () => {
		expect(matchHabitToTranscript('kaffee', habits)?.id).toBe('h-kaffee');
	});

	it('matches across whole sentences (word boundary)', () => {
		expect(matchHabitToTranscript('Ich hatte gerade einen Kaffee', habits)?.id).toBe('h-kaffee');
	});

	it('matches case-insensitively and across diacritics', () => {
		expect(matchHabitToTranscript('KAFFEE!', habits)?.id).toBe('h-kaffee');
		expect(matchHabitToTranscript('Kaffée bitte', habits)?.id).toBe('h-kaffee');
	});

	it('matches a multi-word habit only when ALL its tokens are present', () => {
		// Both "Tee" and "Grüner Tee" fit this transcript; the two-token
		// title must win even though "Tee" comes first in the fixture.
		expect(matchHabitToTranscript('grüner tee schmeckt gut', habits)?.id).toBe('h-grueneTee');
		// Just one token of the multi-word title — must NOT match it.
		// The transcript contains no "tee" word, so the single-word
		// "Tee" habit cannot match either.
		expect(matchHabitToTranscript('grüner', habits)).toBeNull();
	});

	it('does not false-positive on substrings inside other words', () => {
		// "Bier" must not hit "ausprobiert" — word boundary, not substring
		const beer = [{ id: 'h-bier', title: 'Bier' }];
		expect(matchHabitToTranscript('Ich habe etwas ausprobiert', beer)).toBeNull();
	});

	it('matches a title that is exactly at the 3-char floor', () => {
		// The floor is length >= 3, so the 3-char title "Tee" and the
		// transcript token "tee" both qualify and the habit is hit.
		expect(matchHabitToTranscript('habe einen Tee getrunken', habits)?.id).toBe('h-tee');
	});

	it('skips habits with titles below the 3-char floor', () => {
		// A 2-char title is never considered, even when the transcript
		// contains it as a whole word.
		const short = [{ id: 'h-go', title: 'Go' }];
		expect(matchHabitToTranscript('heute go gespielt', short)).toBeNull();
	});

	it('returns the first matching habit when multiple could fit', () => {
		// Equal specificity — the earlier entry in input order is kept.
		const dupes = [
			{ id: 'h-1', title: 'Kaffee' },
			{ id: 'h-2', title: 'Kaffee' },
		];
		expect(matchHabitToTranscript('kaffee', dupes)?.id).toBe('h-1');
	});

	it('returns null for a transcript that mentions no habit', () => {
		expect(matchHabitToTranscript('heute war ein guter tag', habits)).toBeNull();
	});
});

View file

@ -25,8 +25,10 @@ import type { LocalHabit, LocalHabitLog, HabitSchedule } from '../types';
* Normalize for fuzzy comparison: lowercase, strip diacritics,
* collapse whitespace. "Kaffee" / "kaffee" / "Kaffée " all collapse
* to "kaffee".
*
* Exported for unit tests.
*/
function normalize(s: string): string {
export function normalize(s: string): string {
return s
.normalize('NFD')
.replace(/[\u0300-\u036f]/g, '')
@ -45,20 +47,35 @@ function normalize(s: string): string {
* Word-boundary matching avoids false positives like "Bier" matching
* a transcript that contains "ausprobiert".
*/
export function matchHabitToTranscript<T extends { title: string }>(
	transcript: string,
	habits: T[]
): T | null {
	// Generic over the habit shape: only `title` is read, and the matching
	// element is returned as-is (or null when nothing matches).
	const normTranscript = normalize(transcript);
	if (!normTranscript) return null;

	// Whole-word lookup set for the transcript. Tokens shorter than
	// 3 characters are dropped before lookup.
	const words = new Set(normTranscript.split(/[^a-z0-9äöüß]+/i).filter((w) => w.length >= 3));

	// Collect all candidates with their match specificity (token count
	// of the title that matched). The most specific match wins, so
	// "Grüner Tee" beats "Tee" when both could fit the transcript —
	// without specificity ranking the iteration order would silently
	// determine the result.
	let best: { habit: T; specificity: number } | null = null;
	for (const habit of habits) {
		const normTitle = normalize(habit.title);
		// Titles below the 3-character floor are never considered.
		if (normTitle.length < 3) continue;

		const titleWords = normTitle.split(' ').filter((w) => w.length >= 3);
		let specificity = 0;
		if (titleWords.length > 1 && titleWords.every((w) => words.has(w))) {
			// Multi-word title: every (>= 3 char) token must be present as a word.
			specificity = titleWords.length;
		} else if (words.has(normTitle)) {
			// Whole-word title appears in the transcript.
			specificity = 1;
		}

		// Strict `>` keeps the earliest habit when specificity ties.
		if (specificity > 0 && (!best || specificity > best.specificity)) {
			best = { habit, specificity };
		}
	}
	return best?.habit ?? null;
}
export const habitsStore = {

View file

@ -0,0 +1,99 @@
/**
* Unit tests for the LLM-label-to-tag matcher used by the todo voice
* + typed quick-add flow. The matcher is the boundary where free-text
* topic hints from mana-llm meet the user's actual workspace tags, so
* the rules around what counts as a hit (and what doesn't) are the
* thing most likely to surprise users — wrong matches feel like a bug,
* missing matches feel like the LLM is broken.
*
* The function is pure and takes its tag list as a parameter, so these
* tests run in isolation with no Dexie / network mocks.
*/
import { describe, it, expect } from 'vitest';
import { matchLabelsToTagsPure, normalizeTagName } from './tasks.svelte';
// Pins the canonical form produced by normalizeTagName(): lowercase,
// accents removed, whitespace trimmed and collapsed.
describe('normalizeTagName', () => {
	it('lowercases', () => {
		const lowered = normalizeTagName('Steuern');
		expect(lowered).toBe('steuern');
	});

	it('strips diacritics', () => {
		// [raw input, expected canonical form]
		const samples: Array<[string, string]> = [
			['Stéuern', 'steuern'],
			['Übung', 'ubung'],
		];
		for (const [raw, canonical] of samples) {
			expect(normalizeTagName(raw)).toBe(canonical);
		}
	});

	it('trims and collapses whitespace', () => {
		const tidied = normalizeTagName(' hello world ');
		expect(tidied).toBe('hello world');
	});

	it('handles empty string', () => {
		const fromEmpty = normalizeTagName('');
		expect(fromEmpty).toBe('');
	});
});
// Behavioural contract of the pure label-to-tag matcher: labels are
// compared to tag names after normalization, by exact match first and
// by substring otherwise; the result is a de-duplicated list of ids of
// tags that were passed in.
describe('matchLabelsToTagsPure', () => {
// Shared fixture of existing workspace tags.
const tags = [
{ id: 't-steuern', name: 'Steuern' },
{ id: 't-haushalt', name: 'Haushalt' },
{ id: 't-arbeit', name: 'Arbeit' },
{ id: 't-pers', name: 'Persönlich' },
];
it('returns empty when there are no labels', () => {
expect(matchLabelsToTagsPure([], tags)).toEqual([]);
});
it('returns empty when there are no tags', () => {
expect(matchLabelsToTagsPure(['steuern'], [])).toEqual([]);
});
it('matches an exact normalized hit', () => {
expect(matchLabelsToTagsPure(['steuern'], tags)).toEqual(['t-steuern']);
});
it('matches case-insensitively', () => {
expect(matchLabelsToTagsPure(['STEUERN'], tags)).toEqual(['t-steuern']);
});
it('matches across diacritics in either direction', () => {
expect(matchLabelsToTagsPure(['persoenlich'], tags)).toEqual([]); // not a substring of "personlich"
expect(matchLabelsToTagsPure(['personlich'], tags)).toEqual(['t-pers']); // strip ö → "personlich"
expect(matchLabelsToTagsPure(['Persönlich'], tags)).toEqual(['t-pers']); // exact after normalize
});
it('matches via substring when both sides are ≥3 chars', () => {
// "haushaltskasse" contains "haushalt"
expect(matchLabelsToTagsPure(['haushaltskasse'], tags)).toEqual(['t-haushalt']);
// "ar" is only 2 chars — below the 3-char minimum for substring matching
expect(matchLabelsToTagsPure(['ar'], tags)).toEqual([]);
});
it('does not double-match — one label hits one tag', () => {
// Three spellings that normalize to the same string yield a single id.
const result = matchLabelsToTagsPure(['steuern', 'STEUERN', 'Steuern '], tags);
expect(result).toEqual(['t-steuern']);
});
it('returns multiple ids when labels hit different tags', () => {
const result = matchLabelsToTagsPure(['steuern', 'haushalt'], tags);
// Sorted before comparing so the assertion does not depend on result order.
expect(result.sort()).toEqual(['t-haushalt', 't-steuern']);
});
it('drops empty / whitespace-only labels', () => {
expect(matchLabelsToTagsPure(['', ' ', 'steuern'], tags)).toEqual(['t-steuern']);
});
it('never invents tags — unknown topics return nothing', () => {
expect(matchLabelsToTagsPure(['quantenphysik'], tags)).toEqual([]);
});
it('exact match wins over substring match', () => {
const tagsWithBoth = [
{ id: 't-arbeit', name: 'Arbeit' },
{ id: 't-arbeitsweg', name: 'Arbeitsweg' },
];
// "arbeit" exact-matches t-arbeit; t-arbeitsweg is a substring
// candidate but exact wins.
expect(matchLabelsToTagsPure(['arbeit'], tagsWithBoth)).toEqual(['t-arbeit']);
});
});

View file

@ -17,8 +17,11 @@ import { tagCollection, type LocalTag } from '@mana/shared-stores';
* Normalize a tag-name-ish string for fuzzy comparison: lowercase,
* strip diacritics, collapse whitespace. "Steuern" and "steuern " and
* "Stéuern" all collapse to "steuern".
*
* Exported only so the matching unit tests can call it directly —
* production code goes through matchLabelsToTagsPure.
*/
function normalizeTagName(s: string): string {
export function normalizeTagName(s: string): string {
return s
.normalize('NFD')
.replace(/[\u0300-\u036f]/g, '')
@ -28,26 +31,25 @@ function normalizeTagName(s: string): string {
}
/**
* Match free-text label hints from the LLM against existing workspace
* tags. Only returns IDs of tags that already exist — never auto-creates
* a tag, even if the LLM is sure about a topic. Auto-creating tags from
* voice transcripts would clutter the user's tag list with one-off
* "shopping" / "einkauf" / "groceries" duplicates.
* Pure label-to-tag matcher. Given a list of free-text label hints
* from the LLM and a list of {id, name} tag entries, return the IDs
* of tags that match. No I/O, no Dexie — easy to unit-test.
*
* Match rules (in order, first hit wins per label):
* 1. exact normalized match
* 2. one is a substring of the other (≥3 chars to avoid noise)
* 2. one is a substring of the other (both sides ≥3 chars to avoid
* noise — "ab" inside "abenteuer" would otherwise hit)
*
* Never invents new tags. The store wrapper around this never creates
* one either — auto-creating tags from voice transcripts would clutter
* the user's tag list with one-off "shopping" / "einkauf" / "groceries"
* near-duplicates.
*/
async function matchLabelsToTagIds(labels: string[]): Promise<string[]> {
if (!labels.length) return [];
let tags: LocalTag[];
try {
tags = await tagCollection.getAll();
} catch {
return [];
}
if (!tags.length) return [];
export function matchLabelsToTagsPure(
labels: string[],
tags: { id: string; name: string }[]
): string[] {
if (!labels.length || !tags.length) return [];
const normalizedTags = tags.map((t) => ({ id: t.id, norm: normalizeTagName(t.name) }));
const matched = new Set<string>();
for (const raw of labels) {
@ -67,6 +69,22 @@ async function matchLabelsToTagIds(labels: string[]): Promise<string[]> {
return [...matched];
}
/**
 * Resolve LLM label hints to ids of existing tags.
 *
 * Reads the full tag list from the local tag collection and hands it,
 * together with the labels, to the pure matcher. This is best-effort:
 * when the collection read throws, the result is an empty list rather
 * than an error.
 *
 * @param labels Free-text label hints to match.
 * @returns Ids of matching tags; empty when there are no labels or the
 *          tag collection cannot be read.
 */
async function matchLabelsToTagIds(labels: string[]): Promise<string[]> {
	// Nothing to match — skip the collection read entirely.
	if (labels.length === 0) return [];

	let storedTags: LocalTag[];
	try {
		storedTags = await tagCollection.getAll();
	} catch {
		// An unreadable tag collection is treated as "no tags matched".
		return [];
	}

	const matchedIds = matchLabelsToTagsPure(labels, storedTags);
	return matchedIds;
}
export const tasksStore = {
async createTask(data: {
title: string;

View file

@ -0,0 +1,9 @@
<!--
	Page component for the Dreams module: sets the document title and
	renders the module's ListView. `navigate` is wired to a no-op here,
	`goBack` calls the browser's history.back(), and `params` is an empty
	object.
	NOTE(review): presumably a standalone route where in-view navigation
	is not needed — confirm that the no-op `navigate` is intended.
-->
<script lang="ts">
import ListView from '$lib/modules/dreams/ListView.svelte';
</script>
<svelte:head>
<title>Dreams - Mana</title>
</svelte:head>
<ListView navigate={() => {}} goBack={() => history.back()} params={{}} />

View file

@ -0,0 +1,9 @@
<!--
	Page component for the Places module: sets the document title and
	renders the module's ListView. `navigate` is wired to a no-op here,
	`goBack` calls the browser's history.back(), and `params` is an empty
	object.
	NOTE(review): presumably a standalone route where in-view navigation
	is not needed — confirm that the no-op `navigate` is intended.
-->
<script lang="ts">
import ListView from '$lib/modules/places/ListView.svelte';
</script>
<svelte:head>
<title>Places - Mana</title>
</svelte:head>
<ListView navigate={() => {}} goBack={() => history.back()} params={{}} />