mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-17 00:39:41 +02:00
- Base parser: multilingual (DE/EN/FR/ES/IT) date, time, weekday, month parsing - Base parser: fuzzy/typo tolerance (Levenshtein), recurrence (RRULE), relative time - Base parser: timezone extraction, date ranges, ordinal dates, confidence scoring - Base parser: past dates (gestern/yesterday), this/next week distinction - Base parser: compose helper (createAppParser), multiple @references - Calendar: event-parser with duration, time ranges, location, all-day, calendar ref - Calendar: wire up UnifiedBar with onCreate/onParseCreate for quick event creation - Todo: task-parser multilingual priority keywords (urgent/important/normal/later) - Planta: plant-parser with acquisition keywords (gekauft/bought/acheté) - Mukke: song-parser with Artist-Title format, BPM, genre, playlist/project creation - NutriPhi: meal-parser with meal type detection, add QuickInputBar to layout - All parsers: 210 tests across 7 test suites, all passing Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1260 lines
37 KiB
TypeScript
1260 lines
37 KiB
TypeScript
/**
 * Base Natural Language Parser
 *
 * Shared parsing utilities for date, time, and tags across all apps.
 * App-specific parsers (task-parser, event-parser, contact-parser) extend this.
 *
 * Supports locales: de, en, fr, es, it
 */
|
||
|
||
import {
|
||
addDays,
|
||
addWeeks,
|
||
addHours,
|
||
addMinutes,
|
||
nextMonday,
|
||
nextTuesday,
|
||
nextWednesday,
|
||
nextThursday,
|
||
nextFriday,
|
||
nextSaturday,
|
||
nextSunday,
|
||
setHours,
|
||
setMinutes,
|
||
isBefore,
|
||
startOfWeek,
|
||
} from 'date-fns';
|
||
|
||
/** Locale codes accepted by every locale-aware helper in this module. */
export type ParserLocale =
  | 'de'
  | 'en'
  | 'fr'
  | 'es'
  | 'it';
|
||
|
||
/** Result shape shared by the parsers built on top of this module. */
export interface BaseParsedInput {
  title: string;
  /** Parsed calendar date, when one was found. */
  date?: Date;
  /** Parsed wall-clock time, when one was found. */
  time?: {
    hours: number;
    minutes: number;
  };
  tagNames: string[];
  rawInput: string;
  /** Confidence score 0-1. 1.0 = exact match, 0.8 = fuzzy, 0.5 = ambiguous */
  confidence: number;
}
|
||
|
||
/**
 * Outcome of a single extraction step.
 *
 * `value` is the extracted item (or `undefined` when nothing matched) and
 * `remaining` is the input text that is left for further extraction steps.
 */
export interface ExtractResult<T> {
  value: T | undefined;
  remaining: string;
}
|
||
|
||
// ============================================================================
|
||
// Locale-aware Pattern Definitions
|
||
// ============================================================================
|
||
|
||
/** A regular expression paired with the function that produces its date. */
interface DatePattern {
  /** Expression tested against the input text. */
  pattern: RegExp;
  /** Produces the date for a hit; the match object is optional. */
  getDate: (match?: RegExpMatchArray) => Date;
}
|
||
|
||
/** Maps a reference date to another date (the shape of the `next<Weekday>` helpers). */
type DayFn = (date: Date) => Date;
|
||
|
||
/** `next<Weekday>` helpers in Monday-to-Sunday order, index-aligned with WEEKDAY_NAMES. */
const NEXT_DAY_FNS: DayFn[] = [
  nextMonday, // 0
  nextTuesday, // 1
  nextWednesday, // 2
  nextThursday, // 3
  nextFriday, // 4
  nextSaturday, // 5
  nextSunday, // 6
];
|
||
|
||
/**
 * Lower-case weekday names for each locale.
 * Every list runs Monday → Sunday so that an index can be shared with
 * NEXT_DAY_FNS.
 */
const WEEKDAY_NAMES: Record<ParserLocale, string[]> = {
  // German
  de: ['montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag', 'samstag', 'sonntag'],
  // English
  en: ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'],
  // French
  fr: ['lundi', 'mardi', 'mercredi', 'jeudi', 'vendredi', 'samedi', 'dimanche'],
  // Spanish
  es: ['lunes', 'martes', 'miércoles', 'jueves', 'viernes', 'sábado', 'domingo'],
  // Italian
  it: ['lunedì', 'martedì', 'mercoledì', 'giovedì', 'venerdì', 'sabato', 'domenica'],
};
|
||
|
||
/**
 * Relative date vocabulary of one locale.
 *
 * The string lists are later wrapped into regular expressions, so an entry
 * may itself contain regex syntax (for example `pasado\\s*mañana`).
 */
interface RelativeDateWords {
  // Single-day keywords
  today: string[];
  tomorrow: string[];
  dayAfterTomorrow: string[];
  yesterday: string[];
  dayBeforeYesterday: string[];

  // Whole-week expressions
  nextWeek: RegExp;
  weekAfterNext: RegExp;

  // Prefixes that are combined with a weekday name ("next monday", "this friday")
  nextPrefix: RegExp;
  thisPrefix: RegExp;
}
|
||
|
||
/**
 * Relative date vocabulary per locale.
 *
 * Expressions containing accented letters use `\p{L}` look-arounds with the
 * `u` flag instead of `\b`, because `\b` treats accented letters as
 * non-word characters.
 */
const RELATIVE_DATE_WORDS: Record<ParserLocale, RelativeDateWords> = {
  // ---- German ----
  de: {
    today: ['heute'],
    tomorrow: ['morgen'],
    dayAfterTomorrow: ['übermorgen'],
    yesterday: ['gestern'],
    dayBeforeYesterday: ['vorgestern'],
    nextWeek: /(?<!\p{L})nächste[nr]?\s*woche(?!\p{L})/iu,
    weekAfterNext: /(?<!\p{L})über\s*nächste[nr]?\s*woche(?!\p{L})/iu,
    nextPrefix: /(?<!\p{L})nächste[nr]?\s*/iu,
    thisPrefix: /\bdiese[nr]?\s*/i,
  },

  // ---- English (no single-word "day after tomorrow" / "day before yesterday") ----
  en: {
    today: ['today'],
    tomorrow: ['tomorrow'],
    dayAfterTomorrow: [],
    yesterday: ['yesterday'],
    dayBeforeYesterday: [],
    nextWeek: /\bnext\s*week\b/i,
    weekAfterNext: /\bweek\s*after\s*next\b/i,
    nextPrefix: /\bnext\s*/i,
    thisPrefix: /\bthis\s*/i,
  },

  // ---- French ----
  fr: {
    today: ["aujourd'hui", 'aujourdhui'],
    tomorrow: ['demain'],
    dayAfterTomorrow: ['après-demain', 'apres-demain'],
    yesterday: ['hier'],
    dayBeforeYesterday: ['avant-hier'],
    nextWeek: /\bsemaine\s*prochaine\b/i,
    weekAfterNext: /\bsemaine\s*d'?après\b/i,
    nextPrefix: /\bprochain[e]?\s*/i,
    thisPrefix: /\bce(?:tte?)?\s*/i,
  },

  // ---- Spanish (accent-less spellings accepted for "mañana") ----
  es: {
    today: ['hoy'],
    tomorrow: ['mañana', 'manana'],
    dayAfterTomorrow: ['pasado\\s*mañana', 'pasado\\s*manana'],
    yesterday: ['ayer'],
    dayBeforeYesterday: ['anteayer'],
    nextWeek: /(?<!\p{L})próxima\s*semana(?!\p{L})/iu,
    weekAfterNext: /(?<!\p{L})semana\s*después(?!\p{L})/iu,
    nextPrefix: /(?<!\p{L})próxim[oa]\s*/iu,
    thisPrefix: /\best[ea]\s*/i,
  },

  // ---- Italian ----
  it: {
    today: ['oggi'],
    tomorrow: ['domani'],
    dayAfterTomorrow: ['dopodomani'],
    yesterday: ['ieri'],
    dayBeforeYesterday: ["l'?altro\\s*ieri", 'avantieri'],
    nextWeek: /\bprossima\s*settimana\b/i,
    weekAfterNext: /\bsettimana\s*dopo\b/i,
    nextPrefix: /\bprossim[oa]\s*/i,
    thisPrefix: /\bquest[oa]\s*/i,
  },
};
|
||
|
||
/**
 * "in X days" expressions per locale. Capture group 1 is the number of days.
 *
 * The Italian pattern accepts both the singular "giorno" and the plural
 * "giorni" ("tra 1 giorno", "tra 3 giorni").
 */
const IN_DAYS_PATTERNS: Record<ParserLocale, RegExp> = {
  de: /\bin\s*(\d+)\s*tage?n?\b/i,
  en: /\bin\s*(\d+)\s*days?\b/i,
  fr: /\bdans\s*(\d+)\s*jours?\b/i,
  es: /\ben\s*(\d+)\s*d[ií]as?\b/i,
  it: /\btra\s*(\d+)\s*giorn[oi]\b/i,
};
|
||
|
||
/**
 * "in X weeks" expressions per locale. Capture group 1 is the number of weeks.
 *
 * The Italian pattern accepts both the singular "settimana" and the plural
 * "settimane" ("tra 1 settimana", "tra 2 settimane").
 */
const IN_WEEKS_PATTERNS: Record<ParserLocale, RegExp> = {
  de: /\bin\s*(\d+)\s*wochen?\b/i,
  en: /\bin\s*(\d+)\s*weeks?\b/i,
  fr: /\bdans\s*(\d+)\s*semaines?\b/i,
  es: /\ben\s*(\d+)\s*semanas?\b/i,
  it: /\btra\s*(\d+)\s*settiman[ae]\b/i,
};
|
||
|
||
/**
 * Lower-case full month names per locale.
 * The array index is the zero-based month number (January = 0 … December = 11).
 */
const MONTH_NAMES: Record<ParserLocale, string[]> = {
  de: [
    'januar', 'februar', 'märz', 'april', 'mai', 'juni',
    'juli', 'august', 'september', 'oktober', 'november', 'dezember',
  ],
  en: [
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
  ],
  fr: [
    'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
    'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre',
  ],
  es: [
    'enero', 'febrero', 'marzo', 'abril', 'mayo', 'junio',
    'julio', 'agosto', 'septiembre', 'octubre', 'noviembre', 'diciembre',
  ],
  it: [
    'gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno',
    'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre',
  ],
};
|
||
|
||
/**
 * Three-letter month abbreviations per locale, index-aligned with MONTH_NAMES
 * (January = 0 … December = 11).
 */
const SHORT_MONTH_NAMES: Record<ParserLocale, string[]> = {
  // German
  de: ['jan', 'feb', 'mär', 'apr', 'mai', 'jun', 'jul', 'aug', 'sep', 'okt', 'nov', 'dez'],
  // English
  en: ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'],
  // French
  fr: ['jan', 'fév', 'mar', 'avr', 'mai', 'jun', 'jul', 'aoû', 'sep', 'oct', 'nov', 'déc'],
  // Spanish
  es: ['ene', 'feb', 'mar', 'abr', 'may', 'jun', 'jul', 'ago', 'sep', 'oct', 'nov', 'dic'],
  // Italian
  it: ['gen', 'feb', 'mar', 'apr', 'mag', 'giu', 'lug', 'ago', 'set', 'ott', 'nov', 'dic'],
};
|
||
|
||
/**
 * Clock-time expressions per locale.
 * Capture group 1 is the hour, capture group 2 the (optional) minutes.
 *
 * French notes:
 *  - the pattern starts with a Unicode-aware look-behind instead of `\b`,
 *    because `\b` can never sit in front of the non-ASCII letter "à" after a
 *    space, which meant the optional "à" prefix was never consumed;
 *  - a bare hour marker is accepted ("14h"), not only "14h30" / "14:30".
 */
const TIME_PATTERNS: Record<ParserLocale, RegExp> = {
  de: /\b(?:um\s*)?(\d{1,2})(?::(\d{2}))?\s*(?:uhr)?\b/i,
  en: /\b(?:at\s*)?(\d{1,2})(?::(\d{2}))?\s*(?:o'?clock|am|pm)?\b/i,
  fr: /(?<![\p{L}\p{N}])(?:à\s*)?(\d{1,2})(?:[h:](\d{2})|h)?\s*(?:heures?)?\b/iu,
  es: /\b(?:a\s*las?\s*)?(\d{1,2})(?::(\d{2}))?\s*(?:horas?)?\b/i,
  it: /\b(?:alle?\s*)?(\d{1,2})(?::(\d{2}))?\b/i,
};
|
||
|
||
/**
 * Words and BCP 47 locale tag used when rendering a date preview:
 * the labels for "today" / "tomorrow" plus the tag handed to
 * `toLocaleDateString`.
 */
const PREVIEW_WORDS: Record<ParserLocale, { today: string; tomorrow: string; locale: string }> = {
  de: {
    today: 'Heute',
    tomorrow: 'Morgen',
    locale: 'de-DE',
  },
  en: {
    today: 'Today',
    tomorrow: 'Tomorrow',
    locale: 'en-US',
  },
  fr: {
    today: "Aujourd'hui",
    tomorrow: 'Demain',
    locale: 'fr-FR',
  },
  es: {
    today: 'Hoy',
    tomorrow: 'Mañana',
    locale: 'es-ES',
  },
  it: {
    today: 'Oggi',
    tomorrow: 'Domani',
    locale: 'it-IT',
  },
};
|
||
|
||
// ============================================================================
|
||
// Fuzzy Matching Utilities
|
||
// ============================================================================
|
||
|
||
/**
 * Levenshtein edit distance between two strings (intended for short words).
 *
 * Counts the minimum number of single-character insertions, deletions and
 * substitutions needed to turn `a` into `b`. Characters are compared by
 * UTF-16 code unit.
 *
 * @param a - first string
 * @param b - second string
 * @returns the edit distance (0 when the strings are equal)
 */
function levenshtein(a: string, b: string): number {
  // Row for the empty prefix of `a`: turning '' into b[0..j) costs j insertions.
  let previousRow: number[] = Array.from({ length: b.length + 1 }, (_, j) => j);

  for (let i = 1; i <= a.length; i++) {
    // First column: turning a[0..i) into '' costs i deletions.
    const currentRow: number[] = [i];

    for (let j = 1; j <= b.length; j++) {
      const deletion = previousRow[j] + 1;
      const insertion = currentRow[j - 1] + 1;
      const substitution = previousRow[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
      currentRow.push(Math.min(deletion, insertion, substitution));
    }

    previousRow = currentRow;
  }

  return previousRow[b.length];
}
|
||
|
||
/**
 * Keywords that are eligible for typo-tolerant matching, per locale.
 * The allowed edit distance (1 or 2, depending on word length) is decided by
 * the matcher, not by this table.
 */
const FUZZY_DATE_WORDS: Record<ParserLocale, string[]> = {
  de: [
    // relative days
    'heute',
    'morgen',
    'übermorgen',
    // weekdays
    'montag',
    'dienstag',
    'mittwoch',
    'donnerstag',
    'freitag',
    'samstag',
    'sonntag',
  ],
  en: [
    // relative days
    'today',
    'tomorrow',
    // weekdays
    'monday',
    'tuesday',
    'wednesday',
    'thursday',
    'friday',
    'saturday',
    'sunday',
  ],
  fr: ['demain', 'lundi', 'mardi', 'mercredi', 'jeudi', 'vendredi', 'samedi', 'dimanche'],
  es: ['hoy', 'lunes', 'martes', 'jueves', 'viernes'],
  it: ['oggi', 'domani'],
};
|
||
|
||
/**
 * Find the known date keyword that is closest to a (possibly misspelled) word.
 *
 * The comparison is case-insensitive. The tolerated edit distance depends on
 * the length of the input word: 1 for words of up to 5 characters, 2 for
 * longer words. When several keywords are equally close, the one listed
 * first in FUZZY_DATE_WORDS wins.
 *
 * @param word - the word to check
 * @param locale - locale whose keyword list is used (default `'de'`)
 * @returns the canonical keyword, or `undefined` when nothing is close enough
 */
export function fuzzyMatchDateKeyword(
  word: string,
  locale: ParserLocale = 'de'
): string | undefined {
  const candidates = FUZZY_DATE_WORDS[locale];
  if (!candidates) return undefined;

  const needle = word.toLowerCase();
  const tolerance = needle.length > 5 ? 2 : 1;

  let closest: string | undefined;
  let closestDistance = Infinity;

  for (const candidate of candidates) {
    // The distance can never be smaller than the length difference, so skip early.
    const lengthGap = Math.abs(needle.length - candidate.length);
    if (lengthGap > tolerance) continue;

    const distance = levenshtein(needle, candidate);
    const withinTolerance = distance <= tolerance;
    const improves = distance < closestDistance;
    if (withinTolerance && improves) {
      closest = candidate;
      closestDistance = distance;
    }
  }

  return closest;
}
|
||
|
||
// ============================================================================
|
||
// Pattern Builder
|
||
// ============================================================================
|
||
|
||
/**
 * Wrap a regex fragment in Unicode-aware word boundaries.
 *
 * The standard `\b` does not treat accented characters (lunedì, mañana, …) as
 * word characters, so the fragment is instead surrounded by a negative
 * look-behind and look-ahead for any letter or number. The result must be
 * compiled with the `u` flag.
 *
 * @param word - regex source to wrap (inserted verbatim, not escaped)
 * @returns regex source with the boundary assertions added
 */
function wb(word: string): string {
  const notAfterWordChar = '(?<![\\p{L}\\p{N}])';
  const notBeforeWordChar = '(?![\\p{L}\\p{N}])';
  return notAfterWordChar + word + notBeforeWordChar;
}
|
||
|
||
/**
 * Build the ordered list of relative-date patterns for one locale.
 *
 * `extractDate` tests the patterns in array order and stops at the first
 * hit, so the order matters:
 *
 *  1. Compound day words ("pasado mañana", "après-demain", "avant-hier",
 *     "l'altro ieri"). They contain the plain word for tomorrow/yesterday as
 *     a separate token, so they must be tried BEFORE the plain words —
 *     otherwise "pasado mañana" is read as "mañana" and the wrong day is
 *     returned.
 *  2. today / tomorrow / yesterday.
 *  3. "week after next" before "next week".
 *  4. "this <weekday>", then "next <weekday>", then bare weekday names.
 *  5. "<preposition> <month>" (first day of that month).
 *
 * Every `getDate` callback reads the clock when it is called, so the
 * returned list can be cached.
 *
 * @param locale - locale whose vocabulary is used
 * @returns the patterns in matching priority order
 */
function buildDatePatterns(locale: ParserLocale): DatePattern[] {
  const words = RELATIVE_DATE_WORDS[locale];
  const weekdays = WEEKDAY_NAMES[locale];
  const patterns: DatePattern[] = [];

  // Trailing boundary that also treats accented letters as word characters.
  const wordEnd = '(?![\\p{L}\\p{N}])';

  // Registers one pattern per keyword, each resolving to "now + offsetDays".
  const pushDayOffsets = (keywords: string[], offsetDays: number): void => {
    for (const keyword of keywords) {
      patterns.push({
        pattern: new RegExp(wb(keyword), 'iu'),
        getDate: () => addDays(new Date(), offsetDays),
      });
    }
  };

  // 1. Compound words first (see function comment).
  pushDayOffsets(words.dayAfterTomorrow, 2);
  pushDayOffsets(words.dayBeforeYesterday, -2);

  // 2. Plain single-day words.
  for (const keyword of words.today) {
    patterns.push({ pattern: new RegExp(wb(keyword), 'iu'), getDate: () => new Date() });
  }
  pushDayOffsets(words.tomorrow, 1);
  pushDayOffsets(words.yesterday, -1);

  // 3. Whole weeks: "week after next" must come before "next week".
  patterns.push({ pattern: words.weekAfterNext, getDate: () => addDays(new Date(), 14) });
  patterns.push({ pattern: words.nextWeek, getDate: () => addDays(new Date(), 7) });

  // 4a. "this <weekday>": the day inside the current Monday-based week, even
  //     when it has already passed (useful for logging retroactively).
  weekdays.forEach((day, index) => {
    // `index` counts from Monday; Date#getDay() counts from Sunday (= 0).
    const targetDayOfWeek = (index + 1) % 7;

    patterns.push({
      pattern: new RegExp(`${words.thisPrefix.source}${day}${wordEnd}`, 'iu'),
      getDate: () => {
        const now = new Date();
        if (now.getDay() === targetDayOfWeek) return now;
        const monday = startOfWeek(now, { weekStartsOn: 1 });
        return addDays(monday, index);
      },
    });
  });

  // 4b. "next <weekday>"
  weekdays.forEach((day, index) => {
    const dayFn = NEXT_DAY_FNS[index];
    patterns.push({
      pattern: new RegExp(`${words.nextPrefix.source}${day}${wordEnd}`, 'iu'),
      getDate: () => dayFn(new Date()),
    });
  });

  // 4c. Bare weekday names imply the next occurrence.
  weekdays.forEach((day, index) => {
    const dayFn = NEXT_DAY_FNS[index];
    patterns.push({
      pattern: new RegExp(wb(day), 'iu'),
      getDate: () => dayFn(new Date()),
    });
  });

  // 5. Month names with preposition: "im März", "in January", "en février".
  const monthPrepositions: Record<ParserLocale, string> = {
    de: '(?:im|in)\\s+',
    en: '(?:in)\\s+',
    fr: '(?:en)\\s+',
    es: '(?:en)\\s+',
    it: '(?:in|a)\\s+',
  };
  const preposition = monthPrepositions[locale];

  MONTH_NAMES[locale].forEach((monthName, monthIndex) => {
    patterns.push({
      pattern: new RegExp(`\\b${preposition}${monthName}\\b`, 'iu'),
      getDate: () => {
        const now = new Date();
        // A month that has already passed refers to next year.
        const year = monthIndex < now.getMonth() ? now.getFullYear() + 1 : now.getFullYear();
        return new Date(year, monthIndex, 1);
      },
    });
  });

  return patterns;
}
|
||
|
||
/** Pattern lists that have already been built, keyed by locale. */
const datePatternCache = new Map<ParserLocale, DatePattern[]>();

/**
 * Return the relative-date patterns for a locale, building them on first use
 * and serving them from the cache afterwards.
 *
 * @param locale - locale whose patterns are requested
 * @returns the (shared) pattern list for that locale
 */
function getDatePatterns(locale: ParserLocale): DatePattern[] {
  const cached = datePatternCache.get(locale);
  if (cached) return cached;

  const built = buildDatePatterns(locale);
  datePatternCache.set(locale, built);
  return built;
}
|
||
|
||
// ============================================================================
|
||
// Specific date pattern (DD.MM. or DD.MM.YYYY or MM/DD/YYYY)
|
||
// ============================================================================
|
||
|
||
/**
 * DD.MM, DD.MM. or DD.MM.YYYY (European).
 * Groups: 1 = day, 2 = month, 3 = year (optional, 2-4 digits).
 *
 * The tail is an explicit alternation so that the closing dot of "15.3." is
 * part of the match; with an optional dot followed by `\b` the dot was
 * dropped from the match and left behind in the remaining text.
 */
const EU_DATE_PATTERN = /\b(\d{1,2})\.(\d{1,2})(?:\.(\d{2,4})\b|\.(?!\d)|\b)/;

/**
 * MM/DD or MM/DD/YYYY (US).
 * Groups: 1 = month, 2 = day, 3 = year (optional, 2-4 digits).
 */
const US_DATE_PATTERN = /\b(\d{1,2})\/(\d{1,2})(?:\/(\d{2,4}))?\b/;

/**
 * Select the numeric date format for a locale.
 *
 * English uses month/day order with slashes; every other locale uses
 * day.month order with dots. The returned `parse` function converts a match
 * of the returned `pattern` into a local-time `Date`. It does not validate
 * the day/month ranges — out-of-range values roll over as usual for the
 * `Date` constructor.
 *
 * @param locale - locale that decides the format
 * @returns the pattern and the matching parse function
 */
function getSpecificDatePattern(locale: ParserLocale): {
  pattern: RegExp;
  parse: (match: RegExpMatchArray) => Date;
} {
  // Missing year → current year; one- or two-digit values are read as 20xx.
  const resolveYear = (raw: string | undefined): number => {
    if (!raw) return new Date().getFullYear();
    const value = parseInt(raw, 10);
    return value < 100 ? 2000 + value : value;
  };

  if (locale === 'en') {
    return {
      pattern: US_DATE_PATTERN,
      parse: (match) => {
        const month = parseInt(match[1], 10) - 1;
        const day = parseInt(match[2], 10);
        return new Date(resolveYear(match[3]), month, day);
      },
    };
  }

  // European format (DE, FR, ES, IT)
  return {
    pattern: EU_DATE_PATTERN,
    parse: (match) => {
      const day = parseInt(match[1], 10);
      const month = parseInt(match[2], 10) - 1;
      return new Date(resolveYear(match[3]), month, day);
    },
  };
}
|
||
|
||
// ============================================================================
|
||
// Date Extraction
|
||
// ============================================================================
|
||
|
||
/**
 * Extract a date from free text.
 *
 * The following forms are tried in order; the first one that matches wins
 * and its text is removed from the returned `remaining` string:
 *
 *  1. "in X weeks" (before "in X days" to avoid a partial match)
 *  2. "in X days"
 *  3. day + month name: "5. März", "3rd of May", "le 5 mars"
 *  4. numeric date: DD.MM[.YYYY] (MM/DD[/YYYY] for English)
 *  5. relative keywords, weekdays and "<preposition> <month>"
 *  6. the same again after correcting one misspelled keyword
 *
 * @param text - input text
 * @param locale - locale whose vocabulary and date format are used (default `'de'`)
 * @returns the date (or `undefined`) and the text that is left over
 */
export function extractDate(text: string, locale: ParserLocale = 'de'): ExtractResult<Date> {
  let remaining = text;

  // 1. "in X weeks"
  const inWeeksPattern = IN_WEEKS_PATTERNS[locale];
  const inWeeksMatch = remaining.match(inWeeksPattern);
  if (inWeeksMatch) {
    const date = addWeeks(new Date(), parseInt(inWeeksMatch[1], 10));
    return { value: date, remaining: remaining.replace(inWeeksPattern, '').trim() };
  }

  // 2. "in X days"
  const inDaysPattern = IN_DAYS_PATTERNS[locale];
  const inDaysMatch = remaining.match(inDaysPattern);
  if (inDaysMatch) {
    const date = addDays(new Date(), parseInt(inDaysMatch[1], 10));
    return { value: date, remaining: remaining.replace(inDaysPattern, '').trim() };
  }

  // 3. Day + month name. Full names are listed before the abbreviations so
  //    that "märz" is preferred over "mär".
  const months = MONTH_NAMES[locale];
  const shortMonths = SHORT_MONTH_NAMES[locale];
  const monthPattern = [...months, ...shortMonths].join('|');
  // `\b` is ASCII-only: it would accept "5 María" as "5 mar" and reject the
  // abbreviation "aoû" at the end of a word. Use a Unicode-aware look-ahead.
  const wordEnd = '(?![\\p{L}\\p{N}])';
  const ordinalSources: Record<ParserLocale, string> = {
    de: `\\b(\\d{1,2})\\.\\s*(${monthPattern})${wordEnd}`,
    en: `\\b(\\d{1,2})(?:st|nd|rd|th)?\\s+(?:of\\s+)?(${monthPattern})${wordEnd}`,
    fr: `\\b(?:le\\s+)?(\\d{1,2})(?:er|e|ème)?\\s+(${monthPattern})${wordEnd}`,
    es: `\\b(?:el\\s+)?(\\d{1,2})\\s+(?:de\\s+)?(${monthPattern})${wordEnd}`,
    it: `\\b(?:il\\s+)?(\\d{1,2})\\s+(${monthPattern})${wordEnd}`,
  };
  // Only the expression of the active locale is compiled.
  const ordinalPattern = new RegExp(ordinalSources[locale], 'iu');

  const ordinalMatch = remaining.match(ordinalPattern);
  if (ordinalMatch) {
    const day = parseInt(ordinalMatch[1], 10);
    const monthStr = ordinalMatch[2].toLowerCase();
    let monthIndex = months.indexOf(monthStr);
    if (monthIndex === -1) {
      monthIndex = shortMonths.indexOf(monthStr);
    }
    if (monthIndex >= 0 && day >= 1 && day <= 31) {
      const now = new Date();
      // Compare against the START of today: the candidate is at midnight, so
      // comparing with `now` would push today's own date into next year.
      const startOfToday = new Date(now.getFullYear(), now.getMonth(), now.getDate());
      let year = now.getFullYear();
      if (isBefore(new Date(year, monthIndex, day), startOfToday)) year++;
      remaining = remaining.replace(ordinalPattern, '').trim();
      return { value: new Date(year, monthIndex, day), remaining };
    }
  }

  // 4. Numeric date (DD.MM. or MM/DD)
  const { pattern: specificPattern, parse: parseSpecific } = getSpecificDatePattern(locale);
  const specificDateMatch = remaining.match(specificPattern);
  if (specificDateMatch) {
    const date = parseSpecific(specificDateMatch);
    // Reject impossible dates such as "2.0" or "3.14": the Date constructor
    // would silently roll them over into a different month or year.
    const first = parseInt(specificDateMatch[1], 10);
    const second = parseInt(specificDateMatch[2], 10);
    // English is month/day, every other locale day.month.
    const [day, month] = locale === 'en' ? [second, first] : [first, second];
    if (date.getDate() === day && date.getMonth() === month - 1) {
      remaining = remaining.replace(specificPattern, '').trim();
      return { value: date, remaining };
    }
  }

  // 5. Relative keywords (exact match), in priority order.
  for (const { pattern, getDate } of getDatePatterns(locale)) {
    if (pattern.test(remaining)) {
      const date = getDate();
      remaining = remaining.replace(pattern, '').trim();
      return { value: date, remaining };
    }
  }

  // 6. Fuzzy match: correct one misspelled keyword and run the extraction again.
  for (const word of remaining.split(/\s+/)) {
    if (word.length < 3) continue; // Skip very short words

    const matched = fuzzyMatchDateKeyword(word, locale);
    // An exact keyword has already failed the steps above; retrying with an
    // unchanged text could only recurse on the same input.
    if (!matched || matched === word.toLowerCase()) continue;

    const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const corrected = remaining.replace(new RegExp(escapedWord, 'i'), matched);
    const retryResult = extractDate(corrected, locale);
    if (retryResult.value) {
      return retryResult;
    }
  }

  return { value: undefined, remaining };
}
|
||
|
||
// ============================================================================
|
||
// Date Range Extraction
|
||
// ============================================================================
|
||
|
||
/** A span between two dates, as produced by `extractDateRange`. */
export interface DateRange {
  /** First day of the range. */
  start: Date;
  /** Last day of the range. */
  end: Date;
}
|
||
|
||
/**
 * Numeric European range "15.-17.3.".
 * Groups: 1 = start day, 2 = end day, 3 = month (optional in the pattern,
 * but required by `extractDateRange`).
 */
const EU_DATE_RANGE_PATTERN = /\b(\d{1,2})\.\s*[-–]\s*(\d{1,2})\.\s*(\d{1,2})?\.\s*/;

/**
 * Extract a day range within a single month.
 *
 * Supported forms:
 *  - with a full month name: "15.-17. März", "15-17 March"
 *  - numeric European: "15.-17.3."
 *
 * Both dates are placed in the current year. A range is only returned when
 * both days actually exist in the month; impossible values (for example
 * "30-45 March" or month 13) yield no match instead of silently rolling
 * over into another month.
 *
 * @param text - input text
 * @param locale - locale whose month names are used (default `'de'`)
 * @returns the range (or `undefined`) and the text that is left over
 */
export function extractDateRange(
  text: string,
  locale: ParserLocale = 'de'
): ExtractResult<DateRange> {
  const months = MONTH_NAMES[locale];
  const year = new Date().getFullYear();

  // Builds the range, or returns undefined when a day does not exist in the month.
  const buildRange = (
    startDay: number,
    endDay: number,
    monthIndex: number
  ): DateRange | undefined => {
    const start = new Date(year, monthIndex, startDay);
    const end = new Date(year, monthIndex, endDay);
    const unchanged = (date: Date, day: number): boolean =>
      date.getMonth() === monthIndex && date.getDate() === day;
    return unchanged(start, startDay) && unchanged(end, endDay) ? { start, end } : undefined;
  };

  // "15.-17. März" / "15-17 March". The trailing look-ahead is Unicode-aware
  // so that an accented letter directly after the month name blocks the match.
  const withMonthName = new RegExp(
    `\\b(\\d{1,2})\\.?\\s*[-–]\\s*(\\d{1,2})\\.?\\s+(${months.join('|')})(?![\\p{L}\\p{N}])`,
    'iu'
  );
  const namedMatch = text.match(withMonthName);
  if (namedMatch) {
    const monthIndex = months.indexOf(namedMatch[3].toLowerCase());
    const range =
      monthIndex >= 0
        ? buildRange(parseInt(namedMatch[1], 10), parseInt(namedMatch[2], 10), monthIndex)
        : undefined;
    if (range) {
      return { value: range, remaining: text.replace(withMonthName, '').trim() };
    }
  }

  // "15.-17.3." (EU numeric format)
  const euMatch = text.match(EU_DATE_RANGE_PATTERN);
  if (euMatch && euMatch[3]) {
    const range = buildRange(
      parseInt(euMatch[1], 10),
      parseInt(euMatch[2], 10),
      parseInt(euMatch[3], 10) - 1
    );
    if (range) {
      return { value: range, remaining: text.replace(EU_DATE_RANGE_PATTERN, '').trim() };
    }
  }

  return { value: undefined, remaining: text };
}
|
||
|
||
// ============================================================================
|
||
// Relative Time Extraction ("in 2 Stunden", "in 30 Minuten")
|
||
// ============================================================================
|
||
|
||
/** A relative-time expression and the function that turns a match into a point in time. */
interface RelativeTimePattern {
  /** Expression tested against the input text. */
  pattern: RegExp;
  /** Computes the resulting date from the match. */
  getDate: (match: RegExpMatchArray) => Date;
}
|
||
|
||
/** "Half an hour from now". */
const halfHourFromNow = (): Date => addMinutes(new Date(), 30);

/** "N hours from now"; N is capture group 1 of the match. */
const hoursFromNow = (match: RegExpMatchArray): Date =>
  addHours(new Date(), parseInt(match[1], 10));

/** "N minutes from now"; N is capture group 1 of the match. */
const minutesFromNow = (match: RegExpMatchArray): Date =>
  addMinutes(new Date(), parseInt(match[1], 10));

/**
 * Relative time expressions per locale, in matching order:
 * the "half an hour" phrase, then "in N hours", then "in N minutes".
 */
const RELATIVE_TIME_PATTERNS: Record<ParserLocale, RelativeTimePattern[]> = {
  de: [
    { pattern: /\bin\s+einer?\s+halben\s+stunde\b/i, getDate: halfHourFromNow },
    { pattern: /\bin\s+(\d+)\s+stunde[n]?\b/i, getDate: hoursFromNow },
    { pattern: /\bin\s+(\d+)\s+minute[n]?\b/i, getDate: minutesFromNow },
  ],
  en: [
    { pattern: /\bin\s+half\s+an?\s+hour\b/i, getDate: halfHourFromNow },
    { pattern: /\bin\s+(\d+)\s+hours?\b/i, getDate: hoursFromNow },
    { pattern: /\bin\s+(\d+)\s+minutes?\b/i, getDate: minutesFromNow },
  ],
  fr: [
    { pattern: /\bdans\s+une?\s+demi[e]?\s+heure\b/i, getDate: halfHourFromNow },
    { pattern: /\bdans\s+(\d+)\s+heures?\b/i, getDate: hoursFromNow },
    { pattern: /\bdans\s+(\d+)\s+minutes?\b/i, getDate: minutesFromNow },
  ],
  es: [
    { pattern: /\ben\s+media\s+hora\b/i, getDate: halfHourFromNow },
    { pattern: /\ben\s+(\d+)\s+horas?\b/i, getDate: hoursFromNow },
    { pattern: /\ben\s+(\d+)\s+minutos?\b/i, getDate: minutesFromNow },
  ],
  it: [
    { pattern: /\btra\s+mezz'?ora\b/i, getDate: halfHourFromNow },
    { pattern: /\btra\s+(\d+)\s+or[ea]\b/i, getDate: hoursFromNow },
    { pattern: /\btra\s+(\d+)\s+minut[io]\b/i, getDate: minutesFromNow },
  ],
};
|
||
|
||
/**
 * Extract a relative time expression ("in 2 hours", "in 30 minutes").
 *
 * The result is a full `Date`, because a relative time fixes both the day
 * and the time of day. The locale's patterns are tried in their listed
 * order; the first match is removed from the text.
 *
 * @param text - input text
 * @param locale - locale whose expressions are used (default `'de'`)
 * @returns the point in time (or `undefined`) and the text that is left over
 */
export function extractRelativeTime(
  text: string,
  locale: ParserLocale = 'de'
): ExtractResult<Date> {
  for (const entry of RELATIVE_TIME_PATTERNS[locale]) {
    const hit = text.match(entry.pattern);
    if (!hit) continue;

    const value = entry.getDate(hit);
    const remaining = text.replace(entry.pattern, '').trim();
    return { value, remaining };
  }

  return { value: undefined, remaining: text };
}
|
||
|
||
// ============================================================================
|
||
// Time Extraction
|
||
// ============================================================================
|
||
|
||
/**
 * Extract a clock time from free text.
 *
 * All candidates of the locale's time pattern are examined from left to
 * right and the first one that forms a valid time (00:00 – 23:59) is used.
 * An out-of-range number earlier in the text ("Room 99 at 14:30") therefore
 * no longer hides a valid time that follows it.
 *
 * For English, a "pm" suffix adds 12 hours (except for 12pm) and "12am"
 * becomes hour 0.
 *
 * @param text - input text
 * @param locale - locale whose time pattern is used (default `'de'`)
 * @returns the time (or `undefined`) and the text with the used match removed
 */
export function extractTime(
  text: string,
  locale: ParserLocale = 'de'
): ExtractResult<{ hours: number; minutes: number }> {
  const timePattern = TIME_PATTERNS[locale];

  // `matchAll` needs a global expression; the shared pattern stays untouched.
  const flags = timePattern.flags.includes('g') ? timePattern.flags : `${timePattern.flags}g`;
  const scanner = new RegExp(timePattern.source, flags);

  for (const match of text.matchAll(scanner)) {
    let hours = parseInt(match[1], 10);
    const minutes = match[2] ? parseInt(match[2], 10) : 0;

    // Handle AM/PM for English
    if (locale === 'en') {
      const fullMatch = match[0].toLowerCase();
      if (fullMatch.includes('pm') && hours < 12) hours += 12;
      if (fullMatch.includes('am') && hours === 12) hours = 0;
    }

    const isValid = hours >= 0 && hours <= 23 && minutes >= 0 && minutes <= 59;
    if (!isValid) continue;

    // Cut out exactly this match (it is not necessarily the first candidate).
    const start = match.index ?? 0;
    const end = start + match[0].length;
    const remaining = (text.slice(0, start) + text.slice(end)).trim();
    return { value: { hours, minutes }, remaining };
  }

  return { value: undefined, remaining: text };
}
|
||
|
||
// ============================================================================
|
||
// Timezone Extraction
|
||
// ============================================================================
|
||
|
||
/** Common timezone abbreviations mapped to IANA timezone identifiers. */
const TIMEZONE_MAP: Record<string, string> = {
  // Europe
  CET: 'Europe/Berlin',
  CEST: 'Europe/Berlin',
  MET: 'Europe/Berlin',
  MEST: 'Europe/Berlin',
  WET: 'Europe/London',
  WEST: 'Europe/London',
  EET: 'Europe/Athens',
  EEST: 'Europe/Athens',
  GMT: 'Europe/London',

  // United States
  EST: 'America/New_York',
  EDT: 'America/New_York',
  CST: 'America/Chicago',
  CDT: 'America/Chicago',
  MST: 'America/Denver',
  MDT: 'America/Denver',
  PST: 'America/Los_Angeles',
  PDT: 'America/Los_Angeles',

  // Asia / Pacific
  JST: 'Asia/Tokyo',
  KST: 'Asia/Seoul',
  IST: 'Asia/Kolkata',
  AEST: 'Australia/Sydney',

  // Universal
  UTC: 'UTC',
};

/** Alternation of every known abbreviation, in table order. */
const TIMEZONE_ABBREVS = Object.keys(TIMEZONE_MAP).join('|');

/** Matches one abbreviation as a whole word. Case-sensitive: only upper-case text matches. */
const TIMEZONE_PATTERN = new RegExp(`\\b(${TIMEZONE_ABBREVS})\\b`);

/**
 * Extract a timezone abbreviation from text.
 *
 * @param text - input text
 * @returns the IANA timezone identifier of the first abbreviation found
 *          (or `undefined`) and the text with that abbreviation removed
 */
export function extractTimezone(text: string): ExtractResult<string> {
  const hit = TIMEZONE_PATTERN.exec(text);
  const zone = hit ? TIMEZONE_MAP[hit[1].toUpperCase()] : undefined;

  if (!zone) {
    return { value: undefined, remaining: text };
  }

  return {
    value: zone,
    remaining: text.replace(TIMEZONE_PATTERN, '').trim(),
  };
}
|
||
|
||
// ============================================================================
|
||
// Tag Extraction
|
||
// ============================================================================
|
||
|
||
/**
 * Extract tags (#tag1 #tag2) from text.
 *
 * A tag is a `#` at the start of the text or directly after whitespace,
 * followed by one or more non-whitespace characters. A `#` in the middle of
 * a token (URL fragment such as `page#top`, "C#-Kurs") is not a tag and is
 * left in the text.
 *
 * @param text - input text
 * @returns the tag names without the leading `#` (an empty array when there
 *          are none) and the text with all tags removed
 */
export function extractTags(text: string): ExtractResult<string[]> {
  const tagPattern = /(?<!\S)#(\S+)/g;

  const tags = Array.from(text.matchAll(tagPattern), (match) => match[1]);
  const remaining = text.replace(tagPattern, '').trim();

  return { value: tags, remaining };
}
|
||
|
||
// ============================================================================
|
||
// @ Reference Extraction (Projects, Calendars, Companies)
|
||
// ============================================================================
|
||
|
||
/**
 * Extract the first @reference from text.
 *
 * A reference is an `@` at the start of the text or directly after
 * whitespace, followed by one or more non-whitespace characters. An `@`
 * inside a token — most notably in an e-mail address — is not a reference.
 *
 * @param text - input text
 * @returns the reference name without the leading `@` (or `undefined`) and
 *          the text with that reference removed
 */
export function extractAtReference(text: string): ExtractResult<string> {
  const referencePattern = /(?<!\S)@(\S+)/;
  const match = text.match(referencePattern);

  if (!match) {
    return { value: undefined, remaining: text };
  }

  return {
    value: match[1],
    remaining: text.replace(referencePattern, '').trim(),
  };
}
|
||
|
||
/**
 * Extract all @references from text.
 *
 * A reference is an `@` at the start of the text or directly after
 * whitespace, followed by one or more non-whitespace characters. An `@`
 * inside a token — most notably in an e-mail address — is not a reference.
 *
 * @param text - input text
 * @returns the reference names without the leading `@` (`undefined` when
 *          there are none) and the text with all references removed
 */
export function extractAtReferences(text: string): ExtractResult<string[]> {
  const referencePattern = /(?<!\S)@(\S+)/g;

  const refs = Array.from(text.matchAll(referencePattern), (match) => match[1]);
  const remaining = text.replace(referencePattern, '').trim();

  return { value: refs.length > 0 ? refs : undefined, remaining };
}
|
||
|
||
// ============================================================================
|
||
// Combined Date + Time
|
||
// ============================================================================
|
||
|
||
/**
 * Combine a date and a time of day into a single Date object.
 *
 * When a time is given, the result is a new Date on the same day with the
 * hours and minutes taken from `time` and seconds and milliseconds set to
 * zero. Dates produced from "now" carry the current seconds, which would
 * otherwise leak into the result ("14:30" becoming 14:30:37.123). The
 * `date` argument itself is not modified.
 *
 * @param date - the day; when missing, the result is `undefined`
 * @param time - the time of day; when missing, `date` is returned unchanged
 * @returns the combined Date, `date` itself, or `undefined`
 */
export function combineDateAndTime(
  date?: Date,
  time?: { hours: number; minutes: number }
): Date | undefined {
  if (!date) return undefined;
  if (!time) return date;

  // setMinutes/setHours return new Date objects, so mutating `combined` is safe.
  const combined = setHours(setMinutes(date, time.minutes), time.hours);
  combined.setSeconds(0, 0);
  return combined;
}
|
||
|
||
// ============================================================================
|
||
// Preview Formatting
|
||
// ============================================================================
|
||
|
||
/**
 * Format a date for the preview display.
 *
 * Today and tomorrow (compared by calendar day in local time) are shown as
 * the locale's word for them; every other date is rendered by
 * `toLocaleDateString` with short weekday, numeric day and short month.
 *
 * @param date - date to format
 * @param locale - locale for the words and the date format (default `'de'`)
 * @returns the preview text
 */
export function formatDatePreview(date: Date, locale: ParserLocale = 'de'): string {
  const labels = PREVIEW_WORDS[locale];
  const today = new Date();
  const dayKey = date.toDateString();

  if (dayKey === today.toDateString()) {
    return labels.today;
  }

  if (dayKey === addDays(today, 1).toDateString()) {
    return labels.tomorrow;
  }

  const format: Intl.DateTimeFormatOptions = {
    weekday: 'short',
    day: 'numeric',
    month: 'short',
  };
  return date.toLocaleDateString(labels.locale, format);
}
|
||
|
||
/**
 * Render a time as `HH:MM`, left-padding hours and minutes with zeros to two
 * digits.
 *
 * @param time - Hours and minutes to render.
 * @returns The formatted time, e.g. `09:05`.
 */
export function formatTimePreview(time: { hours: number; minutes: number }): string {
	const twoDigits = (part: number): string => part.toString().padStart(2, '0');
	return [time.hours, time.minutes].map((part) => twoDigits(part)).join(':');
}
|
||
|
||
/**
 * Render an optional date plus optional time as one preview string.
 *
 * @param date - Date to describe. Without a date the result is an empty string,
 *   even if a time is given.
 * @param time - Time to append after the date label, separated by one space.
 * @param locale - Parser locale passed on to `formatDatePreview` (default `'de'`).
 * @returns The date label, optionally followed by the `HH:MM` time.
 */
export function formatDateTimePreview(
	date?: Date,
	time?: { hours: number; minutes: number },
	locale: ParserLocale = 'de'
): string {
	if (!date) return '';

	const datePart = formatDatePreview(date, locale);
	return time ? `${datePart} ${formatTimePreview(time)}` : datePart;
}
|
||
|
||
// ============================================================================
|
||
// Recurrence Extraction
|
||
// ============================================================================
|
||
|
||
/**
 * One recurrence phrase and the RRULE it maps to.
 */
interface RecurrencePattern {
	/** Phrase matcher; may capture the repeat interval as group 1. */
	pattern: RegExp;
	/** RRULE text; a literal `$1` is a placeholder for capture group 1. */
	rrule: string;
}

/**
 * Recurrence phrases per locale.
 *
 * Every locale covers daily/weekly/monthly/yearly, "every <weekday>" for all
 * seven weekdays, and "every N days / N weeks".
 *
 * Words that end in a non-ASCII letter (Italian `lunedì` …) cannot use a
 * trailing `\b`, because `\b` only knows ASCII word characters; those patterns
 * end with `(?!\w)` instead and also accept the unaccented spelling.
 */
const RECURRENCE_PATTERNS: Record<ParserLocale, RecurrencePattern[]> = {
	de: [
		{ pattern: /\bjeden\s+tag\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\btäglich\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bjede\s+woche\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bwöchentlich\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bjeden\s+monat\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bmonatlich\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bjedes\s+jahr\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\bjährlich\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\bjeden\s+montag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
		{ pattern: /\bjeden\s+dienstag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TU' },
		{ pattern: /\bjeden\s+mittwoch\b/i, rrule: 'FREQ=WEEKLY;BYDAY=WE' },
		{ pattern: /\bjeden\s+donnerstag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TH' },
		{ pattern: /\bjeden\s+freitag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=FR' },
		{ pattern: /\bjeden\s+samstag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SA' },
		{ pattern: /\bjeden\s+sonntag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SU' },
		{ pattern: /\balle\s+(\d+)\s+tage\b/i, rrule: 'FREQ=DAILY;INTERVAL=$1' },
		{ pattern: /\balle\s+(\d+)\s+wochen\b/i, rrule: 'FREQ=WEEKLY;INTERVAL=$1' },
	],
	en: [
		{ pattern: /\bevery\s+day\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bdaily\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bevery\s+week\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bweekly\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bevery\s+month\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bmonthly\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bevery\s+year\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\byearly\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\bevery\s+monday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
		{ pattern: /\bevery\s+tuesday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TU' },
		{ pattern: /\bevery\s+wednesday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=WE' },
		{ pattern: /\bevery\s+thursday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TH' },
		{ pattern: /\bevery\s+friday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=FR' },
		{ pattern: /\bevery\s+saturday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SA' },
		{ pattern: /\bevery\s+sunday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SU' },
		{ pattern: /\bevery\s+(\d+)\s+days\b/i, rrule: 'FREQ=DAILY;INTERVAL=$1' },
		{ pattern: /\bevery\s+(\d+)\s+weeks\b/i, rrule: 'FREQ=WEEKLY;INTERVAL=$1' },
	],
	fr: [
		{ pattern: /\btous\s+les\s+jours\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bquotidien\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bchaque\s+semaine\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bhebdomadaire\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bchaque\s+mois\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bmensuel\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bchaque\s+année\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\bannuel\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\bchaque\s+lundi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
		{ pattern: /\bchaque\s+mardi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TU' },
		{ pattern: /\bchaque\s+mercredi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=WE' },
		{ pattern: /\bchaque\s+jeudi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TH' },
		{ pattern: /\bchaque\s+vendredi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=FR' },
		{ pattern: /\bchaque\s+samedi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SA' },
		{ pattern: /\bchaque\s+dimanche\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SU' },
		{ pattern: /\btous\s+les\s+(\d+)\s+jours\b/i, rrule: 'FREQ=DAILY;INTERVAL=$1' },
		{ pattern: /\btoutes\s+les\s+(\d+)\s+semaines\b/i, rrule: 'FREQ=WEEKLY;INTERVAL=$1' },
	],
	es: [
		{ pattern: /\btodos\s+los\s+d[ií]as\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bdiario\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bcada\s+semana\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bsemanal\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bcada\s+mes\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bmensual\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bcada\s+año\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\banual\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\bcada\s+lunes\b/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
		{ pattern: /\bcada\s+martes\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TU' },
		{ pattern: /\bcada\s+mi[eé]rcoles\b/i, rrule: 'FREQ=WEEKLY;BYDAY=WE' },
		{ pattern: /\bcada\s+jueves\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TH' },
		{ pattern: /\bcada\s+viernes\b/i, rrule: 'FREQ=WEEKLY;BYDAY=FR' },
		{ pattern: /\bcada\s+s[aá]bado\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SA' },
		{ pattern: /\bcada\s+domingo\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SU' },
		{ pattern: /\bcada\s+(\d+)\s+d[ií]as\b/i, rrule: 'FREQ=DAILY;INTERVAL=$1' },
		{ pattern: /\bcada\s+(\d+)\s+semanas\b/i, rrule: 'FREQ=WEEKLY;INTERVAL=$1' },
	],
	it: [
		{ pattern: /\bogni\s+giorno\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bgiornaliero\b/i, rrule: 'FREQ=DAILY' },
		{ pattern: /\bogni\s+settimana\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bsettimanale\b/i, rrule: 'FREQ=WEEKLY' },
		{ pattern: /\bogni\s+mese\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bmensile\b/i, rrule: 'FREQ=MONTHLY' },
		{ pattern: /\bogni\s+anno\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\bannuale\b/i, rrule: 'FREQ=YEARLY' },
		{ pattern: /\bogni\s+luned[iì](?!\w)/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
		{ pattern: /\bogni\s+marted[iì](?!\w)/i, rrule: 'FREQ=WEEKLY;BYDAY=TU' },
		{ pattern: /\bogni\s+mercoled[iì](?!\w)/i, rrule: 'FREQ=WEEKLY;BYDAY=WE' },
		{ pattern: /\bogni\s+gioved[iì](?!\w)/i, rrule: 'FREQ=WEEKLY;BYDAY=TH' },
		{ pattern: /\bogni\s+venerd[iì](?!\w)/i, rrule: 'FREQ=WEEKLY;BYDAY=FR' },
		{ pattern: /\bogni\s+sabato\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SA' },
		{ pattern: /\bogni\s+domenica\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SU' },
		{ pattern: /\bogni\s+(\d+)\s+giorni\b/i, rrule: 'FREQ=DAILY;INTERVAL=$1' },
		{ pattern: /\bogni\s+(\d+)\s+settimane\b/i, rrule: 'FREQ=WEEKLY;INTERVAL=$1' },
	],
};
|
||
|
||
/**
 * Extract a recurrence phrase from text and return it as an RFC 5545 RRULE
 * string.
 *
 * The patterns of the given locale are tried in table order and the first
 * usable match wins. For interval phrases ("every N days") the captured number
 * replaces the `$1` placeholder of the rule. RFC 5545 requires INTERVAL to be a
 * positive integer, so the number is normalised (`02` becomes `2`) and a
 * non-positive or out-of-range value is treated as "no match" instead of
 * producing an invalid rule.
 *
 * @param text - Raw input text.
 * @param locale - Locale whose phrase table is used (default `'de'`).
 * @returns `value` is the RRULE and `remaining` the trimmed text without the
 *   matched phrase. When nothing matches, `value` is `undefined` and
 *   `remaining` is the unmodified input.
 */
export function extractRecurrence(
	text: string,
	locale: ParserLocale = 'de'
): ExtractResult<string> {
	for (const { pattern, rrule } of RECURRENCE_PATTERNS[locale]) {
		const match = text.match(pattern);
		if (!match) continue;

		let resolvedRrule = rrule;
		const capturedInterval = match[1];
		if (capturedInterval) {
			const interval = Number.parseInt(capturedInterval, 10);
			// "every 0 days" is not a valid recurrence; leave the text untouched.
			if (!Number.isSafeInteger(interval) || interval < 1) continue;
			resolvedRrule = rrule.replace('$1', String(interval));
		}

		return { value: resolvedRrule, remaining: text.replace(pattern, '').trim() };
	}

	return { value: undefined, remaining: text };
}
|
||
|
||
// ============================================================================
|
||
// Main Parser Function
|
||
// ============================================================================
|
||
|
||
/**
 * Parse base input: extracts the patterns shared by all apps (tags, date, time)
 * and returns what is left as the title.
 *
 * `@references` are not handled here; use `extractAtReference` /
 * `extractAtReferences` for those. App-specific parsers combine this function
 * with their own extraction steps.
 *
 * Confidence:
 * - `0.5` when nothing was extracted and the title is just the input (compared
 *   after whitespace normalisation, so stray double spaces do not matter),
 * - `0.8` when something was extracted but no title text is left,
 * - `1.0` otherwise.
 *
 * @param input - Raw user input.
 * @param locale - Locale used for date and time parsing (default `'de'`).
 * @returns Title, date, time, tag names, the trimmed raw input and the
 *   confidence score.
 */
export function parseBaseInput(input: string, locale: ParserLocale = 'de'): BaseParsedInput {
	const rawInput = input.trim();

	// Tags go first: they are clearly delimited and cannot be confused with
	// date or time words.
	const tagsResult = extractTags(rawInput);
	const tagNames = tagsResult.value ?? [];

	const dateResult = extractDate(tagsResult.remaining, locale);
	const timeResult = extractTime(dateResult.remaining, locale);
	const time = timeResult.value;

	// A time without a date means "today".
	const date = time && !dateResult.value ? new Date() : dateResult.value;

	// Collapse the gaps left behind by removed tokens.
	const title = timeResult.remaining.replace(/\s+/g, ' ').trim();

	const hasExtractions = Boolean(date || time || tagNames.length > 0);
	let confidence = 1.0;
	if (hasExtractions) {
		// Everything was consumed by extractions: usable, but there is no title.
		confidence = title.length > 0 ? 1.0 : 0.8;
	} else if (title === rawInput.replace(/\s+/g, ' ')) {
		// Nothing was recognised at all: ambiguous. The raw input is normalised
		// the same way as the title so that inner whitespace cannot hide this.
		confidence = 0.5;
	}

	return {
		title,
		date,
		time,
		tagNames,
		rawInput,
		confidence,
	};
}
|
||
|
||
// ============================================================================
|
||
// Utility: Clean title from all patterns
|
||
// ============================================================================
|
||
|
||
/**
 * Strip every recognised pattern from text and return the bare title.
 *
 * Removed, in this order: `#tags`, `@references`, the locale's "in X days"
 * pattern, its specific-date pattern, each of its relative date patterns and
 * its time pattern. Whitespace runs are then collapsed to single spaces and the
 * result is trimmed.
 *
 * @param text - Text to clean.
 * @param locale - Locale whose date and time patterns are used (default `'de'`).
 * @returns The cleaned title.
 */
export function cleanTitle(text: string, locale: ParserLocale = 'de'): string {
	// Tags, @references and "in X days"
	let stripped = text
		.replace(/#\S+/g, '')
		.replace(/@\S+/g, '')
		.replace(IN_DAYS_PATTERNS[locale], '');

	// Specific dates
	stripped = stripped.replace(getSpecificDatePattern(locale).pattern, '');

	// Relative date patterns
	for (const entry of getDatePatterns(locale)) {
		stripped = stripped.replace(entry.pattern, '');
	}

	// Time, then whitespace clean-up
	return stripped.replace(TIME_PATTERNS[locale], '').replace(/\s+/g, ' ').trim();
}
|
||
|
||
// ============================================================================
|
||
// Parser Compose Helper
|
||
// ============================================================================
|
||
|
||
/**
 * A single named extraction step, as consumed by `createAppParser`.
 */
export interface ExtractionStep<T> {
	/** Key under which this step's value is stored in the parser's `extractions`. */
	name: string;
	/**
	 * Pulls this step's value out of the text.
	 * Returns the value (or `undefined`) together with the text that is left.
	 */
	extract: (text: string) => { value: T | undefined; remaining: string };
}
|
||
|
||
/**
 * Build an app-specific parser from a list of extraction steps.
 *
 * The returned `parse` function trims the input, runs the custom steps in the
 * given order (each step receives the text left over by the previous one and
 * its value is stored under the step's `name`), and finally runs
 * `parseBaseInput` on whatever text remains.
 *
 * @example
 * ```ts
 * const { parse } = createAppParser('de', [
 *   { name: 'priority', extract: extractPriority },
 *   { name: 'project', extract: (t) => extractAtReference(t) },
 * ]);
 * const result = parse('Task morgen @Arbeit !!!');
 * // result.base = { title, date, time, tagNames, ... }
 * // result.extractions = { priority: 'urgent', project: 'Arbeit' }
 * ```
 *
 * @param locale - Locale handed to the base parser.
 * @param steps - Custom extraction steps, applied before the base parser.
 * @returns An object exposing the `parse` function.
 */
export function createAppParser<T extends Record<string, unknown>>(
	locale: ParserLocale,
	steps: ExtractionStep<unknown>[]
): {
	parse: (input: string) => { base: BaseParsedInput; extractions: T };
} {
	const parse = (input: string): { base: BaseParsedInput; extractions: T } => {
		const collected: Record<string, unknown> = {};
		let pending = input.trim();

		// Custom steps run first and hand their leftover text to the next step.
		for (const step of steps) {
			const { value, remaining } = step.extract(pending);
			collected[step.name] = value;
			pending = remaining;
		}

		// The base parser sees only what the custom steps left behind.
		return { base: parseBaseInput(pending, locale), extractions: collected as T };
	};

	return { parse };
}
|