/** * Base Natural Language Parser * * Shared parsing utilities for date, time, and tags across all apps. * App-specific parsers (task-parser, event-parser, contact-parser) extend this. * * Supports locales: de, en, fr, es, it */ import { addDays, addWeeks, addHours, addMinutes, nextMonday, nextTuesday, nextWednesday, nextThursday, nextFriday, nextSaturday, nextSunday, setHours, setMinutes, isBefore, startOfWeek, } from 'date-fns'; export type ParserLocale = 'de' | 'en' | 'fr' | 'es' | 'it'; export interface BaseParsedInput { title: string; date?: Date; time?: { hours: number; minutes: number }; tagNames: string[]; rawInput: string; /** Confidence score 0-1. 1.0 = exact match, 0.8 = fuzzy, 0.5 = ambiguous */ confidence: number; } export interface ExtractResult { value: T | undefined; remaining: string; } // ============================================================================ // Locale-aware Pattern Definitions // ============================================================================ interface DatePattern { pattern: RegExp; getDate: (match?: RegExpMatchArray) => Date; } type DayFn = (date: Date) => Date; const NEXT_DAY_FNS: DayFn[] = [ nextMonday, nextTuesday, nextWednesday, nextThursday, nextFriday, nextSaturday, nextSunday, ]; // Weekday names per locale (Monday-Sunday order) const WEEKDAY_NAMES: Record = { de: ['montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag', 'samstag', 'sonntag'], en: ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'], fr: ['lundi', 'mardi', 'mercredi', 'jeudi', 'vendredi', 'samedi', 'dimanche'], es: ['lunes', 'martes', 'miércoles', 'jueves', 'viernes', 'sábado', 'domingo'], it: ['lunedì', 'martedì', 'mercoledì', 'giovedì', 'venerdì', 'sabato', 'domenica'], }; // Relative date keywords per locale interface RelativeDateWords { today: string[]; tomorrow: string[]; dayAfterTomorrow: string[]; yesterday: string[]; dayBeforeYesterday: string[]; nextWeek: RegExp; weekAfterNext: RegExp; nextPrefix: 
RegExp; thisPrefix: RegExp; } const RELATIVE_DATE_WORDS: Record = { de: { today: ['heute'], tomorrow: ['morgen'], dayAfterTomorrow: ['übermorgen'], yesterday: ['gestern'], dayBeforeYesterday: ['vorgestern'], nextWeek: /(? = { de: /\bin\s*(\d+)\s*tage?n?\b/i, en: /\bin\s*(\d+)\s*days?\b/i, fr: /\bdans\s*(\d+)\s*jours?\b/i, es: /\ben\s*(\d+)\s*d[ií]as?\b/i, it: /\btra\s*(\d+)\s*giorni?\b/i, }; const IN_WEEKS_PATTERNS: Record = { de: /\bin\s*(\d+)\s*wochen?\b/i, en: /\bin\s*(\d+)\s*weeks?\b/i, fr: /\bdans\s*(\d+)\s*semaines?\b/i, es: /\ben\s*(\d+)\s*semanas?\b/i, it: /\btra\s*(\d+)\s*settimane?\b/i, }; // Month names per locale (January=0) const MONTH_NAMES: Record = { de: [ 'januar', 'februar', 'märz', 'april', 'mai', 'juni', 'juli', 'august', 'september', 'oktober', 'november', 'dezember', ], en: [ 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december', ], fr: [ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre', ], es: [ 'enero', 'febrero', 'marzo', 'abril', 'mayo', 'junio', 'julio', 'agosto', 'septiembre', 'octubre', 'noviembre', 'diciembre', ], it: [ 'gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno', 'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre', ], }; // Short month names (3 chars) const SHORT_MONTH_NAMES: Record = { de: ['jan', 'feb', 'mär', 'apr', 'mai', 'jun', 'jul', 'aug', 'sep', 'okt', 'nov', 'dez'], en: ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'], fr: ['jan', 'fév', 'mar', 'avr', 'mai', 'jun', 'jul', 'aoû', 'sep', 'oct', 'nov', 'déc'], es: ['ene', 'feb', 'mar', 'abr', 'may', 'jun', 'jul', 'ago', 'sep', 'oct', 'nov', 'dic'], it: ['gen', 'feb', 'mar', 'apr', 'mag', 'giu', 'lug', 'ago', 'set', 'ott', 'nov', 'dic'], }; // Time patterns per locale const TIME_PATTERNS: Record = { de: /\b(?:um\s*)?(\d{1,2})(?::(\d{2}))?\s*(?:uhr)?\b/i, en: 
/\b(?:at\s*)?(\d{1,2})(?::(\d{2}))?\s*(?:o'?clock|am|pm)?\b/i, fr: /\b(?:à\s*)?(\d{1,2})(?:[h:](\d{2}))?\s*(?:heures?)?\b/i, es: /\b(?:a\s*las?\s*)?(\d{1,2})(?::(\d{2}))?\s*(?:horas?)?\b/i, it: /\b(?:alle?\s*)?(\d{1,2})(?::(\d{2}))?\b/i, }; // Preview formatting words const PREVIEW_WORDS: Record = { de: { today: 'Heute', tomorrow: 'Morgen', locale: 'de-DE' }, en: { today: 'Today', tomorrow: 'Tomorrow', locale: 'en-US' }, fr: { today: "Aujourd'hui", tomorrow: 'Demain', locale: 'fr-FR' }, es: { today: 'Hoy', tomorrow: 'Mañana', locale: 'es-ES' }, it: { today: 'Oggi', tomorrow: 'Domani', locale: 'it-IT' }, }; // ============================================================================ // Fuzzy Matching Utilities // ============================================================================ /** * Simple Levenshtein distance (for short words only) */ function levenshtein(a: string, b: string): number { const m = a.length; const n = b.length; const dp: number[][] = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0)); for (let i = 0; i <= m; i++) dp[i][0] = i; for (let j = 0; j <= n; j++) dp[0][j] = j; for (let i = 1; i <= m; i++) { for (let j = 1; j <= n; j++) { dp[i][j] = Math.min( dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1) ); } } return dp[m][n]; } // Keywords that should support fuzzy matching (max distance 1-2 depending on length) const FUZZY_DATE_WORDS: Record = { de: [ 'heute', 'morgen', 'übermorgen', 'montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag', 'samstag', 'sonntag', ], en: [ 'today', 'tomorrow', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', ], fr: ['demain', 'lundi', 'mardi', 'mercredi', 'jeudi', 'vendredi', 'samedi', 'dimanche'], es: ['hoy', 'lunes', 'martes', 'jueves', 'viernes'], it: ['oggi', 'domani'], }; /** * Try fuzzy matching a word against known date keywords. * Returns the canonical keyword if a close match is found, undefined otherwise. 
* Max distance: 1 for words <= 5 chars, 2 for longer words. */ export function fuzzyMatchDateKeyword( word: string, locale: ParserLocale = 'de' ): string | undefined { const keywords = FUZZY_DATE_WORDS[locale]; if (!keywords) return undefined; const lower = word.toLowerCase(); const maxDist = lower.length <= 5 ? 1 : 2; let bestMatch: string | undefined; let bestDist = Infinity; for (const keyword of keywords) { // Skip if length difference is too large if (Math.abs(lower.length - keyword.length) > maxDist) continue; const dist = levenshtein(lower, keyword); if (dist <= maxDist && dist < bestDist) { bestDist = dist; bestMatch = keyword; } } return bestMatch; } // ============================================================================ // Pattern Builder // ============================================================================ // Word boundary that works with accented characters (lunedì, mañana, etc.) // Standard \b doesn't treat accented chars as word chars. // We use Unicode-aware regex with lookbehind/lookahead. function wb(word: string): string { // Use negative lookbehind/lookahead for word-like chars including accented ones return `(? 
new Date() }); } // Tomorrow for (const word of words.tomorrow) { patterns.push({ pattern: new RegExp(wb(word), 'iu'), getDate: () => addDays(new Date(), 1), }); } // Day after tomorrow for (const word of words.dayAfterTomorrow) { patterns.push({ pattern: new RegExp(wb(word), 'iu'), getDate: () => addDays(new Date(), 2), }); } // Yesterday for (const word of words.yesterday) { patterns.push({ pattern: new RegExp(wb(word), 'iu'), getDate: () => addDays(new Date(), -1), }); } // Day before yesterday for (const word of words.dayBeforeYesterday) { patterns.push({ pattern: new RegExp(wb(word), 'iu'), getDate: () => addDays(new Date(), -2), }); } // Week after next (must come before "next week") patterns.push({ pattern: words.weekAfterNext, getDate: () => addDays(new Date(), 14) }); // Next week patterns.push({ pattern: words.nextWeek, getDate: () => addDays(new Date(), 7) }); // "this " patterns - gets the day in the current week // If already past, still returns this week's day (for logging retroactively) for (let i = 0; i < weekdays.length; i++) { const day = weekdays[i]; const targetDayOfWeek = [1, 2, 3, 4, 5, 6, 0][i]; // Mon=1..Sun=0 patterns.push({ pattern: new RegExp(`${words.thisPrefix.source}${day}(?![\\p{L}\\p{N}])`, 'iu'), getDate: () => { const now = new Date(); const currentDay = now.getDay(); if (currentDay === targetDayOfWeek) return now; // If the target day is earlier in the week, use previous, otherwise next const thisWeekStart = startOfWeek(now, { weekStartsOn: 1 }); const diff = targetDayOfWeek === 0 ? 
6 : targetDayOfWeek - 1; // days from Monday return addDays(thisWeekStart, diff); }, }); } // "next " patterns for (let i = 0; i < weekdays.length; i++) { const dayFn = NEXT_DAY_FNS[i]; const day = weekdays[i]; patterns.push({ pattern: new RegExp(`${words.nextPrefix.source}${day}(?![\\p{L}\\p{N}])`, 'iu'), getDate: () => dayFn(new Date()), }); } // Plain weekday names (implies "next") for (let i = 0; i < weekdays.length; i++) { const dayFn = NEXT_DAY_FNS[i]; const day = weekdays[i]; patterns.push({ pattern: new RegExp(wb(day), 'iu'), getDate: () => dayFn(new Date()), }); } // Month names: "im März", "in January", "en février" const months = MONTH_NAMES[locale]; const monthPrepositions: Record = { de: '(?:im|in)\\s+', en: '(?:in)\\s+', fr: '(?:en)\\s+', es: '(?:en)\\s+', it: '(?:in|a)\\s+', }; for (let i = 0; i < months.length; i++) { const monthIndex = i; const monthName = months[i]; // "im März" / "in January" patterns.push({ pattern: new RegExp(`\\b${monthPrepositions[locale]}${monthName}\\b`, 'iu'), getDate: () => { const now = new Date(); let year = now.getFullYear(); // If month already passed, use next year if (monthIndex < now.getMonth()) year++; return new Date(year, monthIndex, 1); }, }); } return patterns; } // Cache built patterns per locale const datePatternCache = new Map(); function getDatePatterns(locale: ParserLocale): DatePattern[] { let patterns = datePatternCache.get(locale); if (!patterns) { patterns = buildDatePatterns(locale); datePatternCache.set(locale, patterns); } return patterns; } // ============================================================================ // Specific date pattern (DD.MM. or DD.MM.YYYY or MM/DD/YYYY) // ============================================================================ // DD.MM. 
// or DD.MM.YYYY (European)
const EU_DATE_PATTERN = /\b(\d{1,2})\.(\d{1,2})\.?(\d{2,4})?\b/;

// MM/DD/YYYY or MM/DD (US)
const US_DATE_PATTERN = /\b(\d{1,2})\/(\d{1,2})(?:\/(\d{2,4}))?\b/;

/**
 * Turn an optional captured year into a full year.
 * A missing capture means the current year; values below 100 are read as 20YY.
 */
function resolveCapturedYear(raw: string | undefined): number {
  if (!raw) return new Date().getFullYear();
  const parsed = parseInt(raw, 10);
  return parsed < 100 ? 2000 + parsed : parsed;
}

/**
 * Build a local-time date, returning an Invalid Date when the day/month pair
 * does not exist (e.g. 31.02. or month 13). `new Date(y, m, d)` would silently
 * roll such input over into a neighbouring month.
 */
function strictLocalDate(year: number, monthIndex: number, day: number): Date {
  const date = new Date(year, monthIndex, day);
  const exists =
    date.getFullYear() === year && date.getMonth() === monthIndex && date.getDate() === day;
  return exists ? date : new Date(NaN);
}

/**
 * Pick the numeric date format for a locale.
 *
 * @param locale 'en' uses month-first slashes (MM/DD[/YYYY]); every other locale
 *   uses day-first dots (DD.MM.[YYYY]).
 * @returns the regex plus a `parse` function for one of its matches. `parse`
 *   returns an Invalid Date (`getTime()` is NaN) when the captured day/month is
 *   not a real calendar date.
 */
function getSpecificDatePattern(locale: ParserLocale): {
  pattern: RegExp;
  parse: (match: RegExpMatchArray) => Date;
} {
  if (locale === 'en') {
    return {
      pattern: US_DATE_PATTERN,
      parse: (match) =>
        strictLocalDate(
          resolveCapturedYear(match[3]),
          parseInt(match[1], 10) - 1,
          parseInt(match[2], 10)
        ),
    };
  }

  // European format (DE, FR, ES, IT)
  return {
    pattern: EU_DATE_PATTERN,
    parse: (match) =>
      strictLocalDate(
        resolveCapturedYear(match[3]),
        parseInt(match[2], 10) - 1,
        parseInt(match[1], 10)
      ),
  };
}

// ============================================================================
// Date Extraction
// ============================================================================

/**
 * Build the "day + month name" pattern for one locale, e.g. "5. März",
 * "3rd of May", "le 5 mars". Capture 1 is the day, capture 2 the month name.
 */
function buildOrdinalPattern(locale: ParserLocale, monthAlternation: string): RegExp {
  const sources: Record<ParserLocale, string> = {
    de: `\\b(\\d{1,2})\\.\\s*(${monthAlternation})\\b`,
    en: `\\b(\\d{1,2})(?:st|nd|rd|th)?\\s+(?:of\\s+)?(${monthAlternation})\\b`,
    fr: `\\b(?:le\\s+)?(\\d{1,2})(?:er|e|ème)?\\s+(${monthAlternation})\\b`,
    es: `\\b(?:el\\s+)?(\\d{1,2})\\s+(?:de\\s+)?(${monthAlternation})\\b`,
    it: `\\b(?:il\\s+)?(\\d{1,2})\\s+(${monthAlternation})\\b`,
  };
  return new RegExp(sources[locale], 'iu');
}

/**
 * Extract the first date expression from text.
 *
 * Tried in order, first hit wins:
 *   1. "in X weeks"  2. "in X days"  3. day + month name
 *   4. numeric date (DD.MM. / MM/DD)  5. relative keywords and weekdays
 *   6. the same again after correcting one fuzzy-matched keyword.
 *
 * @param text input to scan
 * @param locale parser locale, defaults to 'de'
 * @returns the date (undefined if none found) and the text with the matched
 *   expression removed and trimmed; on no match `remaining` is the input unchanged.
 */
export function extractDate(text: string, locale: ParserLocale = 'de'): ExtractResult<Date> {
  let remaining = text;

  // "in X weeks" is tried before "in X days"
  const inWeeksPattern = IN_WEEKS_PATTERNS[locale];
  const inWeeksMatch = remaining.match(inWeeksPattern);
  if (inWeeksMatch) {
    const date = addWeeks(new Date(), parseInt(inWeeksMatch[1], 10));
    remaining = remaining.replace(inWeeksPattern, '').trim();
    return { value: date, remaining };
  }

  // "in X days"
  const inDaysPattern = IN_DAYS_PATTERNS[locale];
  const inDaysMatch = remaining.match(inDaysPattern);
  if (inDaysMatch) {
    const date = addDays(new Date(), parseInt(inDaysMatch[1], 10));
    remaining = remaining.replace(inDaysPattern, '').trim();
    return { value: date, remaining };
  }

  // Day + month name. Full names are listed before short names so that
  // "mars" is preferred over its prefix "mar".
  const months = MONTH_NAMES[locale];
  const shortMonths = SHORT_MONTH_NAMES[locale];
  const ordinalPattern = buildOrdinalPattern(locale, [...months, ...shortMonths].join('|'));
  const ordinalMatch = remaining.match(ordinalPattern);
  if (ordinalMatch) {
    const day = parseInt(ordinalMatch[1], 10);
    const monthStr = ordinalMatch[2].toLowerCase();
    let monthIndex = months.findIndex((m) => m.toLowerCase() === monthStr);
    if (monthIndex === -1) {
      monthIndex = shortMonths.findIndex((m) => m.toLowerCase() === monthStr);
    }
    if (monthIndex >= 0 && day >= 1 && day <= 31) {
      const now = new Date();
      // Compare against midnight: a date that names today must stay in this
      // year, only days that are already over roll into the next one.
      const startOfToday = new Date(now.getFullYear(), now.getMonth(), now.getDate());
      let year = now.getFullYear();
      if (isBefore(new Date(year, monthIndex, day), startOfToday)) year++;
      remaining = remaining.replace(ordinalPattern, '').trim();
      return { value: new Date(year, monthIndex, day), remaining };
    }
  }

  // Numeric date (DD.MM. or MM/DD). Impossible dates are not consumed, so
  // something like "3.14" no longer turns into a rolled-over date.
  const { pattern: specificPattern, parse: parseSpecific } = getSpecificDatePattern(locale);
  const specificDateMatch = remaining.match(specificPattern);
  if (specificDateMatch) {
    const date = parseSpecific(specificDateMatch);
    if (!Number.isNaN(date.getTime())) {
      remaining = remaining.replace(specificPattern, '').trim();
      return { value: date, remaining };
    }
  }

  // Relative keywords, weekdays, month names (exact match)
  for (const { pattern, getDate } of getDatePatterns(locale)) {
    if (pattern.test(remaining)) {
      const date = getDate();
      remaining = remaining.replace(pattern, '').trim();
      return { value: date, remaining };
    }
  }

  // Fuzzy match: correct one misspelled keyword and run the extraction again
  for (const word of remaining.split(/\s+/)) {
    if (word.length < 3) continue; // Skip very short words
    const matched = fuzzyMatchDateKeyword(word, locale);
    if (!matched) continue;

    const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const corrected = remaining.replace(new RegExp(escapedWord, 'i'), () => matched);
    // If the correction changed nothing, retrying would recurse on the same
    // input forever.
    if (corrected === remaining) continue;

    const retryResult = extractDate(corrected, locale);
    if (retryResult.value) {
      return retryResult;
    }
  }

  return { value: undefined, remaining };
}

// ============================================================================
// Date Range Extraction
// ============================================================================

export interface DateRange {
  start: Date;
  end: Date;
}

// Numeric EU range with an explicit month number: "15.-17.3."
const EU_DATE_RANGE_PATTERN = /\b(\d{1,2})\.\s*[-–]\s*(\d{1,2})\.\s*(\d{1,2})?\.\s*/;

/**
 * Extract a day range within one month, e.g. "15.-17. März", "15-17 March"
 * or "15.-17.3.". Both ends are placed in the current year.
 *
 * @param text input to scan
 * @param locale selects the month names, defaults to 'de'
 * @returns the range (undefined if none found) and the text without it
 */
export function extractDateRange(
  text: string,
  locale: ParserLocale = 'de'
): ExtractResult<DateRange> {
  const months = MONTH_NAMES[locale];
  const monthPattern = months.join('|');

  // "15.-17. März" / "15-17 March"
  const withMonthName = new RegExp(
    `\\b(\\d{1,2})\\.?\\s*[-–]\\s*(\\d{1,2})\\.?\\s+(${monthPattern})\\b`,
    'iu'
  );
  const match = text.match(withMonthName);
  if (match) {
    const startDay = parseInt(match[1], 10);
    const endDay = parseInt(match[2], 10);
    const monthStr = match[3].toLowerCase();
    const monthIndex = months.findIndex((m) => m.toLowerCase() === monthStr);
    if (monthIndex >= 0 && startDay >= 1 && endDay >= 1) {
      const year = new Date().getFullYear();
      return {
        value: {
          start: new Date(year, monthIndex, startDay),
          end: new Date(year, monthIndex, endDay),
        },
        remaining: text.replace(withMonthName, '').trim(),
      };
    }
  }

  // "15.-17.3." (EU numeric format); the month capture is optional in the
  // regex, so it is checked explicitly here.
  const euMatch = text.match(EU_DATE_RANGE_PATTERN);
  if (euMatch && euMatch[3]) {
    const startDay = parseInt(euMatch[1], 10);
    const endDay = parseInt(euMatch[2], 10);
    const month = parseInt(euMatch[3], 10) - 1;
    const year = new Date().getFullYear();
    if (startDay >= 1 && endDay >= 1 && month >= 0 && month <= 11) {
      return {
        value: {
          start: new Date(year, month, startDay),
          end: new Date(year, month, endDay),
        },
        remaining: text.replace(EU_DATE_RANGE_PATTERN, '').trim(),
      };
    }
  }

  return { value: undefined, remaining: text };
}

// ============================================================================
// Relative Time Extraction ("in 2 Stunden", "in 30 Minuten")
// ============================================================================

/** One relative-time phrase and the function that turns its match into a Date. */
interface RelativeTimePattern {
  pattern: RegExp;
  getDate: (match: RegExpMatchArray) => Date;
}

// Per locale: "half an hour", "N hours", "N minutes" (tried in that order)
const RELATIVE_TIME_PATTERNS: Record<ParserLocale, RelativeTimePattern[]> = {
  de: [
    { pattern: /\bin\s+einer?\s+halben\s+stunde\b/i, getDate: () => addMinutes(new Date(), 30) },
    {
      pattern: /\bin\s+(\d+)\s+stunde[n]?\b/i,
      getDate: (m) => addHours(new Date(), parseInt(m[1], 10)),
    },
    {
      pattern: /\bin\s+(\d+)\s+minute[n]?\b/i,
      getDate: (m) => addMinutes(new Date(), parseInt(m[1], 10)),
    },
  ],
  en: [
    { pattern: /\bin\s+half\s+an?\s+hour\b/i, getDate: () => addMinutes(new Date(), 30) },
    {
      pattern: /\bin\s+(\d+)\s+hours?\b/i,
      getDate: (m) => addHours(new Date(), parseInt(m[1], 10)),
    },
    {
      pattern: /\bin\s+(\d+)\s+minutes?\b/i,
      getDate: (m) => addMinutes(new Date(), parseInt(m[1], 10)),
    },
  ],
  fr: [
    // Accepts both "demi heure" and the hyphenated "demi-heure"
    {
      pattern: /\bdans\s+une?\s+demi[e]?[\s-]+heure\b/i,
      getDate: () => addMinutes(new Date(), 30),
    },
    {
      pattern: /\bdans\s+(\d+)\s+heures?\b/i,
      getDate: (m) => addHours(new Date(), parseInt(m[1], 10)),
    },
    {
      pattern: /\bdans\s+(\d+)\s+minutes?\b/i,
      getDate: (m) => addMinutes(new Date(), parseInt(m[1], 10)),
    },
  ],
  es: [
    { pattern: /\ben\s+media\s+hora\b/i, getDate: () => addMinutes(new Date(), 30) },
    {
      pattern: /\ben\s+(\d+)\s+horas?\b/i,
      getDate: (m) => addHours(new Date(), parseInt(m[1], 10)),
    },
    {
      pattern: /\ben\s+(\d+)\s+minutos?\b/i,
      getDate: (m) => addMinutes(new Date(), parseInt(m[1], 10)),
    },
  ],
  it: [
    { pattern: /\btra\s+mezz'?ora\b/i, getDate: () => addMinutes(new Date(), 30) },
    {
      pattern: /\btra\s+(\d+)\s+or[ea]\b/i,
      getDate: (m) => addHours(new Date(), parseInt(m[1], 10)),
    },
    {
      pattern: /\btra\s+(\d+)\s+minut[io]\b/i,
      getDate: (m) => addMinutes(new Date(), parseInt(m[1], 10)),
    },
  ],
};

/**
 * Extract relative time expressions ("in 2 hours", "in 30 minutes").
 * Returns a full Date since relative time implies date + time.
 *
 * @param text input to scan
 * @param locale parser locale, defaults to 'de'
 * @returns now + offset (undefined if no phrase matched) and the text without the phrase
 */
export function extractRelativeTime(
  text: string,
  locale: ParserLocale = 'de'
): ExtractResult<Date> {
  for (const { pattern, getDate } of RELATIVE_TIME_PATTERNS[locale]) {
    const match = text.match(pattern);
    if (match) {
      return {
        value: getDate(match),
        remaining: text.replace(pattern, '').trim(),
      };
    }
  }

  return { value: undefined, remaining: text };
}

// ============================================================================
// Time Extraction
// ============================================================================

/**
 * Extract a clock time from text.
 *
 * Candidates are scanned left to right and the first one that is a valid
 * 24h time wins; an out-of-range number earlier in the text ("Raum 99 um
 * 14:30") no longer hides a valid time behind it.
 *
 * @param text input to scan
 * @param locale selects the time pattern, defaults to 'de'; for 'en' an
 *   am/pm suffix is converted to 24h
 * @returns hours/minutes (undefined if none found) and the text without the match
 */
export function extractTime(
  text: string,
  locale: ParserLocale = 'de'
): ExtractResult<{ hours: number; minutes: number }> {
  const basePattern = TIME_PATTERNS[locale];
  // Global copy so exec() walks through every candidate
  const scanFlags = basePattern.flags.includes('g') ? basePattern.flags : `${basePattern.flags}g`;
  const scanner = new RegExp(basePattern.source, scanFlags);

  let match: RegExpExecArray | null;
  while ((match = scanner.exec(text)) !== null) {
    let hours = parseInt(match[1], 10);
    const minutes = match[2] ? parseInt(match[2], 10) : 0;

    // Handle AM/PM for English
    if (locale === 'en') {
      const fullMatch = match[0].toLowerCase();
      if (fullMatch.includes('pm') && hours < 12) hours += 12;
      if (fullMatch.includes('am') && hours === 12) hours = 0;
    }

    // Validate time
    if (hours >= 0 && hours <= 23 && minutes >= 0 && minutes <= 59) {
      // Cut out exactly the accepted match, not just the first candidate
      const before = text.slice(0, match.index);
      const after = text.slice(match.index + match[0].length);
      return { value: { hours, minutes }, remaining: (before + after).trim() };
    }
  }

  return { value: undefined, remaining: text };
}

// ============================================================================
// Timezone Extraction
// ============================================================================

// Common timezone abbreviations mapped to IANA timezone identifiers
const TIMEZONE_MAP: Record<string, string> = {
  // European
  CET: 'Europe/Berlin',
  CEST: 'Europe/Berlin',
  MET: 'Europe/Berlin',
  MEST: 'Europe/Berlin',
  WET: 'Europe/London',
  WEST: 'Europe/London',
  EET: 'Europe/Athens',
  EEST: 'Europe/Athens',
  GMT: 'Europe/London',
  // US
  EST: 'America/New_York',
  EDT: 'America/New_York',
  CST: 'America/Chicago',
  CDT: 'America/Chicago',
  MST: 'America/Denver',
  MDT: 'America/Denver',
  PST: 'America/Los_Angeles',
  PDT: 'America/Los_Angeles',
  // Asia/Pacific
  JST: 'Asia/Tokyo',
  KST: 'Asia/Seoul',
  IST: 'Asia/Kolkata',
  AEST: 'Australia/Sydney',
  // Universal
  UTC: 'UTC',
};

const TIMEZONE_ABBREVS = Object.keys(TIMEZONE_MAP).join('|');
// No `i` flag: only the upper-case spellings listed in TIMEZONE_MAP match.
const TIMEZONE_PATTERN = new RegExp(`\\b(${TIMEZONE_ABBREVS})\\b`);

/**
 * Extract a timezone abbreviation from text.
 *
 * @param text input to scan
 * @returns the IANA identifier for the first abbreviation found (undefined if
 *   none) and the text without it
 */
export function extractTimezone(text: string): ExtractResult<string> {
  const match = text.match(TIMEZONE_PATTERN);
  if (match) {
    const tz = TIMEZONE_MAP[match[1].toUpperCase()];
    if (tz) {
      return {
        value: tz,
        remaining: text.replace(TIMEZONE_PATTERN, '').trim(),
      };
    }
  }
  return { value: undefined, remaining: text };
}

// ============================================================================
// Tag Extraction
//
// ============================================================================

/**
 * Extract tags (#tag1 #tag2) from text.
 *
 * @param text input to scan
 * @returns every tag without its leading '#' (empty array if none) and the
 *   text with all tags removed and trimmed
 */
export function extractTags(text: string): ExtractResult<string[]> {
  const tags = (text.match(/#\S+/g) || []).map((token) => token.slice(1));
  const remaining = text.replace(/#\S+/g, '').trim();
  return { value: tags, remaining };
}

// ============================================================================
// @ Reference Extraction (Projects, Calendars, Companies)
// ============================================================================

/**
 * Extract the first @reference from text.
 *
 * @param text input to scan
 * @returns the reference without '@' (undefined if none) and the text with
 *   that one reference removed
 */
export function extractAtReference(text: string): ExtractResult<string> {
  const match = text.match(/@(\S+)/);
  if (!match) {
    return { value: undefined, remaining: text };
  }
  return { value: match[1], remaining: text.replace(/@\S+/, '').trim() };
}

/**
 * Extract all @references from text.
 *
 * @param text input to scan
 * @returns the references without '@', or undefined (not an empty array) when
 *   there are none, plus the text with all references removed
 */
export function extractAtReferences(text: string): ExtractResult<string[]> {
  const refs = (text.match(/@\S+/g) || []).map((token) => token.slice(1));
  const remaining = text.replace(/@\S+/g, '').trim();
  return { value: refs.length > 0 ? refs : undefined, remaining };
}

// ============================================================================
// Combined Date + Time
// ============================================================================

/**
 * Combine date and time into a single Date object.
 *
 * @param date calendar day; when missing the result is undefined
 * @param time hours/minutes to set on `date`; when missing `date` is returned as is
 * @returns the combined Date (seconds and milliseconds of `date` are not touched)
 */
export function combineDateAndTime(
  date?: Date,
  time?: { hours: number; minutes: number }
): Date | undefined {
  if (!date) return undefined;
  if (!time) return date;
  return setHours(setMinutes(date, time.minutes), time.hours);
}

// ============================================================================
// Preview Formatting
// ============================================================================

/**
 * Format date for preview display: the locale's word for today/tomorrow,
 * otherwise short weekday, day and short month.
 */
export function formatDatePreview(date: Date, locale: ParserLocale = 'de'): string {
  const now = new Date();
  const tomorrow = addDays(now, 1);
  const words = PREVIEW_WORDS[locale];
  const dayKey = date.toDateString();

  if (dayKey === now.toDateString()) {
    return words.today;
  }
  if (dayKey === tomorrow.toDateString()) {
    return words.tomorrow;
  }

  return date.toLocaleDateString(words.locale, {
    weekday: 'short',
    day: 'numeric',
    month: 'short',
  });
}

/**
 * Format time for preview display as zero-padded HH:MM.
 */
export function formatTimePreview(time: { hours: number; minutes: number }): string {
  const hh = time.hours.toString().padStart(2, '0');
  const mm = time.minutes.toString().padStart(2, '0');
  return `${hh}:${mm}`;
}

/**
 * Format date and time for preview.
 *
 * @returns '' without a date; otherwise the date preview, followed by a space
 *   and the time preview when a time is given
 */
export function formatDateTimePreview(
  date?: Date,
  time?: { hours: number; minutes: number },
  locale: ParserLocale = 'de'
): string {
  if (!date) return '';

  const datePart = formatDatePreview(date, locale);
  return time ? `${datePart} ${formatTimePreview(time)}` : datePart;
}

// ============================================================================
// Recurrence Extraction
// ============================================================================

/** A recurrence phrase and its RRULE; `$1` in `rrule` stands for the captured interval. */
interface RecurrencePattern {
  pattern: RegExp;
  rrule: string;
}

const RECURRENCE_PATTERNS: Record<ParserLocale, RecurrencePattern[]> = {
  de: [
    { pattern: /\bjeden\s+tag\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\btäglich\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bjede\s+woche\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bwöchentlich\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bjeden\s+monat\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bmonatlich\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bjedes\s+jahr\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\bjährlich\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\bjeden\s+montag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
    { pattern: /\bjeden\s+dienstag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TU' },
    { pattern: /\bjeden\s+mittwoch\b/i, rrule: 'FREQ=WEEKLY;BYDAY=WE' },
    { pattern: /\bjeden\s+donnerstag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TH' },
    { pattern: /\bjeden\s+freitag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=FR' },
    { pattern: /\bjeden\s+samstag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SA' },
    { pattern: /\bjeden\s+sonntag\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SU' },
    { pattern: /\balle\s+(\d+)\s+tage\b/i, rrule: 'FREQ=DAILY;INTERVAL=$1' },
    { pattern: /\balle\s+(\d+)\s+wochen\b/i, rrule: 'FREQ=WEEKLY;INTERVAL=$1' },
  ],
  en: [
    { pattern: /\bevery\s+day\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bdaily\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bevery\s+week\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bweekly\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bevery\s+month\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bmonthly\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bevery\s+year\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\byearly\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\bevery\s+monday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
    { pattern: /\bevery\s+tuesday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TU' },
    { pattern: /\bevery\s+wednesday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=WE' },
    { pattern: /\bevery\s+thursday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TH' },
    { pattern: /\bevery\s+friday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=FR' },
    { pattern: /\bevery\s+saturday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SA' },
    { pattern: /\bevery\s+sunday\b/i, rrule: 'FREQ=WEEKLY;BYDAY=SU' },
    { pattern: /\bevery\s+(\d+)\s+days\b/i, rrule: 'FREQ=DAILY;INTERVAL=$1' },
    { pattern: /\bevery\s+(\d+)\s+weeks\b/i, rrule: 'FREQ=WEEKLY;INTERVAL=$1' },
  ],
  fr: [
    { pattern: /\btous\s+les\s+jours\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bquotidien\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bchaque\s+semaine\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bhebdomadaire\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bchaque\s+mois\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bmensuel\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bchaque\s+année\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\bannuel\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\bchaque\s+lundi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
    { pattern: /\bchaque\s+mardi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TU' },
    { pattern: /\bchaque\s+mercredi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=WE' },
    { pattern: /\bchaque\s+jeudi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=TH' },
    { pattern: /\bchaque\s+vendredi\b/i, rrule: 'FREQ=WEEKLY;BYDAY=FR' },
  ],
  es: [
    { pattern: /\btodos\s+los\s+d[ií]as\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bdiario\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bcada\s+semana\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bsemanal\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bcada\s+mes\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bmensual\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bcada\s+año\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\banual\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\bcada\s+lunes\b/i, rrule: 'FREQ=WEEKLY;BYDAY=MO' },
  ],
  it: [
    { pattern: /\bogni\s+giorno\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bgiornaliero\b/i, rrule: 'FREQ=DAILY' },
    { pattern: /\bogni\s+settimana\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bsettimanale\b/i, rrule: 'FREQ=WEEKLY' },
    { pattern: /\bogni\s+mese\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bmensile\b/i, rrule: 'FREQ=MONTHLY' },
    { pattern: /\bogni\s+anno\b/i, rrule: 'FREQ=YEARLY' },
    { pattern: /\bannuale\b/i, rrule: 'FREQ=YEARLY' },
  ],
};

/**
 * Extract recurrence rule from text, returns RFC 5545 RRULE string.
 *
 * Patterns are tried in table order; the first usable one wins. For
 * "every N ..." phrases the interval is written as a plain positive integer;
 * a phrase with interval 0 is skipped because RRULE requires INTERVAL >= 1.
 *
 * @param text input to scan
 * @param locale selects the phrase table, defaults to 'de'
 * @returns the RRULE (undefined if nothing matched) and the text without the phrase
 */
export function extractRecurrence(
  text: string,
  locale: ParserLocale = 'de'
): ExtractResult<string> {
  for (const { pattern, rrule } of RECURRENCE_PATTERNS[locale]) {
    const match = text.match(pattern);
    if (!match) continue;

    let resolvedRrule = rrule;
    if (match[1] !== undefined) {
      const interval = parseInt(match[1], 10);
      if (!(interval >= 1)) continue;
      // Re-stringify so "02" becomes "2"
      resolvedRrule = rrule.replace('$1', String(interval));
    }

    return { value: resolvedRrule, remaining: text.replace(pattern, '').trim() };
  }

  return { value: undefined, remaining: text };
}

// ============================================================================
// Main Parser Function
// ============================================================================

/**
 * Parse base input - extracts the common patterns: tags, date and time.
 *
 * App-specific parsers build on this and extract their own patterns
 * (@references are not handled here).
 *
 * @param input raw user text
 * @param locale parser locale, defaults to 'de'
 * @returns the cleaned title plus whatever was extracted. `rawInput` is the
 *   trimmed input. `confidence` is 0.5 when nothing was extracted, 0.8 when
 *   something was extracted but no title is left, 1.0 otherwise.
 */
export function parseBaseInput(input: string, locale: ParserLocale = 'de'): BaseParsedInput {
  let text = input.trim();
  const rawInput = text;

  // Extract tags first (they're clearly delimited)
  const tagsResult = extractTags(text);
  text = tagsResult.remaining;
  const tagNames = tagsResult.value || [];

  // Extract date
  const dateResult = extractDate(text, locale);
  text = dateResult.remaining;
  const date = dateResult.value;

  // Extract time
  const timeResult = extractTime(text, locale);
  text = timeResult.remaining;
  const time = timeResult.value;

  // If we got time but no date, assume today
  const finalDate = time && !date ? new Date() : date;

  // Clean up multiple spaces
  const title = text.replace(/\s+/g, ' ').trim();

  // Confidence depends only on whether anything was extracted. Comparing the
  // title with the raw input would rate "buy   milk" higher than "buy milk",
  // merely because whitespace got normalized.
  const hasExtractions = !!(finalDate || time || tagNames.length > 0);
  let confidence: number;
  if (!hasExtractions) {
    confidence = 0.5; // Nothing was extracted - ambiguous
  } else {
    confidence = title.length > 0 ? 1.0 : 0.8;
  }

  return {
    title,
    date: finalDate,
    time,
    tagNames,
    rawInput,
    confidence,
  };
}

// ============================================================================
// Utility: Clean title from all patterns
// ============================================================================

/**
 * Remove recognized patterns from text to get a clean title: tags,
 * @references, "in X weeks", "in X days", numeric dates, relative date
 * keywords and the first time expression.
 */
export function cleanTitle(text: string, locale: ParserLocale = 'de'): string {
  let result = text;

  // Remove tags
  result = result.replace(/#\S+/g, '');

  // Remove @references
  result = result.replace(/@\S+/g, '');

  // Remove "in X weeks" (extractDate recognizes it, so it must not stay in the title)
  result = result.replace(IN_WEEKS_PATTERNS[locale], '');

  // Remove "in X days"
  result = result.replace(IN_DAYS_PATTERNS[locale], '');

  // Remove specific dates
  const { pattern: specificPattern } = getSpecificDatePattern(locale);
  result = result.replace(specificPattern, '');

  // Remove relative date patterns
  for (const { pattern } of getDatePatterns(locale)) {
    result = result.replace(pattern, '');
  }

  // Remove time
  result = result.replace(TIME_PATTERNS[locale], '');

  // Clean up
  return result.replace(/\s+/g, ' ').trim();
}

// ============================================================================
// Parser Compose Helper
// ============================================================================

/**
 * Extraction step definition for compose helper
 */
export interface ExtractionStep<T = unknown> {
  /** Name of this extraction (used as key in result) */
  name: string;
  /** Extract function: takes text, returns value and remaining text */
  extract: (text: string) => { value: T | undefined; remaining: string };
}

/**
 * Create an app-specific parser from a list of extraction steps.
 *
 * The custom steps run first, in the given order, each on the text the
 * previous step left over. The base parser (tags, date, time) then runs on
 * what remains, so `base.rawInput` is that remainder, not the original input.
 *
 * @param locale locale passed to the base parser
 * @param steps custom extractions; each result is stored under the step's `name`
 *
 * @example
 * ```ts
 * const { parse } = createAppParser('de', [
 *   { name: 'priority', extract: extractPriority },
 *   { name: 'project', extract: (t) => extractAtReference(t) },
 * ]);
 * const result = parse('Task morgen @Arbeit !!!');
 * // result.base = { title, date, time, tagNames, ... }
 * // result.extractions = { priority: 'urgent', project: 'Arbeit' }
 * ```
 */
export function createAppParser<T extends Record<string, unknown>>(
  locale: ParserLocale,
  steps: ExtractionStep<unknown>[]
): {
  parse: (input: string) => { base: BaseParsedInput; extractions: T };
} {
  return {
    parse(input: string) {
      let text = input.trim();
      const extractions: Record<string, unknown> = {};

      // Run custom extraction steps first (before base parser)
      for (const step of steps) {
        const { value, remaining } = step.extract(text);
        extractions[step.name] = value;
        text = remaining;
      }

      // Run base parser on remaining text
      const base = parseBaseInput(text, locale);

      // The caller chooses T; the keys actually present are the step names.
      return { base, extractions: extractions as T };
    },
  };
}