package extract

import (
	"testing"
	"time"
)

// TestCleanText pins the expected output of cleanText for plain text,
// HTML markup, script/style blocks, and irregular whitespace.
func TestCleanText(t *testing.T) {
	tests := []struct {
		name  string
		input string
		want  string
	}{
		{
			name:  "plain text unchanged",
			input: "Hello world",
			want:  "Hello world",
		},
		{
			name:  "strips HTML tags",
			input: "<p>Hello world</p>",
			want:  "Hello world",
		},
		{
			// The script body must disappear along with the tags.
			name:  "strips script tags and content",
			input: "Before<script>alert('x')</script>After",
			want:  "BeforeAfter",
		},
		{
			// The style body must disappear along with the tags.
			name:  "strips style tags and content",
			input: "Before<style>body { color: red; }</style>After",
			want:  "BeforeAfter",
		},
		{
			name:  "collapses whitespace",
			input: "Hello \n\t world",
			want:  "Hello world",
		},
		{
			name:  "trims leading and trailing whitespace",
			input: " Hello world ",
			want:  "Hello world",
		},
		{
			// Newlines inside the script block must not defeat the match.
			name:  "handles multiline script",
			input: "A<script>\nvar x = 1;\nvar y = 2;\n</script>B",
			want:  "AB",
		},
		{
			name:  "empty string",
			input: "",
			want:  "",
		},
		{
			name:  "only tags",
			input: "<div><span></span></div>",
			want:  "",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := cleanText(tt.input)
			if got != tt.want {
				t.Errorf("cleanText() = %q, want %q", got, tt.want)
			}
		})
	}
}

// TestCountWords pins the word counts countWords is expected to return
// for empty input and for words separated by spaces, tabs, and newlines.
func TestCountWords(t *testing.T) {
	tests := []struct {
		name  string
		input string
		want  int
	}{
		{name: "empty string", input: "", want: 0},
		{name: "single word", input: "hello", want: 1},
		{name: "multiple words", input: "hello world foo bar", want: 4},
		{name: "extra whitespace", input: " hello world ", want: 2},
		{name: "tabs and newlines", input: "hello\tworld\nfoo", want: 3},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := countWords(tt.input)
			if got != tt.want {
				t.Errorf("countWords() = %d, want %d", got, tt.want)
			}
		})
	}
}

// TestFormatTime pins the expected output of formatTime: an empty string
// for a nil pointer or a zero time.Time, and a "2006-01-02T15:04:05Z"-shaped
// timestamp for a populated UTC time.
func TestFormatTime(t *testing.T) {
	// ptr returns the address of its own copy of v, so every table entry
	// gets a distinct *time.Time.
	ptr := func(v time.Time) *time.Time { return &v }

	tests := []struct {
		name string
		in   *time.Time
		want string
	}{
		{name: "nil time", in: nil, want: ""},
		{name: "zero time", in: ptr(time.Time{}), want: ""},
		{
			name: "valid time",
			in:   ptr(time.Date(2024, 6, 15, 12, 30, 0, 0, time.UTC)),
			want: "2024-06-15T12:30:00Z",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := formatTime(tt.in)
			if got != tt.want {
				t.Errorf("formatTime() = %q, want %q", got, tt.want)
			}
		})
	}
}

// TestBuildCacheKey pins the expectation that BuildCacheKey returns its
// argument prefixed with "extract:", including for the empty string.
func TestBuildCacheKey(t *testing.T) {
	tests := []struct {
		name string
		url  string
		want string
	}{
		{name: "simple URL", url: "https://example.com", want: "extract:https://example.com"},
		{name: "URL with path", url: "https://example.com/path/to/page", want: "extract:https://example.com/path/to/page"},
		{name: "empty string", url: "", want: "extract:"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := BuildCacheKey(tt.url)
			if got != tt.want {
				t.Errorf("BuildCacheKey() = %q, want %q", got, tt.want)
			}
		})
	}
}