import { extractUrls, htmlToText, decodeEntities, fetchUrlDirect } from '../src/features/web/webFetch';

// Jest suite for the webFetch helpers: URL extraction from chat text,
// HTML-to-text conversion, entity decoding, and the failure paths of
// fetchUrlDirect.

describe('webFetch.extractUrls', () => {
  it('extracts http(s) URLs and strips trailing punctuation', () => {
    const urls = extractUrls('https://koritips.com 가서 내용 분석해줘.');
    expect(urls).toEqual(['https://koritips.com']);
  });

  it('handles multiple URLs with dedupe and max cap', () => {
    const text = 'A: https://a.com/x, B: https://b.com/y 그리고 다시 https://a.com/x 또 https://c.com';
    // Second argument caps the number of returned URLs at 2.
    const urls = extractUrls(text, 2);
    expect(urls).toEqual(['https://a.com/x', 'https://b.com/y']);
  });

  it('ignores slash commands and empty input', () => {
    expect(extractUrls('/wikify https://a.com')).toEqual([]);
    expect(extractUrls('')).toEqual([]);
    expect(extractUrls('URL 없는 문장')).toEqual([]);
  });

  it('strips Korean closing punctuation contamination', () => {
    expect(extractUrls('링크(https://a.com/page)를 봐줘')).toEqual(['https://a.com/page']);
    expect(extractUrls('「https://b.com」 분석')).toEqual(['https://b.com']);
  });

  it('recognizes bare domains without scheme and prepends https://', () => {
    expect(extractUrls('koritips.com 가서 내용 분석해줘')).toEqual(['https://koritips.com']);
    expect(extractUrls('www.example.net/path/page 확인')).toEqual(['https://www.example.net/path/page']);
    expect(extractUrls('naver.co.kr 어때?')).toEqual(['https://naver.co.kr']);
  });

  it('does not mistake filenames or emails for bare domains', () => {
    expect(extractUrls('utils.ts 와 package.json 수정해줘')).toEqual([]);
    expect(extractUrls('메일은 user@gmail.com 입니다')).toEqual([]);
  });

  it('does not double-count a scheme URL as a bare domain', () => {
    expect(extractUrls('https://koritips.com 그리고 koritips.com')).toEqual(['https://koritips.com']);
  });
});

describe('webFetch.htmlToText', () => {
  it('strips scripts, styles, and tags while preserving block structure', () => {
    // NOTE(review): the markup of this fixture was missing from the file (only
    // the visible text remained), which made the `alert` / `color:red`
    // assertions vacuous. It is reconstructed here from what the assertions
    // require; confirm against the intended original fixture.
    const html = `
<html>
<head>
<style>p { color:red; }</style>
<script>alert('x');</script>
</head>
<body>
<h1>제목입니다</h1>
<p>첫 문단.</p>
<p>둘째 문단.</p>
</body>
</html>
`;
    const text = htmlToText(html);
    expect(text).not.toContain('alert');
    expect(text).not.toContain('color:red');
    expect(text).toContain('제목입니다');
    // A paragraph must be followed by a line break in the output.
    expect(text).toMatch(/첫 문단\.\n/);
  });

  it('decodes common entities', () => {
    // NOTE(review): the entity-encoded inputs had been replaced by their decoded
    // characters, leaving an unterminated string literal and a test that
    // exercised nothing. The entity spellings below (&#39;, &nbsp;, decimal
    // &#44608;) are a reconstruction; confirm against the decoder's supported set.
    const decoded = decodeEntities('A &amp; B &lt;tag&gt; &quot;q&quot; &#39;s&#39; &nbsp;');
    // Accept either a plain space or U+00A0 for &nbsp;, since the decoder's
    // choice is not visible from this file.
    expect(decoded.replace(/\u00A0/g, ' ')).toBe(`A & B <tag> "q" 's'  `);
    // 44608 is the decimal code point of '김' (U+AE40).
    expect(decodeEntities('&#44608;')).toBe('김');
  });

  it('collapses excessive whitespace', () => {
    // NOTE(review): tags reconstructed; the original literal spanned raw
    // newlines inside single quotes, which is a syntax error.
    const text = htmlToText('<div><p>a</p>\n\n\n\n<p>b</p></div>');
    // No run of three or more consecutive newlines may survive.
    expect(text).not.toMatch(/\n{3,}/);
  });
});

describe('webFetch.fetchUrlDirect (no network)', () => {
  it('rejects non-http schemes without throwing', async () => {
    const r = await fetchUrlDirect('ftp://example.com');
    expect(r.ok).toBe(false);
    expect(r.error).toContain('http');
  });

  it('returns honest failure for unreachable host (no throw)', async () => {
    // Loopback port 1 is presumably closed, so the request should fail locally
    // without touching the network. `timeoutMs` is assumed to bound the attempt.
    const r = await fetchUrlDirect('http://127.0.0.1:1', { timeoutMs: 1500 });
    expect(r.ok).toBe(false);
    expect(typeof r.error).toBe('string');
  }, 10_000); // Jest per-test timeout in milliseconds.
});