import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';

const API_BASE = "https://api.cloudflare.com/client/v4";

/**
 * Read a required environment variable.
 *
 * @param {string} key - Name of the variable to look up in `process.env`.
 * @returns {string} The variable's value.
 * @throws {Error} When the variable is unset or is the empty string.
 */
function getEnv(key) {
  const value = process.env[key];
  // Truthiness check on purpose: an empty string is treated as "missing".
  if (!value) {
    throw new Error(`Missing required environment variable: ${key}`);
  }
  return value;
}

/**
 * Start a crawl job by POSTing to the browser-rendering crawl endpoint.
 *
 * Fields missing from `options` fall back to the defaults spelled out in the
 * request body below (`maxAge` has no default and is omitted when undefined).
 *
 * @param {string} accountId - Account identifier interpolated into the URL.
 * @param {string} apiToken - Token sent as a Bearer Authorization header.
 * @param {object} options - Crawl options (`url`, `limit`, `depth`, `formats`,
 *   `render`, `maxAge`, `source`, `options`).
 * @returns {Promise<string>} The id of the created job (`data.result.id`).
 * @throws {Error} On a non-2xx HTTP status or when the payload has a falsy `success`.
 */
async function initiateCrawl(accountId, apiToken, options) {
  const response = await fetch(`${API_BASE}/accounts/${accountId}/browser-rendering/crawl`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiToken}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      url: options.url,
      limit: options.limit ?? 10,
      depth: options.depth ?? 1,
      formats: options.formats ?? ["markdown"],
      render: options.render ?? true,
      maxAge: options.maxAge,
      source: options.source ?? "all",
      options: options.options ?? {},
    }),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Failed to initiate crawl: ${response.status} ${error}`);
  }

  const data = await response.json();
  if (!data.success) {
    throw new Error(`Crawl initiation failed: ${JSON.stringify(data.errors)}`);
  }
  return data.result.id;
}

/**
 * Poll a crawl job until its status is anything other than "running".
 *
 * @param {string} accountId - Account identifier interpolated into the URL.
 * @param {string} apiToken - Token sent as a Bearer Authorization header.
 * @param {string} jobId - Id of the job to poll.
 * @param {number} [maxAttempts=120] - Maximum number of status requests.
 * @param {number} [delayMs=5000] - Pause between consecutive requests, in ms.
 * @returns {Promise<object>} The `result` object of the first non-running response.
 * @throws {Error} On a non-2xx HTTP status, on a payload that reports
 *   `success: false` or carries no `result`, or when every attempt still
 *   reports "running".
 */
async function waitForCrawl(accountId, apiToken, jobId, maxAttempts = 120, delayMs = 5000) {
  const statusUrl = `${API_BASE}/accounts/${accountId}/browser-rendering/crawl/${jobId}?limit=1`;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const response = await fetch(statusUrl, {
      headers: {
        Authorization: `Bearer ${apiToken}`,
      },
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Failed to check crawl status: ${response.status} ${error}`);
    }

    const data = await response.json();
    // Guard before dereferencing: an error payload without `result` would
    // otherwise surface as an opaque TypeError on `data.result.status`.
    if (data?.success === false || !data?.result) {
      throw new Error(`Failed to check crawl status: ${JSON.stringify(data?.errors ?? data)}`);
    }

    if (data.result.status !== "running") {
      return data.result;
    }

    // No point sleeping after the final attempt; fail right away instead.
    if (attempt < maxAttempts - 1) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  throw new Error("Crawl job did not complete within timeout");
}

/**
 * Map flat tool arguments onto the nested options shape used by initiateCrawl.
 *
 * @param {object} args - Flat arguments (`url`, `limit`, `depth`, `render`,
 *   `includeExternalLinks`, `includeSubdomains`, `includePatterns`, `excludePatterns`).
 * @param {string[]} formats - Output formats to request.
 * @returns {object} Options object; absent arguments are passed through as `undefined`.
 */
function buildCrawlOptions(args, formats) {
  return {
    url: args.url,
    limit: args.limit,
    depth: args.depth,
    formats,
    render: args.render,
    options: {
      includeExternalLinks: args.includeExternalLinks,
      includeSubdomains: args.includeSubdomains,
      includePatterns: args.includePatterns,
      excludePatterns: args.excludePatterns,
    },
  };
}

/**
 * Select the records of a crawl result whose status is "completed".
 *
 * @param {object} result - Crawl result; `records` may be missing.
 * @returns {object[]} Completed records (empty when there are none).
 */
function getCompletedRecords(result) {
  const records = result.records || [];
  return records.filter((record) => record.status === "completed");
}

/**
 * Render the completed records of a crawl result as one Markdown document.
 *
 * @param {object} result - Crawl result with `total` and optional `records`.
 * @returns {string} Summary line followed by one section per completed record.
 */
function formatMarkdownResult(result) {
  const completedRecords = getCompletedRecords(result);
  const content = completedRecords
    .map((record) => {
      // Fall back to the URL when the page has no title metadata.
      const title = record.metadata?.title || record.url;
      return `## ${title}\n\nURL: ${record.url}\n\n${record.markdown || ""}\n\n---\n`;
    })
    .join("\n");
  return `Crawl completed: ${completedRecords.length} of ${result.total} pages crawled successfully.\n\n${content}`;
}

/**
 * Render the completed records of a crawl result with their HTML payloads.
 *
 * @param {object} result - Crawl result with `total` and optional `records`.
 * @returns {string} Summary line followed by one section per completed record.
 */
function formatHtmlResult(result) {
  const completedRecords = getCompletedRecords(result);
  const content = completedRecords
    .map((record) => {
      const title = record.metadata?.title || record.url;
      // NOTE(review): this template carries only newlines around the fields;
      // it looks like wrapper markup was lost at some point. The characters are
      // kept exactly as found. Confirm the intended output before relying on it.
      return `\n\n\n\n${title}\n\n\n\n\nSource: ${record.url}\n\n\n\n${record.html || ""}\n\n\n\n`;
    })
    .join("\n");
  return `Crawl completed: ${completedRecords.length} of ${result.total} pages crawled successfully.\n\n${content}`;
}

/**
 * Serialize the completed records of a crawl result as pretty-printed JSON.
 *
 * @param {object} result - Crawl result with `total`, `status` and optional `records`.
 * @returns {string} JSON text with a `summary` object and a `pages` array.
 */
function formatJsonResult(result) {
  const completedRecords = getCompletedRecords(result);
  const jsonOutput = {
    summary: {
      total: result.total,
      completed: completedRecords.length,
      status: result.status,
    },
    pages: completedRecords.map((record) => ({
      url: record.url,
      title: record.metadata?.title,
      status: record.metadata?.status,
      markdown: record.markdown,
      html: record.html,
      json: record.json,
    })),
  };
  return JSON.stringify(jsonOutput, null, 2);
}

/**
 * Build an error-shaped tool response for a crawl job that did not succeed.
 *
 * @param {object} result - Crawl result; only `status` is read.
 * @param {string} jobId - Job id echoed in the message.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 */
function handleErrorResult(result, jobId) {
  const errorMessages = {
    errored: `Crawl job errored. Job ID: ${jobId}`,
    cancelled_due_to_timeout: `Crawl job cancelled due to timeout (7 days max). Job ID: ${jobId}`,
    cancelled_due_to_limits: `Crawl job cancelled due to account limits. Job ID: ${jobId}`,
    cancelled_by_user: `Crawl job was cancelled by user. Job ID: ${jobId}`,
  };
  // Fallback for statuses not listed above; kept on one line so it matches
  // the ". Job ID: …" shape of the known messages.
  const message =
    errorMessages[result.status] || `Crawl job failed with status: ${result.status}. Job ID: ${jobId}`;
  return {
    content: [{ type: "text", text: message }],
    isError: true,
  };
}

/**
 * Build a JSON `Response` for fetch mocks.
 * A fresh instance is needed per call because a Response body can be read only once.
 */
function jsonResponse(body, status = 200) {
  return new Response(JSON.stringify(body), {
    status,
    headers: { 'Content-Type': 'application/json' },
  });
}

describe('getEnv', () => {
  const testCases = [
    {
      name: 'returns value when env var exists',
      envKey: 'TEST_VAR',
      envValue: 'test-value',
      expected: 'test-value',
    },
    {
      name: 'throws when env var is empty string',
      envKey: 'EMPTY_VAR',
      envValue: '',
      expectedError: 'Missing required environment variable: EMPTY_VAR',
    },
    {
      name: 'throws when env var is undefined',
      envKey: 'UNDEFINED_VAR',
      envValue: undefined,
      expectedError: 'Missing required environment variable: UNDEFINED_VAR',
    },
  ];

  // Do not leak variables set by one case into later cases or other suites.
  afterEach(() => {
    for (const { envKey } of testCases) {
      delete process.env[envKey];
    }
  });

  it.each(testCases)('$name', ({ envKey, envValue, expected, expectedError }) => {
    if (envValue === undefined) {
      delete process.env[envKey];
    } else {
      process.env[envKey] = envValue;
    }

    if (expectedError) {
      expect(() => getEnv(envKey)).toThrow(expectedError);
    } else {
      expect(getEnv(envKey)).toBe(expected);
    }
  });
});

describe('buildCrawlOptions', () => {
  const testCases = [
    {
      name: 'builds options with markdown format',
      args: { url: 'https://example.com' },
      formats: ['markdown'],
      expected: {
        url: 'https://example.com',
        limit: undefined,
        depth: undefined,
        formats: ['markdown'],
        render: undefined,
        options: {
          includeExternalLinks: undefined,
          includeSubdomains: undefined,
          includePatterns: undefined,
          excludePatterns: undefined,
        },
      },
    },
    {
      name: 'builds options with all parameters',
      args: {
        url: 'https://example.com',
        limit: 50,
        depth: 2,
        includeSubdomains: true,
        includeExternalLinks: false,
        includePatterns: ['**/docs/**'],
        excludePatterns: ['**/archive/**'],
        render: true,
      },
      formats: ['html'],
      expected: {
        url: 'https://example.com',
        limit: 50,
        depth: 2,
        formats: ['html'],
        render: true,
        options: {
          includeExternalLinks: false,
          includeSubdomains: true,
          includePatterns: ['**/docs/**'],
          excludePatterns: ['**/archive/**'],
        },
      },
    },
    {
      name: 'builds options with json format',
      args: { url: 'https://api.example.com', limit: 100 },
      formats: ['json'],
      expected: {
        url: 'https://api.example.com',
        limit: 100,
        formats: ['json'],
        depth: undefined,
        render: undefined,
        options: {
          includeExternalLinks: undefined,
          includeSubdomains: undefined,
          includePatterns: undefined,
          excludePatterns: undefined,
        },
      },
    },
    {
      name: 'handles empty options object',
      args: { url: 'https://test.com' },
      formats: ['markdown'],
      expected: {
        url: 'https://test.com',
        formats: ['markdown'],
        options: {
          includeExternalLinks: undefined,
          includeSubdomains: undefined,
          includePatterns: undefined,
          excludePatterns: undefined,
        },
      },
    },
  ];

  it.each(testCases)('$name', ({ args, formats, expected }) => {
    const result = buildCrawlOptions(args, formats);
    expect(result).toEqual(expected);
  });
});

describe('formatMarkdownResult', () => {
  const testCases = [
    {
      name: 'formats single completed page',
      result: {
        total: 1,
        status: 'completed',
        records: [
          {
            url: 'https://example.com',
            status: 'completed',
            markdown: '# Hello World',
            metadata: { title: 'Home Page', status: 200 },
          },
        ],
      },
      expectedContains: ['## Home Page', '# Hello World', 'Crawl completed: 1 of 1'],
    },
    {
      name: 'formats multiple completed pages',
      result: {
        total: 2,
        status: 'completed',
        records: [
          {
            url: 'https://example.com',
            status: 'completed',
            markdown: '# Page 1',
            metadata: { title: 'Page One', status: 200 },
          },
          {
            url: 'https://example.com/about',
            status: 'completed',
            markdown: '# About Us',
            metadata: { title: 'About', status: 200 },
          },
        ],
      },
      expectedContains: ['## Page One', '## About', 'Crawl completed: 2 of 2'],
    },
    {
      name: 'handles missing markdown content',
      result: {
        total: 1,
        status: 'completed',
        records: [
          {
            url: 'https://example.com',
            status: 'completed',
            markdown: '',
            metadata: { title: 'Test', status: 200 },
          },
        ],
      },
      expectedContains: ['## Test', 'URL: https://example.com'],
    },
    {
      name: 'uses url as title when metadata.title is missing',
      result: {
        total: 1,
        status: 'completed',
        records: [
          {
            url: 'https://example.com/unnamed',
            status: 'completed',
            markdown: 'Content here',
          },
        ],
      },
      expectedContains: ['## https://example.com/unnamed', 'Content here'],
    },
    {
      name: 'handles empty records array',
      result: {
        total: 0,
        status: 'completed',
        records: [],
      },
      expectedContains: ['Crawl completed: 0 of 0'],
    },
    {
      name: 'filters out non-completed records',
      result: {
        total: 3,
        status: 'completed',
        records: [
          { url: 'https://example.com/1', status: 'completed', markdown: '# Done' },
          { url: 'https://example.com/2', status: 'errored', markdown: '# Failed' },
          { url: 'https://example.com/3', status: 'skipped' },
        ],
      },
      expectedContains: ['Crawl completed: 1 of 3', '# Done'],
    },
  ];

  it.each(testCases)('$name', ({ result, expectedContains }) => {
    const output = formatMarkdownResult(result);
    expectedContains.forEach((expected) => {
      expect(output).toContain(expected);
    });
  });
});

describe('formatHtmlResult', () => {
  // NOTE(review): the fixtures below contain only newlines where markup would
  // be expected; they are reproduced character-for-character with `\n` escapes
  // so the literals are valid. Confirm the intended HTML.
  const testCases = [
    {
      name: 'formats single completed page with HTML',
      result: {
        total: 1,
        status: 'completed',
        records: [
          {
            url: 'https://example.com',
            status: 'completed',
            html: '\n\nHello World\n\n',
            metadata: { title: 'Home Page', status: 200 },
          },
        ],
      },
      expectedContains: ['\n\nHome Page\n\n', '\n\nHello World\n\n', 'Crawl completed: 1 of 1'],
    },
    {
      name: 'formats multiple completed pages',
      result: {
        total: 2,
        status: 'completed',
        records: [
          {
            url: 'https://example.com',
            status: 'completed',
            html: '\nPage 1\n',
            metadata: { title: 'Page One', status: 200 },
          },
          {
            url: 'https://example.com/about',
            status: 'completed',
            html: '\nAbout Us\n',
            metadata: { title: 'About', status: 200 },
          },
        ],
      },
      expectedContains: ['\n\nPage One\n\n', '\n\nAbout\n\n', 'Crawl completed: 2 of 2'],
    },
    {
      name: 'handles missing HTML content',
      result: {
        total: 1,
        status: 'completed',
        records: [
          {
            url: 'https://example.com',
            status: 'completed',
            html: '',
            metadata: { title: 'Test', status: 200 },
          },
        ],
      },
      // The '' entry is kept from the original table but always matches; the
      // Source line is what actually pins the output for this case.
      expectedContains: ['\n\nTest\n\n', '', 'Source: https://example.com'],
    },
  ];

  it.each(testCases)('$name', ({ result, expectedContains }) => {
    const output = formatHtmlResult(result);
    expectedContains.forEach((expected) => {
      expect(output).toContain(expected);
    });
  });
});

describe('formatJsonResult', () => {
  const testCases = [
    {
      name: 'formats single completed page as JSON',
      result: {
        total: 1,
        status: 'completed',
        records: [
          {
            url: 'https://example.com',
            status: 'completed',
            markdown: '# Hello',
            html: '\n\nHello\n\n',
            json: { key: 'value' },
            metadata: { title: 'Home', status: 200 },
          },
        ],
      },
    },
    {
      name: 'formats multiple completed pages as JSON',
      result: {
        total: 2,
        status: 'completed',
        records: [
          {
            url: 'https://example.com/page1',
            status: 'completed',
            markdown: '# Page 1',
          },
          {
            url: 'https://example.com/page2',
            status: 'completed',
            markdown: '# Page 2',
          },
        ],
      },
    },
    {
      name: 'includes summary with correct counts',
      result: {
        total: 5,
        status: 'completed',
        records: [
          { url: 'https://example.com/1', status: 'completed' },
          { url: 'https://example.com/2', status: 'completed' },
          { url: 'https://example.com/3', status: 'errored' },
          { url: 'https://example.com/4', status: 'skipped' },
          { url: 'https://example.com/5', status: 'completed' },
        ],
      },
    },
    {
      name: 'handles empty records',
      result: {
        total: 0,
        status: 'completed',
        records: [],
      },
    },
  ];

  it.each(testCases)('$name', ({ result }) => {
    const output = formatJsonResult(result);
    const parsed = JSON.parse(output);

    expect(parsed).toHaveProperty('summary');
    expect(parsed).toHaveProperty('pages');

    const completedCount = result.records.filter((r) => r.status === 'completed').length;
    expect(parsed.summary.completed).toBe(completedCount);
    expect(parsed.summary.total).toBe(result.total);
    expect(parsed.summary.status).toBe(result.status);
    expect(parsed.pages).toHaveLength(completedCount);
  });
});

describe('handleErrorResult', () => {
  const testCases = [
    {
      name: 'handles errored status',
      result: { status: 'errored' },
      jobId: 'test-job-123',
      expectedError: true,
      expectedContains: ['errored', 'test-job-123'],
    },
    {
      name: 'handles cancelled_due_to_timeout status',
      result: { status: 'cancelled_due_to_timeout' },
      jobId: 'job-456',
      expectedError: true,
      expectedContains: ['timeout', 'job-456'],
    },
    {
      name: 'handles cancelled_due_to_limits status',
      result: { status: 'cancelled_due_to_limits' },
      jobId: 'job-789',
      expectedError: true,
      expectedContains: ['limits', 'job-789'],
    },
    {
      name: 'handles cancelled_by_user status',
      result: { status: 'cancelled_by_user' },
      jobId: 'job-000',
      expectedError: true,
      expectedContains: ['cancelled by user', 'job-000'],
    },
    {
      name: 'handles unknown status',
      result: { status: 'some_unknown_status' },
      jobId: 'job-unknown',
      expectedError: true,
      expectedContains: ['some_unknown_status. Job ID: job-unknown'],
    },
  ];

  it.each(testCases)('$name', ({ result, jobId, expectedError, expectedContains }) => {
    const output = handleErrorResult(result, jobId);
    expect(output.isError).toBe(expectedError);
    expectedContains.forEach((expected) => {
      expect(output.content[0].text).toContain(expected);
    });
  });
});

describe('initiateCrawl', () => {
  beforeEach(() => {
    vi.stubGlobal('fetch', vi.fn());
  });

  // Restore the real global so the stub cannot leak into other suites.
  afterEach(() => {
    vi.unstubAllGlobals();
  });

  const testCases = [
    {
      name: 'initiates crawl successfully',
      accountId: 'acc-123',
      apiToken: 'token-abc',
      options: { url: 'https://example.com', formats: ['markdown'] },
      mockResponse: { success: true, result: { id: 'job-123' } },
      expectedJobId: 'job-123',
    },
    {
      name: 'throws on HTTP error',
      accountId: 'acc-123',
      apiToken: 'token-abc',
      options: { url: 'https://example.com' },
      mockResponse: null,
      mockStatus: 401,
      expectedError: 'Failed to initiate crawl: 401',
    },
    {
      name: 'throws on API failure',
      accountId: 'acc-123',
      apiToken: 'token-abc',
      options: { url: 'https://example.com' },
      mockResponse: { success: false, errors: [{ message: 'Invalid URL' }] },
      expectedError: 'Crawl initiation failed',
    },
  ];

  it.each(testCases)(
    '$name',
    async ({ accountId, apiToken, options, mockResponse, mockStatus, expectedJobId, expectedError }) => {
      const fetchMock = vi.mocked(fetch);
      const reply = mockStatus ? new Response('', { status: mockStatus }) : jsonResponse(mockResponse);
      fetchMock.mockResolvedValueOnce(reply);

      if (expectedError) {
        await expect(initiateCrawl(accountId, apiToken, options)).rejects.toThrow(expectedError);
        return;
      }

      const result = await initiateCrawl(accountId, apiToken, options);
      expect(result).toBe(expectedJobId);
      expect(fetchMock).toHaveBeenCalledWith(
        `${API_BASE}/accounts/${accountId}/browser-rendering/crawl`,
        expect.objectContaining({ method: 'POST' }),
      );
    },
  );
});

describe('waitForCrawl', () => {
  beforeEach(() => {
    vi.stubGlobal('fetch', vi.fn());
  });

  afterEach(() => {
    vi.unstubAllGlobals();
  });

  const testCases = [
    {
      name: 'returns completed result immediately',
      accountId: 'acc-123',
      apiToken: 'token-abc',
      jobId: 'job-123',
      mockResponse: { result: { status: 'completed', total: 5, records: [] } },
      expectedStatus: 'completed',
    },
    {
      name: 'returns errored result',
      accountId: 'acc-123',
      apiToken: 'token-abc',
      jobId: 'job-123',
      mockResponse: { result: { status: 'errored', error: 'Something went wrong' } },
      expectedStatus: 'errored',
    },
    {
      name: 'returns cancelled_due_to_limits result',
      accountId: 'acc-123',
      apiToken: 'token-abc',
      jobId: 'job-123',
      mockResponse: { result: { status: 'cancelled_due_to_limits' } },
      expectedStatus: 'cancelled_due_to_limits',
    },
  ];

  it.each(testCases)('$name', async ({ accountId, apiToken, jobId, mockResponse, expectedStatus }) => {
    const fetchMock = vi.mocked(fetch);
    fetchMock.mockResolvedValue(jsonResponse(mockResponse));

    const result = await waitForCrawl(accountId, apiToken, jobId, 1, 1);
    expect(result.status).toBe(expectedStatus);
  });

  it('throws when the job is still running after every attempt', async () => {
    const fetchMock = vi.mocked(fetch);
    // New Response per call: a body can only be consumed once.
    fetchMock.mockImplementation(async () => jsonResponse({ result: { status: 'running' } }));

    await expect(waitForCrawl('acc-123', 'token-abc', 'job-123', 2, 1)).rejects.toThrow(
      'Crawl job did not complete within timeout',
    );
    expect(fetchMock).toHaveBeenCalledTimes(2);
  });

  it('throws on HTTP error', async () => {
    const fetchMock = vi.mocked(fetch);
    fetchMock.mockResolvedValueOnce(new Response('server exploded', { status: 500 }));

    await expect(waitForCrawl('acc-123', 'token-abc', 'job-123', 1, 1)).rejects.toThrow(
      'Failed to check crawl status: 500',
    );
  });

  it('throws a descriptive error when the payload has no result', async () => {
    const fetchMock = vi.mocked(fetch);
    fetchMock.mockResolvedValueOnce(jsonResponse({ success: false, errors: [{ message: 'Job not found' }] }));

    await expect(waitForCrawl('acc-123', 'token-abc', 'job-123', 1, 1)).rejects.toThrow(
      'Failed to check crawl status',
    );
  });
});