Files
cloudflare-crawl-mcp/dist/integration.test.js
T
2026-03-11 19:24:29 +00:00

135 lines
5.4 KiB
JavaScript

import { describe, it, expect, beforeAll } from 'vitest';
// Root of the Cloudflare client v4 API; the crawl request URLs in this file are built from it.
const API_BASE = "https://api.cloudflare.com/client/v4";
/**
 * Starts a crawl job by POSTing to the account's browser-rendering crawl endpoint.
 *
 * @param {string} accountId - Account identifier interpolated into the request path.
 * @param {string} apiToken - Token sent in the `Authorization: Bearer` header.
 * @param {object} options - Crawl settings; every field except `url` has a default.
 * @param {string} options.url - Start URL sent as the `url` field of the request body.
 * @param {number} [options.limit=10] - Sent as `limit`.
 * @param {number} [options.depth=1] - Sent as `depth`.
 * @param {string[]} [options.formats=["markdown"]] - Sent as `formats`.
 * @param {boolean} [options.render=true] - Sent as `render`.
 * @param {object} [options.options={}] - Sent as `options`.
 * @returns {Promise<string>} Identifier of the created crawl job.
 * @throws {Error} When the HTTP response is not ok, when the payload reports
 *   `success` as falsy, or when the payload carries no job id.
 */
async function initiateCrawl(accountId, apiToken, options) {
  const response = await fetch(`${API_BASE}/accounts/${accountId}/browser-rendering/crawl`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiToken}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      url: options.url,
      limit: options.limit ?? 10,
      depth: options.depth ?? 1,
      formats: options.formats ?? ["markdown"],
      render: options.render ?? true,
      options: options.options ?? {},
    }),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Failed to initiate crawl: ${response.status} ${error}`);
  }

  const data = await response.json();
  if (!data.success) {
    throw new Error(`Crawl initiation failed: ${JSON.stringify(data.errors)}`);
  }

  // NOTE(review): the id is accepted both as `result.id` and as a bare string
  // `result`, so either payload shape works — confirm which one the API returns.
  const jobId = typeof data.result === "string" ? data.result : data.result?.id;

  // Fail here with a readable message instead of returning undefined and
  // letting the caller poll a URL that ends in "/undefined".
  if (jobId == null || jobId === "") {
    throw new Error(`Crawl initiation returned no job id: ${JSON.stringify(data.result)}`);
  }
  return jobId;
}
/**
 * Polls a crawl job until its status is anything other than "running".
 *
 * @param {string} accountId - Account identifier interpolated into the request path.
 * @param {string} apiToken - Token sent in the `Authorization: Bearer` header.
 * @param {string} jobId - Crawl job identifier interpolated into the request path.
 * @param {number} [maxAttempts=60] - Maximum number of status requests to issue.
 * @param {number} [delayMs=5000] - Pause between two consecutive status requests.
 * @returns {Promise<object>} The `result` object of the first response whose
 *   status is not "running".
 * @throws {Error} When a status request is not ok, when a response carries no
 *   `result`, or when the job is still running after `maxAttempts` requests.
 */
async function waitForCrawl(accountId, apiToken, jobId, maxAttempts = 60, delayMs = 5000) {
  // The URL does not change between polls, so build it once.
  const statusUrl = `${API_BASE}/accounts/${accountId}/browser-rendering/crawl/${jobId}?limit=1`;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const response = await fetch(statusUrl, {
      headers: {
        Authorization: `Bearer ${apiToken}`,
      },
    });
    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Failed to check crawl status: ${response.status} ${error}`);
    }

    const data = await response.json();
    if (!data.result) {
      // Surface the reported errors rather than a TypeError on `.status`.
      throw new Error(`Crawl status response has no result: ${JSON.stringify(data.errors)}`);
    }
    if (data.result.status !== "running") {
      return data.result;
    }

    // Only pause when another poll follows; sleeping after the last attempt
    // would just postpone the timeout error.
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
  throw new Error("Crawl job did not complete within timeout");
}
/**
 * Looks up a mandatory environment variable.
 *
 * @param {string} key - Name of the variable to read from `process.env`.
 * @returns {string} The variable's value.
 * @throws {Error} When the variable is unset or holds an empty string.
 */
function getEnv(key) {
  const { [key]: found } = process.env;
  if (found) {
    return found;
  }
  throw new Error(`Missing required environment variable: ${key}`);
}
/**
 * Live end-to-end check of the crawl flow: start a job, poll it, and inspect
 * the returned records. The test is skipped unless both CF_API_TOKEN and
 * CF_ACCOUNT_ID are present in the environment when the suite is collected.
 */
describe('Integration: Cloudflare Crawl API', () => {
  const hasCredentials = Boolean(process.env.CF_API_TOKEN && process.env.CF_ACCOUNT_ID);

  beforeAll(() => {
    if (!hasCredentials) {
      console.log('\n⚠️ Skipping integration tests - CF_API_TOKEN or CF_ACCOUNT_ID not set\n');
    }
  });

  it.skipIf(!hasCredentials)('should crawl raczylo.com with multiple pages in markdown format', async () => {
    // Re-read at run time through getEnv so a missing value fails with its message.
    const accountId = getEnv("CF_ACCOUNT_ID");
    const apiToken = getEnv("CF_API_TOKEN");
    try {
      const jobId = await initiateCrawl(accountId, apiToken, {
        url: "https://raczylo.com",
        limit: 5,
        depth: 2,
        formats: ["markdown"],
      });
      console.log(` Started crawl job: ${jobId}`);
      expect(jobId).toBeDefined();
      expect(typeof jobId).toBe("string");

      const result = await waitForCrawl(accountId, apiToken, jobId, 60, 5000);
      console.log(` Crawl status: ${result.status}`);
      console.log(` Total pages discovered: ${result.total}`);
      console.log(` Pages finished: ${result.finished}`);
      expect(result.status).toBe("completed");
      expect(result.total).toBeGreaterThan(0);
      expect(result.records).toBeDefined();
      expect(Array.isArray(result.records)).toBe(true);
      expect(result.records.length).toBeGreaterThan(0);

      const completedRecords = result.records.filter((r) => r.status === "completed");
      console.log(` Completed pages: ${completedRecords.length}`);
      // Without at least one completed record the per-record checks below
      // would pass vacuously.
      expect(completedRecords.length).toBeGreaterThan(0);
      for (const [index, record] of completedRecords.entries()) {
        expect(record.url).toBeDefined();
        expect(record.markdown).toBeDefined();
        expect(record.markdown.length).toBeGreaterThan(0);
        console.log(` Page ${index + 1}: ${record.url} (${record.markdown.length} chars)`);
      }

      // Inspect the first *completed* record: records[0] may carry another
      // status and then has no markdown to look into.
      const [firstCompleted] = completedRecords;
      expect(firstCompleted.markdown).toContain("#");
    }
    catch (error) {
      // Best-effort: a rate-limited run is reported and treated as a pass.
      if (error instanceof Error && error.message.includes("Rate limit")) {
        console.log(" ⚠️ Skipped - Rate limit exceeded");
        return;
      }
      throw error;
    }
  }, 360000); // per-test timeout in ms, longer than the 60 x 5000 ms polling budget
});
/**
 * Unit checks for getEnv: it must throw for an unset variable and return the
 * value for a set one. Every case restores the variable it touches so real
 * credentials in the environment survive the run.
 */
describe('Environment Variable Validation', () => {
  // Runs `fn` with process.env[key] forced to `value` (removed when `value`
  // is undefined) and puts the previous state back afterwards, even when
  // `fn` throws.
  const withEnv = (key, value, fn) => {
    const previous = process.env[key];
    if (value === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
    try {
      fn();
    } finally {
      if (previous === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = previous;
      }
    }
  };

  const testCases = [
    {
      name: 'CF_API_TOKEN is required',
      envKey: 'CF_API_TOKEN',
      expectedError: 'Missing required environment variable: CF_API_TOKEN',
    },
    {
      name: 'CF_ACCOUNT_ID is required',
      envKey: 'CF_ACCOUNT_ID',
      expectedError: 'Missing required environment variable: CF_ACCOUNT_ID',
    },
  ];

  it.each(testCases)('$name', ({ envKey, expectedError }) => {
    withEnv(envKey, undefined, () => {
      expect(() => getEnv(envKey)).toThrow(expectedError);
    });
  });

  it('should return value when CF_API_TOKEN is set', () => {
    withEnv('CF_API_TOKEN', 'test-token', () => {
      expect(getEnv('CF_API_TOKEN')).toBe('test-token');
    });
  });

  it('should return value when CF_ACCOUNT_ID is set', () => {
    withEnv('CF_ACCOUNT_ID', 'test-account', () => {
      expect(getEnv('CF_ACCOUNT_ID')).toBe('test-account');
    });
  });
});