mirror of
https://github.com/lukaszraczylo/cloudflare-crawl-mcp.git
synced 2026-06-06 22:53:40 +00:00
135 lines
5.4 KiB
JavaScript
135 lines
5.4 KiB
JavaScript
import { describe, it, expect, beforeAll } from 'vitest';
|
|
// Root URL of the Cloudflare v4 REST API; the request paths built in this file are appended to it.
const API_BASE = 'https://api.cloudflare.com/client/v4';
|
|
/**
 * Starts a crawl job by POSTing to the account's `browser-rendering/crawl` endpoint.
 *
 * @param {string} accountId - Account identifier interpolated into the request path.
 * @param {string} apiToken - Token sent in the `Authorization: Bearer` header.
 * @param {object} options - Crawl settings forwarded in the JSON request body.
 * @param {string} options.url - URL to crawl (required).
 * @param {number} [options.limit=10] - Sent as `limit`.
 * @param {number} [options.depth=1] - Sent as `depth`.
 * @param {string[]} [options.formats=["markdown"]] - Sent as `formats`.
 * @param {boolean} [options.render=true] - Sent as `render`.
 * @param {object} [options.options={}] - Sent as `options`.
 * @returns {Promise<string>} The job id taken from `result.id` of the response.
 * @throws {Error} If `options.url` is missing, the HTTP status is not OK, the
 *   response envelope reports `success: false`, or no job id is returned.
 */
async function initiateCrawl(accountId, apiToken, options) {
  // Fail fast with a readable message instead of a TypeError (missing options)
  // or a round-trip to the API that can only be rejected (missing url).
  if (!options?.url) {
    throw new Error("Crawl options must include a url");
  }

  const payload = {
    url: options.url,
    // `??` keeps explicit falsy values such as `limit: 0` or `render: false`.
    limit: options.limit ?? 10,
    depth: options.depth ?? 1,
    formats: options.formats ?? ["markdown"],
    render: options.render ?? true,
    options: options.options ?? {},
  };

  const response = await fetch(`${API_BASE}/accounts/${accountId}/browser-rendering/crawl`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiToken}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Failed to initiate crawl: ${response.status} ${error}`);
  }

  const data = await response.json();
  if (!data.success) {
    throw new Error(`Crawl initiation failed: ${JSON.stringify(data.errors)}`);
  }

  const jobId = data.result?.id;
  if (jobId == null) {
    // A "successful" envelope without an id would otherwise surface later as an
    // unrelated failure when the id is used to poll for status.
    throw new Error(`Crawl initiation returned no job id: ${JSON.stringify(data)}`);
  }
  return jobId;
}
|
|
/**
 * Polls the crawl job endpoint until the job status is no longer "running".
 *
 * @param {string} accountId - Account identifier interpolated into the request path.
 * @param {string} apiToken - Token sent in the `Authorization: Bearer` header.
 * @param {string} jobId - Job id interpolated into the request path.
 * @param {number} [maxAttempts=60] - Maximum number of status requests.
 * @param {number} [delayMs=5000] - Pause between consecutive status requests.
 * @returns {Promise<object>} The `result` object of the first response whose
 *   `result.status` is not "running".
 * @throws {Error} If a status request has a non-OK HTTP status, the response
 *   envelope reports failure or carries no `result`, or the job is still
 *   running after `maxAttempts` requests.
 */
async function waitForCrawl(accountId, apiToken, jobId, maxAttempts = 60, delayMs = 5000) {
  const statusUrl = `${API_BASE}/accounts/${accountId}/browser-rendering/crawl/${jobId}?limit=1`;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const response = await fetch(statusUrl, {
      headers: {
        Authorization: `Bearer ${apiToken}`,
      },
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Failed to check crawl status: ${response.status} ${error}`);
    }

    const data = await response.json();
    // Report an envelope-level failure explicitly instead of crashing on
    // `data.result.status` with a TypeError.
    if (data?.success === false || data?.result == null) {
      throw new Error(`Crawl status check failed: ${JSON.stringify(data?.errors ?? data)}`);
    }

    if (data.result.status !== "running") {
      return data.result;
    }

    // No request follows the final attempt, so sleeping there would only
    // postpone the timeout error by `delayMs`.
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  throw new Error("Crawl job did not complete within timeout");
}
|
|
/**
 * Reads a required environment variable.
 *
 * @param {string} key - Name of the variable to look up in `process.env`.
 * @returns {string} The variable's value.
 * @throws {Error} If the variable is unset or set to an empty string.
 */
function getEnv(key) {
  const value = process.env[key];
  if (value) {
    return value;
  }
  throw new Error(`Missing required environment variable: ${key}`);
}
|
|
// Live end-to-end check against the Cloudflare crawl API. The test is skipped
// unless both CF_API_TOKEN and CF_ACCOUNT_ID are present in the environment.
describe('Integration: Cloudflare Crawl API', () => {
  const apiToken = process.env.CF_API_TOKEN;
  const accountId = process.env.CF_ACCOUNT_ID;
  // Coerced so that `skipIf` receives a real boolean rather than a string.
  const hasCredentials = Boolean(apiToken && accountId);

  beforeAll(() => {
    if (!hasCredentials) {
      console.log('\n⚠️ Skipping integration tests - CF_API_TOKEN or CF_ACCOUNT_ID not set\n');
    }
  });

  it.skipIf(!hasCredentials)('should crawl raczylo.com with multiple pages in markdown format', async () => {
    const accountId = getEnv("CF_ACCOUNT_ID");
    const apiToken = getEnv("CF_API_TOKEN");

    try {
      const jobId = await initiateCrawl(accountId, apiToken, {
        url: "https://raczylo.com",
        limit: 5,
        depth: 2,
        formats: ["markdown"],
      });
      console.log(` Started crawl job: ${jobId}`);
      expect(jobId).toBeDefined();
      expect(typeof jobId).toBe("string");

      const result = await waitForCrawl(accountId, apiToken, jobId, 60, 5000);
      console.log(` Crawl status: ${result.status}`);
      console.log(` Total pages discovered: ${result.total}`);
      console.log(` Pages finished: ${result.finished}`);

      expect(result.status).toBe("completed");
      expect(result.total).toBeGreaterThan(0);
      expect(result.records).toBeDefined();
      expect(Array.isArray(result.records)).toBe(true);
      expect(result.records.length).toBeGreaterThan(0);

      const completedRecords = result.records.filter((r) => r.status === "completed");
      console.log(` Completed pages: ${completedRecords.length}`);
      // Without this, the per-record checks below pass vacuously when no page completed.
      expect(completedRecords.length).toBeGreaterThan(0);

      completedRecords.forEach((record, index) => {
        expect(record.url).toBeDefined();
        expect(record.markdown).toBeDefined();
        expect(record.markdown.length).toBeGreaterThan(0);
        console.log(` Page ${index + 1}: ${record.url} (${record.markdown.length} chars)`);
      });

      // Check a completed record: `result.records[0]` is not filtered by status,
      // so it is not guaranteed to be one of the records validated above.
      const firstCompleted = completedRecords[0];
      expect(firstCompleted.markdown).toContain("#");
    } catch (error) {
      // Rate limiting is treated as an inconclusive run, not a failure. Optional
      // chaining keeps thrown values without a `message` from masking the real error.
      if (error?.message?.includes("Rate limit")) {
        console.log(" ⚠️ Skipped - Rate limit exceeded");
        return;
      }
      throw error;
    }
  }, 360000);
});
|
|
// Unit tests for getEnv. Every test restores the variable it touches so that
// real credentials in the environment survive this suite.
describe('Environment Variable Validation', () => {
  /**
   * Runs `run` with `process.env[key]` set to `value` (or removed when `value`
   * is undefined), then puts back whatever was there before, even if `run` throws.
   */
  const withEnv = (key, value, run) => {
    const previous = process.env[key];
    if (value === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
    try {
      run();
    } finally {
      if (previous === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = previous;
      }
    }
  };

  const testCases = [
    {
      name: 'CF_API_TOKEN is required',
      envKey: 'CF_API_TOKEN',
      expectedError: 'Missing required environment variable: CF_API_TOKEN',
    },
    {
      name: 'CF_ACCOUNT_ID is required',
      envKey: 'CF_ACCOUNT_ID',
      expectedError: 'Missing required environment variable: CF_ACCOUNT_ID',
    },
  ];

  it.each(testCases)('$name', ({ envKey, expectedError }) => {
    withEnv(envKey, undefined, () => {
      expect(() => getEnv(envKey)).toThrow(expectedError);
    });
  });

  it('should return value when CF_API_TOKEN is set', () => {
    withEnv('CF_API_TOKEN', 'test-token', () => {
      expect(getEnv('CF_API_TOKEN')).toBe('test-token');
    });
  });

  it('should return value when CF_ACCOUNT_ID is set', () => {
    withEnv('CF_ACCOUNT_ID', 'test-account', () => {
      expect(getEnv('CF_ACCOUNT_ID')).toBe('test-account');
    });
  });
});
|