// File-viewer header captured together with the source:
// connectai/tests/agentEngine.test.ts — 544 lines, 22 KiB, TypeScript

/**
* AgentEngine Integration Tests & Performance Benchmarks
*
* 검증 대상:
* 1. ErrorClassifier — 오류 유형(Transient/Permanent/Abort) 자동 분류
* 2. ErrorRecoveryMatrix — 각 규칙이 의도한 대응 전략으로 매핑되는지 검증
* 3. resilientExecute — 지수 백오프 재시도 및 즉시 중단 흐름
* 4. MissionState — 감사 이력(Audit Trail) 및 구조화된 로그 포맷
* 5. Performance Benchmark — 미션 평균 처리 시간 및 재시도 오버헤드 측정
*/
import {
AgentEngine,
IAgent,
AgentExecuteOptions,
ErrorClassifier,
ErrorType,
ERROR_RECOVERY_MATRIX,
MissionState,
PipelineStage
} from '../src/lib/engine';
// ─── Mock Agents ───
/**
 * Test double that always succeeds: every `execute` call resolves with the
 * response string supplied at construction time.
 */
class MockSuccessAgent implements IAgent {
  /** Number of times `execute` has been invoked on this instance. */
  public callCount = 0;

  /** Fixed text handed back by every `execute` call. */
  private readonly response: string;

  /**
   * @param response Text to resolve with; defaults to a generic placeholder sentence.
   */
  constructor(response: string = 'This is a valid mock response for testing purposes.') {
    this.response = response;
  }

  /**
   * Counts the invocation and resolves with the configured response.
   * None of the arguments are inspected.
   */
  async execute(input: string, context?: string, signal?: AbortSignal, options?: AgentExecuteOptions): Promise<string> {
    this.callCount += 1;
    return this.response;
  }
}
/**
 * Test double that fails with a connection-refused error for its first
 * `failCount` calls and succeeds on every call after that.
 */
class MockTransientAgent implements IAgent {
  /** Number of times `execute` has been invoked, failures included. */
  public callCount = 0;

  /** How many leading calls must fail before the agent starts succeeding. */
  private readonly failCount: number;

  /**
   * @param failCount Number of initial calls that reject; defaults to 2.
   */
  constructor(failCount: number = 2) {
    this.failCount = failCount;
  }

  /**
   * Counts the invocation, then either rejects (while still inside the failure
   * budget) or resolves with a recovery message. Arguments are not inspected.
   *
   * @throws Error with message 'ECONNREFUSED: Connection refused' for the
   *         first `failCount` calls.
   */
  async execute(input: string, context?: string, signal?: AbortSignal, options?: AgentExecuteOptions): Promise<string> {
    this.callCount += 1;
    const hasRecovered = this.callCount > this.failCount;
    if (!hasRecovered) {
      throw new Error('ECONNREFUSED: Connection refused');
    }
    return 'Recovery successful after transient failures.';
  }
}
/**
 * Test double whose `execute` always rejects with a "model not found" error.
 * The suites in this file use it as the permanent-failure case.
 */
class MockPermanentAgent implements IAgent {
  /** Message carried by every rejection. */
  private static readonly FAILURE_MESSAGE = '404: model not found';

  /** @throws Error with message '404: model not found' on every call. */
  async execute(): Promise<string> {
    throw new Error(MockPermanentAgent.FAILURE_MESSAGE);
  }
}
/**
 * Test double whose `execute` always rejects with a timeout-style error.
 * NOTE(review): presumably meant to exercise the transient/timeout path of the
 * classifier — it is not referenced by the suites visible in this file.
 */
class MockTimeoutAgent implements IAgent {
  /** Message carried by every rejection. */
  private static readonly FAILURE_MESSAGE = 'timeout: request took too long';

  /** @throws Error with message 'timeout: request took too long' on every call. */
  async execute(): Promise<string> {
    throw new Error(MockTimeoutAgent.FAILURE_MESSAGE);
  }
}
/**
 * Test double whose `execute` always rejects with the fetch-failure message
 * ('Failed to fetch'), which the classifier suite lists among transient errors.
 */
class MockNetworkAgent implements IAgent {
  /** Message carried by every rejection. */
  private static readonly FAILURE_MESSAGE = 'Failed to fetch';

  /** @throws Error with message 'Failed to fetch' on every call. */
  async execute(): Promise<string> {
    throw new Error(MockNetworkAgent.FAILURE_MESSAGE);
  }
}
/**
 * Test double whose `execute` always rejects with an error that looks like a
 * cancellation: both its `name` and its `message` are 'AbortError'.
 */
class MockAbortAgent implements IAgent {
  /** @throws Error named 'AbortError' (message 'AbortError') on every call. */
  async execute(): Promise<string> {
    // Build the error and stamp its name in a single expression.
    throw Object.assign(new Error('AbortError'), { name: 'AbortError' });
  }
}
/**
 * Test double that waits for a configurable delay before resolving with a
 * fixed response; used by the benchmark suite to simulate agent latency.
 */
class MockSlowAgent implements IAgent {
  /** Delay, in milliseconds, applied before each response. */
  private readonly delayMs: number;

  /**
   * @param delayMs Time to wait before resolving; defaults to 100ms.
   */
  constructor(delayMs: number = 100) {
    this.delayMs = delayMs;
  }

  /** Sleeps for `delayMs` via a timer, then resolves with the canned response. */
  async execute(): Promise<string> {
    await new Promise((wake) => {
      setTimeout(wake, this.delayMs);
    });
    return 'Slow but valid agent response for performance measurement.';
  }
}
// ─── Helper ───
/**
 * Produces a fresh `AbortSignal` for a mission run.
 * The owning controller is discarded, so the returned signal is never aborted.
 *
 * @returns A new, un-aborted signal.
 */
function createAbortSignal(): AbortSignal {
  return new AbortController().signal;
}
/** Progress callback that discards every stage update; passed to `runMission` by the suites in this file. */
const noopProgress = (_stage: PipelineStage, _message: string): void => {
  // Intentionally empty.
};
// ═══════════════════════════════════════════════
// Test Suite 1: ErrorClassifier
// ═══════════════════════════════════════════════
/**
 * Checks that `ErrorClassifier.classify` maps errors to the expected
 * `ErrorType` and, where asserted, to the expected recovery rule fields.
 */
describe('ErrorClassifier', () => {
  describe('Transient Error Classification', () => {
    // Messages expected to be classified as retryable.
    const retryableMessages = [
      'ECONNREFUSED: Connection refused',
      'Request timeout exceeded',
      'ETIMEDOUT: operation timed out',
      'ECONNRESET: connection reset by peer',
      'network error occurred',
      'Failed to fetch',
      'HTTP 503: Service Unavailable',
      'HTTP 502: Bad Gateway',
      'HTTP 429: Too Many Requests',
      'socket hang up',
    ];

    test.each(retryableMessages)('"%s" → TRANSIENT', (message) => {
      const classification = ErrorClassifier.classify(new Error(message));
      expect(classification.type).toBe(ErrorType.TRANSIENT);
      const { rule } = classification;
      expect(rule.action).toBe('retry');
      expect(rule.maxRetries).toBe(3);
    });
  });

  describe('Permanent Error Classification', () => {
    // Messages expected to fail immediately, with no retries.
    const fatalMessages = [
      'HTTP 401: Unauthorized',
      'HTTP 403: Forbidden',
      'HTTP 404: Not Found',
      'Planner 에이전트로부터 유효한 응답을 받지 못했습니다',
      'Ollama URL이 설정되지 않았습니다',
      'invalid model name specified',
      'model not found in registry',
    ];

    test.each(fatalMessages)('"%s" → PERMANENT', (message) => {
      const classification = ErrorClassifier.classify(new Error(message));
      expect(classification.type).toBe(ErrorType.PERMANENT);
      const { rule } = classification;
      expect(rule.action).toBe('fail_with_message');
      expect(rule.maxRetries).toBe(0);
    });
  });

  describe('Abort Classification', () => {
    test('AbortError by name → ABORT', () => {
      // The message is unrelated; only the error's name marks it as an abort.
      const cancelled = Object.assign(new Error('cancelled'), { name: 'AbortError' });
      const classification = ErrorClassifier.classify(cancelled);
      expect(classification.type).toBe(ErrorType.ABORT);
      expect(classification.rule.action).toBe('abort');
    });

    test('AbortError by message → ABORT', () => {
      // Here the name stays 'Error'; the message alone carries 'AbortError'.
      const classification = ErrorClassifier.classify(new Error('AbortError'));
      expect(classification.type).toBe(ErrorType.ABORT);
    });
  });

  describe('Unknown Error → Permanent (보수적 처리)', () => {
    // Unrecognised errors are expected to fall back to PERMANENT (conservative handling).
    test('분류 불가한 오류는 PERMANENT로 처리', () => {
      const unrecognised = new Error('something completely unexpected');
      expect(ErrorClassifier.classify(unrecognised).type).toBe(ErrorType.PERMANENT);
    });
  });
});
// ═══════════════════════════════════════════════
// Test Suite 2: Error Recovery Matrix
// ═══════════════════════════════════════════════
/**
 * Checks the contents of `ERROR_RECOVERY_MATRIX`: every error type has a rule,
 * and each rule carries the retry settings and action the tests expect.
 */
describe('Error Recovery Matrix', () => {
  /** Type of the `type` field on a matrix entry, derived from the matrix itself. */
  type RuleType = (typeof ERROR_RECOVERY_MATRIX)[number]['type'];

  /** Looks up the first matrix entry for the given error type (asserted non-null, as the tests require it to exist). */
  const ruleFor = (wanted: RuleType) =>
    ERROR_RECOVERY_MATRIX.find((entry) => entry.type === wanted)!;

  test('매트릭스에 3가지 유형이 모두 정의되어 있어야 한다', () => {
    const definedTypes = ERROR_RECOVERY_MATRIX.map((entry) => entry.type);
    for (const expected of [ErrorType.TRANSIENT, ErrorType.PERMANENT, ErrorType.ABORT]) {
      expect(definedTypes).toContain(expected);
    }
  });

  test('TRANSIENT 규칙은 재시도가 가능해야 한다', () => {
    const transientRule = ruleFor(ErrorType.TRANSIENT);
    expect(transientRule.maxRetries).toBeGreaterThan(0);
    expect(transientRule.backoffBaseMs).toBeGreaterThan(0);
    expect(transientRule.action).toBe('retry');
  });

  test('PERMANENT 규칙은 재시도하지 않아야 한다', () => {
    const permanentRule = ruleFor(ErrorType.PERMANENT);
    expect(permanentRule.maxRetries).toBe(0);
    expect(permanentRule.action).toBe('fail_with_message');
    // A permanent failure must come with a non-empty user-facing message.
    expect(permanentRule.userMessage.length).toBeGreaterThan(0);
  });

  test('ABORT 규칙은 조용하게 종료해야 한다', () => {
    const abortRule = ruleFor(ErrorType.ABORT);
    expect(abortRule.maxRetries).toBe(0);
    expect(abortRule.action).toBe('abort');
  });
});
// ═══════════════════════════════════════════════
// Test Suite 3: MissionState
// ═══════════════════════════════════════════════
/**
 * Checks `MissionState`: its initial stage, the audit trail recorded by
 * `transition`, the shape of `toStructuredLog()`, and `getElapsedMs()`.
 */
describe('MissionState', () => {
  test('초기 상태는 idle이어야 한다', () => {
    const fresh = new MissionState('test_001');
    expect(fresh.stage).toBe('idle');
    expect(fresh.auditTrail.length).toBe(0);
  });

  test('상태 전환이 감사 이력에 기록되어야 한다', () => {
    const mission = new MissionState('test_002');
    mission.transition('planner', '전략 수립 중...');
    mission.transition('researcher', '연구 수행 중...');
    mission.transition('completed', '완료');

    expect(mission.stage).toBe('completed');
    expect(mission.auditTrail.length).toBe(3);

    // Each entry records the stage it left and the stage it entered.
    const firstHop = mission.auditTrail[0];
    expect(firstHop.from).toBe('idle');
    expect(firstHop.to).toBe('planner');
    const secondHop = mission.auditTrail[1];
    expect(secondHop.from).toBe('planner');
    expect(secondHop.to).toBe('researcher');
  });

  test('toStructuredLog()가 올바른 JSON 형식을 반환해야 한다', () => {
    const mission = new MissionState('test_003');
    mission.transition('planner', '시작');
    mission.transition('completed', '완료');

    const structured = mission.toStructuredLog() as any;
    expect(structured.missionId).toBe('test_003');
    expect(structured.status).toBe('completed');
    expect(structured.totalElapsedMs).toBeGreaterThanOrEqual(0);
    expect(structured.transitionCount).toBe(2);
    expect(structured.transitions).toHaveLength(2);

    // Every transition record must expose these four fields.
    const firstTransition = structured.transitions[0];
    for (const field of ['from', 'to', 'durationMs', 'ts']) {
      expect(firstTransition).toHaveProperty(field);
    }
  });

  test('getElapsedMs()가 양수를 반환해야 한다', () => {
    // Zero is accepted: the call may land in the same millisecond as construction.
    const elapsedMs = new MissionState('test_004').getElapsedMs();
    expect(elapsedMs).toBeGreaterThanOrEqual(0);
  });
});
// ═══════════════════════════════════════════════
// Test Suite 4: AgentEngine Integration
// ═══════════════════════════════════════════════
/**
 * End-to-end checks of `AgentEngine.runMission` with mock agents: the happy
 * path, recovery from transient failures, immediate failure on permanent and
 * abort errors, retry exhaustion, and state cleanup after completion.
 */
describe('AgentEngine Integration', () => {
  /** Runs a mission with a fresh, never-aborted signal and the no-op progress callback. */
  const launch = (engine: AgentEngine, missionId: string, prompt: string, context: string) =>
    engine.runMission(missionId, prompt, context, createAbortSignal(), noopProgress);

  test('정상 미션 흐름이 최종 리포트를 반환해야 한다', async () => {
    const planner = new MockSuccessAgent('Plan: detailed strategy for the mission ahead.');
    const researcher = new MockSuccessAgent('Research: comprehensive analysis of available data.');
    const reporter = new MockSuccessAgent('Report: final synthesized output for the user.');
    const engine = new AgentEngine(planner, researcher, reporter);

    const report = await launch(engine, 'integration_001', 'Test prompt', 'brain context');

    // The mission result is exactly what the third agent produced.
    expect(report).toBe('Report: final synthesized output for the user.');
  });

  test('Transient 오류 발생 시 자동 재시도 후 복구되어야 한다', async () => {
    // Fails twice, then succeeds.
    const flakyPlanner = new MockTransientAgent(2);
    const researcher = new MockSuccessAgent('Research data after recovery from transient errors.');
    const reporter = new MockSuccessAgent('Final report written successfully after recovery.');
    const engine = new AgentEngine(flakyPlanner, researcher, reporter);

    const report = await launch(engine, 'integration_002', 'Test prompt', 'context');

    // Two failed attempts plus the successful one.
    expect(flakyPlanner.callCount).toBe(3);
    expect(report).toContain('Final report');
  }, 30000);

  test('Permanent 오류 발생 시 즉시 중단되어야 한다', async () => {
    const engine = new AgentEngine(
      new MockPermanentAgent(),
      new MockSuccessAgent(),
      new MockSuccessAgent()
    );

    const mission = launch(engine, 'integration_003', 'Test', 'ctx');

    await expect(mission).rejects.toThrow();
  });

  test('Abort 시그널 발생 시 Graceful Exit해야 한다', async () => {
    const engine = new AgentEngine(
      new MockAbortAgent(),
      new MockSuccessAgent(),
      new MockSuccessAgent()
    );

    const mission = launch(engine, 'integration_004', 'Test', 'ctx');

    await expect(mission).rejects.toThrow('AbortError');
  });

  test('Transient 오류가 maxRetries를 초과하면 실패해야 한다', async () => {
    // A failure budget of 100 is never exhausted, so every attempt fails.
    const alwaysFailing = new MockTransientAgent(100);
    const engine = new AgentEngine(
      alwaysFailing,
      new MockSuccessAgent(),
      new MockSuccessAgent()
    );

    const mission = launch(engine, 'integration_005', 'Test', 'ctx');

    await expect(mission).rejects.toThrow('재시도');
    // maxRetries (3) plus the initial attempt (1) gives 4 calls.
    expect(alwaysFailing.callCount).toBe(4);
  }, 30000);

  test('미션 완료 후 상태가 정리되어야 한다', async () => {
    const planner = new MockSuccessAgent('Plan output that meets validation requirements.');
    const researcher = new MockSuccessAgent('Research output that meets validation requirements.');
    const reporter = new MockSuccessAgent('Final report output that meets validation requirements.');
    const engine = new AgentEngine(planner, researcher, reporter);

    await launch(engine, 'integration_006', 'Test', 'ctx');

    // Once the mission has finished, the engine exposes no mission state.
    expect(engine.getMissionState()).toBeNull();
  });
});
// ═══════════════════════════════════════════════
// Test Suite 5: Performance Benchmark
// ═══════════════════════════════════════════════
/**
 * Timing-based checks of `AgentEngine.runMission`: average latency of a normal
 * mission, the overhead added by retry backoff, and how quickly a permanent
 * error surfaces. Measurements are wall-clock (`Date.now()`) and are printed
 * to the console in addition to being asserted.
 */
describe('Performance Benchmark', () => {
  test('정상 미션의 평균 처리 시간 측정', async () => {
    const iterations = 5;
    const durations: number[] = [];
    for (let i = 0; i < iterations; i++) {
      // A fresh engine per iteration, each agent taking 50ms.
      const engine = new AgentEngine(
        new MockSlowAgent(50),
        new MockSlowAgent(50),
        new MockSlowAgent(50)
      );
      const start = Date.now();
      await engine.runMission(`bench_normal_${i}`, 'Benchmark prompt', 'ctx', createAbortSignal(), noopProgress);
      durations.push(Date.now() - start);
    }
    const avg = durations.reduce((a, b) => a + b, 0) / durations.length;
    const max = Math.max(...durations);
    const min = Math.min(...durations);
    console.log(`\n📊 [Normal Mission Benchmark]`);
    console.log(` Iterations: ${iterations}`);
    console.log(` Avg Latency: ${Math.round(avg)}ms`);
    console.log(` Min: ${min}ms | Max: ${max}ms`);
    // Three 50ms agents plus overhead should land around 150–200ms; the
    // asserted ceiling is a looser 1000ms (presumably to tolerate timing jitter).
    expect(avg).toBeLessThan(1000);
  }, 30000);

  test('Transient 복구 시 재시도 오버헤드 측정', async () => {
    const engine = new AgentEngine(
      new MockTransientAgent(2), // fails twice, then succeeds (backoff: 1s + 2s)
      new MockSuccessAgent('Research after transient recovery benchmark data.'),
      new MockSuccessAgent('Final benchmark report output for measurement.')
    );
    const start = Date.now();
    await engine.runMission('bench_retry', 'Benchmark', 'ctx', createAbortSignal(), noopProgress);
    const elapsed = Date.now() - start;
    console.log(`\n📊 [Retry Overhead Benchmark]`);
    console.log(` Retries: 2`);
    console.log(` Total Time: ${elapsed}ms`);
    console.log(` Expected Backoff: ~3000ms (1000 + 2000)`);
    // Exponential backoff of 1s + 2s ≈ 3000ms plus processing time; the lower
    // bound proves the backoff actually happened, the upper bound that it did
    // not run away.
    expect(elapsed).toBeGreaterThan(2500);
    expect(elapsed).toBeLessThan(10000);
  }, 30000);

  test('Permanent 오류 시 즉시 중단 시간 측정', async () => {
    const engine = new AgentEngine(
      new MockPermanentAgent(),
      new MockSuccessAgent(),
      new MockSuccessAgent()
    );
    // Track whether the mission really failed: without this, a mission that
    // unexpectedly resolved would still pass the timing assertion below.
    let rejected = false;
    const start = Date.now();
    try {
      await engine.runMission('bench_permanent', 'Benchmark', 'ctx', createAbortSignal(), noopProgress);
    } catch {
      rejected = true;
    }
    const elapsed = Date.now() - start;
    console.log(`\n📊 [Permanent Error Benchmark]`);
    console.log(` Time to Fail: ${elapsed}ms`);
    expect(rejected).toBe(true);
    // A permanent error must surface without any retry backoff; the asserted
    // ceiling is 500ms.
    expect(elapsed).toBeLessThan(500);
  });
});
// ═══════════════════════════════════════════════
// Test Suite 6: Concurrency & Stress Tests
// ═══════════════════════════════════════════════
/**
 * Runs several `AgentEngine` missions at the same time: independent missions,
 * a mix of succeeding/failing/recovering missions, a burst of ten missions,
 * and two missions that share one mission ID.
 */
describe('Concurrency & Stress Tests', () => {
  test('5개 미션 동시 실행 시 모두 정상 완료되어야 한다', async () => {
    const concurrentCount = 5;
    const results: Promise<string>[] = [];
    for (let i = 0; i < concurrentCount; i++) {
      const engine = new AgentEngine(
        new MockSuccessAgent(`Plan output ${i} that passes validation checks.`),
        new MockSuccessAgent(`Research output ${i} that passes validation checks.`),
        new MockSuccessAgent(`Report output ${i} that passes validation checks.`)
      );
      // Start the mission without awaiting so that all five run concurrently.
      results.push(
        engine.runMission(`concurrent_${i}`, `Prompt ${i}`, 'ctx', createAbortSignal(), noopProgress)
      );
    }
    const outputs = await Promise.all(results);
    expect(outputs).toHaveLength(concurrentCount);
    // Each mission must return its own report, not a neighbour's.
    outputs.forEach((output, i) => {
      expect(output).toContain(`Report output ${i}`);
    });
    console.log(`\n📊 [Concurrency Test]`);
    console.log(` Concurrent Missions: ${concurrentCount}`);
    console.log(` All Resolved: ✅`);
  }, 30000);

  test('동시에 Transient + Permanent + 정상 미션이 혼합될 때 각각 올바르게 처리되어야 한다', async () => {
    // Mission 1: succeeds normally.
    const engine1 = new AgentEngine(
      new MockSuccessAgent('Plan result that meets the minimum validation length.'),
      new MockSuccessAgent('Research result that meets the minimum validation length.'),
      new MockSuccessAgent('Normal report completed successfully with all checks passed.')
    );
    const p1 = engine1.runMission('mix_normal', 'Test', 'ctx', createAbortSignal(), noopProgress);
    // Mission 2: permanent failure, converted to a marker string so that
    // Promise.all below does not reject.
    const engine2 = new AgentEngine(
      new MockPermanentAgent(),
      new MockSuccessAgent(),
      new MockSuccessAgent()
    );
    const p2 = engine2.runMission('mix_permanent', 'Test', 'ctx', createAbortSignal(), noopProgress)
      .catch((e: unknown) => `ERROR:${e instanceof Error ? e.message : String(e)}`);
    // Mission 3: recovers after one transient failure.
    const engine3 = new AgentEngine(
      new MockTransientAgent(1), // fails once, then succeeds
      new MockSuccessAgent('Research after single transient recovery for mixed test.'),
      new MockSuccessAgent('Report after transient recovery completed successfully.')
    );
    const p3 = engine3.runMission('mix_transient', 'Test', 'ctx', createAbortSignal(), noopProgress);
    const [r1, r2, r3] = await Promise.all([p1, p2, p3]);
    // The normal mission succeeds.
    expect(r1).toContain('Normal report');
    // The permanent mission yields the error marker.
    expect(r2).toContain('ERROR:');
    // The transient mission succeeds after recovering.
    expect(r3).toContain('Report after transient');
    console.log(`\n📊 [Mixed Error Concurrency Test]`);
    console.log(` Normal: ✅ | Permanent: ❌ (expected) | Transient: ✅ (recovered)`);
  }, 30000);

  test('큐 포화 상태에서 10개 작업이 순서대로 처리되어야 한다', async () => {
    const taskCount = 10;
    const completionOrder: number[] = [];
    const results: Promise<string>[] = [];
    for (let i = 0; i < taskCount; i++) {
      const idx = i;
      const engine = new AgentEngine(
        new MockSuccessAgent(`Plan ${idx} passes the minimum validation requirement.`),
        new MockSuccessAgent(`Research ${idx} passes the minimum validation requirement.`),
        {
          // Inline final-stage agent that records when its mission reaches it.
          execute: async () => {
            completionOrder.push(idx);
            return `Report ${idx} is valid and meets all minimum length requirements.`;
          }
        } as IAgent
      );
      results.push(
        engine.runMission(`queue_sat_${idx}`, `Prompt ${idx}`, 'ctx', createAbortSignal(), noopProgress)
      );
    }
    const outputs = await Promise.all(results);
    // Every task must complete.
    expect(outputs).toHaveLength(taskCount);
    expect(completionOrder).toHaveLength(taskCount);
    // Each mission must return its own report (the trailing space keeps
    // "Report 1 " from matching "Report 10 ").
    outputs.forEach((output, i) => {
      expect(output).toContain(`Report ${i} `);
    });
    // Every index must have completed exactly once — no task dropped or duplicated.
    // NOTE(review): the test title promises in-order processing, but strict
    // ordering across independent engines is not asserted here because nothing
    // in this file establishes that guarantee — confirm against the engine.
    expect([...completionOrder].sort((a, b) => a - b)).toEqual(
      Array.from({ length: taskCount }, (_, i) => i)
    );
    console.log(`\n📊 [Queue Saturation Test]`);
    console.log(` Tasks Submitted: ${taskCount}`);
    console.log(` Tasks Completed: ${completionOrder.length}`);
    console.log(` Completion Order: [${completionOrder.join(', ')}]`);
  }, 60000);

  test('동일 미션 ID로 동시 실행 시 Mutex가 경합을 방지해야 한다', async () => {
    const sharedMissionId = 'race_condition_test';
    // Counts how many times engine1's planner stage runs.
    let executionCount = 0;
    const engine1 = new AgentEngine(
      {
        execute: async () => {
          executionCount++;
          // Hold the planner stage open for 100ms so the two missions overlap in time.
          await new Promise(r => setTimeout(r, 100));
          return `Planner result from execution ${executionCount} for race test.`;
        }
      } as IAgent,
      new MockSuccessAgent('Research result that is valid and passes all minimum checks.'),
      new MockSuccessAgent('Report result that is valid and passes all minimum checks.')
    );
    const engine2 = new AgentEngine(
      new MockSuccessAgent('Plan result that is valid and passes all minimum checks.'),
      new MockSuccessAgent('Research result that is valid and passes all minimum checks.'),
      new MockSuccessAgent('Report result that is valid and passes all minimum checks.')
    );
    // Two engines run the same mission ID at once; the original intent is that
    // a mutex serialises them.
    // NOTE(review): serialisation itself is not observable from these
    // assertions — only that both missions finish and the planner ran once.
    const [r1, r2] = await Promise.all([
      engine1.runMission(sharedMissionId, 'Test', 'ctx', createAbortSignal(), noopProgress),
      engine2.runMission(sharedMissionId, 'Test', 'ctx', createAbortSignal(), noopProgress)
    ]);
    // Both missions must succeed.
    expect(r1).toBeTruthy();
    expect(r2).toBeTruthy();
    // engine1's planner must have run exactly once: no duplicate or re-entrant execution.
    expect(executionCount).toBe(1);
    console.log(`\n📊 [Race Condition / Mutex Test]`);
    console.log(` Shared Mission ID: ${sharedMissionId}`);
    console.log(` Both Completed: ✅ (Mutex serialized execution)`);
  }, 30000);
});