feat: v2.62.0 - Astra Autonomous Loop (AAL) foundation & enhanced file analysis

This commit is contained in:
g1nation
2026-05-04 12:58:43 +09:00
parent 445d530b63
commit 215c5f9457
23 changed files with 2964 additions and 62 deletions
+543
View File
@@ -0,0 +1,543 @@
/**
* AgentEngine Integration Tests & Performance Benchmarks
*
* 검증 대상:
* 1. ErrorClassifier — 오류 유형(Transient/Permanent/Abort) 자동 분류
* 2. ErrorRecoveryMatrix — 각 규칙이 의도한 대응 전략으로 매핑되는지 검증
* 3. resilientExecute — 지수 백오프 재시도 및 즉시 중단 흐름
* 4. MissionState — 감사 이력(Audit Trail) 및 구조화된 로그 포맷
* 5. Performance Benchmark — 미션 평균 처리 시간 및 재시도 오버헤드 측정
*/
import {
AgentEngine,
IAgent,
AgentExecuteOptions,
ErrorClassifier,
ErrorType,
ERROR_RECOVERY_MATRIX,
MissionState,
PipelineStage
} from '../src/lib/engine';
// ─── Mock Agents ───
/**
 * Test double that always resolves with a fixed response string and keeps
 * track of how many times it has been invoked.
 */
class MockSuccessAgent implements IAgent {
    /** Number of `execute` invocations observed so far. */
    public callCount = 0;

    /** Canned text returned by every `execute` call. */
    private readonly response: string;

    constructor(response: string = 'This is a valid mock response for testing purposes.') {
        this.response = response;
    }

    /**
     * Records the call and resolves with the canned response.
     * All arguments are accepted for signature compatibility and ignored.
     */
    async execute(input: string, context?: string, signal?: AbortSignal, options?: AgentExecuteOptions): Promise<string> {
        this.callCount += 1;
        return this.response;
    }
}
/**
 * Test double that rejects with a connection-refused error for the first
 * `failCount` calls and resolves on every call after that.
 */
class MockTransientAgent implements IAgent {
    /** Number of `execute` invocations observed so far (failures included). */
    public callCount = 0;

    /** How many leading calls should fail before the agent starts succeeding. */
    private readonly failCount: number;

    constructor(failCount: number = 2) {
        this.failCount = failCount;
    }

    /**
     * Counts the call, then either throws the simulated network error or
     * resolves with the recovery message. Arguments are ignored.
     */
    async execute(input: string, context?: string, signal?: AbortSignal, options?: AgentExecuteOptions): Promise<string> {
        this.callCount += 1;
        const recovered = this.callCount > this.failCount;
        if (recovered) {
            return 'Recovery successful after transient failures.';
        }
        throw new Error('ECONNREFUSED: Connection refused');
    }
}
/** Test double whose `execute` always rejects with a "model not found" error. */
class MockPermanentAgent implements IAgent {
    async execute(): Promise<string> {
        const failure = new Error('404: model not found');
        throw failure;
    }
}
/** Test double whose `execute` always rejects with a timeout error. */
class MockTimeoutAgent implements IAgent {
    async execute(): Promise<string> {
        const failure = new Error('timeout: request took too long');
        throw failure;
    }
}
/** Test double whose `execute` always rejects with a fetch failure. */
class MockNetworkAgent implements IAgent {
    async execute(): Promise<string> {
        const failure = new Error('Failed to fetch');
        throw failure;
    }
}
/**
 * Test double whose `execute` always rejects with an Error whose `name` and
 * `message` are both 'AbortError'.
 */
class MockAbortAgent implements IAgent {
    async execute(): Promise<string> {
        // Build the error and stamp its name in one expression.
        throw Object.assign(new Error('AbortError'), { name: 'AbortError' });
    }
}
/**
 * Test double that waits `delayMs` milliseconds before resolving with a fixed
 * response; used to give missions a measurable duration.
 */
class MockSlowAgent implements IAgent {
    /** Delay applied before each response, in milliseconds. */
    private readonly delayMs: number;

    constructor(delayMs: number = 100) {
        this.delayMs = delayMs;
    }

    async execute(): Promise<string> {
        await new Promise((wake) => {
            setTimeout(wake, this.delayMs);
        });
        return 'Slow but valid agent response for performance measurement.';
    }
}
// ─── Helper ───
/**
 * Returns the signal of a brand-new AbortController.
 * The controller itself is discarded, so the returned signal is never aborted.
 */
function createAbortSignal(): AbortSignal {
    return new AbortController().signal;
}
/** Progress callback that deliberately ignores every stage/message notification. */
function noopProgress(_stage: PipelineStage, _message: string): void {
    // Intentionally empty.
}
// ═══════════════════════════════════════════════
// Test Suite 1: ErrorClassifier
// ═══════════════════════════════════════════════
// Checks that ErrorClassifier.classify maps error messages and names onto the
// expected ErrorType and recovery rule.
describe('ErrorClassifier', () => {
    // Classifies a fresh Error that carries the given message.
    const classifyMessage = (message: string) => ErrorClassifier.classify(new Error(message));

    describe('Transient Error Classification', () => {
        // Messages expected to be treated as retryable.
        const retryableSamples = [
            'ECONNREFUSED: Connection refused',
            'Request timeout exceeded',
            'ETIMEDOUT: operation timed out',
            'ECONNRESET: connection reset by peer',
            'network error occurred',
            'Failed to fetch',
            'HTTP 503: Service Unavailable',
            'HTTP 502: Bad Gateway',
            'HTTP 429: Too Many Requests',
            'socket hang up',
        ];

        it.each(retryableSamples)('"%s" → TRANSIENT', (sample) => {
            const { type, rule } = classifyMessage(sample);
            expect(type).toBe(ErrorType.TRANSIENT);
            expect(rule.action).toBe('retry');
            expect(rule.maxRetries).toBe(3);
        });
    });

    describe('Permanent Error Classification', () => {
        // Messages expected to fail immediately without any retry.
        const fatalSamples = [
            'HTTP 401: Unauthorized',
            'HTTP 403: Forbidden',
            'HTTP 404: Not Found',
            'Planner 에이전트로부터 유효한 응답을 받지 못했습니다',
            'Ollama URL이 설정되지 않았습니다',
            'invalid model name specified',
            'model not found in registry',
        ];

        it.each(fatalSamples)('"%s" → PERMANENT', (sample) => {
            const { type, rule } = classifyMessage(sample);
            expect(type).toBe(ErrorType.PERMANENT);
            expect(rule.action).toBe('fail_with_message');
            expect(rule.maxRetries).toBe(0);
        });
    });

    describe('Abort Classification', () => {
        it('AbortError by name → ABORT', () => {
            // Only the error name signals the abort; the message is unrelated.
            const cancelled = Object.assign(new Error('cancelled'), { name: 'AbortError' });
            const { type, rule } = ErrorClassifier.classify(cancelled);
            expect(type).toBe(ErrorType.ABORT);
            expect(rule.action).toBe('abort');
        });

        it('AbortError by message → ABORT', () => {
            const { type } = classifyMessage('AbortError');
            expect(type).toBe(ErrorType.ABORT);
        });
    });

    describe('Unknown Error → Permanent (보수적 처리)', () => {
        it('분류 불가한 오류는 PERMANENT로 처리', () => {
            // Unrecognized messages are expected to fall back to PERMANENT.
            const { type } = classifyMessage('something completely unexpected');
            expect(type).toBe(ErrorType.PERMANENT);
        });
    });
});
// ═══════════════════════════════════════════════
// Test Suite 2: Error Recovery Matrix
// ═══════════════════════════════════════════════
// Checks the static ERROR_RECOVERY_MATRIX table: every error type has a rule
// and each rule carries the expected action and retry budget.
describe('Error Recovery Matrix', () => {
    // Looks up the matrix entry for one error type; each test assumes it exists.
    const ruleFor = (wanted: ErrorType) =>
        ERROR_RECOVERY_MATRIX.find((entry) => entry.type === wanted)!;

    it('매트릭스에 3가지 유형이 모두 정의되어 있어야 한다', () => {
        const definedTypes = ERROR_RECOVERY_MATRIX.map((entry) => entry.type);
        for (const required of [ErrorType.TRANSIENT, ErrorType.PERMANENT, ErrorType.ABORT]) {
            expect(definedTypes).toContain(required);
        }
    });

    it('TRANSIENT 규칙은 재시도가 가능해야 한다', () => {
        const transientRule = ruleFor(ErrorType.TRANSIENT);
        expect(transientRule.maxRetries).toBeGreaterThan(0);
        expect(transientRule.backoffBaseMs).toBeGreaterThan(0);
        expect(transientRule.action).toBe('retry');
    });

    it('PERMANENT 규칙은 재시도하지 않아야 한다', () => {
        const permanentRule = ruleFor(ErrorType.PERMANENT);
        expect(permanentRule.maxRetries).toBe(0);
        expect(permanentRule.action).toBe('fail_with_message');
        // A permanent failure must carry a non-empty user-facing message.
        expect(permanentRule.userMessage.length).toBeGreaterThan(0);
    });

    it('ABORT 규칙은 조용하게 종료해야 한다', () => {
        const abortRule = ruleFor(ErrorType.ABORT);
        expect(abortRule.maxRetries).toBe(0);
        expect(abortRule.action).toBe('abort');
    });
});
// ═══════════════════════════════════════════════
// Test Suite 3: MissionState
// ═══════════════════════════════════════════════
// Checks MissionState's initial stage, its audit trail of transitions, and the
// shape of the structured log it produces.
describe('MissionState', () => {
    it('초기 상태는 idle이어야 한다', () => {
        const fresh = new MissionState('test_001');
        expect(fresh.stage).toBe('idle');
        expect(fresh.auditTrail).toHaveLength(0);
    });

    it('상태 전환이 감사 이력에 기록되어야 한다', () => {
        const mission = new MissionState('test_002');
        mission.transition('planner', '전략 수립 중...');
        mission.transition('researcher', '연구 수행 중...');
        mission.transition('completed', '완료');

        expect(mission.stage).toBe('completed');
        expect(mission.auditTrail).toHaveLength(3);

        // The first two entries must chain idle → planner → researcher.
        const [first, second] = mission.auditTrail;
        expect(first.from).toBe('idle');
        expect(first.to).toBe('planner');
        expect(second.from).toBe('planner');
        expect(second.to).toBe('researcher');
    });

    it('toStructuredLog()가 올바른 JSON 형식을 반환해야 한다', () => {
        const mission = new MissionState('test_003');
        mission.transition('planner', '시작');
        mission.transition('completed', '완료');

        const log = mission.toStructuredLog() as any;
        expect(log.missionId).toBe('test_003');
        expect(log.status).toBe('completed');
        expect(log.totalElapsedMs).toBeGreaterThanOrEqual(0);
        expect(log.transitionCount).toBe(2);
        expect(log.transitions).toHaveLength(2);

        // Every transition record must expose these four fields.
        for (const field of ['from', 'to', 'durationMs', 'ts']) {
            expect(log.transitions[0]).toHaveProperty(field);
        }
    });

    it('getElapsedMs()가 양수를 반환해야 한다', () => {
        const mission = new MissionState('test_004');
        // Zero is accepted because the check may run within the same millisecond.
        expect(mission.getElapsedMs()).toBeGreaterThanOrEqual(0);
    });
});
// ═══════════════════════════════════════════════
// Test Suite 4: AgentEngine Integration
// ═══════════════════════════════════════════════
// End-to-end checks of AgentEngine.runMission with mock planner, researcher
// and reporter agents (constructor arguments, in that order as the mocks suggest).
describe('AgentEngine Integration', () => {
    // Starts a mission with a fresh, never-aborted signal and a no-op progress callback.
    const launch = (engine: AgentEngine, missionId: string, prompt: string, context: string) =>
        engine.runMission(missionId, prompt, context, createAbortSignal(), noopProgress);

    it('정상 미션 흐름이 최종 리포트를 반환해야 한다', async () => {
        const planner = new MockSuccessAgent('Plan: detailed strategy for the mission ahead.');
        const researcher = new MockSuccessAgent('Research: comprehensive analysis of available data.');
        const reporter = new MockSuccessAgent('Report: final synthesized output for the user.');
        const engine = new AgentEngine(planner, researcher, reporter);

        const report = await launch(engine, 'integration_001', 'Test prompt', 'brain context');

        expect(report).toBe('Report: final synthesized output for the user.');
    });

    it('Transient 오류 발생 시 자동 재시도 후 복구되어야 한다', async () => {
        // Fails twice, then succeeds.
        const flakyPlanner = new MockTransientAgent(2);
        const engine = new AgentEngine(
            flakyPlanner,
            new MockSuccessAgent('Research data after recovery from transient errors.'),
            new MockSuccessAgent('Final report written successfully after recovery.')
        );

        const report = await launch(engine, 'integration_002', 'Test prompt', 'context');

        // Two failed attempts plus the successful one.
        expect(flakyPlanner.callCount).toBe(3);
        expect(report).toContain('Final report');
    }, 30000);

    it('Permanent 오류 발생 시 즉시 중단되어야 한다', async () => {
        const engine = new AgentEngine(
            new MockPermanentAgent(),
            new MockSuccessAgent(),
            new MockSuccessAgent()
        );

        const mission = launch(engine, 'integration_003', 'Test', 'ctx');

        await expect(mission).rejects.toThrow();
    });

    it('Abort 시그널 발생 시 Graceful Exit해야 한다', async () => {
        const engine = new AgentEngine(
            new MockAbortAgent(),
            new MockSuccessAgent(),
            new MockSuccessAgent()
        );

        const mission = launch(engine, 'integration_004', 'Test', 'ctx');

        await expect(mission).rejects.toThrow('AbortError');
    });

    it('Transient 오류가 maxRetries를 초과하면 실패해야 한다', async () => {
        // A fail count of 100 means the agent never recovers within the retry budget.
        const neverRecovers = new MockTransientAgent(100);
        const engine = new AgentEngine(
            neverRecovers,
            new MockSuccessAgent(),
            new MockSuccessAgent()
        );

        const mission = launch(engine, 'integration_005', 'Test', 'ctx');

        await expect(mission).rejects.toThrow('재시도');
        // maxRetries (3) + the initial attempt (1) = 4 calls.
        expect(neverRecovers.callCount).toBe(4);
    }, 30000);

    it('미션 완료 후 상태가 정리되어야 한다', async () => {
        const engine = new AgentEngine(
            new MockSuccessAgent('Plan output that meets validation requirements.'),
            new MockSuccessAgent('Research output that meets validation requirements.'),
            new MockSuccessAgent('Final report output that meets validation requirements.')
        );

        await launch(engine, 'integration_006', 'Test', 'ctx');

        // The engine is expected to reset its mission state to null once done.
        expect(engine.getMissionState()).toBeNull();
    });
});
// ═══════════════════════════════════════════════
// Test Suite 5: Performance Benchmark
// ═══════════════════════════════════════════════
// Coarse wall-clock benchmarks for AgentEngine.runMission. The thresholds are
// deliberately generous so the suite stays stable on slow CI machines; the
// printed numbers are the real output of interest.
describe('Performance Benchmark', () => {
    test('정상 미션의 평균 처리 시간 측정', async () => {
        const iterations = 5;
        const durations: number[] = [];

        for (let i = 0; i < iterations; i++) {
            // A fresh engine per iteration so no state carries over between runs.
            const engine = new AgentEngine(
                new MockSlowAgent(50),
                new MockSlowAgent(50),
                new MockSlowAgent(50)
            );
            const start = Date.now();
            await engine.runMission(`bench_normal_${i}`, 'Benchmark prompt', 'ctx', createAbortSignal(), noopProgress);
            durations.push(Date.now() - start);
        }

        const avg = durations.reduce((a, b) => a + b, 0) / durations.length;
        const max = Math.max(...durations);
        const min = Math.min(...durations);

        console.log(`\n📊 [Normal Mission Benchmark]`);
        console.log(` Iterations: ${iterations}`);
        console.log(` Avg Latency: ${Math.round(avg)}ms`);
        console.log(` Min: ${min}ms | Max: ${max}ms`);

        // Three 50 ms agents (presumably run one after another) plus engine
        // overhead should land around 150-200 ms; the 1000 ms ceiling only
        // guards against gross regressions.
        expect(avg).toBeLessThan(1000);
    }, 30000);

    test('Transient 복구 시 재시도 오버헤드 측정', async () => {
        const engine = new AgentEngine(
            // Fails twice, then succeeds. Expected backoff is 1 s + 2 s per the
            // engine's retry policy, which is defined outside this file.
            new MockTransientAgent(2),
            new MockSuccessAgent('Research after transient recovery benchmark data.'),
            new MockSuccessAgent('Final benchmark report output for measurement.')
        );

        const start = Date.now();
        await engine.runMission('bench_retry', 'Benchmark', 'ctx', createAbortSignal(), noopProgress);
        const elapsed = Date.now() - start;

        console.log(`\n📊 [Retry Overhead Benchmark]`);
        console.log(` Retries: 2`);
        console.log(` Total Time: ${elapsed}ms`);
        console.log(` Expected Backoff: ~3000ms (1000 + 2000)`);

        // Exponential backoff of roughly 3000 ms plus processing time; the
        // lower bound tolerates timer jitter.
        expect(elapsed).toBeGreaterThan(2500);
        expect(elapsed).toBeLessThan(10000);
    }, 30000);

    test('Permanent 오류 시 즉시 중단 시간 측정', async () => {
        const engine = new AgentEngine(
            new MockPermanentAgent(),
            new MockSuccessAgent(),
            new MockSuccessAgent()
        );

        const start = Date.now();
        // Assert the rejection explicitly: a silently swallowed catch would let
        // this benchmark pass even if the mission unexpectedly succeeded.
        await expect(
            engine.runMission('bench_permanent', 'Benchmark', 'ctx', createAbortSignal(), noopProgress)
        ).rejects.toThrow();
        const elapsed = Date.now() - start;

        console.log(`\n📊 [Permanent Error Benchmark]`);
        console.log(` Time to Fail: ${elapsed}ms`);

        // A permanent error must fail without any retry backoff; 500 ms is a
        // generous ceiling for what should take only a few milliseconds.
        expect(elapsed).toBeLessThan(500);
    });
});
// ═══════════════════════════════════════════════
// Test Suite 6: Concurrency & Stress Tests
// ═══════════════════════════════════════════════
// Runs several missions at once (separate engines, mixed outcomes, a shared
// mission ID) to check that concurrent missions do not interfere with each other.
describe('Concurrency & Stress Tests', () => {
    test('5개 미션 동시 실행 시 모두 정상 완료되어야 한다', async () => {
        const concurrentCount = 5;
        const results: Promise<string>[] = [];

        for (let i = 0; i < concurrentCount; i++) {
            const engine = new AgentEngine(
                new MockSuccessAgent(`Plan output ${i} that passes validation checks.`),
                new MockSuccessAgent(`Research output ${i} that passes validation checks.`),
                new MockSuccessAgent(`Report output ${i} that passes validation checks.`)
            );
            results.push(
                engine.runMission(`concurrent_${i}`, `Prompt ${i}`, 'ctx', createAbortSignal(), noopProgress)
            );
        }

        const outputs = await Promise.all(results);

        expect(outputs).toHaveLength(concurrentCount);
        // Each mission must receive its own reporter output, not a neighbour's.
        outputs.forEach((output, i) => {
            expect(output).toContain(`Report output ${i}`);
        });

        console.log(`\n📊 [Concurrency Test]`);
        console.log(` Concurrent Missions: ${concurrentCount}`);
        console.log(` All Resolved: ✅`);
    }, 30000);

    test('동시에 Transient + Permanent + 정상 미션이 혼합될 때 각각 올바르게 처리되어야 한다', async () => {
        // Mission 1: succeeds normally.
        const engine1 = new AgentEngine(
            new MockSuccessAgent('Plan result that meets the minimum validation length.'),
            new MockSuccessAgent('Research result that meets the minimum validation length.'),
            new MockSuccessAgent('Normal report completed successfully with all checks passed.')
        );
        const p1 = engine1.runMission('mix_normal', 'Test', 'ctx', createAbortSignal(), noopProgress);

        // Mission 2: permanent failure, converted to a marker string so that
        // Promise.all below does not reject.
        const engine2 = new AgentEngine(
            new MockPermanentAgent(),
            new MockSuccessAgent(),
            new MockSuccessAgent()
        );
        const p2 = engine2.runMission('mix_permanent', 'Test', 'ctx', createAbortSignal(), noopProgress)
            .catch((e: unknown) => `ERROR:${e instanceof Error ? e.message : String(e)}`);

        // Mission 3: one transient failure, then recovery.
        const engine3 = new AgentEngine(
            new MockTransientAgent(1),
            new MockSuccessAgent('Research after single transient recovery for mixed test.'),
            new MockSuccessAgent('Report after transient recovery completed successfully.')
        );
        const p3 = engine3.runMission('mix_transient', 'Test', 'ctx', createAbortSignal(), noopProgress);

        const [r1, r2, r3] = await Promise.all([p1, p2, p3]);

        // The normal mission succeeds.
        expect(r1).toContain('Normal report');
        // The permanent mission surfaces as the error marker.
        expect(r2).toContain('ERROR:');
        // The transient mission recovers and succeeds.
        expect(r3).toContain('Report after transient');

        console.log(`\n📊 [Mixed Error Concurrency Test]`);
        console.log(` Normal: ✅ | Permanent: ❌ (expected) | Transient: ✅ (recovered)`);
    }, 30000);

    test('큐 포화 상태에서 10개 작업이 순서대로 처리되어야 한다', async () => {
        const taskCount = 10;
        const completionOrder: number[] = [];
        const results: Promise<string>[] = [];

        for (let i = 0; i < taskCount; i++) {
            const idx = i;
            const engine = new AgentEngine(
                new MockSuccessAgent(`Plan ${idx} passes the minimum validation requirement.`),
                new MockSuccessAgent(`Research ${idx} passes the minimum validation requirement.`),
                {
                    // The reporter records when each mission reaches its final stage.
                    execute: async () => {
                        completionOrder.push(idx);
                        return `Report ${idx} is valid and meets all minimum length requirements.`;
                    }
                } as IAgent
            );
            results.push(
                engine.runMission(`queue_sat_${idx}`, `Prompt ${idx}`, 'ctx', createAbortSignal(), noopProgress)
            );
        }

        const outputs = await Promise.all(results);

        // Every task must complete.
        expect(outputs).toHaveLength(taskCount);
        expect(completionOrder).toHaveLength(taskCount);

        // Results must line up with submission order: mission i gets report i.
        outputs.forEach((output, i) => {
            expect(output).toContain(`Report ${i} is valid`);
        });

        // Each task's reporter ran exactly once. The relative completion order
        // across independent engines is only logged, not asserted.
        const expectedIndices = Array.from({ length: taskCount }, (_, i) => i);
        expect([...completionOrder].sort((a, b) => a - b)).toEqual(expectedIndices);

        console.log(`\n📊 [Queue Saturation Test]`);
        console.log(` Tasks Submitted: ${taskCount}`);
        console.log(` Tasks Completed: ${completionOrder.length}`);
        console.log(` Completion Order: [${completionOrder.join(', ')}]`);
    }, 60000);

    test('동일 미션 ID로 동시 실행 시 Mutex가 경합을 방지해야 한다', async () => {
        const sharedMissionId = 'race_condition_test';
        let executionCount = 0;

        const engine1 = new AgentEngine(
            {
                // Slow planner that counts how many times it is invoked.
                execute: async () => {
                    executionCount++;
                    await new Promise(r => setTimeout(r, 100));
                    return `Planner result from execution ${executionCount} for race test.`;
                }
            } as IAgent,
            new MockSuccessAgent('Research result that is valid and passes all minimum checks.'),
            new MockSuccessAgent('Report result that is valid and passes all minimum checks.')
        );
        const engine2 = new AgentEngine(
            new MockSuccessAgent('Plan result that is valid and passes all minimum checks.'),
            new MockSuccessAgent('Research result that is valid and passes all minimum checks.'),
            new MockSuccessAgent('Report result that is valid and passes all minimum checks.')
        );

        // Two engines run the same mission ID at once; per the test title a
        // mutex in the engine is expected to keep them from racing.
        const [r1, r2] = await Promise.all([
            engine1.runMission(sharedMissionId, 'Test', 'ctx', createAbortSignal(), noopProgress),
            engine2.runMission(sharedMissionId, 'Test', 'ctx', createAbortSignal(), noopProgress)
        ]);

        // Both missions must complete successfully.
        expect(r1).toBeTruthy();
        expect(r2).toBeTruthy();
        // The counting planner belongs only to engine1 and must run exactly
        // once: contention on the shared ID must not trigger a re-run.
        expect(executionCount).toBe(1);

        console.log(`\n📊 [Race Condition / Mutex Test]`);
        console.log(` Shared Mission ID: ${sharedMissionId}`);
        console.log(` Both Completed: ✅ (Mutex serialized execution)`);
    }, 30000);
});
+29 -5
View File
@@ -47,7 +47,13 @@ describe('Second Brain Trace', () => {
'# Customer Journey Virtual Store',
'',
'Customer-facing virtual stores should connect spatial experience to product discovery, product understanding, and purchase conversion.',
'Stakeholder approval often depends on requirement fit, business value, and acceptance criteria rather than visual novelty alone.'
'Stakeholder approval often depends on requirement fit, business value, and acceptance criteria rather than visual novelty alone.',
'Customer journey mapping reveals how users navigate from initial interest to final purchase decision.',
'The approval process evaluates customer experience quality, conversion flow effectiveness, and business value proposition.',
'Product discovery in virtual stores requires intuitive spatial navigation and curated customer journey touchpoints.',
'Stakeholder approval criteria include requirement fit assessment, business value validation, and acceptance criteria verification.',
'Virtual store UX should prioritize customer journey continuity, product discovery efficiency, and conversion optimization.',
'Business value is measured through customer engagement metrics, approval rates, and conversion funnel analysis.'
].join('\n'),
'utf8'
);
@@ -58,7 +64,13 @@ describe('Second Brain Trace', () => {
'# Report Evidence Mapping',
'',
'Template-driven reports should map each section to evidence, insight, risk, and next action knowledge.',
'A schema should guide structure while Second Brain notes supply the actual content.'
'A schema should guide structure while Second Brain notes supply the actual content.',
'Evidence mapping connects report sections to factual source documents and verified project records.',
'Each report template section should link to concrete evidence, analytical insight, identified risk, and actionable next steps.',
'The evidence layer provides facts and source documentation for the report body.',
'Insight sections synthesize patterns from evidence into strategic analysis and interpretation.',
'Risk sections document limitations, validation gaps, tradeoffs, and items requiring verification.',
'Action sections translate knowledge into MVP implementation steps, recommendations, and decision items.'
].join('\n'),
'utf8'
);
@@ -79,7 +91,11 @@ describe('Second Brain Trace', () => {
'# Knowledge Graph Concepts',
'',
'Ontology notes define concepts, relations, categories, and graph structure before writing.',
'They help a report decide which ideas are parent concepts, evidence, methods, and outcomes.'
'They help a report decide which ideas are parent concepts, evidence, methods, and outcomes.',
'An ontology provides a taxonomy of concept types, relation types, and category hierarchies.',
'Knowledge graph structure organizes domain concepts into navigable networks of meaning.',
'Ontology-driven classification helps reports maintain consistent concept definitions and relation mappings.',
'Graph-based concept organization enables systematic categorization and cross-referencing of knowledge.'
].join('\n'),
'utf8'
);
@@ -90,7 +106,11 @@ describe('Second Brain Trace', () => {
'# Report Narrative Structure',
'',
'Writing guidance should shape report structure, section order, narrative flow, and concise executive summaries.',
'It should not replace evidence; it organizes selected knowledge into a readable output.'
'It should not replace evidence; it organizes selected knowledge into a readable output.',
'Report writing structure defines the narrative arc from executive summary through detailed analysis to conclusions.',
'Template-based writing organizes content into headline, body, evidence citation, and recommendation sections.',
'Good report narrative maintains logical flow between sections while preserving analytical rigor.',
'Writing style guidance ensures consistent tone, appropriate formality, and reader-oriented structure across reports.'
].join('\n'),
'utf8'
);
@@ -101,7 +121,11 @@ describe('Second Brain Trace', () => {
'# Implementation Techniques',
'',
'Technical technique notes explain implementation methods, architecture choices, and tooling tradeoffs.',
'They should support practical next actions after the report identifies risks and evidence.'
'They should support practical next actions after the report identifies risks and evidence.',
'Implementation technique documentation covers method selection, architecture decision rationale, and tool evaluation.',
'Technical references provide concrete implementation patterns, code architecture examples, and integration approaches.',
'Technique notes bridge the gap between strategic analysis and practical engineering execution.',
'Architecture technique guides help translate report findings into actionable technical implementation plans.'
].join('\n'),
'utf8'
);