/**
 * Tests for the Critic Agent, Reflection Store and Task Eval Harness
 * (Self-Evolving OS P1 remainder + P3).
 */
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import {
  buildCritiquePrompt,
  parseCritique,
  runCriticReview,
  formatCriticFooter,
} from '../src/intelligence/criticAgent';
import {
  appendReflection,
  loadReflections,
  summarizeFailurePatterns,
  recurrentMisses,
  formatGrowthReport,
  ReflectionRecord,
} from '../src/intelligence/reflectionStore';
import {
  loadTaskGoldenSet,
  scoreTaskAnswer,
  runTaskEval,
  formatTaskEvalReport,
  TASK_GOLDEN_DIR,
  TaskGoldenRecord,
} from '../src/intelligence/taskEvalHarness';
import { DEFAULT_TASK_REQUIREMENTS, buildRequirementGraphBlock } from '../src/intelligence/requirementGraph';

/**
 * The default task requirement whose id is 'meeting-minutes'.
 * The non-null assertion assumes that id is present in DEFAULT_TASK_REQUIREMENTS.
 */
const MEETING_REQ = DEFAULT_TASK_REQUIREMENTS.find((r) => r.id === 'meeting-minutes')!;

/** Creates a fresh, uniquely named temporary directory and returns its path. */
function tmpBrain(): string {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'astra-test-brain-'));
}

/**
 * Runs `run` against a fresh temporary brain directory and always removes the
 * directory afterwards, so test runs do not leave temp directories behind.
 *
 * @param run Synchronous test body that receives the temporary directory path.
 */
function withTmpBrain(run: (brain: string) => void): void {
  const brain = tmpBrain();
  try {
    run(brain);
  } finally {
    // `force` keeps cleanup from throwing if the directory is already gone.
    fs.rmSync(brain, { recursive: true, force: true });
  }
}

/**
 * Builds a ReflectionRecord from fixed defaults, overridden by `partial`.
 *
 * @param partial Fields that should replace the defaults.
 * @returns A complete record (defaults first, then the overrides spread on top).
 */
function mkReflection(partial: Partial<ReflectionRecord>): ReflectionRecord {
  return {
    ts: '2026-06-11T10:00:00.000Z',
    taskId: 'meeting-minutes',
    taskLabel: '회의록',
    confidenceScore: 70,
    confidenceBand: 'medium',
    missing: [],
    escalated: false,
    criticIssues: null,
    promptPreview: '회의록 정리',
    ...partial,
  };
}

describe('criticAgent', () => {
  // The system prompt must mention JSON; the user prompt must carry the
  // comma-joined missing labels and the task label.
  it('critique 프롬프트에 필수 요소와 누락 신호가 포함된다', () => {
    const { system, user } = buildCritiquePrompt('회의록 정리해줘', '초안...', MEETING_REQ, ['담당자', '기한']);
    expect(system).toContain('JSON');
    expect(user).toContain('담당자, 기한');
    expect(user).toContain('회의록');
  });

  // JSON wrapped in a code fence and preceded by chatter must still be parsed.
  it('코드펜스·잡설 섞인 응답에서도 JSON 을 파싱한다', () => {
    const raw =
      '검토 결과입니다.\n```json\n{"pass": false, "issues": [{"severity": "major", "description": "기한 누락"}], "supplement": "## 기한\\n- (기한 미정)"}\n```';
    const c = parseCritique(raw);
    expect(c).not.toBeNull();
    expect(c!.pass).toBe(false);
    expect(c!.issues[0].severity).toBe('major');
    expect(c!.supplement).toContain('기한');
  });

  // A self-reported pass=true is overridden when issues are present.
  it('pass=true 여도 issues 가 있으면 pass 취급하지 않는다', () => {
    const c = parseCritique('{"pass": true, "issues": [{"severity": "minor", "description": "x"}], "supplement": ""}');
    // Fail with a clear assertion instead of a TypeError if parsing returns null.
    expect(c).not.toBeNull();
    expect(c!.pass).toBe(false);
  });

  // A throwing LLM callback must yield null rather than propagate the error.
  it('runCriticReview — LLM 실패 시 null (silent skip)', async () => {
    const result = await runCriticReview({
      userPrompt: 'q',
      draft: 'd',
      requirement: MEETING_REQ,
      missingLabels: [],
      callLlm: async () => {
        throw new Error('LLM down');
      },
    });
    expect(result).toBeNull();
  });

  // Passing critiques render as an empty string; failing ones list the issue
  // and the supplement section.
  it('formatCriticFooter — pass 면 빈 문자열, 실패면 이슈+보완 표시', () => {
    expect(formatCriticFooter({ pass: true, issues: [], supplement: '' })).toBe('');
    const f = formatCriticFooter({
      pass: false,
      issues: [{ severity: 'major', description: '결정과 미결이 섞임' }],
      supplement: '## 보완',
    });
    expect(f).toContain('검수 (Critic)');
    expect(f).toContain('결정과 미결이 섞임');
    expect(f).toContain('보완 제안');
  });
});

describe('reflectionStore', () => {
  // Two appended records must be read back in order.
  it('append → load 라운드트립', () => {
    withTmpBrain((brain) => {
      expect(appendReflection(brain, mkReflection({ missing: ['기한'] }))).toBe(true);
      expect(appendReflection(brain, mkReflection({ missing: ['기한', '담당자'] }))).toBe(true);
      const records = loadReflections(brain);
      expect(records.length).toBe(2);
      expect(records[1].missing).toEqual(['기한', '담당자']);
    });
  });

  // Missing elements are counted across records, most frequent first.
  it('summarizeFailurePatterns — 반복 누락 집계 (많은 순)', () => {
    const records = [
      mkReflection({ missing: ['기한'] }),
      mkReflection({ missing: ['기한'] }),
      mkReflection({ missing: ['기한', '담당자'] }),
    ];
    const patterns = summarizeFailurePatterns(records);
    expect(patterns[0]).toMatchObject({ element: '기한', count: 3 });
    expect(patterns[1]).toMatchObject({ element: '담당자', count: 1 });
  });

  // Only elements missed at least `threshold` times for the given task id are returned.
  it('recurrentMisses — threshold 이상만 반환', () => {
    const records = [
      mkReflection({ missing: ['기한'] }),
      mkReflection({ missing: ['기한'] }),
      mkReflection({ missing: ['기한'] }),
      mkReflection({ missing: ['담당자'] }),
    ];
    expect(recurrentMisses(records, 'meeting-minutes', 3)).toEqual(['기한']);
    expect(recurrentMisses(records, 'market-research', 3)).toEqual([]);
  });

  // Recurrent misses passed to the graph builder must show up in the emitted block.
  it('반복 누락 요소가 Requirement Graph 블록에 강조된다 (T5 루프)', () => {
    const block = buildRequirementGraphBlock('회의록 정리해줘', undefined, ['기한']);
    expect(block).toContain('과거에 자주 누락된 요소');
  });

  // The report mentions average confidence and the missed element; an empty
  // input produces the "no records" text.
  it('formatGrowthReport — 주별 추이 테이블 + 반복 실수 Top', () => {
    const records = [
      mkReflection({ ts: '2026-06-01T10:00:00.000Z', confidenceScore: 60, missing: ['기한'] }),
      mkReflection({ ts: '2026-06-09T10:00:00.000Z', confidenceScore: 85, missing: [] }),
    ];
    const md = formatGrowthReport(records);
    expect(md).toContain('평균 확신도');
    expect(md).toContain('기한');
    expect(formatGrowthReport([])).toContain('기록 없음');
  });
});

describe('taskEvalHarness', () => {
  // Shared golden record used by every test in this suite.
  const record: TaskGoldenRecord = {
    id: 'mm-test',
    query: '이 회의 내용을 회의록으로 정리해줘',
    sourceFile: 'fake.txt',
    expectedElements: ['참석자', '결정사항', '액션 아이템', '담당자', '기한'],
    reference: 'ref',
  };

  // A file with one comment line, one valid record, one broken line and a
  // trailing empty line must load one record and report one parse error.
  it('골든셋 로드 — 주석·깨진 줄 처리', () => {
    withTmpBrain((brain) => {
      const dir = path.join(brain, TASK_GOLDEN_DIR);
      fs.mkdirSync(dir, { recursive: true });
      fs.writeFileSync(
        path.join(dir, 'meeting-minutes.golden.jsonl'),
        ['// 주석', JSON.stringify(record), '{broken', ''].join('\n'),
        'utf8',
      );
      const { records, parseErrors } = loadTaskGoldenSet(brain);
      expect(records.length).toBe(1);
      expect(parseErrors).toBe(1);
      expect(records[0].id).toBe('mm-test');
    });
  });

  // Checks the coverage rate, honesty-marker count and section count of a scored answer.
  it('scoreTaskAnswer — 커버리지·정직성·구조 채점', () => {
    const answer =
      '# 회의록\n## 참석자: 김OO\n## 결정사항: A안\n## 액션 아이템\n- 발송 (담당자: 김OO, (기한 미정))';
    const s = scoreTaskAnswer(answer, record);
    expect(s.coverageRate).toBe(1);
    expect(s.honestyMarkers).toBeGreaterThanOrEqual(1);
    expect(s.sectionCount).toBeGreaterThanOrEqual(3);
  });

  // A generator failure on one record must not abort the run; it is kept as an error score.
  it('runTaskEval — 생성 실패가 전체를 막지 않고 에러 레코드로 남는다', async () => {
    const result = await runTaskEval({
      records: [record, { ...record, id: 'mm-fail' }],
      readSource: () => '전사 내용',
      generate: async (r) => {
        if (r.id === 'mm-fail') throw new Error('engine down');
        return '## 참석자 a ## 결정사항 b ## 액션 아이템 c 담당자 d 기한 e';
      },
    });
    expect(result.scores.length).toBe(2);
    expect(result.scores[0].coverageRate).toBe(1);
    expect(result.scores[1].error).toContain('engine down');
    expect(result.avgCoverage).toBe(1); // failed records are excluded from the average
  });

  // The markdown report includes the summary heading and the record id.
  it('formatTaskEvalReport — 요약·테이블 포함', () => {
    const md = formatTaskEvalReport(
      { scores: [scoreTaskAnswer('참석자 결정사항', record)], avgCoverage: 0.4, perfectCount: 0 },
      { taskLabel: '회의록', brainName: 'B', dateStr: 'now', modelName: 'gemma' },
    );
    expect(md).toContain('평균 요소 커버리지');
    expect(md).toContain('mm-test');
  });
});