feat(growth): Correction Loop — 정정 1회가 시스템 세 곳을 성장시키는 파이프라인 (v2.2.223)

self-evolving 고도화: 사용자 정정이 곧 Ground Truth — 정답지를 사람이 따로 만들지 않고, 태그 통계가 리포트에 머물지 않고 다음 턴의 행동을 바꾼다.

① 정정 감지·태깅 (correctionLoop.ts + agent.ts 훅, fire-and-forget):
- "아니야/틀렸어/~가 아니라" 류 정정 발화 감지 (보수적 — 추임새 "아니"는 제외)
- LLM 오류 분류 (사실오류/근거누락/맥락누락/추론오류/지시불이행/형식오류, 실패 시 휴리스틱 fallback) → error-tag frontmatter 레슨(lessons/) 저장
- 동시에 회귀 케이스 적립: .astra/eval/corrections.jsonl {질문, 틀린답, 정정}

② 주간 성장 사이클 확장 (1.5단계):
- 정정 회귀 테스트: 정정받은 질문을 두뇌 검색 컨텍스트와 함께 재실행 → LLM-judge "같은 실수 반복?" 판정 → growth/regression-report.md (사이클당 ≤8건)
- 약점 프로필: 최근 60일 태그 통계 → growth/weakness-profile.json

③ 결핍의 행동화 (memoryContext):
- GROUNDING 약함 + agent scope 적용 중 → 전체 두뇌 1회 재검색 (scope 가 정답 문서를 가리는 경우 구제, 더 강한 근거일 때만 채택)
- 그래도 약함 → 학습 큐에 지식 공백 자동 proposed 등록 (질문 해시 중복 차단, 20건 폭주 방지, 승인은 사람 — Permission Based Learning 유지)
- 약점 프로필 → [자기검토] 블록 주입 (태그 2회 이상만): "너는 최근 X 정정을 N회 받았다 — <유형별 자기검토 지시>"

테스트 25건 추가 (감지 패턴·프로필 집계·큐 등록·영속화·fallback 분류).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 19:28:46 +09:00
parent 67927b1d4e
commit 72faa07480
7 changed files with 650 additions and 10 deletions
@@ -0,0 +1,158 @@
+/**
+ * Correction Loop 단위 테스트 — 순수 로직 (감지·프로필·레슨·큐 등록·영속화).
+ * LLM 의존 부분(classifyCorrection)은 엔드포인트 실패 → 휴리스틱 fallback 경로만 검증.
+ */
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import {
+    looksLikeCorrection, classifyCorrection, correctionLessonMarkdown,
+    appendCorrectionCase, loadCorrectionCases, registerKnowledgeGap,
+    computeWeaknessProfile, saveWeaknessProfile, loadWeaknessProfile,
+    buildSelfReviewBlock, formatRegressionReport,
+    type CorrectionCase,
+} from '../src/intelligence/correctionLoop';
+import { loadQueue } from '../src/intelligence/learningQueue';
+
+const tmpBrain = () => fs.mkdtempSync(path.join(os.tmpdir(), 'astra-corr-')); // Creates a fresh temp directory (prefix 'astra-corr-') that tests pass as the `brain` argument. NOTE(review): no cleanup of these directories is visible in this file.
+
+const CASE = (over: Partial<CorrectionCase> = {}): CorrectionCase => ({ // Fixture factory: returns a default CorrectionCase; fields in `over` replace the defaults.
+    ts: new Date().toISOString(), // timestamp defaults to the moment the fixture is built
+    errorTag: '사실오류',
+    question: '회의가 언제였지?',
+    wrongAnswer: '5월 10일입니다.',
+    correction: '아니야, 6월 10일이야.',
+    title: '회의 날짜 오답',
+    ...over, // spread last so caller-supplied fields win over the defaults above
+});
+
+describe('looksLikeCorrection — 정정 감지 (보수적)', () => { // looksLikeCorrection must return true for correction phrasings and false for ordinary requests.
+    test.each([ // utterances expected to be detected as corrections
+        '아니야, 그 회의는 6월이야',
+        '틀렸어. 다시 확인해',
+        '그게 아니라 화요일이야',
+        '담당자는 김OO가 아니라 박OO야',
+        '그 수치는 사실과 달라',
+        '잘못 알고 있네 — 정정해줄게',
+        '지어내지 마',
+    ])('정정으로 감지: %s', (p) => expect(looksLikeCorrection(p)).toBe(true));
+
+    test.each([ // utterances expected NOT to be detected as corrections
+        '커밋하고 푸쉬해줘',
+        '내일 일정 알려줘',
+        '아니 그래서 결과가 어떻게 됐어?', // a bare interjection "아니" must not count as a correction
+        '회의록 요약해줘',
+        '', // empty input
+        '응',
+    ])('비정정: %s', (p) => expect(looksLikeCorrection(p)).toBe(false));
+});
+
+describe('classifyCorrection — LLM 실패 시 휴리스틱 fallback', () => { // Checks the tag classifyCorrection returns when the LLM endpoint cannot be reached.
+    const deadLlm = { baseUrl: 'http://127.0.0.1:1', model: 'x' }; // loopback port 1 — presumably nothing listens there, so the LLM call fails (assumption; confirm in CI environment)
+    test('출처 언급 → 근거누락', async () => { // correction text mentions a source → expected tag '근거누락'
+        const r = await classifyCorrection('q', 'a', '출처도 없이 단정하지 마', deadLlm);
+        expect(r.tag).toBe('근거누락');
+        expect(r.title.length).toBeGreaterThan(0); // a non-empty title is still produced on this path
+    });
+    test('맥락 언급 → 맥락누락', async () => { // correction text refers back to earlier conversation → expected tag '맥락누락'
+        const r = await classifyCorrection('q', 'a', '아까 위에서 말했잖아', deadLlm);
+        expect(r.tag).toBe('맥락누락');
+    });
+    test('기본 → 사실오류', async () => { // no special cue in the text → expected default tag '사실오류'
+        const r = await classifyCorrection('q', 'a', '6월 10일이 맞아', deadLlm);
+        expect(r.tag).toBe('사실오류');
+    });
+});
+
+describe('회귀 케이스 영속화', () => { // Persistence of correction cases: append/load round trip and tolerance of a corrupt line.
+    test('append → load 라운드트립 + 필드 길이 제한', () => {
+        const brain = tmpBrain();
+        const long = 'x'.repeat(2000); // 2000 chars, well above the 600 asserted below
+        expect(appendCorrectionCase(brain, CASE({ wrongAnswer: long }))).toBe(true);
+        expect(appendCorrectionCase(brain, CASE({ title: '두번째' }))).toBe(true);
+        const cases = loadCorrectionCases(brain);
+        expect(cases).toHaveLength(2);
+        expect(cases[0].wrongAnswer.length).toBeLessThanOrEqual(600); // the loaded wrongAnswer must be at most 600 chars
+        expect(cases[1].title).toBe('두번째'); // load order matches append order
+    });
+    test('손상 라인은 건너뛴다', () => {
+        const brain = tmpBrain();
+        appendCorrectionCase(brain, CASE());
+        fs.appendFileSync(path.join(brain, '.astra', 'eval', 'corrections.jsonl'), '{broken json\n'); // write a malformed line straight into the JSONL file
+        appendCorrectionCase(brain, CASE({ title: 'b' }));
+        expect(loadCorrectionCases(brain)).toHaveLength(2); // only the two valid cases are returned
+    });
+});
+
+describe('correctionLessonMarkdown', () => { // The generated lesson markdown must carry the expected frontmatter fields and the correction text.
+    test('error-tag frontmatter + Ground Truth 포함', () => {
+        const md = correctionLessonMarkdown(CASE(), '2026-06-11');
+        expect(md).toContain('error-tag: 사실오류');
+        expect(md).toContain('source: user-correction');
+        expect(md).toContain('아니야, 6월 10일이야.'); // the user's correction text appears verbatim
+        expect(md).toMatch(/^---\ntype: lesson/); // output starts with a frontmatter fence followed by "type: lesson"
+    });
+});
+
+describe('registerKnowledgeGap — 학습 큐 자동 proposed', () => { // registerKnowledgeGap: queue registration, duplicate suppression, short-question rejection, and a size cap.
+    test('등록 + 같은 질문 중복 차단', () => {
+        const brain = tmpBrain();
+        expect(registerKnowledgeGap(brain, 'CRAG 교정 검색이 뭐야?', 0.12)).toBe(true);
+        expect(registerKnowledgeGap(brain, 'CRAG 교정 검색이 뭐야?', 0.2)).toBe(false); // same question with a different score is rejected
+        const q = loadQueue(brain);
+        expect(q).toHaveLength(1);
+        expect(q[0].status).toBe('proposed');
+        expect(q[0].id).toMatch(/^gap-/);
+        expect(q[0].reason).toContain('0.12'); // reason text includes the score from the first (accepted) call
+    });
+    test('짧은 질문 무시 + 20건 폭주 방지', () => {
+        const brain = tmpBrain();
+        expect(registerKnowledgeGap(brain, '뭐야?', 0)).toBe(false); // a very short question is not registered
+        for (let i = 0; i < 25; i++) registerKnowledgeGap(brain, `지식 공백 질문 번호 ${i} 에 대한 상세 내용`, 0.1); // attempt 25 distinct questions
+        expect(loadQueue(brain).length).toBeLessThanOrEqual(20); // queue holds at most 20; NOTE(review): this would also pass on an empty queue
+    });
+});
+
+describe('약점 프로필', () => { // Weakness profile: tag aggregation, save/load round trip, and self-review block generation.
+    const now = Date.parse('2026-06-11T00:00:00Z'); // fixed reference time for the window test
+    test('윈도우 필터 + 태그 집계 내림차순', () => {
+        const cases = [
+            CASE({ ts: '2026-06-01T00:00:00Z', errorTag: '사실오류' }),
+            CASE({ ts: '2026-06-05T00:00:00Z', errorTag: '사실오류', title: '최신 예시' }),
+            CASE({ ts: '2026-06-03T00:00:00Z', errorTag: '근거누락' }),
+            CASE({ ts: '2025-01-01T00:00:00Z', errorTag: '형식오류' }), // outside the window
+        ];
+        const p = computeWeaknessProfile(cases, now, 60); // third argument is the window size (60; presumably days — confirm)
+        expect(p.totalCases).toBe(3); // the 2025 case is not counted
+        expect(p.tagCounts[0]).toMatchObject({ tag: '사실오류', count: 2, example: '최신 예시' }); // most frequent tag first; example is the title of the case dated 06-05
+        expect(p.tagCounts.find(t => t.tag === '형식오류')).toBeUndefined();
+    });
+    test('save → load 라운드트립 + 자기검토 블록 (2회 이상만)', () => {
+        const brain = tmpBrain();
+        const p = computeWeaknessProfile([
+            CASE({ errorTag: '사실오류' }), CASE({ errorTag: '사실오류' }), CASE({ errorTag: '근거누락' }),
+        ], Date.now(), 60);
+        expect(saveWeaknessProfile(brain, p)).toBe(true);
+        const block = buildSelfReviewBlock(loadWeaknessProfile(brain));
+        expect(block).toContain('[자기검토');
+        expect(block).toContain('사실오류');
+        expect(block).not.toContain('근거누락'); // a tag seen only once is not injected
+    });
+    test('데이터 없으면 빈 블록', () => {
+        expect(buildSelfReviewBlock(null)).toBe(''); // null profile → empty string
+        expect(buildSelfReviewBlock(computeWeaknessProfile([], Date.now()))).toBe(''); // profile built from no cases → empty string
+    });
+});
+
+describe('formatRegressionReport', () => { // The report must contain a distinct marker for each of the three `repeated` states.
+    test('재발/통과/판정불가 마크', () => {
+        const md = formatRegressionReport([
+            { question: 'q1', errorTag: '사실오류', repeated: true, note: 'n1' },
+            { question: 'q2', errorTag: '근거누락', repeated: false, note: 'n2' },
+            { question: 'q3', errorTag: '기타', repeated: null, note: 'n3' },
+        ], { dateStr: '2026-06-11' });
+        expect(md).toContain('❌ 재발'); // marker for a repeated mistake (presumably the repeated: true row)
+        expect(md).toContain('✅ 통과'); // marker for a pass (presumably the repeated: false row)
+        expect(md).toContain('⚠️ 판정불가'); // marker for an undecidable result (presumably the repeated: null row)
+    });
+});