feat: Self-Evolving Digital Employee OS P0~P6 + 캘린더 충돌 게이트

신뢰성 코어 (P1~P2):
- Requirement Graph: 업무 유형(회의록/시장조사/업무조사/일정) 필수 요소 주입 + 커버리지 hook
- Confidence Engine(0~100 결정론적) / Escalation Engine(검토 요청) / Epistemic Guard(모름·추정·확실 3분류)
- Provenance: citationTrace 에 출처 수정일·오래됨 경고
- Critic Loop: 문제 신호 turn 만 LLM 검수 1회 + 보완 카드

성장 루프 (P3):
- Gap Detector(Requirement-Knowledge) / Need Engine(30/25/20/15/10 공식) / Knowledge Inventory
- Learning Queue(proposed 전용 병합 — 승인은 사람만) / Decision Journal / Reflection 기록
- 반복 누락 요소(3회+)는 다음 turn 체크리스트에 자동 강조 (T5 루프)

지식 운영 (P4) + 기억 (P5) + 학습 실행 (P6):
- Knowledge Validation + Belief Revision(중복 reject·충돌 시 update/add 권고)
- Knowledge Decay(분야별 반감기 감사) / Knowledge Debt(blocked x impact)
- Organizational Memory(.astra/organization.md 상시 주입)
- Research Agent(approved 큐 -> 조사 브리프+추정 라벨 초안+Validation 게이트 -> proposals/)
- Skill Score(전/후반 추세) + Success Pattern DB(전요소충족+확신도90+ 자동 적재)

병렬 트랙:
- 캘린더 충돌 게이트: conflictCheck + 구조화 이벤트 캐시 + create_calendar_event 차단(force 는 사용자 승인 후)
- Task Eval Harness: 회의록 골든셋 자동 채점 명령 + 성장 리포트/학습 큐/노후 점검 명령

신규 모듈 17종(src/intelligence/), VS Code 명령 5종, 설정 11종, 테스트 +89건(전체 508 통과).
설계 문서: docs/SELF_EVOLVING_OS_MASTER_PLAN.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 13:42:09 +09:00
parent cbc2558550
commit 2afd1ac589
41 changed files with 4364 additions and 2 deletions
@@ -0,0 +1,162 @@
+/**
+ * Reflection Store — 업무 turn 회고 기록 + Failure Pattern 집계.
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 1 / Track 2-4 (Reflection Engine v1) +
+ * Phase 3 / Track 3-6 (Failure Pattern DB v1 시드). 신뢰 조건 T5
+ * "같은 실수를 반복하지 않는다" 의 데이터 기반.
+ *
+ * v1 은 결정론적 신호만 기록 (LLM 회고 질문은 후속 증분):
+ *   업무 turn 종료 → {업무유형, 확신도, 누락 요소, 에스컬레이션 여부, Critic 이슈 수}
+ *   를 <brain>/.astra/growth/reflections.jsonl 에 append.
+ *
+ * 이 파일이 쌓이면:
+ *   - summarizeFailurePatterns() → "회의록·기한 누락 N회" 류 반복 실수 집계
+ *   - formatGrowthReport() → 기간별 확신도/누락률 추이 = *성장세 그래프의 원천*
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+export const REFLECTIONS_REL_PATH = path.join('.astra', 'growth', 'reflections.jsonl'); // Path of the reflections JSONL log, relative to the brain root directory it is joined onto.
+
+export interface ReflectionRecord { // One retrospective entry for a finished task turn; stored as a single JSON line.
+    /** ISO timestamp. */
+    ts: string;
+    taskId: string | null; // Records without a taskId are skipped by summarizeFailurePatterns.
+    taskLabel: string | null; // Display label; summarizeFailurePatterns falls back to taskId when this is empty.
+    confidenceScore: number; // Averaged per week by formatGrowthReport.
+    confidenceBand: string;
+    /** Labels of the required elements that the coverage check found missing. */
+    missing: string[];
+    escalated: boolean; // Counted per week by formatGrowthReport.
+    /** Number of issues found by the Critic review if it ran; null if it did not run. */
+    criticIssues: number | null;
+    /** Preview of the request (for debugging / retrospection, 120 characters). */
+    promptPreview: string;
+
+    // ── Decision Journal v1 (Track 3-7) — fields for tracing back "why this confidence / decision" ──
+    /** Labels of the factors that contributed to the confidence score (confidenceEngine factors). */
+    factors?: string[];
+    /** Titles of the top sources used in the answer (based on citation/selfCheckSources). */
+    usedSources?: string[];
+
+    // ── Gap Detector v1 (Track 3-2) — input signals for the Need Engine ──
+    /** Retrieval grounding: number of chunks and the best score. */
+    retrieval?: { chunkCount: number; topScore: number };
+    /** Task turn carried out with no or only weak retrieval grounding (knowledge-gap signal). */
+    weakGrounding?: boolean;
+    /** Gap severity (none/low/medium/high). */
+    gapSeverity?: string;
+}
+
+/**
+ * Appends a single reflection record, serialized as one JSON line, to the
+ * reflections log located under `brainPath`.
+ *
+ * This function never throws: recording a reflection must not be able to block
+ * the turn that produced it.
+ *
+ * @param brainPath Root directory that `REFLECTIONS_REL_PATH` is resolved against.
+ * @param record The reflection to persist.
+ * @returns `true` when the line was written; `false` when `brainPath` is empty
+ *          or any filesystem/serialization step failed.
+ */
+export function appendReflection(brainPath: string, record: ReflectionRecord): boolean {
+    if (!brainPath) {
+        return false;
+    }
+    try {
+        const target = path.join(brainPath, REFLECTIONS_REL_PATH);
+        const parentDir = path.dirname(target);
+        // Create the growth directory on first use; a no-op when it already exists.
+        fs.mkdirSync(parentDir, { recursive: true });
+        const jsonLine = `${JSON.stringify(record)}\n`;
+        fs.appendFileSync(target, jsonLine, 'utf8');
+        return true;
+    } catch {
+        // Best-effort by design: report failure through the return value only.
+        return false;
+    }
+}
+
+/**
+ * Loads reflection records from the JSONL log under `brainPath`.
+ *
+ * Blank lines, lines that are not valid JSON, and parsed values without a string
+ * `ts` field are silently dropped. Any filesystem error yields an empty array.
+ *
+ * @param brainPath Root directory that `REFLECTIONS_REL_PATH` is resolved against.
+ * @param limit When positive, only the most recent `limit` records are returned.
+ * @returns The records in file order (oldest first).
+ */
+export function loadReflections(brainPath: string, limit?: number): ReflectionRecord[] {
+    try {
+        const source = path.join(brainPath, REFLECTIONS_REL_PATH);
+        if (!fs.existsSync(source)) {
+            return [];
+        }
+
+        const kept: ReflectionRecord[] = [];
+        const rawLines = fs.readFileSync(source, 'utf8').split('\n');
+        rawLines.forEach((rawLine) => {
+            if (!rawLine.trim()) {
+                return;
+            }
+            try {
+                const parsed = JSON.parse(rawLine);
+                if (parsed && typeof parsed.ts === 'string') {
+                    kept.push(parsed as ReflectionRecord);
+                }
+            } catch {
+                /* corrupted line — ignore it and keep reading */
+            }
+        });
+
+        if (limit && limit > 0) {
+            // The newest entries are at the end of the append-only file.
+            return kept.slice(-limit);
+        }
+        return kept;
+    } catch {
+        return [];
+    }
+}
+
+export interface FailurePattern { // One aggregated (task type × missing element) row produced by summarizeFailurePatterns.
+    taskId: string; // Task type id copied from the contributing reflection records.
+    taskLabel: string; // Label of the first record seen for this pair, or the taskId when that label is empty.
+    element: string; // Label of the missing element.
+    count: number; // Number of times this element appeared in `missing` for this task type.
+}
+
+/**
+ * Aggregates failure patterns: how often each (task type × missing element)
+ * pair occurs across the given reflections, most frequent first.
+ *
+ * Records without a `taskId` are ignored. The label shown for a pair comes from
+ * the first record that contributed to it, falling back to the task id.
+ *
+ * @param records Reflection records to aggregate.
+ * @returns One entry per distinct pair, sorted by descending count.
+ */
+export function summarizeFailurePatterns(records: ReflectionRecord[]): FailurePattern[] {
+    const tally = new Map<string, FailurePattern>();
+
+    for (const rec of records) {
+        const { taskId, taskLabel } = rec;
+        if (!taskId) {
+            continue;
+        }
+        for (const element of rec.missing || []) {
+            const bucketKey = `${taskId}::${element}`;
+            const existing = tally.get(bucketKey);
+            if (existing === undefined) {
+                tally.set(bucketKey, { taskId, taskLabel: taskLabel || taskId, element, count: 1 });
+            } else {
+                existing.count += 1;
+            }
+        }
+    }
+
+    const patterns = Array.from(tally.values());
+    patterns.sort((left, right) => right.count - left.count);
+    return patterns;
+}
+
+/**
+ * Recurring-miss warning: returns the elements that have been missing at least
+ * `threshold` times for the given task type, so the caller can emphasize them
+ * (e.g. as "frequently missed elements") on the next turn.
+ *
+ * @param records Reflection records to inspect.
+ * @param taskId Task type to report on.
+ * @param threshold Minimum number of misses for an element to be reported (default 3).
+ * @returns Element labels, most frequently missed first.
+ */
+export function recurrentMisses(records: ReflectionRecord[], taskId: string, threshold = 3): string[] {
+    const flagged: string[] = [];
+    for (const pattern of summarizeFailurePatterns(records)) {
+        const sameTask = pattern.taskId === taskId;
+        if (sameTask && pattern.count >= threshold) {
+            flagged.push(pattern.element);
+        }
+    }
+    return flagged;
+}
+
+/**
+ * Maps a timestamp to the date (YYYY-MM-DD) of the Sunday that starts its week.
+ * Returns null when the timestamp cannot be parsed.
+ */
+function utcWeekStartKey(ts: string): string | null {
+    const when = new Date(ts);
+    if (isNaN(when.getTime())) return null;
+    // The weekday, the day shift and the formatted key must all use the same clock.
+    // Mixing local getDay()/setDate() with the UTC-based toISOString() splits a single
+    // week across two keys (depending on time of day) on any host that is not in UTC.
+    when.setUTCDate(when.getUTCDate() - when.getUTCDay());
+    return when.toISOString().slice(0, 10);
+}
+
+/**
+ * Builds a Markdown growth report from reflection records: a per-week table of
+ * turn count, average confidence, element-miss rate and escalation count,
+ * followed by the ten most frequent failure patterns.
+ *
+ * Weeks start on Sunday and are computed in UTC, so the report is identical on
+ * every host. Records whose `ts` cannot be parsed are left out of the weekly
+ * table but are still counted in the total and in the failure patterns.
+ *
+ * @param records Reflection records to summarize.
+ * @returns The report as Markdown text; a short placeholder report when `records` is empty.
+ */
+export function formatGrowthReport(records: ReflectionRecord[]): string {
+    if (records.length === 0) return '# 성장 리포트\n\n기록 없음 — 업무 turn 이 쌓이면 추이가 표시됩니다.\n';
+
+    // Bucket the records by the UTC date of their week's Sunday.
+    const byWeek = new Map<string, ReflectionRecord[]>();
+    for (const r of records) {
+        const key = utcWeekStartKey(r.ts);
+        if (key === null) continue;
+        const bucket = byWeek.get(key);
+        if (bucket) bucket.push(r);
+        else byWeek.set(key, [r]);
+    }
+
+    const lines: string[] = [];
+    lines.push('# ASTRA 성장 리포트 (Reflection 기반)');
+    lines.push('');
+    lines.push(`총 업무 turn: ${records.length}`);
+    lines.push('');
+    lines.push('| 주 (시작일) | 업무 수 | 평균 확신도 | 요소 누락률 | 에스컬레이션 |');
+    lines.push('|---|---|---|---|---|');
+    // YYYY-MM-DD keys sort chronologically with a plain string sort.
+    const weeks = Array.from(byWeek.keys()).sort();
+    for (const w of weeks) {
+        const rs = byWeek.get(w) || [];
+        const avgConf = rs.reduce((s, r) => s + (r.confidenceScore || 0), 0) / rs.length;
+        // Share of turns in the week that had at least one missing element.
+        const missRate = rs.filter((r) => (r.missing || []).length > 0).length / rs.length;
+        const escCount = rs.filter((r) => r.escalated).length;
+        lines.push(`| ${w} | ${rs.length} | ${avgConf.toFixed(0)} | ${(missRate * 100).toFixed(0)}% | ${escCount} |`);
+    }
+    lines.push('');
+    lines.push('## 반복 실수 Top (Failure Patterns)');
+    const patterns = summarizeFailurePatterns(records).slice(0, 10);
+    if (patterns.length === 0) {
+        lines.push('- 없음');
+    } else {
+        for (const p of patterns) lines.push(`- ${p.taskLabel} · **${p.element}** 누락 ${p.count}회`);
+    }
+    lines.push('');
+    return lines.join('\n');
+}