feat: Self-Evolving Digital Employee OS P0~P6 + 캘린더 충돌 게이트

신뢰성 코어 (P1~P2):
- Requirement Graph: 업무 유형(회의록/시장조사/업무조사/일정) 필수 요소 주입 + 커버리지 hook
- Confidence Engine(0~100 결정론적) / Escalation Engine(검토 요청) / Epistemic Guard(모름·추정·확실 3분류)
- Provenance: citationTrace 에 출처 수정일·오래됨 경고
- Critic Loop: 문제 신호 turn 만 LLM 검수 1회 + 보완 카드

성장 루프 (P3):
- Gap Detector(Requirement-Knowledge) / Need Engine(30/25/20/15/10 공식) / Knowledge Inventory
- Learning Queue(proposed 전용 병합 — 승인은 사람만) / Decision Journal / Reflection 기록
- 반복 누락 요소(3회+)는 다음 turn 체크리스트에 자동 강조 (T5 루프)

지식 운영 (P4) + 기억 (P5) + 학습 실행 (P6):
- Knowledge Validation + Belief Revision(중복 reject·충돌 시 update/add 권고)
- Knowledge Decay(분야별 반감기 감사) / Knowledge Debt(blocked x impact)
- Organizational Memory(.astra/organization.md 상시 주입)
- Research Agent(approved 큐 -> 조사 브리프+추정 라벨 초안+Validation 게이트 -> proposals/)
- Skill Score(전/후반 추세) + Success Pattern DB(전요소충족+확신도90+ 자동 적재)

병렬 트랙:
- 캘린더 충돌 게이트: conflictCheck + 구조화 이벤트 캐시 + create_calendar_event 차단(force 는 사용자 승인 후)
- Task Eval Harness: 회의록 골든셋 자동 채점 명령 + 성장 리포트/학습 큐/노후 점검 명령

신규 모듈 17종(src/intelligence/), VS Code 명령 5종, 설정 11종, 테스트 +89건(전체 508 통과).
설계 문서: docs/SELF_EVOLVING_OS_MASTER_PLAN.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 13:42:09 +09:00
parent cbc2558550
commit 2afd1ac589
41 changed files with 4364 additions and 2 deletions
@@ -0,0 +1,220 @@
+/**
+ * Need Engine — 학습 필요성 산출 (설계서 7.6) + Knowledge Inventory v1 (7.3).
+ *
+ * Self-Evolving OS 마스터 플랜 Phase 3 / Track 3-3 + 3-1. Reflection 기록을
+ * 집계해 "무엇을 먼저 배워야 하는가" 를 점수로 산출한다 — 성장 루프의 두뇌.
+ *
+ * Need Score (설계서 공식, 0~100):
+ *   정보 부족도 × 30% + 실패율 × 25% + 업무 빈도 × 20% + 확신도 부족 × 15% + 사용자 피드백 × 10%
+ *
+ * v1 신호 매핑 (전부 Reflection 에서 결정론적으로):
+ *   - 정보 부족도: weakGrounding 비율 (검색 근거 없이 수행한 turn 비중)
+ *   - 실패율: 필수 요소 누락이 있었던 turn 비율
+ *   - 업무 빈도: 해당 업무 turn 수 / 전체 업무 turn 수
+ *   - 확신도 부족: (100 − 평균 확신도) / 100
+ *   - 사용자 피드백: v1 미수집 → 0 (필드는 유지, 후속 증분에서 연결)
+ *
+ * 출력은 Learning Queue 의 입력이 된다. 학습 실행은 승인 후 (Permission Based Learning).
+ */
+
+import type { ReflectionRecord } from './reflectionStore';
+
+/** One ranked learning need for a single task type, as produced by computeNeeds. */
+export interface NeedItem {
+    /** Task-type ID (the learning-topic unit in v1; finer element/topic granularity is a follow-up). */
+    taskId: string;
+    taskLabel: string;
+    /** 0~100. */
+    score: number;
+    /** Raw 0~1 signal behind each weighted term of the score (human-readable rationale). */
+    breakdown: {
+        infoLack: number;    // 0~1
+        failRate: number;    // 0~1
+        frequency: number;   // 0~1
+        confidenceLack: number; // 0~1
+        feedback: number;    // 0~1 (v1 = 0)
+    };
+    /** Number of records aggregated for this task type. */
+    sampleCount: number;
+    /** Top 3 most frequently missed elements — used to make the learning topic concrete. */
+    topMisses: string[];
+    reason: string;
+}
+
+/**
+ * Weight of each signal in the Need Score. The five weights sum to 1, so the
+ * weighted sum of 0~1 signals, scaled by 100, stays within 0~100.
+ */
+export const NEED_WEIGHTS = {
+    infoLack: 0.30,
+    failRate: 0.25,
+    frequency: 0.20,
+    confidenceLack: 0.15,
+    feedback: 0.10,
+} as const;
+
+/**
+ * Groups reflection records by task type and ranks the task types by Need Score.
+ *
+ * Records without a `taskId` are ignored. For each task type the signals are:
+ *   - infoLack: share of records with `weakGrounding === true`
+ *   - failRate: share of records with a non-empty `missing` list
+ *   - frequency: this task's records / all records that carry a taskId
+ *   - confidenceLack: (100 − mean confidenceScore) / 100, clamped to 0~1
+ *     (a record with no confidenceScore counts as 0)
+ *   - feedback: always 0 in v1
+ *
+ * @param records Reflection records to aggregate.
+ * @returns One NeedItem per task type, highest score first; empty when no
+ *          record carries a taskId.
+ */
+export function computeNeeds(records: ReflectionRecord[]): NeedItem[] {
+    const tasked = records.filter((rec) => rec.taskId);
+    if (tasked.length === 0) {
+        return [];
+    }
+
+    // Bucket the records by task type; a Map keeps first-seen order.
+    const grouped = new Map<string, ReflectionRecord[]>();
+    for (const rec of tasked) {
+        const key = rec.taskId!;
+        const bucket = grouped.get(key);
+        if (bucket) {
+            bucket.push(rec);
+        } else {
+            grouped.set(key, [rec]);
+        }
+    }
+
+    const percent = (ratio: number): string => (ratio * 100).toFixed(0);
+
+    const ranked: NeedItem[] = [];
+    grouped.forEach((group, taskId) => {
+        const sampleCount = group.length;
+
+        // Single pass over the group: count the signals and tally missed elements.
+        let ungroundedTurns = 0;
+        let turnsWithMisses = 0;
+        let confidenceSum = 0;
+        const missTally = new Map<string, number>();
+        for (const rec of group) {
+            if (rec.weakGrounding === true) {
+                ungroundedTurns += 1;
+            }
+            const missed = rec.missing || [];
+            if (missed.length > 0) {
+                turnsWithMisses += 1;
+            }
+            confidenceSum += rec.confidenceScore || 0;
+            for (const element of missed) {
+                missTally.set(element, (missTally.get(element) || 0) + 1);
+            }
+        }
+
+        const infoLack = ungroundedTurns / sampleCount;
+        const failRate = turnsWithMisses / sampleCount;
+        const frequency = sampleCount / tasked.length;
+        const avgConf = confidenceSum / sampleCount;
+        const confidenceLack = Math.min(1, Math.max(0, (100 - avgConf) / 100));
+        const feedback = 0; // not collected in v1
+
+        // Terms are accumulated in the same order as the documented formula.
+        let weighted = infoLack * NEED_WEIGHTS.infoLack;
+        weighted += failRate * NEED_WEIGHTS.failRate;
+        weighted += frequency * NEED_WEIGHTS.frequency;
+        weighted += confidenceLack * NEED_WEIGHTS.confidenceLack;
+        weighted += feedback * NEED_WEIGHTS.feedback;
+        const score = Math.round(100 * weighted);
+
+        // Three most frequently missed elements; ties keep first-seen order.
+        const topMisses = [...missTally.entries()]
+            .sort(([, left], [, right]) => right - left)
+            .slice(0, 3)
+            .map(([element]) => element);
+
+        // Only signals above 30% are worth mentioning in the rationale.
+        const signals: string[] = [];
+        if (infoLack > 0.3) {
+            signals.push(`근거 없는 수행 ${percent(infoLack)}%`);
+        }
+        if (failRate > 0.3) {
+            signals.push(`요소 누락률 ${percent(failRate)}%`);
+        }
+        if (confidenceLack > 0.3) {
+            signals.push(`평균 확신도 ${avgConf.toFixed(0)}`);
+        }
+        if (topMisses.length > 0) {
+            signals.push(`자주 누락: ${topMisses.join(', ')}`);
+        }
+
+        ranked.push({
+            taskId,
+            taskLabel: group[0].taskLabel || taskId,
+            score,
+            breakdown: { infoLack, failRate, frequency, confidenceLack, feedback },
+            sampleCount,
+            topMisses,
+            reason: signals.length > 0 ? signals.join(' · ') : '특이 신호 없음 (빈도 기반)',
+        });
+    });
+
+    ranked.sort((a, b) => b.score - a.score);
+    return ranked;
+}
+
+/**
+ * Knowledge Inventory v1 (Track 3-1) — knowledge-holding status per task type.
+ * The three grades sufficient / partial / missing (design doc 7.3) are decided from grounding signals.
+ */
+export interface InventoryItem {
+    taskId: string;
+    taskLabel: string;
+    /** 'sufficient' | 'partial' | 'missing' */
+    status: 'sufficient' | 'partial' | 'missing';
+    avgChunkCount: number; // mean retrieval.chunkCount over the task's records
+    avgTopScore: number; // mean retrieval.topScore over the task's records
+    sampleCount: number; // number of records that were averaged
+}
+
+/**
+ * Builds the per-task knowledge inventory from retrieval statistics.
+ *
+ * Only records that have both a `taskId` and a `retrieval` object are used.
+ * For each task type the mean chunk count and mean top score decide the grade:
+ * 'sufficient' needs at least 3 chunks and a top score of at least 0.5 on
+ * average, 'partial' needs at least 1 chunk on average, anything else is
+ * 'missing'.
+ *
+ * @param records Reflection records to aggregate.
+ * @returns One item per task type, lowest average top score first.
+ */
+export function knowledgeInventory(records: ReflectionRecord[]): InventoryItem[] {
+    // Bucket the usable records by task type; a Map keeps first-seen order.
+    const grouped = new Map<string, ReflectionRecord[]>();
+    for (const rec of records) {
+        if (!rec.taskId || !rec.retrieval) {
+            continue;
+        }
+        const bucket = grouped.get(rec.taskId);
+        if (bucket) {
+            bucket.push(rec);
+        } else {
+            grouped.set(rec.taskId, [rec]);
+        }
+    }
+
+    const inventory = Array.from(grouped, ([taskId, group]): InventoryItem => {
+        let chunkSum = 0;
+        let topScoreSum = 0;
+        for (const rec of group) {
+            chunkSum += rec.retrieval!.chunkCount || 0;
+            topScoreSum += rec.retrieval!.topScore || 0;
+        }
+        const avgChunkCount = chunkSum / group.length;
+        const avgTopScore = topScoreSum / group.length;
+
+        let status: InventoryItem['status'];
+        if (avgChunkCount >= 3 && avgTopScore >= 0.5) {
+            status = 'sufficient';
+        } else if (avgChunkCount >= 1) {
+            status = 'partial';
+        } else {
+            status = 'missing';
+        }
+
+        return {
+            taskId,
+            taskLabel: group[0].taskLabel || taskId,
+            status,
+            avgChunkCount,
+            avgTopScore,
+            sampleCount: group.length,
+        };
+    });
+
+    // Weakest grounding first.
+    inventory.sort((a, b) => a.avgTopScore - b.avgTopScore);
+    return inventory;
+}
+
+/**
+ * Knowledge Debt (Track 4-4) — tallies the work that missing knowledge actually blocked (design doc
+ * example: "GA4 — Blocked Tasks 17, Impact 9"). The v1 unit is the task type: the number of turns
+ * performed with no or weak grounding = blocked, and the mean gap severity of those turns = impact (0~10).
+ */
+export interface DebtItem {
+    taskId: string;
+    taskLabel: string;
+    /** Number of task turns performed while knowledge was lacking. */
+    blockedTurns: number;
+    /** Mean gap severity, 0~10. */
+    impact: number;
+    /** blocked × impact — the sort key. */
+    debtScore: number;
+}
+
+/** Impact points (0~10) assigned to each known gap-severity label. */
+const SEVERITY_SCORE: Record<string, number> = { none: 0, low: 3, medium: 6, high: 10 };
+
+/**
+ * Aggregates knowledge debt per task type.
+ *
+ * Only records with a `taskId` and `weakGrounding === true` count as blocked
+ * turns. Each blocked turn contributes the impact points of its `gapSeverity`;
+ * a missing or unrecognised severity counts as 3 (the 'low' value).
+ *
+ * @param records Reflection records to aggregate.
+ * @returns One item per task type that has blocked turns, highest debtScore
+ *          first. `impact` is rounded to one decimal; `debtScore` is
+ *          blockedTurns × the unrounded mean impact, rounded to an integer.
+ */
+export function computeKnowledgeDebt(records: ReflectionRecord[]): DebtItem[] {
+    const blocked = records.filter((r) => r.taskId && r.weakGrounding === true);
+    const byTask = new Map<string, ReflectionRecord[]>();
+    for (const r of blocked) {
+        const arr = byTask.get(r.taskId!) || [];
+        arr.push(r);
+        byTask.set(r.taskId!, arr);
+    }
+    const items: DebtItem[] = [];
+    for (const [taskId, rs] of byTask) {
+        const impact = rs.reduce((sum, r) => {
+            const key: string = r.gapSeverity || 'low';
+            // Own-property lookup only: a plain index would also find inherited
+            // Object.prototype members (e.g. 'constructor'), which are not
+            // numbers and would turn the whole sum into NaN.
+            const known = Object.prototype.hasOwnProperty.call(SEVERITY_SCORE, key)
+                ? SEVERITY_SCORE[key]
+                : undefined;
+            return sum + (known ?? 3);
+        }, 0) / rs.length;
+        items.push({
+            taskId,
+            taskLabel: rs[0].taskLabel || taskId,
+            blockedTurns: rs.length,
+            impact: Math.round(impact * 10) / 10,
+            debtScore: Math.round(rs.length * impact),
+        });
+    }
+    return items.sort((a, b) => b.debtScore - a.debtScore);
+}
+
+/**
+ * Renders the Need Engine report as Markdown: a ranked needs table, the
+ * knowledge inventory table and the knowledge debt table. Each section falls
+ * back to a one-line placeholder when its list is empty.
+ *
+ * Free-text cells (task labels and the reason) are escaped so that a `|` or a
+ * line break inside them cannot split or break a table row.
+ *
+ * @param needs Ranked needs; rendered in the given order, numbered from 1.
+ * @param inventory Knowledge inventory items, rendered in the given order.
+ * @param debt Knowledge debt items, rendered in the given order; defaults to none.
+ * @returns The Markdown document, ending with a trailing newline.
+ */
+export function formatNeedsMarkdown(needs: NeedItem[], inventory: InventoryItem[], debt: DebtItem[] = []): string {
+    // A table row must stay on one line and `|` delimits cells, so line breaks
+    // become spaces and pipes are backslash-escaped.
+    const cell = (text: string): string => text.replace(/\r\n|\r|\n/g, ' ').replace(/\|/g, '\\|');
+
+    const lines: string[] = [];
+    lines.push('# 학습 필요성 (Need Engine)');
+    lines.push('');
+    lines.push('공식: 정보부족 30% + 실패율 25% + 빈도 20% + 확신부족 15% + 피드백 10%');
+    lines.push('');
+    if (needs.length === 0) {
+        lines.push('Reflection 기록 없음 — 업무 turn 이 쌓이면 학습 우선순위가 산출됩니다.');
+    } else {
+        lines.push('| 우선순위 | 업무 | Need Score | 표본 | 근거 |');
+        lines.push('|---|---|---|---|---|');
+        needs.forEach((n, i) => {
+            lines.push(`| ${i + 1} | ${cell(n.taskLabel)} | **${n.score}** | ${n.sampleCount} | ${cell(n.reason)} |`);
+        });
+    }
+    lines.push('');
+    lines.push('## Knowledge Inventory (지식 보유 상태)');
+    lines.push('');
+    if (inventory.length === 0) {
+        lines.push('- 데이터 없음');
+    } else {
+        const statusLabel = { sufficient: '보유', partial: '부족', missing: '없음' } as const;
+        lines.push('| 업무 | 상태 | 평균 근거 수 | 평균 top score |');
+        lines.push('|---|---|---|---|');
+        for (const it of inventory) {
+            lines.push(`| ${cell(it.taskLabel)} | ${statusLabel[it.status]} | ${it.avgChunkCount.toFixed(1)} | ${it.avgTopScore.toFixed(2)} |`);
+        }
+    }
+    lines.push('');
+    lines.push('## Knowledge Debt (지식 부채)');
+    lines.push('');
+    if (debt.length === 0) {
+        lines.push('- 부채 없음 — 지식 부족 상태로 수행된 업무가 없습니다.');
+    } else {
+        lines.push('| 업무 | Blocked Turns | Impact (0~10) | Debt Score |');
+        lines.push('|---|---|---|---|');
+        for (const d of debt) {
+            lines.push(`| ${cell(d.taskLabel)} | ${d.blockedTurns} | ${d.impact} | **${d.debtScore}** |`);
+        }
+    }
+    lines.push('');
+    return lines.join('\n');
+}