feat: Self-Evolving Digital Employee OS P0~P6 + 캘린더 충돌 게이트

신뢰성 코어 (P1~P2):
- Requirement Graph: 업무 유형(회의록/시장조사/업무조사/일정) 필수 요소 주입 + 커버리지 hook
- Confidence Engine(0~100 결정론적) / Escalation Engine(검토 요청) / Epistemic Guard(모름·추정·확실 3분류)
- Provenance: citationTrace 에 출처 수정일·오래됨 경고
- Critic Loop: 문제 신호 turn 만 LLM 검수 1회 + 보완 카드

성장 루프 (P3):
- Gap Detector(Requirement-Knowledge) / Need Engine(30/25/20/15/10 공식) / Knowledge Inventory
- Learning Queue(proposed 전용 병합 — 승인은 사람만) / Decision Journal / Reflection 기록
- 반복 누락 요소(3회+)는 다음 turn 체크리스트에 자동 강조 (T5 루프)

지식 운영 (P4) + 기억 (P5) + 학습 실행 (P6):
- Knowledge Validation + Belief Revision(중복 reject·충돌 시 update/add 권고)
- Knowledge Decay(분야별 반감기 감사) / Knowledge Debt(blocked x impact)
- Organizational Memory(.astra/organization.md 상시 주입)
- Research Agent(approved 큐 -> 조사 브리프+추정 라벨 초안+Validation 게이트 -> proposals/)
- Skill Score(전/후반 추세) + Success Pattern DB(전요소충족+확신도90+ 자동 적재)

병렬 트랙:
- 캘린더 충돌 게이트: conflictCheck + 구조화 이벤트 캐시 + create_calendar_event 차단(force 는 사용자 승인 후)
- Task Eval Harness: 회의록 골든셋 자동 채점 명령 + 성장 리포트/학습 큐/노후 점검 명령

신규 모듈 17종(src/intelligence/), VS Code 명령 5종, 설정 11종, 테스트 +89건(전체 508 통과).
설계 문서: docs/SELF_EVOLVING_OS_MASTER_PLAN.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 13:42:09 +09:00
parent cbc2558550
commit 2afd1ac589
41 changed files with 4364 additions and 2 deletions
@@ -0,0 +1,273 @@
+/**
+ * Requirement Graph — 업무 유형별 필수 요소 정의 + 감지 + 커버리지 검사.
+ *
+ * Self-Evolving Digital Employee OS 마스터 플랜(docs/SELF_EVOLVING_OS_MASTER_PLAN.md)
+ * Phase 1 / Track 2-1. 신뢰 조건 T3 "품질이 일관적이다 — 필수 요소 누락 없음" 담당.
+ *
+ * 동작 2단계:
+ *   1. *Instructional* — 사용자 요청에서 업무 유형(회의록/시장조사/업무조사/일정) 감지 시
+ *      [TASK REQUIREMENTS] 블록을 시스템 프롬프트에 주입 → 모델이 필수 요소를 빠짐없이 작성.
+ *      정보가 없어 채울 수 없는 요소는 "(확인 필요)" 로 명시하게 강제 — 조용한 생략 금지
+ *      (Anti-Hallucination T1 과 연결).
+ *   2. *Deterministic* — 답변 완료 후 post-answer hook 이 필수 요소 커버리지를 정규식으로
+ *      스캔, 누락 가능 요소를 footer 로 표시 (termValidator 와 같은 패턴, LLM 호출 없음).
+ *
+ * Gap Detector (Phase 3) 가 이 모듈의 Requirement 정의를 입력으로 사용한다:
+ * Gap = Requirement − Knowledge.
+ */
+
+/** One required element of a task type: how it is displayed, hinted to the model, and detected in an answer. */
+export interface RequirementElement {
+    /** Stable identifier (planned for use as the miss-count key by the Failure Pattern DB). */
+    id: string;
+    /** Human-readable element name — shown in the requirements block and in the coverage footer. */
+    label: string;
+    /** Writing hint given to the model. */
+    hint: string;
+    /** Regex sources for the coverage check (OR-combined, compiled with the i+u flags). */
+    detectPatterns: string[];
+}
+
+/** Definition of one task type: how to recognise it in a request and which elements an answer must contain. */
+export interface TaskRequirement {
+    /** Task type ID (e.g. 'meeting-minutes'). */
+    id: string;
+    /** Human-readable task name (e.g. '회의록'). */
+    label: string;
+    /** Regex sources (OR-combined) used to detect this task type in the user request. */
+    detectKeywords: string[];
+    /**
+     * Whether the answer coverage check runs for this task. Set to false for tasks where a
+     * short confirmation-style reply is normal (e.g. scheduling) to avoid false-positive
+     * footer noise. The requirements block is injected regardless of this flag.
+     */
+    coverageCheck: boolean;
+    /** Required elements, listed in the order they appear in the injected checklist. */
+    elements: RequirementElement[];
+}
+
+/** Outcome of a coverage check over one assistant answer. */
+export interface CoverageResult {
+    /** False when the check was skipped (no task detected, coverage disabled for the task, or blank answer). */
+    ran: boolean;
+    /** ID of the detected task type; set by checkRequirementCoverage when the check ran. */
+    taskId?: string;
+    /** Label of the detected task type; set by checkRequirementCoverage when the check ran. */
+    taskLabel?: string;
+    covered: string[];   // element labels whose patterns matched the answer
+    missing: string[];   // element labels with no pattern match in the answer
+}
+
+/**
+ * The four built-in task definitions. Array order is the detection priority: specific
+ * task types come first and the general-purpose 'work-research' entry comes last, so its
+ * broad "research"-style keywords cannot intercept a market-research request.
+ */
+export const DEFAULT_TASK_REQUIREMENTS: TaskRequirement[] = [
+    // ── Meeting minutes ─────────────────────────────────────────────────────
+    {
+        id: 'meeting-minutes',
+        label: '회의록',
+        detectKeywords: ['회의록', '회의 ?(내용|결과)? ?정리', '미팅 ?(노트|정리)', 'meeting (minutes|notes)'],
+        coverageCheck: true,
+        elements: [
+            {
+                id: 'attendees',
+                label: '참석자',
+                hint: '회의 참석 인원 전원. 불명확하면 "(확인 필요)".',
+                detectPatterns: ['참석자', '참석 ?인원', 'attendees?'],
+            },
+            {
+                id: 'decisions',
+                label: '결정사항',
+                hint: '회의에서 합의·확정된 사항. 논의만 되고 미결인 항목과 구분.',
+                detectPatterns: ['결정 ?사항', '결정된', '합의', '확정', 'decisions?'],
+            },
+            {
+                id: 'action-items',
+                label: '액션 아이템',
+                hint: '후속 실행 항목. 각 항목에 담당자·기한 연결.',
+                detectPatterns: ['액션 ?아이템', 'action ?items?', '할 ?일', '후속 ?(조치|작업)', 'to-?do'],
+            },
+            {
+                id: 'owners',
+                label: '담당자',
+                hint: '액션 아이템별 책임자. 미정이면 "(담당자 미정)" 명시.',
+                detectPatterns: ['담당자?', '책임자', 'owner'],
+            },
+            {
+                id: 'due-dates',
+                label: '기한',
+                hint: '액션 아이템별 마감일. 미정이면 "(기한 미정)" 명시.',
+                detectPatterns: ['기한', '마감', '까지', 'due', '\\d{1,2}\\s*월\\s*\\d{1,2}\\s*일'],
+            },
+        ],
+    },
+    // ── Market research ─────────────────────────────────────────────────────
+    {
+        id: 'market-research',
+        label: '시장조사',
+        detectKeywords: ['시장 ?조사', '시장 ?분석', '시장 ?(규모|동향|현황)', 'market (research|analysis)'],
+        coverageCheck: true,
+        elements: [
+            {
+                id: 'market-size',
+                label: '시장 규모',
+                hint: '금액/수량 기준 규모. 수치 출처 필수, 없으면 "(확인 필요)".',
+                detectPatterns: ['시장 ?규모', 'market ?size', '\\d+\\s*(억|조|만\\s*달러|billion|million)'],
+            },
+            {
+                id: 'growth',
+                label: '성장률',
+                hint: '연 성장률(CAGR 등) 또는 성장 추세.',
+                detectPatterns: ['성장률', '성장세', 'CAGR', 'growth', '연평균'],
+            },
+            {
+                id: 'competitors',
+                label: '경쟁사',
+                hint: '주요 플레이어와 각자의 포지션.',
+                detectPatterns: ['경쟁사', '경쟁 ?업체', '주요 ?(업체|기업|플레이어)', 'competitors?'],
+            },
+            {
+                id: 'pricing',
+                label: '가격',
+                hint: '가격대·요금 구조.',
+                detectPatterns: ['가격', '요금', '단가', 'pricing', '원대', '달러'],
+            },
+            {
+                id: 'customer-needs',
+                label: '고객 니즈',
+                hint: '고객 요구·페인 포인트.',
+                detectPatterns: ['니즈', '고객 ?(요구|수요)', '페인 ?포인트', 'needs', 'pain ?points?'],
+            },
+            {
+                id: 'trends',
+                label: '트렌드',
+                hint: '시장 동향·변화 방향.',
+                detectPatterns: ['트렌드', '동향', '추세', 'trends?'],
+            },
+            {
+                id: 'sources',
+                label: '출처',
+                hint: '핵심 수치·주장의 출처. 모델 일반 지식이면 그렇게 명시.',
+                detectPatterns: ['출처', '근거', 'source', '자료:', '참고'],
+            },
+        ],
+    },
+    // ── Schedule management ─────────────────────────────────────────────────
+    {
+        id: 'schedule',
+        label: '일정 관리',
+        detectKeywords: ['일정 ?(등록|추가|확인|조회|정리|관리)', '스케줄', '캘린더', '미팅 ?잡', '약속 ?(등록|추가|잡)'],
+        // Short confirmation-style replies are normal here, so a footer check would only add noise.
+        coverageCheck: false,
+        elements: [
+            {
+                id: 'datetime',
+                label: '일시',
+                hint: '날짜와 시간을 명시. 모호하면 되묻기.',
+                detectPatterns: ['\\d{1,2}\\s*[:시]', '날짜', '일시'],
+            },
+            {
+                id: 'title',
+                label: '일정 제목',
+                hint: '무엇을 위한 일정인지.',
+                detectPatterns: ['제목', '일정명', '건명'],
+            },
+            {
+                id: 'conflict-check',
+                label: '충돌 확인',
+                hint: '기존 일정과 겹침 여부 확인 결과 명시.',
+                detectPatterns: ['충돌', '겹치', '겹침'],
+            },
+        ],
+    },
+    // ── General work research (broadest keywords — must stay last) ──────────
+    {
+        id: 'work-research',
+        label: '업무조사',
+        detectKeywords: ['업무 ?조사', '조사해', '리서치', '알아봐\\s*줘?', '서치해', 'research'],
+        coverageCheck: true,
+        elements: [
+            {
+                id: 'purpose',
+                label: '조사 목적',
+                hint: '무엇을 알기 위한 조사인지 한 줄 명시.',
+                detectPatterns: ['목적', '배경', '알아보기 위해'],
+            },
+            {
+                id: 'summary',
+                label: '핵심 요약',
+                hint: '결론 먼저 — 3줄 이내 요약.',
+                detectPatterns: ['요약', '핵심', '결론부터', 'TL;?DR', 'summary'],
+            },
+            {
+                id: 'details',
+                label: '세부 내용',
+                hint: '요약을 뒷받침하는 상세 조사 내용.',
+                detectPatterns: ['상세', '세부', '구체적', '자세히'],
+            },
+            {
+                id: 'sources',
+                label: '출처',
+                hint: '핵심 주장의 출처. 모델 일반 지식이면 그렇게 명시.',
+                detectPatterns: ['출처', '근거', 'source', '참고'],
+            },
+            {
+                id: 'implications',
+                label: '시사점·다음 단계',
+                hint: '조사 결과가 의미하는 것과 권장 다음 행동.',
+                detectPatterns: ['시사점', '다음 ?단계', '권장', '제안', '결론'],
+            },
+        ],
+    },
+];
+
+/**
+ * Compiles a list of regex source fragments into a single matcher that succeeds when
+ * any one fragment matches (fragments are OR-combined).
+ *
+ * Empty-string fragments are dropped, and a list with no usable fragment compiles to a
+ * pattern that never matches. Without this, `new RegExp('')` matches every input, so an
+ * element with no detect patterns would always count as covered and a task type with no
+ * keywords would claim every prompt.
+ *
+ * @param sources Regex source strings to OR together.
+ * @returns A new non-global RegExp with the `i` (ignore case) and `u` (Unicode) flags.
+ * @throws SyntaxError if a fragment is not a valid pattern under the `u` flag.
+ */
+function toRegex(sources: string[]): RegExp {
+    const usable = sources.filter((source) => source !== '');
+    // `(?!)` is an empty negative lookahead: it fails at every position.
+    const pattern = usable.length > 0 ? usable.join('|') : '(?!)';
+    return new RegExp(pattern, 'iu');
+}
+
+/**
+ * Determines which task type a user request belongs to.
+ *
+ * Requirements are tried in array order and the first one whose detectKeywords match
+ * the prompt wins. Greetings and small talk drop out naturally because they match no
+ * keyword.
+ *
+ * @param userPrompt Raw user request text.
+ * @param requirements Task definitions to test, in priority order.
+ * @returns The first matching requirement, or null when the prompt is empty or
+ *          whitespace-only, or when nothing matches.
+ */
+export function detectTaskType(
+    userPrompt: string,
+    requirements: TaskRequirement[] = DEFAULT_TASK_REQUIREMENTS,
+): TaskRequirement | null {
+    const hasText = Boolean(userPrompt) && userPrompt.trim().length > 0;
+    if (!hasText) return null;
+    const firstMatch = requirements.find((candidate) => toRegex(candidate.detectKeywords).test(userPrompt));
+    return firstMatch ?? null;
+}
+
+/**
+ * Builds the [TASK REQUIREMENTS] system-prompt block for the task type detected in the
+ * user request: a header, two instruction lines, one checklist line per required
+ * element, and a closing self-check instruction.
+ *
+ * @param userPrompt Raw user request text, used for task-type detection.
+ * @param requirements Task definitions to detect against.
+ * @param emphasizeLabels Labels of elements to flag with an extra warning marker.
+ * @returns The block as newline-joined text, or an empty string when no task type is
+ *          detected, so the caller can drop it.
+ */
+export function buildRequirementGraphBlock(
+    userPrompt: string,
+    requirements: TaskRequirement[] = DEFAULT_TASK_REQUIREMENTS,
+    /** Labels of elements that were often missed before — supplied by Reflection/Failure Pattern (T5: avoid repeating the same mistake). */
+    emphasizeLabels: string[] = [],
+): string {
+    const task = detectTaskType(userPrompt, requirements);
+    if (task === null) return '';
+
+    const flagged = new Set(emphasizeLabels);
+    // One markdown checkbox line per element; flagged labels get the warning suffix.
+    const checklist = task.elements.map((element) => {
+        const warning = flagged.has(element.label) ? ' ⚠️ *과거에 자주 누락된 요소 — 특히 주의*' : '';
+        return `- [ ] **${element.label}** — ${element.hint}${warning}`;
+    });
+
+    return [
+        `[TASK REQUIREMENTS — ${task.label}]`,
+        `이 요청은 '${task.label}' 업무로 감지됨. 아래 필수 요소를 *모두* 포함해 작성할 것.`,
+        '정보가 없어 채울 수 없는 요소는 조용히 생략하지 말고 "(확인 필요)" 로 명시 후 사용자에게 질문.',
+        '',
+        ...checklist,
+        '',
+        '제출 전 위 체크리스트를 스스로 점검하고, 누락 요소가 있으면 보완 후 답변할 것.',
+        '[/TASK REQUIREMENTS]',
+    ].join('\n');
+}
+
+/**
+ * Deterministic coverage check: an element is reported as missing when none of its
+ * detectPatterns occurs anywhere in the answer. Regex only, no LLM call.
+ *
+ * Known limitation (deliberately conservative): a pattern match means the element was
+ * *mentioned*, not that its content is adequate. Judging content quality is left to the
+ * Phase 3 Self Evaluation step.
+ *
+ * @param userPrompt Raw user request text, used to detect the task type.
+ * @param assistantAnswer Completed answer text to scan.
+ * @param requirements Task definitions to detect against.
+ * @returns `ran: false` with empty lists when no task is detected, the task has
+ *          coverageCheck disabled, or the answer is blank; otherwise the covered and
+ *          missing element labels in definition order.
+ */
+export function checkRequirementCoverage(
+    userPrompt: string,
+    assistantAnswer: string,
+    requirements: TaskRequirement[] = DEFAULT_TASK_REQUIREMENTS,
+): CoverageResult {
+    const task = detectTaskType(userPrompt, requirements);
+    const hasAnswer = Boolean(assistantAnswer) && assistantAnswer.trim().length > 0;
+    if (task === null || !task.coverageCheck || !hasAnswer) {
+        return { ran: false, covered: [], missing: [] };
+    }
+
+    const covered: string[] = [];
+    const missing: string[] = [];
+    task.elements.forEach((element) => {
+        const bucket = toRegex(element.detectPatterns).test(assistantAnswer) ? covered : missing;
+        bucket.push(element.label);
+    });
+    return { ran: true, taskId: task.id, taskLabel: task.label, covered, missing };
+}
+
+/**
+ * Formats the coverage footer as a markdown blockquote listing the possibly-missing
+ * elements.
+ *
+ * Returns an empty string when the check did not run or nothing is missing, so fully
+ * covered answers get no footer noise. `taskLabel` is optional on CoverageResult; when
+ * it is absent the parenthesised task name is left out instead of rendering
+ * "(undefined)".
+ *
+ * @param result Outcome of a coverage check.
+ * @returns Footer text starting with two newlines, or '' when there is nothing to report.
+ */
+export function formatRequirementCoverageFooter(result: CoverageResult): string {
+    if (!result.ran || result.missing.length === 0) return '';
+    const taskSuffix = result.taskLabel ? ` (${result.taskLabel})` : '';
+    const missingList = result.missing.join(', ');
+    return `\n\n> ⚠️ **Requirement Check${taskSuffix}** — 누락 가능 요소: ${missingList}. 해당 내용이 없었다면 "(확인 필요)" 로 표시하거나 추가 정보를 요청하세요.`;
+}