feat: Self-Evolving Digital Employee OS P0~P6 + 캘린더 충돌 게이트

신뢰성 코어 (P1~P2):
- Requirement Graph: 업무 유형(회의록/시장조사/업무조사/일정) 필수 요소 주입 + 커버리지 hook
- Confidence Engine(0~100 결정론적) / Escalation Engine(검토 요청) / Epistemic Guard(모름·추정·확실 3분류)
- Provenance: citationTrace 에 출처 수정일·오래됨 경고
- Critic Loop: 문제 신호 turn 만 LLM 검수 1회 + 보완 카드

성장 루프 (P3):
- Gap Detector(Requirement-Knowledge) / Need Engine(30/25/20/15/10 공식) / Knowledge Inventory
- Learning Queue(proposed 전용 병합 — 승인은 사람만) / Decision Journal / Reflection 기록
- 반복 누락 요소(3회+)는 다음 turn 체크리스트에 자동 강조 (T5 루프)

지식 운영 (P4) + 기억 (P5) + 학습 실행 (P6):
- Knowledge Validation + Belief Revision(중복 reject·충돌 시 update/add 권고)
- Knowledge Decay(분야별 반감기 감사) / Knowledge Debt(blocked x impact)
- Organizational Memory(.astra/organization.md 상시 주입)
- Research Agent(approved 큐 -> 조사 브리프+추정 라벨 초안+Validation 게이트 -> proposals/)
- Skill Score(전/후반 추세) + Success Pattern DB(전요소충족+확신도90+ 자동 적재)

병렬 트랙:
- 캘린더 충돌 게이트: conflictCheck + 구조화 이벤트 캐시 + create_calendar_event 차단(force 는 사용자 승인 후)
- Task Eval Harness: 회의록 골든셋 자동 채점 명령 + 성장 리포트/학습 큐/노후 점검 명령

신규 모듈 17종(src/intelligence/), VS Code 명령 5종, 설정 11종, 테스트 +89건(전체 508 통과).
설계 문서: docs/SELF_EVOLVING_OS_MASTER_PLAN.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 13:42:09 +09:00
parent cbc2558550
commit 2afd1ac589
41 changed files with 4364 additions and 2 deletions
@@ -16,6 +16,29 @@ export async function applyCalendarActions(ctx: HandlerContext): Promise<void> {
            report.push(`❌ Calendar Event: title / start 누락`);
            continue;
        }
+        // ── 충돌 게이트 (Self-Evolving OS Track 6-2/6-3) — 기존 일정과 겹치면 생성 보류.
+        // force="true" 는 사용자 확인 후에만 (Constitution: 승인 없는 외부 액션 금지).
+        try {
+            const { readCalendarEventsCache } = await import('../../features/calendar');
+            const { findScheduleConflicts, formatConflictReport } = await import('../../features/calendar/conflictCheck');
+            const existing = readCalendarEventsCache(ctx.context);
+            const conflicts = findScheduleConflicts(existing, {
+                startIso: attrs.start,
+                endIso: attrs.end,
+                durationMinutes: attrs.duration,
+                allDay: attrs.allDay,
+            });
+            if (conflicts.length > 0 && attrs.force !== true) {
+                const msg = formatConflictReport(conflicts);
+                report.push(`⚠️ Calendar Event 보류 — ${attrs.title}: 일정 충돌 ${conflicts.length}건`);
+                ctx.chatHistory.push({
+                    role: 'system',
+                    content: `[Calendar conflict — 생성 보류] "${attrs.title}" (${attrs.start})\n${msg}\n사용자에게 충돌 사실을 알리고 진행 여부를 물을 것.`,
+                    internal: true,
+                });
+                continue;
+            }
+        } catch { /* 충돌 검사 실패가 일정 생성을 막지 않음 — 캐시 없으면 검사 skip */ }
        try {
            const { createCalendarEvent } = await import('../../features/calendar');
            const r = await createCalendarEvent(ctx.context, {
@@ -85,6 +85,8 @@ export function _parseCalEventAttrs(raw: string): {
    duration?: number;
    location?: string;
    allDay?: boolean;
+    /** 충돌 감지 무시하고 강행 — 사용자 확인 후에만 설정해야 함 (conflictCheck). */
+    force?: boolean;
 } {
    const out: any = {};
    // `-` 포함 키 (all-day) 지원 — 일부러 ATTR_RE 와 동일 패턴이지만 매번 fresh
@@ -110,6 +112,9 @@ export function _parseCalEventAttrs(raw: string): {
            case 'all-day':
                out.allDay = val === 'true' || val === '1' || val === 'yes';
                break;
+            case 'force':
+                out.force = val === 'true' || val === '1' || val === 'yes';
+                break;
        }
    }
    return out;
@@ -7,12 +7,22 @@
 *  1. devilRebuttal — Devil Agent 반박 카드 (비활성 시 silent skip)
 *  2. postHocSelfCheck — 답변 검증 LLM 호출 (opt-in, 기본 OFF)
 *  3. termValidator — 결정론적 글로서리 forbidden 검사 (기본 ON)
+ *  4. requirementCoverage — 업무 필수 요소 커버리지 결정론적 검사 (기본 ON)
+ *  5. confidenceEscalation — 확신도 산출 + 인간 검토 요청 + Reflection 기록 (기본 ON)
+ *  6. criticLoop — 결정론적 검사가 문제 신호한 업무 turn 만 LLM 검수 1회 (기본 ON)
 */

 import type { PostAnswerHook, PostAnswerHookContext } from './types';
 import { maybeEmitDevilRebuttal as maybeEmitDevilRebuttalFn } from '../llm/devilRebuttal';
 import { postHocSelfCheck, formatSelfCheckFooter, DEFAULT_SELF_CHECK_OPTIONS } from '../postHocSelfCheck';
 import { validateTermUsage, formatTermValidatorFooter } from '../termValidator';
+import { checkRequirementCoverage, formatRequirementCoverageFooter, detectTaskType } from '../../intelligence/requirementGraph';
+import { extractAnswerSignals, computeConfidence, formatConfidenceFooter } from '../../intelligence/confidenceEngine';
+import { decideEscalation, formatEscalationFooter } from '../../intelligence/escalationEngine';
+import { runCriticReview, formatCriticFooter } from '../../intelligence/criticAgent';
+import { appendReflection } from '../../intelligence/reflectionStore';
+import { detectGaps } from '../../intelligence/gapDetector';
+import { appendSuccessPattern } from '../../intelligence/skillScore';
 import { getConfig } from '../../config';

 const devilRebuttalHook: PostAnswerHook = {
@@ -74,10 +84,147 @@ const termValidatorHook: PostAnswerHook = {
    },
 };

+const requirementCoverageHook: PostAnswerHook = { // Post-answer hook: checks the answer against the prompt's requirement coverage and streams a footer.
+    id: 'requirement-coverage',
+    runAsync: false, // declared as a non-async hook; run() below returns void
+    run(ctx: PostAnswerHookContext): void {
+        const cfg = getConfig();
+        if (cfg.requirementCoverageEnabled === false) return; // only an explicit `false` disables the hook; an unset value leaves it on
+        if (!ctx.userPrompt.trim() || !ctx.assistantAnswer.trim()) return; // nothing to check when the prompt or the answer is blank
+        const result = checkRequirementCoverage(ctx.userPrompt, ctx.assistantAnswer);
+        const footer = formatRequirementCoverageFooter(result);
+        if (footer) ctx.getWebview()?.postMessage({ type: 'streamChunk', value: footer }); // falsy footer or missing webview → nothing is posted
+    },
+};
+
+const confidenceEscalationHook: PostAnswerHook = { // Post-answer hook: computes confidence, optionally appends an escalation footer, then records a reflection.
+    id: 'confidence-escalation',
+    runAsync: false,
+    run(ctx: PostAnswerHookContext): void {
+        const cfg = getConfig();
+        if (cfg.confidenceEngineEnabled === false) return; // only an explicit `false` disables the hook
+        if (!ctx.userPrompt.trim() || !ctx.assistantAnswer.trim()) return; // skip blank prompt or blank answer
+
+        // Turns where retrieval did not run (e.g. casual chat) carry null signals → fall back to conservative defaults (zero evidence).
+        const retrievalSignals = ctx.confidenceSignals ?? {
+            chunkCount: 0, topScore: 0, conflictCount: 0, ambiguityDetected: false,
+        };
+        const coverage = checkRequirementCoverage(ctx.userPrompt, ctx.assistantAnswer);
+        const answerSignals = extractAnswerSignals(
+            ctx.assistantAnswer,
+            coverage.ran ? coverage.missing.length : null, // missing-element count, or null when the coverage check did not run
+        );
+        const confidence = computeConfidence(retrievalSignals, answerSignals);
+
+        // Show the footer only on task-deliverable turns — scoring small talk as well would be noise.
+        // Exception: a 'very-low' confidence band is shown whether or not the turn is a task (T4).
+        const isTask = coverage.ran || coverage.taskId !== undefined;
+        if (!isTask && confidence.band !== 'very-low') return;
+
+        let footer = formatConfidenceFooter(confidence);
+        let escalated = false;
+        if (cfg.escalationEnabled !== false) {
+            const decision = decideEscalation({
+                confidence, coverage, conflictCount: retrievalSignals.conflictCount,
+            });
+            escalated = decision.escalate;
+            footer += formatEscalationFooter(decision);
+        }
+        if (footer) ctx.getWebview()?.postMessage({ type: 'streamChunk', value: footer });
+
+        // ── Reflection record (Track 2-4 / 3-6) — append a deterministic retrospective of the task turn.
+        // Original note: written to <brain>/.astra/growth/reflections.jsonl; source data for growth trends and failure patterns.
+        if (cfg.reflectionEnabled !== false) {
+            const task = detectTaskType(ctx.userPrompt);
+            const brainPath = ctx.getBrainPath?.();
+            if (task && brainPath) {
+                // Gap Detector (Track 3-2) — Requirement − Knowledge. Feeds the Need Engine.
+                const gap = detectGaps({ coverage, signals: retrievalSignals, taskId: task.id });
+                const reflectionRecord = {
+                    ts: new Date().toISOString(),
+                    taskId: task.id,
+                    taskLabel: task.label,
+                    confidenceScore: confidence.score,
+                    confidenceBand: confidence.band,
+                    missing: coverage.ran ? coverage.missing : [],
+                    escalated,
+                    criticIssues: null, // Critic runs as a separate async hook — not aggregated in v1
+                    promptPreview: ctx.userPrompt.replace(/\s+/g, ' ').slice(0, 120),
+                    // Decision Journal v1 (Track 3-7) — lets the reasoning behind the verdict be traced back.
+                    factors: confidence.factors.map((f) => `${f.label} (${f.delta > 0 ? '+' : ''}${f.delta})`),
+                    usedSources: (ctx.selfCheckSources || []).map((s) => s.title).slice(0, 5),
+                    // Gap signals.
+                    retrieval: { chunkCount: retrievalSignals.chunkCount, topScore: retrievalSignals.topScore },
+                    weakGrounding: gap.weakGrounding,
+                    gapSeverity: gap.severity,
+                };
+                appendReflection(brainPath, reflectionRecord);
+                // Success Pattern DB (Track 7-4) — meant to store only fully covered, confidence 90+ turns. NOTE(review): called unconditionally here; filter is presumably inside appendSuccessPattern — confirm.
+                appendSuccessPattern(brainPath, reflectionRecord);
+            }
+        }
+    },
+};
+
+const criticLoopHook: PostAnswerHook = { // Post-answer hook: requests one LLM critic review when coverage or confidence flags the answer.
+    id: 'critic-loop',
+    runAsync: true,
+    async run(ctx: PostAnswerHookContext): Promise<void> {
+        const cfg = getConfig();
+        if (cfg.criticLoopEnabled === false) return; // only an explicit `false` disables the hook
+        if (!ctx.userPrompt.trim() || !ctx.assistantAnswer.trim()) return; // skip blank prompt or blank answer
+
+        // Gate — run the single LLM review only on task turns where the deterministic checks flagged a problem
+        // (protects local-model latency: clean answers never trigger it).
+        const task = detectTaskType(ctx.userPrompt);
+        if (!task) return;
+        const coverage = checkRequirementCoverage(ctx.userPrompt, ctx.assistantAnswer);
+        const retrievalSignals = ctx.confidenceSignals ?? {
+            chunkCount: 0, topScore: 0, conflictCount: 0, ambiguityDetected: false,
+        };
+        const answerSignals = extractAnswerSignals(
+            ctx.assistantAnswer,
+            coverage.ran ? coverage.missing.length : null, // missing-element count, or null when the coverage check did not run
+        );
+        const confidence = computeConfidence(retrievalSignals, answerSignals);
+        const needsReview = (coverage.ran && coverage.missing.length > 0) || confidence.score < 70; // review when elements are missing or the score is under 70
+        if (!needsReview) return;
+
+        const critique = await runCriticReview({
+            userPrompt: ctx.userPrompt,
+            draft: ctx.assistantAnswer,
+            requirement: task,
+            missingLabels: coverage.ran ? coverage.missing : [],
+            callLlm: async (system, user, maxTokens) => { // adapter: forwards the critic's system/user prompts to the context's non-streaming LLM call
+                const r = await ctx.callNonStreaming({
+                    baseUrl: ctx.baseUrl,
+                    modelName: ctx.modelName,
+                    engine: ctx.engine,
+                    messages: [
+                        { role: 'system', content: system },
+                        { role: 'user', content: user },
+                    ],
+                    temperature: 0.2,
+                    maxTokens,
+                    contextLength: ctx.contextLength,
+                    signal: ctx.getAbortSignal(),
+                });
+                return r.text;
+            },
+        });
+        if (!critique) return; // LLM or parse failure (per the original note) — silent skip, main turn unaffected
+        const footer = formatCriticFooter(critique);
+        if (footer) ctx.getWebview()?.postMessage({ type: 'streamChunk', value: footer });
+    },
+};
+
 export const POST_ANSWER_HOOKS: PostAnswerHook[] = [
     devilRebuttalHook,
     postHocSelfCheckHook,
     termValidatorHook,
+    requirementCoverageHook, // id 'requirement-coverage', runAsync: false
+    confidenceEscalationHook, // id 'confidence-escalation', runAsync: false
+    criticLoopHook, // id 'critic-loop', runAsync: true
 ];

 /** 모든 hook 을 안전하게 실행 — 한 hook 의 throw 가 다른 hook 막지 않음. */
@@ -27,12 +27,16 @@ export interface PostAnswerHookContext {
    engine: 'lmstudio' | 'ollama';
    /** Self-check 용 출처 미리보기. memoryContext 가 turnCtx 에 채움. */
    selfCheckSources: Array<{ title: string; excerpt: string }>;
+    /** Confidence Engine 검색 신호 (Phase 2). memoryContext 가 채움 — 검색 안 돈 turn 은 null. */
+    confidenceSignals?: import('../../intelligence/confidenceEngine').RetrievalConfidenceSignals | null;
    /** Devil Agent 가 호출 — non-streaming LLM. */
    callNonStreaming: (params: any) => Promise<{ text: string; stopReason?: string }>;
    /** Abort signal accessor. */
    getAbortSignal: () => AbortSignal | undefined;
    /** Webview accessor — hook 결과 streamChunk 송출. vscode.Webview / 간이 Webview 호환. */
    getWebview: () => PostMessageWebview | undefined;
+    /** 활성 두뇌 경로 — Reflection 기록용. 없으면 회고 skip. */
+    getBrainPath?: () => string | undefined;
 }

 export interface PostAnswerHook {