feat: Self-Evolving Digital Employee OS P0~P6 + 캘린더 충돌 게이트

신뢰성 코어 (P1~P2):
- Requirement Graph: 업무 유형(회의록/시장조사/업무조사/일정) 필수 요소 주입 + 커버리지 hook
- Confidence Engine(0~100 결정론적) / Escalation Engine(검토 요청) / Epistemic Guard(모름·추정·확실 3분류)
- Provenance: citationTrace 에 출처 수정일·오래됨 경고
- Critic Loop: 문제 신호 turn 만 LLM 검수 1회 + 보완 카드

성장 루프 (P3):
- Gap Detector(Requirement-Knowledge) / Need Engine(30/25/20/15/10 공식) / Knowledge Inventory
- Learning Queue(proposed 전용 병합 — 승인은 사람만) / Decision Journal / Reflection 기록
- 반복 누락 요소(3회+)는 다음 turn 체크리스트에 자동 강조 (T5 루프)

지식 운영 (P4) + 기억 (P5) + 학습 실행 (P6):
- Knowledge Validation + Belief Revision(중복 reject·충돌 시 update/add 권고)
- Knowledge Decay(분야별 반감기 감사) / Knowledge Debt(blocked x impact)
- Organizational Memory(.astra/organization.md 상시 주입)
- Research Agent(approved 큐 -> 조사 브리프+추정 라벨 초안+Validation 게이트 -> proposals/)
- Skill Score(전/후반 추세) + Success Pattern DB(전요소충족+확신도90+ 자동 적재)

병렬 트랙:
- 캘린더 충돌 게이트: conflictCheck + 구조화 이벤트 캐시 + create_calendar_event 차단(force 는 사용자 승인 후)
- Task Eval Harness: 회의록 골든셋 자동 채점 명령 + 성장 리포트/학습 큐/노후 점검 명령

신규 모듈 17종(src/intelligence/), VS Code 명령 5종, 설정 11종, 테스트 +89건(전체 508 통과).
설계 문서: docs/SELF_EVOLVING_OS_MASTER_PLAN.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 13:42:09 +09:00
parent cbc2558550
commit 2afd1ac589
41 changed files with 4364 additions and 2 deletions
@@ -13,6 +13,22 @@ import {
    GOLDEN_TEMPLATE,
    GOLDEN_REL_JSONL,
 } from '../retrieval/evalHarness';
+import {
+    loadTaskGoldenSet,
+    runTaskEval,
+    formatTaskEvalReport,
+    TASK_GOLDEN_DIR,
+} from '../intelligence/taskEvalHarness';
+import { buildRequirementGraphBlock } from '../intelligence/requirementGraph';
+import { buildEpistemicGuardBlock } from '../intelligence/epistemicGuardBlock';
+import { simpleChatCompletion } from '../intelligence/llmCall';
+import { loadReflections, formatGrowthReport } from '../intelligence/reflectionStore';
+import { computeNeeds, knowledgeInventory, computeKnowledgeDebt, formatNeedsMarkdown } from '../intelligence/needEngine';
+import { auditKnowledgeDecay, formatDecayReport } from '../intelligence/knowledgeDecay';
+import { computeSkillScores, formatSkillScoresMarkdown, loadSuccessPatterns, formatSuccessPatternsMarkdown } from '../intelligence/skillScore';
+import { runResearch, formatProposalMarkdown } from '../intelligence/researchAgent';
+import type { ExistingKnowledgeRef } from '../intelligence/knowledgeValidation';
+import { loadQueue, saveQueue, mergeNeedsIntoQueue, formatQueueMarkdown, LEARNING_QUEUE_REL_PATH } from '../intelligence/learningQueue';

 /**
 * 검색 평가 명령 묶음 (Phase 1-나).
@@ -25,6 +41,11 @@ export function registerEvalCommands(): vscode.Disposable[] {
    return [
        vscode.commands.registerCommand('g1nation.eval.retrieval', runRetrievalEvalCommand),
        vscode.commands.registerCommand('g1nation.embeddings.backfill', backfillEmbeddingsCommand),
+        vscode.commands.registerCommand('g1nation.eval.tasks', runTaskEvalCommand),
+        vscode.commands.registerCommand('g1nation.growth.report', growthReportCommand),
+        vscode.commands.registerCommand('g1nation.growth.learningQueue', learningQueueCommand),
+        vscode.commands.registerCommand('g1nation.knowledge.decayAudit', decayAuditCommand),
+        vscode.commands.registerCommand('g1nation.research.runQueue', researchRunQueueCommand),
    ];
 }

@@ -205,6 +226,278 @@ async function backfillEmbeddingsCommand(): Promise<void> {
    }
 }

+/**
+ * Task evaluation command (Self Evaluation v1, Phase 3 / Track 3-4).
+ *
+ * Loads the 'meeting-minutes' task golden set from the active brain folder, asks the
+ * configured model to turn each record's source material into meeting minutes, and
+ * passes the outputs through `runTaskEval`. The formatted result is written as a
+ * timestamped Markdown report under TASK_GOLDEN_DIR inside the brain folder and then
+ * opened in the editor.
+ *
+ * Design intent carried over from the original note: required-element coverage is
+ * scored deterministically, and re-running the same golden set on every version shows
+ * growth as a score trend (same methodology as the retrieval eval).
+ *
+ * Any failure is logged and shown as an error notification; nothing is rethrown.
+ */
+async function runTaskEvalCommand(): Promise<void> {
+    try {
+        const brain = getActiveBrainProfile();
+        if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
+            vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다. 먼저 두뇌를 추가/선택하세요.');
+            return;
+        }
+        const brainRoot = brain.localBrainPath;
+
+        const { records, parseErrors, sourcePath } = loadTaskGoldenSet(brainRoot, 'meeting-minutes');
+        if (records.length === 0) {
+            const expectedFile = path.join(TASK_GOLDEN_DIR, 'meeting-minutes.golden.jsonl');
+            const parseNote = parseErrors ? ` (파싱 실패 ${parseErrors}줄)` : '';
+            vscode.window.showWarningMessage(`업무 골든셋이 없습니다: ${expectedFile}${parseNote}`);
+            return;
+        }
+
+        const config = getConfig();
+        const model = config.defaultModel;
+        if (!model || !config.ollamaUrl) {
+            vscode.window.showErrorMessage('모델/엔진 설정이 없습니다 (defaultModel, ollamaUrl).');
+            return;
+        }
+
+        const progressOptions = {
+            location: vscode.ProgressLocation.Notification,
+            title: 'Astra 업무 평가 (회의록)',
+            cancellable: true,
+        };
+        await vscode.window.withProgress(progressOptions, async (progress, token) => {
+            const result = await runTaskEval({
+                records,
+                readSource: (sourceFile) => fs.readFileSync(sourceFile, 'utf8'),
+                generate: async (record, sourceContent) => {
+                    // Cancellation is only observed between records, right before a model call.
+                    if (token.isCancellationRequested) {
+                        throw new Error('취소됨');
+                    }
+                    // Same instruction stack as production: Requirement Graph + Epistemic Guard blocks.
+                    const promptBlocks = [
+                        '너는 업무 비서다. 제공된 회의 전사를 회의록으로 정리한다.',
+                        buildRequirementGraphBlock(record.query),
+                        buildEpistemicGuardBlock({ chunkCount: 1, taskDetected: true }),
+                    ];
+                    // Blocks that come back empty are dropped before joining.
+                    const system = promptBlocks.filter(Boolean).join('\n\n');
+                    const user = `${record.query}\n\n[회의 전사]\n${sourceContent}`;
+                    return simpleChatCompletion(system, user, {
+                        baseUrl: config.ollamaUrl,
+                        model,
+                        temperature: 0.2,
+                        maxTokens: 1600,
+                        timeoutMs: 180000,
+                    });
+                },
+                onProgress: (done, total) => progress.report({ message: `${done}/${total} 레코드 평가 중…` }),
+            });
+
+            // File-name-safe timestamp: first 19 chars of the ISO string with ':' / '.' replaced.
+            const now = new Date();
+            const stamp = now.toISOString().slice(0, 19).replace(/[:.]/g, '-');
+            const notes = parseErrors ? `골든셋 파싱 실패 ${parseErrors}줄 (무시됨)` : undefined;
+            const md = formatTaskEvalReport(result, {
+                taskLabel: '회의록',
+                brainName: brain.name,
+                dateStr: now.toLocaleString(),
+                modelName: model,
+                notes,
+            });
+
+            const reportPath = path.join(brainRoot, TASK_GOLDEN_DIR, `report-${stamp}.md`);
+            fs.mkdirSync(path.dirname(reportPath), { recursive: true });
+            fs.writeFileSync(reportPath, md, 'utf8');
+            logInfo('Task eval complete.', { records: result.scores.length, avgCoverage: result.avgCoverage, reportPath });
+
+            const doc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
+            await vscode.window.showTextDocument(doc, { preview: false });
+
+            const coveragePct = (result.avgCoverage * 100).toFixed(1);
+            vscode.window.showInformationMessage(
+                `업무 평가 완료 · 평균 커버리지 ${coveragePct}% · 전 요소 충족 ${result.perfectCount}/${result.scores.length}건 (골든셋: ${path.basename(sourcePath)})`,
+            );
+        });
+    } catch (err: any) {
+        logError('Task eval command failed.', { error: err?.message || String(err) });
+        vscode.window.showErrorMessage(`업무 평가 실패: ${err?.message ?? err}`);
+    }
+}
+
+/**
+ * Growth report command.
+ *
+ * Loads the Reflection records of the active brain and writes
+ * `.astra/growth/growth-report.md`, composed of three sections separated by blank
+ * lines: the growth report (per the original note: weekly trend plus top repeated
+ * mistakes from .astra/growth/reflections.jsonl), the skill scores, and the stored
+ * success patterns. The report is opened in the editor; when no records exist yet an
+ * additional hint notification is shown.
+ *
+ * Any failure is logged and shown as an error notification; nothing is rethrown.
+ */
+async function growthReportCommand(): Promise<void> {
+    try {
+        const brain = getActiveBrainProfile();
+        if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
+            vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
+            return;
+        }
+        const brainRoot = brain.localBrainPath;
+
+        const records = loadReflections(brainRoot);
+        const growthSection = formatGrowthReport(records);
+        const skillSection = formatSkillScoresMarkdown(computeSkillScores(records));
+        const patternSection = formatSuccessPatternsMarkdown(loadSuccessPatterns(brainRoot));
+        const md = [growthSection, skillSection, patternSection].join('\n\n');
+
+        const growthDir = path.join(brainRoot, '.astra', 'growth');
+        const reportPath = path.join(growthDir, 'growth-report.md');
+        fs.mkdirSync(growthDir, { recursive: true });
+        fs.writeFileSync(reportPath, md, 'utf8');
+
+        const doc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
+        await vscode.window.showTextDocument(doc, { preview: false });
+
+        // The (empty) report is still written and opened; this only adds a hint on top.
+        const hasNoRecords = records.length === 0;
+        if (hasNoRecords) {
+            vscode.window.showInformationMessage('아직 Reflection 기록이 없습니다 — 업무(회의록/조사/일정) 요청을 처리하면 자동으로 쌓입니다.');
+        }
+    } catch (err: any) {
+        logError('Growth report command failed.', { error: err?.message || String(err) });
+        vscode.window.showErrorMessage(`성장 리포트 실패: ${err?.message ?? err}`);
+    }
+}
+
+/**
+ * Learning queue refresh command (Phase 3 / Track 3-3 + 3-5).
+ *
+ * Aggregates the Reflection records through the Need Engine (`computeNeeds`,
+ * `knowledgeInventory`, `computeKnowledgeDebt`), merges the resulting needs into the
+ * stored Learning Queue via `mergeNeedsIntoQueue`, saves the queue, and writes a
+ * human-readable summary to `.astra/growth/learning-needs.md`, which is then opened.
+ *
+ * Per the design note, merged items enter as *proposed* only; approval is done by a
+ * person editing the queue file directly — Permission Based Learning (Constitution 8-2).
+ *
+ * Any failure is logged and shown as an error notification; nothing is rethrown.
+ */
+async function learningQueueCommand(): Promise<void> {
+    try {
+        const brain = getActiveBrainProfile();
+        if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
+            vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
+            return;
+        }
+        const brainRoot = brain.localBrainPath;
+
+        const records = loadReflections(brainRoot);
+        const needs = computeNeeds(records);
+        const inventory = knowledgeInventory(records);
+        const debt = computeKnowledgeDebt(records);
+
+        const storedQueue = loadQueue(brainRoot);
+        const mergedAt = new Date().toISOString();
+        const queue = mergeNeedsIntoQueue(storedQueue, needs, mergedAt);
+        saveQueue(brainRoot, queue);
+
+        // Human-readable summary: need rationale + inventory + debt, then the queue status.
+        const needsSection = formatNeedsMarkdown(needs, inventory, debt);
+        const queueSection = formatQueueMarkdown(queue);
+        const md = [needsSection, queueSection].join('\n---\n\n');
+
+        const growthDir = path.join(brainRoot, '.astra', 'growth');
+        const reportPath = path.join(growthDir, 'learning-needs.md');
+        fs.mkdirSync(growthDir, { recursive: true });
+        fs.writeFileSync(reportPath, md, 'utf8');
+
+        const doc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
+        await vscode.window.showTextDocument(doc, { preview: false });
+
+        let proposed = 0;
+        for (const entry of queue) {
+            if (entry.status === 'proposed') {
+                proposed += 1;
+            }
+        }
+
+        let summary: string;
+        if (records.length === 0) {
+            summary = '아직 Reflection 기록이 없습니다 — 업무 turn 이 쌓이면 학습 우선순위가 산출됩니다.';
+        } else {
+            summary = `학습 큐 갱신 완료 · 제안 ${proposed}건 (승인은 ${LEARNING_QUEUE_REL_PATH} 에서 status 를 approved 로).`;
+        }
+        vscode.window.showInformationMessage(summary);
+    } catch (err: any) {
+        logError('Learning queue command failed.', { error: err?.message || String(err) });
+        vscode.window.showErrorMessage(`학습 큐 갱신 실패: ${err?.message ?? err}`);
+    }
+}
+
+/**
+ * Knowledge decay audit command (Phase 4 / Track 4-3).
+ *
+ * Collects the modification time (mtime) of every file returned by `findBrainFiles`
+ * for the active brain, feeds them to `auditKnowledgeDecay`, writes the formatted
+ * result to `.astra/growth/decay-report.md` and opens it. Per the design note the
+ * audit applies per-domain half-lives, and v1 only reports — nothing is moved or
+ * deleted automatically (Human Override).
+ *
+ * Any failure is logged and shown as an error notification; nothing is rethrown.
+ */
+async function decayAuditCommand(): Promise<void> {
+    try {
+        const brain = getActiveBrainProfile();
+        if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
+            vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
+            return;
+        }
+        const brainRoot = brain.localBrainPath;
+
+        const entries: { relPath: string; lastUpdated: number }[] = [];
+        for (const file of findBrainFiles(brainRoot)) {
+            try {
+                // Listed paths may be absolute or brain-relative; normalise to absolute for stat.
+                let abs: string;
+                if (path.isAbsolute(file)) {
+                    abs = file;
+                } else {
+                    abs = path.join(brainRoot, file);
+                }
+                const { mtimeMs } = fs.statSync(abs);
+                // An empty relative path falls back to the listed name.
+                const relPath = path.relative(brainRoot, abs) || file;
+                entries.push({ relPath, lastUpdated: mtimeMs });
+            } catch {
+                // File disappeared or is unreadable — skip it.
+            }
+        }
+
+        const items = auditKnowledgeDecay(entries);
+        const md = formatDecayReport(items, { brainName: brain.name, dateStr: new Date().toLocaleString() });
+
+        const growthDir = path.join(brainRoot, '.astra', 'growth');
+        const reportPath = path.join(growthDir, 'decay-report.md');
+        fs.mkdirSync(growthDir, { recursive: true });
+        fs.writeFileSync(reportPath, md, 'utf8');
+
+        const doc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
+        await vscode.window.showTextDocument(doc, { preview: false });
+
+        let stale = 0;
+        for (const item of items) {
+            if (item.status === 'stale') {
+                stale += 1;
+            }
+        }
+        vscode.window.showInformationMessage(`지식 노후 점검 완료 · ${entries.length}개 파일 중 노후 ${stale}개.`);
+    } catch (err: any) {
+        logError('Decay audit command failed.', { error: err?.message || String(err) });
+        vscode.window.showErrorMessage(`지식 노후 점검 실패: ${err?.message ?? err}`);
+    }
+}
+
+/**
+ * Learning execution command (Phase 6 / Track 7-1, Research Agent).
+ *
+ * Takes every *approved* Learning Queue item, runs `runResearch` on it, writes the
+ * formatted proposal to `.astra/growth/proposals/<item.id>.md` inside the brain folder
+ * and marks the item `in-progress`. Nothing is written into the brain body here — per
+ * the design note, a person must enrich the proposal with external evidence and
+ * approve it before it becomes knowledge (Permission Based Learning).
+ *
+ * Failure handling: if a later item fails, the queue is still saved for the items
+ * already processed, so their proposals are not regenerated (and overwritten) on the
+ * next run. The original error then propagates to the outer handler, which logs it
+ * and shows an error notification. Cancelling stops before the next item and keeps
+ * what was finished.
+ */
+async function researchRunQueueCommand(): Promise<void> {
+    try {
+        const brain = getActiveBrainProfile();
+        if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
+            vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
+            return;
+        }
+        const config = getConfig();
+        const model = config.defaultModel;
+        if (!model || !config.ollamaUrl) {
+            vscode.window.showErrorMessage('모델/엔진 설정이 없습니다 (defaultModel, ollamaUrl).');
+            return;
+        }
+        const queue = loadQueue(brain.localBrainPath);
+        // `approved` holds references into `queue`, so mutating an item below updates the queue itself.
+        const approved = queue.filter((q) => q.status === 'approved');
+        if (approved.length === 0) {
+            vscode.window.showInformationMessage(
+                `승인된 학습 항목이 없습니다 — ${LEARNING_QUEUE_REL_PATH} 에서 status 를 approved 로 바꾼 뒤 다시 실행하세요.`,
+            );
+            return;
+        }
+
+        await vscode.window.withProgress(
+            { location: vscode.ProgressLocation.Notification, title: 'Astra 학습 실행 (Research Agent)', cancellable: true },
+            async (progress, token) => {
+                const orchestrator = new RetrievalOrchestrator();
+                const allFiles = findBrainFiles(brain.localBrainPath);
+                // Return value is unused — presumably this warms the token index before ranking; confirm against getBrainTokenIndex.
+                getBrainTokenIndex(brain.localBrainPath, allFiles);
+
+                // Internal-knowledge lookup for a topic: top 5 ranked brain files, each truncated to 2000 chars.
+                const fetchInternalRefs = async (topic: string): Promise<ExistingKnowledgeRef[]> => {
+                    const ranked = orchestrator.rankBrainForEval(topic, brain, { limit: 5 }).slice(0, 5);
+                    const refs: ExistingKnowledgeRef[] = [];
+                    for (const r of ranked) {
+                        try {
+                            const abs = path.join(brain.localBrainPath, r.relativePath);
+                            const content = fs.readFileSync(abs, 'utf8').slice(0, 2000);
+                            const st = fs.statSync(abs);
+                            refs.push({ title: path.basename(r.relativePath), content, lastUpdated: st.mtimeMs, filePath: r.relativePath });
+                        } catch { /* skip unreadable */ }
+                    }
+                    return refs;
+                };
+
+                let done = 0;
+                const proposalsDir = path.join(brain.localBrainPath, '.astra', 'growth', 'proposals');
+                fs.mkdirSync(proposalsDir, { recursive: true });
+                const proposalPaths: string[] = [];
+                try {
+                    for (const item of approved) {
+                        if (token.isCancellationRequested) break;
+                        progress.report({ message: `${++done}/${approved.length} — ${item.topic}` });
+                        const pkg = await runResearch({
+                            item,
+                            fetchInternalRefs,
+                            callLlm: (system, user, maxTokens) => simpleChatCompletion(system, user, {
+                                baseUrl: config.ollamaUrl, model, temperature: 0.3, maxTokens, timeoutMs: 180000,
+                            }),
+                            nowIso: new Date().toISOString(),
+                        });
+                        const md = formatProposalMarkdown(pkg, { dateStr: new Date().toLocaleString(), modelName: model });
+                        const filePath = path.join(proposalsDir, `${item.id}.md`);
+                        fs.writeFileSync(filePath, md, 'utf8');
+                        proposalPaths.push(filePath);
+                        // Status flips only after the proposal file is on disk.
+                        item.status = 'in-progress';
+                        item.updatedAt = new Date().toISOString();
+                    }
+                } catch (err) {
+                    // Persist the status of items that already completed; otherwise their
+                    // proposals exist on disk while the queue still lists them as approved.
+                    if (proposalPaths.length > 0) {
+                        try {
+                            saveQueue(brain.localBrainPath, queue);
+                        } catch (saveErr: any) {
+                            // Keep the original failure as the reported error; only log the save problem.
+                            logError('Research queue save failed after partial run.', { error: saveErr?.message || String(saveErr) });
+                        }
+                    }
+                    throw err;
+                }
+                saveQueue(brain.localBrainPath, queue);
+                logInfo('Research agent run complete.', { processed: proposalPaths.length });
+
+                if (proposalPaths.length > 0) {
+                    const doc = await vscode.workspace.openTextDocument(vscode.Uri.file(proposalPaths[0]));
+                    await vscode.window.showTextDocument(doc, { preview: false });
+                }
+                vscode.window.showInformationMessage(
+                    `학습 제안 ${proposalPaths.length}건 생성 (.astra/growth/proposals/). 외부 근거로 보강 후 두뇌에 저장하고 큐 상태를 done 으로 바꾸세요.`,
+                );
+            },
+        );
+    } catch (err: any) {
+        logError('Research run command failed.', { error: err?.message || String(err) });
+        vscode.window.showErrorMessage(`학습 실행 실패: ${err?.message ?? err}`);
+    }
+}
+
 /** 골든셋 파일이 없을 때 템플릿을 만든다. 이미 (깨진/빈) 파일이 있으면 덮어쓰지 않는다. */
 async function scaffoldGoldenSet(goldenPath: string, existingSource: string | null, parseErrors: number): Promise<boolean> {
    if (existingSource && fs.existsSync(existingSource)) {