feat: Self-Evolving Digital Employee OS P0~P6 + 캘린더 충돌 게이트

신뢰성 코어 (P1~P2):
- Requirement Graph: 업무 유형(회의록/시장조사/업무조사/일정) 필수 요소 주입 + 커버리지 hook
- Confidence Engine(0~100 결정론적) / Escalation Engine(검토 요청) / Epistemic Guard(모름·추정·확실 3분류)
- Provenance: citationTrace 에 출처 수정일·오래됨 경고
- Critic Loop: 문제 신호 turn 만 LLM 검수 1회 + 보완 카드

성장 루프 (P3):
- Gap Detector(Requirement-Knowledge) / Need Engine(30/25/20/15/10 공식) / Knowledge Inventory
- Learning Queue(proposed 전용 병합 — 승인은 사람만) / Decision Journal / Reflection 기록
- 반복 누락 요소(3회+)는 다음 turn 체크리스트에 자동 강조 (T5 루프)

지식 운영 (P4) + 기억 (P5) + 학습 실행 (P6):
- Knowledge Validation + Belief Revision(중복 reject·충돌 시 update/add 권고)
- Knowledge Decay(분야별 반감기 감사) / Knowledge Debt(blocked x impact)
- Organizational Memory(.astra/organization.md 상시 주입)
- Research Agent(approved 큐 -> 조사 브리프+추정 라벨 초안+Validation 게이트 -> proposals/)
- Skill Score(전/후반 추세) + Success Pattern DB(전요소충족+확신도90+ 자동 적재)

병렬 트랙:
- 캘린더 충돌 게이트: conflictCheck + 구조화 이벤트 캐시 + create_calendar_event 차단(force 는 사용자 승인 후)
- Task Eval Harness: 회의록 골든셋 자동 채점 명령 + 성장 리포트/학습 큐/노후 점검 명령

신규 모듈 17종(src/intelligence/), VS Code 명령 5종, 설정 11종, 테스트 +89건(전체 508 통과).
설계 문서: docs/SELF_EVOLVING_OS_MASTER_PLAN.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-11 13:42:09 +09:00
parent cbc2558550
commit 2afd1ac589
41 changed files with 4364 additions and 2 deletions
+293
View File
@@ -13,6 +13,22 @@ import {
GOLDEN_TEMPLATE,
GOLDEN_REL_JSONL,
} from '../retrieval/evalHarness';
import {
loadTaskGoldenSet,
runTaskEval,
formatTaskEvalReport,
TASK_GOLDEN_DIR,
} from '../intelligence/taskEvalHarness';
import { buildRequirementGraphBlock } from '../intelligence/requirementGraph';
import { buildEpistemicGuardBlock } from '../intelligence/epistemicGuardBlock';
import { simpleChatCompletion } from '../intelligence/llmCall';
import { loadReflections, formatGrowthReport } from '../intelligence/reflectionStore';
import { computeNeeds, knowledgeInventory, computeKnowledgeDebt, formatNeedsMarkdown } from '../intelligence/needEngine';
import { auditKnowledgeDecay, formatDecayReport } from '../intelligence/knowledgeDecay';
import { computeSkillScores, formatSkillScoresMarkdown, loadSuccessPatterns, formatSuccessPatternsMarkdown } from '../intelligence/skillScore';
import { runResearch, formatProposalMarkdown } from '../intelligence/researchAgent';
import type { ExistingKnowledgeRef } from '../intelligence/knowledgeValidation';
import { loadQueue, saveQueue, mergeNeedsIntoQueue, formatQueueMarkdown, LEARNING_QUEUE_REL_PATH } from '../intelligence/learningQueue';
/**
* 검색 평가 명령 묶음 (Phase 1-나).
@@ -25,6 +41,11 @@ export function registerEvalCommands(): vscode.Disposable[] {
return [
vscode.commands.registerCommand('g1nation.eval.retrieval', runRetrievalEvalCommand),
vscode.commands.registerCommand('g1nation.embeddings.backfill', backfillEmbeddingsCommand),
vscode.commands.registerCommand('g1nation.eval.tasks', runTaskEvalCommand),
vscode.commands.registerCommand('g1nation.growth.report', growthReportCommand),
vscode.commands.registerCommand('g1nation.growth.learningQueue', learningQueueCommand),
vscode.commands.registerCommand('g1nation.knowledge.decayAudit', decayAuditCommand),
vscode.commands.registerCommand('g1nation.research.runQueue', researchRunQueueCommand),
];
}
@@ -205,6 +226,278 @@ async function backfillEmbeddingsCommand(): Promise<void> {
}
}
/**
 * Task evaluation (Self Evaluation v1, Phase 3 / Track 3-4).
 *
 * For every record of the meeting-minutes golden set, asks the configured LLM to turn the
 * raw transcript into meeting minutes and hands the output to `runTaskEval` for scoring.
 * The resulting report is written next to the golden set as `report-<timestamp>.md` and
 * opened in the editor. Running the same golden set on each version is intended to show
 * the score trend over time (same methodology as the retrieval evaluation).
 *
 * All failures are logged and surfaced to the user as an error message; nothing is rethrown.
 */
async function runTaskEvalCommand(): Promise<void> {
  try {
    const brain = getActiveBrainProfile();
    if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
      vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다. 먼저 두뇌를 추가/선택하세요.');
      return;
    }
    const brainRoot = brain.localBrainPath;

    const {
      records: goldenRecords,
      parseErrors: badLineCount,
      sourcePath: goldenFile,
    } = loadTaskGoldenSet(brainRoot, 'meeting-minutes');
    if (goldenRecords.length === 0) {
      // Nothing to evaluate: tell the user where the golden set is expected to live.
      const expectedFile = path.join(TASK_GOLDEN_DIR, 'meeting-minutes.golden.jsonl');
      const parseNote = badLineCount ? ` (파싱 실패 ${badLineCount}줄)` : '';
      vscode.window.showWarningMessage(`업무 골든셋이 없습니다: ${expectedFile}${parseNote}`);
      return;
    }

    const config = getConfig();
    const model = config.defaultModel;
    if (!(model && config.ollamaUrl)) {
      vscode.window.showErrorMessage('모델/엔진 설정이 없습니다 (defaultModel, ollamaUrl).');
      return;
    }

    const progressOptions = {
      location: vscode.ProgressLocation.Notification,
      title: 'Astra 업무 평가 (회의록)',
      cancellable: true,
    };
    await vscode.window.withProgress(progressOptions, async (progress, token) => {
      const result = await runTaskEval({
        records: goldenRecords,
        readSource: (sourceFile) => fs.readFileSync(sourceFile, 'utf8'),
        generate: async (record, sourceContent) => {
          // Abort generation as soon as the user cancels the notification.
          if (token.isCancellationRequested) {
            throw new Error('취소됨');
          }
          // Same instruction stack as production: Requirement Graph + Epistemic Guard blocks.
          // Blocks that come back empty are dropped before joining.
          const promptParts = [
            '너는 업무 비서다. 제공된 회의 전사를 회의록으로 정리한다.',
            buildRequirementGraphBlock(record.query),
            buildEpistemicGuardBlock({ chunkCount: 1, taskDetected: true }),
          ];
          const systemPrompt = promptParts.filter(Boolean).join('\n\n');
          const userPrompt = `${record.query}\n\n[회의 전사]\n${sourceContent}`;
          return simpleChatCompletion(systemPrompt, userPrompt, {
            baseUrl: config.ollamaUrl,
            model,
            temperature: 0.2,
            maxTokens: 1600,
            timeoutMs: 180000,
          });
        },
        onProgress: (done, total) => {
          progress.report({ message: `${done}/${total} 레코드 평가 중…` });
        },
      });

      // Timestamp for the file name: ISO string with ':' and '.' replaced, cut to seconds.
      const finishedAt = new Date();
      const fileStamp = finishedAt.toISOString().replace(/[:.]/g, '-').slice(0, 19);
      const reportBody = formatTaskEvalReport(result, {
        taskLabel: '회의록',
        brainName: brain.name,
        dateStr: finishedAt.toLocaleString(),
        modelName: model,
        notes: badLineCount ? `골든셋 파싱 실패 ${badLineCount}줄 (무시됨)` : undefined,
      });

      const reportPath = path.join(brainRoot, TASK_GOLDEN_DIR, `report-${fileStamp}.md`);
      fs.mkdirSync(path.dirname(reportPath), { recursive: true });
      fs.writeFileSync(reportPath, reportBody, 'utf8');
      logInfo('Task eval complete.', {
        records: result.scores.length,
        avgCoverage: result.avgCoverage,
        reportPath,
      });

      const reportDoc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
      await vscode.window.showTextDocument(reportDoc, { preview: false });
      vscode.window.showInformationMessage(
        `업무 평가 완료 · 평균 커버리지 ${(result.avgCoverage * 100).toFixed(1)}% · 전 요소 충족 ${result.perfectCount}/${result.scores.length}건 (골든셋: ${path.basename(goldenFile)})`,
      );
    });
  } catch (err: any) {
    logError('Task eval command failed.', { error: err?.message || String(err) });
    vscode.window.showErrorMessage(`업무 평가 실패: ${err?.message ?? err}`);
  }
}
/**
 * Growth report — renders the Reflection records (.astra/growth/reflections.jsonl) as a
 * markdown report made of three sections (growth report, skill scores, success patterns),
 * writes it to `.astra/growth/growth-report.md` inside the active brain, and opens it.
 *
 * The report is written even when there are no records yet; in that case an extra
 * information message is shown. Failures are logged and shown to the user, never rethrown.
 */
async function growthReportCommand(): Promise<void> {
  try {
    const brain = getActiveBrainProfile();
    if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
      vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
      return;
    }
    const brainRoot = brain.localBrainPath;

    const reflections = loadReflections(brainRoot);
    const growthSection = formatGrowthReport(reflections);
    const skillSection = formatSkillScoresMarkdown(computeSkillScores(reflections));
    const patternSection = formatSuccessPatternsMarkdown(loadSuccessPatterns(brainRoot));
    const reportBody = [growthSection, skillSection, patternSection].join('\n\n');

    const reportPath = path.join(brainRoot, '.astra', 'growth', 'growth-report.md');
    fs.mkdirSync(path.dirname(reportPath), { recursive: true });
    fs.writeFileSync(reportPath, reportBody, 'utf8');

    const reportDoc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
    await vscode.window.showTextDocument(reportDoc, { preview: false });

    const hasNoRecords = reflections.length === 0;
    if (hasNoRecords) {
      vscode.window.showInformationMessage('아직 Reflection 기록이 없습니다 — 업무(회의록/조사/일정) 요청을 처리하면 자동으로 쌓입니다.');
    }
  } catch (err: any) {
    logError('Growth report command failed.', { error: err?.message || String(err) });
    vscode.window.showErrorMessage(`성장 리포트 실패: ${err?.message ?? err}`);
  }
}
/**
 * Learning queue refresh (Phase 3 / Track 3-3 + 3-5).
 *
 * Aggregates the Reflection records through the Need Engine to derive learning priorities,
 * merges them into the Learning Queue and saves it, then writes a human-readable summary
 * (needs + inventory + debt + queue status) to `.astra/growth/learning-needs.md` and opens it.
 * Per the original design note, merged entries are *proposed* only: approval is done by a
 * person editing the queue file directly (Permission Based Learning, Constitution 8-2).
 *
 * Failures are logged and shown to the user, never rethrown.
 */
async function learningQueueCommand(): Promise<void> {
  try {
    const brain = getActiveBrainProfile();
    if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
      vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
      return;
    }
    const brainRoot = brain.localBrainPath;

    const reflections = loadReflections(brainRoot);
    const needs = computeNeeds(reflections);
    const inventory = knowledgeInventory(reflections);
    const debt = computeKnowledgeDebt(reflections);

    const currentQueue = loadQueue(brainRoot);
    const mergedAt = new Date().toISOString();
    const queue = mergeNeedsIntoQueue(currentQueue, needs, mergedAt);
    saveQueue(brainRoot, queue);

    // Human-readable summary: need rationale + inventory + debt, then the queue status.
    const needsSection = formatNeedsMarkdown(needs, inventory, debt);
    const queueSection = formatQueueMarkdown(queue);
    const summary = [needsSection, queueSection].join('\n---\n\n');

    const reportPath = path.join(brainRoot, '.astra', 'growth', 'learning-needs.md');
    fs.mkdirSync(path.dirname(reportPath), { recursive: true });
    fs.writeFileSync(reportPath, summary, 'utf8');

    const reportDoc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
    await vscode.window.showTextDocument(reportDoc, { preview: false });

    // Count the entries still waiting for a human decision.
    let proposed = 0;
    for (const entry of queue) {
      if (entry.status === 'proposed') {
        proposed += 1;
      }
    }

    let notice: string;
    if (reflections.length === 0) {
      notice = '아직 Reflection 기록이 없습니다 — 업무 turn 이 쌓이면 학습 우선순위가 산출됩니다.';
    } else {
      notice = `학습 큐 갱신 완료 · 제안 ${proposed}건 (승인은 ${LEARNING_QUEUE_REL_PATH} 에서 status 를 approved 로).`;
    }
    vscode.window.showInformationMessage(notice);
  } catch (err: any) {
    logError('Learning queue command failed.', { error: err?.message || String(err) });
    vscode.window.showErrorMessage(`학습 큐 갱신 실패: ${err?.message ?? err}`);
  }
}
/**
 * Knowledge decay audit (Phase 4 / Track 4-3).
 *
 * Collects the modification time of every file returned by `findBrainFiles` for the active
 * brain, passes them to `auditKnowledgeDecay`, and writes/opens the resulting report at
 * `.astra/growth/decay-report.md`. This command only reports — it does not move or delete
 * any file (Human Override).
 *
 * Failures are logged and shown to the user, never rethrown.
 */
async function decayAuditCommand(): Promise<void> {
  try {
    const brain = getActiveBrainProfile();
    if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
      vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
      return;
    }
    const brainRoot = brain.localBrainPath;

    const entries: Array<{ relPath: string; lastUpdated: number }> = [];
    for (const file of findBrainFiles(brainRoot)) {
      try {
        // Entries may be absolute or brain-relative; normalise to an absolute path for stat.
        const absolutePath = path.isAbsolute(file) ? file : path.join(brainRoot, file);
        const stats = fs.statSync(absolutePath);
        const relPath = path.relative(brainRoot, absolutePath) || file;
        entries.push({ relPath, lastUpdated: stats.mtimeMs });
      } catch {
        // File vanished or is unreadable — leave it out of the audit.
      }
    }

    const items = auditKnowledgeDecay(entries);
    const reportBody = formatDecayReport(items, {
      brainName: brain.name,
      dateStr: new Date().toLocaleString(),
    });

    const reportPath = path.join(brainRoot, '.astra', 'growth', 'decay-report.md');
    fs.mkdirSync(path.dirname(reportPath), { recursive: true });
    fs.writeFileSync(reportPath, reportBody, 'utf8');

    const reportDoc = await vscode.workspace.openTextDocument(vscode.Uri.file(reportPath));
    await vscode.window.showTextDocument(reportDoc, { preview: false });

    let stale = 0;
    for (const item of items) {
      if (item.status === 'stale') {
        stale += 1;
      }
    }
    vscode.window.showInformationMessage(`지식 노후 점검 완료 · ${entries.length}개 파일 중 노후 ${stale}개.`);
  } catch (err: any) {
    logError('Decay audit command failed.', { error: err?.message || String(err) });
    vscode.window.showErrorMessage(`지식 노후 점검 실패: ${err?.message ?? err}`);
  }
}
/**
 * Learning execution (Phase 6 / Track 7-1, Research Agent).
 *
 * Takes every *approved* item of the Learning Queue, runs `runResearch` on it to build a
 * research package, writes the package as markdown to `.astra/growth/proposals/<id>.md`,
 * and flips the item's status to `in-progress`. Nothing is written into the brain's own
 * content — per the original design note, a person must enrich the proposal with external
 * evidence and approve it before it becomes knowledge (Permission Based Learning).
 *
 * The queue is persisted in a `finally` block, so items that were already processed stay
 * `in-progress` even when a later item fails (LLM timeout, write error, ...). Without that,
 * a mid-run failure would leave them `approved` on disk and the next run would research
 * them again and overwrite their proposal files.
 *
 * Failures are logged and shown to the user, never rethrown.
 */
async function researchRunQueueCommand(): Promise<void> {
  try {
    const brain = getActiveBrainProfile();
    if (!brain?.localBrainPath || !fs.existsSync(brain.localBrainPath)) {
      vscode.window.showErrorMessage('활성 두뇌 폴더를 찾을 수 없습니다.');
      return;
    }
    const config = getConfig();
    const model = config.defaultModel;
    if (!model || !config.ollamaUrl) {
      vscode.window.showErrorMessage('모델/엔진 설정이 없습니다 (defaultModel, ollamaUrl).');
      return;
    }
    const queue = loadQueue(brain.localBrainPath);
    // `filter` keeps references to the queue's own item objects, so mutating an approved
    // item below also updates the entry that `saveQueue(queue)` persists.
    const approved = queue.filter((q) => q.status === 'approved');
    if (approved.length === 0) {
      vscode.window.showInformationMessage(
        `승인된 학습 항목이 없습니다 — ${LEARNING_QUEUE_REL_PATH} 에서 status 를 approved 로 바꾼 뒤 다시 실행하세요.`,
      );
      return;
    }
    await vscode.window.withProgress(
      { location: vscode.ProgressLocation.Notification, title: 'Astra 학습 실행 (Research Agent)', cancellable: true },
      async (progress, token) => {
        const orchestrator = new RetrievalOrchestrator();
        const allFiles = findBrainFiles(brain.localBrainPath);
        // Return value is unused — presumably this warms the token index used by ranking
        // (NOTE(review): confirm against getBrainTokenIndex).
        getBrainTokenIndex(brain.localBrainPath, allFiles);

        // Looks up at most five internal documents related to the topic; each is truncated
        // to its first 2000 characters. Unreadable files are skipped.
        const fetchInternalRefs = async (topic: string): Promise<ExistingKnowledgeRef[]> => {
          const ranked = orchestrator.rankBrainForEval(topic, brain, { limit: 5 }).slice(0, 5);
          const refs: ExistingKnowledgeRef[] = [];
          for (const r of ranked) {
            try {
              const abs = path.join(brain.localBrainPath, r.relativePath);
              const content = fs.readFileSync(abs, 'utf8').slice(0, 2000);
              const st = fs.statSync(abs);
              refs.push({ title: path.basename(r.relativePath), content, lastUpdated: st.mtimeMs, filePath: r.relativePath });
            } catch { /* skip unreadable */ }
          }
          return refs;
        };

        let done = 0;
        const proposalsDir = path.join(brain.localBrainPath, '.astra', 'growth', 'proposals');
        fs.mkdirSync(proposalsDir, { recursive: true });
        const proposalPaths: string[] = [];
        try {
          for (const item of approved) {
            // Stop between items when the user cancels; finished items are still saved below.
            if (token.isCancellationRequested) break;
            progress.report({ message: `${++done}/${approved.length} · ${item.topic}` });
            const pkg = await runResearch({
              item,
              fetchInternalRefs,
              callLlm: (system, user, maxTokens) => simpleChatCompletion(system, user, {
                baseUrl: config.ollamaUrl, model, temperature: 0.3, maxTokens, timeoutMs: 180000,
              }),
              nowIso: new Date().toISOString(),
            });
            const md = formatProposalMarkdown(pkg, { dateStr: new Date().toLocaleString(), modelName: model });
            const filePath = path.join(proposalsDir, `${item.id}.md`);
            fs.writeFileSync(filePath, md, 'utf8');
            proposalPaths.push(filePath);
            // Only mark the item once its proposal file is safely on disk.
            item.status = 'in-progress';
            item.updatedAt = new Date().toISOString();
          }
        } finally {
          // Persist progress even if an item threw, so completed items are not re-run.
          saveQueue(brain.localBrainPath, queue);
        }
        logInfo('Research agent run complete.', { processed: proposalPaths.length });
        if (proposalPaths.length > 0) {
          const doc = await vscode.workspace.openTextDocument(vscode.Uri.file(proposalPaths[0]));
          await vscode.window.showTextDocument(doc, { preview: false });
        }
        vscode.window.showInformationMessage(
          `학습 제안 ${proposalPaths.length}건 생성 (.astra/growth/proposals/). 외부 근거로 보강 후 두뇌에 저장하고 큐 상태를 done 으로 바꾸세요.`,
        );
      },
    );
  } catch (err: any) {
    logError('Research run command failed.', { error: err?.message || String(err) });
    vscode.window.showErrorMessage(`학습 실행 실패: ${err?.message ?? err}`);
  }
}
/** 골든셋 파일이 없을 때 템플릿을 만든다. 이미 (깨진/빈) 파일이 있으면 덮어쓰지 않는다. */
async function scaffoldGoldenSet(goldenPath: string, existingSource: string | null, parseErrors: number): Promise<boolean> {
if (existingSource && fs.existsSync(existingSource)) {