2afd1ac589
신뢰성 코어 (P1~P2):
- Requirement Graph: 업무 유형(회의록/시장조사/업무조사/일정) 필수 요소 주입 + 커버리지 hook
- Confidence Engine(0~100 결정론적) / Escalation Engine(검토 요청) / Epistemic Guard(모름·추정·확실 3분류)
- Provenance: citationTrace 에 출처 수정일·오래됨 경고
- Critic Loop: 문제 신호 turn 만 LLM 검수 1회 + 보완 카드

성장 루프 (P3):
- Gap Detector(Requirement-Knowledge) / Need Engine(30/25/20/15/10 공식) / Knowledge Inventory
- Learning Queue(proposed 전용 병합 — 승인은 사람만) / Decision Journal / Reflection 기록
- 반복 누락 요소(3회+)는 다음 turn 체크리스트에 자동 강조 (T5 루프)

지식 운영 (P4) + 기억 (P5) + 학습 실행 (P6):
- Knowledge Validation + Belief Revision(중복 reject·충돌 시 update/add 권고)
- Knowledge Decay(분야별 반감기 감사) / Knowledge Debt(blocked x impact)
- Organizational Memory(.astra/organization.md 상시 주입)
- Research Agent(approved 큐 -> 조사 브리프+추정 라벨 초안+Validation 게이트 -> proposals/)
- Skill Score(전/후반 추세) + Success Pattern DB(전요소충족+확신도90+ 자동 적재)

병렬 트랙:
- 캘린더 충돌 게이트: conflictCheck + 구조화 이벤트 캐시 + create_calendar_event 차단(force 는 사용자 승인 후)
- Task Eval Harness: 회의록 골든셋 자동 채점 명령 + 성장 리포트/학습 큐/노후 점검 명령

신규 모듈 17종(src/intelligence/), VS Code 명령 5종, 설정 11종, 테스트 +89건(전체 508 통과).
설계 문서: docs/SELF_EVOLVING_OS_MASTER_PLAN.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
160 lines
7.0 KiB
TypeScript
160 lines
7.0 KiB
TypeScript
/**
 * Gap Detector / Need Engine / Knowledge Inventory / Learning Queue
 * (Self-Evolving OS Phase 3 — 성장 루프 코어) 테스트.
 */
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { detectGaps } from '../src/intelligence/gapDetector';
import { computeNeeds, knowledgeInventory, formatNeedsMarkdown, NEED_WEIGHTS } from '../src/intelligence/needEngine';
import {
  loadQueue,
  saveQueue,
  mergeNeedsIntoQueue,
  formatQueueMarkdown,
  QueueItem,
} from '../src/intelligence/learningQueue';
import type { ReflectionRecord } from '../src/intelligence/reflectionStore';
/**
 * Builds a `ReflectionRecord` fixture for the tests in this file.
 *
 * Every call starts from the same baseline (task `meeting-minutes`, confidence
 * 70 / `medium`, nothing missing, not escalated, 3 retrieved chunks with a top
 * score of 0.6, `weakGrounding` false) so each test only spells out the fields
 * it actually exercises.
 *
 * @param partial Fields that replace the baseline values. The merge is a
 *   shallow spread, so a nested object such as `retrieval` is replaced
 *   wholesale rather than merged key by key. Defaults to no overrides.
 * @returns A complete record with the overrides applied on top of the baseline.
 */
function mkReflection(partial: Partial<ReflectionRecord> = {}): ReflectionRecord {
  return {
    ts: '2026-06-11T10:00:00.000Z',
    taskId: 'meeting-minutes',
    taskLabel: '회의록',
    confidenceScore: 70,
    confidenceBand: 'medium',
    missing: [],
    escalated: false,
    criticIssues: null,
    promptPreview: 'p',
    retrieval: { chunkCount: 3, topScore: 0.6 },
    weakGrounding: false,
    // Spread last so caller-supplied fields win over the baseline.
    ...partial,
  };
}
/**
 * detectGaps: checks the severity, summary and weak-grounding flag it reports
 * for combinations of requirement coverage and retrieval signals.
 */
describe('detectGaps', () => {
  // Retrieval signals with grounding present: 4 chunks, top score 0.7, no conflicts or ambiguity.
  const okSignals = { chunkCount: 4, topScore: 0.7, conflictCount: 0, ambiguityDetected: false };
  // Retrieval signals with no grounding at all: zero chunks and a zero top score.
  const noGrounding = { chunkCount: 0, topScore: 0, conflictCount: 0, ambiguityDetected: false };

  it('누락 3개 이상 → high', () => {
    // Three missing required elements with otherwise healthy signals.
    const g = detectGaps({
      coverage: { ran: true, taskId: 'meeting-minutes', taskLabel: '회의록', covered: [], missing: ['참석자', '담당자', '기한'] },
      signals: okSignals,
      taskId: 'meeting-minutes',
    });
    expect(g.severity).toBe('high');
    expect(g.summary).toContain('3개 누락');
  });

  it('근거 0건 단독 → low, 고영향 업무 + 누락이면 한 단계 상향', () => {
    // No coverage run and no task: the only signal is the absence of grounding.
    const clean = detectGaps({
      coverage: { ran: false, covered: [], missing: [] },
      signals: noGrounding,
      taskId: null,
    });
    expect(clean.severity).toBe('low');
    expect(clean.weakGrounding).toBe(true);

    // Same missing grounding, but now on a meeting-minutes task with one missing element.
    const worse = detectGaps({
      coverage: { ran: true, taskId: 'meeting-minutes', taskLabel: '회의록', covered: [], missing: ['기한'] },
      signals: noGrounding,
      taskId: 'meeting-minutes',
    });
    // medium (one missing element) bumped one level: high-impact task with no grounding.
    expect(worse.severity).toBe('high');
  });

  it('갭 없으면 none', () => {
    // Everything covered and signals healthy.
    const g = detectGaps({
      coverage: { ran: true, taskId: 'meeting-minutes', taskLabel: '회의록', covered: ['참석자'], missing: [] },
      signals: okSignals,
      taskId: 'meeting-minutes',
    });
    expect(g.severity).toBe('none');
    expect(g.summary).toBe('갭 없음');
  });
});
/**
 * computeNeeds: checks the ordering of the returned needs, the weight table,
 * and the empty-input behaviour (including the markdown formatter).
 */
describe('computeNeeds', () => {
  it('약한 그라운딩·누락 많은 업무가 높은 점수를 받는다', () => {
    const records: ReflectionRecord[] = [
      // Meeting minutes: three clean runs using the fixture baseline.
      mkReflection({}),
      mkReflection({}),
      mkReflection({}),
      // Market research: two runs with no grounding, missing elements and low confidence.
      mkReflection({ taskId: 'market-research', taskLabel: '시장조사', weakGrounding: true, missing: ['출처', '시장 규모'], confidenceScore: 40, retrieval: { chunkCount: 0, topScore: 0 } }),
      mkReflection({ taskId: 'market-research', taskLabel: '시장조사', weakGrounding: true, missing: ['출처'], confidenceScore: 45, retrieval: { chunkCount: 0, topScore: 0 } }),
    ];
    const needs = computeNeeds(records);
    // The comparisons below index needs[0] and needs[1]; assert both exist first
    // so a short result fails as an assertion rather than a TypeError.
    expect(needs.length).toBeGreaterThanOrEqual(2);
    expect(needs[0].taskId).toBe('market-research');
    expect(needs[0].score).toBeGreaterThan(needs[1].score);
    expect(needs[0].topMisses).toContain('출처');
    expect(needs[0].reason).toContain('누락');
  });

  it('가중치 합이 1', () => {
    // Floating-point sum, hence toBeCloseTo rather than an exact comparison.
    const sum = Object.values(NEED_WEIGHTS).reduce((s, w) => s + w, 0);
    expect(sum).toBeCloseTo(1.0);
  });

  it('기록 없으면 빈 배열 + md 안내', () => {
    expect(computeNeeds([])).toEqual([]);
    expect(formatNeedsMarkdown([], [])).toContain('기록 없음');
  });
});
/**
 * knowledgeInventory: checks the per-task status reported for three tasks whose
 * records differ only in their retrieval figures.
 */
describe('knowledgeInventory', () => {
  it('그라운딩 평균으로 보유/부족/없음 판정', () => {
    const records: ReflectionRecord[] = [
      // Baseline task (meeting-minutes) with 5 chunks / top score 0.8.
      mkReflection({ retrieval: { chunkCount: 5, topScore: 0.8 } }),
      // No retrieval results at all.
      mkReflection({ taskId: 'market-research', taskLabel: '시장조사', retrieval: { chunkCount: 0, topScore: 0 } }),
      // A single chunk with a top score of 0.3.
      mkReflection({ taskId: 'work-research', taskLabel: '업무조사', retrieval: { chunkCount: 1, topScore: 0.3 } }),
    ];
    const inv = knowledgeInventory(records);
    // Index statuses by task id so the assertions do not depend on result order.
    const byId = new Map(inv.map((i) => [i.taskId, i.status]));
    expect(byId.get('meeting-minutes')).toBe('sufficient');
    expect(byId.get('market-research')).toBe('missing');
    expect(byId.get('work-research')).toBe('partial');
  });
});
/**
 * learningQueue: checks persistence (save → load), the merge rules applied to
 * existing queue items of different statuses, and the markdown rendering.
 */
describe('learningQueue', () => {
  // Needs derived from a single weakly grounded market-research record; shared by all tests below.
  const needs = computeNeeds([
    mkReflection({ taskId: 'market-research', taskLabel: '시장조사', weakGrounding: true, missing: ['출처'], confidenceScore: 40 }),
  ]);

  it('save → load 라운드트립 + 우선순위 정렬 저장', () => {
    const brain = fs.mkdtempSync(path.join(os.tmpdir(), 'astra-test-queue-'));
    try {
      const queue = mergeNeedsIntoQueue([], needs, '2026-06-11T00:00:00.000Z');
      expect(saveQueue(brain, queue)).toBe(true);
      const loaded = loadQueue(brain);
      expect(loaded.length).toBe(1);
      expect(loaded[0].status).toBe('proposed');
      expect(loaded[0].topic).toContain('시장조사');
    } finally {
      // Remove the scratch directory even when an assertion fails, so repeated
      // runs do not leave temp directories behind.
      fs.rmSync(brain, { recursive: true, force: true });
    }
  });

  it('proposed 는 갱신되지만 approved 는 불변 (Permission Based Learning)', () => {
    // An already approved item whose id matches the id expected for the market-research need.
    const approved: QueueItem = {
      id: 'need-market-research',
      topic: '시장조사 역량 보강',
      priority: 10,
      reason: '이전',
      status: 'approved',
      createdAt: 'a',
      updatedAt: 'a',
    };
    const merged = mergeNeedsIntoQueue([approved], needs, '2026-06-11T00:00:00.000Z');
    expect(merged.length).toBe(1);
    expect(merged[0].status).toBe('approved');
    expect(merged[0].priority).toBe(10); // not overwritten by the Need score
    expect(merged[0].reason).toBe('이전');
  });

  it('새 주제는 proposed 로 추가된다', () => {
    // An unrelated, already finished item that the merge is expected to leave alone.
    const other: QueueItem = {
      id: 'need-schedule',
      topic: '일정',
      priority: 5,
      reason: 'r',
      status: 'done',
      createdAt: 'a',
      updatedAt: 'a',
    };
    const merged = mergeNeedsIntoQueue([other], needs, 'now');
    expect(merged.length).toBe(2);
    expect(merged.find((q) => q.id === 'need-market-research')?.status).toBe('proposed');
    expect(merged.find((q) => q.id === 'need-schedule')?.status).toBe('done'); // unchanged
  });

  it('formatQueueMarkdown — 승인 안내 포함', () => {
    const md = formatQueueMarkdown(mergeNeedsIntoQueue([], needs, 'now'));
    expect(md).toContain('approved');
    expect(md).toContain('시장조사');
  });
});