// connectai/src/intelligence/needEngine.ts

/**
 * Need Engine — 학습 필요성 산출 (설계서 7.6) + Knowledge Inventory v1 (7.3).
 *
 * Self-Evolving OS 마스터 플랜 Phase 3 / Track 3-3 + 3-1. Reflection 기록을
 * 집계해 "무엇을 먼저 배워야 하는가" 를 점수로 산출한다 — 성장 루프의 두뇌.
 *
 * Need Score (설계서 공식, 0~100):
 *   정보 부족도 × 30% + 실패율 × 25% + 업무 빈도 × 20% + 확신도 부족 × 15% + 사용자 피드백 × 10%
 *
 * v1 신호 매핑 (전부 Reflection 에서 결정론적으로):
 *   - 정보 부족도: weakGrounding 비율 (검색 근거 없이 수행한 turn 비중)
 *   - 실패율: 필수 요소 누락이 있었던 turn 비율
 *   - 업무 빈도: 해당 업무 turn 수 / 전체 업무 turn 수
 *   - 확신도 부족: (100 − 평균 확신도) / 100
 *   - 사용자 피드백: v1 미수집 → 0 (필드는 유지, 후속 증분에서 연결)
 *
 * 출력은 Learning Queue 의 입력이 된다. 학습 실행은 승인 후 (Permission Based Learning).
 */

import type { ReflectionRecord } from './reflectionStore';

/**
 * One entry of the learning-priority list returned by `computeNeeds`: the
 * Need Score of a single task type plus the signals it was derived from.
 */
export interface NeedItem {
    /** Task type ID (the unit of a learning topic in v1; later: finer element/topic granularity). */
    taskId: string;
    /** Display label; `computeNeeds` falls back to `taskId` when the first record has no label. */
    taskLabel: string;
    /** Weighted Need Score, 0 to 100. */
    score: number;
    /** Unweighted value of each signal behind the score (human-readable evidence). */
    breakdown: {
        infoLack: number;    // 0 to 1: share of the task's records with weakGrounding === true
        failRate: number;    // 0 to 1: share of the task's records with at least one missing element
        frequency: number;   // 0 to 1: this task's records / all records that carry a task ID
        confidenceLack: number; // 0 to 1: (100 - mean confidenceScore) / 100, clamped
        feedback: number;    // 0 to 1 (always 0 in v1: not collected yet)
    };
    /** Number of records aggregated for this task. */
    sampleCount: number;
    /** Top 3 most frequently missed elements, used to make the learning topic concrete. */
    topMisses: string[];
    /** Short human-readable summary of the signals that stood out (the report's evidence column). */
    reason: string;
}

/**
 * Weight of each signal in the Need Score. The five weights add up to 1 and
 * the keys mirror `NeedItem['breakdown']`.
 */
export const NEED_WEIGHTS = {
    infoLack: 0.3,
    failRate: 0.25,
    frequency: 0.2,
    confidenceLack: 0.15,
    feedback: 0.1,
} as const;

/**
 * Aggregates reflection records per task type and ranks the task types by
 * Need Score (0 to 100, highest first).
 *
 * Records without a truthy `taskId` are ignored. Because the feedback signal
 * is fixed at 0 in v1, the highest reachable score is currently 90.
 *
 * @param records Reflection records to aggregate.
 * @returns One `NeedItem` per task ID, sorted by descending score; tasks with
 *          equal scores keep the order in which they first appeared.
 */
export function computeNeeds(records: ReflectionRecord[]): NeedItem[] {
    // Bucket the records by task ID in a single pass, counting task turns as we go.
    const grouped = new Map<string, ReflectionRecord[]>();
    let totalTaskTurns = 0;
    for (const record of records) {
        const id = record.taskId;
        if (!id) continue;
        totalTaskTurns += 1;
        const bucket = grouped.get(id);
        if (bucket) {
            bucket.push(record);
        } else {
            grouped.set(id, [record]);
        }
    }
    if (totalTaskTurns === 0) return [];

    // Signals are summed in the declaration order of NEED_WEIGHTS.
    const signalKeys = Object.keys(NEED_WEIGHTS) as Array<keyof typeof NEED_WEIGHTS>;
    const ranked: NeedItem[] = [];

    grouped.forEach((group, taskId) => {
        const turns = group.length;
        let ungroundedTurns = 0;
        let turnsWithMisses = 0;
        let confidenceTotal = 0;
        const missTally = new Map<string, number>();

        for (const record of group) {
            if (record.weakGrounding === true) ungroundedTurns += 1;
            const missed = record.missing || [];
            if (missed.length > 0) turnsWithMisses += 1;
            // A record without a confidence score counts as confidence 0.
            confidenceTotal += (record.confidenceScore || 0);
            for (const element of missed) {
                missTally.set(element, (missTally.get(element) || 0) + 1);
            }
        }

        const avgConf = confidenceTotal / turns;
        const breakdown = {
            infoLack: ungroundedTurns / turns,
            failRate: turnsWithMisses / turns,
            frequency: turns / totalTaskTurns,
            confidenceLack: Math.max(0, Math.min(1, (100 - avgConf) / 100)),
            feedback: 0, // not collected in v1
        };

        let weighted = 0;
        for (const key of signalKeys) {
            weighted += breakdown[key] * NEED_WEIGHTS[key];
        }
        const score = Math.round(100 * weighted);

        // Three most frequently missed elements; ties keep first-seen order.
        const topMisses = [...missTally]
            .sort(([, countA], [, countB]) => countB - countA)
            .slice(0, 3)
            .map(([element]) => element);

        // Each candidate pairs "does this signal stand out?" with its report text.
        const candidates: Array<[boolean, string]> = [
            [breakdown.infoLack > 0.3, `근거 없는 수행 ${(breakdown.infoLack * 100).toFixed(0)}%`],
            [breakdown.failRate > 0.3, `요소 누락률 ${(breakdown.failRate * 100).toFixed(0)}%`],
            [breakdown.confidenceLack > 0.3, `평균 확신도 ${avgConf.toFixed(0)}`],
            [topMisses.length > 0, `자주 누락: ${topMisses.join(', ')}`],
        ];
        const reason = candidates
            .filter(([applies]) => applies)
            .map(([, text]) => text)
            .join(' · ') || '특이 신호 없음 (빈도 기반)';

        ranked.push({
            taskId,
            taskLabel: group[0].taskLabel || taskId,
            score,
            breakdown,
            sampleCount: turns,
            topMisses,
            reason,
        });
    });

    ranked.sort((left, right) => right.score - left.score);
    return ranked;
}

/**
 * Knowledge Inventory v1 (Track 3-1): knowledge coverage status per task type.
 * The three grades (sufficient / partial / missing, design doc 7.3) are
 * derived from retrieval grounding signals.
 */
export interface InventoryItem {
    taskId: string;
    taskLabel: string;
    /** Coverage grade; `formatNeedsMarkdown` renders it as 보유 / 부족 / 없음. */
    status: 'sufficient' | 'partial' | 'missing';
    /** Mean `retrieval.chunkCount` over the aggregated records. */
    avgChunkCount: number;
    /** Mean `retrieval.topScore` over the aggregated records. */
    avgTopScore: number;
    /** Number of records aggregated (those that carry both a task ID and retrieval data). */
    sampleCount: number;
}

/**
 * Builds the knowledge inventory: for every task type, how well retrieval
 * grounded its turns on average.
 *
 * Only records with both a truthy `taskId` and a truthy `retrieval` are
 * counted. A task is graded `sufficient` when it averages at least 3 chunks
 * with an average top score of at least 0.5, `partial` when it averages at
 * least 1 chunk, and `missing` otherwise.
 *
 * @param records Reflection records to aggregate.
 * @returns One item per task ID, weakest average top score first.
 */
export function knowledgeInventory(records: ReflectionRecord[]): InventoryItem[] {
    interface Tally {
        label: string;
        chunkTotal: number;
        scoreTotal: number;
        samples: number;
    }

    // Keep running totals per task instead of collecting the records themselves.
    const tallies = new Map<string, Tally>();
    for (const record of records) {
        const id = record.taskId;
        const retrieval = record.retrieval;
        if (!id || !retrieval) continue;

        let tally = tallies.get(id);
        if (!tally) {
            // The label comes from the first counted record of the task.
            tally = { label: record.taskLabel || id, chunkTotal: 0, scoreTotal: 0, samples: 0 };
            tallies.set(id, tally);
        }
        tally.chunkTotal += (retrieval.chunkCount || 0);
        tally.scoreTotal += (retrieval.topScore || 0);
        tally.samples += 1;
    }

    const inventory: InventoryItem[] = [];
    tallies.forEach((tally, taskId) => {
        const avgChunkCount = tally.chunkTotal / tally.samples;
        const avgTopScore = tally.scoreTotal / tally.samples;

        let status: InventoryItem['status'];
        if (avgChunkCount >= 3 && avgTopScore >= 0.5) {
            status = 'sufficient';
        } else if (avgChunkCount >= 1) {
            status = 'partial';
        } else {
            status = 'missing';
        }

        inventory.push({
            taskId,
            taskLabel: tally.label,
            status,
            avgChunkCount,
            avgTopScore,
            sampleCount: tally.samples,
        });
    });

    inventory.sort((left, right) => left.avgTopScore - right.avgTopScore);
    return inventory;
}

/**
 * Knowledge Debt (Track 4-4): aggregates the work that missing knowledge
 * actually got in the way of (design doc example: "GA4 — Blocked Tasks 17,
 * Impact 9"). In v1 the unit is the task type: the number of turns performed
 * with weak or no grounding is "blocked", and the mean gap severity of those
 * turns is "impact" (0 to 10).
 */
export interface DebtItem {
    taskId: string;
    taskLabel: string;
    /** Number of task turns performed in a knowledge-deficient state. */
    blockedTurns: number;
    /** Mean gap severity, 0 to 10 (`computeKnowledgeDebt` rounds it to one decimal). */
    impact: number;
    /** blocked × impact, rounded to an integer; the sort key. */
    debtScore: number;
}

/** Gap severity label mapped to its impact points on the 0 to 10 scale. */
const SEVERITY_SCORE: Record<string, number> = {
    none: 0,
    low: 3,
    medium: 6,
    high: 10,
};

/**
 * Computes knowledge debt per task type from the turns that ran with weak
 * grounding.
 *
 * Only records with a truthy `taskId` and `weakGrounding === true` count as
 * blocked turns. A record without a gap severity is scored as `low`, and a
 * severity label missing from `SEVERITY_SCORE` scores 3 as well.
 *
 * @param records Reflection records to aggregate.
 * @returns One item per task ID, highest debt score first; equal scores keep
 *          the order in which the tasks first appeared.
 */
export function computeKnowledgeDebt(records: ReflectionRecord[]): DebtItem[] {
    // Running totals per task: label of the first blocked turn, turn count, severity sum.
    const tallies = new Map<string, { label: string; turns: number; severityTotal: number }>();
    for (const record of records) {
        const id = record.taskId;
        if (!id || record.weakGrounding !== true) continue;

        let tally = tallies.get(id);
        if (!tally) {
            tally = { label: record.taskLabel || id, turns: 0, severityTotal: 0 };
            tallies.set(id, tally);
        }
        const points = SEVERITY_SCORE[record.gapSeverity || 'low'];
        tally.severityTotal += (points ?? 3);
        tally.turns += 1;
    }

    const debts: DebtItem[] = [];
    tallies.forEach((tally, taskId) => {
        const meanSeverity = tally.severityTotal / tally.turns;
        debts.push({
            taskId,
            taskLabel: tally.label,
            blockedTurns: tally.turns,
            impact: Math.round(meanSeverity * 10) / 10,
            // The score uses the unrounded mean, not the one-decimal display value.
            debtScore: Math.round(tally.turns * meanSeverity),
        });
    });

    debts.sort((left, right) => right.debtScore - left.debtScore);
    return debts;
}

/**
 * Makes free text safe to place inside a single Markdown table cell.
 *
 * Line breaks are collapsed to a space (a table row must stay on one line)
 * and every unescaped `|` is backslash-escaped so it is not read as a column
 * delimiter. A pipe already preceded by an odd number of backslashes is
 * already escaped and is left untouched.
 */
function escapeMarkdownTableCell(text: string): string {
    return String(text)
        .replace(/\r\n|\r|\n/g, ' ')
        .replace(/(\\*)\|/g, (match: string, slashes: string) =>
            (slashes.length % 2 === 0 ? `${slashes}\\|` : match));
}

/**
 * Renders the Need Engine report as Markdown: the ranked learning needs, the
 * knowledge inventory and the knowledge debt, each as a table (or a one-line
 * placeholder when the section has no data).
 *
 * Free-text cells (task labels and the need reason) are escaped, so a `|` or
 * a line break in a label or in a missed-element name cannot break the table
 * layout. Text without those characters is emitted unchanged.
 *
 * @param needs Ranked needs; row order is preserved and numbered from 1.
 * @param inventory Knowledge inventory rows, rendered in the given order.
 * @param debt Knowledge debt rows; defaults to none.
 * @returns The Markdown document, ending with a trailing newline.
 */
export function formatNeedsMarkdown(needs: NeedItem[], inventory: InventoryItem[], debt: DebtItem[] = []): string {
    const cell = escapeMarkdownTableCell;
    const lines: string[] = [
        '# 학습 필요성 (Need Engine)',
        '',
        '공식: 정보부족 30% + 실패율 25% + 빈도 20% + 확신부족 15% + 피드백 10%',
        '',
    ];

    // Section 1: ranked learning needs.
    if (needs.length === 0) {
        lines.push('Reflection 기록 없음 — 업무 turn 이 쌓이면 학습 우선순위가 산출됩니다.');
    } else {
        lines.push('| 우선순위 | 업무 | Need Score | 표본 | 근거 |');
        lines.push('|---|---|---|---|---|');
        needs.forEach((need, index) => {
            lines.push(`| ${index + 1} | ${cell(need.taskLabel)} | **${need.score}** | ${need.sampleCount} | ${cell(need.reason)} |`);
        });
    }

    // Section 2: knowledge inventory.
    lines.push('', '## Knowledge Inventory (지식 보유 상태)', '');
    if (inventory.length === 0) {
        lines.push('- 데이터 없음');
    } else {
        const statusLabel = { sufficient: '보유', partial: '부족', missing: '없음' } as const;
        lines.push('| 업무 | 상태 | 평균 근거 수 | 평균 top score |');
        lines.push('|---|---|---|---|');
        for (const item of inventory) {
            lines.push(`| ${cell(item.taskLabel)} | ${statusLabel[item.status]} | ${item.avgChunkCount.toFixed(1)} | ${item.avgTopScore.toFixed(2)} |`);
        }
    }

    // Section 3: knowledge debt.
    lines.push('', '## Knowledge Debt (지식 부채)', '');
    if (debt.length === 0) {
        lines.push('- 부채 없음 — 지식 부족 상태로 수행된 업무가 없습니다.');
    } else {
        lines.push('| 업무 | Blocked Turns | Impact (0~10) | Debt Score |');
        lines.push('|---|---|---|---|');
        for (const entry of debt) {
            lines.push(`| ${cell(entry.taskLabel)} | ${entry.blockedTurns} | ${entry.impact} | **${entry.debtScore}** |`);
        }
    }

    lines.push('');
    return lines.join('\n');
}