// connectai/src/retrieval/hierarchicalLevel.ts

/**
 * Hierarchical Context Window — 질의·문서의 *추상도 레벨* 매칭으로 검색 노이즈 감소.
 *
 * 사용자 제안: "사용자가 '배포해줘' 라고 하면 L1(실행) 우선, '전략 검토' 라고 하면
 * L3(전략) 우선". 같은 키워드 매치 점수여도 추상도가 안 맞으면 noise.
 *
 * v1 — 3-level 휴리스틱 (LLM 호출 없음, 결정적):
 *   - `concrete`  — 코드, 로그, 디버그, 실행 명령
 *   - `operational` — 작업, 일정, 운영 절차, 회의록 (기본값)
 *   - `strategic` — 전략, 비전, 의사결정 근거, 아키텍처 방향
 *
 * 매칭 정책:
 *   - 같은 레벨 → 보너스 (× 1.15)
 *   - 인접 레벨 (concrete↔operational, operational↔strategic) → 변화 없음
 *   - 양 끝 mismatch (concrete↔strategic) → 페널티 (× 0.7)
 *
 * 약한 시그널 — TF-IDF dominant 유지, 동점 깨기 역할. 검색 결과를 *제외* 하지 않음.
 */

import { RetrievalChunk } from './types';

/**
 * Abstraction level assigned to both queries and chunks.
 * Ordered from most hands-on to most high-level:
 * `concrete` → `operational` → `strategic`.
 */
export type AbstractionLevel = 'concrete' | 'operational' | 'strategic';

/**
 * Phrases whose presence in a query votes for the `strategic` level.
 * Korean entries come first, followed by the English ones.
 */
const QUERY_STRATEGIC_KEYWORDS: string[] = [
    // strategy, direction, vision, mission, goal
    '전략', '방향', '비전', '미션', '목표',
    // decision-making, architecture, design direction
    '의사결정', '아키텍처', '설계 방향',
    // "why like this", "why like that", "what is right", "which is better"
    '왜 이렇게', '왜 그렇게', '뭐가 맞', '어떤 게 좋',
    // "how should we proceed", "which direction"
    '어떻게 가야', '어떤 방향',
    // judgment, decision, perspective, evaluation, review
    '판단', '결정', '관점', '평가', '검토',
    // English
    'strategy', 'vision', 'mission', 'roadmap', 'direction', 'goal',
    'why', 'rationale', 'pros and cons', 'tradeoff', 'evaluate',
];

/**
 * Phrases whose presence in a query votes for the `concrete` level
 * (code, logs, debugging, run-this-now requests).
 * Korean entries come first, followed by the English ones.
 */
const QUERY_CONCRETE_KEYWORDS: string[] = [
    // code, function, bug, error, log
    '코드', '함수', '버그', '에러', '로그',
    // execute, command, script, debug
    '실행', '명령어', '스크립트', '디버그',
    // fix, modify, refactoring, refactor
    '고쳐', '수정', '리팩토링', '리팩터',
    // commit, merge, deploy (imperative stem), run
    '커밋', '머지', '배포해', '돌려',
    // error message, stack trace
    '에러 메시지', '스택 트레이스',
    // English
    'syntax', 'compile',
    'code', 'function', 'bug', 'error', 'log', 'execute', 'command', 'script',
    'debug', 'fix', 'refactor', 'commit', 'merge', 'deploy', 'run',
];

/** Path-segment prefixes that mark a chunk's file as strategic ('전략' = strategy, '비전' = vision). */
const FOLDER_STRATEGIC_HINTS: string[] = [
    'strategy', 'vision', 'mission', 'roadmap',
    'decision', 'principle',
    '전략', '비전',
];

/** Path-segment prefixes that mark a chunk's file as operational ('운영' = operations, '절차' = procedure, '회의' = meeting). */
const FOLDER_OPERATIONAL_HINTS: string[] = [
    'playbook', 'runbook', 'operation', 'process', 'sop',
    '운영', '절차',
    'meeting', '회의',
];

/** Path-segment prefixes that mark a chunk's file as concrete ('디버그' = debug). */
const FOLDER_CONCRETE_HINTS: string[] = [
    'code', 'snippet', 'log', 'debug', 'fix', 'patch',
    '디버그',
    'commit',
];

/**
 * Title fragments that vote for the `strategic` level
 * ('전략' = strategy, '계획' = plan, '방향' = direction, '결정' = decision, '평가' = evaluation).
 */
const TITLE_STRATEGIC_HINTS: string[] = [
    'strategy', 'vision', 'rationale', 'direction', 'decision', 'plan',
    '전략', '계획', '방향', '결정', '평가',
];

/**
 * Title fragments that vote for the `concrete` level
 * ('버그' = bug, '에러' = error, '로그' = log, '커밋' = commit).
 */
const TITLE_CONCRETE_HINTS: string[] = [
    'fix', 'bug', 'error', 'log', 'script', 'command',
    '버그', '에러', '로그', '커밋',
];

/** Matches a single ASCII letter or digit (input is lower-cased before testing). */
const ASCII_WORD_CHAR = /[a-z0-9]/;

/**
 * Tells whether `needle` occurs in `haystack`; both must already be lower-cased.
 *
 * A needle that begins with an ASCII letter or digit must start at a word
 * boundary, i.e. at the start of the text or right after a character that is
 * not an ASCII letter/digit. Any other needle (e.g. Korean) is matched as a
 * plain substring, because such keywords are routinely glued to surrounding
 * syllables.
 */
function hasKeywordAtWordStart(haystack: string, needle: string): boolean {
    if (!ASCII_WORD_CHAR.test(needle.charAt(0))) return haystack.includes(needle);
    for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + 1)) {
        if (at === 0 || !ASCII_WORD_CHAR.test(haystack.charAt(at - 1))) return true;
    }
    return false;
}

/**
 * Counts how many of `keywords` occur in `text`, case-insensitively.
 * Each keyword contributes at most 1, however often it appears.
 *
 * ASCII keywords only match at the start of a word, so short ones such as
 * 'log', 'fix' or 'script' no longer fire inside unrelated words
 * ("catalog", "prefix", "typescript"), while inflected forms that merely add
 * a suffix ("logs", "deployment") still count.
 *
 * @param text     Free text to scan.
 * @param keywords Keywords to look for.
 * @returns Number of distinct keywords found.
 */
function countMatches(text: string, keywords: string[]): number {
    const haystack = text.toLowerCase();
    let hits = 0;
    for (const keyword of keywords) {
        if (hasKeywordAtWordStart(haystack, keyword.toLowerCase())) hits++;
    }
    return hits;
}

/**
 * Classifies the abstraction level of a user query.
 *
 * The query is scored against the strategic and the concrete keyword lists;
 * whichever list has strictly more hits wins. An empty query, a tie, or no
 * hits at all yields the default level `'operational'`.
 *
 * @param query Raw user query text.
 * @returns The level the query most likely targets.
 */
export function classifyQueryLevel(query: string): AbstractionLevel {
    if (!query) return 'operational';

    const strategicHits = countMatches(query, QUERY_STRATEGIC_KEYWORDS);
    const concreteHits = countMatches(query, QUERY_CONCRETE_KEYWORDS);

    // Equal counts (including 0 vs 0) carry no signal either way.
    if (strategicHits === concreteHits) return 'operational';
    return strategicHits > concreteHits ? 'strategic' : 'concrete';
}

/**
 * Classifies the abstraction level of a single chunk.
 *
 * Signals are consulted from strongest to weakest:
 *   1. File path — the first hint list with a match decides, checked in the
 *      order strategic → concrete → operational. A hint matches when a path
 *      segment starts with it; both '/' and '\' separators are accepted and
 *      the first segment of a relative path is included.
 *   2. Title — strategic vs concrete hint counts; the strictly larger wins.
 *
 * The chunk body is not inspected. When no signal decides, the result is the
 * default level `'operational'`.
 *
 * @param chunk Chunk to classify; `metadata` / `metadata.filePath` / `title` may be absent.
 * @returns The detected level.
 */
export function classifyChunkLevel(chunk: RetrievalChunk): AbstractionLevel {
    // 1. File path (strongest signal)
    const rawPath = (chunk.metadata?.filePath || '').toLowerCase();
    if (rawPath) {
        // Unify separators and anchor a leading '/' so that the first segment
        // of a relative path ("strategy/x.md") is matched like any other.
        const path = '/' + rawPath.replace(/\\/g, '/');
        const hasSegment = (hints: readonly string[]): boolean =>
            hints.some((hint) => path.includes(`/${hint}`));

        if (hasSegment(FOLDER_STRATEGIC_HINTS)) return 'strategic';
        if (hasSegment(FOLDER_CONCRETE_HINTS)) return 'concrete';
        if (hasSegment(FOLDER_OPERATIONAL_HINTS)) return 'operational';
    }

    // 2. Title
    const title = (chunk.title || '').toLowerCase();
    if (title) {
        const countHits = (hints: readonly string[]): number =>
            hints.filter((hint) => title.includes(hint.toLowerCase())).length;

        const strategicHits = countHits(TITLE_STRATEGIC_HINTS);
        const concreteHits = countHits(TITLE_CONCRETE_HINTS);
        if (strategicHits > concreteHits) return 'strategic';
        if (concreteHits > strategicHits) return 'concrete';
    }

    return 'operational';
}

/**
 * Ordinal position of each level on the concrete → strategic axis.
 * The absolute difference of two indices is the distance between levels
 * (0 = same, 1 = adjacent, 2 = opposite ends).
 */
const LEVEL_INDEX: Record<AbstractionLevel, number> = {
    concrete: 0,
    operational: 1,
    strategic: 2,
};

/** Score multipliers used when re-weighting chunks by abstraction level. */
export interface HierarchicalWeights {
    /** Multiplier applied when chunk and query share the same level. Default: 1.15. */
    sameLevelBonus: number;
    /** Multiplier applied when the levels sit at opposite ends (concrete vs strategic). Default: 0.70. */
    farMismatchPenalty: number;
}

/** Default multipliers: a mild boost for an exact level match, a stronger cut for opposite ends. */
export const DEFAULT_HIERARCHICAL_WEIGHTS: HierarchicalWeights = {
    sameLevelBonus: 1.15,
    farMismatchPenalty: 0.7,
};

/**
 * Re-weights chunk scores in place according to how well each chunk's
 * abstraction level matches the query's level.
 *
 * For every chunk:
 *   - the detected level is stored on `metadata.abstractionLevel`
 *     (a `metadata` object is created when the chunk has none);
 *   - same level as the query → `score *= weights.sameLevelBonus`;
 *   - opposite ends (concrete vs strategic) → `score *= weights.farMismatchPenalty`;
 *   - adjacent levels → score left untouched.
 *
 * No chunk is removed or reordered.
 *
 * @param chunks     Chunks to adjust; mutated in place.
 * @param queryLevel Level of the query, e.g. from `classifyQueryLevel`.
 * @param weights    Multipliers to apply; defaults to `DEFAULT_HIERARCHICAL_WEIGHTS`.
 * @returns How many chunks received the bonus (`sameLevel`) and the penalty (`farMismatch`).
 */
export function applyHierarchicalReweight(
    chunks: RetrievalChunk[],
    queryLevel: AbstractionLevel,
    weights: HierarchicalWeights = DEFAULT_HIERARCHICAL_WEIGHTS,
): { sameLevel: number; farMismatch: number } {
    let sameLevel = 0;
    let farMismatch = 0;
    const queryIndex = LEVEL_INDEX[queryLevel];

    for (const chunk of chunks) {
        const chunkLevel = classifyChunkLevel(chunk);

        // classifyChunkLevel tolerates a missing `metadata`; do the same here
        // instead of throwing a TypeError on the property write below.
        if (chunk.metadata == null) {
            // eslint-disable-next-line @typescript-eslint/no-explicit-any -- RetrievalChunk's metadata shape is declared elsewhere
            (chunk as any).metadata = {};
        }
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- abstractionLevel is an ad-hoc debug/UI field
        (chunk.metadata as any).abstractionLevel = chunkLevel;

        const distance = Math.abs(queryIndex - LEVEL_INDEX[chunkLevel]);
        if (distance === 0) {
            chunk.score *= weights.sameLevelBonus;
            sameLevel++;
        } else if (distance === 2) {
            chunk.score *= weights.farMismatchPenalty;
            farMismatch++;
        }
        // distance === 1: adjacent levels → score unchanged
    }

    return { sameLevel, farMismatch };
}