// connectai/src/agent/postHocSelfCheck.ts

/**
 * Post-hoc Self-Check — 답변 *완료 후* LLM 한 번 호출로 3가지 평가.
 *
 * 사용자 제안: "[Self-Check] 단계 — 이 답변이 사용자 질문에 직접 답하는가 / 규칙
 * 준수 / 논리 모순 없는가".
 *
 * 기존 CoVe (v2.2.184) 와 차이:
 *   - CoVe = *답변 작성 전* 모델에게 self-verify 지시 (instructional, 1 pass)
 *   - Self-Check = *답변 완료 후* 별도 LLM 호출로 검증 (post-hoc, 2 pass)
 *
 * 비용·위험:
 *   - 매 turn 추가 LLM 호출 1회 (latency 비용)
 *   - 기본 OFF — semantic re-rank 와 같은 opt-in 패턴
 *   - 짧은 timeout (기본 6초). 실패해도 답변 자체엔 영향 없음 — 그냥 평가 못 함.
 *   - 빠른 작은 모델 권장 (예: gemma2:2b)
 *
 * 위치: 답변 streaming 완료 후, `usedScope` 메시지 전송 직전. 비동기 — 답변
 * 표시를 *블록 하지 않음*. 결과는 webview 에 별도 메시지로 push.
 */

/** Configuration for one post-hoc self-check LLM call. */
export interface SelfCheckOptions {
    /** Base URL of the LLM server; the chat endpoint path is appended to it. */
    ollamaUrl: string;
    /** Model name sent in the request body. */
    model: string;
    /** Milliseconds to wait before the request is aborted. */
    timeoutMs: number;
    /** Preview length of each source excerpt placed in the prompt. Default 180 chars. */
    excerptLength: number;
    /** Maximum number of sources passed as context. Default 5. */
    maxSources: number;
}

/**
 * Defaults for the tunable options. `ollamaUrl` and `model` are deliberately
 * excluded from the type, so callers must always supply them.
 */
export const DEFAULT_SELF_CHECK_OPTIONS: Omit<SelfCheckOptions, 'ollamaUrl' | 'model'> = {
    timeoutMs: 6000,
    excerptLength: 180,
    maxSources: 5,
};

/** Three-level verdict for a yes/no style criterion; `unknown` means it could not be judged. */
export type SelfCheckVerdict = 'yes' | 'partial' | 'no' | 'unknown';
/** Severity of logical contradiction found in the answer; `unknown` means it could not be judged. */
export type ContradictionLevel = 'none' | 'minor' | 'major' | 'unknown';

/** Outcome of one self-check run, whether or not the evaluation itself succeeded. */
export interface SelfCheckResult {
    /** True only when the model reply was parsed into valid verdicts. */
    success: boolean;
    /** Does the answer directly address the user's question? */
    answersQuestion: SelfCheckVerdict;
    /** Is the answer supported by the provided sources? */
    grounded: SelfCheckVerdict;
    /** Level of logical contradiction within the answer. */
    contradiction: ContradictionLevel;
    /** Judge's short rationale on success; a failure reason otherwise. */
    note: string;
    /** Elapsed wall-clock time of the check, in milliseconds. */
    durationMs: number;
    /** For debugging / footer display. */
    rawResponse?: string;
}

/**
 * Shared base for every failed check: all verdicts are `unknown`. Callers
 * spread it and fill in `note` and `durationMs` for the specific failure.
 */
const FAILURE_RESULT: Omit<SelfCheckResult, 'durationMs' | 'note'> = {
    success: false,
    answersQuestion: 'unknown',
    grounded: 'unknown',
    contradiction: 'unknown',
};

/**
 * Produces a single-line preview of `text`.
 *
 * Runs of whitespace (including newlines) are collapsed to one space and the
 * result is trimmed. If the cleaned text is longer than `n` UTF-16 code units
 * it is cut and an ellipsis (`…`) is appended.
 *
 * @param text Source text; empty / falsy input yields `''`.
 * @param n Maximum number of code units kept before the ellipsis.
 * @returns The cleaned, possibly truncated preview.
 */
function shortExcerpt(text: string, n: number): string {
    if (!text) return '';
    const cleaned = text.replace(/\s+/g, ' ').trim();
    if (cleaned.length <= n) return cleaned;

    let cut = n;
    if (cut > 0) {
        const lastKept = cleaned.charCodeAt(cut - 1);
        const firstDropped = cleaned.charCodeAt(cut);
        const splitsPair =
            lastKept >= 0xd800 && lastKept <= 0xdbff &&
            firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
        // Cutting between the halves of a surrogate pair (e.g. an emoji) would
        // leave a lone surrogate in the prompt; drop the whole character instead.
        if (splitsPair) cut -= 1;
    }
    return cleaned.slice(0, cut) + '…';
}

/**
 * Assembles the judge prompt for the self-check call.
 *
 * The system message explains the three criteria, the required one-line JSON
 * output and the rules; the user message carries the question, the answer and
 * the rendered source list (or a placeholder when there are no sources).
 *
 * @param userPrompt The user's original question.
 * @param answer The answer being evaluated.
 * @param sources Sources to show the judge; each excerpt is shortened first.
 * @param excerptLength Maximum excerpt length passed to `shortExcerpt`.
 * @returns The `system` and `user` message contents.
 */
function buildPrompt(
    userPrompt: string,
    answer: string,
    sources: { title: string; excerpt: string }[],
    excerptLength: number,
): { system: string; user: string } {
    // --- system message, built section by section ---
    const intro = '당신은 답변 검증기 (judge). 사용자 질문, 답변, 출처를 받아 3가지 평가:';
    const criteria = [
        '1. answersQuestion: 답변이 질문에 *직접* 답하는가? (yes/partial/no)',
        '2. grounded: 답변이 *제공된 출처에 근거* 하는가? (출처 없으면 unknown 가능) (yes/partial/no/unknown)',
        '3. contradiction: 답변에 *논리적 모순* 이 있나? (none/minor/major)',
    ];
    const outputFormat = [
        '[출력 형식 — 정확히 한 줄 JSON, 다른 텍스트 절대 금지]',
        '{"answersQuestion":"yes","grounded":"partial","contradiction":"none","note":"답변은 직접적이나 일부 주장이 모델 일반 지식 기반"}',
    ];
    const rules = [
        '[규칙]',
        '- partial/minor 는 *진짜* 애매한 경우에만. 둘 중 하나로 단정 가능하면 단정.',
        '- note 는 1문장, 80자 이내, 핵심 평가 근거.',
        '- JSON 한 줄 외 텍스트 (서론·설명·코드블록) 절대 출력 금지.',
    ];
    const system = [intro, '', ...criteria, '', ...outputFormat, '', ...rules].join('\n');

    // --- source list: "[S<n>] title" followed by an indented excerpt ---
    const renderSource = (source: { title: string; excerpt: string }, position: number): string =>
        `[S${position + 1}] ${source.title}\n  ${shortExcerpt(source.excerpt, excerptLength)}`;
    let renderedSources: string;
    if (sources.length > 0) {
        renderedSources = sources.map(renderSource).join('\n');
    } else {
        renderedSources = '(검색된 출처 없음 — grounded 는 unknown 또는 no 평가)';
    }

    // --- user message ---
    const questionSection = ['[사용자 질문]', userPrompt];
    const answerSection = ['[답변]', answer];
    const sourceSection = ['[제공된 출처]', renderedSources];
    const user = [
        ...questionSection,
        '',
        ...answerSection,
        '',
        ...sourceSection,
        '',
        '위 평가 기준에 따라 JSON 한 줄 출력.',
    ].join('\n');

    return { system, user };
}

/**
 * Extracts the judge's verdict from the raw model reply.
 *
 * The reply is expected to contain one JSON object, but models often wrap it
 * in prose or code fences. Two candidate spans are tried in order:
 *   1. the shortest `{...}` starting at the first `{`;
 *   2. the span from the first `{` to the last `}` — needed when a string
 *      value (typically `note`) itself contains `}`, which truncates span 1.
 *
 * @param raw Raw text returned by the model.
 * @returns The validated verdicts and note, or `null` when no candidate is
 *   valid JSON carrying recognised `answersQuestion` / `grounded` /
 *   `contradiction` values.
 */
function parseResult(raw: string): Omit<SelfCheckResult, 'durationMs' | 'rawResponse'> | null {
    if (!raw) return null;

    const verdicts = ['yes', 'partial', 'no', 'unknown'];
    const levels = ['none', 'minor', 'major', 'unknown'];
    const isVerdict = (v: string): v is SelfCheckVerdict => verdicts.includes(v);
    const isLevel = (v: string): v is ContradictionLevel => levels.includes(v);
    // Models sometimes emit "Yes" or " yes"; normalise before validating.
    const label = (value: unknown): string => String(value || '').trim().toLowerCase();

    const interpret = (candidate: string): Omit<SelfCheckResult, 'durationMs' | 'rawResponse'> | null => {
        let parsed: unknown;
        try {
            parsed = JSON.parse(candidate);
        } catch {
            return null;
        }
        if (typeof parsed !== 'object' || parsed === null) return null;
        const fields = parsed as Record<string, unknown>;

        const aq = label(fields.answersQuestion);
        const gr = label(fields.grounded);
        const co = label(fields.contradiction);
        if (!isVerdict(aq) || !isVerdict(gr) || !isLevel(co)) return null;

        // Collapse whitespace so the note stays on a single line, then cap its length.
        const note = typeof fields.note === 'string'
            ? fields.note.replace(/\s+/g, ' ').trim().slice(0, 120)
            : '';
        return {
            success: true,
            answersQuestion: aq,
            grounded: gr,
            contradiction: co,
            note: note || '평가 노트 없음',
        };
    };

    const candidates: string[] = [];
    const shortest = raw.match(/\{[\s\S]*?\}/);
    if (shortest) candidates.push(shortest[0]);
    const open = raw.indexOf('{');
    const close = raw.lastIndexOf('}');
    if (open !== -1 && close > open) {
        const widest = raw.slice(open, close + 1);
        if (!candidates.includes(widest)) candidates.push(widest);
    }

    for (const candidate of candidates) {
        const result = interpret(candidate);
        if (result) return result;
    }
    return null;
}

/**
 * Turns a caught value into a short, human-readable failure reason.
 *
 * An abort keeps its bare name (`AbortError`). Otherwise the message is
 * preferred, because a plain `Error` has the uninformative name "Error" —
 * using the name first would hide details such as the HTTP status.
 */
function describeCallFailure(e: unknown): string {
    const err = e as { name?: unknown; message?: unknown } | null | undefined;
    const name = typeof err?.name === 'string' ? err.name : '';
    const message = typeof err?.message === 'string' ? err.message : '';
    if (name === 'AbortError') return name;
    if (message) {
        const detail = name && name !== 'Error' ? `${name}: ${message}` : message;
        return detail.slice(0, 120);
    }
    return name || 'unknown';
}

/**
 * Evaluates a finished answer with one extra LLM call.
 *
 * Builds a judge prompt from the question, the answer and up to
 * `options.maxSources` sources, posts it to the configured server and parses
 * the one-line JSON verdict. The request is aborted after `options.timeoutMs`.
 *
 * Network errors, non-2xx responses, timeouts and unparseable replies are
 * reported as a result with `success: false` and an explanatory `note`
 * instead of being thrown.
 *
 * @param userPrompt The user's original question.
 * @param answer The answer to evaluate.
 * @param sources Sources the answer was given; only the first `maxSources` are used.
 * @param options Endpoint, model, timeout and prompt-size settings.
 * @returns The verdicts plus timing and (truncated) raw model output.
 */
export async function postHocSelfCheck(
    userPrompt: string,
    answer: string,
    sources: { title: string; excerpt: string }[],
    options: SelfCheckOptions,
): Promise<SelfCheckResult> {
    const start = Date.now();
    if (!userPrompt.trim() || !answer.trim()) {
        return { ...FAILURE_RESULT, note: 'empty input', durationMs: Date.now() - start };
    }
    const sourcesCap = (sources || []).slice(0, options.maxSources);
    const { system, user } = buildPrompt(userPrompt, answer, sourcesCap, options.excerptLength);

    // Heuristic: the default Ollama port or "ollama" in the URL selects the
    // native API; anything else is treated as an OpenAI-compatible server.
    const isOllama = options.ollamaUrl.includes(':11434') || options.ollamaUrl.includes('ollama');
    // Strip trailing slashes so "http://host/" does not produce "//api/chat".
    const baseUrl = options.ollamaUrl.replace(/\/+$/, '');
    const endpoint = isOllama ? `${baseUrl}/api/chat` : `${baseUrl}/v1/chat/completions`;

    const messages = [
        { role: 'system', content: system },
        { role: 'user', content: user },
    ];
    const body = isOllama
        ? {
            model: options.model, stream: false,
            messages,
            options: { temperature: 0.0, num_predict: 200 },
        }
        : {
            model: options.model, stream: false, temperature: 0.0, max_tokens: 200,
            messages,
        };

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), options.timeoutMs);
    let raw = '';
    try {
        const res = await fetch(endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(body),
            signal: controller.signal,
        });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data: any = await res.json();
        // Accept both the Ollama chat shape and OpenAI-style chat/completion shapes.
        raw = String(
            data?.message?.content ??
            data?.choices?.[0]?.message?.content ??
            data?.choices?.[0]?.text ??
            data?.response ??
            '',
        );
    } catch (e: unknown) {
        return {
            ...FAILURE_RESULT,
            note: `LLM call failed: ${describeCallFailure(e)}`,
            durationMs: Date.now() - start,
            rawResponse: '',
        };
    } finally {
        // Runs on success and on failure alike, so the timer is cleared exactly once.
        clearTimeout(timer);
    }

    const parsed = parseResult(raw);
    if (!parsed) {
        return {
            ...FAILURE_RESULT,
            note: 'unparseable response',
            durationMs: Date.now() - start,
            rawResponse: raw.slice(0, 200),
        };
    }
    return {
        ...parsed,
        durationMs: Date.now() - start,
        rawResponse: raw.slice(0, 200),
    };
}

/**
 * Formats a self-check result as a markdown footer shown under the answer.
 *
 * The footer starts with a blank line and a horizontal rule (`---`).
 * - Success: an italic line with the three verdict symbols, the note, and the
 *   elapsed seconds plus model name in parentheses.
 * - Failure: a short italic line with the failure note and elapsed seconds.
 *
 * @param result The self-check outcome to render.
 * @param model Model name shown in the success footer.
 * @returns The markdown snippet to append to the answer.
 */
export function formatSelfCheckFooter(result: SelfCheckResult, model: string): string {
    const elapsed = `${(result.durationMs / 1000).toFixed(1)}s`;

    if (!result.success) {
        return `\n\n---\n_🔍 Self-check: ⊘ ${result.note} (${elapsed})_`;
    }

    // Symbol for a yes/partial/no verdict; anything else renders as "?".
    const verdictMark = (verdict: SelfCheckVerdict): string => {
        switch (verdict) {
            case 'yes': return '✓';
            case 'partial': return '◐';
            case 'no': return '✗';
            default: return '?';
        }
    };
    // Label for the contradiction level; anything else renders as "?".
    const contradictionLabel = (level: ContradictionLevel): string => {
        switch (level) {
            case 'none': return '없음';
            case 'minor': return '경미';
            case 'major': return '⚠️ 중대';
            default: return '?';
        }
    };

    const answered = verdictMark(result.answersQuestion);
    const grounded = verdictMark(result.grounded);
    const contradiction = contradictionLabel(result.contradiction);
    return `\n\n---\n_🔍 **Self-check**: 답함=${answered} · 근거=${grounded} · 모순=${contradiction} — ${result.note} _(${elapsed} · ${model})__`;
}