// connectai/src/features/company/intentAlignment.ts

/**
 * Intent Alignment — 사용자의 자연어 요청을 *실행 가능한 작업 조건*으로 변환.
 *
 * 사용자는 자기 의도와 배경지식이 에이전트에게 충분히 전달되었다고 착각하는
 * 경향이 있다 (투명성의 착각·지식의 저주·공통 기반 부족). 그래서 에이전트가
 * 즉시 작업에 돌입하면 사용자가 머릿속에 가진 것과 다른 결과를 만들어 낸다.
 *
 * 이 모듈은 그 격차를 메꾸는 한 단계 앞 절차다. 사용자가 던진 한 줄을 받아
 * `RequirementContract` 5필드(C-G-C-F-Q) 로 채우고, 채우다가 비는 자리가
 * 있으면 *추측하지 말고* 사용자에게 되묻는다. 분석기 자체는 LLM 한 번 호출로
 * 끝난다; 추가 라운드(되묻기→답변→재분석)는 호출자(상태 머신, Phase B)가
 * 관리한다.
 *
 * 출력 형식은 dispatcher의 다른 모듈(planner/promptBuilder/reviewer)이 모두
 * 같은 ground truth로 contract를 읽어 가는 것이 목표라, 필드 이름과 의미는
 * `types.ts`의 `RequirementContract`와 1:1로 맞췄다.
 */
import { IAIService } from '../../core/services';
import { logError, logInfo } from '../../utils';
import { RequirementContract } from './types';

/**
 * Default upper bound on the number of alignment (ask → answer → re-analyze)
 * rounds.
 *
 * Per the original author's note, this is the fallback used when the
 * `company.intentAlignmentMaxRounds` config value is not set, and configured
 * values are clamped to the range [1, 5].
 * NOTE(review): neither the config lookup nor the clamp lives in this file —
 * confirm against the caller that owns the round loop.
 *
 * Intent: guarantee at the code level that a user is never questioned
 * indefinitely, even without explicit configuration. According to the original
 * note, when the limit is reached "smart" mode proceeds automatically and
 * "strict" mode shows a confirmation card (behavior implemented elsewhere).
 */
export const ALIGNMENT_DEFAULT_MAX_ROUNDS = 3;

/**
 * Result of a single analysis round.
 *
 * `contract` is always populated. Only when more information is needed is the
 * contract's confidence medium/low with a non-empty `openQuestions` list. The
 * caller shows those questions to the user, collects the answers, and passes
 * them back as `previousAnswers` on the next round; the same function then
 * returns an updated contract.
 */
export interface IntentAnalysisResult {
    /** The requirement contract produced (or the fallback contract on failure). */
    contract: RequirementContract;
    /** Raw LLM response body — kept for debug logging; not intended to be shown on cards. */
    raw: string;
    /** Whether JSON parsing succeeded. When false, `contract` is the fallback value (only the original prompt is filled in). */
    parsed: boolean;
}

/**
 * Input the caller supplies as the context for one analysis round.
 *
 * `previousAnswers` holds the question/answer pairs the user provided in the
 * preceding round; the LLM is asked to fold them into the contract.
 * `previousContract` is the result of the preceding analysis — the analyzer
 * normally uses it as a starting point and only fills in what is still missing.
 */
export interface IntentAnalysisInput {
    /** The user's original natural-language request, passed to the model verbatim. */
    userOriginalPrompt: string;
    /** The user's answers from the preceding round. Empty (or omitted) on the first round. */
    previousAnswers?: Array<{ q: string; a: string }>;
    /** Contract from the preceding round (when present, nudges the model toward a partial update). */
    previousContract?: RequirementContract;
    /** Name of the active pipeline — the analyzer may use it when inferring `format`. */
    activePipelineName?: string;
    /**
     * List of active role categories. Knowing "what kinds of work this company
     * can do" lets the analyzer extract goal/format in terms of those
     * capabilities.
     */
    availableRoleCategories?: string[];
    /**
     * Summary of the regular chat history from *just before* the mode switch.
     * When the user has already discussed the project, context and requirements
     * in regular chat and then switches to company mode to request *follow-up
     * work*, the analyzer can derive context/goal/criteria from this summary
     * and avoid asking duplicate questions (context/goal/criteria/format).
     *
     * Format (per the original note; produced by the caller, not by this file):
     * the most recent N (default 10) turns, one `role: content` line each, with
     * each content capped at 200 characters.
     * `undefined` when absent (first entry / no mode toggle involved).
     */
    priorChatSummary?: string;
}

/**
 * System prompt passed as the `system` message of the analyzer call in
 * `analyzeIntent`.
 *
 * It instructs the model to act as a "request analyst" and to emit exactly one
 * JSON object with the keys `context`, `goal`, `criteria`, `format`,
 * `openQuestions` and `confidence`, without guessing: anything not directly
 * inferable must be left empty and turned into an open question. A prior-chat
 * summary block, when present in the user message, is to be treated with the
 * same authority as the user's request.
 *
 * NOTE(review): the prompt asks for at most 4 criteria and 3 open questions,
 * while `_coerce` truncates at 6 and 4 respectively — presumably deliberate
 * slack for models that overshoot; confirm.
 *
 * The text is runtime data sent to the model; do not reword or translate it
 * without re-validating model behavior.
 */
const SYSTEM_PROMPT = `당신은 "1인 기업 모드"의 *요청 분석가*입니다. 사용자의 자연어 요청을 받아 그것을 실행 가능한 작업 조건 5가지(C-G-C-F-Q)로 정리합니다.

  - context          : 현재 상황·프로젝트 맥락 (한 단락 또는 빈 문자열).
  - goal             : 사용자가 *결과로* 달성하려는 것 (1~2 문장).
  - criteria         : 좋은 결과의 판단 기준들. 측정 가능하면 더 좋음. 최대 4개.
  - format           : 원하는 산출물의 형식 (예: "마크다운 기획서", "Python 단일 파일", "JSON + 짧은 요약").
  - openQuestions    : 채워지지 않아 사용자에게 *물어봐야* 할 질문들. 최대 3개. 정말 결정적인 것만.

⚠️ 추측 금지. 사용자의 한 줄 + 컨텍스트에서 *직접 추론*되지 않는 정보는 채우지 마세요. 빈 칸은 그대로 두고 그 자리에 대응하는 질문을 openQuestions에 넣으세요.

⚠️ **[모드 전환 시 context 우선 추출]**: 입력에 \`[모드 전환 직전 일반 채팅 요약]\` 블록이 있으면, 그것을 **사용자의 한 줄과 같은 권위로** 취급하세요. 거기서 context/goal/criteria/format 을 *직접 추출* 한 뒤, 그래도 빠진 항목만 openQuestions 에 넣으세요. 사용자가 이미 일반 채팅에서 충분히 설명한 내용을 다시 물어보면 안 됩니다 — 일반 채팅에서 *명시적으로 언급* 된 항목은 추측이 아니라 **명시된 사실** 입니다.

confidence는 다음 기준으로 자체 판정:
  - "high"   : C·G·C·F 4개 모두 prompt에서 직접 추론 가능. openQuestions = [] 가능.
  - "medium" : 대체로 명확하지만 1~2개 항목에서 합리적 가정 필요. 추가 질문 1~2개.
  - "low"    : 핵심 정보(특히 goal 또는 format)가 빠짐. 질문 2~3개.

직전 라운드 답변이 있으면 그 내용을 반영해 contract를 *갱신*하세요. 같은 질문을 다시 묻지 마세요.

⚠️ 반드시 아래 JSON 한 번만 출력. 다른 텍스트(설명·코드펜스·머리말) 일체 금지.

{
  "context": "<문자열 또는 빈값>",
  "goal": "<문자열 또는 빈값>",
  "criteria": ["<항목1>", "<항목2>", ...],
  "format": "<문자열 또는 빈값>",
  "openQuestions": ["<질문1>", "<질문2>", ...],
  "confidence": "low"|"medium"|"high"
}`;

/**
 * Assembles the user-role message for one analysis round.
 *
 * Sections are emitted in a fixed order, each only when its input is present:
 * original request → prior-chat summary → active pipeline → available role
 * categories → previous contract → previous answers → closing instruction.
 *
 * @param input Round context supplied by the caller.
 * @returns The newline-joined message text.
 */
function _buildUserMessage(input: IntentAnalysisInput): string {
    const {
        priorChatSummary: summary,
        activePipelineName: pipeline,
        availableRoleCategories: roles,
        previousContract: prior,
        previousAnswers: answers,
    } = input;
    // Empty contract slots are rendered with an explicit "missing" marker.
    const orMissing = (value: string): string => value || '(미)';

    const out: string[] = ['[사용자 원본 요청]', input.userOriginalPrompt];

    // Regular-chat summary from just before the mode switch: the analyzer is told
    // to extract context/goal/criteria from here first, so that items the user
    // already explained are not put back into openQuestions.
    if (summary && summary.trim()) {
        out.push(
            '',
            '[모드 전환 직전 일반 채팅 요약]',
            '아래는 사용자가 *기업모드 전환 전* 일반 채팅에서 같은 주제로 나눈 대화입니다.',
            '여기에 명시된 context/goal/criteria/format 은 *사용자가 이미 말한 사실* 로 취급하여',
            'contract 의 해당 슬롯을 채우고, 다시 묻지 마세요.',
            '---',
            summary,
            '---',
        );
    }

    if (pipeline) {
        out.push('', `(활성 파이프라인) "${pipeline}"`);
    }

    // No blank separator is emitted before the role line.
    if (roles && roles.length > 0) {
        out.push(`(이 회사 가능 직군) ${roles.join(', ')}`);
    }

    if (prior) {
        const criteriaText = prior.criteria.length ? prior.criteria.join(' | ') : '(미)';
        out.push(
            '',
            '[직전 라운드까지 도출된 contract]',
            `context: ${orMissing(prior.context)}`,
            `goal: ${orMissing(prior.goal)}`,
            `criteria: ${criteriaText}`,
            `format: ${orMissing(prior.format)}`,
        );
    }

    if (answers && answers.length > 0) {
        out.push('', '[사용자가 직전 라운드에 답한 내용]');
        for (const pair of answers) {
            out.push(`- Q: ${pair.q}`, `  A: ${pair.a}`);
        }
        out.push('위 답변을 반영해 contract를 갱신하고 새 openQuestions를 적되, 이미 답을 받은 질문은 *다시 묻지 마세요*.');
    }

    out.push('', '분석 JSON만 출력:');
    return out.join('\n');
}

/**
 * Lenient 4-stage parser for the analyzer's response.
 *
 * Small models frequently wrap the JSON in a code fence or add a preamble, so
 * a single strict `JSON.parse` misses a large share of otherwise usable
 * responses. Stages, tried in order until one yields a usable object:
 *   1. strict parse of the first fenced block's body (if a fence is present),
 *   2. first balanced `{…}` object inside that fenced body,
 *   3. strict parse of the whole trimmed response,
 *   4. first balanced `{…}` object inside the whole response.
 *
 * Stages 3–4 matter when the fence regex matches something that is not the
 * payload — e.g. a bare JSON response whose string values themselves contain
 * triple backticks. Without the fallback to the raw text such a perfectly
 * valid response would be rejected.
 *
 * @param raw Raw model output.
 * @returns The normalized analysis fields, or `null` when no stage produced a
 *          usable object.
 */
function _parseAnalysisJson(raw: string): {
    context: string;
    goal: string;
    criteria: string[];
    format: string;
    openQuestions: string[];
    confidence: 'low' | 'medium' | 'high';
} | null {
    if (!raw || !raw.trim()) return null;
    const whole = raw.trim();
    const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
    // Fenced body first (the common case), then the untouched response.
    const candidates = fenced ? [fenced[1].trim(), whole] : [whole];

    for (const candidate of candidates) {
        if (!candidate) continue;
        try {
            const c = _coerce(JSON.parse(candidate));
            if (c) return c;
        } catch { /* not strict JSON — try balanced extraction below */ }

        const balanced = _extractFirstBalancedObject(candidate);
        // Re-parsing an identical string cannot give a different outcome.
        if (balanced && balanced !== candidate) {
            try {
                const c = _coerce(JSON.parse(balanced));
                if (c) return c;
            } catch { /* fall through to the next candidate */ }
        }
    }
    return null;
}

/**
 * Normalizes an already-parsed JSON value into the analysis shape.
 *
 * Returns `null` — so the caller falls back to its "ask the user" path — when
 * the value cannot be an analysis at all: a non-object, an array, or an object
 * carrying none of the expected keys. Accepting those would produce an empty
 * contract with no open questions that still counts as successfully parsed.
 *
 * Field handling:
 *   - `context` / `goal` / `format`: trimmed string, or `''` when not a string.
 *   - `criteria`: non-blank strings only, trimmed, at most 6.
 *   - `openQuestions`: non-blank strings only, trimmed, at most 4.
 *   - `confidence`: case-insensitive `high` / `medium`; anything else → `low`.
 *
 * @param obj Value returned by `JSON.parse`.
 * @returns Normalized fields, or `null` if `obj` is not a usable analysis.
 */
function _coerce(obj: unknown): {
    context: string;
    goal: string;
    criteria: string[];
    format: string;
    openQuestions: string[];
    confidence: 'low' | 'medium' | 'high';
} | null {
    // `typeof [] === 'object'`, so arrays need an explicit rejection.
    if (!obj || typeof obj !== 'object' || Array.isArray(obj)) return null;
    const o = obj as Record<string, unknown>;
    const expectedKeys = ['context', 'goal', 'criteria', 'format', 'openQuestions', 'confidence'];
    // An object with none of the contract keys is some other payload, not an analysis.
    if (!expectedKeys.some((key) => key in o)) return null;

    const context = typeof o.context === 'string' ? o.context.trim() : '';
    const goal = typeof o.goal === 'string' ? o.goal.trim() : '';
    const format = typeof o.format === 'string' ? o.format.trim() : '';
    const criteria = Array.isArray(o.criteria)
        ? o.criteria.filter((c): c is string => typeof c === 'string' && c.trim().length > 0)
            .map((c) => c.trim()).slice(0, 6)
        : [];
    const openQuestions = Array.isArray(o.openQuestions)
        ? o.openQuestions.filter((q): q is string => typeof q === 'string' && q.trim().length > 0)
            .map((q) => q.trim()).slice(0, 4)
        : [];
    const conf = typeof o.confidence === 'string' ? o.confidence.trim().toLowerCase() : '';
    // Unknown or missing confidence defaults to the most conservative level.
    const confidence: 'low' | 'medium' | 'high' =
        conf === 'high' ? 'high' : conf === 'medium' ? 'medium' : 'low';
    return { context, goal, criteria, format, openQuestions, confidence };
}

/**
 * Returns the substring spanning the first `{` in `s` through its matching
 * `}`, or `null` when `s` has no `{` or the braces never balance.
 *
 * Braces inside double-quoted strings are ignored, and a backslash inside a
 * string escapes the following character. Scanning begins at the first `{`,
 * so quote state before that position is not tracked.
 *
 * @param s Text that may contain a JSON object surrounded by other content.
 * @returns The balanced `{…}` slice, or `null`.
 */
function _extractFirstBalancedObject(s: string): string | null {
    const open = s.indexOf('{');
    if (open < 0) return null;

    let nesting = 0;
    let quoted = false;
    let escaped = false;
    let pos = open;

    while (pos < s.length) {
        const c = s.charAt(pos);
        if (quoted) {
            if (escaped) {
                // The escaped character is consumed without interpretation.
                escaped = false;
            } else if (c === '\\') {
                escaped = true;
            } else if (c === '"') {
                quoted = false;
            }
        } else if (c === '"') {
            quoted = true;
        } else if (c === '{') {
            nesting += 1;
        } else if (c === '}') {
            nesting -= 1;
            if (nesting === 0) {
                return s.slice(open, pos + 1);
            }
        }
        pos += 1;
    }
    return null;
}

/**
 * End-to-end analysis call for one alignment round.
 *
 * Never rejects on analyzer failure: if the model call throws or its output
 * cannot be parsed, the result carries `parsed: false` and a fallback contract
 * with `confidence: 'low'`, only the original prompt filled in, and a single
 * clarifying question. This lets the caller safely enter the "need to ask
 * more" flow, so a failure never degrades into proceeding on guesses — which
 * is the whole point of this feature.
 *
 * @param ai      Chat service used for the single LLM call.
 * @param input   Round context (original prompt, previous answers/contract, etc.).
 * @param options Optional model name and timeout, forwarded to `ai.chat`.
 * @returns The contract for this round plus the raw model output and a flag
 *          telling whether that output was parsed successfully.
 */
export async function analyzeIntent(
    ai: IAIService,
    input: IntentAnalysisInput,
    options: { model?: string; timeoutMs?: number } = {},
): Promise<IntentAnalysisResult> {
    const prompt = input.userOriginalPrompt.trim();
    if (!prompt) {
        // Nothing to analyze — skip the model call. Previously collected answers
        // are carried over, as in the other fallback paths.
        return {
            contract: _fallbackContract(input.userOriginalPrompt, [
                '요청 내용이 비어 있습니다. 무엇을 만들고 싶으신가요?',
            ], input.previousAnswers),
            raw: '',
            parsed: false,
        };
    }
    let raw = '';
    try {
        const result = await ai.chat({
            system: SYSTEM_PROMPT,
            user: _buildUserMessage(input),
            model: options.model,
            timeoutMs: options.timeoutMs,
        });
        raw = result.content || '';
    } catch (e: unknown) {
        logError('intentAlignment: analyzer call failed; falling back to low-conf.', {
            error: e instanceof Error ? e.message : String(e),
        });
        return {
            contract: _fallbackContract(input.userOriginalPrompt, [
                '요청을 더 구체적으로 알려주실 수 있을까요? (분석기 호출 실패)',
            ], input.previousAnswers),
            raw,
            parsed: false,
        };
    }
    const parsed = _parseAnalysisJson(raw);
    if (!parsed) {
        logInfo('intentAlignment: parse failed; falling back to low-conf.', {
            rawHead: raw.slice(0, 100),
        });
        return {
            contract: _fallbackContract(input.userOriginalPrompt, [
                '요청을 더 구체적으로 풀어 설명해 주세요.',
            ], input.previousAnswers),
            raw,
            parsed: false,
        };
    }
    // Drop questions the user has already answered if the model re-emits them.
    // Exact-text comparison misses paraphrases (small models tend to reword),
    // but verbatim repeats are common enough to be worth this first-pass filter.
    const askedAlready = new Set((input.previousAnswers ?? []).map((a) => a.q.trim()));
    const openQuestions = parsed.openQuestions.filter((q) => !askedAlready.has(q.trim()));

    const contract: RequirementContract = {
        userOriginalPrompt: input.userOriginalPrompt,
        context: parsed.context,
        goal: parsed.goal,
        criteria: parsed.criteria,
        format: parsed.format,
        answeredQuestions: input.previousAnswers ? [...input.previousAnswers] : [],
        openQuestions,
        // Post-hoc correction: once the user has answered at least one round,
        // confidence may be raised one step, so that a conservative analyzer
        // still reflects the extra information. The adjustment is based on the
        // *filtered* question count — the same list stored in the contract —
        // so questions just removed as already answered do not block promotion.
        confidence: _adjustConfidence(parsed.confidence, openQuestions.length, input.previousAnswers?.length ?? 0),
    };
    return { contract, raw, parsed: true };
}

/**
 * Applies the post-hoc confidence correction for rounds in which the user has
 * already answered questions.
 *
 *   - `low`    → `medium` when at least one answer has been received. Only one
 *                step: jumping straight to `high` after a single answer would
 *                skip the user-confirmation stage too early.
 *   - `medium` → `high` when no open questions remain and at least one answer
 *                has been received (compensates for a conservative analyzer).
 *   - otherwise the analyzer's own value is returned unchanged.
 *
 * @param base          Confidence reported by the analyzer.
 * @param openCount     Number of open questions remaining.
 * @param answeredCount Number of questions the user has answered so far.
 * @returns The adjusted confidence level.
 */
function _adjustConfidence(
    base: 'low' | 'medium' | 'high',
    openCount: number,
    answeredCount: number,
): 'low' | 'medium' | 'high' {
    switch (base) {
        case 'low':
            return answeredCount >= 1 ? 'medium' : 'low';
        case 'medium':
            return openCount === 0 && answeredCount > 0 ? 'high' : 'medium';
        default:
            return base;
    }
}

/**
 * Builds the contract returned when analysis cannot produce a real one.
 *
 * Only the original prompt is filled in; context/goal/format are empty,
 * criteria is an empty list and confidence is always `'low'`.
 *
 * @param prompt    The user's original request, stored verbatim.
 * @param questions Questions to present to the user (stored as-is, not copied).
 * @param answered  Previously answered Q/A pairs; shallow-copied when given,
 *                  otherwise an empty list is used.
 * @returns The fallback contract.
 */
function _fallbackContract(
    prompt: string,
    questions: string[],
    answered?: Array<{ q: string; a: string }>,
): RequirementContract {
    const answeredQuestions = answered ? answered.slice() : [];
    const contract: RequirementContract = {
        userOriginalPrompt: prompt,
        context: '',
        goal: '',
        criteria: [],
        format: '',
        answeredQuestions,
        openQuestions: questions,
        confidence: 'low',
    };
    return contract;
}

/**
 * Serializes a contract into a markdown block suitable for embedding in an
 * LLM system prompt.
 *
 * Per the original design note, planner/specialist/reviewer prompts are meant
 * to prepend this block verbatim (Phase D). Empty scalar fields and an empty
 * criteria list are rendered as "(미)" so that a missing value is an
 * *explicit absence* from the model's point of view. The answered-questions
 * and open-questions sections are omitted entirely when their lists are empty.
 *
 * @param contract The requirement contract to render.
 * @returns Newline-joined markdown text ending with the ground-truth instruction.
 */
export function formatContractForPrompt(contract: RequirementContract): string {
    const orMissing = (value: string): string => value || '(미)';
    const bullets = (items: string[]): string[] => Array.from(items, (item) => `  - ${item}`);
    const { criteria, answeredQuestions, openQuestions } = contract;

    const out: string[] = [
        '## [REQUIREMENT CONTRACT — 사용자와 사전 합의된 작업 조건]',
        `- **원본 요청**: ${contract.userOriginalPrompt}`,
        `- **맥락 (Context)**: ${orMissing(contract.context)}`,
        `- **목표 (Goal)**: ${orMissing(contract.goal)}`,
    ];

    if (criteria.length === 0) {
        out.push('- **판단 기준 (Criteria)**: (미)');
    } else {
        out.push('- **판단 기준 (Criteria)**:', ...bullets(criteria));
    }

    out.push(`- **산출 형식 (Format)**: ${orMissing(contract.format)}`);

    if (answeredQuestions.length > 0) {
        out.push('- **확인된 응답**:');
        for (const { q, a } of answeredQuestions) {
            out.push(`  - Q: ${q}`, `    A: ${a}`);
        }
    }

    if (openQuestions.length > 0) {
        out.push('- **미해결 질문 (사용자가 답 안 받아 보수적으로 처리)**:', ...bullets(openQuestions));
    }

    out.push(
        `- **신뢰도**: ${contract.confidence}`,
        '',
        '위 contract가 모든 판단의 ground truth입니다. 추측이나 contract 외 가정을 추가하지 마세요. 미해결 항목이 작업에 결정적이라면 산출물에 "이 부분은 보수적으로 처리했습니다"라고 명시.',
    );
    return out.join('\n');
}