// connectai/src/agents/factory.ts

import * as vscode from 'vscode';
import { getConfig } from '../config';
import { AgentExecuteOptions } from '../lib/engine';

/**
 * Base class for agents that send a system + user message pair to a chat
 * endpoint and return the model's text reply.
 *
 * `callLLM` is the shared transport; subclasses implement `execute` and decide
 * which persona / prompt to send.
 */
export abstract class BaseAgent {
    constructor(protected readonly modelName: string) {}

    /**
     * Sends one non-streaming chat request and returns the reply text.
     *
     * The server flavour is guessed from the configured URL: an URL containing
     * `:11434` or `ollama` is treated as Ollama (`/api/chat`), anything else as
     * an OpenAI-compatible server (`/v1/chat/completions`).
     *
     * Up to three attempts are made, with a 2 s / 3 s pause before the second
     * and third. Each attempt has its own 45 s budget that covers both the
     * request and reading the response body. An abort — whether triggered by
     * the caller's `signal` or by the per-attempt timeout — ends the loop
     * immediately without retrying.
     *
     * @param persona System message content.
     * @param prompt  User message content.
     * @param signal  Optional caller-owned cancellation signal.
     * @returns The reply text, or `''` when the response has no recognised
     *          content field.
     * @throws Error when the URL is not configured, `fetch` is unavailable, or
     *         every attempt failed (the last failure is rethrown as-is).
     */
    protected async callLLM(persona: string, prompt: string, signal?: AbortSignal): Promise<string> {
        const REQUEST_TIMEOUT_MS = 45000;
        const MAX_ATTEMPTS = 3;

        const { ollamaUrl, contextLength, maxOutputTokens } = getConfig();
        if (!ollamaUrl) {
            throw new Error('Ollama URL이 설정되지 않았습니다. 설정을 확인해주세요.');
        }

        if (typeof fetch === 'undefined') {
            throw new Error('이 환경에서는 fetch 함수를 사용할 수 없습니다. Node.js 버전을 확인하거나 polyfill이 필요합니다.');
        }

        const messages = [
            { role: 'system', content: persona },
            { role: 'user', content: prompt }
        ];

        // Engine auto-detection (Ollama vs OpenAI / LM Studio).
        const { endpoint, isOllama } = resolveChatEndpoint(ollamaUrl);

        // Always state an explicit output-token cap so the context window is not
        // exceeded (these calls produce intermediate sub-agent output).
        const numCtx = Math.max(2048, contextLength);
        const outCap = Math.max(256, maxOutputTokens);

        // The payload does not change between attempts, so serialise it once.
        const body = JSON.stringify(isOllama ? {
            model: this.modelName,
            messages,
            stream: false,
            options: { temperature: 0.3, num_ctx: numCtx, num_predict: outCap }
        } : {
            model: this.modelName,
            messages,
            stream: false,
            temperature: 0.3,
            max_tokens: outCap
        });

        let lastError: unknown;
        for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            // Back off BEFORE arming the timeout so the pause does not eat into
            // the attempt's own time budget.
            if (attempt > 1) {
                await abortableDelay(1000 * attempt, signal);
            }

            const controller = new AbortController();
            const abortAttempt = (): void => controller.abort();
            if (signal?.aborted) {
                // Already cancelled: let fetch reject right away with an AbortError.
                abortAttempt();
            } else {
                signal?.addEventListener('abort', abortAttempt, { once: true });
            }
            const timeoutId = setTimeout(abortAttempt, REQUEST_TIMEOUT_MS);

            try {
                const response = await fetch(endpoint, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body,
                    signal: controller.signal
                });

                if (!response.ok) {
                    throw new Error(`Agent API Error: ${response.statusText} (${response.status})`);
                }

                // Awaited inside the try so a stalled or malformed body is
                // handled by the same timeout / retry logic as the request.
                return extractChatContent(await response.json());
            } catch (error: unknown) {
                lastError = error;
                // Cancellation and timeout both surface as AbortError; neither is retried.
                if ((error as { name?: unknown } | null | undefined)?.name === 'AbortError') break;
            } finally {
                clearTimeout(timeoutId);
                // Detach from the caller's signal so repeated calls that share one
                // signal do not pile up listeners on it.
                signal?.removeEventListener('abort', abortAttempt);
            }
        }
        throw lastError;
    }

    abstract execute(input: string, context?: string, signal?: AbortSignal, options?: AgentExecuteOptions): Promise<string>;
}

/**
 * Picks the chat endpoint for a configured base URL.
 *
 * Trailing slashes are stripped before the path is appended so that
 * `http://host:11434/` does not become `http://host:11434//api/chat`.
 */
function resolveChatEndpoint(baseUrl: string): { endpoint: string; isOllama: boolean } {
    const isOllama = baseUrl.includes(':11434') || baseUrl.includes('ollama');
    const trimmed = baseUrl.replace(/\/+$/, '');
    const endpoint = isOllama ? `${trimmed}/api/chat` : `${trimmed}/v1/chat/completions`;
    return { endpoint, isOllama };
}

/**
 * Pulls the reply text out of a chat response, trying in order:
 * `message.content`, `choices[0].message.content`, `choices[0].text`,
 * `response`, and finally the payload itself when it is a bare string.
 * Returns `''` when none of those yields a value.
 */
function extractChatContent(data: unknown): string {
    if (typeof data === 'string') return data;
    if (typeof data !== 'object' || data === null) return '';

    const body = data as {
        message?: { content?: string };
        choices?: Array<{ message?: { content?: string }; text?: string }>;
        response?: string;
    };
    return (
        body.message?.content ||
        body.choices?.[0]?.message?.content ||
        body.choices?.[0]?.text ||
        body.response ||
        ''
    );
}

/**
 * Resolves after `ms` milliseconds, or earlier if `signal` aborts.
 * Never rejects; the caller decides what an aborted signal means.
 */
function abortableDelay(ms: number, signal?: AbortSignal): Promise<void> {
    return new Promise<void>(resolve => {
        if (signal?.aborted) {
            resolve();
            return;
        }
        const finish = (): void => {
            clearTimeout(timer);
            signal?.removeEventListener('abort', finish);
            resolve();
        };
        const timer = setTimeout(finish, ms);
        signal?.addEventListener('abort', finish, { once: true });
    });
}

/**
 * Combines several abort signals into one (a stand-in for `AbortSignal.any`,
 * which is not always available in older Node).
 *
 * The returned signal aborts as soon as any source signal aborts; if a source
 * is already aborted on entry, the returned signal is aborted immediately.
 * Once that happens, every listener this helper registered on the source
 * signals is removed again, so no source keeps a reference to the combined
 * controller after it has fired.
 *
 * Listeners stay attached for as long as none of the sources has aborted.
 *
 * @param signals Source signals to watch. An empty list yields a signal that
 *                never aborts.
 * @returns The combined signal (always the helper's own signal, never one of
 *          the inputs).
 */
function anySignal(signals: AbortSignal[]): AbortSignal {
    const controller = new AbortController();
    const watched: AbortSignal[] = [];

    const onAbort = (): void => {
        for (const source of watched) {
            source.removeEventListener('abort', onAbort);
        }
        watched.length = 0;
        controller.abort();
    };

    for (const signal of signals) {
        if (signal.aborted) {
            // Also detaches the listeners already placed on earlier signals.
            onAbort();
            break;
        }
        signal.addEventListener('abort', onAbort, { once: true });
        watched.push(signal);
    }
    return controller.signal;
}

/**
 * Section outline shape produced by ChunkedWriter in the 'outline' role.
 * Tokens are kept minimal — heading is what the section is about, scope tells
 * the next call what facts to keep inside that section so adjacent sections
 * don't duplicate content.
 */
export interface SectionOutline {
    /** Short label for the section (the outline persona asks for a Korean label of at most 24 characters). */
    heading: string;
    /** One sentence describing which facts/points belong inside this section. */
    scope: string;
}

/**
 * ChunkedWriter — single-agent replacement for the old 5-stage pipeline.
 *
 * Why this exists: the old pipeline (planner → researcher → reflector → writer
 * → synthesizer) was different *personas* in series, which (a) burned tokens
 * by repeating context at every hop and (b) drifted away from the user's
 * actual request because intermediate agents only saw earlier agents'
 * abstractions — never the original message. The user's intent was simpler:
 * **split the *answer* into chunks so each LLM call stays under the token
 * cap, then join.** That's what this class does.
 *
 * Flow inside `AgentEngine.runMission`:
 *   1. role='outline'  → 1 LLM call returns a JSON list of section outlines
 *                        (N = 1..MAX, the model decides based on expected
 *                        output length).
 *   2. role='section'  → N LLM calls, one per outline entry, each given the
 *                        original prompt + this section's scope + already-
 *                        written sections (truncated) so it can avoid
 *                        repeating earlier content.
 *   3. role='polish'   → 1 LLM call takes the joined draft and produces a
 *                        final clean copy (fixes typos, removes
 *                        hallucinations / unsupported claims, smooths flow).
 *
 * A fourth role, 'direct', answers in a single call with no outline at all.
 *
 * Every role uses the *same* model — no persona mismatch, no agent-to-agent
 * abstraction loss. The only thing that changes is the per-call system
 * prompt picked here based on `options.config.role`.
 */
export class ChunkedWriter extends BaseAgent {
    /**
     * Hard ceiling — no user config value can push the section count above
     * this; it is a safety net. The limit actually in use is
     * `getConfig().chunkedMaxSections` (default 3). Users can adjust that
     * between 1 and 10 in Astra Settings; this constant is the absolute cap
     * on top of it.
     */
    static readonly MAX_SECTIONS_HARD_CEILING = 10;

    private readonly outlinePersona = `You are a concise editor planning the structure of a Korean answer.
Decide how many sections the answer needs. The exact upper bound (MAX_N) is given in the user message below — never exceed it. Pick the *smallest* count that still covers the request well — a short factual question should be 0-1 section, a meaty analysis up to MAX_N.

Output STRICTLY a JSON array of objects: \`[{"heading": "...", "scope": "..."}]\`. No prose, no fences, no leading text.
- 🟢 **빈 배열 \`[]\`** = "쪼갤 필요 없음". 사용자 질문이 간단해서 단일 LLM 호출로 즉답이 더 빠르고 자연스러울 때 (예: 단순 사실 질문, 짧은 코드 한 줄, 정의 묻기). 시스템이 이걸 받으면 outline·section 단계 건너뛰고 1회 직답으로 처리한다.
- heading: a short Korean section label (≤ 24 chars). For 1-section answers, set heading to "본문".
- scope: one Korean sentence describing exactly what facts/points belong inside this section so adjacent sections don't overlap.

판단 기준:
- 답변이 한 단락 (대략 3~5문장) 이내로 완결 가능 → \`[]\`
- 본문 분석·여러 측면 비교·구조화된 보고서가 필요 → N개 섹션 (단, MAX_N 절대 초과 금지)

If the user attached source content (article/code/log) the sections must cover *that content*, not analysis methodology.`;

    private readonly sectionPersona = `You are writing ONE section of a longer Korean answer. You will be given:
- the user's original request (possibly with attached content),
- this section's heading + scope,
- the full outline (for context only — DO NOT write other sections),
- already-written previous sections (so you can avoid repeating them).

Rules:
- Stay strictly inside this section's scope. Do NOT cover other outline entries.
- Korean, plain markdown (no top-level "#" — the heading will be added by the joiner).
- 이모지 / 이모티콘 사용 금지 (📌 🎯 💡 ✅ 등). 사용자 명시 피드백.
- Pack facts. Avoid filler / executive summaries / closing remarks (the polish pass adds those).
- If the user attached source content, cite from it; do not invent facts.
- Do NOT output the heading itself — only the body of this section.`;

    private readonly polishPersona = `You are the final editor producing the user-facing Korean answer from a sectioned draft.

[Job]
1. Fix typos, broken markdown, inconsistent terminology.
2. Remove unsupported claims / hallucinations: if a sentence asserts a fact that isn't grounded in the user's request (or the earlier sections themselves), delete it. Better to be short than wrong.
3. Smooth section transitions and remove duplicated information across sections.
4. Preserve every factually grounded claim from the draft. Don't invent new facts.

[정리·리뷰·요약 self-check — 출력 직전에 반드시 머릿속으로 통과]
사용자가 원본을 첨부했거나 draft 가 원본 자료를 다루고 있을 때, 답변 출력 전에 다음 5가지를
스스로 점검. 어기면 그 부분 삭제·수정 후 출력.
(1) **사실 오류** — 원본의 고유명사·수치·비유·대응 관계가 정확히 옮겨졌나? 비유는
    방향이 뒤집히기 쉬움 (예: "A=자료실, B=공부방" 을 "B=자료실, A=공부방" 으로 뒤집기).
(2) **없는 내용 추가 금지** — 원본에 없는 *인과·순서·단계 구분* 을 만들지 말 것. "따라서",
    "그러므로", "단계별로", "A → B → C 순으로" 같은 표현이 답변에 들어가려 하면, 원본에
    그 흐름이 *명시* 돼 있는지 확인. 없으면 그 표현 빼거나 "(정리자 추론)" 로 라벨링.
(3) **원본 뉘앙스 유지** — "A 와 B 를 *동시에* 하라" 를 "A 후 B *순서로*" 로 바꾸는 식의
    양상(동시/순차/선택/필수) 변형 금지. 원본 표현 그대로 따옴표 인용 권장.
(4) **중요도 비례** — 원본의 핵심이 답변에서도 부각되고, 부가 디테일은 그에 비례한 분량만.
    본문 길이가 아니라 중요도에 비례해서 요약.
(5) **중복 제거** — 마지막 단락에서 앞 내용을 다시 요약·반복하지 말 것. 한 줄 요약이 있으면
    그 역할은 거기서 끝.

[답변 포맷 — Readability / Visibility]
사용자가 명시 피드백을 줘서 다음 포맷을 따른다. 답변 *복잡도* 에 따라 두 분기:

A. **본문이 길거나 여러 단위의 정보를 다룰 때** (대략 본문 250자 이상 / 비교·분석·계획·리뷰 등):
   1. 답변 첫 섹션 헤더는 정확히 \`## 한 줄 요약\` 으로 시작 (한국어 사용자 친화 — "TL;DR", "Summary", "요약" 같은 다른 표현 금지). 결론·핵심을 1~3문장으로 압축. 사용자가 본문을 다 안 읽어도 take-away 가 잡혀야 함. **헤더에 이모지 절대 사용 금지**.
   2. 본문은 \`##\` 또는 \`###\` subheading 으로 시각 분할. 한 덩어리 prose 금지.
   3. 비교 가능한 정보(장단점·옵션·항목별 평가)는 마크다운 표로. 순서·체크리스트는 \`- \` 불릿.
   4. 첫 문장 자체가 결론이어야 한다는 룰은 유지 — 한 줄 요약 안에서 첫 문장이 결론.

B. **짧은 직답 (1~3문장 정도로 충분한 경우)**:
   1. 한 줄 요약 / subheading 강제 안 함. 그냥 결론으로 직답.
   2. 인사·서문 없이 첫 문장이 답. ("좋은 질문입니다" "분석해보겠습니다" 금지)

[공통 규칙]
- 한국어 마크다운. 코드 블록은 실제 코드일 때만 (\`\`\`).
- **이모지·이모티콘 절대 사용 금지** — 헤더든 본문이든 📌 🎯 💡 ✅ ⚠️ 🚀 ❓ 🧩 등 모두 금지. 사용자가 시각 노이즈로 느낀다고 명시 피드백. 정보는 텍스트·표·불릿으로만 전달.
- 추론 과정·\`<think>\`·"Thinking Process:" 같은 hidden reasoning 절대 노출 금지.
- 본문 분기를 LLM 자신이 판단 — 사용자가 모드 명시 안 함.`;

    /**
     * Single-pass direct-answer persona. Inputs that do not need splitting —
     * short questions, requests for a definition, simple fact checks — are
     * finished in one call, bypassing the three-stage outline → section →
     * polish LLM calls entirely so that even a small model can answer
     * immediately.
     */
    private readonly directPersona = `You are answering a Korean user request in one shot. No outline, no drafting — just the final answer.

Rules:
- 첫 문장이 결론 / 직답이다. "분석해보겠습니다" "좋은 질문입니다" 같은 서문 금지.
- Korean. Plain markdown.
- **이모지 / 이모티콘 사용 금지** (📌 🎯 💡 ✅ ⚠️ 등 전부). 사용자 명시 피드백.
- 짧은 질문엔 짧은 답. 한 문장으로 충분하면 한 문장. 1~3문장 직답이면 헤더·표 없이 그냥 prose 로.
- 만약 답이 *예상보다 길어지거나* 여러 정보 단위를 다루게 되면 \`## 한 줄 요약\` 후 \`##\` subheading 으로 분할 (사용자가 Readability 위해 요청한 룰). 표·불릿도 활용. 헤더에 이모지 사용 금지.
- 사용자가 본문(코드·기사·로그)을 첨부했으면 그 본문에서 인용. 본문에 없는 사실 지어내지 말 것.
- 추론 과정·"Thinking:"·<think> 노출 금지.`;

    /**
     * Runs one LLM call whose persona and prompt are chosen by
     * `options.config.role` ('outline' | 'section' | 'polish' | 'direct').
     * A missing or unrecognised role is treated as 'section'.
     *
     * @param input   The user request, or the joined draft when role is 'polish'.
     * @param context Optional auxiliary knowledge text; truncated before use.
     *                Not used by the 'polish' role.
     * @param signal  Optional cancellation signal forwarded to the LLM call.
     * @param options Role selection (`config.role`, `config.maxSections`) and
     *                per-call data (`priorResults`).
     * @returns The raw text returned by the model.
     */
    async execute(input: string, context?: string, signal?: AbortSignal, options?: AgentExecuteOptions): Promise<string> {
        const role = (options?.config?.role as string | undefined) || 'section';
        switch (role) {
            case 'outline': {
                // The caller (AgentEngine) passes the user's chunkedMaxSections
                // setting as options.config.maxSections. When it is absent the
                // hard ceiling is used (a path that should not normally run —
                // safety net).
                const requested: unknown = options?.config?.maxSections;
                const maxN = clampSectionLimit(requested, ChunkedWriter.MAX_SECTIONS_HARD_CEILING);
                return this.callLLM(this.outlinePersona, this.buildOutlinePrompt(input, context, maxN), signal);
            }
            case 'polish':
                return this.callLLM(this.polishPersona, this.buildPolishPrompt(input, options), signal);
            case 'direct':
                return this.callLLM(this.directPersona, this.buildDirectPrompt(input, context), signal);
            case 'section':
            default:
                return this.callLLM(this.sectionPersona, this.buildSectionPrompt(input, context, options), signal);
        }
    }

    /** Builds the user message for the 'outline' role; context is cut to 1200 characters. */
    private buildOutlinePrompt(userRequest: string, brainContext?: string, maxN: number = ChunkedWriter.MAX_SECTIONS_HARD_CEILING): string {
        const ctx = renderBrainContext('[보조 지식 컨텍스트 — 답변에 직접 인용하기보단 분할 결정에만 참고]', brainContext, 1200);
        return `[사용자 요청 — 본문이 포함돼 있다면 그게 1차 자료입니다]\n${userRequest}${ctx}\n\n[제약]\nMAX_N = ${maxN} — 절대 ${maxN}개 초과 금지.\n\n위 요청에 대한 답변을 ${maxN}개 이내의 섹션으로 어떻게 나눌지 JSON 배열로만 출력하세요.`;
    }

    /**
     * Builds the user message for the 'section' role from `options.priorResults`
     * (heading, scope, outline summary, previously written sections, original
     * prompt). Missing fields fall back to defaults; `input` is used when no
     * original prompt was supplied. Context is cut to 2000 characters.
     */
    private buildSectionPrompt(input: string, brainContext?: string, options?: AgentExecuteOptions): string {
        const prior = options?.priorResults ?? {};
        const heading = prior.sectionHeading ?? '본문';
        const scope = prior.sectionScope ?? '사용자 요청 전체';
        const outlineJoined = prior.outlineSummary ?? '';
        const prev = prior.prevSectionsTrimmed ?? '';
        const originalPrompt = prior.originalPrompt ?? input;
        const ctx = renderBrainContext('[보조 지식 컨텍스트]', brainContext, 2000);
        return `[사용자 원본 요청]\n${originalPrompt}\n\n[이 섹션 정보]\nheading: ${heading}\nscope: ${scope}\n\n[전체 outline — 다른 섹션은 다루지 마세요]\n${outlineJoined}\n\n[이미 작성된 섹션들 — 중복 금지]\n${prev || '(없음 — 첫 섹션)'}${ctx}\n\n위 scope만 다루는 섹션 본문을 작성하세요. heading 줄은 출력하지 말고 본문만.`;
    }

    /** Builds the user message for the 'polish' role: original request plus the sectioned draft. */
    private buildPolishPrompt(draft: string, options?: AgentExecuteOptions): string {
        const prior = options?.priorResults ?? {};
        const originalPrompt = prior.originalPrompt ?? '(원본 요청 없음)';
        return `[사용자 원본 요청]\n${originalPrompt}\n\n[섹션별 초안 — 이것을 다듬어 최종 답변으로]\n${draft}\n\n위 초안을 사용자에게 보낼 최종본으로 다듬으세요. 새 사실 추가 금지, 근거 없는 주장은 제거.`;
    }

    /** Builds the user message for the 'direct' role; context is cut to 2000 characters. */
    private buildDirectPrompt(userRequest: string, brainContext?: string): string {
        const ctx = renderBrainContext('[보조 지식 컨텍스트 — 필요할 때만 인용]', brainContext, 2000);
        return `[사용자 요청]\n${userRequest}${ctx}\n\n위 요청에 대한 최종 답변을 1회로 끝내세요.`;
    }
}

/**
 * Turns a requested section limit into a usable integer in `[1, ceiling]`.
 *
 * A positive number is floored and clamped; a fractional value below 1 yields
 * 1 rather than 0, so the outline prompt never asks for "at most 0 sections".
 * Anything else (missing, non-number, NaN, zero, negative) yields `ceiling`.
 */
function clampSectionLimit(requested: unknown, ceiling: number): number {
    if (typeof requested !== 'number' || !(requested > 0)) {
        return ceiling;
    }
    return Math.min(ceiling, Math.max(1, Math.floor(requested)));
}

/**
 * Formats the optional auxiliary-context block appended to a prompt:
 * two newlines, the header line, then the context cut to `limit` characters.
 * Returns `''` when the context is missing or whitespace-only.
 */
function renderBrainContext(header: string, brainContext: string | undefined, limit: number): string {
    if (!brainContext || brainContext.trim().length === 0) {
        return '';
    }
    return `\n\n${header}\n${brainContext.substring(0, limit)}`;
}