feat: v2.2.92 → v2.2.158 — god-file 분해 + Stocks feature + 대화 연속성

R56–R59: agent.ts 2731→1529줄 god-file 분해 (25 modules) · attrParsers + LLM 메서드 8개 (callNonStreaming, streamChatOnce 등) · executeActions 415줄 → 8 handler 그룹 (file/run/list/brain/calendar/sheets/tasks) · handlePrompt 1100줄 → 7 phase 모듈 (system prompt + budget + autoContinue 등) R50–R55: extension.ts 1145→349줄 (telegram/settings/provider commands 분리) Stocks feature 신규: /stocks slash command (v2.2.152~158) · .astra/stocks.json 저장소 + Yahoo Finance 현재가 갱신 · 8 키워드 필터 (ROE/성장성/유동성/수익성/영업효율/기술력/안정성/PBR) · Naver 시가총액 페이지 JSON API (m.stock.naver.com) 발굴 · LLM Top 5 매력도 분석 + Telegram 자동 보고서 · KST 09:00/15:00 watcher 자동 모니터링 대화 연속성 (v2.2.150~157): · [PRIOR TURN CONCLUSION] block 으로 직전 결론 anchor · thin follow-up 분류 → boilerplate 헤더 suppression · slash 명령 결과 chatHistory mirror (capture wrapper) · echo/parrot 금지 system prompt rule 기타: /stocks 슬래시 자동완성 dropdown UI, Naver JSON API 전환 (cheerio 제거) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 09:59:32 +09:00
parent 4153f640c2
commit 0a97324f1b
149 changed files with 14628 additions and 6927 deletions
@@ -0,0 +1,161 @@
+import { logInfo, logError } from '../../utils';
+import type { ChatMessage } from '../../agent';
+import {
+    estimateTokens,
+    estimateMessagesTokens,
+    computeOutputBudget,
+    trimHistoryToBudget,
+    truncateSystemPromptContext,
+    estimateModelParamsB,
+    type ContextLimits,
+} from '../../lib/contextManager';
+import { buildDroppedHistorySummary } from '../../lib/contextBuilders/droppedHistorySummary';
+
+/** Arguments accepted by `computeBudgetedRequest`. */
+export interface ComputeBudgetedRequestInput {
+    /** Model identifier; passed to `estimateModelParamsB` and attached to log entries. */
+    actualModel: string;
+    /**
+     * Settings object, i.e. the result of `getConfig()`. Fields read during budgeting:
+     * contextLength, maxOutputTokens, contextSafetyMargin, smallModelContextCap,
+     * autoCompactHistory.
+     */
+    config: any;
+    /** Complete system prompt, before any budget-driven truncation. */
+    fullSystemPrompt: string;
+    /** Conversation messages. The caller is expected to have run `capChatHistory` on them already. */
+    reqMessages: ChatMessage[];
+    /** Number of images attached to the request; a fixed token reserve is set aside per image. */
+    imageCount: number;
+}
+
+/** Values produced by `computeBudgetedRequest`. */
+export interface ComputeBudgetedRequestResult {
+    /** Final message array: the budgeted system prompt followed by the budgeted history. */
+    messagesForRequest: ChatMessage[];
+    /** Limits that were used for budgeting. */
+    ctxLimits: ContextLimits;
+
+    /** Estimated tokens of `messagesForRequest` plus the image token reserve. */
+    inputTokens: number;
+    /** Token estimate of the budgeted system prompt (includes a small fixed overhead). */
+    systemTokens: number;
+    /** Whether the system prompt had to be truncated to fit. */
+    systemTruncated: boolean;
+
+    /** History messages removed by compaction; 0 when nothing was trimmed. */
+    droppedHistoryCount: number;
+    /** Number of history entries that went into the request. */
+    budgetedHistoryLength: number;
+
+    /** Output-token cap for this request; same value as `outputBudget.maxOutputTokens`. */
+    maxOutputTokens: number;
+    /** Exact return shape of `computeOutputBudget`. */
+    outputBudget: {
+        maxOutputTokens: number;
+        available: number;
+        tight: boolean;
+    };
+
+    /** Result of `estimateModelParamsB(actualModel)`; may be null. */
+    modelParamB: number | null;
+    /** True when the opt-in small-model cap replaced the nominal context length for budgeting. */
+    cappedForSmallModel: boolean;
+}
+
+/**
+ * Fits the request input (system prompt + conversation history + images) into the
+ * context-window budget and computes the final request message array together with the
+ * dynamic output-token cap.
+ *
+ * The caller is expected to have capped the number of messages with `capChatHistory`
+ * beforehand (static limits such as AgentExecutor.MAX_RETAINED_MESSAGES are not this
+ * function's concern).
+ *
+ * @param input - System prompt, history, model name, config and image count for the request.
+ * @returns The budgeted messages plus the token estimates and limits derived along the way.
+ */
+export function computeBudgetedRequest(input: ComputeBudgetedRequestInput): ComputeBudgetedRequestResult {
+    const { fullSystemPrompt, reqMessages, actualModel, config, imageCount } = input;
+
+    // ──────────────────────────────────────────────────────────────────
+    // [Context Limit Manager] The context length does NOT mean "the answer may be that
+    // long": system prompt + history + input + generated answer + margin ≤ context length.
+    // Before sending the request we estimate the input tokens and
+    //   (1) if the system prompt is excessive, shrink its [CONTEXT] block as a last resort,
+    //   (2) compact the history to fit the remaining budget (the chatHistory used for UI
+    //       display is not touched),
+    //   (3) compute the output cap (maxOutputTokens) dynamically.
+    // ──────────────────────────────────────────────────────────────────
+    // Optional opt-in guard (g1nation.smallModelContextCap, OFF/0 by default): some very small
+    // models (≤3B) emit EOS as the first token when the prompt is near their context window
+    // even though it nominally fits. If the user opted in, budget ≤3B models against that
+    // smaller effective window. Never applied to 4B+ models, and never when the setting is 0 —
+    // capping squeezes the output-token budget, so it's a knob, not a default.
+    const modelParamB = estimateModelParamsB(actualModel);
+    const smallModelCap = config.smallModelContextCap; // 0 = disabled (default)
+    const cappedForSmallModel = smallModelCap > 0
+        && modelParamB !== null && modelParamB <= 3
+        && config.contextLength > smallModelCap;
+    const effectiveContextLength = cappedForSmallModel ? smallModelCap : config.contextLength;
+    if (cappedForSmallModel) {
+        logInfo('Small model detected — capping effective context window for budgeting.', {
+            model: actualModel, paramB: modelParamB,
+            nominalContext: config.contextLength, effectiveContext: effectiveContextLength,
+        });
+    }
+    const ctxLimits: ContextLimits = {
+        contextLength: effectiveContextLength,
+        maxOutputTokens: config.maxOutputTokens,
+        safetyMargin: config.contextSafetyMargin,
+        minOutputTokens: 512,
+    };
+    // Flat per-image reserve added on top of the text token estimate.
+    const imageTokenReserve = imageCount * 1024;
+
+    // Output budget we ACTUALLY reserve before trimming — not the bare
+    // minOutputTokens floor (512). If we only reserve 512, a long session
+    // is allowed to grow the prompt until ~512-1k tokens remain for the
+    // answer; small/MoE local models (e.g. gemma 4B-active) then emit EOS
+    // as the first token and return an empty response. Reserving ~10% of
+    // the window (>=2048) forces history/system trimming to keep a real
+    // answer-sized hole open. Capped at maxOutputTokens.
+    const preferredOutputReserve = Math.min(
+        ctxLimits.maxOutputTokens,
+        Math.max(2048, Math.floor(ctxLimits.contextLength * 0.1))
+    );
+
+    // (1) The system prompt may use at most ~65% of the budget — beyond that, the
+    //     [CONTEXT] block is cut first.
+    const systemCapTokens = Math.max(
+        1024,
+        Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - preferredOutputReserve - imageTokenReserve) * 0.65)
+    );
+    const { prompt: budgetedSystemPrompt, truncated: systemTruncated } =
+        truncateSystemPromptContext(fullSystemPrompt, systemCapTokens);
+    if (systemTruncated) {
+        logInfo('System prompt context truncated to fit the context window.', { model: actualModel, systemCapTokens });
+    }
+    const systemTokens = estimateTokens(budgetedSystemPrompt) + 4;
+
+    // (2) Compact the conversation history.
+    const historyBudget = Math.max(
+        256,
+        ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - preferredOutputReserve - imageTokenReserve
+    );
+    let budgetedHistory: ChatMessage[] = reqMessages;
+    // Number of original messages removed by compaction. Taken from the trimmer's own
+    // count rather than from `reqMessages.length - budgetedHistory.length`: assuming the
+    // summary message built below is included in `trim.messages`, the length difference
+    // would under-report by one (and read 0 when exactly one message was dropped).
+    let droppedHistoryCount = 0;
+    if (config.autoCompactHistory) {
+        // v2.2.69 — receive the dropped messages, build a heuristic summary from them and
+        // prepend it as a single system message. A bare count marker said nothing about
+        // what had been discussed earlier, which caused a regression where the model lost
+        // context on follow-up turns. Now the U1/A1/U2/A2 gist survives, so the sliding
+        // window works.
+        const trim = trimHistoryToBudget<ChatMessage>(reqMessages, historyBudget, (_n, dropped) => ({
+            role: 'system',
+            content: buildDroppedHistorySummary(dropped),
+            internal: true,
+        }));
+        budgetedHistory = trim.messages;
+        droppedHistoryCount = trim.droppedCount;
+        if (droppedHistoryCount > 0) {
+            logInfo('Conversation history compacted to fit the context window (with summary).', {
+                model: actualModel, droppedCount: droppedHistoryCount, historyBudget,
+            });
+        }
+    }
+
+    const messagesForRequest: ChatMessage[] = [
+        { role: 'system', content: budgetedSystemPrompt, internal: true },
+        ...budgetedHistory
+    ];
+
+    // (3) Dynamic output cap.
+    const inputTokens = estimateMessagesTokens(messagesForRequest) + imageTokenReserve;
+    const outputBudget = computeOutputBudget(inputTokens, ctxLimits);
+    const maxOutputTokens = outputBudget.maxOutputTokens;
+    if (outputBudget.tight) {
+        logError('Prompt nearly fills the context window — output budget is at the minimum.', {
+            model: actualModel, contextLength: ctxLimits.contextLength, inputTokens, maxOutputTokens,
+        });
+    }
+    logInfo('Context budget computed.', {
+        model: actualModel, contextLength: ctxLimits.contextLength,
+        inputTokens, maxOutputTokens, droppedHistory: droppedHistoryCount,
+    });
+
+    return {
+        messagesForRequest,
+        ctxLimits,
+        inputTokens,
+        maxOutputTokens,
+        systemTokens,
+        systemTruncated,
+        droppedHistoryCount,
+        budgetedHistoryLength: budgetedHistory.length,
+        outputBudget,
+        modelParamB,
+        cappedForSmallModel,
+    };
+}