Update project files

This commit is contained in:
2026-05-22 15:00:14 +09:00
parent 132d130ff1
commit 8016ef18fa
29 changed files with 1353 additions and 804 deletions
+83 -4
View File
@@ -183,6 +183,25 @@ export class AgentExecutor {
static readonly ABS_PATH_RE = new RegExp(POSIX_ABS_PATH_SRC, 'i');
static readonly WIN_ABS_PATH_RE = new RegExp(WIN_ABS_PATH_SRC, 'i');
/**
 * Hard cap on the number of chat messages kept in the in-memory
 * `chatHistory` array. When the array grows past this, `capChatHistory`
 * drops the oldest entries; a leading message is kept only when its role is
 * `system` (a leading non-system message is dropped like any other).
 * This cap only fights unbounded growth of the array in very long-running
 * sessions. The per-request context budgeter (`trimHistoryToBudget`) still
 * does the real fitting of history into the model's context window.
 */
private static readonly MAX_RETAINED_MESSAGES = 40;
/**
 * Number of most recent messages that are always kept in full for
 * conversation continuity. Only messages older than this window are
 * candidates for having their tool-result content shortened.
 */
private static readonly RECENT_FULL_MESSAGES = 16;
/**
 * Maximum number of characters (UTF-16 code units) kept from the body of an
 * older internal tool-result message (content starting with
 * `[Result of read_file|list_files|list_brain|read_brain`). These dumps are
 * the bulkiest part of history and add little once the conversation has
 * moved on. A short truncation notice is appended after the kept prefix, so
 * the stored content ends up slightly longer than this value.
 */
private static readonly OLD_TOOL_RESULT_CAP = 600;
private chatHistory: ChatMessage[] = [];
private abortController: AbortController | null = null;
private webview: vscode.Webview | undefined;
@@ -225,9 +244,10 @@ export class AgentExecutor {
// Initialize 5-Layer Cognitive Memory System
const activeBrain = getActiveBrainProfile();
const initConfig = getConfig();
this.memoryManager = new MemoryManager(activeBrain.localBrainPath, {
enabled: getConfig().memoryEnabled,
shortTermLimit: getConfig().memoryShortTermMessages,
enabled: initConfig.memoryEnabled,
shortTermLimit: initConfig.memoryShortTermMessages,
});
// Initialize RAG Pipeline Orchestrator
@@ -495,6 +515,9 @@ export class AgentExecutor {
// 3. API Request Setup (라인 229에서 이미 추출한 ollamaUrl, configDefaultModel 재사용)
const actualModel = (modelName && modelName.trim()) || configDefaultModel;
// Bound the in-memory history before building the request — shrinks bulky
// older tool-result bodies and drops the oldest messages past the cap.
this.capChatHistory();
const reqMessages = this.buildRequestHistory(this.chatHistory);
// Handle Vision Content Injection
@@ -666,10 +689,22 @@ export class AgentExecutor {
.reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
const imageTokenReserve = imageCount * 1024;
// Output budget we ACTUALLY reserve before trimming — not the bare
// minOutputTokens floor (512). If we only reserve 512, a long session
// is allowed to grow the prompt until ~512-1k tokens remain for the
// answer; small/MoE local models (e.g. gemma 4B-active) then emit EOS
// as the first token and return an empty response. Reserving ~10% of
// the window (>=2048) forces history/system trimming to keep a real
// answer-sized hole open. Capped at maxOutputTokens.
const preferredOutputReserve = Math.min(
ctxLimits.maxOutputTokens,
Math.max(2048, Math.floor(ctxLimits.contextLength * 0.1))
);
// (1) 시스템 프롬프트는 예산의 ~65%까지만 허용 — 그 이상이면 [CONTEXT] 블록부터 잘라낸다.
const systemCapTokens = Math.max(
1024,
Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve) * 0.65)
Math.floor((ctxLimits.contextLength - ctxLimits.safetyMargin - preferredOutputReserve - imageTokenReserve) * 0.65)
);
const { prompt: budgetedSystemPrompt, truncated: systemTruncated } =
truncateSystemPromptContext(fullSystemPrompt, systemCapTokens);
@@ -681,7 +716,7 @@ export class AgentExecutor {
// (2) 대화 기록 압축.
const historyBudget = Math.max(
256,
ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - ctxLimits.minOutputTokens - imageTokenReserve
ctxLimits.contextLength - systemTokens - ctxLimits.safetyMargin - preferredOutputReserve - imageTokenReserve
);
let budgetedHistory: ChatMessage[] = reqMessages;
if (config.autoCompactHistory) {
@@ -1977,6 +2012,50 @@ export class AgentExecutor {
].join('\n');
}
/**
 * Bounds the in-memory `chatHistory` so a very long-running session does not
 * grow it without limit. The array and its messages are mutated in place.
 *
 *   1. Messages older than the most recent `RECENT_FULL_MESSAGES` that are
 *      internal system messages (`role === 'system'` and `internal` set) and
 *      whose content starts with `[Result of read_file|list_files|list_brain|
 *      read_brain` are shortened to at most `OLD_TOOL_RESULT_CAP` UTF-16 code
 *      units plus a truncation notice. Other messages — user/assistant text
 *      and other internal notices — are never rewritten.
 *   2. If the array is still longer than `MAX_RETAINED_MESSAGES`, the oldest
 *      messages are removed. A leading message is kept only when its role is
 *      `system`; otherwise removal starts at index 0.
 *
 * A history of at most `RECENT_FULL_MESSAGES` messages is left untouched.
 * Calling this repeatedly is idempotent: bodies that already carry the
 * truncation notice are skipped instead of being sliced again.
 */
private capChatHistory(): void {
    const history = this.chatHistory;
    if (history.length === 0) return;

    const cap = AgentExecutor.OLD_TOOL_RESULT_CAP;
    // Runtime marker appended to every shortened body; also used below to
    // recognise bodies that an earlier call already shortened.
    const truncationNotice = '\n…[이전 도구 결과는 컨텍스트 절약을 위해 축약되었습니다]';
    // Hoisted so the pattern is built once per call rather than per message.
    const bulkyToolResult = /^\[Result of (read_file|list_files|list_brain|read_brain)\b/;

    // (1) Shrink bulky tool-result bodies of messages outside the recent window.
    const recentStart = Math.max(0, history.length - AgentExecutor.RECENT_FULL_MESSAGES);
    for (let i = 0; i < recentStart; i++) {
        const msg = history[i];
        if (msg.role !== 'system' || !msg.internal || typeof msg.content !== 'string') continue;
        const content = msg.content;
        // Only the bulky tool-result dumps — leave compaction notices etc. alone.
        if (!bulkyToolResult.test(content)) continue;
        if (content.length <= cap) continue;
        // Already shortened by a previous call: kept prefix (<= cap) + notice.
        // Without this check every request would re-slice the same body.
        if (content.length <= cap + truncationNotice.length && content.endsWith(truncationNotice)) continue;

        // `slice` counts UTF-16 code units. If the last kept unit is a high
        // surrogate, its low partner would be cut off and a lone surrogate
        // would remain in the stored text — drop the high surrogate as well.
        let cut = cap;
        if (cut > 0) {
            const lastUnit = content.charCodeAt(cut - 1);
            if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
        }
        msg.content = content.slice(0, cut) + truncationNotice;
    }

    // (2) Drop the oldest messages once over the hard cap, keeping a leading
    // system message in place if there is one.
    // NOTE(review): any system-role message at index 0 is pinned, including an
    // internal tool-result message that reached index 0 after earlier drops —
    // confirm whether only the original framing message should be pinned.
    const overflow = history.length - AgentExecutor.MAX_RETAINED_MESSAGES;
    if (overflow > 0) {
        const preserveFirst = history[0].role === 'system';
        history.splice(preserveFirst ? 1 : 0, overflow);
    }
}
private buildRequestHistory(history: ChatMessage[]): ChatMessage[] {
return history.map((message) => {
if (message.role !== 'assistant' || typeof message.content !== 'string') {