v2.2.256: 코어 채팅 큰 입력 청킹·통합 + 실제 컨텍스트 창 정렬 + 모델 핸들 race 수정

큰 입력 시 "Failed to acquire LM Studio model handle … Operation canceled"
로 턴 전체가 죽던 문제를 3계층으로 해결. 일반 채팅(코어 경로)은 그동안
단일 예산 호출이라 약한 모델·큰 입력에서 무너졌다 — 그 갭을 메움.

- 핸들 race 수정: getModelHandle 을 재시도 루프 안으로 이동. 취소/죽은-핸들류
  에러는 SDK 재생성 후 1회 자동 재시도(실제 사용자 취소는 존중). 원인: 라이프사이클의
  동시 로드가 abort 되면서 SDK 가 coalesce 한 JIT 조회까지 함께 죽던 것.
- Phase 1 실제 창 정렬: llm.getContextLength()(캐시)로 실측 창에 예산 클램프.
  설정값보다 작은 창으로 로드된 경우 서버 truncation/빈 답변 차단. 배지에 표시.
- Phase 2 코어 Map-Reduce: 단일 입력이 (유효 창 × ratio) 초과 시 청크→질의
  인지형 추출→통합. 부분/전체 폴백, 전 조각이 무관하면 원본 유지 + 정직 신호.
  동시성 기본 2.
- Phase 3 메타 노출: 진행/결과 배지 표시, [조각 k] 출처 옵트인.

신규 설정 5종. /meet·/review 전용 경로는 불변. 테스트 +25건, 전체 684 통과.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-19 18:05:44 +09:00
parent 6adbc2a6fa
commit 76d5fedfb5
13 changed files with 883 additions and 19 deletions
+94
View File
@@ -140,6 +140,7 @@ export { _parseTaskAttrs, _parseSheetAttrs, _parseCalEventAttrs };
// 8 method bodies extracted to dedicated modules. AgentExecutor 의 동명 메서드는
// 이제 thin wrapper — deps 객체를 묶어서 free function 으로 위임.
import { callNonStreaming as callNonStreamingFn } from './agent/llm/callNonStreaming';
import { runMapReduce, shouldMapReduce } from './agent/handlePrompt/largeInputMapReduce';
import { createStreamingRequest as createStreamingRequestFn } from './agent/llm/createStreamingRequest';
import { streamChatOnce as streamChatOnceFn } from './agent/llm/streamChatOnce';
import { maybeEmitDevilRebuttal as maybeEmitDevilRebuttalFn } from './agent/llm/devilRebuttal';
@@ -768,12 +769,103 @@ export class AgentExecutor {
// Context budget computation → src/agent/handlePrompt/computeBudgetedRequest.ts
const imageCount = (reqMessages as any[])
.reduce((n, m) => n + (Array.isArray(m?.images) ? m.images.length : 0), 0);
// Budget against the model's REAL loaded window, not just the user's
// contextLength setting. Best-effort + cached; only for the LM Studio
// SDK path (REST/Ollama/cloud expose no such query → undefined → prior behavior).
let actualContextLength: number | undefined;
try {
const _isCloud = (() => {
try {
const { parseModelPrefix } = require('./features/providers') as typeof import('./features/providers');
return !!parseModelPrefix(actualModel);
} catch { return false; }
})();
if (!_isCloud
&& resolveEngine(ollamaUrl) === 'lmstudio'
&& this.options.lmStudioStreamer?.getModelContextLength) {
actualContextLength = await this.options.lmStudioStreamer.getModelContextLength(actualModel);
}
} catch { /* best-effort — fall back to configured window */ }
// ── Large-input Map-Reduce ────────────────────────────────────────
// When a SINGLE user message is too big to fit the (real) window,
// history-trimming can't help — you can't drop the current question.
// Chunk it, extract only the request-relevant facts per chunk, and
// integrate, then let the normal streaming path answer from the
// condensed context. Only the user-visible turn; casual chat skipped.
if (loopDepth === 0 && !isCasualConversation && config.largeInputMapReduce) {
try {
const effWindow = (typeof actualContextLength === 'number' && actualContextLength > 0)
? Math.min(config.contextLength, actualContextLength)
: config.contextLength;
const lastUserIdx = reqMessages.map((m) => m.role).lastIndexOf('user');
const lastUser = lastUserIdx >= 0 ? reqMessages[lastUserIdx] : undefined;
const content = typeof lastUser?.content === 'string' ? lastUser.content : '';
const sysTokens = estimateTokens(fullSystemPrompt) + 4;
const mrCfg = {
enabled: true,
triggerRatio: config.mapReduceTriggerRatio,
concurrency: config.mapReduceConcurrency,
maxDepth: config.mapReduceMaxDepth,
showProvenance: config.mapReduceShowProvenance,
};
if (lastUser && shouldMapReduce(estimateTokens(content), effWindow, mrCfg)) {
const intent = content.length > 1400
? `${content.slice(0, 800)}\n…\n${content.slice(-400)}`
: content;
const mrEngine = resolveEngine(ollamaUrl);
this.webview?.postMessage({ type: 'mapReduceStatus', value: { phase: 'start' } });
const mr = await runMapReduce(
{
callLLM: async (messages, maxTokens) => {
const r = await this.callNonStreaming({
baseUrl: ollamaUrl,
modelName: actualModel,
engine: mrEngine,
messages,
temperature: 0.1,
maxTokens,
contextLength: effWindow,
signal: this.abortController?.signal,
});
return r.text;
},
estimateTokens,
log: (msg, meta) => logInfo(msg, meta),
signal: this.abortController?.signal,
},
{ intent, largeContent: content, windowTokens: effWindow, systemTokens: sysTokens, safetyMargin: config.contextSafetyMargin, cfg: mrCfg },
);
// allIrrelevant → keep original (budgeter truncates) rather than forcing an empty context.
if (!mr.allIrrelevant && mr.condensedContext.trim()) {
reqMessages[lastUserIdx] = {
...lastUser,
content: `${intent}\n\n──────── 추출된 관련 자료 (원본 ${mr.chunkCount}조각 중 ${mr.relevantCount}조각, 통합 ${mr.reduceDepth}단계) ────────\n${mr.condensedContext}`,
} as any;
logInfo('Large input condensed via map-reduce.', {
model: actualModel, chunkCount: mr.chunkCount, relevantCount: mr.relevantCount, reduceDepth: mr.reduceDepth,
});
}
this.webview?.postMessage({
type: 'mapReduceStatus',
value: { phase: 'done', chunkCount: mr.chunkCount, relevantCount: mr.relevantCount, allIrrelevant: mr.allIrrelevant },
});
}
} catch (e: any) {
// Any failure → fall through to the normal (single-shot) path. Worst case the
// budgeter truncates the oversized input, which is the prior behavior.
logError('Large-input map-reduce failed — falling back to single-shot path.', { error: e?.message ?? String(e) });
this.webview?.postMessage({ type: 'mapReduceStatus', value: { phase: 'error' } });
}
}
const _budget = computeBudgetedRequest({
fullSystemPrompt,
reqMessages,
actualModel,
config,
imageCount,
actualContextLength,
});
const messagesForRequest = _budget.messagesForRequest;
const ctxLimits = _budget.ctxLimits;
@@ -819,6 +911,8 @@ export class AgentExecutor {
paramB: modelParamB,
contextLength: ctxLimits.contextLength,
nominalContextLength: config.contextLength,
actualContextLength,
windowMismatch: _budget.windowMismatch,
cappedForSmallModel,
inputTokens,
maxOutputTokens,