v2.2.256: 코어 채팅 큰 입력 청킹·통합 + 실제 컨텍스트 창 정렬 + 모델 핸들 race 수정

큰 입력 시 "Failed to acquire LM Studio model handle … Operation canceled" 로 턴 전체가 죽던 문제를 3계층으로 해결. 일반 채팅(코어 경로)은 그동안 단일 예산 호출이라 약한 모델·큰 입력에서 무너졌다 — 그 갭을 메움.

- 핸들 race 수정: getModelHandle 을 재시도 루프 안으로 이동. 취소/죽은-핸들 류 에러는 SDK 재생성 후 1회 자동 재시도(실제 사용자 취소는 존중). 라이프사이클의 동시 로드가 abort 되면서 SDK 가 coalesce 한 JIT 조회까지 함께 죽던 것이 원인.
- Phase 1 실제 창 정렬: llm.getContextLength()(캐시)로 실측 창에 예산 클램프. 설정값보다 작은 창으로 로드된 경우 서버 truncation/빈 답변 차단. 배지에 표시.
- Phase 2 코어 Map-Reduce: 단일 입력이 (유효 창 × ratio) 초과 시 청크→질의 인지형 추출→통합. 부분/전체 폴백, 무관 시 정직 신호. 동시성 기본 2.
- Phase 3 메타 노출: 진행/결과 배지 표시, [조각 k] 출처 옵트인.

신규 설정 5종. /meet·/review 전용 경로는 불변. 테스트 +25건, 전체 684 통과.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-19 18:05:44 +09:00
parent 6adbc2a6fa
commit 76d5fedfb5
13 changed files with 883 additions and 19 deletions
@@ -19,6 +19,13 @@ export interface ComputeBudgetedRequestInput {
    /** Result of `getConfig()` — reads contextLength, maxOutputTokens, contextSafetyMargin, smallModelContextCap, autoCompactHistory. */
    config: any;
    imageCount: number;
+    /**
+     * The model's *actually-loaded* context window (LM Studio `getContextLength()`),
+     * when known. Budgeting uses the smaller of this and `config.contextLength` so we
+     * never overflow a model loaded with a smaller window than the user's setting.
+     * Omit (undefined) to budget against the configured value alone (prior behavior).
+     */
+    actualContextLength?: number;
 }

 export interface ComputeBudgetedRequestResult {
@@ -34,6 +41,10 @@ export interface ComputeBudgetedRequestResult {
    outputBudget: { maxOutputTokens: number; available: number; tight: boolean };
    modelParamB: number | null;
    cappedForSmallModel: boolean;
+    /** True when the model's real loaded window is smaller than `config.contextLength` (we clamped to the real one). */
+    windowMismatch: boolean;
+    /** The window actually used for budgeting (after real-window clamp + small-model cap). */
+    effectiveContextLength: number;
 }

 /**
@@ -60,15 +71,34 @@ export function computeBudgetedRequest(input: ComputeBudgetedRequestInput): Comp
    // smaller effective window. Never applied to 4B+ models, and never when the setting is 0 —
    // capping squeezes the output-token budget, so it's a knob, not a default.
    const modelParamB = estimateModelParamsB(actualModel);
+
+    // The real ceiling is whatever window the model was actually loaded with — the
+    // server truncates anything past it. When known, clamp the configured setting
+    // down to it so we budget against the smaller of the two. (When unknown, keep
+    // the configured value — prior behavior.)
+    const actualWindow = (typeof input.actualContextLength === 'number'
+        && Number.isFinite(input.actualContextLength)
+        && input.actualContextLength > 0)
+        ? input.actualContextLength
+        : undefined;
+    const configuredWindow = config.contextLength;
+    const windowMismatch = actualWindow !== undefined && actualWindow < configuredWindow;
+    const realWindow = actualWindow !== undefined ? Math.min(configuredWindow, actualWindow) : configuredWindow;
+    if (windowMismatch) {
+        logInfo('Model loaded with a smaller context window than the setting — clamping budget to the real window.', {
+            model: actualModel, configuredWindow, actualWindow,
+        });
+    }
+
    const smallModelCap = config.smallModelContextCap; // 0 = disabled (default)
    const cappedForSmallModel = smallModelCap > 0
        && modelParamB !== null && modelParamB <= 3
-        && config.contextLength > smallModelCap;
-    const effectiveContextLength = cappedForSmallModel ? smallModelCap : config.contextLength;
+        && realWindow > smallModelCap;
+    const effectiveContextLength = cappedForSmallModel ? smallModelCap : realWindow;
    if (cappedForSmallModel) {
        logInfo('Small model detected — capping effective context window for budgeting.', {
            model: actualModel, paramB: modelParamB,
-            nominalContext: config.contextLength, effectiveContext: effectiveContextLength,
+            nominalContext: realWindow, effectiveContext: effectiveContextLength,
        });
    }
    const ctxLimits: ContextLimits = {
@@ -157,5 +187,7 @@ export function computeBudgetedRequest(input: ComputeBudgetedRequestInput): Comp
        outputBudget,
        modelParamB,
        cappedForSmallModel,
+        windowMismatch,
+        effectiveContextLength,
    };
 }