v2.2.256: 코어 채팅 큰 입력 청킹·통합 + 실제 컨텍스트 창 정렬 + 모델 핸들 race 수정
큰 입력 시 "Failed to acquire LM Studio model handle … Operation canceled" 로 턴 전체가 죽던 문제를 3계층으로 해결. 일반 채팅(코어 경로)은 그동안 단일 예산 호출이라 약한 모델·큰 입력에서 무너졌다 — 그 갭을 메움.

- 핸들 race 수정: getModelHandle 을 재시도 루프 안으로 이동. 취소/죽은-핸들 류 에러는 SDK 재생성 후 1회 자동 재시도(실제 사용자 취소는 존중). 원인은 라이프사이클 매니저의 동시 로드가 abort 되면서 SDK 가 coalesce 한 JIT 조회까지 함께 죽던 것.
- Phase 1 실제 창 정렬: llm.getContextLength()(캐시)로 실측 창에 예산 클램프. 설정값보다 작은 창으로 로드된 경우 서버 truncation/빈 답변 차단. 배지에 표시.
- Phase 2 코어 Map-Reduce: 단일 입력이 (유효 창 × ratio) 초과 시 청크→질의 인지형 추출→통합. 부분/전체 폴백, 무관 시 정직 신호. 동시성 기본 2.
- Phase 3 메타 노출: 진행/결과 배지 표시, [조각 k] 출처 옵트인. 신규 설정 5종.

/meet·/review 전용 경로는 불변. 테스트 +25건, 전체 684 통과.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+61
-12
@@ -83,6 +83,12 @@ export interface IChatStreamer {
|
||||
* silently-disposed handle that needs a fresh WebSocket round-trip.
|
||||
*/
|
||||
resetHandle?(modelName: string): Promise<void>;
|
||||
/**
|
||||
* The model's actually-loaded context window in tokens, or `undefined` if
|
||||
* unavailable. Callers use this to budget against the real ceiling instead
|
||||
* of the user's `contextLength` setting. Best-effort — never throws.
|
||||
*/
|
||||
getModelContextLength?(modelName: string): Promise<number | undefined>;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -115,7 +121,28 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
// would duplicate tokens.
|
||||
for (let attempt = 1; attempt <= 2; attempt++) {
|
||||
const refresh = attempt > 1;
|
||||
const model = await this.client.getModelHandle(trimmedModel, refresh ? { refresh: true } : undefined);
|
||||
// Handle acquisition is guarded on its own: it happens BEFORE the
|
||||
// stream try/catch below, so without this an "Operation canceled"
|
||||
// (the lifecycle manager's concurrent load for this same model was
|
||||
// superseded/aborted and the SDK coalesced our JIT lookup into that
|
||||
// dead load), a disposed handle, or a dropped WebSocket would crash
|
||||
// the whole turn with no retry. Large inputs make this far more
|
||||
// likely: loading a big model to hold a large prompt is slow, which
|
||||
// widens the window for a concurrent switch/abort to land mid-load.
|
||||
let model: Awaited<ReturnType<ILMStudioClient['getModelHandle']>>;
|
||||
try {
|
||||
model = await this.client.getModelHandle(trimmedModel, refresh ? { refresh: true } : undefined);
|
||||
} catch (acqErr: any) {
|
||||
// Genuine user cancel — don't retry, just stop quietly.
|
||||
if (req.signal?.aborted || acqErr?.name === 'AbortError') return;
|
||||
const acqMsg = String(acqErr?.message ?? acqErr);
|
||||
if (this.isTransientHandleError(acqMsg) && attempt === 1) {
|
||||
logInfo('LM Studio model handle acquisition hit a transient error — retrying with a fresh SDK.', { model: trimmedModel, error: acqMsg });
|
||||
continue; // attempt 2 passes { refresh: true } → recreates the SDK client
|
||||
}
|
||||
logError('LM Studio model handle acquisition failed.', { model: trimmedModel, error: acqMsg, attempt });
|
||||
throw acqErr;
|
||||
}
|
||||
logInfo('LM Studio SDK chat stream started.', { model: trimmedModel, messageCount: req.messages.length, attempt });
|
||||
|
||||
// Sampling defaults match the historical glitch-suppression preset for small /
|
||||
@@ -216,17 +243,7 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
}
|
||||
|
||||
const errMsg = String(caught?.message ?? caught);
|
||||
// Broaden the "handle is bound to a dead WebSocket binding" detection. All of
|
||||
// these resolve with the same fix (recreate the SDK client so the next
|
||||
// llm.model() lookup mints a fresh handle).
|
||||
const handleDead =
|
||||
/\bdisposed\b/i.test(errMsg)
|
||||
|| /lock\(\) request could not be registered/i.test(errMsg)
|
||||
|| /channel\s+closed/i.test(errMsg)
|
||||
|| /WebSocket\s+(?:is\s+not\s+open|closed|disconnected)/i.test(errMsg)
|
||||
|| /Connection\s+(?:lost|reset|closed)/i.test(errMsg)
|
||||
|| /\bECONNRESET\b/i.test(errMsg)
|
||||
|| /socket\s+hang\s*up/i.test(errMsg);
|
||||
const handleDead = this.isTransientHandleError(errMsg);
|
||||
|
||||
if (handleDead && yielded === 0 && attempt === 1) {
|
||||
logInfo('Dead LM Studio handle detected — retrying with a fresh SDK.', { model: trimmedModel, error: errMsg });
|
||||
@@ -238,6 +255,38 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Classifies an error message as a recoverable SDK-handle failure.
 *
 * Returns `true` when the text matches any known signature of a dead handle,
 * a dead WebSocket binding, or a load that was canceled while our lookup was
 * coalesced into it. Every one of these is addressed the same way: recreate
 * the SDK client so the next `llm.model()` lookup mints a fresh handle.
 *
 * Genuine user aborts are intentionally not handled here; callers are
 * expected to check `req.signal.aborted` / `AbortError` before consulting
 * this classifier.
 *
 * @param errMsg - Stringified error message to inspect.
 * @returns Whether the message matches a transient handle-failure signature.
 */
private isTransientHandleError(errMsg: string): boolean {
  // Matching is case-insensitive and order-independent: one hit is enough.
  const transientSignatures: readonly RegExp[] = [
    /\bdisposed\b/i,
    /lock\(\) request could not be registered/i,
    /channel\s+closed/i,
    /WebSocket\s+(?:is\s+not\s+open|closed|disconnected)/i,
    /Connection\s+(?:lost|reset|closed)/i,
    /\bECONNRESET\b/i,
    /socket\s+hang\s*up/i,
    // The lifecycle manager's load was superseded/aborted and the SDK had
    // coalesced our JIT model() lookup into that canceled load.
    /\boperation\s+cancell?ed\b/i,
  ];

  return transientSignatures.some((signature) => signature.test(errMsg));
}
|
||||
|
||||
/**
 * Looks up the context length for `modelName` through the underlying client.
 *
 * Best-effort: a blank or whitespace-only name, or any error raised by the
 * client, resolves to `undefined` instead of throwing.
 *
 * @param modelName - Model identifier; surrounding whitespace is ignored.
 * @returns The context length reported by the client, or `undefined` when
 *   the name is empty or the lookup fails.
 */
async getModelContextLength(modelName: string): Promise<number | undefined> {
  const name = (modelName || '').trim();
  if (name.length === 0) {
    return undefined;
  }

  let contextLength: number | undefined;
  try {
    contextLength = await this.client.getModelContextLength(name);
  } catch {
    // Swallowed on purpose — the caller falls back to the configured window.
    contextLength = undefined;
  }
  return contextLength;
}
|
||||
|
||||
async resetHandle(modelName: string): Promise<void> {
|
||||
const trimmed = (modelName || '').trim();
|
||||
if (!trimmed) return;
|
||||
|
||||
Reference in New Issue
Block a user