v2.2.256: 코어 채팅 큰 입력 청킹·통합 + 실제 컨텍스트 창 정렬 + 모델 핸들 race 수정

큰 입력 시 "Failed to acquire LM Studio model handle … Operation canceled"
로 턴 전체가 죽던 문제를 3계층으로 해결. 일반 채팅(코어 경로)은 그동안
단일 예산 호출이라 약한 모델·큰 입력에서 무너졌다 — 그 갭을 메움.

- 핸들 race 수정: getModelHandle 을 재시도 루프 안으로 이동. 취소/죽은-핸들
  류 에러는 SDK 재생성 후 1회 자동 재시도(실제 사용자 취소는 존중). 원인:
  라이프사이클 매니저의 동시 로드가 abort 되면서, SDK 가 그 로드에 coalesce 한
  JIT 조회까지 함께 죽던 것.
- Phase 1 실제 창 정렬: llm.getContextLength()(캐시)로 실측 창에 예산 클램프.
  설정값보다 작은 창으로 로드된 경우 서버 truncation/빈 답변 차단. 배지에 표시.
- Phase 2 코어 Map-Reduce: 단일 입력이 (유효 창 × ratio) 초과 시 청크→질의
  인지형 추출→통합. 부분/전체 폴백, 무관 시 정직 신호. 동시성 기본 2.
- Phase 3 메타 노출: 진행/결과 배지 표시, [조각 k] 출처 옵트인.

신규 설정 5종. /meet·/review 전용 경로는 불변. 테스트 +25건, 전체 684 통과.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-19 18:05:44 +09:00
parent 6adbc2a6fa
commit 76d5fedfb5
13 changed files with 883 additions and 19 deletions
+74 -1
View File
@@ -69,6 +69,9 @@ class FakeModel {
class FakeClient implements ILMStudioClient {
/** The fake model that a successful getModelHandle call resolves to. */
public model: FakeModel;
/** Every model key passed to getModelHandle, in call order. */
public getModelHandleCalls: Array<string> = [];
/** The options argument of each getModelHandle call, in call order (undefined when omitted). */
public getModelHandleOpts: ({ refresh?: boolean } | undefined)[] = [];
/**
 * Queue of errors for getModelHandle: each call removes and throws the first
 * entry; once the queue is empty the call resolves to the model instead.
 */
public handleAcqFailures: Array<Error> = [];
constructor(model: FakeModel = new FakeModel()) {
this.model = model;
@@ -83,10 +86,18 @@ class FakeClient implements ILMStudioClient {
/** Stub: always resolves to an empty list. */
async listDownloadedCached(): Promise<string[]> {
  return [];
}
/** Stub: always resolves to true. */
async isReachable(): Promise<boolean> {
  return true;
}
async getModelHandle(modelKey: string): Promise<any> {
/**
 * Test double for handle acquisition.
 *
 * Records the requested key and the options it was called with, then either
 * throws the next queued error from `handleAcqFailures` or, when the queue is
 * empty, resolves to the fake model.
 */
async getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<any> {
  // Log the call first so a failing attempt is still visible to assertions.
  this.getModelHandleCalls.push(modelKey);
  this.getModelHandleOpts.push(options);

  const queuedError = this.handleAcqFailures.shift();
  if (queuedError) {
    throw queuedError;
  }
  return this.model;
}
/** Value returned by getModelContextLength; stays undefined unless a test assigns it. */
public contextLength: number | undefined = undefined;

/** Resolves to the configured `contextLength`, whatever model key is passed. */
async getModelContextLength(_modelKey: string): Promise<number | undefined> {
  const configured = this.contextLength;
  return configured;
}
}
// The streamer emits a trailing { token: '', stopReason } event on normal completion;
@@ -209,6 +220,68 @@ describe('LMStudioStreamer', () => {
expect(out).toEqual(['a']);
});
test('transient "Operation canceled" on handle acquisition is retried with a fresh SDK', async () => {
  // Scenario: the lifecycle manager's concurrent load of this model was
  // superseded, and the SDK had coalesced our JIT model() lookup into that
  // aborted load. The first getModelHandle therefore throws; the streamer is
  // expected to ask for a refreshed SDK and try again instead of failing the turn.
  const transientFailure = new Error('Failed to acquire LM Studio model handle "m1": Operation canceled.');
  const fakeClient = new FakeClient(new FakeModel({ chunks: ['ok'] }));
  fakeClient.handleAcqFailures = [transientFailure];

  const output = await collect(
    new LMStudioStreamer(fakeClient).stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.2,
    }),
  );

  expect(output).toEqual(['ok']);
  expect(fakeClient.getModelHandleCalls).toEqual(['m1', 'm1']);

  // The initial attempt carries no options; the retry must request refresh=true.
  const [initialOpts, retryOpts] = fakeClient.getModelHandleOpts;
  expect(initialOpts).toBeUndefined();
  expect(retryOpts).toEqual({ refresh: true });
});
test('non-transient handle acquisition error is thrown without retry', async () => {
  const fakeClient = new FakeClient();
  fakeClient.handleAcqFailures = [new Error('Failed to acquire LM Studio model handle "m1": model not found')];

  const pending = collect(
    new LMStudioStreamer(fakeClient).stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.2,
    }),
  );

  // The error must surface to the caller as-is...
  await expect(pending).rejects.toThrow(/model not found/);
  // ...after exactly one acquisition attempt (no retry).
  expect(fakeClient.getModelHandleCalls).toEqual(['m1']);
});
test('handle acquisition failure is swallowed when the user already aborted', async () => {
  const fakeClient = new FakeClient();
  fakeClient.handleAcqFailures = [new Error('Operation canceled')];
  const streamer = new LMStudioStreamer(fakeClient);

  // Abort before the stream is even started, so the cancel error raised during
  // handle acquisition coincides with a genuine user cancel.
  const controller = new AbortController();
  controller.abort();

  const emitted = await collect(
    streamer.stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.2,
      signal: controller.signal,
    }),
  );

  // Nothing is emitted, nothing is thrown, and there is no second attempt.
  expect(emitted).toEqual([]);
  expect(fakeClient.getModelHandleCalls).toEqual(['m1']);
});
test('getModelContextLength delegates to the client (and survives a throwing client)', async () => {
  const healthyClient = new FakeClient();
  healthyClient.contextLength = 8192;
  const streamer = new LMStudioStreamer(healthyClient);

  const forNamedModel = await streamer.getModelContextLength('m1');
  expect(forNamedModel).toBe(8192);

  // The fake client ignores its key argument, so an undefined result for an
  // empty key has to come from the streamer rather than from the client.
  const forEmptyKey = await streamer.getModelContextLength('');
  expect(forEmptyKey).toBeUndefined();

  // A client whose lookup throws must make the streamer resolve to undefined
  // instead of rejecting.
  const brokenClient = new FakeClient();
  brokenClient.getModelContextLength = async () => {
    throw new Error('ws down');
  };
  const degradedStreamer = new LMStudioStreamer(brokenClient);
  const fromBrokenClient = await degradedStreamer.getModelContextLength('m1');
  expect(fromBrokenClient).toBeUndefined();
});
test('passes messages through to model.respond', async () => {
const client = new FakeClient();
const streamer = new LMStudioStreamer(client);