/**
 * Unit tests for LMStudioStreamer.
 *
 * Strategy: inject a fake ILMStudioClient that returns a fake model handle whose
 * `respond()` yields a controllable async iterable. No real SDK or WebSocket touched.
 */
import { LMStudioStreamer } from '../src/lmstudio/streamer';
import type { ChatStreamEvent } from '../src/lmstudio/streamer';
import type { ILMStudioClient } from '../src/lmstudio/client';

/**
 * Test double for a model handle.
 *
 * `respond()` records the chat and options it was called with and returns a fake
 * prediction whose async iterator yields one `{ content }` fragment per entry in
 * `chunks`. Constructor options:
 *  - `chunks`: fragments to emit (defaults to `['Hel', 'lo, ', 'world']`).
 *  - `failAfter`: once this many fragments have been emitted, the next `next()`
 *    call throws `Error('mid-stream failure')`.
 *  - `throwOnRespond`: if set, `respond()` throws it before recording anything.
 *  - `stopReason`: value reported as `stats.stopReason` when the prediction is awaited.
 */
class FakeModel {
  public lastChat: any = null;
  public lastOpts: any = null;
  public cancelCount = 0;
  public failNext: Error | null = null;
  public chunks: string[] = [];
  public stopReason: string | undefined;

  private readonly _failAfter?: number;
  private readonly _throwOnRespond?: Error;

  constructor(
    opts: { chunks?: string[]; failAfter?: number; throwOnRespond?: Error; stopReason?: string } = {},
  ) {
    this.chunks = opts.chunks ?? ['Hel', 'lo, ', 'world'];
    this._failAfter = opts.failAfter;
    this._throwOnRespond = opts.throwOnRespond;
    this.stopReason = opts.stopReason;
  }

  /**
   * Records `chat` / `opts` and returns a fake prediction.
   *
   * @param chat - stored verbatim in `lastChat`.
   * @param opts - stored verbatim in `lastOpts`; `opts.signal.aborted` is consulted
   *   on every iterator step.
   * @returns an object that is async-iterable, thenable, and has `cancel()`.
   * @throws the configured `throwOnRespond` error, before any state is recorded.
   */
  respond(chat: any, opts: any): any {
    if (this._throwOnRespond) {
      throw this._throwOnRespond;
    }
    this.lastChat = chat;
    this.lastOpts = opts;

    // Snapshot configuration at call time so later mutation of the model's
    // fields does not affect a prediction that is already in flight.
    const chunks = this.chunks;
    const failAfter = this._failAfter;
    const stopReason = this.stopReason;
    let i = 0;

    // Real OngoingPrediction is both async-iterable AND a thenable resolving to a
    // PredictionResult with `.stats.stopReason`. Mirror that shape so the streamer
    // can read the stop reason after the stream drains.
    return {
      // Arrow function: `this` is the FakeModel, so no `self` alias is needed.
      cancel: async () => {
        this.cancelCount++;
      },
      then(resolve: (v: any) => void) {
        resolve({ stats: { stopReason } });
      },
      [Symbol.asyncIterator]() {
        return {
          async next() {
            // An aborted signal ends the stream quietly; this check runs before
            // the failure check, so an aborted stream never throws.
            if (opts?.signal?.aborted) {
              return { value: undefined, done: true };
            }
            if (failAfter !== undefined && i >= failAfter) {
              throw new Error('mid-stream failure');
            }
            if (i >= chunks.length) {
              return { value: undefined, done: true };
            }
            const fragment = { content: chunks[i++] };
            return { value: fragment, done: false };
          },
        };
      },
    };
  }
}

/**
 * Test double for ILMStudioClient: every operation is a no-op or returns an empty
 * result, except `getModelHandle`, which logs the requested key and returns the
 * injected FakeModel.
 */
class FakeClient implements ILMStudioClient {
  public model: FakeModel;
  public getModelHandleCalls: string[] = [];

  constructor(model: FakeModel = new FakeModel()) {
    this.model = model;
  }

  setBaseUrl(_: string): void {
    /* noop */
  }

  async load(_: string): Promise<void> {
    /* noop */
  }

  async unload(_: string): Promise<void> {
    /* noop */
  }

  // NOTE(review): the element types of ILMStudioClient's list methods are not
  // visible from this file. `never[]` describes "always empty" and is assignable
  // to any array element type; swap in the concrete type if preferred.
  async listLoaded(): Promise<never[]> {
    return [];
  }

  async listLoadedCached(): Promise<never[]> {
    return [];
  }

  async listDownloaded(): Promise<never[]> {
    return [];
  }

  async listDownloadedCached(): Promise<never[]> {
    return [];
  }

  async isReachable(): Promise<boolean> {
    return true;
  }

  // Returns `any` because the fake only mimics the part of the handle exercised
  // by these tests. NOTE(review): confirm against ILMStudioClient's declared type.
  async getModelHandle(modelKey: string): Promise<any> {
    this.getModelHandleCalls.push(modelKey);
    return this.model;
  }
}

// The streamer emits a trailing { token: '', stopReason } event on normal completion;
// `collect` returns just the non-empty content tokens (what every real consumer uses).
/** Drains `stream` and returns its non-empty `token` values in arrival order. */
async function collect(stream: AsyncIterable<ChatStreamEvent>): Promise<string[]> {
  const out: string[] = [];
  for await (const { token } of stream) {
    if (token) out.push(token);
  }
  return out;
}

/** Drains `stream` and returns every event unfiltered, including empty-token ones. */
async function collectEvents(stream: AsyncIterable<ChatStreamEvent>): Promise<ChatStreamEvent[]> {
  const out: ChatStreamEvent[] = [];
  for await (const ev of stream) out.push(ev);
  return out;
}

describe('LMStudioStreamer', () => {
  test('streams tokens from the SDK respond iterator', async () => {
    const client = new FakeClient(new FakeModel({ chunks: ['Hel', 'lo'] }));
    const streamer = new LMStudioStreamer(client);
    const tokens = await collect(streamer.stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.4,
    }));
    expect(tokens).toEqual(['Hel', 'lo']);
    expect(client.getModelHandleCalls).toEqual(['m1']);
    expect(client.model.lastOpts.temperature).toBe(0.4);
  });

  test('emits a trailing stopReason event from prediction stats', async () => {
    const client = new FakeClient(
      new FakeModel({ chunks: ['hi'], stopReason: 'maxPredictedTokensReached' }),
    );
    const streamer = new LMStudioStreamer(client);
    const events = await collectEvents(streamer.stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.1,
      maxTokens: 64,
    }));
    expect(events.map(e => e.token)).toEqual(['hi', '']);
    expect(events[events.length - 1].stopReason).toBe('maxPredictedTokensReached');
    // maxTokens / contextOverflowPolicy are forwarded to the SDK
    expect(client.model.lastOpts.maxTokens).toBe(64);
    expect(client.model.lastOpts.contextOverflowPolicy).toBe('stopAtLimit');
  });

  test('passes signal through to the SDK', async () => {
    const client = new FakeClient();
    const streamer = new LMStudioStreamer(client);
    const ac = new AbortController();
    await collect(streamer.stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.2,
      signal: ac.signal,
    }));
    expect(client.model.lastOpts.signal).toBe(ac.signal);
  });

  test('aborting mid-stream stops cleanly without throwing', async () => {
    const client = new FakeClient(new FakeModel({ chunks: ['a', 'b', 'c', 'd'] }));
    const streamer = new LMStudioStreamer(client);
    const ac = new AbortController();
    const out: string[] = [];
    const iter = streamer.stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.3,
      signal: ac.signal,
    });
    for await (const { token } of iter) {
      out.push(token);
      if (out.length === 2) ac.abort();
    }
    // Every event is recorded here (empty tokens included), so the upper bound
    // leaves room for one more event after the abort.
    expect(out.length).toBeGreaterThanOrEqual(2);
    expect(out.length).toBeLessThanOrEqual(3);
  });

  test('rejects when modelName is empty', async () => {
    const client = new FakeClient();
    const streamer = new LMStudioStreamer(client);
    await expect(collect(streamer.stream({
      modelName: '',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.2,
    }))).rejects.toThrow(/without a model name/i);
  });

  test('mid-stream SDK failure is re-thrown when signal not aborted', async () => {
    const client = new FakeClient(new FakeModel({ chunks: ['a', 'b'], failAfter: 1 }));
    const streamer = new LMStudioStreamer(client);
    await expect(collect(streamer.stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.2,
    }))).rejects.toThrow(/mid-stream failure/);
  });

  test('mid-stream SDK failure swallowed if signal already aborted', async () => {
    const client = new FakeClient(new FakeModel({ chunks: ['a', 'b'], failAfter: 1 }));
    const streamer = new LMStudioStreamer(client);
    const ac = new AbortController();
    const iter = streamer.stream({
      modelName: 'm1',
      messages: [{ role: 'user', content: 'hi' }],
      temperature: 0.2,
      signal: ac.signal,
    });
    const out: string[] = [];
    // Deliberately no try/catch: if the streamer surfaced an error after the
    // abort, the rejection must fail this test instead of being hidden.
    for await (const { token } of iter) {
      out.push(token);
      ac.abort(); // abort right after first token, before failure point
    }
    expect(out).toEqual(['a']);
  });

  test('passes messages through to model.respond', async () => {
    const client = new FakeClient();
    const streamer = new LMStudioStreamer(client);
    const messages = [
      { role: 'system' as const, content: 'sys' },
      { role: 'user' as const, content: 'hi' },
    ];
    await collect(streamer.stream({ modelName: 'm1', messages, temperature: 0.5 }));
    expect(client.model.lastChat).toEqual(messages);
  });
});