// connectai/tests/lmStudioStreamer.test.ts

/**
 * Unit tests for LMStudioStreamer.
 *
 * Strategy: inject a fake ILMStudioClient that returns a fake model handle whose
 * `respond()` yields a controllable async iterable. No real SDK or WebSocket touched.
 */

import { LMStudioStreamer } from '../src/lmstudio/streamer';
import type { ChatStreamEvent } from '../src/lmstudio/streamer';
import type { ILMStudioClient } from '../src/lmstudio/client';

/**
 * Test double for an LM Studio model handle.
 *
 * `respond()` records its arguments and hands back a scripted "prediction"
 * object that can be iterated asynchronously, awaited and cancelled.
 */
class FakeModel {
    /** First argument of the most recent successful `respond()` call. */
    public lastChat: any = null;
    /** Second argument of the most recent successful `respond()` call. */
    public lastOpts: any = null;
    /** Number of times `cancel()` was invoked on any prediction from this model. */
    public cancelCount = 0;
    public failNext: Error | null = null;
    /** Content fragments emitted, in order, by each prediction. */
    public chunks: string[] = [];
    /** Value surfaced as `stats.stopReason` when a prediction is awaited. */
    public stopReason: string | undefined;

    /** Zero-based fragment index at which iteration starts throwing, if set. */
    private failAfterIndex?: number;
    /** Error thrown synchronously by `respond()`, if set. */
    private respondError?: Error;

    /**
     * @param opts.chunks Fragments to stream; defaults to `['Hel', 'lo, ', 'world']`.
     * @param opts.failAfter Number of fragments to emit before `next()` throws.
     * @param opts.throwOnRespond Error to throw from `respond()` itself.
     * @param opts.stopReason Stop reason reported once the prediction is awaited.
     */
    constructor(opts: { chunks?: string[]; failAfter?: number; throwOnRespond?: Error; stopReason?: string } = {}) {
        this.chunks = opts.chunks ?? ['Hel', 'lo, ', 'world'];
        this.failAfterIndex = opts.failAfter;
        this.respondError = opts.throwOnRespond;
        this.stopReason = opts.stopReason;
    }

    /**
     * Records `chat` / `opts` and returns a scripted prediction.
     *
     * Real OngoingPrediction is both async-iterable AND a thenable resolving to a
     * PredictionResult with `.stats.stopReason`; the returned object mirrors that
     * shape so the streamer can read the stop reason after the stream drains.
     *
     * @throws The configured `throwOnRespond` error, before anything is recorded.
     */
    respond(chat: any, opts: any) {
        if (this.respondError) {
            throw this.respondError;
        }
        this.lastChat = chat;
        this.lastOpts = opts;

        // Snapshot the script at call time; later changes to the model's fields
        // do not affect a prediction that has already been handed out.
        const { chunks: script, stopReason: reason } = this;
        const failAt = this.failAfterIndex;
        // The cursor belongs to the prediction, so every iterator obtained from
        // the same prediction advances the same position.
        let cursor = 0;

        const step = async () => {
            // Order matters: an aborted signal ends the stream before the
            // scripted failure or exhaustion checks are consulted.
            if (opts?.signal?.aborted) {
                return { value: undefined, done: true };
            }
            if (failAt !== undefined && cursor >= failAt) {
                throw new Error('mid-stream failure');
            }
            if (cursor >= script.length) {
                return { value: undefined, done: true };
            }
            const fragment = { content: script[cursor] };
            cursor += 1;
            return { value: fragment, done: false };
        };

        const prediction: any = {
            cancel: async () => {
                this.cancelCount += 1;
            },
            then: (resolve: (v: any) => void) => {
                resolve({ stats: { stopReason: reason } });
            },
            [Symbol.asyncIterator]: () => ({ next: step }),
        };
        return prediction;
    }
}

/**
 * In-memory ILMStudioClient used by the tests.
 *
 * Only `getModelHandle` and `getModelContextLength` carry behaviour; every other
 * method is an inert stub returning an empty or positive result.
 */
class FakeClient implements ILMStudioClient {
    /** Handle returned by `getModelHandle` once no scripted failures remain. */
    public model: FakeModel;
    /** Model keys passed to `getModelHandle`, in call order. */
    public getModelHandleCalls: string[] = [];
    /** Options passed to `getModelHandle`, index-aligned with `getModelHandleCalls`. */
    public getModelHandleOpts: Array<{ refresh?: boolean } | undefined> = [];
    /** Errors to throw on successive getModelHandle calls before returning the model. */
    public handleAcqFailures: Error[] = [];
    /** Value reported by `getModelContextLength`. */
    public contextLength: number | undefined = undefined;

    constructor(model: FakeModel = new FakeModel()) {
        this.model = model;
    }

    // --- Inert stubs -----------------------------------------------------

    setBaseUrl(_: string): void {
        /* noop */
    }

    async load(_: string): Promise<void> {
        /* noop */
    }

    async unload(_: string): Promise<void> {
        /* noop */
    }

    async listLoaded(): Promise<string[]> {
        return [];
    }

    async listLoadedCached(): Promise<string[]> {
        return [];
    }

    async listDownloaded(): Promise<string[]> {
        return [];
    }

    async listDownloadedCached(): Promise<string[]> {
        return [];
    }

    async isReachable(): Promise<boolean> {
        return true;
    }

    // --- Behaviour the tests observe -------------------------------------

    /**
     * Logs the call, then either rejects with the next queued failure or
     * resolves with the fake model.
     */
    async getModelHandle(modelKey: string, options?: { refresh?: boolean }): Promise<any> {
        // Record first so that failed attempts are visible to assertions too.
        this.getModelHandleCalls.push(modelKey);
        this.getModelHandleOpts.push(options);

        const scriptedError = this.handleAcqFailures.shift();
        if (scriptedError) {
            throw scriptedError;
        }
        return this.model;
    }

    /** Resolves with the current `contextLength`, whatever model key is given. */
    async getModelContextLength(_modelKey: string): Promise<number | undefined> {
        return this.contextLength;
    }
}

/**
 * Drains `stream` and returns its non-empty content tokens in order.
 *
 * The streamer emits a trailing `{ token: '', stopReason }` event on normal
 * completion; events with an empty token are skipped here, leaving just the
 * content (what every real consumer uses).
 */
async function collect(stream: AsyncIterable<ChatStreamEvent>): Promise<string[]> {
    const tokens: string[] = [];
    for await (const event of stream) {
        const text = event.token;
        if (!text) {
            continue;
        }
        tokens.push(text);
    }
    return tokens;
}

/**
 * Drains `stream` and returns every event it produced, unfiltered and in
 * order (including events whose token is empty).
 */
async function collectEvents(stream: AsyncIterable<ChatStreamEvent>): Promise<ChatStreamEvent[]> {
    const events: ChatStreamEvent[] = [];
    for await (const event of stream) {
        events.push(event);
    }
    return events;
}

// Behavioural suite for LMStudioStreamer, driven entirely through FakeClient /
// FakeModel so that no real SDK connection is needed.
describe('LMStudioStreamer', () => {
    // Happy path: fragments come back in order, the handle is looked up once,
    // and the sampling temperature reaches the SDK options.
    test('streams tokens from the SDK respond iterator', async () => {
        const client = new FakeClient(new FakeModel({ chunks: ['Hel', 'lo'] }));
        const streamer = new LMStudioStreamer(client);
        const tokens = await collect(streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.4,
        }));
        expect(tokens).toEqual(['Hel', 'lo']);
        expect(client.getModelHandleCalls).toEqual(['m1']);
        expect(client.model.lastOpts.temperature).toBe(0.4);
    });

    // Uses collectEvents (not collect) so the empty-token trailer is visible.
    test('emits a trailing stopReason event from prediction stats', async () => {
        const client = new FakeClient(new FakeModel({ chunks: ['hi'], stopReason: 'maxPredictedTokensReached' }));
        const streamer = new LMStudioStreamer(client);
        const events = await collectEvents(streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.1,
            maxTokens: 64,
        }));
        expect(events.map(e => e.token)).toEqual(['hi', '']);
        expect(events[events.length - 1].stopReason).toBe('maxPredictedTokensReached');
        // maxTokens / contextOverflowPolicy are forwarded to the SDK
        expect(client.model.lastOpts.maxTokens).toBe(64);
        expect(client.model.lastOpts.contextOverflowPolicy).toBe('stopAtLimit');
    });

    // Identity check: the very same AbortSignal instance must reach respond().
    test('passes signal through to the SDK', async () => {
        const client = new FakeClient();
        const streamer = new LMStudioStreamer(client);
        const ac = new AbortController();
        await collect(streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.2,
            signal: ac.signal,
        }));
        expect(client.model.lastOpts.signal).toBe(ac.signal);
    });

    // Abort after the second event; the loop must finish without an exception.
    // The upper bound of 3 tolerates one further event after the abort.
    test('aborting mid-stream stops cleanly without throwing', async () => {
        const client = new FakeClient(new FakeModel({ chunks: ['a', 'b', 'c', 'd'] }));
        const streamer = new LMStudioStreamer(client);
        const ac = new AbortController();
        const out: string[] = [];
        const iter = streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.3,
            signal: ac.signal,
        });
        for await (const { token } of iter) {
            out.push(token);
            if (out.length === 2) ac.abort();
        }
        expect(out.length).toBeGreaterThanOrEqual(2);
        expect(out.length).toBeLessThanOrEqual(3);
    });

    test('rejects when modelName is empty', async () => {
        const client = new FakeClient();
        const streamer = new LMStudioStreamer(client);
        await expect(collect(streamer.stream({
            modelName: '',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.2,
        }))).rejects.toThrow(/without a model name/i);
    });

    // failAfter: 1 makes the fake throw on the second pull; with no abort in
    // play that error must surface to the consumer.
    test('mid-stream SDK failure is re-thrown when signal not aborted', async () => {
        const client = new FakeClient(new FakeModel({ chunks: ['a', 'b'], failAfter: 1 }));
        const streamer = new LMStudioStreamer(client);
        await expect(collect(streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.2,
        }))).rejects.toThrow(/mid-stream failure/);
    });

    test('mid-stream SDK failure swallowed if signal already aborted', async () => {
        const client = new FakeClient(new FakeModel({ chunks: ['a', 'b'], failAfter: 1 }));
        const streamer = new LMStudioStreamer(client);
        const ac = new AbortController();
        const iter = streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.2,
            signal: ac.signal,
        });
        const out: string[] = [];
        // Deliberately NOT wrapped in try/catch: an empty catch here would let
        // the test pass even if the streamer re-threw after the abort, which is
        // exactly what this test is meant to rule out. Any exception escaping
        // the loop now fails the test.
        // NOTE(review): FakeModel's iterator reports `done` as soon as the
        // signal is aborted, before its scripted failure point, so the fake
        // itself does not throw on this path.
        for await (const { token } of iter) {
            out.push(token);
            ac.abort(); // abort right after first token, before failure point
        }
        expect(out).toEqual(['a']);
    });

    test('transient "Operation canceled" on handle acquisition is retried with a fresh SDK', async () => {
        // The lifecycle manager's concurrent load for this model got superseded;
        // the SDK coalesced our JIT model() lookup into that aborted load. The
        // first getModelHandle throws — the streamer must recreate the SDK
        // (refresh) and retry rather than crashing the whole turn.
        const client = new FakeClient(new FakeModel({ chunks: ['ok'] }));
        client.handleAcqFailures = [new Error('Failed to acquire LM Studio model handle "m1": Operation canceled.')];
        const streamer = new LMStudioStreamer(client);
        const tokens = await collect(streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.2,
        }));
        expect(tokens).toEqual(['ok']);
        expect(client.getModelHandleCalls).toEqual(['m1', 'm1']);
        // First attempt: no refresh. Retry: refresh=true so the SDK is recreated.
        expect(client.getModelHandleOpts[0]).toBeUndefined();
        expect(client.getModelHandleOpts[1]).toEqual({ refresh: true });
    });

    // Counterpart of the retry test: an error that is not a cancellation must
    // propagate after a single acquisition attempt.
    test('non-transient handle acquisition error is thrown without retry', async () => {
        const client = new FakeClient();
        client.handleAcqFailures = [new Error('Failed to acquire LM Studio model handle "m1": model not found')];
        const streamer = new LMStudioStreamer(client);
        await expect(collect(streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.2,
        }))).rejects.toThrow(/model not found/);
        expect(client.getModelHandleCalls).toEqual(['m1']); // no retry
    });

    // The signal is aborted before streaming starts, so the cancellation error
    // from acquisition must end the stream quietly with no tokens and no retry.
    test('handle acquisition failure is swallowed when the user already aborted', async () => {
        const client = new FakeClient();
        client.handleAcqFailures = [new Error('Operation canceled')];
        const streamer = new LMStudioStreamer(client);
        const ac = new AbortController();
        ac.abort();
        const out = await collect(streamer.stream({
            modelName: 'm1',
            messages: [{ role: 'user', content: 'hi' }],
            temperature: 0.2,
            signal: ac.signal,
        }));
        expect(out).toEqual([]);
        expect(client.getModelHandleCalls).toEqual(['m1']); // no retry — genuine cancel
    });

    test('getModelContextLength delegates to the client (and survives a throwing client)', async () => {
        const client = new FakeClient();
        client.contextLength = 8192;
        const streamer = new LMStudioStreamer(client);
        expect(await streamer.getModelContextLength('m1')).toBe(8192);
        // An empty model key yields undefined even though the client has a value.
        expect(await streamer.getModelContextLength('')).toBeUndefined();

        // A throwing client must degrade to undefined, never reject.
        const throwing = new FakeClient();
        throwing.getModelContextLength = async () => { throw new Error('ws down'); };
        const s2 = new LMStudioStreamer(throwing);
        expect(await s2.getModelContextLength('m1')).toBeUndefined();
    });

    // toEqual compares structurally, so this checks content and order of the
    // messages handed to respond(), not object identity.
    test('passes messages through to model.respond', async () => {
        const client = new FakeClient();
        const streamer = new LMStudioStreamer(client);
        const messages = [
            { role: 'system' as const, content: 'sys' },
            { role: 'user' as const, content: 'hi' },
        ];
        await collect(streamer.stream({ modelName: 'm1', messages, temperature: 0.5 }));
        expect(client.model.lastChat).toEqual(messages);
    });
});