chore: bump version to 2.80.27 and update core features
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
import type { ILMStudioClient } from './client';
|
||||
import { LMStudioLifecycleError } from './client';
|
||||
import { logError, logInfo } from '../utils';
|
||||
|
||||
/** One turn of a chat conversation, as supplied in `ChatStreamRequest.messages`. */
export interface ChatStreamMessage {
  /** Who authored this turn. */
  role: 'user' | 'assistant' | 'system';
  /** Text of the turn. */
  content: string;
}
|
||||
|
||||
/** Everything needed to run one streamed chat completion. */
export interface ChatStreamRequest {
  /**
   * Identifier of the model to use. `LMStudioStreamer.stream` trims it and
   * throws if the result is empty.
   */
  modelName: string;
  /** Conversation turns, forwarded to the model in the given order. */
  messages: ChatStreamMessage[];
  /** Sampling temperature, forwarded unchanged to the model. */
  temperature: number;
  /** Upper bound on generated tokens; `LMStudioStreamer` falls back to 4096 when omitted. */
  maxTokens?: number;
  /** Optional cancellation signal; once aborted, `LMStudioStreamer` ends the stream without throwing. */
  signal?: AbortSignal;
}
|
||||
|
||||
/** Contract for anything that can turn a chat request into a stream of tokens. */
export interface IChatStreamer {
  /** Token-level streaming for an LM Studio chat completion via the WebSocket SDK. */
  stream(req: ChatStreamRequest): AsyncIterable<{ token: string }>;
}
|
||||
|
||||
/**
 * Adapter that streams LM Studio chat completions via @lmstudio/sdk's `model.respond()`,
 * replacing the manual fetch + SSE parser path used for the OpenAI-compatible REST endpoint.
 *
 * Benefits over the REST path:
 * - No SSE parsing (no `data: [DONE]` / partial-chunk fragility).
 * - Reuses the same WebSocket the lifecycle manager already opened — handle lookup is cheap
 *   if the model is already loaded, and load-on-first-use is implicit when it isn't.
 * - First-class `signal` support for user-cancel and abort propagation.
 */
export class LMStudioStreamer implements IChatStreamer {
  /** Token budget sent to the SDK when the request leaves `maxTokens` unset. */
  private static readonly DEFAULT_MAX_TOKENS = 4096;

  constructor(private readonly client: ILMStudioClient) {}

  /**
   * Streams the completion for `req` one non-empty token at a time.
   *
   * Cancellation is silent: if `req.signal` is (or becomes) aborted, or the
   * prediction fails with an `AbortError`, the stream simply ends.
   *
   * @param req - Model name, conversation, sampling options and optional abort signal.
   * @returns An async iterable of `{ token }` objects; fragments with empty content are skipped.
   * @throws LMStudioLifecycleError when `req.modelName` is missing or blank.
   * @throws Any non-abort error raised while iterating the prediction (logged first, then rethrown).
   */
  async *stream(req: ChatStreamRequest): AsyncIterable<{ token: string }> {
    const modelName = (req.modelName || '').trim();
    if (!modelName) {
      throw new LMStudioLifecycleError('LMStudioStreamer.stream called without a model name.');
    }

    // Evaluated through a closure so every check re-reads the live signal state.
    const isAborted = (): boolean => req.signal?.aborted === true;

    // Already cancelled: do not look up (and possibly load) the model at all.
    if (isAborted()) return;

    const model = await this.client.getModelHandle(modelName);

    // The lookup is awaited and may take a while; the caller may have cancelled meanwhile.
    if (isAborted()) return;

    logInfo('LM Studio SDK chat stream started.', { model: modelName, messageCount: req.messages.length });

    // The handle is deliberately treated as untyped here; only `respond` is used.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const prediction: PredictionStream = (model as any).respond(req.messages, {
      temperature: req.temperature,
      maxTokens: req.maxTokens ?? LMStudioStreamer.DEFAULT_MAX_TOKENS,
      signal: req.signal,
    });

    // True once the prediction ended by itself (ran to completion or threw).
    let settled = false;
    try {
      for await (const fragment of prediction) {
        if (isAborted()) return;
        const token = fragment?.content ?? '';
        if (token) yield { token };
      }
      settled = true;
    } catch (err: unknown) {
      settled = true;
      // Loose structural read: thrown values are not guaranteed to be Error instances.
      const details = (err ?? {}) as { name?: string; message?: string };
      if (isAborted() || details.name === 'AbortError') return;
      logError('LM Studio SDK chat stream failed.', {
        model: modelName,
        error: details.message ?? String(err),
      });
      throw err;
    } finally {
      // Reached with `settled === false` and no abort only when the consumer stopped
      // iterating early (e.g. `break`). NOTE(review): assumes the SDK does not cancel
      // on iterator return by itself — confirm; the call is best-effort either way.
      if (!settled && !isAborted()) {
        LMStudioStreamer.cancelQuietly(prediction);
      }
    }
  }

  /** Best-effort cancellation: calls `cancel()` when present and ignores any failure. */
  private static cancelQuietly(prediction: PredictionStream): void {
    if (typeof prediction.cancel !== 'function') return;
    try {
      void Promise.resolve(prediction.cancel()).catch(() => undefined);
    } catch {
      // Cancellation is advisory; the stream is ending regardless.
    }
  }
}

/** Minimal structural view of the value returned by `respond()` that this file relies on. */
type PredictionStream = AsyncIterable<{ content?: string } | null | undefined> & {
  cancel?: () => unknown;
};
|
||||
Reference in New Issue
Block a user