chore: version up to 2.80.34 and package
This commit is contained in:
@@ -11,13 +11,26 @@ export interface ChatStreamRequest {
|
||||
modelName: string;
|
||||
messages: ChatStreamMessage[];
|
||||
temperature: number;
|
||||
/** Upper bound on tokens to generate. Omit to fall back to a conservative default. */
|
||||
maxTokens?: number;
|
||||
/** LM Studio context-overflow safety net used only if the prompt still exceeds the window. */
|
||||
contextOverflowPolicy?: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
|
||||
signal?: AbortSignal;
|
||||
}
|
||||
|
||||
/**
 * One stream event. `token` carries generated text (possibly empty for the final event);
 * `stopReason` is set on the *last* event only and is the SDK's `stats.stopReason`
 * (e.g. `eosFound`, `maxPredictedTokensReached`, `contextLengthReached`, `userStopped`).
 */
export interface ChatStreamEvent {
  /** Generated text carried by this event; may be the empty string on the final event. */
  token: string;
  /** Set only on the last event of a stream; absent on every earlier event. */
  stopReason?: string;
}
|
||||
|
||||
export interface IChatStreamer {
|
||||
/** Token-level streaming for an LM Studio chat completion via the WebSocket SDK. */
|
||||
stream(req: ChatStreamRequest): AsyncIterable<{ token: string }>;
|
||||
stream(req: ChatStreamRequest): AsyncIterable<ChatStreamEvent>;
|
||||
/**
|
||||
* Drop the SDK's cached handle for `modelName`. Callers invoke this when
|
||||
* the previous stream returned zero tokens with no error — a symptom of a
|
||||
@@ -39,7 +52,7 @@ export interface IChatStreamer {
|
||||
export class LMStudioStreamer implements IChatStreamer {
|
||||
constructor(private readonly client: ILMStudioClient) {}
|
||||
|
||||
async *stream(req: ChatStreamRequest): AsyncIterable<{ token: string }> {
|
||||
async *stream(req: ChatStreamRequest): AsyncIterable<ChatStreamEvent> {
|
||||
const trimmedModel = (req.modelName || '').trim();
|
||||
if (!trimmedModel) {
|
||||
throw new LMStudioLifecycleError('LMStudioStreamer.stream called without a model name.');
|
||||
@@ -62,6 +75,10 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
const prediction = (model as any).respond(req.messages, {
|
||||
temperature: req.temperature,
|
||||
maxTokens: req.maxTokens ?? 4096,
|
||||
// Safety net: if our own token budgeting still underestimated and the prompt
|
||||
// exceeds the model's context window, decide whether the SDK should fail
|
||||
// loudly (stopAtLimit — default) or silently drop content.
|
||||
contextOverflowPolicy: req.contextOverflowPolicy ?? 'stopAtLimit',
|
||||
signal: req.signal,
|
||||
});
|
||||
|
||||
@@ -98,7 +115,22 @@ export class LMStudioStreamer implements IChatStreamer {
|
||||
req.signal?.removeEventListener?.('abort', onAbort);
|
||||
}
|
||||
|
||||
if (!caught) return;
|
||||
if (!caught) {
|
||||
if (req.signal?.aborted) return;
|
||||
// The prediction object is also a Promise<PredictionResult>; awaiting it after
|
||||
// the stream drains gives us stats.stopReason so callers can tell a truncated
|
||||
// answer (maxPredictedTokensReached / contextLengthReached) from a normal one.
|
||||
let stopReason: string | undefined;
|
||||
try {
|
||||
const result: any = await prediction;
|
||||
stopReason = result?.stats?.stopReason;
|
||||
if (stopReason) {
|
||||
logInfo('LM Studio SDK chat stream finished.', { model: trimmedModel, stopReason, tokensYielded: yielded });
|
||||
}
|
||||
} catch { /* result unavailable on some SDK versions — non-fatal */ }
|
||||
yield { token: '', stopReason: stopReason ?? 'eosFound' };
|
||||
return;
|
||||
}
|
||||
|
||||
const errMsg = String(caught?.message ?? caught);
|
||||
const handleDead = /\bdisposed\b/i.test(errMsg)
|
||||
|
||||
Reference in New Issue
Block a user