chore: version up to 2.80.34 and package

This commit is contained in:
g1nation
2026-05-12 22:54:21 +09:00
parent 148bfb070b
commit 065e598cca
26 changed files with 2023 additions and 139 deletions
+35 -3
View File
@@ -11,13 +11,26 @@ export interface ChatStreamRequest {
modelName: string;
messages: ChatStreamMessage[];
temperature: number;
/** Upper bound on tokens to generate. Omit to fall back to a conservative default. */
maxTokens?: number;
/** LM Studio context-overflow safety net used only if the prompt still exceeds the window. */
contextOverflowPolicy?: 'stopAtLimit' | 'truncateMiddle' | 'rollingWindow';
signal?: AbortSignal;
}
/**
 * One event yielded by `IChatStreamer.stream`.
 *
 * `token` carries generated text; `stopReason` is set on the *last* event only.
 * `LMStudioStreamer` emits that last event with an empty `token` once the SDK
 * stream has drained without an error, and skips it entirely when the request's
 * `signal` is already aborted — so consumers must not assume a `stopReason`
 * always arrives.
 */
export interface ChatStreamEvent {
/** Generated text for this event; the empty string on the final event emitted by `LMStudioStreamer`. */
token: string;
/**
 * Why generation ended, taken from the SDK's `stats.stopReason`
 * (e.g. `eosFound`, `maxPredictedTokensReached`, `contextLengthReached`, `userStopped`).
 * `LMStudioStreamer` falls back to `'eosFound'` when the SDK result or its stop
 * reason is unavailable, so that value can also mean "reason unknown".
 */
stopReason?: string;
}
export interface IChatStreamer {
/** Token-level streaming for an LM Studio chat completion via the WebSocket SDK. */
stream(req: ChatStreamRequest): AsyncIterable<{ token: string }>;
stream(req: ChatStreamRequest): AsyncIterable<ChatStreamEvent>;
/**
* Drop the SDK's cached handle for `modelName`. Callers invoke this when
* the previous stream returned zero tokens with no error — a symptom of a
@@ -39,7 +52,7 @@ export interface IChatStreamer {
export class LMStudioStreamer implements IChatStreamer {
constructor(private readonly client: ILMStudioClient) {}
async *stream(req: ChatStreamRequest): AsyncIterable<{ token: string }> {
async *stream(req: ChatStreamRequest): AsyncIterable<ChatStreamEvent> {
const trimmedModel = (req.modelName || '').trim();
if (!trimmedModel) {
throw new LMStudioLifecycleError('LMStudioStreamer.stream called without a model name.');
@@ -62,6 +75,10 @@ export class LMStudioStreamer implements IChatStreamer {
const prediction = (model as any).respond(req.messages, {
temperature: req.temperature,
maxTokens: req.maxTokens ?? 4096,
// Safety net: if our own token budgeting still underestimated and the prompt
// exceeds the model's context window, decide whether the SDK should fail
// loudly (stopAtLimit — default) or silently drop content.
contextOverflowPolicy: req.contextOverflowPolicy ?? 'stopAtLimit',
signal: req.signal,
});
@@ -98,7 +115,22 @@ export class LMStudioStreamer implements IChatStreamer {
req.signal?.removeEventListener?.('abort', onAbort);
}
if (!caught) return;
if (!caught) {
if (req.signal?.aborted) return;
// The prediction object is also a Promise<PredictionResult>; awaiting it after
// the stream drains gives us stats.stopReason so callers can tell a truncated
// answer (maxPredictedTokensReached / contextLengthReached) from a normal one.
let stopReason: string | undefined;
try {
const result: any = await prediction;
stopReason = result?.stats?.stopReason;
if (stopReason) {
logInfo('LM Studio SDK chat stream finished.', { model: trimmedModel, stopReason, tokensYielded: yielded });
}
} catch { /* result unavailable on some SDK versions — non-fatal */ }
yield { token: '', stopReason: stopReason ?? 'eosFound' };
return;
}
const errMsg = String(caught?.message ?? caught);
const handleDead = /\bdisposed\b/i.test(errMsg)