chore: bump version to 2.80.27 and update core features

2026-05-09 01:16:12 +09:00
parent 5ffb472d22
commit 3220a126fd
41 changed files with 4457 additions and 72 deletions
@@ -0,0 +1,64 @@
+import type { ILMStudioClient } from './client';
+import { LMStudioLifecycleError } from './client';
+import { logError, logInfo } from '../utils';
+
+export interface ChatStreamMessage { // One chat turn supplied to a chat streamer.
+    role: 'user' | 'assistant' | 'system'; // Which party authored this turn.
+    content: string; // Text of the turn.
+}
+
+export interface ChatStreamRequest { // Input accepted by IChatStreamer.stream().
+    modelName: string; // Model to run; LMStudioStreamer trims it and rejects an empty result.
+    messages: ChatStreamMessage[]; // Conversation passed as-is to the model handle's respond() call.
+    temperature: number; // Forwarded unchanged as the `temperature` option of respond().
+    maxTokens?: number; // Optional generation cap; LMStudioStreamer substitutes 4096 when it is absent.
+    signal?: AbortSignal; // Optional cancel handle; LMStudioStreamer ends the stream without throwing once it is aborted.
+}
+
+export interface IChatStreamer { // Contract for components that stream a chat completion piece by piece.
+    /**
+     * Token-level streaming for an LM Studio chat completion via the WebSocket SDK.
+     * Each yielded item carries one piece of generated text in its `token` field.
+     */
+    stream(req: ChatStreamRequest): AsyncIterable<{ token: string }>;
+}
+
+/**
+ * Adapter that streams LM Studio chat completions via @lmstudio/sdk's `model.respond()`,
+ * replacing the manual fetch + SSE parser path used for the OpenAI-compatible REST endpoint.
+ *
+ * Benefits over the REST path:
+ *  - No SSE parsing (no `data: [DONE]` / partial-chunk fragility).
+ *  - Reuses the same WebSocket the lifecycle manager already opened — handle lookup is cheap
+ *    if the model is already loaded, and load-on-first-use is implicit when it isn't.
+ *  - First-class `signal` support for user-cancel and abort propagation.
+ *
+ * Cancellation contract: once `req.signal` is aborted the stream simply ends (it never throws
+ * for a cancel), whether the abort is noticed before the handle lookup, right after it,
+ * between fragments, or as a rejected iteration.
+ */
+export class LMStudioStreamer implements IChatStreamer {
+    constructor(private readonly client: ILMStudioClient) {}
+
+    /**
+     * Streams the non-empty text fragments of a single chat completion.
+     *
+     * @param req - Model name, conversation and sampling options. `maxTokens` falls back to
+     *   4096 when absent; `signal` cancels the stream.
+     * @returns An async iterable yielding `{ token }` for each fragment whose `content` is non-empty.
+     * @throws LMStudioLifecycleError when `req.modelName` is empty or whitespace-only.
+     * @throws Whatever `getModelHandle` rejects with, and any non-abort error raised while
+     *   iterating the prediction (the latter is logged before being rethrown).
+     */
+    async *stream(req: ChatStreamRequest): AsyncIterable<{ token: string }> {
+        const trimmedModel = (req.modelName || '').trim();
+        if (!trimmedModel) {
+            throw new LMStudioLifecycleError('LMStudioStreamer.stream called without a model name.');
+        }
+
+        // A request cancelled before it starts must not trigger a handle lookup, which may
+        // implicitly load the model.
+        if (req.signal?.aborted) return;
+
+        const model = await this.client.getModelHandle(trimmedModel);
+
+        // The cancel may have arrived while the handle lookup was pending.
+        if (req.signal?.aborted) return;
+
+        logInfo('LM Studio SDK chat stream started.', { model: trimmedModel, messageCount: req.messages.length });
+
+        // NOTE(review): the handle type returned by ILMStudioClient is not visible from this
+        // file, so `any` is confined to this single call and the result is typed explicitly.
+        const prediction: AsyncIterable<{ content: string }> = (model as any).respond(req.messages, {
+            temperature: req.temperature,
+            maxTokens: req.maxTokens ?? 4096,
+            signal: req.signal,
+        });
+
+        try {
+            for await (const fragment of prediction) {
+                // Stop promptly on cancel even if fragments are still arriving.
+                if (req.signal?.aborted) return;
+                const token = fragment?.content ?? '';
+                if (token) yield { token };
+            }
+        } catch (err: unknown) {
+            // A cancel is an expected way for the stream to end, not a failure.
+            if (req.signal?.aborted) return;
+            const errName = (err as { name?: unknown } | null | undefined)?.name;
+            if (errName === 'AbortError') return;
+
+            const errMessage = (err as { message?: unknown } | null | undefined)?.message ?? String(err);
+            logError('LM Studio SDK chat stream failed.', { model: trimmedModel, error: errMessage });
+            throw err;
+        }
+    }
+}