chore: bump version to 2.80.27 and update core features

This commit is contained in:
g1nation
2026-05-09 01:16:12 +09:00
parent 5ffb472d22
commit 3220a126fd
41 changed files with 4457 additions and 72 deletions
+33 -1
View File
@@ -1,10 +1,14 @@
import { LMStudioClient as SDKClient } from '@lmstudio/sdk';
import { LMStudioClient as SDKClient, LLM } from '@lmstudio/sdk';
import { logError, logInfo } from '../utils';
/**
 * Contract for the LM Studio model-management client: loading and unloading
 * models, listing what is loaded, resolving chat handles, probing the server,
 * and re-pointing the client at a different base URL.
 */
export interface ILMStudioClient {
/**
 * Load the model identified by `modelKey`.
 * `signal`, when given, is forwarded to the SDK load call so the load can be aborted.
 */
load(modelKey: string, signal?: AbortSignal): Promise<void>;
/** Unload the model identified by `modelKey`. */
unload(modelKey: string): Promise<void>;
/** List the currently loaded models (presumably as model keys — confirm against the implementation). */
listLoaded(): Promise<string[]>;
/**
 * Like listLoaded() but caches the result for `ttlMs` to avoid hammering the SDK.
 * LMStudioClient defaults `ttlMs` to 5000 ms and resolves to an empty list when the lookup fails.
 */
listLoadedCached(ttlMs?: number): Promise<string[]>;
/**
 * Resolve a chat-ready handle for an already-loaded (or just-loaded) model.
 * LMStudioClient rejects with LMStudioLifecycleError when the handle cannot be acquired.
 */
getModelHandle(modelKey: string): Promise<LLM>;
/**
 * Probe whether the LM Studio server can be reached.
 * NOTE(review): presumably resolves to false instead of rejecting — confirm against the implementation.
 */
isReachable(): Promise<boolean>;
/**
 * Point the client at a different HTTP base URL.
 * LMStudioClient drops its SDK instance and loaded-model cache when the derived WebSocket URL changes.
 */
setBaseUrl(httpBaseUrl: string): void;
}
@@ -36,6 +40,8 @@ export function httpToWebSocketUrl(httpBaseUrl: string): string | undefined {
export class LMStudioClient implements ILMStudioClient {
private _sdk: SDKClient | undefined;
private _wsUrl: string | undefined;
private _loadedCache: { value: string[]; expiresAt: number } | undefined;
private static readonly DEFAULT_LOADED_CACHE_TTL_MS = 5000;
constructor(httpBaseUrl: string) {
this.setBaseUrl(httpBaseUrl);
@@ -46,6 +52,7 @@ export class LMStudioClient implements ILMStudioClient {
if (ws !== this._wsUrl) {
this._wsUrl = ws;
this._sdk = undefined;
this._loadedCache = undefined;
}
}
@@ -59,6 +66,7 @@ export class LMStudioClient implements ILMStudioClient {
async load(modelKey: string, signal?: AbortSignal): Promise<void> {
try {
await this.getSdk().llm.load(modelKey, signal ? { signal } : undefined);
this._loadedCache = undefined;
logInfo('LM Studio model loaded.', { modelKey });
} catch (e: any) {
const msg = e?.message ?? String(e);
@@ -69,6 +77,7 @@ export class LMStudioClient implements ILMStudioClient {
async unload(modelKey: string): Promise<void> {
try {
await this.getSdk().llm.unload(modelKey);
this._loadedCache = undefined;
logInfo('LM Studio model unloaded.', { modelKey });
} catch (e: any) {
const msg = e?.message ?? String(e);
@@ -88,6 +97,29 @@ export class LMStudioClient implements ILMStudioClient {
}
}
/**
 * Cached variant of `listLoaded()`.
 *
 * Serves a copy of the cached list while the cache entry has not expired;
 * otherwise fetches a fresh list, stores it with an expiry of
 * (time this call started + `ttlMs`) and returns a copy of it.
 * Callers always receive their own array, so mutating the result never
 * corrupts the cache.
 *
 * @param ttlMs Lifetime of a freshly fetched entry, in milliseconds.
 * @returns The loaded-model list, or an empty array when the lookup fails
 *          (a failed lookup leaves the cache untouched).
 */
async listLoadedCached(ttlMs: number = LMStudioClient.DEFAULT_LOADED_CACHE_TTL_MS): Promise<string[]> {
    const startedAt = Date.now();
    const cached = this._loadedCache;
    const cacheIsFresh = cached !== undefined && startedAt < cached.expiresAt;
    if (cacheIsFresh) {
        return cached.value.slice();
    }

    try {
        const fresh = await this.listLoaded();
        const expiresAt = startedAt + ttlMs;
        this._loadedCache = { value: fresh, expiresAt };
        return fresh.slice();
    } catch {
        // Best-effort lookup: callers get "nothing loaded" rather than an exception.
        return [];
    }
}
/**
 * Resolves the SDK handle for `modelKey` via `llm.model()`.
 *
 * @param modelKey Key of the model to obtain a handle for.
 * @returns The SDK model handle.
 * @throws LMStudioLifecycleError wrapping the underlying failure (passed as
 *         the second constructor argument) when the handle cannot be acquired.
 */
async getModelHandle(modelKey: string): Promise<LLM> {
    try {
        const handle = await this.getSdk().llm.model(modelKey);
        return handle;
    } catch (cause: any) {
        // Fall back to the stringified value for throwables without a `message`.
        const reason = cause?.message ?? String(cause);
        throw new LMStudioLifecycleError(`Failed to acquire LM Studio model handle "${modelKey}": ${reason}`, cause);
    }
}
async isReachable(): Promise<boolean> {
try {
await this.getSdk().llm.listLoaded();
+44
View File
@@ -1,6 +1,7 @@
import type { ILMStudioClient } from './client';
import type { IActivityTracker } from './activityTracker';
import type { EngineKind } from '../utils';
import type { ISystemSpecsProvider, IModelMemoryEstimator } from '../system/specs';
import { logError, logInfo } from '../utils';
export type LifecycleState = 'idle' | 'loading' | 'loaded' | 'streaming' | 'unloading';
@@ -19,6 +20,15 @@ export interface LifecycleManagerDeps {
switchDebounceMs?: number;
/** Initial engine. Default 'lmstudio'. */
initialEngine?: EngineKind;
/**
* Optional pre-load memory budget check. When both are provided, a warn-only
* advisory is emitted via `notifyError` (and a structured log line) before
* attempting to load a model that the heuristic predicts will not fit.
* The load is **not** blocked — the user may have a quantization the
* estimator does not recognize.
*/
systemSpecs?: ISystemSpecsProvider;
memoryEstimator?: IModelMemoryEstimator;
}
export class ModelLifecycleManager {
@@ -207,6 +217,38 @@ export class ModelLifecycleManager {
}
}
/**
 * Advisory-only RAM budget check run before a model load.
 *
 * Does nothing unless both `systemSpecs` and `memoryEstimator` were supplied
 * in the dependencies. When the estimate for `modelKey` exceeds the safe
 * budget, a structured log line is written and the optional `notifyError`
 * callback receives a human-readable advisory. This method never throws and
 * never blocks the load: the estimate is a heuristic (it can be wrong for
 * unrecognized quantizations or sparse / MoE models) and the user may be
 * loading the model on purpose.
 *
 * @param modelKey Key of the model about to be loaded.
 */
private checkMemoryBudget(modelKey: string): void {
    const { systemSpecs, memoryEstimator } = this.deps;
    if (!systemSpecs || !memoryEstimator) return;

    try {
        const specs = systemSpecs.get();
        const estimatedGB = memoryEstimator.estimate(modelKey);

        // Negated `>` (not `<=`) so a NaN estimate is treated as "fits", like any non-exceeding value.
        if (!(estimatedGB > specs.safeModelBudgetGB)) return;

        const advisory = `Model "${modelKey}" estimated at ~${estimatedGB.toFixed(1)}GB exceeds your safe RAM budget of ${specs.safeModelBudgetGB}GB. If load fails, try a smaller quantization (q4 / q5).`;
        const details = {
            model: modelKey,
            requiredGB: Number(estimatedGB.toFixed(2)),
            budgetGB: specs.safeModelBudgetGB,
            totalRamGB: Number(specs.totalRamGB.toFixed(2)),
        };
        logInfo('LM Studio pre-load memory advisory.', details);
        this.deps.notifyError?.(advisory);
    } catch (failure: any) {
        // Purely diagnostic: a broken advisory must never prevent the load itself.
        logError('Memory budget check failed.', { error: failure?.message ?? String(failure) });
    }
}
private async doSwitch(modelKey: string): Promise<void> {
if (this.disposed) return;
if (this.engine !== 'lmstudio') return;
@@ -225,6 +267,8 @@ export class ModelLifecycleManager {
this.currentModel = null;
}
this.checkMemoryBudget(modelKey);
this.state = 'loading';
this.currentModel = modelKey;
const ac = new AbortController();
+64
View File
@@ -0,0 +1,64 @@
import type { ILMStudioClient } from './client';
import { LMStudioLifecycleError } from './client';
import { logError, logInfo } from '../utils';
/** One chat turn passed to the streamer as part of the conversation history. */
export interface ChatStreamMessage {
/** Author of the turn. */
role: 'user' | 'assistant' | 'system';
/** Text of the turn. */
content: string;
}
/** Parameters for a single streamed chat completion. */
export interface ChatStreamRequest {
/** Key of the model to run; LMStudioStreamer trims it and rejects a blank value. */
modelName: string;
/** Conversation messages handed to the model. */
messages: ChatStreamMessage[];
/** Sampling temperature forwarded to the prediction. */
temperature: number;
/** Upper bound on generated tokens; LMStudioStreamer falls back to 4096 when omitted. */
maxTokens?: number;
/** Cancellation signal; LMStudioStreamer ends the stream without an error once it is aborted. */
signal?: AbortSignal;
}
/** Source of token-by-token chat completions. */
export interface IChatStreamer {
/**
 * Token-level streaming for an LM Studio chat completion via the WebSocket SDK.
 * Each yielded item carries one chunk of generated text in `token`.
 */
stream(req: ChatStreamRequest): AsyncIterable<{ token: string }>;
}
/**
 * Adapter that streams LM Studio chat completions via @lmstudio/sdk's `model.respond()`,
 * replacing the manual fetch + SSE parser path used for the OpenAI-compatible REST endpoint.
 *
 * Benefits over the REST path:
 * - No SSE parsing (no `data: [DONE]` / partial-chunk fragility).
 * - Reuses the same WebSocket the lifecycle manager already opened — handle lookup is cheap
 *   if the model is already loaded, and load-on-first-use is implicit when it isn't.
 * - First-class `signal` support for user-cancel and abort propagation.
 */
export class LMStudioStreamer implements IChatStreamer {
    constructor(private readonly client: ILMStudioClient) {}

    /**
     * Streams the completion for `req` as a sequence of non-empty `{ token }` chunks.
     *
     * Cancellation: when `req.signal` is aborted — before the call, while the model
     * handle is being resolved, or mid-stream — the generator simply ends without
     * throwing. `AbortError`s raised by the SDK are treated the same way.
     *
     * @param req Model name, messages and sampling options for the completion.
     * @returns An async iterable of generated text chunks.
     * @throws LMStudioLifecycleError when `req.modelName` is blank, or (from the
     *         client) when the model handle cannot be acquired.
     * @throws Any non-abort error raised by the SDK prediction, after logging it.
     */
    async *stream(req: ChatStreamRequest): AsyncIterable<{ token: string }> {
        const trimmedModel = (req.modelName || '').trim();
        if (!trimmedModel) {
            throw new LMStudioLifecycleError('LMStudioStreamer.stream called without a model name.');
        }

        // A request that is already cancelled must not resolve a handle: handle lookup
        // can implicitly load the model, which is expensive work nobody is waiting for.
        if (req.signal?.aborted) return;

        const model = await this.client.getModelHandle(trimmedModel);

        // The caller may have cancelled while the handle was being resolved.
        if (req.signal?.aborted) return;

        logInfo('LM Studio SDK chat stream started.', { model: trimmedModel, messageCount: req.messages.length });

        type OngoingPredictionLike = AsyncIterable<{ content: string }> & { cancel?: () => unknown };
        let ongoing: OngoingPredictionLike | undefined;
        // True once the prediction has ended on its own (completed or failed).
        let settled = false;

        try {
            // Started inside the try so a synchronous failure of respond() gets the same
            // abort handling and error logging as a failure in the middle of the stream.
            const prediction = (model as any).respond(req.messages, {
                temperature: req.temperature,
                maxTokens: req.maxTokens ?? 4096,
                signal: req.signal,
            }) as OngoingPredictionLike;
            ongoing = prediction;

            for await (const fragment of prediction) {
                if (req.signal?.aborted) return;
                const token = fragment?.content ?? '';
                if (token) yield { token };
            }
            settled = true;
        } catch (err: any) {
            settled = true;
            if (req.signal?.aborted) return;
            if (err?.name === 'AbortError') return;
            logError('LM Studio SDK chat stream failed.', { model: trimmedModel, error: err?.message ?? String(err) });
            throw err;
        } finally {
            // Reached with `settled === false` when the consumer stopped iterating early
            // (break / return()). Without an explicit cancel the prediction would keep
            // generating tokens nobody reads. When the signal is aborted the SDK already
            // received the cancellation through `signal`, so no second cancel is issued.
            const cancel = ongoing?.cancel;
            if (!settled && !req.signal?.aborted && typeof cancel === 'function') {
                try {
                    await cancel.call(ongoing);
                } catch {
                    // Best-effort cleanup; the stream is being torn down anyway.
                }
            }
        }
    }
}