refactor: optimize core engine and retrieval logic for v2.80.43
This commit is contained in:
+19
-3
@@ -1,8 +1,24 @@
|
||||
import * as os from 'os';
|
||||
import { logInfo, logError } from '../utils';
|
||||
|
||||
/**
 * Compute the default concurrency limit for the action queue.
 *
 * Default concurrency = max(2, cpus - 1). Leaves one core for the VS Code UI
 * thread and the extension host, scales up on bigger boxes. Static per-process
 * (no dynamic adjustment) — kept simple because the heavy work (LLM calls)
 * is gated by `missionId` locks elsewhere, not the action queue.
 *
 * `os.cpus()` reports an empty array when CPU information is unavailable; that
 * case is treated as "unknown" and falls back to an assumed 4 cores instead of
 * being read as "zero cores".
 *
 * @returns An integer >= 2; 3 when the core count cannot be determined.
 */
function defaultConcurrencyLimit(): number {
  // Assumed core count when the platform cannot tell us (→ limit of 3).
  const fallbackCpuCount = 4;
  try {
    const detected = os.cpus()?.length;
    const cpus = detected !== undefined && detected > 0 ? detected : fallbackCpuCount;
    return Math.max(2, cpus - 1);
  } catch {
    return Math.max(2, fallbackCpuCount - 1);
  }
}
|
||||
|
||||
/**
|
||||
* ActionQueueManager: Manages large-scale tasks by processing them
|
||||
* with a concurrency limit to prevent resource exhaustion and I/O bottlenecks
|
||||
* while maintaining high throughput under maximum load.
|
||||
*/
|
||||
export class ActionQueueManager {
|
||||
@@ -10,7 +26,7 @@ export class ActionQueueManager {
|
||||
private activeCount: number = 0;
|
||||
private readonly concurrencyLimit: number;
|
||||
|
||||
constructor(concurrencyLimit: number = 3) {
|
||||
constructor(concurrencyLimit: number = defaultConcurrencyLimit()) {
|
||||
this.concurrencyLimit = concurrencyLimit;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
/**
|
||||
* ============================================================
|
||||
* Telemetry — append-only usage events to `.astra/usage.jsonl`
|
||||
*
|
||||
* Why local-file telemetry instead of a webview dashboard or remote endpoint:
|
||||
* - Astra is local-first. No data leaves the machine.
|
||||
* - JSONL is trivial to inspect manually (`tail`, jq) and trivial to ingest
|
||||
* into a future webview chart without schema migrations.
|
||||
* - Append-only means the writer never blocks on history.
|
||||
*
|
||||
* Event shape is intentionally flat — top-level fields only (scalars, plus the
|
||||
* `memoryLayers` string array), so a future dashboard can sum/group/filter without parsing nested structures.
|
||||
* ============================================================
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { getAstraDataDir } from './astraPath';
|
||||
import { logError } from '../utils';
|
||||
|
||||
/**
 * Discriminator stored in every telemetry record. Each literal is written to
 * the JSONL and therefore acts as a stable contract — add new kinds sparingly.
 */
export type TelemetryEventKind =
  /** One user-visible chat turn (input → final answer). */
  | 'turn'
  /** An auto-continuation round inside a turn. */
  | 'continuation'
  /** Brain + memory retrieval summary. */
  | 'retrieval'
  /** Session closed (used to bound aggregation queries). */
  | 'session-end';
|
||||
|
||||
/**
 * One record of the usage log. Only `kind` and `ts` are required; every other
 * field is filled in when it applies to the event being recorded.
 */
export interface TelemetryEvent {
  /** Which kind of event this record describes. */
  kind: TelemetryEventKind;

  /** ISO timestamp; always present so a viewer can plot on a time axis without recomputing. */
  ts: string;

  /** Wall-clock duration of the event in milliseconds (0 for instantaneous events). */
  durationMs?: number;

  /** Identifier of the model the request was bound to. */
  model?: string;

  /** Engine name (lmstudio | ollama). */
  engine?: string;

  /** Estimated number of input tokens that went into this event. */
  inputTokens?: number;

  /** Estimated number of output tokens produced by this event. */
  outputTokens?: number;

  /** Context window configured for this event. */
  contextLength?: number;

  /** Index of the continuation round; used with `kind: 'continuation'`. */
  round?: number;

  /** Stop reason reported by the engine. */
  stopReason?: string;

  /** Count of brain files actually used this turn. */
  brainFiles?: number;

  /** Memory layers that contributed chunks this turn. */
  memoryLayers?: string[];

  /** Free-form details as a string. Keep it small — it is persisted with the event in the JSONL. */
  note?: string;
}
|
||||
|
||||
const MAX_FILE_BYTES = 5 * 1024 * 1024; // 5 MB → ~25k events worst case
|
||||
const ROTATE_KEEP = 2; // number of rotated archives kept besides the live usage.jsonl: usage.1.jsonl … usage.{ROTATE_KEEP}.jsonl
|
||||
|
||||
/**
 * Location of the live (non-rotated) usage log: `usage.jsonl` inside the
 * directory returned by `getAstraDataDir()`.
 */
function jsonlPath(): string {
  const dataDir = getAstraDataDir();
  return path.join(dataDir, 'usage.jsonl');
}
|
||||
|
||||
/**
 * Rotate the live usage log once it has grown past `MAX_FILE_BYTES`.
 *
 * Rotation shifts `usage.{N-1}.jsonl` → `usage.{N}.jsonl` (the live file counts
 * as N = 0), starting from the oldest slot so nothing is overwritten before it
 * has been moved. The archive in slot `ROTATE_KEEP` is deleted to make room.
 * Archives are placed next to `p`, in the same directory.
 *
 * Best-effort: every filesystem failure is swallowed, because rotation runs on
 * the telemetry write path and must not throw.
 *
 * @param p Path of the live usage log.
 */
function rotateIfNeeded(p: string): void {
  let size: number;
  try {
    size = fs.statSync(p).size;
  } catch {
    // File doesn't exist yet — first write will create it.
    return;
  }
  if (size <= MAX_FILE_BYTES) return;

  // Resolve the directory once; archives are siblings of the live file.
  const dir = path.dirname(p);
  const archivePath = (slot: number): string => path.join(dir, `usage.${slot}.jsonl`);

  for (let slot = ROTATE_KEEP; slot >= 1; slot--) {
    const target = archivePath(slot);
    const source = slot === 1 ? p : archivePath(slot - 1);
    if (!fs.existsSync(source)) continue;

    if (slot === ROTATE_KEEP) {
      // Drop the oldest archive so the rename below has a free target.
      try { fs.unlinkSync(target); } catch { /* non-fatal (usually: nothing to drop) */ }
    }
    try { fs.renameSync(source, target); } catch { /* non-fatal */ }
  }
}
|
||||
|
||||
/**
 * Append one event to the rotating JSONL. Best-effort: failures are logged but
 * never thrown, because telemetry must not break a live chat turn.
 *
 * The timestamp defaults to "now". A caller-supplied `ts` wins only when it is
 * actually set: an explicit `ts: undefined` must not erase the default, since
 * `JSON.stringify` would then drop the field from the written line.
 *
 * The append itself is asynchronous; its outcome is only reported through
 * `logError`.
 *
 * @param event Event to record; `ts` is optional and filled in when absent.
 */
export function recordTelemetry(event: Omit<TelemetryEvent, 'ts'> & { ts?: string }): void {
  try {
    const { ts, ...rest } = event;
    // `ts` stays the first key so each JSONL line still starts with the timestamp.
    const full: TelemetryEvent = { ts: ts ?? new Date().toISOString(), ...rest };
    const line = JSON.stringify(full) + '\n';
    const p = jsonlPath();
    rotateIfNeeded(p);
    fs.appendFile(p, line, { encoding: 'utf8' }, (err) => {
      if (err) logError('Telemetry append failed.', { error: err.message });
    });
  } catch (e: unknown) {
    // Final safety net — telemetry must never escape.
    const message = e instanceof Error ? e.message : String(e);
    logError('Telemetry recordTelemetry threw.', { error: message });
  }
}
|
||||
|
||||
/**
 * Read the most recent `limit` events from the current and prior usage files,
 * returned in chronological order (oldest first). Used by a future Settings
 * panel chart; here so the viewer doesn't have to parse paths or worry about
 * rotation.
 *
 * Files are visited newest → oldest (`usage.jsonl`, then `usage.1.jsonl` …
 * `usage.{ROTATE_KEEP}.jsonl`). Each older file's events are placed in front of
 * what has been collected so far, and reading stops as soon as `limit` events
 * are available, so older archives are not read needlessly.
 *
 * Missing or unreadable files, blank lines, lines that are not valid JSON and
 * JSON values that are not plain objects are skipped.
 *
 * @param limit Maximum number of events to return; a non-positive value yields `[]`.
 * @returns Up to `limit` events, oldest first.
 */
export function readRecentTelemetry(limit = 500): TelemetryEvent[] {
  // Guard first: `slice(-0)` would otherwise return every event.
  if (!(limit > 0)) return [];

  const dir = getAstraDataDir();
  const newestFirst: string[] = [path.join(dir, 'usage.jsonl')];
  for (let i = 1; i <= ROTATE_KEEP; i++) {
    newestFirst.push(path.join(dir, `usage.${i}.jsonl`));
  }

  let recent: TelemetryEvent[] = [];
  for (const file of newestFirst) {
    if (recent.length >= limit) break;

    let raw: string;
    try {
      raw = fs.readFileSync(file, 'utf8');
    } catch {
      continue; // missing or unreadable file
    }

    const events: TelemetryEvent[] = [];
    for (const line of raw.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      try {
        const parsed: unknown = JSON.parse(trimmed);
        if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
          events.push(parsed as TelemetryEvent);
        }
      } catch { /* skip bad line */ }
    }

    // This file is older than everything collected so far, so it goes in front.
    recent = events.concat(recent);
  }
  return recent.slice(-limit);
}
|
||||
Reference in New Issue
Block a user