refactor: optimize core engine and retrieval logic for v2.80.43

This commit is contained in:
2026-05-13 19:23:57 +09:00
parent c4260466b9
commit 089abf22db
17 changed files with 1311 additions and 88 deletions
+19 -3
View File
@@ -1,8 +1,24 @@
import * as os from 'os';
import { logInfo, logError } from '../utils';
/**
 * Picks the default number of queued actions allowed to run at the same time.
 *
 * Default concurrency = max(2, cpus - 1). Leaves one core for the VS Code UI
 * thread and the extension host, scales up on bigger boxes. Static per-process
 * (no dynamic adjustment) — kept simple because the heavy work (LLM calls)
 * is gated by `missionId` locks elsewhere, not the action queue.
 *
 * `os.cpus()` reports an empty array when CPU information is unavailable, so a
 * count of zero is treated the same as "unknown" and the 4-core assumption is
 * used instead of silently collapsing to the minimum.
 *
 * @returns An integer >= 2; 3 when the CPU count cannot be determined.
 */
function defaultConcurrencyLimit(): number {
  // Core count assumed when the platform cannot tell us (yields a limit of 3).
  const assumedCpus = 4;
  try {
    const detected = os.cpus()?.length ?? 0;
    const cpus = detected > 0 ? detected : assumedCpus;
    return Math.max(2, cpus - 1);
  } catch {
    // Same outcome as the assumed-core path: max(2, 4 - 1).
    return 3;
  }
}
/**
* ActionQueueManager: Manages large-scale tasks by processing them
* with a concurrency limit to prevent resource exhaustion and I/O bottlenecks
* while maintaining high throughput under maximum load.
*/
export class ActionQueueManager {
@@ -10,7 +26,7 @@ export class ActionQueueManager {
private activeCount: number = 0;
private readonly concurrencyLimit: number;
constructor(concurrencyLimit: number = 3) {
constructor(concurrencyLimit: number = defaultConcurrencyLimit()) {
this.concurrencyLimit = concurrencyLimit;
}
+129
View File
@@ -0,0 +1,129 @@
/**
* ============================================================
* Telemetry — append-only usage events to `.astra/usage.jsonl`
*
* Why local-file telemetry instead of a webview dashboard or remote endpoint:
* - Astra is local-first. No data leaves the machine.
* - JSONL is trivial to inspect manually (`tail`, jq) and trivial to ingest
* into a future webview chart without schema migrations.
* - Append-only means the writer never blocks on history.
*
* Event shape is intentionally flat — top-level fields only (scalars, plus the
* `memoryLayers` string list), so a future dashboard can sum/group/filter
* without parsing nested structures.
* ============================================================
*/
import * as fs from 'fs';
import * as path from 'path';
import { getAstraDataDir } from './astraPath';
import { logError } from '../utils';
/**
 * Discriminator written into every JSONL record. Each literal is a stable
 * contract for the file format, so extend this list sparingly.
 *
 * - `turn`: one user-visible chat turn (input → final answer)
 * - `continuation`: an auto-continuation round inside a turn
 * - `retrieval`: brain + memory retrieval summary
 * - `session-end`: session closed (used to bound aggregation queries)
 */
export type TelemetryEventKind = 'turn' | 'continuation' | 'retrieval' | 'session-end';
/**
 * One record of the usage log. Only `kind` and `ts` are mandatory; every other
 * field is filled in when it is meaningful for the event being recorded.
 */
export interface TelemetryEvent {
  // --- identity -----------------------------------------------------------
  /** Which category of event this record describes. */
  kind: TelemetryEventKind;
  /** ISO timestamp, always present so viewers can plot on a time axis directly. */
  ts: string;

  // --- timing / backend ---------------------------------------------------
  /** Wall-clock duration of the event in milliseconds; 0 for instantaneous events. */
  durationMs?: number;
  /** Identifier of the model the request was bound to. */
  model?: string;
  /** Name of the engine (lmstudio | ollama). */
  engine?: string;

  // --- token accounting ---------------------------------------------------
  /** Estimated number of input tokens that went into this event. */
  inputTokens?: number;
  /** Estimated number of output tokens this event produced. */
  outputTokens?: number;
  /** Context window configured for this event. */
  contextLength?: number;

  // --- continuation -------------------------------------------------------
  /** Index of the continuation round, for `kind: 'continuation'`. */
  round?: number;
  /** Stop reason reported by the engine. */
  stopReason?: string;

  // --- retrieval ----------------------------------------------------------
  /** Count of brain files actually used this turn. */
  brainFiles?: number;
  /** Memory layers that contributed chunks this turn. */
  memoryLayers?: string[];

  // --- misc ---------------------------------------------------------------
  /** Free-form details. Keep it short — it stays in the JSONL forever. */
  note?: string;
}
/**
 * Size threshold (5 MB) above which the live `usage.jsonl` is rotated.
 * The original author's estimate is roughly 25k events per file.
 */
const MAX_FILE_BYTES = 5 * 1024 ** 2;
/**
 * Number of rotated archives retained next to the live file:
 * `usage.1.jsonl` … `usage.{ROTATE_KEEP}.jsonl`.
 */
const ROTATE_KEEP = 2;
/** Resolves the path of the live (un-rotated) usage log inside the Astra data directory. */
function jsonlPath(): string {
  const dataDir = getAstraDataDir();
  const liveLog = path.join(dataDir, 'usage.jsonl');
  return liveLog;
}
/**
 * Rotates the usage log once the live file at `p` has grown past
 * `MAX_FILE_BYTES`: each archive moves up one generation, the oldest
 * generation is deleted, and the live file becomes `usage.1.jsonl`.
 * Best-effort — every filesystem failure is swallowed.
 *
 * @param p Path of the live usage log.
 */
function rotateIfNeeded(p: string): void {
  // Path of the archive for a given generation (1 = most recently rotated).
  const archivePath = (generation: number): string =>
    path.join(getAstraDataDir(), `usage.${generation}.jsonl`);

  try {
    const { size } = fs.statSync(p);
    if (size <= MAX_FILE_BYTES) {
      return;
    }

    // Walk from the oldest slot down so no file is overwritten before it moves.
    let slot = ROTATE_KEEP;
    while (slot >= 1) {
      const target = archivePath(slot);
      const source = slot === 1 ? p : archivePath(slot - 1);

      if (fs.existsSync(source)) {
        const isOldestSlot = slot === ROTATE_KEEP;
        if (isOldestSlot && fs.existsSync(target)) {
          // Make room by dropping the oldest generation.
          try {
            fs.unlinkSync(target);
          } catch {
            /* non-fatal */
          }
        }
        try {
          fs.renameSync(source, target);
        } catch {
          /* non-fatal */
        }
      }

      slot -= 1;
    }
  } catch {
    // statSync failed: the live file does not exist yet; the first write creates it.
  }
}
/**
 * Append one event to the rotating JSONL. Best-effort: failures are logged but
 * never thrown, because telemetry must not break a live chat turn.
 *
 * The record always carries a `ts`: a caller-supplied timestamp is kept, and a
 * missing or explicitly `undefined` one is replaced with the current time.
 * Rotation is checked synchronously before the write; the append itself is
 * asynchronous and reports errors through `logError`.
 *
 * @param event Event fields; `ts` is optional and defaults to "now" (ISO string).
 */
export function recordTelemetry(event: Omit<TelemetryEvent, 'ts'> & { ts?: string }): void {
  try {
    // Pull `ts` out before spreading: spreading an explicit `ts: undefined`
    // over a default would erase it and JSON.stringify would then drop the key.
    const { ts, ...rest } = event;
    const full: TelemetryEvent = { ts: ts ?? new Date().toISOString(), ...rest };
    const line = JSON.stringify(full) + '\n';
    const p = jsonlPath();
    rotateIfNeeded(p);
    fs.appendFile(p, line, { encoding: 'utf8' }, (err) => {
      if (err) logError('Telemetry append failed.', { error: err.message });
    });
  } catch (e: unknown) {
    // Final safety net — telemetry must never escape.
    const message = e instanceof Error ? e.message : String(e);
    logError('Telemetry recordTelemetry threw.', { error: message });
  }
}
/**
 * Read the most recent `limit` events across the live usage file and its
 * rotated archives, returned in chronological order (oldest first). Used by
 * a future Settings panel chart; here so the viewer doesn't have to parse paths
 * or worry about rotation.
 *
 * Files are visited newest-first (`usage.jsonl`, then `usage.1.jsonl`, …) and
 * each older batch is placed in front of what was already collected, so the
 * final tail slice really is the newest data. Reading stops as soon as enough
 * events are available. Missing/unreadable files and malformed lines are skipped.
 *
 * @param limit Maximum number of events to return (default 500). Values below 1
 *              or NaN yield an empty array.
 * @returns Up to `limit` events, oldest first.
 */
export function readRecentTelemetry(limit = 500): TelemetryEvent[] {
  // Guard first: `slice(-0)` would hand back every event instead of none.
  const max = Math.floor(limit);
  if (!(max >= 1)) return [];

  const dir = getAstraDataDir();
  // Newest → oldest: the live file, then each rotated generation.
  const candidates: string[] = [path.join(dir, 'usage.jsonl')];
  for (let i = 1; i <= ROTATE_KEEP; i++) {
    candidates.push(path.join(dir, `usage.${i}.jsonl`));
  }

  let out: TelemetryEvent[] = [];
  for (const file of candidates) {
    if (out.length >= max) break; // already have enough recent events

    let raw: string;
    try {
      raw = fs.readFileSync(file, 'utf8');
    } catch {
      continue; // missing or unreadable file
    }

    const batch: TelemetryEvent[] = [];
    for (const line of raw.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      try {
        const parsed: unknown = JSON.parse(trimmed);
        // Only JSON objects can be events; bare numbers/strings/arrays are noise.
        if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
          batch.push(parsed as TelemetryEvent);
        }
      } catch {
        /* skip bad line */
      }
    }

    // This file is older than everything gathered so far, so it goes in front.
    out = batch.concat(out);
  }
  return out.slice(-max);
}