release: v2.0.2 - Structural Integrity & Automated Context Management
This commit is contained in:
@@ -27,23 +27,72 @@
|
||||
*/
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as crypto from 'crypto';
|
||||
import { logError, logInfo } from '../../utils';
|
||||
import { deepScan, DeepScanResult, ScanCache, RefreshStats } from './scanner';
|
||||
import { renderDirectoryTreeDiagram, renderModuleDependencyDiagram } from './mermaid';
|
||||
|
||||
/** Directory, relative to the project root, that holds the architecture doc. */
const ARCH_DIR_REL = path.join('.astra', 'project-context');

/** File name of the generated architecture document. */
const ARCH_FILE = 'architecture.md';

/** File name of the scan cache stored in the same directory. */
const CACHE_FILE = 'scan-cache.json';

/** Top-level directory names treated as "code" for the Main Modules listing. */
const CODE_DIRS = [
    'src',
    'media',
    'core_py',
    'lib',
    'app',
    'apps',
    'packages',
    'tests',
];
|
||||
/**
 * Build the path of the scan-cache file for a project root.
 *
 * The cache is placed in the same directory as the architecture doc
 * (`ARCH_DIR_REL`), so the generated markdown and the per-file fingerprints
 * that feed it can be inspected side by side.
 *
 * @param projectRoot Root directory of the project.
 * @returns `projectRoot` joined with `ARCH_DIR_REL` and `CACHE_FILE`.
 */
function _cachePathFor(projectRoot: string): string {
    const segments = [projectRoot, ARCH_DIR_REL, CACHE_FILE];
    return path.join(...segments);
}
|
||||
|
||||
/** Root-level file names that are highlighted under "Important Files" when present. */
const ROOT_IMPORTANT = [
    // Node / TypeScript
    'package.json',
    'pnpm-workspace.yaml',
    'tsconfig.json',
    // Project documentation
    'README.md',
    'CHANGELOG.md',
    'ARCHITECTURE.md',
    // Other language manifests
    'pyproject.toml',
    'requirements.txt',
    'Cargo.toml',
    'go.mod',
    // Containers
    'Dockerfile',
    'docker-compose.yml',
];
|
||||
/**
 * Load the persisted scan cache for a project, if a usable one exists.
 *
 * @param projectRoot Root directory of the project.
 * @returns The parsed cache when the file exists, parses as JSON, has
 *   `version === 1` and an object-typed `files` member; otherwise `undefined`.
 *   A read or parse failure is logged and also yields `undefined`.
 */
function _readScanCache(projectRoot: string): ScanCache | undefined {
    const cacheFile = _cachePathFor(projectRoot);
    if (!fs.existsSync(cacheFile)) {
        return undefined;
    }

    let candidate: any;
    try {
        const text = fs.readFileSync(cacheFile, 'utf8');
        candidate = JSON.parse(text);
    } catch (e: any) {
        // An unreadable or corrupt cache is not fatal: the caller simply rescans.
        logError('projectArchitecture: cache read failed; starting fresh.', {
            cachePath: cacheFile, error: e?.message ?? String(e),
        });
        return undefined;
    }

    // Shape check only: version tag plus a `files` object.
    const usable = candidate
        && candidate.version === 1
        && candidate.files
        && typeof candidate.files === 'object';
    return usable ? (candidate as ScanCache) : undefined;
}
|
||||
|
||||
/**
 * Persist the scan cache for a project as pretty-printed JSON.
 *
 * Creates the containing directory when needed. Any failure (directory
 * creation, serialisation, write) is logged and swallowed.
 *
 * @param projectRoot Root directory of the project.
 * @param cache Cache contents to write.
 */
function _writeScanCache(projectRoot: string, cache: ScanCache): void {
    const target = _cachePathFor(projectRoot);
    try {
        const parentDir = path.dirname(target);
        fs.mkdirSync(parentDir, { recursive: true });
        const serialised = JSON.stringify(cache, null, 2);
        fs.writeFileSync(target, serialised, 'utf8');
    } catch (e: any) {
        const reason = e?.message ?? String(e);
        logError('projectArchitecture: cache write failed.', {
            cachePath: target, error: reason,
        });
    }
}
|
||||
|
||||
/**
 * Normalize an absolute path for display. We don't want `/Volumes/Data/...`
 * (or any other machine-specific prefix) leaking into the architecture doc or
 * the prompt — the user works across multiple environments, so absolute paths
 * are noise at best and outright wrong on the next machine. Anything that
 * lives inside the workspace becomes workspace-relative; anything else falls
 * back to just the last path segment. Exported so callers outside this module
 * can apply the same policy consistently.
 *
 * Backslashes are treated as separators on every platform, so a Windows-style
 * path is handled the same way regardless of the host OS.
 *
 * @param absPath Absolute path to display (POSIX or Windows separators).
 * @param workspaceRoot Optional workspace root; trailing separators are ignored.
 * @returns `''` for empty input, `'.'` when `absPath` is the workspace root,
 *   a forward-slash workspace-relative path when `absPath` is inside the
 *   root, otherwise the final segment of `absPath`.
 */
export function toWorkspaceRelative(absPath: string, workspaceRoot?: string): string {
    if (!absPath) return '';
    // Compare on forward slashes only so both separator styles match.
    const ap = absPath.replace(/\\/g, '/');
    if (workspaceRoot) {
        const wrp = workspaceRoot.replace(/[\\/]+$/, '').replace(/\\/g, '/');
        if (ap === wrp) return '.';
        if (ap.startsWith(`${wrp}/`)) return ap.slice(wrp.length + 1);
    }
    // Take the basename of the *normalized* path: the platform `path.basename`
    // does not split on backslashes under POSIX and would return the whole
    // Windows-style path, leaking the prefix this function is meant to hide.
    return path.posix.basename(ap);
}
|
||||
|
||||
/** HTML-comment markers that open and close the auto-managed block of the doc. */
const [AUTO_START, AUTO_END] = [
    '<!-- ASTRA:AUTO-START -->',
    '<!-- ASTRA:AUTO-END -->',
] as const;
|
||||
@@ -74,125 +123,28 @@ export function architectureDocPathFor(projectRoot: string): string {
|
||||
}
|
||||
|
||||
/**
 * Backwards-compatible thin wrapper around the deep scanner.
 *
 * Exposes the legacy `ArchitectureScanResult` shape for existing callers
 * (the watcher / refresh path only needs the shape signature to decide
 * whether to re-emit the doc) while delegating all scanning to `deepScan`.
 *
 * Mapping from the deep result:
 *  - `mainModules` — one entry per top-level module; the description is
 *    `"<n> files"` followed by up to six sub-directory names and a
 *    `+k more` suffix when there are more than six.
 *  - `importantFiles` — the relative paths of the detected entry points.
 *  - every other field is copied through unchanged.
 *
 * @param projectRoot Root directory of the project to scan.
 * @param projectName Optional display name, forwarded to `deepScan`.
 * @returns The scan summary in the legacy shape.
 */
export function scanProject(projectRoot: string, projectName?: string): ArchitectureScanResult {
    const deep = deepScan(projectRoot, projectName);

    const mainModules = deep.topModules.map((m) => {
        let subSummary = '';
        if (m.subDirs.length > 0) {
            const shown = m.subDirs.slice(0, 6).map((s) => s.name).join(', ');
            const overflow = m.subDirs.length > 6 ? `, +${m.subDirs.length - 6} more` : '';
            subSummary = ` — ${shown}${overflow}`;
        }
        return { dir: m.dir, description: `${m.fileCount} files${subSummary}` };
    });

    return {
        projectName: deep.projectName,
        projectRoot: deep.projectRoot,
        description: deep.description,
        runtimes: deep.runtimes,
        mainModules,
        importantFiles: deep.entryPoints.map((e) => e.rel),
        signature: deep.signature,
    };
}
|
||||
|
||||
/**
 * Produce the one-line description shown for a top-level module directory.
 *
 * @param dir Directory name; well-known names get a specific label, anything
 *   else is labelled "Module".
 * @param fileCount Number of files to report.
 * @param subDirs Sub-directory names; at most six are listed, the remainder
 *   is summarised as `+N more`.
 * @returns `"<label> (<fileCount> files[ — <sub-directories>])"`.
 */
function _describeModule(dir: string, fileCount: number, subDirs: string[]): string {
    const labels: Record<string, string> = {
        src: 'Source code',
        media: 'Webview assets (HTML/CSS/JS)',
        core_py: 'Python utilities',
        tests: 'Test suite',
        lib: 'Library code',
        app: 'Application entry',
        apps: 'Application bundles',
        packages: 'Monorepo packages',
    };

    let subSummary = '';
    if (subDirs.length > 0) {
        const listed = subDirs.slice(0, 6).join(', ');
        const hidden = subDirs.length - 6;
        subSummary = hidden > 0 ? ` — ${listed}, +${hidden} more` : ` — ${listed}`;
    }

    const label = labels[dir] || 'Module';
    return `${label} (${fileCount} files${subSummary})`;
}
|
||||
|
||||
/**
 * Report whether `p` is a directory. Any `stat` failure (for example a
 * missing path) counts as "not a directory".
 */
function _isDir(p: string): boolean {
    let info: fs.Stats;
    try {
        info = fs.statSync(p);
    } catch {
        return false;
    }
    return info.isDirectory();
}
|
||||
/**
 * Report whether `p` is a regular file. Any `stat` failure (for example a
 * missing path) counts as "not a file".
 */
function _isFileLike(p: string): boolean {
    let info: fs.Stats;
    try {
        info = fs.statSync(p);
    } catch {
        return false;
    }
    return info.isFile();
}
|
||||
/**
 * List the entries of a directory, leaving out hidden entries (names that
 * start with `.`) and the heavy noise directories `node_modules`, `out`,
 * `dist` and `__pycache__`.
 *
 * @param p Directory to list.
 * @returns The remaining entry names, or an empty array when the directory
 *   cannot be read.
 */
function _readDirSafe(p: string): string[] {
    let names: string[];
    try {
        names = fs.readdirSync(p);
    } catch {
        return [];
    }
    const noise = ['node_modules', 'out', 'dist', '__pycache__'];
    return names.filter((name) => !(name.startsWith('.') || noise.includes(name)));
}
|
||||
|
||||
/**
 * Fingerprint a value: SHA-1 of its JSON serialisation, hex-encoded and
 * truncated to the first 16 characters.
 */
function _hashSignature(obj: unknown): string {
    const hasher = crypto.createHash('sha1');
    hasher.update(JSON.stringify(obj));
    const hex = hasher.digest('hex');
    return hex.slice(0, 16);
}
|
||||
|
||||
/**
|
||||
* Build or refresh the architecture doc. Idempotent:
|
||||
* • If the file doesn't exist: scaffold full doc with auto + user-owned blocks.
|
||||
@@ -203,7 +155,14 @@ export function buildOrRefreshArchitectureDoc(
|
||||
projectName?: string,
|
||||
nowIso: string = new Date().toISOString()
|
||||
): BuildResult {
|
||||
const scan = scanProject(projectRoot, projectName);
|
||||
// Incremental scan: feed the previous per-file cache so unchanged files
|
||||
// are reused instead of re-parsed. The cache lives alongside the doc and
|
||||
// is rewritten at the end of every successful refresh.
|
||||
const prevCache = _readScanCache(projectRoot);
|
||||
const deep = deepScan(projectRoot, projectName, prevCache);
|
||||
_writeScanCache(projectRoot, deep.newCache);
|
||||
|
||||
const scan = scanProject(projectRoot, projectName); // shape-only wrapper for callers
|
||||
const docPath = architectureDocPathFor(projectRoot);
|
||||
const docDir = path.dirname(docPath);
|
||||
try {
|
||||
@@ -212,12 +171,16 @@ export function buildOrRefreshArchitectureDoc(
|
||||
logError('projectArchitecture: mkdir failed.', { docDir, error: e?.message ?? String(e) });
|
||||
}
|
||||
|
||||
const autoBlock = _renderAutoBlock(scan, nowIso);
|
||||
const autoBlock = _renderAutoBlock(deep, nowIso);
|
||||
|
||||
if (!fs.existsSync(docPath)) {
|
||||
const full = _renderFullDoc(scan, autoBlock);
|
||||
const full = _renderFullDoc(deep, autoBlock);
|
||||
fs.writeFileSync(docPath, full, 'utf8');
|
||||
logInfo('projectArchitecture: created.', { docPath, signature: scan.signature });
|
||||
logInfo('projectArchitecture: created.', {
|
||||
docPath, signature: deep.signature, files: deep.totalFiles,
|
||||
newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
|
||||
cached: deep.refreshStats.cached,
|
||||
});
|
||||
return { docPath, created: true, scan };
|
||||
}
|
||||
|
||||
@@ -226,52 +189,182 @@ export function buildOrRefreshArchitectureDoc(
|
||||
const replaced = _replaceAutoBlock(existing, autoBlock);
|
||||
if (replaced !== existing) {
|
||||
fs.writeFileSync(docPath, replaced, 'utf8');
|
||||
logInfo('projectArchitecture: refreshed.', { docPath, signature: scan.signature });
|
||||
logInfo('projectArchitecture: refreshed.', {
|
||||
docPath, signature: deep.signature, files: deep.totalFiles,
|
||||
newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
|
||||
cached: deep.refreshStats.cached,
|
||||
deleted: deep.refreshStats.deleted.length,
|
||||
});
|
||||
}
|
||||
return { docPath, created: false, scan };
|
||||
}
|
||||
|
||||
/**
 * Render the auto-managed block: everything between
 * `<!-- ASTRA:AUTO-START -->` and `<!-- ASTRA:AUTO-END -->`, which is
 * overwritten on every refresh. Sections are kept compact (one line per file
 * where possible) and every section heading is a `##` heading.
 *
 * Sections, in order: Snapshot, Last Refresh, Directory Map, Module
 * Dependencies, Entry Points, Hub Files, Modules, VS Code Extension Surface,
 * Dependencies, README Excerpt. A section is omitted when it has no content;
 * Snapshot and Last Refresh are always present.
 *
 * @param deep Result of the deep scan to describe.
 * @param nowIso Timestamp written into the "Last Refresh" section and footer.
 * @returns The block as a single newline-joined string, markers included.
 */
function _renderAutoBlock(deep: DeepScanResult, nowIso: string): string {
    // Fixed locale: the doc is shared across machines, so digit grouping must
    // not depend on the host's default locale.
    const formatCount = (n: number): string => n.toLocaleString('en-US');
    const lines: string[] = [AUTO_START, ''];

    // ── Snapshot ---------------------------------------------------------------
    // The absolute project root is deliberately not emitted: it differs per
    // machine, and the workspace name is enough to identify the project.
    lines.push('## Snapshot');
    lines.push(`- **Workspace**: \`${deep.projectName}\`${deep.version ? ` \`v${deep.version}\`` : ''} _(absolute path varies by environment; resolved from the active VS Code workspace)_`);
    if (deep.description) lines.push(`- **Description**: ${deep.description}`);
    lines.push(`- **Stack**: ${deep.runtimes.length ? deep.runtimes.join(', ') : '_(unknown)_'}`);
    lines.push(`- **Stats**: ${deep.totalFiles} source files, ~${formatCount(deep.totalLines)} lines across ${deep.topModules.length} top-level modules.`);
    lines.push('');

    // ── Refresh stats ----------------------------------------------------------
    // Shows what the most recent refresh did, which helps confirm that cache
    // reuse is working and makes deletions visible.
    const r = deep.refreshStats;
    lines.push('## Last Refresh');
    lines.push(`- **Time**: ${nowIso}`);
    lines.push(`- **Files newly analysed**: ${r.newlyAnalyzed}`);
    lines.push(`- **Files reused from cache**: ${r.cached}`);
    if (r.deleted.length > 0) {
        const shown = r.deleted.slice(0, 10);
        const more = r.deleted.length - shown.length;
        lines.push(`- **Files deleted since last refresh** (${r.deleted.length}):`);
        // Two-space indent so these render as children of the bullet above.
        for (const d of shown) lines.push(`  - \`${d}\``);
        if (more > 0) lines.push(`  - _…and ${more} more_`);
    }
    lines.push('');

    // ── Directory mindmap ------------------------------------------------------
    const treeDiagram = renderDirectoryTreeDiagram(deep);
    if (treeDiagram) {
        lines.push('## Directory Map');
        lines.push(treeDiagram);
        lines.push('');
    }

    // ── Module dependency flowchart --------------------------------------------
    // Only worth showing when at least one module actually depends on another.
    const depDiagram = renderModuleDependencyDiagram(deep);
    if (depDiagram && deep.topModules.some((m) => m.dependsOn.length > 0)) {
        lines.push('## Module Dependencies');
        lines.push('> Arrows: which top-level module imports from which.');
        lines.push(depDiagram);
        lines.push('');
    }

    // ── Entry points -----------------------------------------------------------
    if (deep.entryPoints.length > 0) {
        lines.push('## Entry Points');
        lines.push('> Files to read first when learning the codebase.');
        for (const ep of deep.entryPoints) {
            lines.push(`- \`${ep.rel}\`${ep.role ? ` — ${ep.role}` : ''}`);
        }
        lines.push('');
    }

    // ── Hub files --------------------------------------------------------------
    if (deep.hubs.length > 0) {
        lines.push('## Hub Files');
        lines.push('> Imported by many other files — touching these has wide blast radius.');
        for (const h of deep.hubs) {
            lines.push(`- \`${h.rel}\` — referenced by **${h.refsIn}** files${h.role ? ` · ${h.role}` : ''}`);
        }
        lines.push('');
    }

    // ── Per-module detail ------------------------------------------------------
    if (deep.topModules.length > 0) {
        lines.push('## Modules');
        for (const mod of deep.topModules) {
            lines.push('');
            lines.push(`### \`${mod.dir}/\` — ${mod.fileCount} files, ~${formatCount(mod.totalLines)} lines`);
            if (mod.dependsOn.length > 0) {
                lines.push(`*Depends on*: ${mod.dependsOn.map((d) => `\`${d}/\``).join(', ')}`);
            }
            if (mod.subDirs.length > 0) {
                lines.push('');
                lines.push('**Sub-directories**');
                for (const sub of mod.subDirs) {
                    const desc = sub.description ? ` — ${sub.description}` : '';
                    lines.push(`- \`${mod.dir}/${sub.name}/\` (${sub.fileCount})${desc}`);
                }
            }
            if (mod.files.length > 0) {
                lines.push('');
                lines.push('**Key files**');
                for (const f of mod.files) {
                    const role = f.role ? ` — ${f.role}` : '';
                    lines.push(`- \`${f.rel}\` (${f.lines} lines)${role}`);
                }
            }
        }
        lines.push('');
    }

    // ── VS Code surface --------------------------------------------------------
    if (deep.vsCode && (deep.vsCode.commands.length || deep.vsCode.configurationProperties.length || deep.vsCode.activationEvents.length)) {
        lines.push('## VS Code Extension Surface');
        if (deep.vsCode.extensionId) lines.push(`- **Extension ID**: \`${deep.vsCode.extensionId}\``);
        if (deep.vsCode.activationEvents.length) {
            lines.push(`- **Activation events**: ${deep.vsCode.activationEvents.map((e) => `\`${e}\``).join(', ')}`);
        }
        if (deep.vsCode.commands.length) {
            lines.push(`- **Commands** (${deep.vsCode.commands.length}):`);
            // Listing is capped at 60 entries; the remainder is summarised.
            for (const c of deep.vsCode.commands.slice(0, 60)) {
                lines.push(`  - \`${c.command}\`${c.title ? ` — ${c.title}` : ''}`);
            }
            if (deep.vsCode.commands.length > 60) {
                lines.push(`  - _…and ${deep.vsCode.commands.length - 60} more_`);
            }
        }
        if (deep.vsCode.configurationProperties.length) {
            lines.push(`- **Configuration** (${deep.vsCode.configurationProperties.length} settings):`);
            for (const c of deep.vsCode.configurationProperties.slice(0, 60)) {
                const def = c.default === undefined ? '' : ` _(default: \`${JSON.stringify(c.default)}\`)_`;
                lines.push(`  - \`${c.key}\` *(${c.type})*${def}${c.description ? ` — ${c.description}` : ''}`);
            }
            if (deep.vsCode.configurationProperties.length > 60) {
                lines.push(`  - _…and ${deep.vsCode.configurationProperties.length - 60} more_`);
            }
        }
        lines.push('');
    }

    // ── Dependencies -----------------------------------------------------------
    if (deep.deps.total > 0) {
        lines.push('## Dependencies');
        lines.push(`- **Runtime** (${deep.deps.runtime.length}): ${deep.deps.runtime.length ? deep.deps.runtime.map((d) => `\`${d}\``).join(', ') : '_(none)_'}`);
        if (deep.deps.dev.length > 0) {
            lines.push(`- **Dev** (${deep.deps.dev.length}): ${deep.deps.dev.map((d) => `\`${d}\``).join(', ')}`);
        }
        lines.push('');
    }

    // ── README excerpt ---------------------------------------------------------
    if (deep.readmeExcerpt) {
        lines.push('## README Excerpt');
        lines.push('> Pulled from the project root README — first ~2 KB.');
        lines.push('');
        lines.push(deep.readmeExcerpt.trim());
        lines.push('');
    }

    lines.push(`_Last auto-scan: ${nowIso} · signature \`${deep.signature}\`_`);
    lines.push(AUTO_END);
    return lines.join('\n');
}
|
||||
|
||||
function _renderFullDoc(scan: ArchitectureScanResult, autoBlock: string): string {
|
||||
function _renderFullDoc(deep: DeepScanResult, autoBlock: string): string {
|
||||
// User-owned sections start as placeholders so first-time activation gives
|
||||
// the user a clear "fill these in" surface without confusing the model.
|
||||
return [
|
||||
`# ${scan.projectName} — Project Architecture Context`,
|
||||
`# ${deep.projectName} — Project Architecture Context`,
|
||||
'',
|
||||
'> Auto-managed sections (between the AUTO markers) are rewritten by Astra on every refresh.',
|
||||
'> The rest is yours — Astra never touches it once this file exists.',
|
||||
'> The rest below is yours — Astra never touches it once this file exists.',
|
||||
'',
|
||||
autoBlock,
|
||||
'',
|
||||
@@ -314,7 +407,7 @@ function _replaceAutoBlock(existing: string, autoBlock: string): string {
|
||||
* Purpose, Main Modules, Key Workflows, Current Constraints, Known Risks,
|
||||
* Active Decisions — and drop the long auto-listing of files first.
|
||||
*/
|
||||
export function readArchitectureForPrompt(docPath: string, maxChars: number = 8000): string {
|
||||
export function readArchitectureForPrompt(docPath: string, maxChars: number = 16000): string {
|
||||
if (!docPath || !fs.existsSync(docPath)) return '';
|
||||
let raw: string;
|
||||
try {
|
||||
@@ -326,21 +419,29 @@ export function readArchitectureForPrompt(docPath: string, maxChars: number = 80
|
||||
if (raw.length <= maxChars) return raw;
|
||||
|
||||
// Section-aware trim: parse `## ` headers, prioritise the high-signal
|
||||
// sections, drop the rest until we fit. Important Files is the longest
|
||||
// auto section so it gets dropped first.
|
||||
// sections, drop the rest until we fit. The verbose listings (per-module
|
||||
// file enumerations, full dependency tables, README excerpt) are the
|
||||
// first to go — they're useful when present but rarely change the model's
|
||||
// structural understanding of the project.
|
||||
const sections = _splitSections(raw);
|
||||
const priority = [
|
||||
// User-owned, irreplaceable.
|
||||
'Purpose',
|
||||
'Project Name',
|
||||
'Description',
|
||||
'Active Decisions',
|
||||
'Current Constraints',
|
||||
'Known Risks',
|
||||
'Key Workflows',
|
||||
'Main Modules',
|
||||
'Runtime / Stack',
|
||||
'Project Root',
|
||||
'Important Files', // drop first
|
||||
// Auto-managed high-signal.
|
||||
'Snapshot',
|
||||
'Entry Points',
|
||||
'Hub Files',
|
||||
'Module Dependencies',
|
||||
'Directory Map',
|
||||
'VS Code Extension Surface',
|
||||
// Auto-managed long tail (dropped first).
|
||||
'Modules',
|
||||
'Dependencies',
|
||||
'README Excerpt',
|
||||
];
|
||||
sections.sort((a, b) => {
|
||||
const ai = priority.indexOf(a.title); const bi = priority.indexOf(b.title);
|
||||
@@ -390,15 +491,21 @@ function _splitSections(raw: string): { title: string; body: string }[] {
|
||||
export function formatArchitectureContextForPrompt(opts: {
|
||||
projectName: string;
|
||||
docPath: string;
|
||||
/** When provided, `Source:` is emitted as a workspace-relative path. */
|
||||
projectRoot?: string;
|
||||
lastUpdated?: string;
|
||||
maxChars?: number;
|
||||
}): string {
|
||||
const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 8000);
|
||||
const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 16000);
|
||||
if (!content) return '';
|
||||
const stamp = opts.lastUpdated ? `\nLast updated: ${opts.lastUpdated}` : '';
|
||||
// Surface the doc location as a workspace-relative path so the same prompt
|
||||
// works regardless of which machine the user is on. The doc lives at
|
||||
// `.astra/project-context/architecture.md` inside the workspace by design.
|
||||
const sourceDisplay = toWorkspaceRelative(opts.docPath, opts.projectRoot);
|
||||
return [
|
||||
'[ACTIVE PROJECT ARCHITECTURE CONTEXT]',
|
||||
`Source: ${opts.docPath}`,
|
||||
`Source: ${sourceDisplay}`,
|
||||
`Project: ${opts.projectName}${stamp}`,
|
||||
'Use this as authoritative ground truth about the project structure, constraints, and active decisions. Do not contradict it without flagging the conflict.',
|
||||
'---',
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
/**
|
||||
* Mermaid diagram renderers for the architecture doc.
|
||||
*
|
||||
* Two diagrams: a directory mindmap so the user can see the *shape* of the
|
||||
* project at a glance, and a flowchart that shows which top-level module
|
||||
* imports from which. Both are small enough to fit in a single screenful so
|
||||
* they remain useful inside the LLM context window — large mermaid blobs
|
||||
* confuse smaller models, so we intentionally cap edges and nodes.
|
||||
*/
|
||||
import { DeepScanResult } from './scanner';
|
||||
|
||||
/**
 * Mindmap of the top-level directory layout: one branch per scanned module
 * and at most 6 sub-directories per module, so the picture stays scannable.
 * Names are passed through `_safeId`, which removes characters the mindmap
 * syntax would interpret as node-shape delimiters.
 *
 * Mermaid mindmaps derive parent/child relationships from indentation, so
 * the root, module and sub-directory lines are emitted at strictly
 * increasing indent levels (2, 4 and 6 spaces).
 *
 * @param scan Deep scan result whose `topModules` are drawn.
 * @returns A fenced `mermaid` code block, or `''` when there are no modules.
 */
export function renderDirectoryTreeDiagram(scan: DeepScanResult): string {
    if (scan.topModules.length === 0) return '';
    const lines: string[] = ['```mermaid', 'mindmap', `  root((${_safeId(scan.projectName)}))`];
    for (const mod of scan.topModules) {
        lines.push(`    ${_safeId(mod.dir)}/`);
        for (const sub of mod.subDirs.slice(0, 6)) {
            lines.push(`      ${_safeId(sub.name)}/`);
        }
    }
    lines.push('```');
    return lines.join('\n');
}
|
||||
|
||||
/**
 * Flowchart of dependencies between top-level modules. One node is declared
 * per module, then one arrow is drawn per distinct (module → dependency)
 * pair listed in `dependsOn`; a module's dependency on itself is not drawn.
 *
 * @param scan Deep scan result whose `topModules` are drawn.
 * @returns A fenced `mermaid` code block, or `''` when there are no modules.
 */
export function renderModuleDependencyDiagram(scan: DeepScanResult): string {
    const modules = scan.topModules;
    if (modules.length === 0) {
        return '';
    }

    // Nodes are declared up front so modules without any edge still appear.
    const nodeLines = modules.map((m) => ` ${_id(m.dir)}["${_label(m.dir, m.fileCount)}"]`);

    // Edges follow, skipping self-references and pairs already emitted.
    const edgeLines: string[] = [];
    const emitted = new Set<string>();
    modules.forEach((m) => {
        m.dependsOn.forEach((target) => {
            const pairKey = `${m.dir}>${target}`;
            if (target === m.dir || emitted.has(pairKey)) {
                return;
            }
            emitted.add(pairKey);
            edgeLines.push(` ${_id(m.dir)} --> ${_id(target)}`);
        });
    });

    return ['```mermaid', 'flowchart LR', ...nodeLines, ...edgeLines, '```'].join('\n');
}
|
||||
|
||||
/**
 * Turn a directory name into a flowchart node id by replacing every
 * character other than ASCII letters, digits and `_` with `_`.
 */
function _id(dir: string): string {
    const notIdChar = /[^A-Za-z0-9_]/g;
    return dir.replace(notIdChar, '_');
}
|
||||
|
||||
/**
 * Build a node label: the directory name with a trailing slash on the first
 * line and the file count on the second, separated by `<br/>`.
 */
function _label(dir: string, fileCount: number): string {
    const firstLine = `${dir}/`;
    const secondLine = `${fileCount} files`;
    return [firstLine, secondLine].join('<br/>');
}
|
||||
|
||||
/**
 * Sanitise a name for use as mindmap node text: drop `( ) [ ] { } |`,
 * collapse each run of whitespace into a single `_`, and keep at most the
 * first 40 characters.
 */
function _safeId(s: string): string {
    // These characters can be read as syntax by the mermaid mindmap parser.
    const withoutSyntax = s.replace(/[()[\]{}|]/g, '');
    const underscored = withoutSyntax.replace(/\s+/g, '_');
    return underscored.slice(0, 40);
}
|
||||
@@ -0,0 +1,644 @@
|
||||
/**
|
||||
* Deep static analyser for the Project Architecture Context generator.
|
||||
*
|
||||
* Walks the project tree (skipping the usual `node_modules` / `out` / `dist`
|
||||
* noise), pulls the *role* of each interesting file from its leading
|
||||
* JSDoc / docstring / H1, parses imports to build a directory-level
|
||||
* dependency graph, and inspects `package.json` for the VS Code extension
|
||||
* surface (commands, settings, activation events).
|
||||
*
|
||||
* Pure-ish — only file-system reads, no shell-outs, no LLM calls. A full scan
|
||||
* over a project on the order of ConnectAI (~hundreds of source files)
|
||||
* finishes in the low double-digit milliseconds, so the watcher can call this
|
||||
* after every debounce window without warming up a fan.
|
||||
*/
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
/** Top-level directories that are always recursed into; nothing else is scanned. */
const SCAN_ROOTS = [
    'src', 'media', 'tests', 'core_py', 'lib',
    'app', 'apps', 'packages', 'docs',
];

/** Directory names that are never descended into, wherever they appear. */
const SKIP_DIRS = new Set([
    // Dependency and build output
    'node_modules', 'out', 'dist', 'build',
    // VCS and tool caches
    '.git', '.next', '.cache',
    '__pycache__', '.pytest_cache', 'coverage', '.turbo', '.vercel',
    // Our own scratch directory; scanning it would be self-referential.
    '.astra',
]);

/** File extensions treated as code worth annotating; the extension selects the parser. */
const CODE_EXTS = new Set([
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
    '.py', '.md', '.json', '.html', '.css',
]);

/** Per-file read cap in bytes (64 KiB), so a huge generated file stays cheap. */
const READ_BYTE_CAP = 1024 * 64;

/** Maximum length of an extracted role string. */
const ROLE_MAX_LEN = 200;
|
||||
|
||||
/** What the scanner records about a single analysed file. */
export interface FileSummary {
    /**
     * Path of the file relative to the project root.
     */
    rel: string;
    /**
     * Approximate number of lines in the file.
     */
    lines: number;
    /**
     * Short human-readable description, taken from the leading JSDoc,
     * docstring or first H1 and trimmed.
     */
    role: string;
    /**
     * Internal imports: project-relative paths this file references.
     */
    imports: Array<string>;
}
|
||||
|
||||
/**
 * On-disk cache that lets a refresh re-read only the files whose mtime or
 * size changed. Stored at `.astra/project-context/scan-cache.json`. Deleting
 * it is safe: the next refresh rebuilds it from scratch.
 */
export interface ScanCache {
    /** Format version tag; always `1`. */
    version: 1;
    /** When this cache was generated. */
    generatedAt: string;
    /** Cached per-file entries. */
    files: { [key: string]: CachedFile };
}
|
||||
|
||||
/** One cached file entry: the fingerprint (mtime + size) plus the analysis it guards. */
export interface CachedFile {
    /**
     * Modification time of the file, in milliseconds since the epoch.
     */
    mtimeMs: number;
    /**
     * Size of the file in bytes; a cheap second check against mtime collisions.
     */
    size: number;
    /**
     * Approximate line count from the last full read.
     */
    lines: number;
    /**
     * Role string extracted from the file.
     */
    role: string;
    /**
     * Resolved internal imports.
     */
    imports: Array<string>;
}
|
||||
|
||||
/**
 * Summary of the work done by the most recent `deepScan` call; surfaced in
 * the generated doc.
 */
export interface RefreshStats {
    /**
     * Number of files freshly read and parsed during this run.
     */
    newlyAnalyzed: number;
    /**
     * Number of files reused from the cache because mtime and size matched.
     */
    cached: number;
    /**
     * Project-relative paths that disappeared since the previous scan.
     */
    deleted: Array<string>;
}
|
||||
|
||||
/** Aggregated view of one top-level directory that was scanned. */
export interface ModuleSummary {
    /** Name of the top-level directory, e.g. "src". */
    dir: string;

    /** Number of analysed files in the directory, nested ones included. */
    fileCount: number;

    /** Sum of the line counts of every analysed file in this module. */
    totalLines: number;

    /** The most informative files, highest interest score first (capped list). */
    files: Array<FileSummary>;

    /** Summaries of the sub-directories exactly one level below `dir`. */
    subDirs: Array<{
        name: string;
        fileCount: number;
        description: string;
    }>;

    /** Other top-level modules this one imports from (directory-level edges). */
    dependsOn: Array<string>;
}
|
||||
|
||||
/** VS Code extension metadata that `deepScan` lifts out of `package.json`. */
export interface VsCodeContribution {
    /** Commands declared under `contributes.commands`. */
    commands: Array<{
        command: string;
        title: string;
    }>;

    /** Settings declared under `contributes.configuration`, one entry per property key. */
    configurationProperties: Array<{
        key: string;
        type: string;
        default: unknown;
        description: string;
    }>;

    /** The manifest's `activationEvents` list. */
    activationEvents: Array<string>;

    /** `publisher.name` when a publisher is declared, otherwise the bare package name. */
    extensionId?: string;
}
|
||||
|
||||
/** Everything `deepScan` learns about a project in one pass. */
export interface DeepScanResult {
    projectName: string;
    projectRoot: string;
    description: string;
    version: string;
    runtimes: Array<string>;

    /** Roughly the first 2000 characters of the root README — "what is this project". */
    readmeExcerpt: string;

    /** npm dependencies split into runtime and dev, plus their combined count. */
    deps: {
        runtime: Array<string>;
        dev: Array<string>;
        total: number;
    };

    /** Well-known entry files that were found. */
    entryPoints: Array<{ rel: string; role: string }>;

    /** Files many others reference — a good answer to "where do I start?". */
    hubs: Array<{ rel: string; refsIn: number; role: string }>;

    topModules: Array<ModuleSummary>;
    vsCode?: VsCodeContribution;

    /** Total source files across all SCAN_ROOTS. */
    totalFiles: number;

    /** Total lines of code across all source files. */
    totalLines: number;

    /** Cheap hash of the structural shape; lets the watcher skip no-op refreshes. */
    signature: string;

    /** Per-file cache to write out for the next incremental refresh. */
    newCache: ScanCache;

    /** How much of this scan was fresh work versus cache reuse. */
    refreshStats: RefreshStats;
}
|
||||
|
||||
/**
 * Run the deep scan. All paths in the result are project-relative so the doc
 * stays portable across machines.
 *
 * When `prevCache` is provided, files whose mtime *and* size are unchanged are
 * reused from the cache instead of being re-read — this is what makes the
 * "Refresh" button incremental on large projects. Files missing from the cache
 * are analysed fresh; files in the cache but missing from disk are reported in
 * `refreshStats.deleted`.
 *
 * @param projectRoot Directory to scan. If it is empty or does not exist the
 *   scan touches no files and returns an empty result with `projectRoot: ''`.
 * @param projectName Optional display name; wins over the manifest and the
 *   directory name when non-blank.
 * @param prevCache Cache produced by an earlier scan, if any.
 * @returns The aggregated scan, including the cache to persist for next time.
 */
export function deepScan(projectRoot: string, projectName?: string, prevCache?: ScanCache): DeepScanResult {
    const safeRoot = projectRoot && fs.existsSync(projectRoot) ? projectRoot : '';
    const stats: RefreshStats = { newlyAnalyzed: 0, cached: 0, deleted: [] };
    // Track which previously-cached files we saw this pass. Anything left over
    // at the end was deleted between runs.
    const prevSeen = new Set<string>();
    const newCacheFiles: Record<string, CachedFile> = {};

    // Manifest values are hand-written JSON: a numeric `version` or an object
    // `description` must not crash the scan, so only strings are accepted.
    const asText = (value: unknown): string => (typeof value === 'string' ? value.trim() : '');

    // ── package.json + manifest -------------------------------------------------
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- free-form manifest, probed defensively
    let pkg: any = null;
    if (safeRoot && fs.existsSync(path.join(safeRoot, 'package.json'))) {
        try {
            const parsed: unknown = JSON.parse(fs.readFileSync(path.join(safeRoot, 'package.json'), 'utf8'));
            // A manifest that parses to a scalar or an array is not a manifest.
            if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) pkg = parsed;
        } catch { /* unreadable or malformed manifest — carry on without it */ }
    }
    const name = projectName?.trim()
        || asText(pkg?.displayName)
        || asText(pkg?.name)
        || (safeRoot ? path.basename(safeRoot) : 'Unknown Project');
    const description = asText(pkg?.description);
    const version = asText(pkg?.version);

    // ── Runtimes ---------------------------------------------------------------
    const runtimes: string[] = [];
    if (safeRoot && fs.existsSync(path.join(safeRoot, 'tsconfig.json'))) runtimes.push('TypeScript');
    if (pkg) {
        runtimes.push('Node.js');
        const allDeps = { ...(pkg.dependencies || {}), ...(pkg.devDependencies || {}) } as Record<string, string>;
        if (allDeps['@types/vscode'] || pkg.engines?.vscode) runtimes.push('VS Code Extension');
        if (allDeps['react']) runtimes.push('React');
        if (allDeps['next']) runtimes.push('Next.js');
        if (allDeps['express'] || allDeps['fastify'] || allDeps['hono']) runtimes.push('HTTP server');
        if (allDeps['@anthropic-ai/sdk']) runtimes.push('Anthropic SDK');
        if (allDeps['openai']) runtimes.push('OpenAI SDK');
        if (allDeps['@lmstudio/sdk']) runtimes.push('LM Studio SDK');
        if (allDeps['ollama']) runtimes.push('Ollama SDK');
        if (allDeps['jest'] || allDeps['vitest'] || allDeps['mocha']) runtimes.push('Test runner');
    }
    if (safeRoot && fs.existsSync(path.join(safeRoot, 'pyproject.toml'))) runtimes.push('Python');
    if (safeRoot && fs.existsSync(path.join(safeRoot, 'Cargo.toml'))) runtimes.push('Rust');
    if (safeRoot && fs.existsSync(path.join(safeRoot, 'go.mod'))) runtimes.push('Go');

    // ── README excerpt ---------------------------------------------------------
    let readmeExcerpt = '';
    if (safeRoot) {
        for (const candidate of ['README.md', 'README.MD', 'Readme.md', 'readme.md']) {
            const p = path.join(safeRoot, candidate);
            if (fs.existsSync(p)) {
                try {
                    const raw = fs.readFileSync(p, 'utf8');
                    readmeExcerpt = raw.length > 2000 ? raw.slice(0, 2000) + '\n…(truncated)' : raw;
                } catch { /* ignore */ }
                // First spelling that exists wins, even if it could not be read.
                break;
            }
        }
    }

    // ── Dependency listing ----------------------------------------------------
    const runtimeDeps: string[] = pkg?.dependencies ? Object.keys(pkg.dependencies).sort() : [];
    const devDeps: string[] = pkg?.devDependencies ? Object.keys(pkg.devDependencies).sort() : [];

    // ── Walk the file system --------------------------------------------------
    const collected: FileSummary[] = [];
    const modules: ModuleSummary[] = [];
    let totalLines = 0;

    if (safeRoot) {
        for (const dir of SCAN_ROOTS) {
            const abs = path.join(safeRoot, dir);
            if (!_isDir(abs)) continue;
            const moduleFiles: FileSummary[] = [];
            const subDirAgg = new Map<string, number>();
            _walk(abs, safeRoot, /*depth*/ 0, moduleFiles, subDirAgg,
                /*immediateSubDir*/ undefined,
                /*cacheCtx*/ { prevCache, prevSeen, newCacheFiles, stats });
            const totalLinesInMod = moduleFiles.reduce((acc, f) => acc + f.lines, 0);
            totalLines += totalLinesInMod;

            // Twelve most populated immediate sub-directories.
            const subDirs = Array.from(subDirAgg.entries())
                .sort((a, b) => b[1] - a[1])
                .slice(0, 12)
                .map(([sub, count]) => ({
                    name: sub,
                    fileCount: count,
                    description: _describeSubDir(sub, moduleFiles, dir),
                }));

            // dependsOn: aggregate imports out of this module to other SCAN_ROOTS.
            const depSet = new Set<string>();
            for (const f of moduleFiles) {
                for (const imp of f.imports) {
                    const head = imp.split('/')[0];
                    if (SCAN_ROOTS.includes(head) && head !== dir) depSet.add(head);
                }
            }

            // Sort files by "interest": hub-ness (others refs) descending,
            // then size. Capped so the final doc stays readable.
            const topFiles = moduleFiles
                .map((f) => ({ f, score: _interestScore(f, moduleFiles) }))
                .sort((a, b) => b.score - a.score)
                .slice(0, 25)
                .map((x) => x.f);

            modules.push({
                dir,
                fileCount: moduleFiles.length,
                totalLines: totalLinesInMod,
                files: topFiles,
                subDirs,
                dependsOn: Array.from(depSet).sort(),
            });
            collected.push(...moduleFiles);
        }
    }

    // Index by path once so the hub and entry-point lookups below are O(1)
    // instead of a linear scan of `collected` per key and per suffix. The
    // first occurrence wins, matching what `Array.prototype.find` returned.
    const byRel = new Map<string, FileSummary>();
    for (const f of collected) {
        if (!byRel.has(f.rel)) byRel.set(f.rel, f);
    }

    // ── Hubs (files imported by many others) ----------------------------------
    const refCount = new Map<string, number>();
    for (const f of collected) {
        for (const imp of f.imports) {
            refCount.set(imp, (refCount.get(imp) ?? 0) + 1);
        }
    }
    // Hub keys are import specifiers as written, which normally carry no file
    // extension (e.g. `src/utils`). Real files live with extensions
    // (`src/utils.ts`) or as folder/index pairs (`src/foo/index.ts`). Try a few
    // suffixes when looking up the role so the displayed hub list is
    // annotated, not bare. The original TS/JS forms are tried first; the
    // remaining ones cover the other importable extensions the scanner reads.
    const hubLookupSuffixes = [
        '.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js',
        '.mjs', '.cjs', '.py', '/__init__.py',
    ];
    const findFileForKey = (key: string): FileSummary | undefined => {
        const exact = byRel.get(key);
        if (exact) return exact;
        for (const suf of hubLookupSuffixes) {
            const hit = byRel.get(`${key}${suf}`);
            if (hit) return hit;
        }
        return undefined;
    };
    // Filter and cap before resolving roles: only the eight survivors need a lookup.
    const hubs = Array.from(refCount.entries())
        .filter(([, count]) => count >= 2)
        .sort((a, b) => b[1] - a[1])
        .slice(0, 8)
        .map(([rel, count]) => {
            const hit = findFileForKey(rel);
            return {
                rel: hit?.rel ?? rel,
                refsIn: count,
                role: hit?.role ?? '',
            };
        });

    // ── Entry points (well-known files) ---------------------------------------
    const entryCandidates = [
        'src/extension.ts', 'src/index.ts', 'src/main.ts', 'extension.ts', 'index.ts', 'main.ts',
        'src/app.ts', 'src/server.ts', 'media/sidebar.html', 'package.json',
    ];
    const entryPoints = entryCandidates
        .map((rel) => {
            const summary = byRel.get(rel);
            if (summary) return { rel, role: summary.role };
            // Not part of a scanned root (e.g. a root-level file): peek at it directly.
            if (safeRoot && fs.existsSync(path.join(safeRoot, rel))) {
                return { rel, role: _peekFileRole(path.join(safeRoot, rel)) };
            }
            return null;
        })
        .filter((x): x is { rel: string; role: string } => !!x);

    // ── VS Code manifest ------------------------------------------------------
    let vsCode: VsCodeContribution | undefined;
    if (pkg?.contributes || pkg?.activationEvents) {
        vsCode = {
            extensionId: pkg.publisher && pkg.name ? `${pkg.publisher}.${pkg.name}` : pkg.name,
            commands: Array.isArray(pkg.contributes?.commands)
                ? pkg.contributes.commands.map((c: any) => ({
                    command: String(c?.command ?? ''),
                    title: String(c?.title ?? ''),
                })).filter((c: any) => c.command)
                : [],
            configurationProperties: ((): VsCodeContribution['configurationProperties'] => {
                // `contributes.configuration` may be one object or an array of
                // them; in the array form the property maps are merged.
                const props = pkg.contributes?.configuration?.properties
                    || (Array.isArray(pkg.contributes?.configuration)
                        ? Object.fromEntries(pkg.contributes.configuration.flatMap((c: any) => Object.entries(c?.properties ?? {})))
                        : {});
                if (!props || typeof props !== 'object') return [];
                return Object.entries(props as Record<string, any>).map(([key, val]: [string, any]) => ({
                    key,
                    type: String(val?.type ?? ''),
                    default: val?.default,
                    description: String(val?.description ?? '').slice(0, 200),
                }));
            })(),
            activationEvents: Array.isArray(pkg.activationEvents) ? pkg.activationEvents.slice(0, 30) : [],
        };
    }

    // Signature: structural shape only — count + names of top-level subdirs,
    // entry point list, dep set. We deliberately don't hash file *contents*
    // because doing so would trigger a regen every keystroke.
    const signature = _hash({
        name, version,
        runtimes,
        dirs: modules.map((m) => `${m.dir}:${m.fileCount}:${m.subDirs.map((s) => s.name).join('|')}`),
        deps: [...runtimeDeps, ...devDeps],
        entryPoints: entryPoints.map((e) => e.rel),
        vsCodeCmds: vsCode?.commands.length ?? 0,
        vsCodeCfg: vsCode?.configurationProperties.length ?? 0,
    });

    // Anything that lived in the previous cache but wasn't seen this pass was
    // deleted (or moved). Report it so the user sees what disappeared and the
    // next refresh starts from a tidy cache.
    if (prevCache) {
        for (const cachedRel of Object.keys(prevCache.files)) {
            if (!prevSeen.has(cachedRel)) stats.deleted.push(cachedRel);
        }
    }

    const newCache: ScanCache = {
        version: 1,
        generatedAt: new Date().toISOString(),
        files: newCacheFiles,
    };

    return {
        projectName: name,
        projectRoot: safeRoot,
        description,
        version,
        runtimes: Array.from(new Set(runtimes)),
        readmeExcerpt,
        deps: { runtime: runtimeDeps, dev: devDeps, total: runtimeDeps.length + devDeps.length },
        entryPoints,
        hubs,
        topModules: modules,
        vsCode,
        totalFiles: collected.length,
        totalLines,
        signature,
        newCache,
        refreshStats: stats,
    };
}
|
||||
|
||||
// ───────────────────────────── walkers ─────────────────────────────────────
|
||||
|
||||
/**
 * Cache state threaded through the recursive walk. Bundled into one object so
 * that adding another counter does not widen `_walk`'s parameter list.
 */
interface CacheCtx {
    /** Cache from the previous scan, when there was one. */
    prevCache?: ScanCache;

    /** Paths from `prevCache` that this pass has encountered on disk. */
    prevSeen: Set<string>;

    /** Cache entries being accumulated for the scan in progress. */
    newCacheFiles: { [rel: string]: CachedFile };

    /** Running counters for the scan in progress. */
    stats: RefreshStats;
}
|
||||
|
||||
/**
 * Recursively collect summaries for every analysable file below `abs`.
 *
 * Entries whose name starts with a dot (other than `.gitignore`) or appears in
 * `SKIP_DIRS` are ignored, as are files whose extension is not in `CODE_EXTS`
 * and files larger than 2 MiB. Unreadable directories and entries that cannot
 * be stat-ed are skipped silently.
 *
 * @param abs Absolute path of the directory to read.
 * @param projectRoot Root that `rel` paths are computed against.
 * @param depth 0 for the directory the walk started in, +1 per level.
 * @param out Receives one `FileSummary` per analysed file.
 * @param subDirAgg File tally per immediate sub-directory of the starting directory.
 * @param immediateSubDir Top-level sub-directory the recursion is currently inside (e.g. "agents").
 * @param cacheCtx Previous cache, new cache and counters for this scan.
 * @param visited Real paths of directories already walked. Callers can omit
 *   it; it exists so a symlink pointing back at an ancestor is not followed
 *   again and again. A directory reachable by two routes is walked once.
 */
function _walk(
    abs: string,
    projectRoot: string,
    depth: number,
    out: FileSummary[],
    subDirAgg: Map<string, number>,
    immediateSubDir: string | undefined,
    cacheCtx: CacheCtx,
    visited: Set<string> = new Set<string>(),
): void {
    // `statSync` below follows symlinks, so without this guard a link to a
    // parent directory would be re-walked until the OS gives up on the path.
    let realDir: string;
    try { realDir = fs.realpathSync(abs); } catch { return; }
    if (visited.has(realDir)) return;
    visited.add(realDir);

    let entries: string[];
    try { entries = fs.readdirSync(abs); } catch { return; }

    for (const entry of entries) {
        if (entry.startsWith('.') && entry !== '.gitignore') continue;
        if (SKIP_DIRS.has(entry)) continue;
        const full = path.join(abs, entry);
        let stat: fs.Stats;
        try { stat = fs.statSync(full); } catch { continue; }

        if (stat.isDirectory()) {
            if (depth === 0) {
                // Initialise sub-dir counter so we have an entry even if it ends up empty.
                subDirAgg.set(entry, subDirAgg.get(entry) ?? 0);
            }
            // Once we step into a top-level child we keep its name as the tag
            // for *every* nested file beneath it, so counts include depth > 1.
            const nextTag = depth === 0 ? entry : immediateSubDir;
            _walk(full, projectRoot, depth + 1, out, subDirAgg, nextTag, cacheCtx, visited);
            continue;
        }
        if (!stat.isFile()) continue;

        const ext = path.extname(entry).toLowerCase();
        if (!CODE_EXTS.has(ext)) continue;
        // Heuristic: massive auto-generated files aren't useful as
        // architectural signal. They are skipped outright — neither analysed
        // nor included in any count.
        if (stat.size > 2 * 1024 * 1024) continue;
        const rel = path.relative(projectRoot, full).replace(/\\/g, '/');

        // ── Cache lookup ────────────────────────────────────────────────
        // Same mtime *and* same size = file is structurally unchanged. Both
        // are required because mtimes alone can collide while the bytes
        // differ; the size check rules most of those out cheaply.
        // The cache is a plain object parsed from JSON, so only own keys
        // count — a path must never resolve to an inherited property.
        const prevFiles = cacheCtx.prevCache?.files;
        const prev = prevFiles && Object.prototype.hasOwnProperty.call(prevFiles, rel)
            ? prevFiles[rel]
            : undefined;
        const unchanged = prev !== undefined
            && Math.floor(prev.mtimeMs) === Math.floor(stat.mtimeMs)
            && prev.size === stat.size;

        let summary: FileSummary;
        if (unchanged && prev) {
            summary = { rel, lines: prev.lines, role: prev.role, imports: prev.imports.slice() };
            cacheCtx.stats.cached++;
            cacheCtx.newCacheFiles[rel] = prev;
        } else {
            summary = _analyseFile(full, rel);
            cacheCtx.stats.newlyAnalyzed++;
            cacheCtx.newCacheFiles[rel] = {
                mtimeMs: stat.mtimeMs,
                size: stat.size,
                lines: summary.lines,
                role: summary.role,
                imports: summary.imports.slice(),
            };
        }
        // Seen on disk (changed or not), so it must not be reported as deleted.
        if (prev) cacheCtx.prevSeen.add(rel);
        out.push(summary);

        if (immediateSubDir) {
            // Nested file inside a tracked top-level sub-dir → bump its tally.
            subDirAgg.set(immediateSubDir, (subDirAgg.get(immediateSubDir) ?? 0) + 1);
        }
    }
}
|
||||
|
||||
/**
 * Read the head of a file (at most `READ_BYTE_CAP` bytes) and derive its
 * summary: approximate line count, role string and internal imports.
 *
 * Best-effort by design: if the file cannot be opened or read, the summary
 * comes back with `lines: 0`, an empty role and no imports.
 *
 * @param full Absolute path used for I/O.
 * @param rel Project-relative path stored in the summary and handed to the extractors.
 */
function _analyseFile(full: string, rel: string): FileSummary {
    let raw = '';
    let lines = 0;
    try {
        const fd = fs.openSync(full, 'r');
        try {
            // Size is taken from the open descriptor so it describes the very
            // file being read, not whatever the path pointed at a moment ago.
            const fileSize = fs.fstatSync(fd).size;
            const buf = Buffer.alloc(Math.min(fileSize, READ_BYTE_CAP));
            // Decode only what was actually read; a short read would
            // otherwise leave zero padding at the end of the text.
            const bytesRead = fs.readSync(fd, buf, 0, buf.length, 0);
            raw = buf.toString('utf8', 0, bytesRead);

            const newlines = (raw.match(/\n/g) || []).length;
            if (bytesRead > 0 && fileSize > bytesRead) {
                // Only a window was read: scale the newline count up to the
                // full size. Within ~5% for typical source files.
                lines = Math.round(newlines * (fileSize / bytesRead));
            } else {
                // Whole file in hand: a last line without a trailing newline
                // is still a line.
                lines = newlines + (raw.length > 0 && !raw.endsWith('\n') ? 1 : 0);
            }
        } finally {
            fs.closeSync(fd);
        }
    } catch { /* unreadable file — fall through with empty content */ }

    const role = _extractRole(rel, raw);
    const imports = _extractImports(rel, raw);
    return { rel, lines, role, imports };
}
|
||||
|
||||
/**
 * Extract the role string of a file that was not part of the walk, reading at
 * most its first 8192 bytes.
 *
 * @param full Absolute path of the file; only its base name is passed on to
 *   the role extractor.
 * @returns The role, or `''` when the file cannot be read.
 */
function _peekFileRole(full: string): string {
    try {
        const fd = fs.openSync(full, 'r');
        let head: string;
        try {
            const buf = Buffer.alloc(Math.min(fs.fstatSync(fd).size, 8192));
            // Decode only the bytes actually read so a short read cannot
            // leave zero padding in the text.
            const bytesRead = fs.readSync(fd, buf, 0, buf.length, 0);
            head = buf.toString('utf8', 0, bytesRead);
        } finally {
            fs.closeSync(fd);
        }
        return _extractRole(path.basename(full), head);
    } catch {
        return '';
    }
}
|
||||
|
||||
// ───────────────────────────── extractors ──────────────────────────────────
|
||||
|
||||
/**
 * Pull a one-sentence "what is this file" from its header. Format depends on extension.
 *
 * - `.md`: first H1, else the first paragraph.
 * - `.json`: a fixed label chosen from the file name.
 * - `.html`: the `<title>`, else a fixed label.
 * - `.css`: a fixed label.
 * - `.py`: leading triple-quoted docstring, else up to three leading `#` lines.
 * - anything else (TS/JS): leading JSDoc block, else up to four leading `//` lines.
 *
 * @param rel Path (or bare file name) whose extension selects the strategy.
 * @param raw Leading content of the file.
 * @returns The cleaned role, or `''` when nothing suitable is found.
 */
function _extractRole(rel: string, raw: string): string {
    if (!raw) return '';
    const ext = path.extname(rel).toLowerCase();
    // Drop a leading byte-order mark for every file type: several patterns
    // below are anchored at the very first character and would otherwise miss.
    const text = raw.replace(/^\uFEFF/, '');

    if (ext === '.md') {
        // First H1, or first non-blank line.
        const h1 = /^#\s+(.+)$/m.exec(text);
        if (h1) return _clean(h1[1]);
        const para = text.split(/\n\s*\n/)[0]?.replace(/^>\s+/gm, '').trim();
        if (para) return _clean(para);
        return '';
    }

    if (ext === '.json') {
        if (rel.endsWith('package.json')) return 'npm package manifest';
        if (rel.endsWith('tsconfig.json')) return 'TypeScript compiler config';
        if (/system_schema/.test(rel)) return 'JSON schema';
        return 'JSON configuration';
    }

    if (ext === '.html') {
        const title = /<title[^>]*>([^<]+)<\/title>/i.exec(text);
        if (title) return _clean(title[1]);
        return 'HTML document';
    }

    if (ext === '.css') return 'Stylesheet';

    if (ext === '.py') {
        // Triple-quoted docstring, optionally after a shebang line. The
        // back-reference makes the closing quotes match the opening ones, so
        // a `'''` inside a `"""` docstring does not end it early.
        const doc = /^\s*(?:#!.*\n\s*)?("""|''')([\s\S]*?)\1/.exec(text);
        if (doc) return _clean(doc[2]);
        // Otherwise the top-of-file comment block (a shebang is not a comment).
        const hash = /^(?:#[^\n!][^\n]*\n){1,3}/.exec(text);
        if (hash) return _clean(hash[0].replace(/^#\s?/gm, ''));
        return '';
    }

    // TS / JS — prefer the first /** … */ block at top-of-file.
    const jsdoc = /^\s*\/\*\*([\s\S]*?)\*\//.exec(text);
    if (jsdoc) {
        // Remove the leading " * " gutter from each line of the block.
        const cleaned = jsdoc[1].replace(/^\s*\*\s?/gm, '').trim();
        return _clean(cleaned);
    }
    // Fall back to leading single-line comments.
    const lineCmt = /^(?:\/\/[^\n]*\n){1,4}/.exec(text);
    if (lineCmt) return _clean(lineCmt[0].replace(/^\/\/\s?/gm, '').trim());
    return '';
}
|
||||
|
||||
/**
 * Normalise extracted header text into a single line of at most
 * `ROLE_MAX_LEN` characters.
 *
 * Banner rules are removed, markdown emphasis markers are dropped and
 * whitespace is collapsed. Underscores that sit between two letters or digits
 * are kept, so identifiers such as `core_py` or `scan_cache` survive intact;
 * only underscores at a word edge are treated as emphasis.
 */
function _clean(s: string): string {
    const isWordChar = (ch: string | undefined): boolean => ch !== undefined && /[\p{L}\p{N}]/u.test(ch);
    return s
        // Strip ASCII-banner decoration lines like "================" or "----" — they're
        // common in this codebase but read as visual noise once collapsed onto one line.
        .replace(/[=\-_*~]{4,}/g, ' ')
        // Drop markdown emphasis markers: backticks and asterisks always…
        .replace(/[`*]+/g, '')
        // …underscores only when the run is not inside a word.
        .replace(/_+/g, (run: string, offset: number, whole: string): string =>
            isWordChar(whole[offset - 1]) && isWordChar(whole[offset + run.length]) ? run : '')
        // Collapse runs of whitespace.
        .replace(/\s+/g, ' ')
        .trim()
        .slice(0, ROLE_MAX_LEN)
        // The cut can land right after a space; don't leave it dangling.
        .trimEnd();
}
|
||||
|
||||
/**
 * Cheap import scanner — regex-based, project-relative only. We deliberately
 * skip external packages because they're already covered by the deps section
 * and would just clutter the module-dependency view.
 *
 * Being regex-based, it also picks up import-looking text inside comments and
 * strings; that is accepted as noise.
 *
 * @param rel Project-relative path of the file; relative specifiers are resolved against it.
 * @param raw File content (possibly only its head).
 * @returns De-duplicated project-relative import targets, in order of first appearance.
 */
function _extractImports(rel: string, raw: string): string[] {
    if (!raw) return [];
    const ext = path.extname(rel).toLowerCase();
    const found = new Set<string>();

    if (ext === '.py') {
        const pyRe = /^\s*(?:from\s+(\S+)\s+import|import\s+(\S+))/gm;
        let m: RegExpExecArray | null;
        while ((m = pyRe.exec(raw))) {
            const spec = m[1] || m[2];
            if (!spec || !spec.startsWith('.')) continue; // absolute import → external
            // Python relative imports: one leading dot is the current
            // package, every further dot climbs one package. The remaining
            // dots separate module names. Examples:
            //   `.foo.bar` → ./foo/bar     `..foo` → ./../foo     `.` → .
            const dots = spec.length - spec.replace(/^\.+/, '').length;
            const tail = spec.slice(dots).replace(/\./g, '/');
            const relSpec = `.${'/..'.repeat(dots - 1)}${tail ? `/${tail}` : ''}`;
            const resolved = _resolveRelImport(rel, relSpec);
            if (resolved) found.add(resolved);
        }
        return Array.from(found);
    }

    // TS/JS/MJS/CJS — covers:
    //   import x from './y'          (with the very common space between `from` and the quote)
    //   import './side-effect'       (no `from`)
    //   export { a } from './b'      (re-exports still create a dep edge)
    //   export * from './b'
    //   require('./y') / import('./y')
    // Earlier versions missed the space after `from`, so deps came back nearly empty.
    const tsRe = /(?:\bfrom\s+|\brequire\s*\(\s*|\bimport\s*\(\s*|\bimport\s+)['"`]([^'"`\n]+)['"`]/g;
    let m: RegExpExecArray | null;
    while ((m = tsRe.exec(raw))) {
        const spec = m[1];
        if (!spec || !spec.startsWith('.')) continue; // bare specifier → external package
        const resolved = _resolveRelImport(rel, spec);
        if (resolved) found.add(resolved);
    }
    return Array.from(found);
}
|
||||
|
||||
/**
 * Resolve a relative import specifier against the importing file and return
 * the target as a project-relative POSIX path.
 *
 * The specifier is used as written — no file extension is added or removed —
 * except that a trailing `/index` segment is dropped.
 *
 * @param fromRel Project-relative path of the importing file.
 * @param spec Relative specifier, e.g. `./utils` or `../core/index`.
 * @returns The resolved path, or `null` when `fromRel` is empty or the
 *   specifier points outside the project root.
 */
function _resolveRelImport(fromRel: string, spec: string): string | null {
    if (!fromRel) return null;
    const fromDir = path.posix.dirname(fromRel);
    const candidate = path.posix.normalize(path.posix.join(fromDir, spec));
    // Escaped the project — ignore. A path that normalises to exactly `..`
    // has escaped just as much as one that starts with `../`.
    if (candidate === '..' || candidate.startsWith('../')) return null;
    // Trim trailing `/index` so paths line up with how files are usually written.
    return candidate.replace(/\/index$/, '');
}
|
||||
|
||||
// ───────────────────────────── scoring & helpers ───────────────────────────
|
||||
|
||||
/**
 * Heuristic "how worth listing" score for ordering a module's file list.
 *
 * Score = 4 per file in `all` that imports this one
 *       + 2 if a role string was extracted
 *       + 1 per 100 lines, capped at 10
 *       + 5 for `index.ts`, `index.js` or `extension.ts`.
 *
 * @param file The file being scored.
 * @param all Every file of the same module (may include `file` itself).
 */
function _interestScore(file: FileSummary, all: FileSummary[]): number {
    // Import keys are written without an extension and without a trailing
    // `/index`, so work out every key that denotes exactly this file.
    // Comparing whole keys avoids counting look-alikes such as an import of
    // `lib/utils` for the file `src/lib/utils.ts`.
    const keys = new Set<string>([file.rel]);
    for (const suffix of ['.ts', '.tsx', '.js', '.jsx', '/index.ts', '/index.tsx', '/index.js']) {
        if (file.rel.endsWith(suffix)) keys.add(file.rel.slice(0, -suffix.length));
    }

    let score = 0;
    // Hub-ness: the more files import this one, the more central it is.
    const refsIn = all.reduce(
        (acc, other) => acc + (other.imports.some((imp) => keys.has(imp)) ? 1 : 0),
        0,
    );
    score += refsIn * 4;
    // Has a role string we extracted — bonus, less guessable name.
    if (file.role) score += 2;
    // Bigger files usually carry more responsibility (but cap so a single
    // 5000-line file doesn't dominate).
    score += Math.min(10, Math.floor(file.lines / 100));
    // Known entry / index files boost.
    const base = path.basename(file.rel);
    if (base === 'index.ts' || base === 'index.js' || base === 'extension.ts') score += 5;
    return score;
}
|
||||
|
||||
/**
 * Produce a short description for the sub-directory `parentDir/name`.
 *
 * The role of the first file inside it that has one is used (cut to 120
 * characters). Without any role, the result is a count plus the distinct
 * extensions found; an empty sub-directory yields `''`.
 */
function _describeSubDir(name: string, allFiles: FileSummary[], parentDir: string): string {
    const prefix = `${parentDir}/${name}/`;

    const members: FileSummary[] = [];
    for (const candidate of allFiles) {
        if (candidate.rel.startsWith(prefix)) members.push(candidate);
    }

    // Use the first role-bearing member as a stand-in description.
    for (const member of members) {
        if (member.role) return member.role.slice(0, 120);
    }
    if (members.length === 0) return '';

    // Nothing descriptive available: fall back to a file-type summary.
    const extensions = new Set<string>();
    for (const member of members) {
        extensions.add(path.extname(member.rel).toLowerCase());
    }
    return `${members.length} files (${Array.from(extensions).join(', ')})`;
}
|
||||
|
||||
/** Report whether `p` is a directory; any failure to stat it counts as "no". */
function _isDir(p: string): boolean {
    try {
        const info = fs.statSync(p);
        return info.isDirectory();
    } catch {
        return false;
    }
}
|
||||
|
||||
/**
 * Hash the JSON form of a value into a short hexadecimal string for the scan
 * signature.
 *
 * This is the djb2 string hash (start at 5381, then `h * 33 + charCode`)
 * wrapped to 32 bits. It is not a cryptographic digest: it only needs to
 * change when the input changes, and it is written inline to avoid depending
 * on `crypto` here. The result depends on property order, because
 * `JSON.stringify` does.
 *
 * @param obj Any JSON-serialisable value. Values that serialise to nothing
 *   (`undefined`, functions, symbols) hash like the empty string.
 * @returns Unsigned 32-bit hash as lower-case hex, without padding.
 * @throws Whatever `JSON.stringify` throws, e.g. on circular structures.
 */
function _hash(obj: unknown): string {
    // `JSON.stringify` yields `undefined` rather than a string for some inputs.
    const s: string = JSON.stringify(obj) ?? '';
    let h = 5381;
    // `| 0` keeps the accumulator a signed 32-bit integer on every step.
    for (let i = 0; i < s.length; i++) h = ((h << 5) + h + s.charCodeAt(i)) | 0;
    // Reinterpret as unsigned so the hex string never carries a minus sign.
    return (h >>> 0).toString(16);
}
|
||||
Reference in New Issue
Block a user