release: v2.0.2 - Structural Integrity & Automated Context Management

This commit is contained in:
g1nation
2026-05-13 22:34:44 +09:00
parent e85e11aac6
commit c40571b7ef
22 changed files with 2802 additions and 232 deletions
+280 -173
View File
@@ -27,23 +27,72 @@
*/
import * as fs from 'fs';
import * as path from 'path';
import * as crypto from 'crypto';
import { logError, logInfo } from '../../utils';
import { deepScan, DeepScanResult, ScanCache, RefreshStats } from './scanner';
import { renderDirectoryTreeDiagram, renderModuleDependencyDiagram } from './mermaid';
/** Sub-folder under the project root where the architecture doc lives. */
const ARCH_DIR_REL = path.join('.astra', 'project-context');
/** File name of the generated architecture markdown (presumably joined onto ARCH_DIR_REL by `architectureDocPathFor`, whose body is not visible here — confirm). */
const ARCH_FILE = 'architecture.md';
/** File name of the per-file scan cache; `_cachePathFor` resolves it inside ARCH_DIR_REL. */
const CACHE_FILE = 'scan-cache.json';
/**
 * Top-level directories we consider "code" worth listing under Main Modules.
 * Each name is checked as a direct child of the project root; names that do
 * not exist as directories are ignored by the scan loop.
 */
const CODE_DIRS = ['src', 'media', 'core_py', 'lib', 'app', 'apps', 'packages', 'tests'];
/**
 * Compute where the scan cache for `projectRoot` is stored.
 *
 * The cache file is kept in the same `.astra/project-context/` folder as the
 * generated markdown, so the doc and the per-file fingerprints behind it can
 * be inspected side by side.
 *
 * @param projectRoot Root directory of the project being scanned.
 * @returns Path of the cache file underneath `projectRoot`.
 */
function _cachePathFor(projectRoot: string): string {
  const contextDir = path.join(projectRoot, ARCH_DIR_REL);
  return path.join(contextDir, CACHE_FILE);
}
/**
 * Files at the project root worth highlighting under "Important Files".
 * Only entries that actually exist in the project root are reported.
 */
const ROOT_IMPORTANT = [
'package.json', 'pnpm-workspace.yaml', 'tsconfig.json',
'README.md', 'CHANGELOG.md', 'ARCHITECTURE.md',
'pyproject.toml', 'requirements.txt', 'Cargo.toml', 'go.mod',
'Dockerfile', 'docker-compose.yml',
];
/**
 * Load the previously persisted scan cache for a project, if a usable one exists.
 *
 * Best-effort: a missing file, unreadable file, malformed JSON, or a payload
 * that does not look like a version-1 cache all result in `undefined`, which
 * callers treat as "no cache — scan everything". Read/parse failures are
 * logged; a shape mismatch is not.
 *
 * @param projectRoot Root directory of the project being scanned.
 * @returns The parsed cache, or `undefined` when nothing usable was found.
 */
function _readScanCache(projectRoot: string): ScanCache | undefined {
  const cachePath = _cachePathFor(projectRoot);
  if (!fs.existsSync(cachePath)) {
    return undefined;
  }

  let payload: any;
  try {
    const text = fs.readFileSync(cachePath, 'utf8');
    payload = JSON.parse(text);
  } catch (err: any) {
    logError('projectArchitecture: cache read failed; starting fresh.', {
      cachePath,
      error: err?.message ?? String(err),
    });
    return undefined;
  }

  // Accept only the cache layout this module knows how to consume.
  const looksLikeV1Cache =
    Boolean(payload) &&
    payload.version === 1 &&
    Boolean(payload.files) &&
    typeof payload.files === 'object';
  return looksLikeV1Cache ? (payload as ScanCache) : undefined;
}
/**
 * Persist the scan cache for a project as pretty-printed JSON.
 *
 * Best-effort: the containing directory is created when missing, and any
 * failure (mkdir, serialisation, write) is logged rather than thrown so a
 * cache problem never aborts the caller.
 *
 * @param projectRoot Root directory of the project being scanned.
 * @param cache Cache object to serialise.
 */
function _writeScanCache(projectRoot: string, cache: ScanCache): void {
  const cachePath = _cachePathFor(projectRoot);
  try {
    const cacheDir = path.dirname(cachePath);
    fs.mkdirSync(cacheDir, { recursive: true });
    const serialized = JSON.stringify(cache, null, 2);
    fs.writeFileSync(cachePath, serialized, 'utf8');
  } catch (err: any) {
    logError('projectArchitecture: cache write failed.', {
      cachePath,
      error: err?.message ?? String(err),
    });
  }
}
/**
 * Normalize an absolute path for display. We don't want `/Volumes/Data/...`
 * (or any other machine-specific prefix) leaking into the architecture doc or
 * the prompt — the user works across multiple environments so absolute paths
 * are noise at best, and outright wrong on the next machine. Anything that
 * lives inside the workspace becomes workspace-relative; anything else falls
 * back to just the basename. The function is exported so callers outside this
 * module can apply the same policy consistently.
 *
 * Both inputs are normalized the same way before comparison: backslashes are
 * converted to `/` and trailing separators are dropped. The result therefore
 * always uses `/` as its separator, and the basename fallback works for
 * Windows-style paths even when running on a POSIX host (where the
 * platform-native `path.basename` would not split on `\`).
 *
 * The prefix comparison is case-sensitive.
 *
 * @param absPath Path to display. An empty string yields `''`.
 * @param workspaceRoot Optional workspace root; when omitted (or when
 *   `absPath` is outside it) only the final path segment is returned.
 * @returns `'.'` when `absPath` is the workspace root itself, the
 *   `/`-separated relative path when it is inside the root, otherwise the
 *   last path segment.
 */
export function toWorkspaceRelative(absPath: string, workspaceRoot?: string): string {
  if (!absPath) return '';

  const normalize = (p: string): string => p.replace(/\\/g, '/').replace(/\/+$/, '');
  const target = normalize(absPath);

  if (workspaceRoot) {
    const root = normalize(workspaceRoot);
    if (target === root) return '.';
    if (target.startsWith(`${root}/`)) return target.slice(root.length + 1);
  }

  // Use the POSIX flavour on the already-normalized path so the result does
  // not depend on which OS the extension host happens to run on.
  return path.posix.basename(target);
}
/**
 * HTML-comment markers that open and close the auto-managed block of the
 * architecture doc; `_renderAutoBlock` emits them as the first and last lines
 * of the block it renders.
 */
const AUTO_START = '<!-- ASTRA:AUTO-START -->';
const AUTO_END = '<!-- ASTRA:AUTO-END -->';
@@ -74,125 +123,28 @@ export function architectureDocPathFor(projectRoot: string): string {
}
/**
* Scan a project root and return a structured summary. Pure, side-effect free
* (apart from reading the file system) so we can unit-test the signature/diff
* logic without writing any files.
* Backwards-compatible thin wrapper. The watcher / refresh path only needs the
* shape-signature to decide whether to re-emit the doc, so we expose `scanProject`
* with the legacy shape but delegate to the deep scanner internally.
*/
export function scanProject(projectRoot: string, projectName?: string): ArchitectureScanResult {
const safeRoot = projectRoot && fs.existsSync(projectRoot) ? projectRoot : '';
const name = (projectName?.trim()) || (safeRoot ? path.basename(safeRoot) : 'Unknown Project');
// ── package.json ─────────────────────────────────────────────────────────
let description = '';
let pkgJson: any = null;
const pkgPath = safeRoot ? path.join(safeRoot, 'package.json') : '';
if (pkgPath && fs.existsSync(pkgPath)) {
try {
pkgJson = JSON.parse(fs.readFileSync(pkgPath, 'utf8'));
if (typeof pkgJson?.description === 'string') description = pkgJson.description.trim();
} catch (e: any) {
logError('projectArchitecture: package.json parse failed.', { error: e?.message ?? String(e) });
}
}
// ── Runtime / framework fingerprint ─────────────────────────────────────
const runtimes: string[] = [];
if (safeRoot && fs.existsSync(path.join(safeRoot, 'tsconfig.json'))) runtimes.push('TypeScript');
if (pkgJson) {
runtimes.push('Node.js');
const deps = { ...(pkgJson.dependencies || {}), ...(pkgJson.devDependencies || {}) } as Record<string, string>;
if (deps['@types/vscode'] || pkgJson.engines?.vscode) runtimes.push('VS Code Extension');
if (deps['react']) runtimes.push('React');
if (deps['next']) runtimes.push('Next.js');
if (deps['express'] || deps['fastify']) runtimes.push('HTTP server');
if (deps['@anthropic-ai/sdk']) runtimes.push('Anthropic SDK');
if (deps['openai']) runtimes.push('OpenAI SDK');
if (deps['@lmstudio/sdk']) runtimes.push('LM Studio SDK');
}
if (safeRoot && fs.existsSync(path.join(safeRoot, 'pyproject.toml'))) runtimes.push('Python');
if (safeRoot && fs.existsSync(path.join(safeRoot, 'Cargo.toml'))) runtimes.push('Rust');
if (safeRoot && fs.existsSync(path.join(safeRoot, 'go.mod'))) runtimes.push('Go');
// ── Main modules (top-level code directories) ───────────────────────────
const mainModules: ArchitectureScanResult['mainModules'] = [];
if (safeRoot) {
for (const candidate of CODE_DIRS) {
const dirAbs = path.join(safeRoot, candidate);
if (!_isDir(dirAbs)) continue;
const entries = _readDirSafe(dirAbs);
const fileCount = entries.filter((e) => _isFileLike(path.join(dirAbs, e))).length;
const subDirs = entries.filter((e) => _isDir(path.join(dirAbs, e)));
const desc = _describeModule(candidate, fileCount, subDirs);
mainModules.push({ dir: candidate, description: desc });
}
}
// ── Important files at the root ─────────────────────────────────────────
const importantFiles: string[] = [];
if (safeRoot) {
for (const f of ROOT_IMPORTANT) {
if (fs.existsSync(path.join(safeRoot, f))) importantFiles.push(f);
}
}
// Signature: hash of the structural inputs only. We do NOT hash file
// *contents* — the goal is "did the shape of the project change" so the
// watcher doesn't re-render the doc for every keystroke in a TS file.
const signature = _hashSignature({
name,
runtimes,
mainModules: mainModules.map((m) => `${m.dir}|${m.description}`),
importantFiles,
pkgVersion: pkgJson?.version || '',
pkgDeps: pkgJson ? Object.keys({ ...(pkgJson.dependencies || {}), ...(pkgJson.devDependencies || {}) }).sort().join(',') : '',
});
const deep = deepScan(projectRoot, projectName);
return {
projectName: name,
projectRoot: safeRoot,
description,
runtimes,
mainModules,
importantFiles,
signature,
projectName: deep.projectName,
projectRoot: deep.projectRoot,
description: deep.description,
runtimes: deep.runtimes,
mainModules: deep.topModules.map((m) => ({
dir: m.dir,
description: `${m.fileCount} files${m.subDirs.length > 0
? `${m.subDirs.slice(0, 6).map((s) => s.name).join(', ')}${m.subDirs.length > 6 ? `, +${m.subDirs.length - 6} more` : ''}`
: ''}`,
})),
importantFiles: deep.entryPoints.map((e) => e.rel),
signature: deep.signature,
};
}
/**
 * Build the one-line description shown for a top-level module directory.
 *
 * The result has the form `<label> (<fileCount> files<sub-directory list>)`,
 * where the label comes from a small table of well-known directory names
 * (falling back to `Module`) and at most six sub-directory names are listed,
 * followed by `, +N more` when there are additional ones.
 *
 * NOTE(review): the sub-directory list is appended directly after the word
 * `files` with no separator (e.g. `2 filesa, b`) — confirm whether a
 * delimiter was intended.
 *
 * @param dir Name of the top-level directory.
 * @param fileCount Number of files directly inside it.
 * @param subDirs Names of its immediate sub-directories.
 * @returns The formatted description string.
 */
function _describeModule(dir: string, fileCount: number, subDirs: string[]): string {
  const labels: Record<string, string> = {
    src: 'Source code',
    media: 'Webview assets (HTML/CSS/JS)',
    core_py: 'Python utilities',
    tests: 'Test suite',
    lib: 'Library code',
    app: 'Application entry',
    apps: 'Application bundles',
    packages: 'Monorepo packages',
  };
  const maxListed = 6;

  let subSummary = '';
  if (subDirs.length > 0) {
    const listed = subDirs.slice(0, maxListed).join(', ');
    const hiddenCount = subDirs.length - maxListed;
    subSummary = hiddenCount > 0 ? `${listed}, +${hiddenCount} more` : listed;
  }

  const label = labels[dir] || 'Module';
  return `${label} (${fileCount} files${subSummary})`;
}
/**
 * Report whether `p` resolves to a directory.
 * Any stat failure (missing path, permission error, …) is treated as "no".
 */
function _isDir(p: string): boolean {
  try {
    const stats = fs.statSync(p);
    return stats.isDirectory();
  } catch {
    return false;
  }
}
/**
 * Report whether `p` resolves to a regular file.
 * Any stat failure (missing path, permission error, …) is treated as "no".
 */
function _isFileLike(p: string): boolean {
  try {
    const stats = fs.statSync(p);
    return stats.isFile();
  } catch {
    return false;
  }
}
/**
 * List the entries of directory `p`, minus noise.
 *
 * Hidden entries (leading `.`) and the heavy build/dependency folders
 * `node_modules`, `out`, `dist` and `__pycache__` are dropped so the listing
 * reads usefully. An unreadable or missing directory yields an empty list.
 */
function _readDirSafe(p: string): string[] {
  const noiseNames = ['node_modules', 'out', 'dist', '__pycache__'];

  let entries: string[];
  try {
    entries = fs.readdirSync(p);
  } catch {
    return [];
  }

  return entries.filter((entry) => !entry.startsWith('.') && !noiseNames.includes(entry));
}
/**
 * Derive a short fingerprint for a JSON-serialisable value.
 *
 * @param obj Value to fingerprint; it is serialised with `JSON.stringify`.
 * @returns The first 16 hex characters of the SHA-1 digest of that JSON text.
 */
function _hashSignature(obj: unknown): string {
  const serialized = JSON.stringify(obj);
  const hasher = crypto.createHash('sha1');
  hasher.update(serialized);
  const fullDigest = hasher.digest('hex');
  return fullDigest.substring(0, 16);
}
/**
* Build or refresh the architecture doc. Idempotent:
* • If the file doesn't exist: scaffold full doc with auto + user-owned blocks.
@@ -203,7 +155,14 @@ export function buildOrRefreshArchitectureDoc(
projectName?: string,
nowIso: string = new Date().toISOString()
): BuildResult {
const scan = scanProject(projectRoot, projectName);
// Incremental scan: feed the previous per-file cache so unchanged files
// are reused instead of re-parsed. The cache lives alongside the doc and
// is rewritten at the end of every successful refresh.
const prevCache = _readScanCache(projectRoot);
const deep = deepScan(projectRoot, projectName, prevCache);
_writeScanCache(projectRoot, deep.newCache);
const scan = scanProject(projectRoot, projectName); // shape-only wrapper for callers
const docPath = architectureDocPathFor(projectRoot);
const docDir = path.dirname(docPath);
try {
@@ -212,12 +171,16 @@ export function buildOrRefreshArchitectureDoc(
logError('projectArchitecture: mkdir failed.', { docDir, error: e?.message ?? String(e) });
}
const autoBlock = _renderAutoBlock(scan, nowIso);
const autoBlock = _renderAutoBlock(deep, nowIso);
if (!fs.existsSync(docPath)) {
const full = _renderFullDoc(scan, autoBlock);
const full = _renderFullDoc(deep, autoBlock);
fs.writeFileSync(docPath, full, 'utf8');
logInfo('projectArchitecture: created.', { docPath, signature: scan.signature });
logInfo('projectArchitecture: created.', {
docPath, signature: deep.signature, files: deep.totalFiles,
newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
cached: deep.refreshStats.cached,
});
return { docPath, created: true, scan };
}
@@ -226,52 +189,182 @@ export function buildOrRefreshArchitectureDoc(
const replaced = _replaceAutoBlock(existing, autoBlock);
if (replaced !== existing) {
fs.writeFileSync(docPath, replaced, 'utf8');
logInfo('projectArchitecture: refreshed.', { docPath, signature: scan.signature });
logInfo('projectArchitecture: refreshed.', {
docPath, signature: deep.signature, files: deep.totalFiles,
newlyAnalyzed: deep.refreshStats.newlyAnalyzed,
cached: deep.refreshStats.cached,
deleted: deep.refreshStats.deleted.length,
});
}
return { docPath, created: false, scan };
}
function _renderAutoBlock(scan: ArchitectureScanResult, nowIso: string): string {
const modules = scan.mainModules.length > 0
? scan.mainModules.map((m) => `- \`${m.dir}/\`${m.description}`).join('\n')
: '_(no top-level code directories detected)_';
const importantFiles = scan.importantFiles.length > 0
? scan.importantFiles.map((f) => `- \`${f}\``).join('\n')
: '_(none detected)_';
const runtimes = scan.runtimes.length > 0 ? scan.runtimes.join(', ') : '_(unknown)_';
return [
AUTO_START,
'## Project Name',
scan.projectName,
'',
'## Project Root',
scan.projectRoot || '_(not set)_',
'',
'## Description',
scan.description || '_(no package.json description)_',
'',
'## Runtime / Stack',
runtimes,
'',
'## Main Modules',
modules,
'',
'## Important Files',
importantFiles,
'',
`_Last auto-scan: ${nowIso}_`,
AUTO_END,
].join('\n');
/**
* Render the auto-managed block. This is everything between
* `<!-- ASTRA:AUTO-START -->` and `<!-- ASTRA:AUTO-END -->` — overwritten on
* every refresh. The sections are kept compact (one line per file when
* possible) so the doc remains scannable; section headings use deterministic
* `##` levels so prompt-time truncation can prioritise correctly.
*
* Sections are emitted in a fixed order: Snapshot, Last Refresh, Directory
* Map, Module Dependencies, Entry Points, Hub Files, Modules, VS Code
* Extension Surface, Dependencies, README Excerpt. Snapshot and Last Refresh
* are always present; every other section is skipped when its data is empty.
*
* NOTE(review): several list items below concatenate a path/label directly
* with the text that follows it, with no separator between them (entry-point
* role, module heading, sub-directory description, key-file role, command
* title, configuration description). Confirm whether a delimiter such as
* ` — ` was intended in those templates.
*
* @param deep Result of the deep scan that supplies every rendered value.
* @param nowIso Timestamp string written verbatim into "Last Refresh" and the footer line.
* @returns The whole block, markers included, as a single newline-joined string.
*/
function _renderAutoBlock(deep: DeepScanResult, nowIso: string): string {
const lines: string[] = [AUTO_START, ''];
// ── Snapshot ----------------------------------------------------------------
// Note: we deliberately do *not* emit the absolute project root here. The
// user works across multiple machines so a hardcoded macOS path is wrong
// on Linux/Windows and noisy everywhere else. The workspace name is
// sufficient — VS Code resolves the actual root at runtime.
lines.push('## Snapshot');
lines.push(`- **Workspace**: \`${deep.projectName}\`${deep.version ? ` \`v${deep.version}\`` : ''} _(absolute path varies by environment; resolved from the active VS Code workspace)_`);
if (deep.description) lines.push(`- **Description**: ${deep.description}`);
lines.push(`- **Stack**: ${deep.runtimes.length ? deep.runtimes.join(', ') : '_(unknown)_'}`);
// NOTE(review): toLocaleString() is called without an explicit locale, so the
// digit grouping presumably follows the host's default locale — confirm this
// is acceptable for a doc meant to be stable across machines.
lines.push(`- **Stats**: ${deep.totalFiles} source files, ~${deep.totalLines.toLocaleString()} lines across ${deep.topModules.length} top-level modules.`);
lines.push('');
// ── Refresh stats ----------------------------------------------------------
// Surfaces what the most recent refresh actually did — useful to confirm
// that incremental cache reuse is working as expected and to spot deletions.
const r = deep.refreshStats;
lines.push('## Last Refresh');
lines.push(`- **Time**: ${nowIso}`);
lines.push(`- **Files newly analysed**: ${r.newlyAnalyzed}`);
lines.push(`- **Files reused from cache**: ${r.cached}`);
if (r.deleted.length > 0) {
// Show at most the first 10 deleted paths; the remainder is summarised as a count.
const shown = r.deleted.slice(0, 10);
const more = r.deleted.length - shown.length;
lines.push(`- **Files deleted since last refresh** (${r.deleted.length}):`);
for (const d of shown) lines.push(` - \`${d}\``);
if (more > 0) lines.push(` - _…and ${more} more_`);
}
lines.push('');
// ── Directory mindmap ------------------------------------------------------
// The section is omitted entirely when the renderer returns a falsy value.
const treeDiagram = renderDirectoryTreeDiagram(deep);
if (treeDiagram) {
lines.push('## Directory Map');
lines.push(treeDiagram);
lines.push('');
}
// ── Module dependency flowchart -------------------------------------------
// Requires both a non-empty diagram and at least one module that actually
// depends on another.
const depDiagram = renderModuleDependencyDiagram(deep);
if (depDiagram && deep.topModules.some((m) => m.dependsOn.length > 0)) {
lines.push('## Module Dependencies');
lines.push('> Arrows: which top-level module imports from which.');
lines.push(depDiagram);
lines.push('');
}
// ── Entry points ----------------------------------------------------------
if (deep.entryPoints.length > 0) {
lines.push('## Entry Points');
lines.push('> Files to read first when learning the codebase.');
for (const ep of deep.entryPoints) {
// NOTE(review): the role text follows the closing backtick with no separator.
lines.push(`- \`${ep.rel}\`${ep.role ? `${ep.role}` : ''}`);
}
lines.push('');
}
// ── Hub files -------------------------------------------------------------
if (deep.hubs.length > 0) {
lines.push('## Hub Files');
lines.push('> Imported by many other files — touching these has wide blast radius.');
for (const h of deep.hubs) {
lines.push(`- \`${h.rel}\` — referenced by **${h.refsIn}** files${h.role ? ` · ${h.role}` : ''}`);
}
lines.push('');
}
// ── Per-module detail ------------------------------------------------------
if (deep.topModules.length > 0) {
lines.push('## Modules');
for (const mod of deep.topModules) {
// Blank line before each `###` heading keeps the modules visually separated.
lines.push('');
// NOTE(review): the file count follows the closing backtick with no separator.
lines.push(`### \`${mod.dir}/\`${mod.fileCount} files, ~${mod.totalLines.toLocaleString()} lines`);
if (mod.dependsOn.length > 0) {
lines.push(`*Depends on*: ${mod.dependsOn.map((d) => `\`${d}/\``).join(', ')}`);
}
if (mod.subDirs.length > 0) {
lines.push('');
lines.push('**Sub-directories**');
for (const sub of mod.subDirs) {
// NOTE(review): the description is appended right after `(count)` with no separator.
const desc = sub.description ? `${sub.description}` : '';
lines.push(`- \`${mod.dir}/${sub.name}/\` (${sub.fileCount})${desc}`);
}
}
if (mod.files.length > 0) {
lines.push('');
lines.push('**Key files**');
for (const f of mod.files) {
// NOTE(review): the role is appended right after `(N lines)` with no separator.
const role = f.role ? `${f.role}` : '';
lines.push(`- \`${f.rel}\` (${f.lines} lines)${role}`);
}
}
}
lines.push('');
}
// ── VS Code surface --------------------------------------------------------
// Emitted only when at least one of commands / configuration / activation
// events is non-empty.
if (deep.vsCode && (deep.vsCode.commands.length || deep.vsCode.configurationProperties.length || deep.vsCode.activationEvents.length)) {
lines.push('## VS Code Extension Surface');
if (deep.vsCode.extensionId) lines.push(`- **Extension ID**: \`${deep.vsCode.extensionId}\``);
if (deep.vsCode.activationEvents.length) {
lines.push(`- **Activation events**: ${deep.vsCode.activationEvents.map((e) => `\`${e}\``).join(', ')}`);
}
if (deep.vsCode.commands.length) {
lines.push(`- **Commands** (${deep.vsCode.commands.length}):`);
// List at most 60 commands; the rest is summarised as a count.
for (const c of deep.vsCode.commands.slice(0, 60)) {
// NOTE(review): the title follows the closing backtick with no separator.
lines.push(` - \`${c.command}\`${c.title ? `${c.title}` : ''}`);
}
if (deep.vsCode.commands.length > 60) {
lines.push(` - _…and ${deep.vsCode.commands.length - 60} more_`);
}
}
if (deep.vsCode.configurationProperties.length) {
lines.push(`- **Configuration** (${deep.vsCode.configurationProperties.length} settings):`);
// List at most 60 settings; the rest is summarised as a count.
for (const c of deep.vsCode.configurationProperties.slice(0, 60)) {
// A default is shown (JSON-encoded) only when one is defined.
const def = c.default === undefined ? '' : ` _(default: \`${JSON.stringify(c.default)}\`)_`;
// NOTE(review): the description is appended with no separator before it.
lines.push(` - \`${c.key}\` *(${c.type})*${def}${c.description ? `${c.description}` : ''}`);
}
if (deep.vsCode.configurationProperties.length > 60) {
lines.push(` - _…and ${deep.vsCode.configurationProperties.length - 60} more_`);
}
}
lines.push('');
}
// ── Dependencies -----------------------------------------------------------
if (deep.deps.total > 0) {
lines.push('## Dependencies');
lines.push(`- **Runtime** (${deep.deps.runtime.length}): ${deep.deps.runtime.length ? deep.deps.runtime.map((d) => `\`${d}\``).join(', ') : '_(none)_'}`);
if (deep.deps.dev.length > 0) {
lines.push(`- **Dev** (${deep.deps.dev.length}): ${deep.deps.dev.map((d) => `\`${d}\``).join(', ')}`);
}
lines.push('');
}
// ── README excerpt ---------------------------------------------------------
// The excerpt is only trimmed here; any size limit is presumably applied by
// the scanner that produced `readmeExcerpt` — verify there.
if (deep.readmeExcerpt) {
lines.push('## README Excerpt');
lines.push('> Pulled from the project root README — first ~2 KB.');
lines.push('');
lines.push(deep.readmeExcerpt.trim());
lines.push('');
}
// Footer: timestamp plus the scan signature, then the closing marker.
lines.push(`_Last auto-scan: ${nowIso} · signature \`${deep.signature}\`_`);
lines.push(AUTO_END);
return lines.join('\n');
}
function _renderFullDoc(scan: ArchitectureScanResult, autoBlock: string): string {
function _renderFullDoc(deep: DeepScanResult, autoBlock: string): string {
// User-owned sections start as placeholders so first-time activation gives
// the user a clear "fill these in" surface without confusing the model.
return [
`# ${scan.projectName} — Project Architecture Context`,
`# ${deep.projectName} — Project Architecture Context`,
'',
'> Auto-managed sections (between the AUTO markers) are rewritten by Astra on every refresh.',
'> The rest is yours — Astra never touches it once this file exists.',
'> The rest below is yours — Astra never touches it once this file exists.',
'',
autoBlock,
'',
@@ -314,7 +407,7 @@ function _replaceAutoBlock(existing: string, autoBlock: string): string {
* Purpose, Main Modules, Key Workflows, Current Constraints, Known Risks,
* Active Decisions — and drop the long auto-listing of files first.
*/
export function readArchitectureForPrompt(docPath: string, maxChars: number = 8000): string {
export function readArchitectureForPrompt(docPath: string, maxChars: number = 16000): string {
if (!docPath || !fs.existsSync(docPath)) return '';
let raw: string;
try {
@@ -326,21 +419,29 @@ export function readArchitectureForPrompt(docPath: string, maxChars: number = 80
if (raw.length <= maxChars) return raw;
// Section-aware trim: parse `## ` headers, prioritise the high-signal
// sections, drop the rest until we fit. Important Files is the longest
// auto section so it gets dropped first.
// sections, drop the rest until we fit. The verbose listings (per-module
// file enumerations, full dependency tables, README excerpt) are the
// first to go — they're useful when present but rarely change the model's
// structural understanding of the project.
const sections = _splitSections(raw);
const priority = [
// User-owned, irreplaceable.
'Purpose',
'Project Name',
'Description',
'Active Decisions',
'Current Constraints',
'Known Risks',
'Key Workflows',
'Main Modules',
'Runtime / Stack',
'Project Root',
'Important Files', // drop first
// Auto-managed high-signal.
'Snapshot',
'Entry Points',
'Hub Files',
'Module Dependencies',
'Directory Map',
'VS Code Extension Surface',
// Auto-managed long tail (dropped first).
'Modules',
'Dependencies',
'README Excerpt',
];
sections.sort((a, b) => {
const ai = priority.indexOf(a.title); const bi = priority.indexOf(b.title);
@@ -390,15 +491,21 @@ function _splitSections(raw: string): { title: string; body: string }[] {
export function formatArchitectureContextForPrompt(opts: {
projectName: string;
docPath: string;
/** When provided, `Source:` is emitted as a workspace-relative path. */
projectRoot?: string;
lastUpdated?: string;
maxChars?: number;
}): string {
const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 8000);
const content = readArchitectureForPrompt(opts.docPath, opts.maxChars ?? 16000);
if (!content) return '';
const stamp = opts.lastUpdated ? `\nLast updated: ${opts.lastUpdated}` : '';
// Surface the doc location as a workspace-relative path so the same prompt
// works regardless of which machine the user is on. The doc lives at
// `.astra/project-context/architecture.md` inside the workspace by design.
const sourceDisplay = toWorkspaceRelative(opts.docPath, opts.projectRoot);
return [
'[ACTIVE PROJECT ARCHITECTURE CONTEXT]',
`Source: ${opts.docPath}`,
`Source: ${sourceDisplay}`,
`Project: ${opts.projectName}${stamp}`,
'Use this as authoritative ground truth about the project structure, constraints, and active decisions. Do not contradict it without flagging the conflict.',
'---',