chore: version up to 2.80.34 and package

This commit is contained in:
g1nation
2026-05-12 22:54:21 +09:00
parent 148bfb070b
commit 065e598cca
26 changed files with 2023 additions and 139 deletions
+44 -46
View File
@@ -19,11 +19,13 @@ import { findBrainFiles, summarizeText } from '../utils';
import { isInside } from '../lib/paths';
import { MemoryManager } from '../memory';
import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
import { getBrainTokenIndex } from './brainIndex';
export { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
export * from './types';
interface RetrievalOptions {
@@ -133,52 +135,48 @@ export class RetrievalOrchestrator {
if (allFiles.length === 0) return [];
// Read all files for TF-IDF
const documents = allFiles.map((file) => {
// Tokenized docs come from the persistent, mtime-keyed index: unchanged files are neither re-read
// nor re-tokenized, so per-query work over a large brain drops from O(total content) to O(number of files) stat calls.
const indexed = getBrainTokenIndex(brain.localBrainPath, allFiles);
if (indexed.length === 0) return [];
const scored = scoreTfIdfPreTokenized(
expandedTokens,
indexed.map((d) => ({
tokens: d.tokens,
titleTokens: d.titleTokens,
lastModified: d.mtimeMs,
conflictCount: d.conflictCount,
}))
);
const topResults: RetrievalChunk[] = [];
for (const s of scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score).slice(0, limit)) {
const doc = indexed[s.index];
// Only the top `limit` files are actually read off disk (for excerpt extraction).
let content = '';
let lastModified = 0;
try {
content = fs.readFileSync(file, 'utf8');
lastModified = fs.statSync(file).mtimeMs;
} catch { /* skip */ }
return {
title: path.basename(file, '.md'),
content,
lastModified,
filePath: file,
relativePath: path.relative(brain.localBrainPath, file)
};
});
// TF-IDF scoring
const scored = scoreTfIdf(expandedTokens, documents);
return scored
.filter((s) => s.score > 0)
.sort((a, b) => b.score - a.score)
.slice(0, limit)
.map((s) => {
const doc = documents[s.index];
const excerpt = extractBestExcerpt(doc.content, expandedTokens, 400);
return {
id: `brain-${s.index}`,
source: 'brain-memory' as const,
title: doc.relativePath,
content: summarizeText(excerpt, 400),
score: s.score,
tokenEstimate: estimateTokens(excerpt),
metadata: {
filePath: doc.filePath,
category: this.inferCategory(doc.relativePath),
isProjectEvidence: this.isProjectEvidence(doc.relativePath, doc.content),
lastUpdated: doc.lastModified,
// Phase 5: Scoring Intelligence Integration
conflictDetected: s.conflictDetected,
conflictSeverity: s.conflictSeverity,
informationDensity: s.informationDensity
}
};
try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
const excerpt = extractBestExcerpt(content, expandedTokens, 400);
topResults.push({
id: `brain-${s.index}`,
source: 'brain-memory' as const,
title: doc.relativePath,
content: summarizeText(excerpt, 400),
score: s.score,
tokenEstimate: estimateTokens(excerpt),
metadata: {
filePath: doc.filePath,
category: this.inferCategory(doc.relativePath),
isProjectEvidence: this.isProjectEvidence(doc.relativePath, content),
lastUpdated: doc.mtimeMs,
// Phase 5: Scoring Intelligence Integration
conflictDetected: s.conflictDetected,
conflictSeverity: s.conflictSeverity,
informationDensity: s.informationDensity,
},
});
}
return topResults;
} catch {
return [];
}