chore: bump version to 2.80.34 and package

2026-05-12 22:54:21 +09:00
parent 148bfb070b
commit 065e598cca
26 changed files with 2023 additions and 139 deletions
@@ -19,11 +19,13 @@ import { findBrainFiles, summarizeText } from '../utils';
 import { isInside } from '../lib/paths';
 import { MemoryManager } from '../memory';
 import { RetrievalChunk, RetrievalResult, ContextBudgetConfig } from './types';
-import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
+import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
 import { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
+import { getBrainTokenIndex } from './brainIndex';

-export { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from './scoring';
+export { tokenize, expandQuery, scoreTfIdf, scoreTfIdfPreTokenized, extractBestExcerpt } from './scoring';
 export { selectWithinBudget, assembleContext, estimateTokens } from './contextBudget';
+export { getBrainTokenIndex, clearBrainTokenIndex } from './brainIndex';
 export * from './types';

 interface RetrievalOptions {
@@ -133,52 +135,48 @@ export class RetrievalOrchestrator {

            if (allFiles.length === 0) return [];

-            // Read all files for TF-IDF
-            const documents = allFiles.map((file) => {
+            // Tokenized docs come from the persistent, mtime-keyed index: unchanged files are neither re-read
+            // nor re-tokenized, so per-query work over a large brain drops from O(total content) to one stat call per file.
+            const indexed = getBrainTokenIndex(brain.localBrainPath, allFiles);
+            if (indexed.length === 0) return [];
+
+            const scored = scoreTfIdfPreTokenized(
+                expandedTokens,
+                indexed.map((d) => ({
+                    tokens: d.tokens,
+                    titleTokens: d.titleTokens,
+                    lastModified: d.mtimeMs,
+                    conflictCount: d.conflictCount,
+                }))
+            );
+
+            const topResults: RetrievalChunk[] = [];
+            for (const s of scored.filter((x) => x.score > 0).sort((a, b) => b.score - a.score).slice(0, limit)) {
+                const doc = indexed[s.index];
+                // Only the top `limit` files are actually read off disk (for excerpt extraction).
                let content = '';
-                let lastModified = 0;
-                try {
-                    content = fs.readFileSync(file, 'utf8');
-                    lastModified = fs.statSync(file).mtimeMs;
-                } catch { /* skip */ }
-                return {
-                    title: path.basename(file, '.md'),
-                    content,
-                    lastModified,
-                    filePath: file,
-                    relativePath: path.relative(brain.localBrainPath, file)
-                };
-            });
-
-            // TF-IDF scoring
-            const scored = scoreTfIdf(expandedTokens, documents);
-
-            return scored
-                .filter((s) => s.score > 0)
-                .sort((a, b) => b.score - a.score)
-                .slice(0, limit)
-                .map((s) => {
-                    const doc = documents[s.index];
-                    const excerpt = extractBestExcerpt(doc.content, expandedTokens, 400);
-                    return {
-                        id: `brain-${s.index}`,
-                        source: 'brain-memory' as const,
-                        title: doc.relativePath,
-                        content: summarizeText(excerpt, 400),
-                        score: s.score,
-                        tokenEstimate: estimateTokens(excerpt),
-                        metadata: {
-                            filePath: doc.filePath,
-                            category: this.inferCategory(doc.relativePath),
-                            isProjectEvidence: this.isProjectEvidence(doc.relativePath, doc.content),
-                            lastUpdated: doc.lastModified,
-                            // Phase 5: Scoring Intelligence Integration
-                            conflictDetected: s.conflictDetected,
-                            conflictSeverity: s.conflictSeverity,
-                            informationDensity: s.informationDensity
-                        }
-                    };
+                try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — skip */ continue; }
+                const excerpt = extractBestExcerpt(content, expandedTokens, 400);
+                topResults.push({
+                    id: `brain-${s.index}`,
+                    source: 'brain-memory' as const,
+                    title: doc.relativePath,
+                    content: summarizeText(excerpt, 400),
+                    score: s.score,
+                    tokenEstimate: estimateTokens(excerpt),
+                    metadata: {
+                        filePath: doc.filePath,
+                        category: this.inferCategory(doc.relativePath),
+                        isProjectEvidence: this.isProjectEvidence(doc.relativePath, content),
+                        lastUpdated: doc.mtimeMs,
+                        // Phase 5: Scoring Intelligence Integration
+                        conflictDetected: s.conflictDetected,
+                        conflictSeverity: s.conflictSeverity,
+                        informationDensity: s.informationDensity,
+                    },
                });
+            }
+            return topResults;
        } catch {
            return [];
        }