Update project files

This commit is contained in:
2026-05-22 15:00:14 +09:00
parent 132d130ff1
commit 8016ef18fa
29 changed files with 1353 additions and 804 deletions
+22 -19
View File
@@ -2,7 +2,8 @@ import * as fs from 'fs';
import * as path from 'path';
import { findBrainFiles, summarizeText } from '../utils';
import { isInside } from '../lib/paths';
import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from '../retrieval/scoring';
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt } from '../retrieval/scoring';
import { getBrainTokenIndex } from '../retrieval/brainIndex';
import { estimateTokens } from '../retrieval/contextBudget';
/**
@@ -91,33 +92,35 @@ export function retrieveScoped(
});
if (candidates.length === 0) return { ...empty, candidateCount: 0 };
const documents = candidates.map((file) => {
let content = '';
let lastModified = 0;
try {
content = fs.readFileSync(file, 'utf8');
lastModified = fs.statSync(file).mtimeMs;
} catch { /* skip unreadable file */ }
return {
title: path.basename(file, '.md'),
content,
lastModified,
filePath: file,
relativePath: path.relative(brainRoot, file),
};
});
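// Tokenized documents come from the persistent, mtime-keyed brain index, so
// unchanged files are neither re-read nor re-tokenized. The index tokenizes
// `${basename} ${content}` (with titleTokens = tokenize(basename)), which is the
// same input the previous `scoreTfIdf` call computed here, so scores are
// intended to stay identical. NOTE(review): `conflictCount` is newly passed to
// the scorer below — confirm it does not change the resulting scores.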
const indexed = getBrainTokenIndex(brainRoot, candidates);
if (indexed.length === 0) return { ...empty, candidateCount: candidates.length };
const queryTokens = tokenize(query);
const expanded = expandQuery(queryTokens);
const scored = scoreTfIdf(expanded, documents);
const scored = scoreTfIdfPreTokenized(
expanded,
indexed.map((d) => ({
tokens: d.tokens,
titleTokens: d.titleTokens,
lastModified: d.mtimeMs,
conflictCount: d.conflictCount,
}))
);
const chunks = scored
.filter((s) => s.score > 0)
.sort((a, b) => b.score - a.score)
.slice(0, maxResults)
.map<ScopedRetrievalChunk>((s) => {
const doc = documents[s.index];
const excerpt = extractBestExcerpt(doc.content, expanded, excerptLength);
const doc = indexed[s.index];
// Only the chosen top-`maxResults` files are read off disk (for excerpt extraction).
let content = '';
try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — empty excerpt */ }
const excerpt = extractBestExcerpt(content, expanded, excerptLength);
const summary = summarizeText(excerpt, excerptLength);
return {
relativePath: doc.relativePath,