Update project files
This commit is contained in:
@@ -2,7 +2,8 @@ import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { findBrainFiles, summarizeText } from '../utils';
|
||||
import { isInside } from '../lib/paths';
|
||||
import { tokenize, expandQuery, scoreTfIdf, extractBestExcerpt } from '../retrieval/scoring';
|
||||
import { tokenize, expandQuery, scoreTfIdfPreTokenized, extractBestExcerpt } from '../retrieval/scoring';
|
||||
import { getBrainTokenIndex } from '../retrieval/brainIndex';
|
||||
import { estimateTokens } from '../retrieval/contextBudget';
|
||||
|
||||
/**
|
||||
@@ -91,33 +92,35 @@ export function retrieveScoped(
|
||||
});
|
||||
if (candidates.length === 0) return { ...empty, candidateCount: 0 };
|
||||
|
||||
const documents = candidates.map((file) => {
|
||||
let content = '';
|
||||
let lastModified = 0;
|
||||
try {
|
||||
content = fs.readFileSync(file, 'utf8');
|
||||
lastModified = fs.statSync(file).mtimeMs;
|
||||
} catch { /* skip unreadable file */ }
|
||||
return {
|
||||
title: path.basename(file, '.md'),
|
||||
content,
|
||||
lastModified,
|
||||
filePath: file,
|
||||
relativePath: path.relative(brainRoot, file),
|
||||
};
|
||||
});
|
||||
// Tokenized docs from the persistent mtime-keyed brain index — unchanged files
|
||||
// are not re-read or re-tokenized. The index tokenizes `${basename} ${content}`
|
||||
// (titleTokens = tokenize(basename)), which is exactly what the previous
|
||||
// `scoreTfIdf` call computed here, so scoring stays byte-identical.
|
||||
const indexed = getBrainTokenIndex(brainRoot, candidates);
|
||||
if (indexed.length === 0) return { ...empty, candidateCount: candidates.length };
|
||||
|
||||
const queryTokens = tokenize(query);
|
||||
const expanded = expandQuery(queryTokens);
|
||||
const scored = scoreTfIdf(expanded, documents);
|
||||
const scored = scoreTfIdfPreTokenized(
|
||||
expanded,
|
||||
indexed.map((d) => ({
|
||||
tokens: d.tokens,
|
||||
titleTokens: d.titleTokens,
|
||||
lastModified: d.mtimeMs,
|
||||
conflictCount: d.conflictCount,
|
||||
}))
|
||||
);
|
||||
|
||||
const chunks = scored
|
||||
.filter((s) => s.score > 0)
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, maxResults)
|
||||
.map<ScopedRetrievalChunk>((s) => {
|
||||
const doc = documents[s.index];
|
||||
const excerpt = extractBestExcerpt(doc.content, expanded, excerptLength);
|
||||
const doc = indexed[s.index];
|
||||
// Only the chosen top-`maxResults` files are read off disk (for excerpt extraction).
|
||||
let content = '';
|
||||
try { content = fs.readFileSync(doc.filePath, 'utf8'); } catch { /* deleted just now — empty excerpt */ }
|
||||
const excerpt = extractBestExcerpt(content, expanded, excerptLength);
|
||||
const summary = summarizeText(excerpt, excerptLength);
|
||||
return {
|
||||
relativePath: doc.relativePath,
|
||||
|
||||
Reference in New Issue
Block a user