From 6adbc2a6fa409de1181d1aabb77d73f365d6822b Mon Sep 17 00:00:00 2001
From: g1nation <koriweb@gmail.com>
Date: Thu, 18 Jun 2026 18:18:20 +0900
Subject: [PATCH] =?UTF-8?q?feat(review):=20/review=20=EC=BD=94=EB=93=9C=20?=
 =?UTF-8?q?=EB=A6=AC=EB=B7=B0=20map-reduce=20=EC=B2=AD=ED=82=B9=20?=
 =?UTF-8?q?=EB=AA=85=EB=A0=B9=20(v2.2.255)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

일반 에이전트 채팅이 큰 코드베이스 리뷰를 단일 호출로 처리하다 약한 로컬
모델에서 빈 응답으로 무너지던 문제를, /meet 의 검증된 map-reduce 로 우회.

- /review <디렉터리|파일> [초점] 신설 (코어 채팅 경로 무수정)
- Map: 파일별 독립 리뷰(라인 인용 근거), callLmSynthesis 재시도/붕괴감지 활용,
  한 파일 실패해도 부분 리뷰로 진행
- Reduce: 노트 통합 + hierarchical fold 로 reduce 입력을 약한 모델 한도(16K) 안 유지
- 의존성/빌드 산출물 제외, 리뷰 대상 파일 최대 30개·400KB 초과 파일 제외, 결과 wiki 저장
- 신규 reviewPrompt.ts / reviewFiles.ts, 테스트 +5건(전체 667 통과)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 PATCHNOTES.md                                 |   7 +
 package.json                                  |   2 +-
 src/features/datacollect/handlers.ts          | 196 ++++++++++++++++++
 .../datacollect/prompts/reviewPrompt.ts       |  93 +++++++++
 src/features/datacollect/reviewFiles.ts       | 102 +++++++++
 tests/reviewFiles.test.ts                     |  39 ++++
 6 files changed, 438 insertions(+), 1 deletion(-)
 create mode 100644 src/features/datacollect/prompts/reviewPrompt.ts
 create mode 100644 src/features/datacollect/reviewFiles.ts
 create mode 100644 tests/reviewFiles.test.ts

diff --git a/PATCHNOTES.md b/PATCHNOTES.md
index bb42a8b..4ee4592 100644
--- a/PATCHNOTES.md
+++ b/PATCHNOTES.md
@@ -1,5 +1,12 @@
 # Astra Patch Notes
 
+## v2.2.255 (2026-06-18)
+### 🧩 `/review` — 코드 리뷰 map-reduce 청킹 (약한 모델도 큰 코드베이스 처리)
+- 일반 에이전트 채팅은 코드 리뷰처럼 입력이 큰 작업을 단일 호출로 처리하다 약한 로컬 모델에서 빈 응답(첫 토큰 EOS)으로 무너진다. `/meet` 의 검증된 map-reduce 를 코드 리뷰에 적용한 **`/review <디렉터리|파일> [초점]`** 명령 신설. 코어 채팅 경로는 건드리지 않음.
+- **Map**: 소스 파일을 하나씩 독립 리뷰(버그·보안·성능·설계·가독성, 라인 인용 근거 필수) → 파일별 노트. `callLmSynthesis` 의 재시도/출력붕괴 감지를 그대로 활용. 한 파일이 실패해도 전체를 포기하지 않고 부분 리뷰로 진행.
+- **Reduce**: 노트를 통합해 우선순위가 매겨진 보고서(총평·우선개선·분류별·잘된점·다음단계). 노트가 크면 배치로 접는 **hierarchical fold** 로 reduce 입력도 약한 모델 한도(16K) 안에 유지.
+- 의존성·빌드 산출물 자동 제외(`node_modules`/`dist`/`.d.ts`/`.min.js`/lock 등), 리뷰 대상 파일 최대 30개(초과 시 경고), 400KB 초과 파일은 경고 없이 제외, 결과는 wiki 에 저장. 신규: [reviewPrompt.ts](src/features/datacollect/prompts/reviewPrompt.ts) · [reviewFiles.ts](src/features/datacollect/reviewFiles.ts). 테스트 +5건(전체 667 통과).
+
 ## v2.2.254 (2026-06-18)
 ### 🔎 빈 응답(empty response) 진단 정확도 — MoE 활성 파라미터 인식
 - 일반 에이전트 채팅에서 약한 모델이 큰 입력에 첫 토큰 EOS 로 무너져 **빈 응답**이 날 때, 모델명 파서가 `gemma-4-26b-a4b` 를 "26B 큰 모델"로 오판해 엉뚱한 안내를 하던 문제. **활성 파라미터 추정**(`estimateActiveParamsB`: `a4b`→4, `A3B`→3, `e2b`→2) 추가 → MoE 를 정확히 식별. ([contextManager.ts](src/lib/contextManager.ts))
diff --git a/package.json b/package.json
index 6e8e981..abd6d25 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
   "name": "astra",
   "displayName": "Astra",
   "description": "The personal intelligence layer for Antigravity and VS Code. A private cognitive partner for deep project context, memory, and proactive strategic decision-making.",
-  "version": "2.2.254",
+  "version": "2.2.255",
   "publisher": "g1nation",
   "license": "MIT",
   "icon": "assets/icon.png",
diff --git a/src/features/datacollect/handlers.ts b/src/features/datacollect/handlers.ts
index 81a0ae9..afadf9c 100644
--- a/src/features/datacollect/handlers.ts
+++ b/src/features/datacollect/handlers.ts
@@ -12,9 +12,12 @@
 
 import * as vscode from 'vscode';
 import { promises as fsp } from 'fs';
+import * as path from 'path';
 import { registerSlashCommand, chunk, type Webview } from './slashRouter';
 import { callLmSynthesis } from './llm';
 import { bridgeFetch, BRIDGE_API } from './bridgeClient';
+import { collectSourceFiles } from './reviewFiles';
+import { buildReviewFilePrompt, buildReviewReducePrompt } from './prompts/reviewPrompt';
 import { type SynthesisPart, buildSynthesisPrompt } from './prompts/synthesisPrompt';
 import {
     type YoutubeAnalysisMode,
@@ -859,6 +862,198 @@ async function runMeetConfirm(arg: string, view: Webview | undefined, context?:
     chunk(view, '\n' + lines.map(l => `  ${l}`).join('\n') + '\n');
 }
 
+// ───────────────────────────── /review ─────────────────────────────
+/**
+ * `/review <directory|file path> [focus]` — map-reduce code review.
+ *
+ * Reviewing a large codebase in a single call makes weak local models collapse into
+ * an empty response (EOS on the first token). /review sidesteps that the way /meet does:
+ *   - Map   : review each file on its own → per-file findings note (callLmSynthesis
+ *             is relied on for retry / collapse detection)
+ *   - Reduce: merge the notes → prioritized final report. Large note sets are folded
+ *             in batches (hierarchical fold) so the reduce input stays within budget too.
+ * A piece that keeps failing does not abort the run; it continues as a partial review (same policy as /meet).
+ */
+const REVIEW_MAX_FILES = 30;            // max files per review run (the excess is cut off with a warning)
+const REVIEW_MAX_FILE_BYTES = 400_000;  // larger files are treated as generated output and excluded
+const REVIEW_PER_FILE_CHARS = 16_000;   // max characters of one file sent to the model (only the head is kept)
+const REVIEW_REDUCE_BUDGET = 16_000;    // max characters of notes per reduce call — safety line for weak models
+
+/**
+ * Handles `/review <path> [focus]`: collects target files, reviews each one (map),
+ * merges the notes into a report (reduce), streams progress and the report to
+ * `view`, then saves the report through the bridge wiki endpoint.
+ * @returns `true` on every path; expected failures are reported to `view`.
+ */
+async function runReview(arg: string, view: Webview | undefined, _context?: vscode.ExtensionContext): Promise<boolean> {
+    const trimmed = arg.trim();
+    // Path parsing — like /meet: a double-quoted path may be followed by free-form focus text.
+    let targetPath = '';
+    let focus = '';
+    if (trimmed.startsWith('"')) {
+        const end = trimmed.indexOf('"', 1);
+        if (end > 0) { targetPath = trimmed.slice(1, end); focus = trimmed.slice(end + 1).trim(); }
+    }
+    if (!targetPath) {
+        const sp = trimmed.indexOf(' ');
+        if (sp === -1) targetPath = trimmed;
+        else { targetPath = trimmed.slice(0, sp); focus = trimmed.slice(sp + 1).trim(); }
+    }
+    if (!targetPath) {
+        chunk(view, '사용법: `/review <디렉터리 또는 파일 경로> [리뷰 초점]`\n예: `/review E:\\Wiki\\astraai`\n경로에 공백이 있으면 따옴표로: `/review "E:\\my proj\\src" 보안 위주로`\n');
+        return true;
+    }
+
+    // Decide whether the target is a single file or a directory.
+    let stat;
+    try {
+        stat = await fsp.stat(targetPath);
+    } catch (e: any) {
+        chunk(view, `\n❌ 경로를 찾을 수 없습니다: ${e?.message || String(e)}\n`);
+        return true;
+    }
+
+    const projectLabel = targetPath.replace(/[\\/]+$/, '').replace(/^.*[\\/]/, '') || targetPath;
+    chunk(view, `🔍 **코드 리뷰**: ${targetPath}${focus ? `\n초점: ${focus}` : ''}\n\n`);
+
+    // ── Collect target files ──
+    interface RFile { absPath: string; relPath: string; }
+    let files: RFile[] = [];
+    let truncatedFiles = false;
+    let totalCandidates = 0;
+    if (stat.isDirectory()) {
+        const collected = await collectSourceFiles(targetPath, { maxFiles: REVIEW_MAX_FILES, maxFileBytes: REVIEW_MAX_FILE_BYTES });
+        files = collected.files.map(f => ({ absPath: f.absPath, relPath: f.relPath }));
+        truncatedFiles = collected.truncated;
+        totalCandidates = collected.totalCandidates;
+    } else {
+        files = [{ absPath: targetPath, relPath: path.basename(targetPath) }];
+        totalCandidates = 1;
+    }
+    if (files.length === 0) {
+        chunk(view, `\nℹ️ 리뷰할 소스 파일을 찾지 못했습니다. (의존성·빌드 산출물은 제외됩니다)\n`);
+        return true;
+    }
+    chunk(view, `📂 소스 파일 ${files.length}개 리뷰 대상${truncatedFiles ? ` (후보 ${totalCandidates}개 중 상위 ${files.length}개만 — 상한 ${REVIEW_MAX_FILES}; 범위를 좁혀 다시 실행 권장)` : ''}\n\n`);
+
+    const reviewSystem = '당신은 시니어 코드 리뷰어입니다. 제공된 코드만 근거로 사실 기반의 발견사항을 추출·통합하며, 없는 코드·취약점을 지어내지 않습니다. 모든 출력은 한국어입니다.';
+
+    // ── Map: review each file independently ──
+    const noteBlocks: string[] = [];
+    let failed = 0;    // files that could not be read or reviewed
+    let reviewed = 0;  // files that produced a real review note
+    for (let i = 0; i < files.length; i++) {
+        const f = files[i];
+        chunk(view, `  ⏳ (${i + 1}/${files.length}) ${f.relPath} 리뷰 중…\n`);
+        let content: string;
+        try {
+            content = await fsp.readFile(f.absPath, 'utf-8');
+        } catch (e: any) {
+            failed++;
+            chunk(view, `    ⚠️ 읽기 실패(건너뜀): ${e?.message || String(e)}\n`);
+            continue;
+        }
+        if (!content.trim()) { continue; }  // empty files are skipped silently (neither reviewed nor failed)
+        let truncNote = '';
+        if (content.length > REVIEW_PER_FILE_CHARS) {
+            content = content.slice(0, REVIEW_PER_FILE_CHARS);
+            truncNote = `\n(파일이 커서 앞 ${REVIEW_PER_FILE_CHARS.toLocaleString()}자만 리뷰함)`;
+        }
+        try {
+            const note = await callLmSynthesis(buildReviewFilePrompt(f.relPath, content, i + 1, files.length, focus), reviewSystem);
+            if (!note) throw new Error('리뷰 결과가 비어 있습니다.');
+            noteBlocks.push(`### ─── ${f.relPath} ───\n${note.trim()}${truncNote}`);
+            reviewed++;
+            chunk(view, `    ✓ 완료\n`);
+        } catch (e: any) {
+            failed++;
+            noteBlocks.push(`### ─── ${f.relPath} ───\n(이 파일은 모델 출력 오류로 리뷰하지 못했습니다: ${e?.message || String(e)})`);
+            chunk(view, `    ⚠️ 리뷰 실패(건너뜀): ${e?.message || String(e)}\n`);
+        }
+    }
+    if (reviewed === 0) {  // nothing usable for reduce: every file failed and/or was empty
+        chunk(view, failed > 0 ? `\n❌ 모든 파일 리뷰에 실패했습니다 — 모델 출력이 계속 붕괴합니다. 더 큰 모델(활성 7B+) 사용을 권장합니다.\n` : `\nℹ️ 리뷰할 내용이 있는 파일이 없습니다 — 대상 파일이 모두 비어 있습니다.\n`);
+        return true;
+    }
+    if (failed > 0) chunk(view, `\n⚠️ ${files.length}개 중 ${failed}개 파일을 리뷰하지 못해 **부분 리뷰**로 진행합니다.\n`);
+
+    // ── Reduce: merge the notes (large note sets are folded in batches to stay within the weak-model budget) ──
+    chunk(view, `\n  🧪 발견사항 통합 중…\n`);
+    let report: string;
+    try {
+        report = await reduceReviewNotes(noteBlocks, projectLabel, files.length, focus, reviewSystem, view);
+        if (!report) throw new Error('통합 단계 응답이 비어 있습니다.');
+    } catch (e: any) {
+        chunk(view, `\n⚠️ 통합 실패: ${e?.message || String(e)}\n약한 모델일 수 있습니다 — 활성 7B+ 모델 또는 범위를 좁혀 다시 시도하세요.\n`);
+        return true;
+    }
+
+    chunk(view, '\n' + report + '\n\n');
+
+    // ── Save (wiki) — same route as /meet ──
+    try {
+        const cfg = vscode.workspace.getConfiguration('g1nation');
+        const today = new Date().toISOString().slice(0, 10);
+        const title = `코드리뷰 ${projectLabel} ${today}`;
+        const savePath = (cfg.get<string>('datacollectSavePath', '') || '').trim();
+        const body: Record<string, unknown> = { title, content: report };
+        if (savePath) body.saveDir = savePath;
+        const saved = await bridgeFetch<{ success: boolean; path?: string }>(
+            BRIDGE_API.wiki.save, { method: 'POST', body: JSON.stringify(body) }, { timeoutMs: 30_000 });
+        chunk(view, `💾 **리뷰 저장 완료**: \`${saved?.path || '(경로 미확인)'}\`\n`);
+    } catch (e: any) {
+        chunk(view, `⚠️ 리뷰 저장 실패(보고서는 위에 출력됨): ${e?.message || String(e)}\n`);
+    }
+    return true;
+}
+
+/**
+ * Merges per-file review notes into the final report. When the notes exceed
+ * REVIEW_REDUCE_BUDGET they are packed into batches, each batch is reduced to a
+ * partial report, and the partials are folded again. Folding is capped at
+ * MAX_FOLD_PASSES rounds so partials that never shrink cannot loop forever.
+ * @throws Error when there are no notes or every batch reduce came back empty.
+ */
+async function reduceReviewNotes(
+    noteBlocks: string[], projectLabel: string, fileCount: number, focus: string,
+    reviewSystem: string, view: Webview | undefined,
+): Promise<string> {
+    const MAX_FOLD_PASSES = 4;  // hard cap on fold rounds (termination guarantee)
+    if (noteBlocks.length === 0) throw new Error('통합할 리뷰 노트가 없습니다.');
+    const reduceOnce = (notes: string) =>
+        callLmSynthesis(buildReviewReducePrompt(notes, projectLabel, fileCount, focus), reviewSystem);
+    // Greedy packing by character budget — a block larger than the budget still gets its own batch.
+    const packBatches = (blocks: string[]): string[][] => {
+        const batches: string[][] = [];
+        let cur: string[] = [];
+        let curLen = 0;
+        for (const b of blocks) {
+            if (cur.length && curLen + b.length > REVIEW_REDUCE_BUDGET) { batches.push(cur); cur = []; curLen = 0; }
+            cur.push(b); curLen += b.length + 2;  // +2 accounts for the '\n\n' joiner
+        }
+        if (cur.length) batches.push(cur);
+        return batches;
+    };
+
+    let level = noteBlocks;
+    for (let pass = 1; ; pass++) {
+        const batches = packBatches(level);
+        if (batches.length === 1) return await reduceOnce(level.join('\n\n'));
+        if (pass > MAX_FOLD_PASSES) {  // not converging: clip to the budget and finish with one reduce
+            chunk(view, `  · 통합이 수렴하지 않아 노트를 예산에 맞게 잘라 마무리합니다…\n`);
+            return await reduceOnce(level.join('\n\n').slice(0, REVIEW_REDUCE_BUDGET));
+        }
+        chunk(view, `  · 통합 ${pass}단계 — ${level.length}개 노트를 ${batches.length}개 배치로 접는 중…\n`);
+        const partials: string[] = [];
+        for (let i = 0; i < batches.length; i++) {
+            const r = await reduceOnce(batches[i].join('\n\n'));
+            if (r && r.trim()) partials.push(`### ─── 부분 통합 ${pass}-${i + 1} ───\n${r.trim()}`);
+        }
+        if (partials.length === 0) throw new Error('배치 통합이 모두 비었습니다.');
+        level = partials;
+    }
+}
+
 // ─── 등록 ─────────────────────────────────────────────────────────────────
 
 // /research(NotebookLM Deep Research)는 v2.2.205 에서 제거 — NotebookLM 은 로컬
@@ -869,3 +1064,4 @@ registerSlashCommand({ name: '/youtube', description: 'YouTube 단일 영상 또
 registerSlashCommand({ name: '/blog', description: 'Blog Pipeline 안내 (Datacollect 별도 흐름)', handler: runBlog });
 registerSlashCommand({ name: '/wikify', description: '웹 URL → P-Reinforce v3.0 위키 합성·저장', handler: runWikify });
 registerSlashCommand({ name: '/meet', description: '회의 transcript → 회의록 합성 + 캘린더·task 등록', handler: runMeet });
+registerSlashCommand({ name: '/review', description: '코드 리뷰 — 디렉터리/파일을 파일별 리뷰(map) 후 통합(reduce). 약한 모델도 처리', handler: runReview });
diff --git a/src/features/datacollect/prompts/reviewPrompt.ts b/src/features/datacollect/prompts/reviewPrompt.ts
new file mode 100644
index 0000000..577c7bf
--- /dev/null
+++ b/src/features/datacollect/prompts/reviewPrompt.ts
@@ -0,0 +1,93 @@
+/**
+ * 코드 리뷰 map-reduce 프롬프트.
+ *
+ * 일반 에이전트 채팅은 코드 리뷰처럼 입력이 큰 작업을 단일 호출로 처리하다
+ * 약한 로컬 모델에서 빈 응답(첫 토큰 EOS)으로 무너진다. /review 는 /meet 와 같은
+ * map-reduce 로 이를 우회한다:
+ *   - Map  : 파일 하나씩 독립 리뷰 → 파일별 발견사항(근거 인용)
+ *   - Reduce: 파일별 노트를 통합 → 우선순위가 매겨진 최종 리뷰 보고서
+ * 입력이 작게 쪼개지므로 약한 모델도 끝까지 생성할 수 있고, lost-in-the-middle 도 준다.
+ */
+
+/** [Map] Builds the prompt that reviews a single file and extracts findings only; evidence (line / code quote) is required, fabrication is forbidden. */
+export function buildReviewFilePrompt(relPath: string, content: string, idx: number, total: number, focus: string): string {
+    const focusBlock = focus.trim() ? `\n# 리뷰 초점 (사용자 지정 — 우선 점검)\n${focus.trim()}\n` : '';
+    // Number the lines so the model cites exact locations; split on \r?\n so CRLF files leave no stray '\r'.
+    const numbered = content.split(/\r?\n/).map((l, i) => `${String(i + 1).padStart(4, ' ')}| ${l}`).join('\n');
+    // Use a fence one backtick longer than any backtick run in the file so the source cannot close the code block early.
+    const fence = '`'.repeat((content.match(/`+/g) ?? []).reduce((m, r) => Math.max(m, r.length + 1), 3));
+    return `# 임무
+당신은 시니어 코드 리뷰어다. 아래 **단일 파일**(${idx}/${total}번째)을 리뷰해 발견사항만 추출하라.
+최종 보고서는 나중에 모든 파일 노트를 합쳐 작성하므로, 여기서는 이 파일에 대한 사실 기반 발견사항을 **누락 없이** 뽑는 것이 임무다.
+${focusBlock}
+# 점검 항목
+- **버그/오류**: 논리 오류, 경계 조건, null/undefined, 예외 미처리, 경쟁 조건, 자원 누수
+- **보안**: 입력 검증 누락, 인젝션, 비밀정보 하드코딩, 안전하지 않은 역직렬화/권한
+- **성능**: 불필요한 반복·할당, N+1, 동기 블로킹, 비효율 자료구조
+- **설계/구조**: 책임 분리, 결합도, 중복, 추상화 누락/과잉
+- **가독성/유지보수**: 네이밍, 죽은 코드, 매직 넘버, 주석/타입 부재
+
+# 규칙 (할루시네이션 방지 — 반드시 준수)
+- **이 파일에 실제로 있는 코드만** 근거로 삼는다. 없는 함수·호출·취약점을 지어내지 말 것.
+- 각 발견사항에는 **위치(라인 번호)** 와 **근거가 되는 코드 일부(짧게 인용)** 를 붙인다.
+- 확실하지 않으면 단정하지 말고 "확인 필요"로 표시한다.
+- 이 파일이 전반적으로 양호하면 발견사항을 억지로 만들지 말고 "특이사항 없음"이라고 적는다.
+- 다른 파일·외부 맥락을 가정하지 말 것(이 파일만 본다).
+
+[파일 경로] ${relPath}
+
+[파일 내용 — "라인번호| 코드" 형식]
+${fence}
+${numbered}
+${fence}
+
+# 출력 형식 (이 파일에 해당 항목이 없으면 "없음")
+## 파일: ${relPath}
+### 발견사항
+- [심각도: 높음|중간|낮음] [분류: 버그|보안|성능|설계|가독성] (L<라인>) 설명 — 근거: "코드 일부"
+  · 개선 제안: 구체적으로 무엇을 어떻게 바꿀지
+(특이사항이 없으면 "- 특이사항 없음")
+### 한줄 요약
+이 파일의 역할과 전반 상태를 한 문장으로.`;
+}
+
+/** [Reduce] Builds the prompt that merges per-file notes into the final, prioritized code review report. */
+export function buildReviewReducePrompt(notes: string, projectLabel: string, fileCount: number, focus: string): string {
+    const focusBlock = focus.trim() ? `\n# 리뷰 초점 (사용자 지정)\n${focus.trim()}\n` : '';
+    return `# 임무
+아래는 \`${projectLabel}\` 의 파일 ${fileCount}개를 파일별로 리뷰한 노트다. 이 노트만 근거로 **통합 코드 리뷰 보고서**를 작성하라.
+${focusBlock}
+# 규칙
+- **노트에 있는 발견사항만** 사용한다. 노트에 없는 문제를 새로 지어내지 말 것.
+- 여러 파일에 공통으로 나타나는 문제는 묶어서 "공통/구조적 이슈"로 정리한다.
+- 심각도와 영향 범위를 기준으로 **우선순위**를 매긴다(높은 심각도·넓은 영향 우선).
+- 각 이슈에는 파일·라인 위치를 유지한다(근거 추적 가능하게).
+- 비판만 하지 말고 잘 설계된 점도 짚는다.
+
+[파일별 리뷰 노트]
+${notes}
+
+# 출력 형식 (정확히 이 구조, 한국어)
+
+# 코드 리뷰 보고서 — ${projectLabel}
+
+## 1. 총평
+전반적 품질·구조·주요 위험을 3~5줄로 요약. 코드를 직접 보지 않은 사람도 상태를 파악할 수 있게.
+
+## 2. 우선 개선 사항 (Top 우선순위)
+가장 시급한 것부터 번호 매겨 정리. 각 항목: [심각도] 문제 — 위치(파일:라인) — 개선 방향.
+
+## 3. 분류별 발견사항
+### 🐞 버그/오류
+### 🔒 보안
+### ⚡ 성능
+### 🧩 설계/구조 (공통 이슈 포함)
+### 📖 가독성/유지보수
+(각 항목에 파일:라인 + 한 줄 개선 제안. 해당 분류에 발견사항이 없으면 "발견사항 없음".)
+
+## 4. 잘된 점
+유지·강화할 가치가 있는 설계·패턴.
+
+## 5. 권장 다음 단계
+실행 가능한 조치를 우선순위 순 체크리스트로.`;
+}
diff --git a/src/features/datacollect/reviewFiles.ts b/src/features/datacollect/reviewFiles.ts
new file mode 100644
index 0000000..b8c77cd
--- /dev/null
+++ b/src/features/datacollect/reviewFiles.ts
@@ -0,0 +1,102 @@
+/**
+ * /review 대상 소스 파일 수집기.
+ *
+ * 디렉터리를 재귀 순회하며 "리뷰할 가치가 있는 소스 파일"만 골라낸다.
+ * 의존성·빌드 산출물·바이너리·생성물은 제외한다. 순수 판정 로직(shouldReviewFile)은
+ * fs 와 분리해 테스트 가능하게 둔다.
+ */
+import { promises as fsp } from 'fs';
+import * as path from 'path';
+
+/** Source extensions that are review targets (lowercase, leading dot included). */
+export const REVIEW_EXTENSIONS = new Set([
+    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
+    '.py', '.java', '.go', '.rs', '.rb', '.php',
+    '.c', '.cc', '.cpp', '.h', '.hpp', '.cs', '.kt', '.swift', '.scala',
+    '.vue', '.svelte', '.sql', '.sh',
+]);
+
+/** Directory names the walk skips entirely. */
+export const SKIP_DIRS = new Set([
+    'node_modules', '.git', 'out', 'dist', 'build', '.next', 'coverage',
+    'vendor', '.venv', 'venv', '__pycache__', '.astra', '.secondbrain',
+    '.vscode', '.idea', 'bin', 'obj', 'target', 'media', 'assets',
+]);
+
+/**
+ * Decides whether a slash-normalized relative path is a review target: skipped
+ * directories, the extension filter and generated files (.min.js / .d.ts / *.map / lock files) are all judged here.
+ */
+export function shouldReviewFile(relPathPosix: string): boolean {
+    const dirs = relPathPosix.split('/');
+    const fileName = (dirs.pop() ?? '').toLowerCase();  // what remains in `dirs` are the ancestor directories
+    if (!dirs.every((dir) => !SKIP_DIRS.has(dir))) return false;  // somewhere under a skipped directory
+    // Generated artifacts and lock files are noise, not reviewable source.
+    const noiseSuffixes = ['.min.js', '.d.ts', '.map'];
+    const lockFiles = ['package-lock.json', 'yarn.lock', 'pnpm-lock.yaml'];
+    if (noiseSuffixes.some((suffix) => fileName.endsWith(suffix)) || lockFiles.includes(fileName)) return false;
+    const dot = fileName.lastIndexOf('.');
+    return REVIEW_EXTENSIONS.has(dot === -1 ? '' : fileName.slice(dot));
+}
+
+export interface CollectedFile {
+    /** Absolute path. */
+    absPath: string;
+    /** Path relative to the root, with forward slashes. */
+    relPath: string;
+    /** Size in bytes. */
+    size: number;
+}
+
+export interface CollectOptions {
+    /** Upper bound on collected files. The excess is cut off and truncated=true. */
+    maxFiles: number;
+    /** Maximum size of one file in bytes (larger files are excluded — guards against huge generated files). */
+    maxFileBytes: number;
+}
+
+export interface CollectResult {
+    files: CollectedFile[];
+    /** Whether the list was cut because it exceeded maxFiles. */
+    truncated: boolean;
+    /** Total number of review candidates seen during the walk (before the cap is applied). */
+    totalCandidates: number;
+}
+
+/**
+ * Recursively walks `root` and collects the files that pass `shouldReviewFile`
+ * and are at most `opts.maxFileBytes` bytes; unreadable directories and files
+ * whose `stat` fails are skipped. Results are sorted by `relPath` in code-unit
+ * order so the order (and which files survive the `opts.maxFiles` cut) does not
+ * depend on the host locale.
+ */
+export async function collectSourceFiles(root: string, opts: CollectOptions): Promise<CollectResult> {
+    const found: CollectedFile[] = [];
+    const walk = async (dir: string): Promise<void> => {
+        let entries: import('fs').Dirent[];
+        try {
+            entries = await fsp.readdir(dir, { withFileTypes: true });
+        } catch {
+            return;  // directories that cannot be read (e.g. permissions) are skipped
+        }
+        for (const ent of entries) {
+            const abs = path.join(dir, ent.name);
+            if (ent.isDirectory()) {
+                if (SKIP_DIRS.has(ent.name)) continue;
+                await walk(abs);
+            } else if (ent.isFile()) {
+                const rel = path.relative(root, abs).split(path.sep).join('/');
+                if (!shouldReviewFile(rel)) continue;
+                let size = 0;
+                try { size = (await fsp.stat(abs)).size; } catch { continue; }
+                if (size > opts.maxFileBytes) continue;
+                found.push({ absPath: abs, relPath: rel, size });
+            }
+        }
+    };
+    await walk(root);
+    // Plain code-unit comparison: localeCompare would order paths differently depending on the default locale.
+    found.sort((a, b) => (a.relPath < b.relPath ? -1 : a.relPath > b.relPath ? 1 : 0));
+    const truncated = found.length > opts.maxFiles;
+    return { files: truncated ? found.slice(0, opts.maxFiles) : found, truncated, totalCandidates: found.length };
+}
diff --git a/tests/reviewFiles.test.ts b/tests/reviewFiles.test.ts
new file mode 100644
index 0000000..188e5a8
--- /dev/null
+++ b/tests/reviewFiles.test.ts
@@ -0,0 +1,39 @@
+/**
+ * /review 소스 파일 수집 판정(shouldReviewFile) 테스트 — 디렉터리 스킵·확장자
+ * 필터·생성물 제외의 결정적 동작을 고정한다.
+ */
+import { shouldReviewFile, REVIEW_EXTENSIONS, SKIP_DIRS } from '../src/features/datacollect/reviewFiles';
+
+describe('shouldReviewFile', () => {
+    it('소스 확장자는 통과', () => {
+        expect(shouldReviewFile('src/agent.ts')).toBe(true);
+        expect(shouldReviewFile('app/main.py')).toBe(true);
+        expect(shouldReviewFile('pkg/server.go')).toBe(true);
+        expect(shouldReviewFile('ui/App.tsx')).toBe(true);
+    });
+
+    it('비소스/문서/바이너리는 제외', () => {
+        expect(shouldReviewFile('README.md')).toBe(false);
+        expect(shouldReviewFile('assets/icon.png')).toBe(false);
+        expect(shouldReviewFile('data.json')).toBe(false);
+    });
+
+    it('스킵 디렉터리 하위는 제외', () => {
+        expect(shouldReviewFile('node_modules/foo/index.js')).toBe(false);
+        expect(shouldReviewFile('out/extension.js')).toBe(false);
+        expect(shouldReviewFile('.git/hooks/pre-commit.sample')).toBe(false);
+        expect(shouldReviewFile('src/.astra/cache/x.ts')).toBe(false); // skipped directory in the middle of the path
+    });
+
+    it('생성물·노이즈는 제외', () => {
+        expect(shouldReviewFile('dist/bundle.min.js')).toBe(false); // both dist and .min.js apply
+        expect(shouldReviewFile('src/types.d.ts')).toBe(false);
+        expect(shouldReviewFile('src/app.js.map')).toBe(false);
+        expect(shouldReviewFile('package-lock.json')).toBe(false);
+    });
+
+    it('레지스트리 상수 정합성', () => {
+        expect(REVIEW_EXTENSIONS.has('.ts')).toBe(true);
+        expect(SKIP_DIRS.has('node_modules')).toBe(true);
+    });
+});