72c41ae834
Builds the "indexed library" foundation and first intelligent features on top of the organizer (sql.js index, non-destructive in-place indexing).

Phase 0 — Library index:
- sql.js (WASM SQLite) index DB; contentHash-keyed assets, resumable indexing (skip by path+mtime), batch persistence (chosen over native better-sqlite3, which fails to build on Node 24 / Python 3.12)
- Library folders (in place, non-destructive) + background indexer w/ progress
- Thumbnails generated in the AI worker (canvas->webp), cached in userData; served via photoai-media://thumb by hash; thumbnail grid w/ pagination

Phase 1 — AI quality assessment & culling:
- Focus (Laplacian variance), exposure (histogram), eyes-open (face-api EAR) computed in one analyze pass alongside the thumbnail
- Culling filters (candidate/rejected) + quality badges
- Adjustable thresholds (live SQL re-classification from stored raw scores, no re-analysis) + manual star rating (0-5) and color labels (usermeta)

Phase 2 — CLIP natural-language / similarity search:
- @huggingface/transformers (WASM/WebGPU, no native build)
- CLIP image/text embeddings (lazy-loaded); Korean queries auto-translated via opus-mt-ko-en into the English CLIP
- Embeddings stored as SQLite BLOBs; "build search index" batch w/ progress; brute-force cosine search; new Search tab
- Note: models download from HF Hub on first use; fully-offline ORT-wasm packaging and KO search-accuracy tuning are follow-ups

Tabs added (Organize / Library / Search). All typecheck/tests(12)/build green; boot smoke verified across phases.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
101 lines
3.7 KiB
JavaScript
101 lines
3.7 KiB
JavaScript
// Headless verification of the Phase 0-b indexing pipeline.
// Confirms that the sql.js schema, folder walk, sampled hash, upsert, and count actually work.
// Usage: node scripts/verify-index.mjs [folder]
|
|
import initSqlJs from 'sql.js'
|
|
import { readdir, stat, open, mkdtemp, writeFile, rm } from 'node:fs/promises'
|
|
import { readFileSync } from 'node:fs'
|
|
import { join, extname, dirname } from 'node:path'
|
|
import { tmpdir } from 'node:os'
|
|
import { createHash } from 'node:crypto'
|
|
import { fileURLToPath } from 'node:url'
|
|
|
|
// Directory that contains this script (ES modules have no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url))

// Lower-cased file extensions accepted by the folder walk: still images plus
// the .mp4/.mov video containers.
const IMG = new Set(['.jpg', '.jpeg', '.png', '.webp', '.mp4', '.mov'])
|
|
|
|
/**
 * Recursively yields the paths of media files found under `root`.
 *
 * Directories are descended as they are encountered; entries that are neither
 * directories nor regular files are skipped. Extension matching is
 * case-insensitive because the extension is lower-cased before the lookup.
 *
 * @param {string} root - Directory to scan.
 * @param {{ has(ext: string): boolean }} [exts=IMG] - Accepted extensions,
 *   lower-cased and including the leading dot. Defaults to the module-level set.
 * @returns {AsyncGenerator<string>} Paths built by joining `root` with entry names.
 */
async function* walk(root, exts = IMG) {
  const entries = await readdir(root, { withFileTypes: true })
  for (const entry of entries) {
    const full = join(root, entry.name)
    if (entry.isDirectory()) {
      // Pass the filter down so nested folders use the same extension set.
      yield* walk(full, exts)
    } else if (entry.isFile() && exts.has(extname(entry.name).toLowerCase())) {
      yield full
    }
  }
}
|
|
|
|
/**
 * Computes a cheap content fingerprint for a file: the SHA-1 of the file size
 * (as decimal text) followed by the file's leading bytes.
 *
 * Only the first `sampleBytes` bytes are read, so the cost does not grow with
 * file size. Empty files are hashed from their size alone and are never opened.
 *
 * @param {string} path - File to fingerprint.
 * @param {number} [sampleBytes=524288] - Maximum number of leading bytes to hash.
 * @returns {Promise<string>} Hex-encoded SHA-1 digest.
 */
async function contentHash(path, sampleBytes = 512 * 1024) {
  const { size } = await stat(path)
  const hasher = createHash('sha1')
  // The size is mixed in so files sharing the sampled prefix but differing in
  // length still get different digests.
  hasher.update(String(size))

  const wanted = Math.min(sampleBytes, size)
  if (wanted > 0) {
    const fh = await open(path, 'r')
    try {
      const buf = Buffer.alloc(wanted)
      let filled = 0
      // One read() may return fewer bytes than requested, so keep reading
      // until the sample is complete or the file ends early.
      while (filled < wanted) {
        const { bytesRead } = await fh.read(buf, filled, wanted - filled, filled)
        if (bytesRead === 0) break
        filled += bytesRead
      }
      // Hash only what was actually read, never the zero-filled remainder.
      hasher.update(buf.subarray(0, filled))
    } finally {
      await fh.close()
    }
  }
  return hasher.digest('hex')
}
|
|
|
|
// Upsert shared by both indexing passes so they exercise exactly the same SQL.
const UPSERT_ASSET_SQL = `INSERT INTO asset (contentHash,path,ext,sizeBytes,mtime,indexedAt)
  VALUES (?,?,?,?,?,?)
  ON CONFLICT(contentHash) DO UPDATE SET path=excluded.path, mtime=excluded.mtime`

/** Walks `folder` and upserts one asset row per media file; returns the number of files processed. */
async function indexFolder(db, folder) {
  let processed = 0
  for await (const file of walk(folder)) {
    const info = await stat(file)
    const hash = await contentHash(file)
    db.run(UPSERT_ASSET_SQL, [
      hash,
      file,
      extname(file).toLowerCase(),
      info.size,
      Math.floor(info.mtimeMs),
      Date.now()
    ])
    processed++
  }
  return processed
}

/** Returns the current number of rows in the asset table. */
function countAssets(db) {
  return db.exec('SELECT COUNT(*) FROM asset')[0].values[0][0]
}

/**
 * Headless check of the indexing pipeline against an in-memory sql.js database.
 *
 * Indexes the folder given as the first CLI argument (or a temporary folder
 * with two dummy files when none is given), prints the row count and a sample,
 * then indexes the same folder again and verifies the row count is unchanged.
 * A mismatch sets a non-zero exit code. The database and any temporary folder
 * are released even when a step throws.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let folder = process.argv[2]
  let temp = null
  let db = null
  try {
    // No folder argument: build a throwaway folder containing two dummy images.
    if (!folder) {
      temp = await mkdtemp(join(tmpdir(), 'photoai-idx-'))
      await writeFile(join(temp, 'a.jpg'), Buffer.from('dummy-image-a'))
      await writeFile(join(temp, 'b.png'), Buffer.from('dummy-image-b-different'))
      folder = temp
      console.log('테스트 폴더 생성:', folder)
    }

    const wasm = readFileSync(join(__dirname, '..', 'node_modules', 'sql.js', 'dist', 'sql-wasm.wasm'))
    // Copying into a fresh Uint8Array yields an ArrayBuffer holding exactly the wasm bytes.
    const SQL = await initSqlJs({ wasmBinary: new Uint8Array(wasm).buffer })
    db = new SQL.Database()
    db.run(`CREATE TABLE asset (
      id INTEGER PRIMARY KEY AUTOINCREMENT, contentHash TEXT UNIQUE NOT NULL,
      path TEXT, ext TEXT, sizeBytes INTEGER, mtime INTEGER, indexedAt INTEGER);
      CREATE INDEX idx_path ON asset(path);`)

    const indexed = await indexFolder(db, folder)
    const n = countAssets(db)
    const sample = db.exec('SELECT contentHash, path FROM asset LIMIT 2')
    console.log(`색인 처리: ${indexed}건, DB asset 수: ${n}`)
    if (sample[0]) {
      for (const row of sample[0].values) console.log(' row:', row[0].slice(0, 12), '…', row[1])
    }

    // Second pass over the same folder: the upsert must not add duplicate rows.
    await indexFolder(db, folder)
    const n2 = countAssets(db)
    console.log(`재실행 후 asset 수(중복 없어야 함): ${n2}`)
    if (n === n2) {
      console.log('PASS: upsert 중복 없음')
    } else {
      console.log('FAIL: 중복 발생')
      // Let callers (CI, shell scripts) detect the failed verification.
      process.exitCode = 1
    }
  } finally {
    if (db) db.close()
    if (temp) await rm(temp, { recursive: true, force: true })
  }
}
|
|
|
|
// Run the verification; report any unexpected failure with a non-zero exit status.
main().catch((err) => {
  console.error('오류:', err)
  // Set the exit code rather than calling process.exit(), so pending stderr
  // output is written before the process ends.
  process.exitCode = 1
})
|