103 lines
3.6 KiB
Python
103 lines
3.6 KiB
Python
import os
|
|
import re
|
|
from datetime import datetime
|
|
import uuid
|
|
|
|
# Directory the collector drops raw markdown notes into. The default is the
# original machine-specific location; set WIKI_SOURCE_DIR to run the script
# against another checkout without editing the file.
SOURCE_DIR = (
    os.environ.get("WIKI_SOURCE_DIR")
    or "/Volumes/Data/project/Antigravity/Datacollector_MAC/out_wiki"
)

# Root of the wiki topic tree that receives the processed notes. Override with
# WIKI_TARGET_DIR; an unset or empty variable falls back to the default.
TARGET_DIR = (
    os.environ.get("WIKI_TARGET_DIR")
    or "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"
)
|
|
|
|
def normalize_name(name):
    """Return a grouping key for a wiki file name.

    The key is built by removing parenthesised qualifiers, a trailing ``.md``
    extension and all punctuation (including underscores), lower-casing the
    result and joining the remaining words with single underscores.

    Letters and digits of any script are kept, so non-Latin titles produce
    distinct keys instead of all collapsing to the empty string.

    Args:
        name: File name or title to normalise.

    Returns:
        The normalised key; may be empty if nothing but punctuation remains.
    """
    # Drop qualifiers such as "(draft)" or "(2)".
    name = re.sub(r'\(.*?\)', '', name).strip()

    # Strip only the extension, not every ".md" substring inside the name.
    if name.endswith(".md"):
        name = name[:-len(".md")]

    # \w is Unicode-aware for str patterns; underscores are removed explicitly
    # so ASCII names keep producing the same keys as before.
    name = re.sub(r'[^\w\s]|_', '', name)

    # Collapse whitespace runs (e.g. the gap left by a removed parenthetical)
    # so "Foo (x) Bar" and "Foo Bar" share one key.
    return re.sub(r'\s+', '_', name.strip()).lower()
|
|
|
|
def get_p_reinforce_header(title, tags=None):
    """Build the YAML frontmatter block prepended to generated wiki notes.

    Args:
        title: Title of the note. Accepted for interface compatibility; it is
            not currently written into the header.
        tags: Iterable of tag values. Defaults to
            ``["automated", "datacollector", "brain_sync"]`` when ``None``.

    Returns:
        The frontmatter text: an opening ``---`` line, the ``id``, ``date``,
        ``type``, ``standard`` and ``tags`` fields, a closing ``---`` line and
        one blank line.
    """
    # Imported locally because the module only imports ``datetime`` itself.
    from datetime import timezone

    if tags is None:
        tags = ["automated", "datacollector", "brain_sync"]

    tag_str = "[" + ", ".join(str(tag) for tag in tags) + "]"

    # The format string ends in a literal "Z", so the clock must really be
    # UTC; a naive local ``datetime.now()`` would mislabel the timestamp.
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")

    # Random 12-hex-digit identifier; a new one is generated on every call.
    mission_id = f"mission_{uuid.uuid4().hex[:12]}"

    header_lines = [
        "---",
        f"id: {mission_id}",
        f"date: {date_str}",
        "type: knowledge_artifact",
        "standard: P-Reinforce v3.0",
        f"tags: {tag_str}",
        "---",
        "",
        "",
    ]
    return "\n".join(header_lines)
|
|
|
|
def _index_target_files(target_dir):
    """Walk *target_dir* once and index every file found beneath it.

    Returns:
        A ``(by_name, by_norm)`` pair of dicts mapping the lower-cased file
        name and the ``normalize_name`` key, respectively, to the path of the
        first file encountered with that key.
    """
    by_name = {}
    by_norm = {}
    for root, dirs, names in os.walk(target_dir):
        dirs.sort()  # in-place sort makes the traversal order deterministic
        for name in sorted(names):
            path = os.path.join(root, name)
            by_name.setdefault(name.lower(), path)
            by_norm.setdefault(normalize_name(name), path)
    return by_name, by_norm


def _merge_with_existing(content, existing_content, title):
    """Return *content* placed under the frontmatter of *existing_content*.

    A complete leading ``---`` ... ``---`` block is kept verbatim. If the
    existing note has no frontmatter, or its frontmatter is never closed, a
    fresh header is generated instead.
    """
    if existing_content.startswith("---"):
        parts = existing_content.split("---", 2)
        if len(parts) >= 3:
            return "---" + parts[1] + "---" + "\n\n" + content
    return get_p_reinforce_header(title) + content


def process_wikification():
    """Move collected markdown notes from SOURCE_DIR into the wiki tree.

    Steps:
      1. Group the ``.md`` files in SOURCE_DIR by ``normalize_name``.
      2. For each group pick the largest file as the representative.
      3. Look for a matching note anywhere under TARGET_DIR (same file name
         ignoring case, or same normalised key).
      4. If a match exists and the new content is longer, rewrite that note
         in place, keeping its frontmatter. If the existing note is at least
         as long, leave it untouched. If there is no match, create
         ``TARGET_DIR/<file>`` with a freshly generated header.
      5. Delete every source file of the group.

    Raises:
        OSError: If SOURCE_DIR cannot be listed or a file cannot be read,
            written or removed. A failing write happens before the group's
            source files are deleted.
    """
    files = sorted(
        f for f in os.listdir(SOURCE_DIR)
        if f.endswith(".md") and os.path.isfile(os.path.join(SOURCE_DIR, f))
    )

    # 1. Grouping
    groups = {}
    for f in files:
        groups.setdefault(normalize_name(f), []).append(f)

    print(f"Found {len(files)} files, grouped into {len(groups)} themes.")

    # Index the target tree once. Groups have distinct keys, so a note
    # written for one group can never be the match for a later group.
    by_name, by_norm = _index_target_files(TARGET_DIR)

    for norm, filenames in groups.items():
        # 2. Pick the richest content (largest file on disk)
        best_file = max(
            filenames,
            key=lambda x: os.path.getsize(os.path.join(SOURCE_DIR, x)),
        )
        best_path = os.path.join(SOURCE_DIR, best_file)

        with open(best_path, 'r', encoding='utf-8') as fh:
            content = fh.read()

        # Clean title (remove [[ ]] if exists)
        title = os.path.splitext(best_file)[0]
        clean_title = re.sub(r'\[\[(.*?)\]\]', r'\1', title)

        # 3. Check for existing file in target; an exact file-name match
        #    takes precedence over a normalised-key match.
        existing_path = by_name.get(best_file.lower()) or by_norm.get(norm)

        # 4. Merge or Create
        if existing_path is None:
            target_path = os.path.join(TARGET_DIR, best_file)
            final_content = get_p_reinforce_header(clean_title) + content
        else:
            with open(existing_path, 'r', encoding='utf-8') as fh:
                existing_content = fh.read()

            if len(content) > len(existing_content):
                print(f"Merging and prioritizing NEW content for: {clean_title}")
                # Update the matched note where it lives rather than
                # creating a duplicate at the top of TARGET_DIR.
                target_path = existing_path
                final_content = _merge_with_existing(
                    content, existing_content, clean_title
                )
            else:
                print(f"Skipping update for {clean_title}, existing content is richer.")
                target_path = None
                final_content = None

        # 5. Write to target (nothing is written when the update is skipped)
        if target_path is not None:
            with open(target_path, 'w', encoding='utf-8') as fh:
                fh.write(final_content)

        # 6. Cleanup SOURCE_DIR
        for f in filenames:
            os.remove(os.path.join(SOURCE_DIR, f))
            print(f"Deleted source: {f}")
|
|
|
|
# Script entry point: run the wikification pass only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    process_wikification()
|