# 2nd/scratch/mass_wikify.py

import os
import re
import uuid
from datetime import datetime, timezone

# Directory listed (non-recursively) for ".md" files to process; every source
# file that belongs to a processed group is deleted from here afterwards.
SOURCE_DIR = "/Volumes/Data/project/Antigravity/Datacollector_MAC/out_wiki"
# Wiki topics directory: walked recursively to look for an existing note on the
# same theme, and the location that receives the wikified output.
TARGET_DIR = "/Volumes/Data/project/Antigravity/Wiki/10_Wiki/Topics"

def normalize_name(name):
    """Reduce a note filename to a key used to group notes on the same theme.

    Parenthesised fragments (e.g. a " (1)" copy suffix), a trailing ".md"
    extension, punctuation and underscores are removed; the remainder is
    lower-cased and spaces become underscores.

    Args:
        name: File name or title, with or without the ".md" extension.

    Returns:
        The normalized key, e.g. "Hello World (1).md" -> "hello_world".
    """
    name = re.sub(r'\(.*?\)', '', name).strip()
    # Only strip ".md" when it really is the extension, not wherever the
    # substring happens to occur inside the title.
    if name.endswith(".md"):
        name = name[:-3]
    name = name.strip()
    # \w is Unicode-aware for str patterns, so letters and digits of any script
    # survive. An ASCII-only class would erase non-Latin titles entirely and
    # make unrelated notes collide on the empty key. Underscores are still
    # dropped so that ASCII names normalize exactly as before.
    name = re.sub(r'[^\w\s]|_', '', name)
    return name.lower().replace(" ", "_")

def get_p_reinforce_header(title, tags=None):
    """Build the YAML front-matter block that is prepended to a generated note.

    Args:
        title: Title of the note. Accepted so call sites stay unchanged; it is
            not written into the header.
        tags: Optional list of tag strings. When None, the default tags
            "automated", "datacollector" and "brain_sync" are used.

    Returns:
        The front-matter text: an opening "---" line, the id/date/type/
        standard/tags fields, a closing "---" line and one blank line.
    """
    if tags is None:
        tags = ["automated", "datacollector", "brain_sync"]

    tag_str = "[" + ", ".join(tags) + "]"
    # The trailing "Z" in the format denotes UTC, so the clock has to be read
    # in UTC; a naive local time would be mislabeled by the UTC offset.
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    # A fresh random id per call: 12 hex characters taken from a UUID4.
    mission_id = f"mission_{uuid.uuid4().hex[:12]}"

    header = f"""---
id: {mission_id}
date: {date_str}
type: knowledge_artifact
standard: P-Reinforce v3.0
tags: {tag_str}
---

"""
    return header

def _find_existing_target(best_file, norm):
    """Return the path of the first note under TARGET_DIR matching the theme, or None."""
    wanted = best_file.lower()
    for root, _dirs, target_files in os.walk(TARGET_DIR):
        for tf in target_files:
            if tf.lower() == wanted or normalize_name(tf) == norm:
                # Returning leaves both loops at once, so the first match wins
                # and the rest of the tree is not scanned needlessly.
                return os.path.join(root, tf)
    return None


def _merge_with_existing(clean_title, content, existing_content):
    """Return the text to store over an existing note, or None to leave it untouched."""
    if len(content) <= len(existing_content):
        print(f"Skipping update for {clean_title}, existing content is richer.")
        return None

    print(f"Merging and prioritizing NEW content for: {clean_title}")
    if existing_content.startswith("---"):
        # Reuse the existing front matter (the text between the first two
        # "---" fences) and replace the body with the new content.
        parts = existing_content.split("---", 2)
        if len(parts) >= 3:
            return "---" + parts[1] + "---" + "\n\n" + content
    # No front matter, or an unterminated one: generate a fresh header so the
    # stored note always starts with valid front matter.
    return get_p_reinforce_header(clean_title) + content


def process_wikification():
    """Move the notes in SOURCE_DIR into the wiki under TARGET_DIR.

    Steps:
      1. Group the ".md" files in SOURCE_DIR by their normalized name.
      2. For each group, take the largest file as the representative content.
      3. Look (recursively) for an existing note on the same theme in
         TARGET_DIR.
      4. If one exists and the new content is longer, rewrite that note in
         place, keeping its front matter; if the existing note is at least as
         long, leave it untouched. If none exists, create
         TARGET_DIR/<best file> with a generated header.
      5. Delete every source file of the group, including the ones that were
         not selected and groups whose update was skipped.

    Raises:
        OSError: If a directory or file cannot be listed, read, written or
            removed. Source files of a group are only removed after its
            target has been written.
    """
    files = [f for f in os.listdir(SOURCE_DIR) if f.endswith(".md")]

    # 1. Grouping
    groups = {}
    for f in files:
        groups.setdefault(normalize_name(f), []).append(f)

    print(f"Found {len(files)} files, grouped into {len(groups)} themes.")

    for norm, filenames in groups.items():
        # 2. Pick the richest content (largest file on disk)
        best_file = max(filenames, key=lambda x: os.path.getsize(os.path.join(SOURCE_DIR, x)))
        best_path = os.path.join(SOURCE_DIR, best_file)

        with open(best_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Clean title (remove [[ ]] if exists)
        title = best_file.replace(".md", "")
        clean_title = re.sub(r'\[\[(.*?)\]\]', r'\1', title)

        # 3. Check for existing file in target (recursive search)
        existing_path = _find_existing_target(best_file, norm)

        # 4. Merge or Create
        if existing_path is None:
            target_path = os.path.join(TARGET_DIR, best_file)
            final_content = get_p_reinforce_header(clean_title) + content
        else:
            with open(existing_path, 'r', encoding='utf-8') as f:
                existing_content = f.read()
            # Update the note where it already lives; writing to the top of
            # TARGET_DIR would leave a duplicate next to the nested original.
            target_path = existing_path
            final_content = _merge_with_existing(clean_title, content, existing_content)

        # 5. Write to target (nothing to write when the existing note was kept)
        if final_content is not None:
            with open(target_path, 'w', encoding='utf-8') as f:
                f.write(final_content)

        # 6. Cleanup SOURCE_DIR
        for f in filenames:
            os.remove(os.path.join(SOURCE_DIR, f))
            print(f"Deleted source: {f}")

# Run the wikification only when executed as a script, not when imported.
if __name__ == "__main__":
    process_wikification()