177 lines
5.3 KiB
Python
177 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Caveman Memory Compression Orchestrator
|
|
|
|
Usage:
|
|
python scripts/compress.py <filepath>
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import List
|
|
|
|
# Matches text that consists solely of one fenced block: optional leading
# whitespace, an opening fence of 3+ backticks or tildes (plus any info
# string), a body, and a closing line that repeats the opening fence exactly.
OUTER_FENCE_REGEX = re.compile(
    r"\A\s*(`{3,}|~{3,})[^\n]*\n(.*)\n\1\s*\Z", re.DOTALL
)


def strip_llm_wrapper(text: str) -> str:
    """Return *text* without an outer code fence that encloses all of it.

    The body between the opening and closing fence lines is returned as-is.
    Because the body is matched greedily, fences nested inside it are kept.
    Text that does not start with a fence, or whose last line does not repeat
    the opening fence, is returned unchanged.

    NOTE(review): a document that simply begins with one fenced block and
    ends with another using the same fence string also matches this pattern.
    """
    wrapped = OUTER_FENCE_REGEX.match(text)
    return text if wrapped is None else wrapped.group(2)
|
|
|
|
from .detect import should_compress
|
|
from .validate import validate
|
|
|
|
# Total number of validation passes compress_file runs. A fix call is made
# after each failed pass except the last one, so at most MAX_RETRIES - 1 fix
# attempts are issued before the original file is restored.
MAX_RETRIES = 2
|
|
|
|
|
|
# ---------- Claude Calls ----------
|
|
|
|
|
|
def call_claude(prompt: str) -> str:
    """Send *prompt* to Claude and return the reply with any outer fence removed.

    The ``anthropic`` SDK is used when ``ANTHROPIC_API_KEY`` is set and the
    package can be imported. Otherwise the prompt is piped to the ``claude``
    CLI (``claude --print``) on stdin.

    Args:
        prompt: Full prompt text, sent as a single user message.

    Returns:
        The reply text, whitespace-stripped and passed through
        ``strip_llm_wrapper``.

    Raises:
        RuntimeError: If the SDK reply contains no content, the ``claude``
            executable cannot be found, or the CLI exits with a non-zero
            status.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if api_key:
        # Only the import is guarded: an ImportError raised later, during the
        # API call itself, must surface instead of silently switching backend.
        try:
            import anthropic
        except ImportError:
            anthropic = None  # anthropic not installed, fall back to CLI

        if anthropic is not None:
            client = anthropic.Anthropic(api_key=api_key)
            msg = client.messages.create(
                model=os.environ.get("CAVEMAN_MODEL", "claude-sonnet-4-5"),
                max_tokens=8192,
                messages=[{"role": "user", "content": prompt}],
            )
            if not msg.content:
                raise RuntimeError("Claude call failed:\nempty response from API")
            return strip_llm_wrapper(msg.content[0].text.strip())

    # Fallback: use claude CLI (handles desktop auth)
    try:
        result = subprocess.run(
            ["claude", "--print"],
            input=prompt,
            text=True,
            capture_output=True,
            check=True,
        )
    except FileNotFoundError as e:
        # Without this, a missing executable looks exactly like the
        # "input file not found" error raised by compress_file.
        raise RuntimeError(
            "Claude call failed:\n`claude` CLI not found on PATH. Install it, "
            "or set ANTHROPIC_API_KEY and install the `anthropic` package."
        ) from e
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Claude call failed:\n{e.stderr}") from e
    return strip_llm_wrapper(result.stdout.strip())
|
|
|
|
|
|
def build_compress_prompt(original: str) -> str:
    """Build the prompt asking Claude to compress *original* into caveman format.

    The prompt opens with the instruction line, lists the strict preservation
    rules (code blocks, inline backticks, URLs, headings, paths/commands, no
    outer fence), and ends with the source text under a ``TEXT:`` label.

    Args:
        original: The markdown text to compress.

    Returns:
        The complete prompt string.
    """
    strict_rules = (
        "- Do NOT modify anything inside ``` code blocks",
        "- Do NOT modify anything inside inline backticks",
        "- Preserve ALL URLs exactly",
        "- Preserve ALL headings exactly",
        "- Preserve file paths and commands",
        "- Return ONLY the compressed markdown body — do NOT wrap the entire output in a ```markdown fence or any other fence. Inner code blocks from the original stay as-is; do not add a new outer fence around the whole file.",
    )
    # Sections are separated by one blank line; the prompt starts and ends
    # with a newline.
    sections = (
        "\nCompress this markdown into caveman format.",
        "STRICT RULES:\n" + "\n".join(strict_rules),
        "Only compress natural language.",
        f"TEXT:\n{original}\n",
    )
    return "\n\n".join(sections)
|
|
|
|
|
|
def build_fix_prompt(original: str, compressed: str, errors: List[str]) -> str:
    """Build the prompt asking Claude to repair specific validation errors.

    The prompt tells the model to fix only the listed errors without
    recompressing, and supplies the original text as reference alongside the
    compressed text to be fixed.

    Args:
        original: The uncompressed source text (reference only).
        compressed: The compressed text that failed validation.
        errors: Validation error messages, rendered one per bullet.

    Returns:
        The complete prompt string.
    """
    bullets = "\n".join([f"- {item}" for item in errors])

    critical_rules = "\n".join(
        (
            "CRITICAL RULES:",
            "- DO NOT recompress or rephrase the file",
            "- ONLY fix the listed errors — leave everything else exactly as-is",
            "- The ORIGINAL is provided as reference only (to restore missing content)",
            "- Preserve caveman style in all untouched sections",
        )
    )
    how_to_fix = "\n".join(
        (
            "HOW TO FIX:",
            "- Missing URL: find it in ORIGINAL, restore it exactly where it belongs in COMPRESSED",
            "- Code block mismatch: find the exact code block in ORIGINAL, restore it in COMPRESSED",
            "- Heading mismatch: restore the exact heading text from ORIGINAL into COMPRESSED",
            "- Do not touch any section not mentioned in the errors",
        )
    )

    # Sections are separated by one blank line; the prompt ends with a newline.
    sections = (
        "You are fixing a caveman-compressed markdown file. Specific validation errors were found.",
        critical_rules,
        f"ERRORS TO FIX:\n{bullets}",
        how_to_fix,
        f"ORIGINAL (reference only):\n{original}",
        f"COMPRESSED (fix this):\n{compressed}",
        "Return ONLY the fixed compressed file. No explanation.\n",
    )
    return "\n\n".join(sections)
|
|
|
|
|
|
# ---------- Core Logic ----------
|
|
|
|
|
|
def _restore_original(filepath: Path, backup_path: Path, original_bytes: bytes) -> None:
    """Write the original bytes back to *filepath* and remove the backup."""
    filepath.write_bytes(original_bytes)
    backup_path.unlink(missing_ok=True)


def compress_file(filepath: Path) -> bool:
    """Compress one file in place, keeping a byte-exact ``.original.md`` backup.

    Steps:
      1. Resolve the path, reject missing or oversized files, and skip files
         that ``should_compress`` declines.
      2. Ask Claude for a compressed version.
      3. Save the original next to the file as ``<stem>.original.md`` and
         overwrite the file with the compressed text.
      4. Validate up to ``MAX_RETRIES`` times, asking Claude to fix the
         reported errors between passes. If the last pass still fails, or
         anything raises along the way, the original is restored and the
         backup removed.

    Args:
        filepath: Path of the file to compress.

    Returns:
        True if the compressed file was written and passed validation.
        False if the file was skipped, a backup already existed, or
        validation failed on the final pass (original restored).

    Raises:
        FileNotFoundError: If *filepath* does not exist.
        ValueError: If the file is larger than 500 KB.
    """
    # Resolve and validate path
    filepath = filepath.resolve()
    MAX_FILE_SIZE = 500_000  # 500KB
    if not filepath.exists():
        raise FileNotFoundError(f"File not found: {filepath}")
    if filepath.stat().st_size > MAX_FILE_SIZE:
        raise ValueError(f"File too large to compress safely (max 500KB): {filepath}")

    print(f"Processing: {filepath}")

    if not should_compress(filepath):
        print("Skipping (not natural language)")
        return False

    # Keep the raw bytes so the backup and any restore are exact copies of the
    # original, independent of decoding losses or newline translation.
    original_bytes = filepath.read_bytes()
    # Decode explicitly as UTF-8 (not the platform locale). Undecodable bytes
    # are still dropped from the prompt text only, and newlines are normalised
    # the same way text-mode reading does.
    original_text = (
        original_bytes.decode("utf-8", errors="ignore")
        .replace("\r\n", "\n")
        .replace("\r", "\n")
    )
    backup_path = filepath.with_name(filepath.stem + ".original.md")

    # Check if backup already exists to prevent accidental overwriting
    if backup_path.exists():
        print(f"⚠️ Backup file already exists: {backup_path}")
        print("The original backup may contain important content.")
        print("Aborting to prevent data loss. Please remove or rename the backup file if you want to proceed.")
        return False

    # Step 1: Compress
    print("Compressing with Claude...")
    compressed = call_claude(build_compress_prompt(original_text))

    # Save original as backup, write compressed to original path
    backup_path.write_bytes(original_bytes)

    try:
        filepath.write_text(compressed, encoding="utf-8")

        # Step 2: Validate + Retry
        for attempt in range(MAX_RETRIES):
            print(f"\nValidation attempt {attempt + 1}")

            result = validate(backup_path, filepath)

            if result.is_valid:
                print("Validation passed")
                break

            print("❌ Validation failed:")
            for err in result.errors:
                print(f" - {err}")

            if attempt == MAX_RETRIES - 1:
                # Restore original on failure
                _restore_original(filepath, backup_path, original_bytes)
                print("❌ Failed after retries — original restored")
                return False

            print("Fixing with Claude...")
            compressed = call_claude(
                build_fix_prompt(original_text, compressed, result.errors)
            )
            filepath.write_text(compressed, encoding="utf-8")
    except BaseException:
        # A failed write, validation crash, fix-call error, or Ctrl-C must not
        # leave unvalidated output in place, nor a stale backup that would
        # block the next run.
        _restore_original(filepath, backup_path, original_bytes)
        raise

    return True
|