updates
This commit is contained in:
176
caveman-compress/scripts/compress.py
Normal file
176
caveman-compress/scripts/compress.py
Normal file
@@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Caveman Memory Compression Orchestrator
|
||||
|
||||
Usage:
|
||||
python scripts/compress.py <filepath>
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
OUTER_FENCE_REGEX = re.compile(
|
||||
r"\A\s*(`{3,}|~{3,})[^\n]*\n(.*)\n\1\s*\Z", re.DOTALL
|
||||
)
|
||||
|
||||
|
||||
def strip_llm_wrapper(text: str) -> str:
|
||||
"""Strip outer ```markdown ... ``` fence when it wraps the entire output."""
|
||||
m = OUTER_FENCE_REGEX.match(text)
|
||||
if m:
|
||||
return m.group(2)
|
||||
return text
|
||||
|
||||
from .detect import should_compress
|
||||
from .validate import validate
|
||||
|
||||
MAX_RETRIES = 2
|
||||
|
||||
|
||||
# ---------- Claude Calls ----------
|
||||
|
||||
|
||||
def call_claude(prompt: str) -> str:
|
||||
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
||||
if api_key:
|
||||
try:
|
||||
import anthropic
|
||||
|
||||
client = anthropic.Anthropic(api_key=api_key)
|
||||
msg = client.messages.create(
|
||||
model=os.environ.get("CAVEMAN_MODEL", "claude-sonnet-4-5"),
|
||||
max_tokens=8192,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
return strip_llm_wrapper(msg.content[0].text.strip())
|
||||
except ImportError:
|
||||
pass # anthropic not installed, fall back to CLI
|
||||
# Fallback: use claude CLI (handles desktop auth)
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["claude", "--print"],
|
||||
input=prompt,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
check=True,
|
||||
)
|
||||
return strip_llm_wrapper(result.stdout.strip())
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise RuntimeError(f"Claude call failed:\n{e.stderr}")
|
||||
|
||||
|
||||
def build_compress_prompt(original: str) -> str:
|
||||
return f"""
|
||||
Compress this markdown into caveman format.
|
||||
|
||||
STRICT RULES:
|
||||
- Do NOT modify anything inside ``` code blocks
|
||||
- Do NOT modify anything inside inline backticks
|
||||
- Preserve ALL URLs exactly
|
||||
- Preserve ALL headings exactly
|
||||
- Preserve file paths and commands
|
||||
- Return ONLY the compressed markdown body — do NOT wrap the entire output in a ```markdown fence or any other fence. Inner code blocks from the original stay as-is; do not add a new outer fence around the whole file.
|
||||
|
||||
Only compress natural language.
|
||||
|
||||
TEXT:
|
||||
{original}
|
||||
"""
|
||||
|
||||
|
||||
def build_fix_prompt(original: str, compressed: str, errors: List[str]) -> str:
|
||||
errors_str = "\n".join(f"- {e}" for e in errors)
|
||||
return f"""You are fixing a caveman-compressed markdown file. Specific validation errors were found.
|
||||
|
||||
CRITICAL RULES:
|
||||
- DO NOT recompress or rephrase the file
|
||||
- ONLY fix the listed errors — leave everything else exactly as-is
|
||||
- The ORIGINAL is provided as reference only (to restore missing content)
|
||||
- Preserve caveman style in all untouched sections
|
||||
|
||||
ERRORS TO FIX:
|
||||
{errors_str}
|
||||
|
||||
HOW TO FIX:
|
||||
- Missing URL: find it in ORIGINAL, restore it exactly where it belongs in COMPRESSED
|
||||
- Code block mismatch: find the exact code block in ORIGINAL, restore it in COMPRESSED
|
||||
- Heading mismatch: restore the exact heading text from ORIGINAL into COMPRESSED
|
||||
- Do not touch any section not mentioned in the errors
|
||||
|
||||
ORIGINAL (reference only):
|
||||
{original}
|
||||
|
||||
COMPRESSED (fix this):
|
||||
{compressed}
|
||||
|
||||
Return ONLY the fixed compressed file. No explanation.
|
||||
"""
|
||||
|
||||
|
||||
# ---------- Core Logic ----------
|
||||
|
||||
|
||||
def compress_file(filepath: Path) -> bool:
|
||||
# Resolve and validate path
|
||||
filepath = filepath.resolve()
|
||||
MAX_FILE_SIZE = 500_000 # 500KB
|
||||
if not filepath.exists():
|
||||
raise FileNotFoundError(f"File not found: {filepath}")
|
||||
if filepath.stat().st_size > MAX_FILE_SIZE:
|
||||
raise ValueError(f"File too large to compress safely (max 500KB): {filepath}")
|
||||
|
||||
print(f"Processing: {filepath}")
|
||||
|
||||
if not should_compress(filepath):
|
||||
print("Skipping (not natural language)")
|
||||
return False
|
||||
|
||||
original_text = filepath.read_text(errors="ignore")
|
||||
backup_path = filepath.with_name(filepath.stem + ".original.md")
|
||||
|
||||
# Check if backup already exists to prevent accidental overwriting
|
||||
if backup_path.exists():
|
||||
print(f"⚠️ Backup file already exists: {backup_path}")
|
||||
print("The original backup may contain important content.")
|
||||
print("Aborting to prevent data loss. Please remove or rename the backup file if you want to proceed.")
|
||||
return False
|
||||
|
||||
# Step 1: Compress
|
||||
print("Compressing with Claude...")
|
||||
compressed = call_claude(build_compress_prompt(original_text))
|
||||
|
||||
# Save original as backup, write compressed to original path
|
||||
backup_path.write_text(original_text)
|
||||
filepath.write_text(compressed)
|
||||
|
||||
# Step 2: Validate + Retry
|
||||
for attempt in range(MAX_RETRIES):
|
||||
print(f"\nValidation attempt {attempt + 1}")
|
||||
|
||||
result = validate(backup_path, filepath)
|
||||
|
||||
if result.is_valid:
|
||||
print("Validation passed")
|
||||
break
|
||||
|
||||
print("❌ Validation failed:")
|
||||
for err in result.errors:
|
||||
print(f" - {err}")
|
||||
|
||||
if attempt == MAX_RETRIES - 1:
|
||||
# Restore original on failure
|
||||
filepath.write_text(original_text)
|
||||
backup_path.unlink(missing_ok=True)
|
||||
print("❌ Failed after retries — original restored")
|
||||
return False
|
||||
|
||||
print("Fixing with Claude...")
|
||||
compressed = call_claude(
|
||||
build_fix_prompt(original_text, compressed, result.errors)
|
||||
)
|
||||
filepath.write_text(compressed)
|
||||
|
||||
return True
|
||||
Reference in New Issue
Block a user