#!/usr/bin/env python3
"""
Keith DM Auto-Update Pipeline
Runs every 3 days via LaunchAgent.
1. Fetches recent IG threads (top 300, incremental)
2. Updates follow_up_FULL.csv with new/changed threads
3. Generates Claude suggestions for new entries
4. Rebuilds network_audit.html
"""

import csv, json, os, shutil, subprocess, sys, time
from datetime import datetime, timezone
from pathlib import Path

def _strip_dashes(s):
    """Swap every em-dash in *s* for a comma and squeeze runs of spaces.

    Falsy input (None, "") is handed back untouched.
    """
    import re as _re

    if not s:
        return s

    # Order matters: the space-padded variants must be consumed before the
    # bare dash, otherwise the padding would be left behind.
    swaps = (
        (" — ", ", "),
        (" —", ","),
        ("— ", ", "),
        ("—", ","),
    )
    cleaned = s
    for dash, comma in swaps:
        cleaned = cleaned.replace(dash, comma)

    return _re.sub(r" {2,}", " ", cleaned)


from urllib.parse import unquote

import rookiepy
from instagrapi import Client
from instagrapi.exceptions import ClientError, DirectThreadNotFound

# All project files live next to this script.
BASE        = Path(__file__).parent
# Thread table that is loaded at startup and rewritten after the fetch loop.
DM_CSV      = BASE / "follow_up_FULL.csv"
# NOTE(review): not referenced anywhere in the visible part of this script.
SESSION_FILE= BASE / "session.json"
# log() appends every message to this file.
LOG_FILE    = BASE / "auto_update.log"
# Local checkout of the dashboard repo; ensure_pages_repo() clones it when missing.
PAGES_DIR   = Path("/tmp/keith-pages")
# GitHub CLI binary, used to obtain the auth token for the clone/remote URL.
GH_BIN      = "/opt/homebrew/bin/gh"

FETCH_RECENT = 300   # only look at top N threads (sorted by recency)
# Number of messages requested when a changed thread is fetched in full.
MSGS_PER_THREAD = 20
# Lower/upper bound assigned to the instagrapi client's delay_range.
DELAY_MIN = 1.5
DELAY_MAX = 2.5
# Copy of the environment without ANTHROPIC_API_KEY; passed as env= to the
# claude CLI subprocess. Presumably so the CLI uses its own login rather than
# an API key -- TODO confirm.
ENV_NO_KEY = {k: v for k, v in os.environ.items() if k != "ANTHROPIC_API_KEY"}

def log(msg):
    """Print *msg* with a local "YYYY-MM-DD HH:MM" timestamp and append it to LOG_FILE.

    Args:
        msg: text to record; it is interpolated into the log line as-is.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M")
    line = f"[{ts}] {msg}"
    print(line, flush=True)
    # Messages contain non-ASCII characters ("✓", "—"), so pin the encoding
    # instead of relying on the locale default; this matches the other
    # open() calls in this file.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(line + "\n")

def ensure_pages_repo():
    """Make sure the dashboard checkout at PAGES_DIR exists and can authenticate.

    When PAGES_DIR has no ``.git`` directory the repo is cloned with a token
    from ``gh auth token`` and the commit identity is configured. When the
    checkout already exists, only the token embedded in the ``origin`` URL is
    refreshed. Every failure is logged and the function returns normally
    (best effort); nothing is raised to the caller.

    Returns:
        None
    """
    repo = str(PAGES_DIR)
    repo_present = (PAGES_DIR / ".git").exists()
    if not repo_present:
        log("keith-pages missing (reboot?), cloning...")

    try:
        token_r = subprocess.run([GH_BIN, "auth", "token"], capture_output=True, text=True)
    except OSError as e:
        log(f"keith-pages: could not run {GH_BIN}: {e}")
        return
    token = token_r.stdout.strip()
    if token_r.returncode != 0 or not token:
        # An empty token would yield "https://@github.com/...", which breaks
        # the clone and would clobber a working remote URL.
        log(f"keith-pages: gh auth token failed: {token_r.stderr.strip()[:200]}")
        return

    remote_url = f"https://{token}@github.com/xen9r/keith-dashboard.git"

    def _git(what, *args):
        """Run one git command; log the failure (token masked) and return False on error."""
        try:
            res = subprocess.run(["git", *args], capture_output=True, text=True)
        except OSError as e:
            log(f"keith-pages: could not run git {what}: {e}")
            return False
        if res.returncode != 0:
            err = res.stderr.replace(token, "***").strip()[:200]
            log(f"keith-pages: git {what} failed: {err}")
            return False
        return True

    if repo_present:
        # refresh token in remote url (token may have rotated)
        _git("remote set-url", "-C", repo, "remote", "set-url", "origin", remote_url)
        return

    if not _git("clone", "clone", remote_url, repo):
        return
    _git("config user.email", "-C", repo, "config", "user.email", "daichi@macstudio.local")
    _git("config user.name", "-C", repo, "config", "user.name", "Daichi")
    log("keith-pages restored ✓")

# Runs at import time, before any Instagram work, so the Pages checkout is
# prepared ahead of the publish step at the end of the script.
ensure_pages_repo()

# Instruction text that get_suggestion() places at the start of every prompt.
# It is runtime data: any edit here changes what the claude CLI is asked to write.
SYSTEM_PROMPT = """you are helping keith motes (@shakingmedicine), an australian shaking medicine / TRE coach.

brand: Shaking Medicine. always anchor on "Shaking Medicine" by name when the conversation calls for naming what keith does. don't use vague labels like "nervous system stuff", "somatic work", "stress release", "tremoring stuff", or "trauma work". name it: Shaking Medicine.

keith's voice (study these real examples):

BUMP_UP (re-engaging cold threads):
- "Hey Carly! Just circling back. Hope you're well. What's life looking like for you right now?"
- "Hey Vindan, thought I'd bump this up. I know how easy it is for messages to get buried. 🫶🙏"
- "Hey Miriam, just checking in. How's the tremoring been going for you lately?"
- "Hey Emily, bumping this up in case it got buried. What brought you to Shaking Medicine?"
- "Hey Begum, just bumping this up. I know how easy it is for messages to get buried. 🙂 How's your world?"
- "Hey Jacob, just wanted to circle back. Easy for messages to get lost in here. What first drew you to Shaking Medicine?"
- "Hey Sadie! Just checking in 🙂 Hope you're well. Anything been resonating with you from what we share?"

NEEDS_REPLY (responding to something they said):
- "Hey Jacqueline, still curious. Has anything from our content stuck with you?"
- "Hey Youssef, still curious how the fascia release practice has been going for you. Did you ever give that a go?"
- "What does your work look like these days? Are you teaching or more in private sessions?"
- "Hey Andi! Just wanted to check in 🙂 Still curious what brought you to our page, even a word or two works!"
- "Hey Tyrone! Bali has been incredible. Deepening your yoga practice and living in the moment sounds like exactly where you need to be right now 🙏 What does your practice look like these days?"

style rules:
- "Hey [Name]" or "Hey [Name]!" as opener. Capitalize the recipient's name.
- Capitalize the start of every sentence and proper nouns.
- 1 to 3 sentences max.
- Ends with a low-pressure curious question most of the time.
- Use warm emojis where they fit: 🙂 🙏 🫶 🌿 🎵 ✨ 🤗. About half the messages have one, some have two.
- For BUMP_UP, lean on phrases like "just circling back", "just bumping this up", "just checking in", "I know how easy it is for messages to get buried".
- When introducing what keith offers or asking what brought them in, name "Shaking Medicine".
- NO em-dashes (—). Use a comma, a period, or a new sentence instead.

write keith's next message based on the conversation:
- output ONLY the message text, no preamble or labels
- BUMP_UP: re-engage gently in the style of the examples above
- NEEDS_REPLY: respond directly to what the other person said in their last message"""

def get_suggestion(display_name, summary, follow_up_type):
    """Ask the claude CLI for keith's next message in one conversation.

    Args:
        display_name: name of the other participant, shown in the prompt.
        summary: conversation lines joined with " | "; only the trailing
            1200 characters (after splitting back into lines) are sent.
        follow_up_type: "BUMP_UP" or "NEEDS_REPLY", appended to the prompt.

    Returns:
        The CLI's stripped stdout on success, otherwise a string of the form
        "[failed: ...]" carrying either stderr or the exception text.
    """
    # One message per line, keeping only the most recent part of the thread.
    transcript = summary.replace(" | ", "\n")[-1200:]
    prompt = (
        f"{SYSTEM_PROMPT}\n"
        "\n"
        f"conversation with {display_name}:\n"
        f"{transcript}\n"
        "\n"
        f"type: {follow_up_type} — write keith's reply:"
    )
    command = ["/Users/daichi/.local/bin/claude", "-p", prompt]

    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=45,
            env=ENV_NO_KEY,
        )
        reply = proc.stdout.strip()
        if reply and proc.returncode == 0:
            return reply
        return f"[failed: {proc.stderr[:80]}]"
    except Exception as e:
        # Covers a missing binary, timeouts, etc.; the caller stores the marker text.
        return f"[failed: {e}]"

# ── Login ─────────────────────────────────────────────────────────────────────
# Reuse the Instagram session that is already logged in inside Chrome: read the
# browser's cookies, take the "sessionid" value and hand it to instagrapi.
log("logging in via chrome session...")
cl = Client()
cl.delay_range = [DELAY_MIN, DELAY_MAX]
cookies    = rookiepy.chrome(domains=["instagram.com"])
_ig_cookies = {c["name"]: c["value"] for c in cookies}
if not _ig_cookies.get("sessionid"):
    # Without this check the run would die with a bare KeyError traceback that
    # never reaches auto_update.log.
    log("no instagram sessionid cookie found in chrome (logged out?), aborting")
    sys.exit(1)
session_id = unquote(_ig_cookies["sessionid"])  # cookie value is URL-encoded
cl.login_by_sessionid(session_id)
my_id = str(cl.user_id)  # compared as a string against message/user ids below
log(f"logged in as: {cl.account_info().username}")

# ── Load existing CSV ─────────────────────────────────────────────────────────
# existing maps thread_id -> CSV row (dict); rows fetched later overwrite entries
# here, everything else is written back unchanged.
existing = {}
# Column order used when the CSV is rewritten.
fieldnames = [
    "handle","display_name","follow_up_type","last_msg_date","last_sender",
    "conversation_summary","keith_last_msg","other_last_msg","suggested_reply","thread_id","lead_score","intent","focus_hint"
]
if DM_CSV.exists():
    # newline="" lets the csv module handle line endings itself, so newlines
    # embedded in quoted fields (message text) survive the read/write round-trip.
    with open(DM_CSV, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            existing[row["thread_id"]] = row
log(f"loaded {len(existing)} existing threads")

# ── Fetch recent threads ──────────────────────────────────────────────────────
def _msg_text(m, limit):
    """Return the message text, or "[<item_type>]" for items without text, cut to *limit* chars."""
    return (getattr(m, "text", None) or f"[{m.item_type}]")[:limit]


def _fmt_ts(ts, fmt):
    """Format a message timestamp with *fmt*; return "" when it has no strftime()."""
    return ts.strftime(fmt) if hasattr(ts, "strftime") else ""


log(f"fetching top {FETCH_RECENT} recent threads...")
threads = cl.direct_threads(amount=FETCH_RECENT)
log(f"found {len(threads)} threads")

new_count     = 0
updated_count = 0
errors        = 0

for i, thread in enumerate(threads):
    thread_id = str(thread.id)
    try:
        users = [u for u in thread.users if str(u.pk) != my_id]
        if not users:
            continue

        # Only the first other participant is used to label the thread.
        other        = users[0]
        handle       = f"@{other.username}"
        display_name = other.full_name or other.username

        if not thread.messages:
            continue

        # check if last message changed (thread.messages[0] is treated as the newest)
        last_ts = _fmt_ts(thread.messages[0].timestamp, "%Y-%m-%d %H:%M")
        prev = existing.get(thread_id)
        if prev is not None and prev["last_msg_date"] == last_ts:
            # Unchanged threads are skipped, except rows whose stored suggestion
            # is a "[failed: ...]" marker: those are retried, otherwise a single
            # CLI hiccup would leave the row without a suggestion forever.
            if not (prev.get("suggested_reply") or "").startswith("[failed:"):
                continue

        # fetch full thread; reversed so the list runs oldest -> newest
        full_thread = cl.direct_thread(thread_id, amount=MSGS_PER_THREAD)
        messages    = list(reversed(full_thread.messages))
        if not messages:
            continue

        summary_lines = []
        for m in messages:
            sender = "keith" if str(m.user_id) == my_id else display_name
            ts     = _fmt_ts(m.timestamp, "%d.%m.%Y")
            summary_lines.append(f"[{ts}] {sender}: {_msg_text(m, 200)}")

        last_msg        = messages[-1]
        keith_sent_last = str(last_msg.user_id) == my_id
        last_sender     = "KEITH" if keith_sent_last else "OTHER"
        last_msg_date   = _fmt_ts(last_msg.timestamp, "%Y-%m-%d %H:%M")
        follow_up_type  = "BUMP_UP" if keith_sent_last else "NEEDS_REPLY"

        # Walk newest -> oldest to pick up the latest message from each side.
        keith_last_msg = ""
        other_last_msg = ""
        for m in reversed(messages):
            if str(m.user_id) == my_id and not keith_last_msg:
                keith_last_msg = _msg_text(m, 300)
            elif str(m.user_id) != my_id and not other_last_msg:
                other_last_msg = _msg_text(m, 300)
            if keith_last_msg and other_last_msg:
                break

        convo_summary = " | ".join(summary_lines[-12:])

        # generate suggestion
        log(f"  [{i+1}] {handle} [{follow_up_type}] — generating suggestion...")
        suggestion = get_suggestion(display_name, convo_summary, follow_up_type)

        # Columns this script does not compute are carried over from the old row.
        carried = prev or {}
        row = {
            "handle": handle, "display_name": display_name,
            "follow_up_type": follow_up_type, "last_msg_date": last_msg_date,
            "last_sender": last_sender, "conversation_summary": convo_summary,
            "keith_last_msg": keith_last_msg, "other_last_msg": other_last_msg,
            "suggested_reply": _strip_dashes(suggestion), "thread_id": thread_id,
            "lead_score": carried.get("lead_score", ""),
            "intent": carried.get("intent", ""),
            "focus_hint": carried.get("focus_hint", ""),
        }

        if prev is not None:
            updated_count += 1
        else:
            new_count += 1

        existing[thread_id] = row

    except DirectThreadNotFound:
        errors += 1
    except ClientError as e:
        # The thread is skipped (not retried), so count it to keep the final
        # summary honest.
        errors += 1
        log(f"  rate limit, sleeping 30s... ({e})")
        time.sleep(30)
    except Exception as e:
        # Top-level boundary for one thread: log and move on to the next.
        log(f"  error on {i+1} ({thread_id}): {e}")
        errors += 1

# ── Write CSV ─────────────────────────────────────────────────────────────────
# Every known row (carried over plus freshly fetched) goes back into the file.
all_rows = [*existing.values()]
# sort newest first
def parse_date(d):
    """Parse a "YYYY-MM-DD HH:MM" string into a datetime for sorting.

    Args:
        d: value of a row's last_msg_date column (may be empty or None).

    Returns:
        The parsed datetime, or datetime.min when *d* is missing or malformed,
        so such rows sort as the oldest.
    """
    try:
        return datetime.strptime(d, "%Y-%m-%d %H:%M")
    except (TypeError, ValueError):
        # TypeError: d is not a string (e.g. None); ValueError: wrong format.
        # A bare except here would also swallow KeyboardInterrupt/SystemExit.
        return datetime.min

# Newest conversation first; rows with an unparseable date sink to the bottom.
all_rows.sort(key=lambda r: parse_date(r["last_msg_date"]), reverse=True)

# backup current CSV before overwrite
if DM_CSV.exists():
    bak = DM_CSV.with_name(f"follow_up_FULL.csv.bak-{datetime.now().strftime('%Y%m%d-%H%M%S')}")
    shutil.copy2(DM_CSV, bak)
    log(f"backup written: {bak.name}")

# Write to a sibling temp file and swap it in afterwards: opening DM_CSV with
# "w" truncates it immediately, so any exception while writing (e.g. a row with
# an unexpected column) would otherwise leave a half-written CSV behind.
tmp_csv = DM_CSV.with_name(DM_CSV.name + ".tmp")
with open(tmp_csv, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(all_rows)
os.replace(tmp_csv, DM_CSV)

log(f"csv updated: {new_count} new, {updated_count} updated, {errors} errors")

# ── Rebuild HTML ──────────────────────────────────────────────────────────────
def _run_step(start_msg, script_args, ok_msg, err_label):
    """Run one helper script with python3, logging the start, success or first 200 chars of stderr."""
    log(start_msg)
    res = subprocess.run(["python3", *script_args], capture_output=True, text=True)
    if res.returncode == 0:
        log(ok_msg)
    else:
        log(f"{err_label}: {res.stderr[:200]}")
    return res


# re-download sheet first
log("downloading google sheet...")
# Argument list instead of a shell string, and the exit status is checked.
# -f makes curl fail on HTTP errors instead of saving the error page as the
# CSV; -S keeps curl's error message visible despite -s.
_dl = subprocess.run(
    ["curl", "-fsSL",
     "https://docs.google.com/spreadsheets/d/1zmzSweC0NY6gOqUW53HJIQePxyW6-nMhVzfXzcI_lJs/export?format=csv&gid=0",
     "-o", "/tmp/keith_network.csv"],
    capture_output=True, text=True,
)
if _dl.returncode != 0:
    log(f"sheet download err (curl exit {_dl.returncode}): {_dl.stderr.strip()[:200]}")

# Each step is best effort: a failure is logged and the pipeline continues.
vr = _run_step("transcribing voice messages...",
               [str(BASE / "transcribe_voices.py")],
               "voice transcription done", "voice transcription err")
fr = _run_step("generating focus hints for NEEDS_REPLY rows...",
               ["-u", str(BASE / "add_focus_hint.py")],
               "focus hints done", "focus_hint err")
r = _run_step("rebuilding html...",
              [str(BASE / "build_audit.py")],
              "html rebuilt ✓", "html error")

log("publishing dashboard to GitHub Pages...")
_pages = PAGES_DIR
_html = BASE / "network_audit.html"
if not _html.exists():
    # copy2 would raise and abort the run before the final summary line.
    log(f"pages publish err: {_html.name} not found, skipping publish")
elif not (_pages / ".git").exists():
    log(f"pages publish err: {_pages} is not a git checkout, skipping publish")
else:
    shutil.copy2(_html, _pages / "index.html")
    for _cmd in [
        ["git", "-C", str(_pages), "add", "index.html"],
        ["git", "-C", str(_pages), "commit", "--allow-empty", "-m", f"update {datetime.now().strftime('%Y-%m-%d')}"],
        ["git", "-C", str(_pages), "push"],
    ]:
        _r = subprocess.run(_cmd, capture_output=True, text=True)
        if _r.returncode != 0 and "nothing to commit" not in _r.stdout + _r.stderr:
            log(f"pages publish err: {_r.stderr[:200]}")
            break
    else:
        # for/else: reached only when no git command broke out of the loop
        log("dashboard published to GitHub Pages ✓")

log(f"=== update complete. new:{new_count} updated:{updated_count} errors:{errors} ===")
