#!/usr/bin/env python3
"""
Adds lead_score + intent to follow_up_FULL.csv.

lead_score: calculated from conversation metadata (no API needed)
intent:     claude -p reads conversation, outputs label + signal
"""

import csv, subprocess, os, re, random
import shutil
from pathlib import Path
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed

# The CSV that is read and rewritten in place; it sits next to this script.
DM_CSV = Path(__file__).with_name("follow_up_FULL.csv")

# Environment passed to the claude subprocess: a copy of the current
# environment with ANTHROPIC_API_KEY removed.
# NOTE(review): presumably so the CLI uses its own stored login rather than
# an API key -- confirm.
ENV = dict(os.environ)
ENV.pop("ANTHROPIC_API_KEY", None)

# Threads whose last message is older than this are not sent for intent
# classification.
CUTOFF = datetime.now() - timedelta(days=60)

def parse_date(d):
    """Parse a ``YYYY-MM-DD HH:MM`` timestamp string into a ``datetime``.

    Returns ``datetime.min`` as a "no usable date" sentinel when *d* is not a
    string (e.g. ``None``) or does not match the expected format.
    """
    try:
        return datetime.strptime(d, "%Y-%m-%d %H:%M")
    except (ValueError, TypeError):
        # ValueError: malformed text; TypeError: non-string input such as None.
        # Deliberately narrow so Ctrl-C / SystemExit are not swallowed.
        return datetime.min

# ── Lead score from metadata (no API) ────────────────────────────────────────
def calc_lead_score(row):
    """Compute a heuristic lead score (0-100) from one row's metadata.

    Reads ``conversation_summary``, ``other_last_msg``, ``last_msg_date`` and
    ``follow_up_type`` from *row*. Missing keys and ``None`` values (which
    ``csv.DictReader`` produces for rows shorter than the header) are treated
    as empty strings instead of raising.

    Points awarded:
      * message depth: 3 per ``" | "``-separated message, capped at 30
      * ``follow_up_type == "NEEDS_REPLY"``: 20
      * length of their last message: 1 per 20 characters, capped at 15
      * question mark or question/pricing keyword in their last message: 15
      * buying/intent keyword in their last message: 20
      * recency of the last message: 15 (<= 1 day), 10 (<= 7), 5 (<= 30)

    Returns:
        int: the total, capped at 100.
    """
    summary = row.get("conversation_summary") or ""
    other = (row.get("other_last_msg") or "").lower()
    last_dt = parse_date(row.get("last_msg_date") or "")
    # datetime.min is parse_date's "unparseable" sentinel; treat as very old.
    days_ago = (datetime.now() - last_dt).days if last_dt != datetime.min else 999

    question_words = (
        "how", "when", "what", "can i", "is it", "do you",
        "how much", "price", "cost", "available",
    )
    buying_words = (
        "interested", "sign up", "join", "book", "session", "coaching",
        "course", "price", "cost", "how much", "available", "start",
    )

    score = 0

    # message depth: messages in the summary are separated by " | "
    msg_count = summary.count(" | ") + 1
    score += min(msg_count * 3, 30)

    # they replied (NEEDS_REPLY = more engaged)
    if row.get("follow_up_type") == "NEEDS_REPLY":
        score += 20

    # their message length signals interest
    score += min(len(other) // 20, 15)

    # question asked = high curiosity (plain substring match, not whole words)
    if "?" in other or any(w in other for w in question_words):
        score += 15

    # buying/intent signals (plain substring match, not whole words)
    if any(w in other for w in buying_words):
        score += 20

    # recency
    if days_ago <= 1:
        score += 15
    elif days_ago <= 7:
        score += 10
    elif days_ago <= 30:
        score += 5

    return min(score, 100)

# ── Intent detection via claude ───────────────────────────────────────────────
# Labels the classification prompt in get_intent() asks claude to choose from.
INTENT_LABELS = ["HOT", "WARM", "COLD", "SUPPORT", "COLLAB", "SPAM"]
# Path to the claude CLI executable. Set the CLAUDE_BIN environment variable
# to run on another machine; otherwise the original install location is used.
CLAUDE_BIN = os.environ.get("CLAUDE_BIN") or "/Users/daichi/.local/bin/claude"

def get_intent(row):
    """Classify the other person's intent for one DM thread via ``claude -p``.

    If ``row["intent"]`` is already set to anything other than ``"[failed]"``
    it is returned unchanged and no subprocess is started. Otherwise the last
    800 characters of the conversation summary (``" | "`` separators turned
    into newlines) are embedded in a prompt and sent to the claude CLI.

    Returns:
        str: the first output line that contains ``|`` (expected shape
        ``LABEL | signal``), stripped. If the CLI cannot be run, times out
        (45 s), exits non-zero, or prints no such line, the fallback
        ``"WARM | general interest"`` is returned.

    NOTE: this function never returns ``"[failed]"``; failures are recorded as
    the fallback label, so they look like a real result and are not retried
    on later runs.
    """
    cached = row.get("intent")
    if cached and cached != "[failed]":
        return cached

    fallback = "WARM | general interest"
    # A missing/None summary must not raise here: this runs in a worker
    # thread and the exception would resurface from future.result().
    summary = (row.get("conversation_summary") or "").replace(" | ", "\n")[-800:]
    prompt = f"""read this instagram DM conversation and classify the intent of the other person.

conversation:
{summary}

output exactly this format (one line):
LABEL | one short signal phrase

where LABEL is one of: HOT (buying signals, asking price/availability), WARM (genuinely curious, engaged), COLD (passive, no real interest), SUPPORT (needs help/info), COLLAB (partnership/collab interest), SPAM (bot/promo)

example outputs:
HOT | asked about pricing and availability
WARM | asking curious questions about the method
COLD | short replies, no follow-through
SUPPORT | question about how shaking medicine works
COLLAB | mentioned their own practice/audience"""

    try:
        r = subprocess.run([CLAUDE_BIN, "-p", prompt], capture_output=True, text=True, timeout=45, env=ENV)
    except Exception as e:
        # Best effort: a missing binary or timeout must not abort the batch.
        print(f"  intent err: {e}", flush=True)
        return fallback

    if r.returncode != 0:
        print(f"  claude rc={r.returncode}: {r.stderr[:120]}", flush=True)
        return fallback

    # Take the first line that actually carries the "LABEL | signal"
    # delimiter; the model may print a preamble line before it.
    for line in r.stdout.splitlines():
        if "|" in line:
            return line.strip()
    return fallback

# ── Run ───────────────────────────────────────────────────────────────────────
# Load every row up front; the same file is rewritten in place at the end.
# newline="" lets the csv module handle line endings inside quoted fields.
with open(DM_CSV, newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    # Header order, de-duplicated (matches the key order of a row dict).
    fieldnames = list(dict.fromkeys(reader.fieldnames or []))
    rows = list(reader)

if not rows:
    # Previously this surfaced as an IndexError on rows[0].
    raise SystemExit(f"{DM_CSV.name}: no data rows, nothing to score")

# add columns if missing
for column in ("lead_score", "intent"):
    if column not in fieldnames:
        fieldnames.append(column)
for row in rows:
    row.setdefault("lead_score", "")
    row.setdefault("intent", "")

# score all rows (instant, no API)
print("calculating lead scores...")
for row in rows:
    row["lead_score"] = str(calc_lead_score(row))
print(f"scored {len(rows)} rows\n")

# intent: only recent threads worth analyzing (no usable intent yet and
# last message not older than CUTOFF)
to_classify = [
    (i, row) for i, row in enumerate(rows)
    if row.get("intent") in (None, "", "[failed]")
    and parse_date(row.get("last_msg_date") or "") >= CUTOFF
]
print(f"detecting intent for {len(to_classify)} recent threads...\n")

done = 0
with ThreadPoolExecutor(max_workers=2) as ex:
    futures = {ex.submit(get_intent, row): (i, row) for i, row in to_classify}
    for future in as_completed(futures):
        i, row = futures[future]
        result = future.result()
        rows[i]["intent"] = result
        done += 1
        label = result.split("|")[0].strip()
        # .get(): a missing "handle" column must not discard finished work.
        print(f"  [{done}/{len(to_classify)}] {row.get('handle')} → {label}")

# Keep a timestamped copy of the input before overwriting it.
if DM_CSV.exists():
    bak = DM_CSV.with_name(f"follow_up_FULL.csv.bak-{datetime.now().strftime('%Y%m%d-%H%M%S')}-prescore")
    shutil.copy2(DM_CSV, bak)
    print(f"backup written: {bak.name}")
with open(DM_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

print(f"\ndone. {len(rows)} rows scored, {done} intents classified.")
