#!/usr/bin/env python3
"""Scrape new followers via IG's news/inbox endpoint (the same data the IG
notifications "Follows" tab shows, with real timestamps).

Falls back to the follower list (first 500, minus the skip set) when the news
inbox yields no new followers after filtering.

Filters: skip if in CSV, EXCLUDED/PARTNER bucket, self.

Output: new_followers.json
"""
import csv, json, os, time
from datetime import datetime
from pathlib import Path
from urllib.parse import unquote

import rookiepy
from instagrapi import Client

from _buckets_lib import load_skip_handles

# ── Config ────────────────────────────────────────────────────────────────────
# Lower/upper bounds assigned to the client's delay_range (presumably seconds).
DELAY_MIN, DELAY_MAX = 1.5, 2.5

# Every data file lives in the same directory as this script.
BASE = Path(__file__).parent
DM_CSV = BASE.joinpath("follow_up_FULL.csv")
OUT_FILE = BASE.joinpath("new_followers.json")
SNAPSHOT_FILE = BASE.joinpath("followers_snapshot.json")

# ── Login ─────────────────────────────────────────────────────────────────────
print("logging in via chrome session...", flush=True)
cl = Client()
cl.delay_range = [DELAY_MIN, DELAY_MAX]
# Reuse the instagram.com cookies stored by the local Chrome profile.
cookies = rookiepy.chrome(domains=["instagram.com"])
_cookie_values = {c["name"]: c["value"] for c in cookies}
if not _cookie_values.get("sessionid"):
    # Without this guard a logged-out Chrome profile surfaces as a bare
    # KeyError('sessionid'), which says nothing about how to fix it.
    raise SystemExit(
        "no instagram.com 'sessionid' cookie found in Chrome; "
        "log in to Instagram in Chrome and re-run"
    )
# The cookie value is URL-encoded; decode it before handing it to the login helper.
session_id = unquote(_cookie_values["sessionid"])
from _auth_lib import safe_login
safe_login(cl, BASE / "session.json", session_id)
my_id = str(cl.user_id)
my_username = cl.account_info().username
print(f"logged in as: {my_username}", flush=True)

# ── Load skip set ─────────────────────────────────────────────────────────────
# Lowercased handles already present in the DM tracking CSV.
existing_handles = set()
if DM_CSV.exists():
    # newline="" is what the csv module asks for, so quoted fields that
    # contain line breaks are parsed correctly.
    with open(DM_CSV, encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            # DictReader fills missing trailing cells with None, so a short
            # row would crash on .lower() without the `or ""` guard.
            h = (row.get("handle") or "").lower().strip()
            if h:
                existing_handles.add(h)
# Skip anything returned by load_skip_handles(), anything already in the CSV,
# and our own account.
skip = load_skip_handles() | existing_handles | {f"@{my_username}".lower()}
print(f"skip set: {len(skip)} handles", flush=True)

# ── Fetch IG news inbox ───────────────────────────────────────────────────────
# Accumulators filled by walk(): the output rows and the handles already emitted.
new_followers, seen_handles = [], set()

print("fetching news/inbox (follow notifications)...", flush=True)
try:
    inbox = cl.news_inbox_v1(mark_as_seen=False)
except Exception as exc:
    # Best effort: a failed fetch is reported and then treated as an empty inbox.
    print(f"news_inbox_v1 failed: {exc}", flush=True)
    inbox = {}

# Scrape time, used wherever a notification carries no usable timestamp.
today = f"{datetime.now():%Y-%m-%d %H:%M}"

def walk(items):
    """Collect follow notifications from one section of the news inbox.

    For every item that looks like a follow notification, appends a record
    (``handle``, ``name``, ``followed_at``, ``source``) to the module-level
    ``new_followers`` list and remembers the lowercased handle in
    ``seen_handles``. Handles found in ``skip`` or already in
    ``seen_handles`` are ignored.

    Args:
        items: Iterable of notification dicts, each with an optional ``type``
            and an ``args`` dict. ``None`` or an empty iterable is a no-op.

    Returns:
        None. Results are accumulated in ``new_followers``.
    """
    for it in items or []:
        if not isinstance(it, dict):
            # Nothing to inspect on a non-dict entry.
            continue
        try:
            # ``type`` can be a string or a number. Coerce to str so a numeric
            # type does not raise on .lower() and drop the item before its
            # text has been inspected.
            t = str(it.get("type") or "").lower()
            args = it.get("args") or {}
            text = str(args.get("text") or "").lower()
            is_follow = "follow" in t or "started following" in text or "followed you" in text
            if not is_follow:
                continue
            username = args.get("profile_name") or args.get("username")
            full_name = args.get("full_name") or args.get("profile_name") or username
            ts_unix = args.get("timestamp") or it.get("timestamp")
            if not username:
                # Last resort: take the name from the first link entry.
                links = args.get("links") or []
                if links and isinstance(links[0], dict):
                    username = links[0].get("username") or links[0].get("text")
            if not username:
                continue
            handle = f"@{username}".lower()
            if handle in skip or handle in seen_handles:
                continue
            seen_handles.add(handle)
            if ts_unix:
                try:
                    when = datetime.fromtimestamp(float(ts_unix)).strftime("%Y-%m-%d %H:%M")
                except (TypeError, ValueError, OverflowError, OSError):
                    # Unparseable or out-of-range timestamp: use the scrape time.
                    when = today
            else:
                when = today
            new_followers.append({
                "handle":      f"@{username}",
                "name":        full_name or username,
                "followed_at": when,
                "source":      "news_inbox",
            })
        except Exception as e:
            # Best effort: one malformed item must not abort the scan, but
            # report it instead of hiding it.
            print(f"  skipped malformed news item: {type(e).__name__}: {e}", flush=True)
            continue

# The inbox payload may list stories under any of these keys. A missing key
# gives None, which walk() treats as an empty section.
for _section in ("new_stories", "old_stories", "continuation_stories", "stories"):
    walk(inbox.get(_section))

print(f"news_inbox yielded {len(new_followers)} unique new followers", flush=True)

# ── Fallback: top 500 followers minus CSV/buckets if news inbox empty ────────
if not new_followers:
    print("falling back to follower list comparison...", flush=True)
    # Request followers with amount=500 and keep everyone outside the skip set.
    # No real follow time is available here, so the scrape time is recorded.
    followers = cl.user_followers(my_id, amount=500)
    new_followers.extend(
        {
            "handle":      f"@{u.username}",
            "name":        u.full_name or u.username,
            "followed_at": today,
            "source":      "follower_list",
        }
        for u in followers.values()
        if f"@{u.username}".lower() not in skip
    )
    print(f"fallback yielded {len(new_followers)} CSV-untouched followers", flush=True)

# ── Carry over thread_ids from prior new_followers.json ─────────────────────
# Lowercased handle -> thread_id, recovered from the previous output file.
_tid_cache_nf = {}
if OUT_FILE.exists():
    try:
        _raw_nf = OUT_FILE.read_bytes()
        try:
            _text_nf = _raw_nf.decode("utf-8")
        except UnicodeDecodeError:
            # The file may have been written in the platform default encoding
            # rather than UTF-8; retry with that default.
            _text_nf = OUT_FILE.read_text()
        _prev_nf = json.loads(_text_nf)
    except (OSError, ValueError) as _e:
        # An unreadable or corrupt prior file only costs us the cache. Report
        # it instead of hiding it, then continue with an empty cache.
        # (JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.)
        print(f"  could not read prior {OUT_FILE.name}: {_e}", flush=True)
        _prev_nf = {}
    _prev_rows = _prev_nf.get("followers") if isinstance(_prev_nf, dict) else None
    for _p in _prev_rows if isinstance(_prev_rows, list) else []:
        if not isinstance(_p, dict):
            # One malformed row must not discard the rest of the cache.
            continue
        _ph = str(_p.get("handle") or "").lower().strip()
        _ptid = _p.get("thread_id") or ""
        if _ph and _ptid:
            _tid_cache_nf[_ph] = _ptid
# Copy cached thread_ids onto this run's followers that do not have one yet.
_carried_nf = 0
for _nf in new_followers:
    _h = _nf.get("handle", "").lower().strip()
    if _h in _tid_cache_nf and not _nf.get("thread_id"):
        _nf["thread_id"] = _tid_cache_nf[_h]
        _carried_nf += 1
print(f"carried {_carried_nf} thread_ids from prior new_followers.json", flush=True)

# ── Resolve thread_ids for new followers that don't have one yet ────────────
# Followers that still have no thread_id at this point.
_to_resolve_nf = [n for n in new_followers if not n.get("thread_id")]
print(f"resolving thread_ids for {len(_to_resolve_nf)} new followers...", flush=True)
_resolved_nf = 0
for _nf in _to_resolve_nf:
    _h = _nf.get("handle", "").lstrip("@")
    if not _h:
        continue
    try:
        # Two client calls per follower: username -> user pk, then pk -> thread lookup.
        _user_pk = cl.user_info_by_username_v1(_h).pk
        _lookup = cl.direct_thread_by_participants([int(_user_pk)])
        # NOTE(review): assumes the lookup is a dict whose "thread" entry is
        # itself a dict with "thread_id"; confirm against the client library.
        _thread_obj = (_lookup.get("thread") or {}) if isinstance(_lookup, dict) else {}
        _tid = str(_thread_obj.get("thread_id") or "")
        if _tid:
            _nf["thread_id"] = _tid
            _resolved_nf += 1
    except Exception as _e:
        # Best effort: a failed lookup is reported and the follower is kept without a thread_id.
        print(f"  thread_id err for {_h}: {_e}", flush=True)
print(f"  resolved {_resolved_nf}/{len(_to_resolve_nf)} new thread_ids", flush=True)

# ── Save ──────────────────────────────────────────────────────────────────────
# Payload written to OUT_FILE: scrape time, row count and the rows themselves.
out = {
    "scraped_at":  today,
    "new_count":   len(new_followers),
    "followers":   new_followers,
}
# ensure_ascii=False keeps names as raw Unicode, so the encoding must be
# explicit. With the platform default (e.g. a legacy code page) a name
# containing emoji would raise UnicodeEncodeError and lose the whole run.
OUT_FILE.write_text(json.dumps(out, indent=2, ensure_ascii=False), encoding="utf-8")
print(f"\ndone. {len(new_followers)} new followers saved to {OUT_FILE.name}", flush=True)
