#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
dexgrok_cloud.py
Requests/cloudscraper-based DexCheck collector that avoids Playwright unless explicitly asked.
- Reuses cf_clearance and other cookies from a Firefox (or Chromium) user-data-dir.
- Tries to obtain `cloudflare-xcfs` by parsing the route HTML <meta name="cloudflare-xcfs" ...>.
- Optional one-shot Playwright bootstrap (--bootstrap-xcfs) to capture fresh xcfs and cache it.
- Proper Referer per route; supports ETH hype and Top Traders (eth/sol/bsc/base).
- Optional Privy Authorization for endpoints that need it (not used by default here).

Usage example:
python3 dexgrok_cloud.py \
  --out /var/www/html/reports/dexgrok_cloud.json \
  --user-data-dir /root/teste/wallet/pw-profile \
  --debug
"""

import argparse
import json
import os
import re
import sqlite3
import sys
import time
from http.cookiejar import Cookie, CookieJar
from pathlib import Path
from typing import Dict, Optional, Tuple, Any

try:
    import cloudscraper  # type: ignore
except Exception as e:
    print("[FATAL] cloudscraper is required: pip install cloudscraper", flush=True)
    raise

# Site roots: all API calls and Referers are built from these.
DEXCHECK_BASE = "https://dexcheck.ai"
DEXCHECK_APP  = f"{DEXCHECK_BASE}/app"

# App route URLs, used both as browse targets (to parse the xcfs <meta> tag)
# and as the per-dataset Referer header expected by the API.
APP_ROUTES = {
    "hype_eth":     f"{DEXCHECK_APP}/eth/hype-tracker",
    "traders_eth":  f"{DEXCHECK_APP}/eth/top-crypto-traders",
    "traders_sol":  f"{DEXCHECK_APP}/solana/top-crypto-traders",
    "traders_bsc":  f"{DEXCHECK_APP}/bsc/top-crypto-traders",
    "traders_base": f"{DEXCHECK_APP}/base/top-crypto-traders",
}

# API endpoints
# Paths are appended to DEXCHECK_BASE; the top-traders path varies per chain.
EP_HYPE       = "/eth-api/hype_tracker"
EP_TOP_TRADER = {
    "eth":  "/eth-api/top_traders_v2",
    "sol":  "/sol-api/top_traders_v2",
    "bsc":  "/bsc-api/top_traders_v2",
    "base": "/base-api/top_traders_v2",
}

def info(msg: str):
    """Emit an [INFO]-prefixed line to stdout, flushed immediately."""
    print(f"[INFO] {msg}", flush=True)

def warn(msg: str):
    """Emit a [WARN]-prefixed line to stdout, flushed immediately."""
    print(f"[WARN] {msg}", flush=True)

def dbg(msg: str, on: bool):
    """Emit a [DEBUG]-prefixed line, but only when *on* is truthy."""
    if not on:
        return
    print(f"[DEBUG] {msg}", flush=True)

# ---------------------------- Cookie helpers ----------------------------

def _read_firefox_cookies(profile_dir: str, debug: bool=False) -> Dict[str, str]:
    """
    Read cookies from Firefox/Playwright profile sqlite cookie DBs.
    Returns a simple name->value map for dexcheck.ai cookies.
    """
    jar: Dict[str, str] = {}
    root = Path(profile_dir)
    if not root.exists():
        warn(f"user-data-dir not found: {profile_dir}")
        return jar

    # Common cookie DB names
    candidates = list(root.glob("cookies.sqlite")) + list(root.glob("**/cookies.sqlite")) + list(root.glob("**/Cookies"))
    if debug:
        dbg("Found cookie DB(s): " + ", ".join(str(p) for p in candidates), debug)

    for db_path in candidates:
        try:
            con = sqlite3.connect(str(db_path))
            cur = con.cursor()
            # Try Firefox schema
            try:
                cur.execute("SELECT host, name, value FROM moz_cookies")
                rows = cur.fetchall()
            except Exception:
                # Try Chromium schema
                try:
                    cur.execute("SELECT host_key, name, value FROM cookies")
                    rows = cur.fetchall()
                except Exception:
                    rows = []
            for host, name, value in rows:
                if "dexcheck.ai" in host and name and value:
                    jar[name] = value
            con.close()
        except Exception as e:
            if debug:
                warn(f"Cookie DB read failed {db_path}: {e}")
            continue

    if debug and jar:
        dbg("Seeded cookies: " + ", ".join(sorted(jar.keys())), debug)
    return jar

def _cookiejar_from_map(kv: Dict[str, str]) -> CookieJar:
    jar = CookieJar()
    for k, v in kv.items():
        c = Cookie(
            version=0, name=k, value=v,
            port=None, port_specified=False,
            domain="dexcheck.ai", domain_specified=True, domain_initial_dot=False,
            path="/", path_specified=True,
            secure=True, expires=None, discard=True,
            comment=None, comment_url=None, rest={},
            rfc2109=False
        )
        jar.set_cookie(c)
    return jar

# ---------------------------- XCFS helpers ----------------------------

# Matches <meta ... name="cloudflare-xcfs" ... content="..."> in route HTML,
# capturing the content attribute value in group 1.  Case-insensitive and
# tolerant of either quote style; assumes name= precedes content= in the tag.
META_XCFS_RE = re.compile(
    r'<meta[^>]+name=["\']cloudflare-xcfs["\'][^>]*content=["\']([^"\']+)["\']',
    re.I
)

def html_discover_xcfs(scraper, route_url: str, debug: bool=False) -> Optional[str]:
    """
    Fetch a route's HTML and try to extract the token from
    <meta name="cloudflare-xcfs" content="...">.  Returns None on any
    failure (network error, non-OK status, or missing tag).
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Referer": DEXCHECK_BASE,
    }
    try:
        resp = scraper.get(route_url, headers=headers, timeout=30)
    except Exception as e:
        if debug:
            warn(f"route GET failed: {e}")
        return None
    if debug:
        dbg(f"route GET: {route_url}", debug)
    if not (resp.ok and resp.text):
        return None
    match = META_XCFS_RE.search(resp.text)
    return match.group(1) if match else None

def bootstrap_xcfs_with_playwright(profile_dir: str, debug: bool=False) -> Optional[str]:
    """
    OPTIONAL one-shot bootstrap using Playwright Chromium to sniff the
    `cloudflare-xcfs` request header, falling back to the DOM <meta> tag.
    ONLY used when the --bootstrap-xcfs flag is passed.

    Avoids the previous 'ios' platform error by preferring chromium and not
    changing the UA to iOS.  Returns the token string, or None when
    Playwright is unavailable or nothing was captured.
    """
    try:
        from playwright.sync_api import sync_playwright
    except Exception:
        warn("Playwright not installed. Try: pip install playwright && playwright install chromium")
        return None

    try:
        with sync_playwright() as pw:
            ctx = pw.chromium.launch_persistent_context(
                user_data_dir=profile_dir,
                headless=True,
                timeout=45000,
                args=[
                    "--disable-blink-features=AutomationControlled",
                    "--no-sandbox", "--disable-gpu", "--disable-dev-shm-usage"
                ]
            )
            try:
                page = ctx.new_page()
                holder = {"v": None}

                def _on_req(req):
                    # Record the first cloudflare-xcfs header seen on any
                    # request going to dexcheck.ai.
                    try:
                        if req.url.startswith(DEXCHECK_BASE):
                            v = req.headers.get("cloudflare-xcfs")
                            if v and not holder["v"]:
                                holder["v"] = v
                    except Exception:
                        # Never let the sniffer break page navigation.
                        pass

                page.on("request", _on_req)
                page.goto(APP_ROUTES["traders_eth"], wait_until="domcontentloaded", timeout=45000)
                # Give XHRs that carry the header a moment to fire.
                page.wait_for_timeout(2500)
                page.remove_listener("request", _on_req)

                if not holder["v"]:
                    # Try DOM meta as a fallback.
                    try:
                        v = page.evaluate("""() => {
                            const m = document.querySelector('meta[name="cloudflare-xcfs"]');
                            return m && m.content || null;
                        }""")
                        if v: holder["v"] = v
                    except Exception:
                        pass
            finally:
                # Close the browser context even if navigation/eval raised.
                ctx.close()
            if holder["v"] and debug:
                dbg("Playwright bootstrap captured cloudflare-xcfs", debug)
            return holder["v"]
    except Exception as e:
        warn(f"Playwright bootstrap failed: {e}")
        return None

# ---------------------------- Request core ----------------------------

def make_scraper(cookie_map: Dict[str, str]) -> Any:
    """Create a cloudscraper session (a requests.Session subclass)
    impersonating desktop Firefox on Linux, pre-seeded with the cookies
    from *cookie_map* scoped to dexcheck.ai."""
    browser_profile = {
        "custom": "firefox",
        "platform": "linux",
        "mobile": False,
    }
    session = cloudscraper.create_scraper(browser=browser_profile)
    session.cookies = _cookiejar_from_map(cookie_map)
    return session

def fetch_api(scraper, url: str, referer: str, xcfs: Optional[str], privy_auth: Optional[str], debug: bool=False) -> Dict[str, Any]:
    """
    GET *url* with browser-like JSON headers and optional auth material.

    xcfs       -- cloudflare-xcfs header value, sent only when provided.
    privy_auth -- Privy Authorization value, sent only when provided.

    Returns {"ok": bool, "status": int, "data": parsed-JSON-or-None}.
    `data` is only parsed when the response advertises application/json;
    parse failures degrade to data=None rather than raising.
    """
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Referer": referer,
        "Origin": DEXCHECK_BASE,
    }
    if xcfs:
        headers["cloudflare-xcfs"] = xcfs
    if privy_auth:
        headers["authorization"] = privy_auth

    r = scraper.get(url, headers=headers, timeout=35)
    if debug:
        # Use the module's dbg() helper (flushed output) instead of raw print,
        # for consistency with the rest of the file.
        dbg(f"GET {url} status={r.status_code} ok={r.ok}", debug)
        if not r.ok:
            t = (r.text or "")[:300]
            dbg(f"body_snippet={t!r}", debug)
    data = None
    try:
        if r.headers.get("Content-Type", "").startswith("application/json"):
            data = r.json()
    except Exception:
        data = None
    return {"ok": r.ok, "status": r.status_code, "data": data}

# ---------------------------- Workflows ----------------------------

def get_hype_eth(scraper, xcfs: Optional[str], debug: bool=False) -> Dict[str, Any]:
    """Collect up to three pages (20 rows each) of the ETH hype tracker.
    Stops at the first failed page; success means at least one row came back."""
    rows = []
    for page in (1, 2, 3):
        query = f"chain=eth&page={page}&size=20&t={int(time.time()*1000)}"
        res = fetch_api(scraper, f"{DEXCHECK_BASE}{EP_HYPE}?{query}",
                        APP_ROUTES["hype_eth"], xcfs, None, debug=debug)
        payload = res["data"]
        if not (res["ok"] and isinstance(payload, dict) and payload.get("success") is True):
            # A 400/404 on the final page most likely just means "no more data".
            if page >= 3 and res["status"] in (400, 404):
                warn(f"hype_tracker p{page} failed: status={res['status']} (likely end)")
            else:
                warn(f"hype_tracker p{page} failed: status={res['status']}")
            break
        rows.extend(payload.get("data") or [])
        time.sleep(0.2)  # small pause between pages
    return {"success": len(rows) > 0, "data": rows}

def get_top_traders_chain(scraper, chain: str, xcfs: Optional[str], debug: bool=False) -> Dict[str, Any]:
    """
    Fetch the top-traders leaderboard for *chain* ("eth", "sol", "bsc",
    "base").  The API's chain= parameter must be "solana" for "sol".
    Tries with the bot/sniper/honeypot filters enabled first, then disabled.
    """
    referer = APP_ROUTES[f"traders_{chain}"]
    api_chain = "solana" if chain == "sol" else chain

    def _url(filtered: bool) -> str:
        flag = "true" if filtered else "false"
        return (f"{DEXCHECK_BASE}{EP_TOP_TRADER[chain]}?chain={api_chain}&timeframe=30&last_active=30"
                f"&page=1&size=20&exclude_bots={flag}"
                f"&exclude_sniper={flag}&exclude_honeypots={flag}"
                f"&t={int(time.time()*1000)}")

    for filtered in (True, False):
        res = fetch_api(scraper, _url(filtered), referer, xcfs, None, debug=debug)
        body = res["data"]
        if res["ok"] and isinstance(body, dict) and body.get("success") is True:
            return {"success": True, "data": body.get("data")}
    warn(f"top_traders {chain} failed after retries")
    return {"success": False, "data": None}

# ---------------------------- Main ----------------------------

def _resolve_xcfs(scraper, args) -> Optional[str]:
    """Resolve the cloudflare-xcfs token, trying in order: --xcfs flag,
    cached --xcfs-file, HTML <meta> parse, optional Playwright bootstrap.
    Persists the token back to --xcfs-file when one was found."""
    token = args.xcfs or None

    # Cached file (only when not supplied on the command line).
    if not token and args.xcfs_file and Path(args.xcfs_file).exists():
        try:
            token = Path(args.xcfs_file).read_text(encoding="utf-8").strip()
            if args.debug and token:
                dbg("Loaded xcfs from file", args.debug)
        except Exception as e:
            warn(f"Failed reading xcfs file: {e}")

    # Parse the <meta> tag out of a route page.
    if not token:
        token = html_discover_xcfs(scraper, APP_ROUTES["traders_eth"], debug=args.debug)
        if args.debug and token:
            dbg("Discovered cloudflare-xcfs via HTML meta", args.debug)

    # Heavyweight Playwright bootstrap only on explicit request.
    if not token and args.bootstrap_xcfs:
        token = bootstrap_xcfs_with_playwright(args.user_data_dir, debug=args.debug)

    # Persist for the next run.
    if token and args.xcfs_file:
        try:
            Path(args.xcfs_file).write_text(token, encoding="utf-8")
        except Exception as e:
            warn(f"Could not write xcfs file: {e}")
    return token

def main():
    """CLI entry point: seed cookies from a browser profile, resolve the
    xcfs token, fetch all datasets, and write one JSON report to --out."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--user-data-dir", required=True, help="Firefox/Chromium profile dir to read cookies from")
    ap.add_argument("--out", required=True, help="Output JSON path")
    ap.add_argument("--xcfs", help="cloudflare-xcfs token (string)")
    ap.add_argument("--xcfs-file", help="File to read/write the cloudflare-xcfs token")
    ap.add_argument("--bootstrap-xcfs", action="store_true", help="One-shot Playwright bootstrap if html meta parse fails")
    ap.add_argument("--privy-auth", help="Privy Authorization JWT (optional)")
    ap.add_argument("--debug", action="store_true")
    args = ap.parse_args()

    # 1) Load cookies from the profile.
    cookies = _read_firefox_cookies(args.user_data_dir, debug=args.debug)
    if not cookies.get("cf_clearance"):
        warn("cf_clearance cookie not found in profile; requests may fail")

    # 2) Prepare the cloudscraper session.
    scraper = make_scraper(cookies)
    info("Cloudflare clearance present (cookie cf_clearance)" if cookies.get("cf_clearance") else "Proceeding without cf_clearance")

    # 3) Resolve xcfs (flag -> file -> HTML meta -> optional bootstrap).
    xcfs = _resolve_xcfs(scraper, args)
    if not xcfs:
        warn("Proceeding without cloudflare-xcfs (API may return 400). Provide --xcfs or --xcfs-file or --bootstrap-xcfs.")

    # 4) Fetch datasets (hype tracker first, then each chain in order).
    report = {
        "meta": {"generated_at": int(time.time())},
        "hype_tracker": get_hype_eth(scraper, xcfs, debug=args.debug),
        "top_traders": {
            chain: get_top_traders_chain(scraper, chain, xcfs, debug=args.debug)
            for chain in ("eth", "sol", "bsc", "base")
        }
    }

    # 5) Write the report.
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
    info(f"Wrote JSON -> {args.out}")

if __name__ == "__main__":
    main()
