import logging
import os
import re
import time
import uuid
from datetime import datetime, timezone
from html import escape

import requests
from bs4 import BeautifulSoup

from app.services.supabase_service import get_client
from app.services.telegram_sender import broadcast_html

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=LOG_LEVEL, format="[%(levelname)s] %(message)s")
logger = logging.getLogger("motie_n8n")

BASE = "https://www.motie.go.kr"
LIST_URL = (
    "https://www.motie.go.kr/kor/article/ATCL6e90bb9de?"
    "mno=&pageIndex=1&rowPageC=0&displayAuthor=&searchCategory=3&schClear=on&"
    "startDtD=&endDtD=&searchCondition=1&searchKeyword=#"
)

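# CSS selectors for the list table: the 3rd cell holds the title, the 5th the
# posting date; attachment links start with /attach/down and detail links fire
# article.view(...) via onclick. These track the current MOTIE markup and will
# need updating if the page layout changes.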
SEL_TITLE = "td:nth-of-type(3)"
SEL_DATE  = "td:nth-of-type(5)"
SEL_FILE  = 'a[href^="/attach/down"]'
SEL_LINK  = 'a[onclick^="article.view"]'

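# The detail link's onclick looks like article.view(<digits>, ...); the first
# run of digits is taken as the article ID.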
RE_ARTICLE_ID = re.compile(r"\d+")
HEADERS = {"User-Agent": "GovBot/1.0 (+https://work.jjickjjicks.com)"}

# ------------------------------
# crawler_run helpers
# ------------------------------
def SB():
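    """Shorthand for the Supabase client from supabase_service."""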
    return get_client()

def _run_start(target: str) -> str:
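    """Insert a 'running' row into crawler_run and return its run ID (UUID).

    Failures are logged and swallowed so that metrics problems never block
    the crawl itself.
    """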
    run_id = str(uuid.uuid4())
    try:
        SB().table("crawler_run").insert({
            "id": run_id,
            "target": target,
            "status": "running",
            "pages": 0,
            "rows": 0,
            "fail_reason": None,
            "started_at": datetime.now(timezone.utc).isoformat(),
        }).execute()
    except Exception as e:
        logger.warning(f"[crawler_run insert] {e}")
    return run_id

def _run_finish(run_id: str, *, status: str, pages: int, rows: int, fail_reason: str | None = None):
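    """Mark a crawler_run row finished, recording counts and an optional fail reason."""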
    payload = {
        "status": status,
        "finished_at": datetime.now(timezone.utc).isoformat(),
        "pages": pages,
        "rows": rows,
    }
    if fail_reason is not None:
        payload["fail_reason"] = fail_reason
    try:
        SB().table("crawler_run").update(payload).eq("id", run_id).execute()
    except Exception as e:
        logger.warning(f"[crawler_run finish] {e}")

# ------------------------------
# parsing
# ------------------------------
def _clean(s: str | None) -> str:
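    """Collapse runs of whitespace into single spaces; empty input yields ""."""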
    if not s:
        return ""
    return " ".join(s.split())

def parse_list_html(html: str) -> list[dict]:
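    """Parse the list-page HTML into dicts with title, date, fileUrl and articleId.

    Rows without an extractable article ID are skipped; relative attachment
    URLs are made absolute against BASE.
    """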
    soup = BeautifulSoup(html, "html.parser")
    rows = soup.select("table tbody tr")
    items: list[dict] = []

    for tr in rows:
        title_el = tr.select_one(SEL_TITLE)
        date_el  = tr.select_one(SEL_DATE)
        file_el  = tr.select_one(SEL_FILE)
        link_el  = tr.select_one(SEL_LINK)

        title = _clean(title_el.get_text(strip=True) if title_el else "")
        date  = _clean(date_el.get_text(strip=True) if date_el else "")
        file_url = file_el["href"].strip() if (file_el and file_el.has_attr("href")) else None
        onclick = link_el["onclick"].strip() if (link_el and link_el.has_attr("onclick")) else ""

        m = RE_ARTICLE_ID.search(onclick)
        article_id = m.group(0) if m else None

        if article_id:
            items.append({
                "title": title,
                "date": date,
                "fileUrl": (BASE + file_url) if (file_url and file_url.startswith("/")) else file_url,
                "articleId": article_id
            })
    return items

def fetch_and_extract() -> list[dict]:
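    """Fetch the MOTIE list page (with retries) and return the parsed items."""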
    def _get(url, headers, timeout):
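        # Up to 4 attempts with exponential backoff: 0.5s, 1s, 2s between tries.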
        delay = 0.5
        for i in range(4):
            try:
                r = requests.get(url, headers=headers, timeout=timeout)
                r.raise_for_status()
                return r
            except Exception as e:
                if i == 3:
                    raise
                logger.warning("fetch retry (%s): %s", i+1, e)
                time.sleep(delay)
                delay *= 2
        raise RuntimeError("unreachable")
    r = _get(LIST_URL, headers=HEADERS, timeout=30)
    return parse_list_html(r.text)

def upsert_to_supabase_and_notify(items: list[dict]) -> tuple[int, int]:
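    """Insert unseen articles into motie_id and broadcast each new one via Telegram.

    Returns (inserted, notified). The existence check and insert are not
    atomic; a concurrent duplicate insert simply fails, is logged, and the
    item is skipped.
    """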
    sb = SB()
    inserted, notified = 0, 0
    for it in items:
        aid = it.get("articleId")
        if not aid:
            continue
        exist = sb.table("motie_id").select("id").eq("id", aid).limit(1).execute()
        if exist.data:
            continue

        detail_url = f"{BASE}/kor/article/ATCL6e90bb9de/{aid}/view?"
        aid_value = int(aid) if str(aid).isdigit() else aid
        title = (it.get("title") or "").strip()
        posted = (it.get("date") or "").strip()

        payload = {"id": aid_value, "title": title}
        if posted:
            payload["posted_at"] = posted

        try:
            sb.table("motie_id").insert(payload).execute()
            inserted += 1
        except Exception as e:
            logger.warning(f"insert fail for {aid}: {e}")
            continue

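        # Build the Telegram HTML message (Korean: "MOTIE personnel announcement").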
        html = (
            "산업부 인사발령입니다.\n"
            f"[{escape(title)}]\n"
            f'<a href="{detail_url}">[자세히 보기]</a>'
        )
        try:
            # Assume broadcast_html returns either a bool or the number of messages sent (int).
            sent = broadcast_html(html)
            notified += int(sent) if isinstance(sent, int) else (1 if sent else 0)
        except Exception as e:
            logger.warning(f"notify fail for {aid}: {e}")
    return inserted, notified

# ------------------------------
# main
# ------------------------------
def run_once():
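    """Run a single crawl cycle and record it in crawler_run.

    Returns a summary dict; status is "passed" on success (even when no new
    items were found) and "aborted" on any unhandled error.
    """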
    target = "motie_id"
    run_id = _run_start(target)
    pages = 0
    rows = 0
    try:
        items = fetch_and_extract()
        pages += 1  # a single list page
        if not items:
            _run_finish(run_id, status="passed", pages=pages, rows=0)
            logger.info("MOTIE n8n-style run: no items")
            return {"source": "MOTIE_ID", "inserted": 0, "sent": 0, "run_id": run_id, "status": "passed"}

        ins, sent = upsert_to_supabase_and_notify(items)
        rows = ins  # rows counts newly inserted records
        _run_finish(run_id, status="passed", pages=pages, rows=rows)
        logger.info("MOTIE n8n-style run: inserted=%s, sent=%s", ins, sent)
        return {"source": "MOTIE_ID", "inserted": ins, "sent": sent, "run_id": run_id, "status": "passed"}
    except Exception as e:
        _run_finish(run_id, status="aborted", pages=pages, rows=rows, fail_reason=str(e))
        logger.exception("MOTIE n8n-style run aborted: %s", e)
        return {"source": "MOTIE_ID", "error": str(e), "run_id": run_id, "status": "aborted"}

if __name__ == "__main__":
    print(run_once())
