from __future__ import annotations  # PEP 604 unions in annotations on Python < 3.10

import html  # for escaping titles interpolated into the Telegram HTML message
import logging
import os
import re

import requests
from bs4 import BeautifulSoup

from app.services.supabase_service import get_client
from app.services.telegram_sender import broadcast_html
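
# Assumed interfaces of the in-repo services (not defined in this file):
#   get_client()        -> a supabase-py Client
#   broadcast_html(msg) -> sends `msg` with Telegram's HTML parse mode to all
#                          subscribers and returns the number of messages sent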

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=LOG_LEVEL, format="[%(levelname)s] %(message)s")
logger = logging.getLogger("motie_n8n")

BASE = "https://www.motie.go.kr"
LIST_URL = (
    "https://www.motie.go.kr/kor/article/ATCL6e90bb9de?"
    "mno=&pageIndex=1&rowPageC=0&displayAuthor=&searchCategory=3&schClear=on&"
    "startDtD=&endDtD=&searchCondition=1&searchKeyword=#"
)

# Same CSS selectors as the n8n workflow
SEL_TITLE = "td:nth-of-type(3)"
SEL_DATE  = "td:nth-of-type(5)"
SEL_FILE  = 'a[href^="/attach/down"]'
SEL_LINK  = 'a[onclick^="article.view"]'

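# The article id appears only in the detail link's onclick handler, e.g.
# onclick="article.view('123456')" (exact call shape assumed); the first run
# of digits is taken as the id.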
RE_ARTICLE_ID = re.compile(r"\d+")
HEADERS = {"User-Agent": "GovBot/1.0 (+https://work.jjickjjicks.com)"}

def _clean(s: str | None) -> str:
    """Collapse runs of whitespace to single spaces and trim."""
    if not s:
        return ""
    return " ".join(s.split())

def parse_list_html(html: str) -> list[dict]:
    """Parse one listing page into [{title, date, fileUrl, articleId}, ...] rows."""
    soup = BeautifulSoup(html, "html.parser")
    rows = soup.select("table tbody tr")
    items: list[dict] = []

    for tr in rows:
        title_el = tr.select_one(SEL_TITLE)
        date_el  = tr.select_one(SEL_DATE)
        file_el  = tr.select_one(SEL_FILE)
        link_el  = tr.select_one(SEL_LINK)

        title = _clean(title_el.get_text(strip=True) if title_el else "")
        date  = _clean(date_el.get_text(strip=True) if date_el else "")
        file_url = file_el["href"].strip() if (file_el and file_el.has_attr("href")) else None
        onclick = link_el["onclick"].strip() if (link_el and link_el.has_attr("onclick")) else ""

        m = RE_ARTICLE_ID.search(onclick)
        article_id = m.group(0) if m else None

        if article_id:
            items.append({
                "title": title,
                "date": date,  # YYYY-MM-DD 그대로 사용
                "fileUrl": (BASE + file_url) if (file_url and file_url.startswith("/")) else file_url,
                "articleId": article_id
            })
    return items

def fetch_and_extract() -> list[dict]:
    """Download the listing page and return the parsed rows."""
    r = requests.get(LIST_URL, headers=HEADERS, timeout=30)
    r.raise_for_status()
    return parse_list_html(r.text)

def upsert_to_supabase_and_notify(items: list[dict]) -> tuple[int, int]:
    """
    4) motie_id 중복 확인
    5) 신규 insert (id, title, posted_at 만)
    6) 텔레그램 발송 (요청 포맷)
    """
    sb = get_client()
    inserted, notified = 0, 0

    for it in items:
        aid = it.get("articleId")
        if not aid:
            continue

        # 4) Duplicate check
        exist = sb.table("motie_id").select("id").eq("id", aid).limit(1).execute()
        if exist.data:
            continue

        # 5) Insert the new row (the schema has no url column)
        detail_url = f"{BASE}/kor/article/ATCL6e90bb9de/{aid}/view?"
        aid_value = int(aid) if str(aid).isdigit() else aid  # id column may be numeric, so convert when possible
        title = (it.get("title") or "").strip()
        posted = (it.get("date") or "").strip()  # YYYY-MM-DD
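        # posted goes out as the raw YYYY-MM-DD string; assuming the posted_at
        # column is a date/timestamp type, PostgREST casts it on insert.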

        payload = {"id": aid_value, "title": title}
        if posted:
            payload["posted_at"] = posted

        try:
            sb.table("motie_id").insert(payload).execute()
            inserted += 1
        except Exception as e:
            logger.warning(f"insert fail for {aid}: {e}")
            continue

        # 6) Send the Telegram message (requested format). The last line is an
        # anchor link; literal angle brackets are rendered via HTML entities
        # (&lt; &gt;). The title is escaped so a stray <, > or & in it cannot
        # break Telegram's HTML parse mode. Body text stays in Korean
        # ("MOTIE personnel announcement" / "Title" / "Posted" / "view details").
        msg = (
            "산업부 인사발령입니다.\n"
            f"제목 : {html.escape(title)}\n"
            f"게시일 : {posted}\n"
            f'<a href="{detail_url}">&lt;인사발령 상세보기&gt;</a>'
        )
        try:
            notified += broadcast_html(msg)
        except Exception as e:
            logger.warning(f"notify fail for {aid}: {e}")

    return inserted, notified

def run_once():
    """One full pass: fetch, parse, insert new rows, notify."""
    items = fetch_and_extract()
    if not items:
        logger.info("no items parsed from the listing page")
        return
    ins, sent = upsert_to_supabase_and_notify(items)
    logger.info("MOTIE n8n-style run: inserted=%s, sent=%s", ins, sent)

if __name__ == "__main__":
    run_once()
