o
    oh                     @   sz  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ e dd Zejed	d
 edZdZdZdZdZdZdZedZddiZdd Zde de fddZ!ddde de de"de"d e dB f
d!d"Z#d#e dB de fd$d%Z$d&e de%e& fd'd(Z'de%e& fd)d*Z(d+e%e& de
e"e"f fd,d-Z)d.d/ Z*e+d0kre,e*  dS dS )1    N)BeautifulSoup)escape)ListTuple
get_client)broadcast_html	LOG_LEVELINFOz[%(levelname)s] %(message)s)levelformat	motie_n8nzhttps://www.motie.go.krzhttps://www.motie.go.kr/kor/article/ATCL6e90bb9de?mno=&pageIndex=1&rowPageC=0&displayAuthor=&searchCategory=3&schClear=on&startDtD=&endDtD=&searchCondition=1&searchKeyword=#ztd:nth-of-type(3)ztd:nth-of-type(5)za[href^="/attach/down"]za[onclick^="article.view"]z\d+z
User-Agentz*GovBot/1.0 (+https://work.jjickjjicks.com)c                   C   s   t  S )Nr    r   r   */var/www/html/bot/app/crawler/motie_n8n.pySB#   s   r   targetreturnc                 C   s   dd l }ddlm}m} t| }zt d|| dddd ||j	
 d  W |S  tyI } ztd|  W Y d }~|S d }~ww )Nr   datetimetimezonecrawler_runrunning)idr   statuspagesrowsfail_reason
started_atz[crawler_run insert] )uuidr   r   struuid4r   tableinsertnowutc	isoformatexecute	Exceptionloggerwarning)r   r   r   r   run_ider   r   r   
_run_start&   s(   r,   )r   r*   r   r   r   r   c          	   
   C   s   ddl m }m} |||j ||d}|d ur||d< zt d|d| 	  W d S  t
yJ } ztd|  W Y d }~d S d }~ww )Nr   r   )r   finished_atr   r   r   r   r   z[crawler_run finish] )r   r   r#   r$   r%   r   r!   updateeqr&   r'   r(   r)   )	r*   r   r   r   r   r   r   payloadr+   r   r   r   _run_finish8   s   $r1   sc                 C   s   | sdS d |  S )N  )joinsplit)r2   r   r   r   _cleanJ   s   r7   htmlc                 C   s  t | d}|d}g }|D ]q}|t}|t}|t}|t}t|r-|jddnd}	t|r9|jddnd}
|rI|	drI|d 
 nd }|rX|	drX|d 
 nd}t|}|rf|dnd }|r||	|
|ry|d	ryt| n||d
 q|S )Nzhtml.parserztable tbody trT)stripr3   hrefonclickr   /)titledatefileUrl	articleId)r   select
select_one	SEL_TITLESEL_DATESEL_FILESEL_LINKr7   get_texthas_attrr9   RE_ARTICLE_IDsearchgroupappend
startswithBASE)r8   soupr   itemstrtitle_eldate_elfile_ellink_elr=   r>   file_urlr;   m
article_idr   r   r   parse_list_htmlO   s.   






rY   c                  C   s    dd } | t tdd}t|jS )Nc                 S   s   d}t dD ]<}ztj| ||d}|  |W   S  tyB } z|dkr& td|d | t| |d9 }W Y d }~qd }~ww t	d)	Ng      ?   headerstimeout   zfetch retry (%s): %s      unreachable)
rangerequestsgetraise_for_statusr'   r(   r)   timesleepRuntimeError)urlr\   r]   delayirr+   r   r   r   _getl   s   

zfetch_and_extract.<locals>._get   r[   )LIST_URLHEADERSrY   text)rm   rl   r   r   r   fetch_and_extractk   s   
rr   rP   c                 C   s  t  }d\}}| D ]}|d}|sq	|ddd|d }|jr(q	t d| d}t	|
 r:t|n|}|dpBd	 }	|d
pKd	 }
||	d}|
rY|
|d< z|d|  |d7 }W n ty } ztd| d|  W Y d }~q	d }~ww dt|	 d| d}zt|}|t|trt|n|rdnd7 }W q	 ty } ztd| d|  W Y d }~q	d }~ww ||fS )N)r   r   r@   motie_idr   r_   z/kor/article/ATCL6e90bb9de/z/view?r=   r3   r>   )r   r=   	posted_atzinsert fail for z: u"   산업부 인사발령입니다.
[z]
<a href="u   ">[자세히 보기]</a>r   znotify fail for )r   rd   r!   rA   r/   limitr&   datarN   r   isdigitintr9   r"   r'   r(   r)   r   r   
isinstance)rP   sbinsertednotifieditaidexist
detail_url	aid_valuer=   postedr0   r+   r8   sentr   r   r   upsert_to_supabase_and_notify}   sL   
"
&"r   c               
   C   s   d} t | }d}d}z?t }|d7 }|s*t|d|dd td ddd|ddW S t|\}}|}t|d||d td	|| d|||ddW S  tyu } z t|d
||t|d td| dt||d
dW  Y d }~S d }~ww )Nrs   r   r_   passed)r   r   r   zMOTIE n8n-style run: no itemsMOTIE_ID)sourcer{   r   r*   r   z)MOTIE n8n-style run: inserted=%s, sent=%saborted)r   r   r   r   zMOTIE n8n-style run aborted: %s)r   errorr*   r   )	r,   rr   r1   r(   infor   r'   r   	exception)r   r*   r   r   rP   insr   r+   r   r   r   run_once   s,   
r   __main__)-osreloggingrc   bs4r   r8   r   typingr   r   rf   app.services.supabase_servicer   app.services.telegram_senderr   getenvupperr	   basicConfig	getLoggerr(   rN   ro   rC   rD   rE   rF   compilerI   rp   r   r   r,   rx   r1   r7   listdictrY   rr   r   r   __name__printr   r   r   r   <module>   sB    

(+