o
    ul¥h½  ã                   @   s   d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
 e  dd¡ ¡ Zejedd e d	¡Zd
ZdZdZddiZdedB defdd„Zd&dd„Zdee fdd„Zdededefdd„Zdedee fdd„Zd'dee d edefd!d"„Zd#d$„ Zed%krŽeeƒ ƒ dS dS )(é    N)ÚListÚDict)ÚBeautifulSoup)Ú
get_clientÚ	LOG_LEVELÚINFOz[%(levelname)s] %(message)s)ÚlevelÚformatÚmoef_org_syncÚmoef_orgzQhttps://www.moef.go.kr/mi/orgnzt/org.do?bbsId=MOSFBBS_000000000097&menuNo=9040100za[href*="orgId="]z
User-Agentz*GovBot/1.0 (+https://work.jjickjjicks.com)ÚsÚreturnc                 C   s   | sdS d  |  dd¡ ¡ ¡S )NÚ Ú õ   Â )ÚjoinÚreplaceÚsplit)r   © r   ú2/var/www/html/bot/app/crawler/moef_org_sync_n8n.pyÚ_clean   s   r   c               
   C   sš   t ƒ } z/|  t¡ ¡  dd¡ ¡  z|  t¡ ¡  dd¡ ¡  W n	 ty)   Y nw t 	dt¡ W d S  tyL } zt 
dt|¡ W Y d }~d S d }~ww )NÚnameÚ	__never__Únullz	%s wiped.zwipe %s failed (continuing): %s)r   ÚtableÚ
TABLE_NAMEÚdeleteÚneqÚexecuteÚis_Ú	ExceptionÚloggerÚinfoÚwarning)ÚsbÚer   r   r   Úwipe_moef_org   s   ÿ€ÿr&   c                  C   sÄ   t jttdd} |  ¡  t| jdƒ}tƒ }g }| t	¡D ];}| dd¡ 
¡ }|r,d|vr-q| dd¡}| d¡r:|n
| d	¡sCd| n|}||v rJq| |¡ | ||d
œ¡ qt dt|ƒ¡ |S )Né   ©ÚheadersÚtimeoutúhtml.parserÚhrefr   zorgId=Útitleú/Úhttp)Úlinkr-   zcollected %d org links)ÚrequestsÚgetÚLIST_URLÚHEADERSÚraise_for_statusr   ÚtextÚsetÚselectÚ	SEL_LINKSÚstripÚ
startswithÚaddÚappendr!   r"   Úlen)ÚrÚsoupÚseenÚoutÚar,   r-   r0   r   r   r   Úfetch_org_links&   s"   $
rD   Úname_rawÚposition_rawc           	      C   s  t | ƒ}t |ƒ}|r|s|S |}t d|¡d  ¡ }dd„ ||hD ƒ}|D ]W}|d| fD ]}| |¡rD|d t|ƒ …  d¡}t |ƒ}q-t dt |¡› dd	|¡ d¡}t d
t |¡› dd	|¡ d¡}| |¡r||d t|ƒ …  d¡}t |ƒ}q%t |ƒ}|r…|S |S )Nu   [ \t(\[/Â·\-]r   c                 S   s   g | ]}|r|‘qS r   r   )Ú.0Úcr   r   r   Ú
<listcomp>D   s    z,strip_position_from_name.<locals>.<listcomp>r   u    -Â·/â€“â€”z\s*\(\s*z	\s*\)\s*$r   z\s*z\s*\([^)]+\)\s*$)	r   Úrer   r:   Úendswithr>   ÚrstripÚsubÚescape)	rE   rF   ÚnÚpÚoriginalÚbaseÚ
candidatesÚcandÚsufr   r   r   Ústrip_position_from_name:   s*   
€""
€rV   r0   c              	   C   sD  |   d¡r| nd|  }tj|tdd}| ¡  t|jdƒ}g }| d¡D ]{}| d¡r,q$| 	d¡}t
|ƒd	k r8q$t|d
 jddƒ}t|d jddƒ}t|d jddƒ}	t|d jddƒ}
t|d jddƒ}|d  d¡}|r‚| d¡r‚|d  dd¡ ¡ }
t||	ƒ}|	}|s”|s”|s”|s”|
s”q$| ||||
|dœ¡ q$|S )Nr/   zhttps://www.moef.go.krr'   r(   r+   ztable trÚthÚtdé   r   T)r:   é   é   é   é   za[href^="tel:"]r,   ztel:r   )Ú
departmentr   ÚpositionÚphoneÚtask)r;   r1   r2   r4   r5   r   r6   r8   ÚfindÚfind_allr>   r   Úget_textÚ
select_oneÚhas_attrr   r:   rV   r=   )r0   Úurlr?   r@   ÚrowsÚtrÚtdsr^   rE   rF   r`   ÚresponsibilityÚtel_ar   r_   r   r   r   Úparse_org_detail[   s>   



ûrm   éô  rh   Úchunkc                 C   sZ   t ƒ }d}tdt| ƒ|ƒD ]}| ||| … }|sq| t¡ |¡ ¡  |t|ƒ7 }q|S )Nr   )r   Úranger>   r   r   Úinsertr   )rh   ro   r$   ÚtotalÚiÚpartr   r   r   Úbulk_insertƒ   s   ru   c               	   C   s¨   t ƒ  tƒ } | st d¡ dddœS g }t| dƒD ]\}}|d }t|ƒ}t d|t| ƒ|t|ƒ¡ | |¡ q|sDt d¡ dddœS t|ƒ}t d	t	|¡ d|dœS )
Nzno org links found.ÚMOEF_ORGr   )ÚsourceÚinsertedrZ   r0   z(%d/%d) %s -> %d rowszno rows parsed.z%s sync done: inserted=%d)
r&   rD   r!   r"   Ú	enumeraterm   r>   Úextendru   r   )ÚlinksÚall_rowsrs   Úitemr0   rh   rx   r   r   r   Úrun_onceŽ   s"   




r~   Ú__main__)r   N)rn   ) ÚosÚloggingrJ   Útypingr   r   r1   Úbs4r   Úapp.services.supabase_servicer   ÚgetenvÚupperr   ÚbasicConfigÚ	getLoggerr!   r   r3   r9   r4   Ústrr   r&   rD   rV   rm   Úintru   r~   Ú__name__Úprintr   r   r   r   Ú<module>   s0    

!(ÿ