o
    9˜¡hË  ã                   @   s®   d dl Z d dlZd dlmZmZ d dlZd dlmZ d dlm	Z	m
Z
 dZe› dZddiZd	ejd
efdd„Zd	ejded
ee fdd„Zd
efdd„ZedkrUeƒ  dS dS )é    N)ÚListÚDict)ÚBeautifulSoup)Ú
get_clientÚloggerzhttps://www.motie.go.krz/kor/25/empSearchz
User-AgentzMozilla/5.0ÚsessÚreturnc                 C   sd   | j ttdd}| ¡  t|jdƒ}| d¡}|sdS |  dd¡}t d|¡}|r0t	| 
d¡ƒS dS )	Né   ©ÚheadersÚtimeoutúhtml.parserza.direction.lasté   ÚonclickÚ zempSearch\.list\((\d+)\))ÚgetÚLIST_URLÚHEADERSÚraise_for_statusr   ÚtextÚ
select_oneÚreÚsearchÚintÚgroup)r   ÚrÚsoupÚlastr   Úm© r   ú3/var/www/html/bot/app/crawler/motie_org_sync_n8n.pyÚ_fetch_last_page   s   
r!   Úpagec              	   C   s–  t › d|› }| j|tdd}| ¡  t|jdƒ}g }| d¡D ]¨}| d¡}t|ƒdk r.q |d j	d	d
}|d j	d	d
}	|d j	d	d
}
|d }d}| 
d¡}|rd| d¡rd| d¡ dd¡ ¡ }n|j	dd	d
}t d|¡}|rx| d¡n|}t|ƒdkr‘d dd„ |dd… D ƒ¡ ¡ }nd}|rŸ|rŸ| |d¡ ¡ }|
dkr³d|v r³t d|› d|	› ¡ q t||	|
||gƒs½q | ||	|
||dœ¡ q |S )Nz?pageIndex=r	   r
   r   ztable tbody trÚtdé   r   T©Ústripr   é   éÿÿÿÿr   za[href^="tel:"]Úhrefztel:ú z\d{2,4}-\d{3,4}-\d{4}c                 s   s    | ]
}|j d ddV  qdS )r*   Tr%   N)Úget_text)Ú.0r#   r   r   r    Ú	<genexpr>7   s   € z_parse_page.<locals>.<genexpr>é   u   íŒŒê²¬u   ê¸°íšìž¬ì •ë¶€uG   [motie_org] skip(row): dept=íŒŒê²¬, task contains 'ê¸°íšìž¬ì •ë¶€' :: z / )ÚnameÚpositionÚ
departmentÚtaskÚphone)r   r   r   r   r   r   ÚselectÚfind_allÚlenr+   r   Úreplacer&   r   r   r   Újoinr   ÚdebugÚanyÚappend)r   r"   Úurlr   r   ÚrowsÚtrÚtdsr/   r0   r1   Úphone_tdr3   Útel_aÚ	last_textr   r2   r   r   r    Ú_parse_page   sL   

"
ûrC   c            
   
   C   s*  t ƒ } t ¡ %}t|ƒ}g }td|d ƒD ]}| t||ƒ¡ t d¡ qW d  ƒ n1 s/w   Y  z|  	d¡ 
¡  dd¡ ¡  W n ty] } zt d|› ¡ W Y d}~nd}~ww d}d	}tdt|ƒ|ƒD ]}|||| … }	|	swqj|  	d¡ |	¡ ¡  |t|	ƒ7 }qjt d
|› d|› ¡ |S )uQ   MOTIE ì¡°ì§ë„ ì „ì²´ í¬ë¡¤ë§(n8n ë°©ì‹) -> motie_org í…Œì´ë¸” ì „ì²´ ê°±ì‹ r   gš™™™™™É?NÚ	motie_orgr/   Ú__keep__z[motie_org] wipe failed: r   iô  z%[motie_org] run_once completed: rows=z, pages=)r   ÚrequestsÚSessionr!   ÚrangeÚextendrC   ÚtimeÚsleepÚtableÚdeleteÚneqÚexecuteÚ	Exceptionr   Úwarningr6   ÚupsertÚinfo)
Úsupabaser   r   Úall_rowsÚpÚeÚinsertedÚbatchÚiÚchunkr   r   r    Úrun_onceP   s2   
þý€ÿr\   Ú__main__)r   rJ   Útypingr   r   rF   Úbs4r   Úapp.services.supabase_servicer   r   ÚBASEr   r   rG   r   r!   rC   r\   Ú__name__r   r   r   r    Ú<module>   s     
ÿ6
ÿ