o
    Â0±hÔg  ã                   @  sÂ  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZmZmZmZmZ d dlmZmZ d dlZd dlZd dlmZ ej ej ej e¡dd¡¡Zeejvreej e¡ d dlmZ g d¢Zd	Z d
Z!dZ"dZ#dZ$dZ%dZ&dZ'dZ(dZ)e *d¡pŒe *d¡Z+e *d¡p–e *d¡Z,e *dd¡ -¡ Z.e /d¡Z0e 1¡ Z2e2 3e 4d¡¡ e0 5e2¡ e0 6e.¡ e 7d¡Z8d‰d d!„Z9dŠd$d%„Z:d‹d&d'„Z;dŒd)d*„Z<dd,d-„Z=dŽd0d1„Z>dŠd2d3„Z?dd7d8„Z@dd:d;„ZAd‘d=d>„ZBd‘d?d@„ZCd’d“dCdD„ZDd”dFdG„ZEd•d–dLdM„ZFd•d—dOdP„ZGd˜dRdS„ZHdTdU„ ZId™dXdY„ZJdšd]d^„ZKd›dœddde„ZLddhdi„ZMdždkdl„ZNdŸd dsdt„ZOd¡dudv„ZPd¢dxdy„ZQd£d{d|„ZRd¤d¥d€d„ZSd¦d§d„d…„ZTd†d‡„ ZUeVdˆkr_eUƒ  dS dS )¨é    )ÚannotationsN)ÚAnyÚDictÚListÚOptionalÚTuple)ÚdatetimeÚ	timedelta)Ú	PdfReaderz..©Ú
get_client)ÚC0247ÚC0042ÚC0043ÚC0066ÚC0082ÚC0220ÚC0259ÚC0248ÚC0306ÚC0305ÚC0236z/https://alio.go.kr/item/itemReportListSusi.jsonz(https://alio.go.kr/item/itemOrganList.doz$https://alio.go.kr/download/pdf.jsonÚkepco_idÚ	kepco_rawÚkepco_org_stgÚkepco_org_snapshotÚkepco_org_curÚ	kepco_orgÚkepco_historyÚTELEGRAM_BOT_TOKENÚTG_BOT_TOKENÚTELEGRAM_CHAT_IDÚ
TG_CHAT_IDÚ	LOG_LEVELÚINFOÚ	group_n8nz[%(levelname)s] %(message)su   ^\s*\(?ê³µì„\)?\s*$ÚnameúOptional[str]ÚreturnÚboolc                 C  s*   | sdS t | ƒ ¡ }t |¡rdS |dv S )NFT>   õ   ê²°ì›ú-)ÚstrÚstripÚVACANCY_PATÚmatch)r&   Ús© r1   ú*/var/www/html/bot/app/crawler/group_n8n.pyÚis_vacant_name?   s   
r3   r0   r,   c                 C  s8   | sdS |   dd¡} t dd| ¡} t dd| ¡} |  ¡ S )NÚ úz[ \t]+ú z
[ ]*\n[ ]*Ú
)ÚreplaceÚreÚsubr-   ©r0   r1   r1   r2   Ú	normalizeG   s   r<   c                 C  s2   | sdS t  dd| ¡ ¡ }|dv pt  d|¡d uS )NTú\s+r4   >   õ   â€“õ   â€”r+   u   (ë¯¸ì •|ë¬´ê¸°í•œ))r9   r:   r-   Úsearch)r0   Úvr1   r1   r2   Úis_blank_dateO   s   rB   Úpatternc                 C  s"   t  | |¡}|r| d¡ ¡ S d S )Né   ©r9   r@   Úgroupr-   )rC   r0   Úmr1   r1   r2   ÚpickT   ó   rH   Úmerged_textc                 C  s"   t  d| ¡}|r| d¡ ¡ S d S )Nu-   ìž„ì›\s*í˜„í™©\s*\n([^\n]+)\nìž„ì›\s*í˜„í™©rD   rE   )rJ   rG   r1   r1   r2   Úextract_department_from_headerX   rI   rK   Útextsú	List[str]c                 C  s*   d  dd„ | D ƒ¡}t dd|¡}| ¡ S )Nr7   c                 S  s   g | ]}|rt |ƒ‘qS r1   )r<   )Ú.0Útr1   r1   r2   Ú
<listcomp>]   ó    z$merge_page_texts.<locals>.<listcomp>z\n{2,})Újoinr9   r:   r-   )rL   Újoinedr1   r1   r2   Úmerge_page_texts\   s   rT   c                 C  ó   t  dd| pd¡ ¡ S ©Nr=   r4   ©r9   r:   r-   r;   r1   r1   r2   Ú_normalize_namea   ó   rX   ÚdeptÚstartúTuple[str, str, Optional[str]]c                 C  s   | t |ƒ|pd fS ©N)rX   )rZ   r&   r[   r1   r1   r2   Ú_keyd   s   r^   Ú	posted_atc                 C  s„   | sd S |   ¡  dd¡}t d|¡r|S t d|¡}|r@t| d¡ƒt| d¡ƒt| d¡ƒ}}}|d›d	|d
›d	|d
›S d S )Nr6   r4   z^\d{4}-\d{2}-\d{2}$z^(\d{4})\.(\d{1,2})\.(\d{1,2})$rD   é   é   Ú04dr+   Ú02d)r-   r8   r9   r/   ÚintrF   )r_   r0   rG   ÚyÚmoÚdr1   r1   r2   Ú_to_isog   s   .rh   Úiso_ymdc                 C  s$   t  | d¡}|tdd }| d¡S )Nú%Y-%m-%drD   )Údays)r   Ústrptimer	   Ústrftime)ri   ÚdtÚprevr1   r1   r2   Ú_iso_prev_days   s   
rp   c                 C  s,   t  | d¡}|j› d|jd›d|jd›dS )Nrj   u   ë…„ rc   u   ì›” u   ì¼)r   rl   ÚyearÚmonthÚday)ri   rn   r1   r1   r2   Ú_to_kor_datex   s    rt   Údepartment_hintúList[Dict[str, Any]]c                 C  sÊ  |pt | ƒ}|  d¡}|dkr| |d … n| }dd„ t d|¡D ƒ}g }|D ]¼}t|ƒ}| d¡s;d|v r<t d|¡s<q&td	|ƒ}	td
|ƒ}
td|ƒ}td|ƒ}t d|¡}|r_| d¡ 	¡ nd }|rj| d¡ 	¡ nd }t
|ƒrrd nt dd|pyd¡}t
|ƒrd nt dd|pˆd¡}d }t d|¡}|r›| d¡ 	¡ }t
|ƒr¡d nt dd|p¨d¡}td|ƒ}g }|r¾dd„ t d|¡D ƒ}d dd„}||	ƒdv sÏ||
ƒdv rÐq&|
sÓq&| ||
|	||||||dœ	¡ q&|S )!Nu   ì§ìœ„ r   c                 S  s   g | ]
}t  d |¡r|‘qS )u	   ^ì§ìœ„\s)r9   r/   )rN   Úsecr1   r1   r2   rP   ƒ   ó    z parse_people.<locals>.<listcomp>u   \n(?=ì§ìœ„\s)u   ì§ìœ„ ë³€ê²½ ì „u   ë³€ê²½ì‚¬ìœ u   (ìž„ê¸°|ì§ì±…|ì„±ë³„)\su   ì§ìœ„\s*([^\n]+?)\s*ì„±ëª…u4   ì„±ëª…\s*([^\n]+?)(?=\s*(?:ì§ì±…|ì„±ë³„|ìž„ê¸°|\n))u-   ì§ì±…\s*([^\n]+?)(?=\s*(?:ì„±ë³„|ìž„ê¸°|\n))u   ì„±ë³„\s*([ë‚¨ì—¬])uG   ìž„ê¸°\s*\(ì‹œìž‘ì¼\)\s*([^\n(]+?)\s*\(ì¢…ë£Œì¼\)\s*([^\n]+?)(?:\n|$)rD   r`   r=   r6   r4   uG   (?:ì‚¬ìœ \s*ë°œìƒ(?:ì¼ìž|ì¼)|ì‚¬ìœ ë°œìƒ(?:ì¼ìž|ì¼))\s*([^\n]+)uŒ   ì£¼ìš”ê²½ë ¥\s*([\s\S]*?)(?=\n\s*(?:ì„ ìž„ì ˆì°¨|ì„ ìž„ì ˆì°¨ê·œì •|ë‹¹ì—°ì§ì—¬ë¶€|ì§ìœ„\s|ê¸°ì¤€ì¼|ì œì¶œì¼|ê¸°ê´€ ê³µì‹œ ë‹´ë‹¹ìž|$))c                 S  s   g | ]
}|  ¡ r|  ¡ ‘qS r1   ©r-   )rN   Úlnr1   r1   r2   rP   §   rx   z\n+Úxr'   r(   r,   c                 S  s   | pd  dd¡S )Nr4   r6   )r8   ©r{   r1   r1   r2   Útokª   s    zparse_people.<locals>.tok>   õ   ì§ìœ„õ	   ë³€ê²½ì „>   õ   ì„±ëª…õ	   ë³€ê²½í›„)	Ú
departmentr&   ÚpositionÚgenderr[   ÚendÚtaskÚreason_dateÚcareer©r{   r'   r(   r,   )rK   Úfindr9   Úsplitr<   Ú
startswithr@   rH   rF   r-   rB   r:   Úappend)rJ   ru   r‚   Ú	start_idxÚbodyÚsectionsÚoutÚrawrw   rƒ   r&   r†   r„   rG   Ú	start_rawÚend_rawr[   r…   Ú
reason_rawÚm_reasonr‡   Úcareer_blockrˆ   r}   r1   r1   r2   Úparse_people   s^   




þ

÷r˜   úrequests.Sessionc                  C  s   t  ¡ } | j ddi¡ | S )Nz
User-Agentz*GovBot/1.0 (+https://work.jjickjjicks.com))ÚrequestsÚSessionÚheadersÚupdater;   r1   r1   r2   Úmake_sessionÀ   s
   ÿrž   FÚapba_idÚdebugúOptional[Dict[str, Any]]c              	   C  s4  |dddœ}| j t|dd}|rt d||j|j  d¡¡ dd	d
ddœ}d|ddœ}| jt|t 	|¡dd}|rKt d||j|j  d¡|j
d d… ¡ z| ¡ }W n
 ty[   Y d S w |  d¡dkred S |  d¡pki   d¡ppg }	|  d¡pwi   d¡p|i }
|
  d¡}|	s†d S |	d }|  d¡|  d¡|  d¡|dœS )NiQO  rD   )ÚapbaIdÚreportFormRootNoÚpageNoé   ©ÚparamsÚtimeoutz[%s] prewarm %s %szcontent-typezhttps://alio.go.krzhttps://alio.go.kr/ÚXMLHttpRequestzapplication/json;charset=UTF-8)ÚOriginÚRefererzX-Requested-WithzContent-TypeÚ1Ú20305)r¤   r¢   r£   )rœ   Údatar¨   z[%s] list %s %s %si  ÚstatusÚsuccessr®   ÚresultÚ	organInfoÚapbaNar   ÚdisclosureNoÚidateÚtitle)r´   rµ   r¶   Úorgan)ÚgetÚPREWARM_URLÚloggerr    Ústatus_coderœ   ÚpostÚLIST_URLÚjsonÚdumpsÚtextÚ	Exception)r0   rŸ   r    Ú
pre_paramsÚr0Úheaders_jsonr   Úrr®   r±   Ú
organ_infoÚ
organ_nameÚfirstr1   r1   r2   Úfetch_first_itemÇ   s>   ü&ÿ
ürÉ   Údisclosure_noc                 C  s„   d|i}| j t|dd}|rt d|j|j¡ |jdkrd S d|› d}t|dƒ}| |j¡ W d   ƒ |S 1 s;w   Y  |S )	Nr´   é<   r¦   z[PDF] %s -> %séÈ   z
/tmp/alio_z.pdfÚwb)	r¸   ÚPDF_URLrº   r    Úurlr»   ÚopenÚwriteÚcontent)r0   rÊ   r    r§   rÅ   Úout_pathÚfr1   r1   r2   Úfetch_pdf_to_tmpî   s   

ÿþrÕ   Úpdf_pathc              	   C  sN   g }t | ƒ}|jD ]}z| ¡ pd}W n ty   d}Y nw | |¡ q	|S )Nr4   )r
   ÚpagesÚextract_textrÁ   r   )rÖ   rL   ÚreaderÚpagerO   r1   r1   r2   Úpdf_to_page_textsú   s   
ÿrÛ   c                   C  s   t ƒ S r]   r   r1   r1   r1   r2   Úsb  s   rÜ   ÚtableÚid_valuec              
   C  sr   z|   |¡ d¡ d|¡ d¡ ¡ }|jpg }t|ƒdkW S  ty8 } zt 	d|||¡ W Y d }~dS d }~ww )NÚidrD   r   z exists check failed (%s, %s): %sF)
rÝ   ÚselectÚeqÚlimitÚexecuter®   ÚlenrÁ   rº   Úwarning)ÚclientrÝ   rÞ   ÚresÚrowsÚer1   r1   r2   Úsb_row_exists_id  s   "
€þrê   ÚrowúDict[str, Any]ÚNonec                 C  s   |   |¡ |¡ ¡  d S r]   )rÝ   Úinsertrã   )ræ   rÝ   rë   r1   r1   r2   Ú	sb_insert  s   rï   ÚHTMLTrÀ   Ú
parse_modeÚdisable_previewc           	   
   C  s¤   t }t}|r|st d¡ dS d|› d}|| ||dœ}ztj||dd}|jdkr6t d	|j|j¡ W dS W d
S  t	yQ } zt d|¡ W Y d }~dS d }~ww )Nz[TG] skipped (no token/chat id)Fzhttps://api.telegram.org/botz/sendMessage)Úchat_idrÀ   rñ   Údisable_web_page_previewé   )r¾   r¨   rÌ   z[TG] send failed %s: %sTz[TG] send error: %s)
r    r"   rº   Úinforš   r¼   r»   rå   rÀ   rÁ   )	rÀ   rñ   rò   Útokenró   rÏ   ÚpayloadrÅ   ré   r1   r1   r2   Úsend_telegram  s,   
ü
€þrù   r·   Úpdf_urlc                 C  s:   t  | pd¡}t j|dd}d|› d|› dg}d |¡S )Nr+   T)Úquoteu'   ê·¸ë£¹ì‚¬ ì‹ ê·œ ìž„ì›ê³µì‹œìž…ë‹ˆë‹¤.z	<a href="u   ">ìžì„¸ížˆ ë³´ê¸°</a>r7   )ÚhtmlÚescaperR   )r·   rú   Úorg_escÚurl_escÚlinesr1   r1   r2   Úbuild_tg_message1  s   
ý
r  Útargetc                 C  s6   t t ¡ ƒ}|  d¡ ||dt ¡  ¡ dœ¡ ¡  |S )NÚcrawler_runÚrunning)rß   r  r¯   Ú
started_at)	r,   ÚuuidÚuuid4rÝ   rî   r   ÚutcnowÚ	isoformatrã   )ræ   r  Úrun_idr1   r1   r2   Úinsert_crawler_run>  s   

ür  r
  r¯   r×   rd   rè   Úfail_reasonc                 C  s8   t  ¡  ¡ ||||dœ}|  d¡ |¡ d|¡ ¡  d S )N)Úfinished_atr¯   r×   rè   r  r  rß   )r   r  r	  rÝ   r   rá   rã   )ræ   r
  r¯   r×   rè   r  rø   r1   r1   r2   Úupdate_crawler_runH  s   
û r  c                 C  s8   | sdS dd„ | D ƒ}|sdS dd„ |D ƒ}t |ƒdkS )u8   ìµœì†Œ 1í–‰ & ì „ì›ì´ ê³µì„/ë¹ˆì´ë¦„ì´ ì•„ë‹ˆë©´ OKFc                 S  s"   g | ]}|  d ¡p
d ¡ r|‘qS )r&   r4   )r¸   r-   ©rN   rÅ   r1   r1   r2   rP   Y  ó   " z"stg_quality_ok.<locals>.<listcomp>c                 S  s   g | ]}t | d ¡ƒs|‘qS )r&   )r3   r¸   r  r1   r1   r2   rP   \  s    r   )rä   )rè   Ú	non_blankÚ
non_vacantr1   r1   r2   Ústg_quality_okU  s   r  r‚   c                   sª   |   d¡ d¡ dˆ ¡ d|¡ d¡ ¡ jpg }|sdS ‡ fdd„|D ƒ}|r1|   d	¡ |¡ ¡  |   d
¡ ¡  d|¡ ¡  dd„ |D ƒ}|rQ|   d
¡ |¡ ¡  t|ƒS )u¨   
    staging(run_id, department) -> snapshot ì €ìž¥ -> í˜„ìž¬ë³¸(kepco_org) êµì²´
    note: kepco_orgì—ëŠ” reason_date ì»¬ëŸ¼ì´ ì—†ìœ¼ë¯€ë¡œ ì œì™¸í•˜ê³  insert
    r   zJdepartment,name,position,gender,start,end,task,reason_date,career,key_hashr
  r‚   é'  r   c                   sd   g | ].}ˆ |  d ¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d	¡d
œ‘qS )r‚   r&   rƒ   r„   r[   r…   r†   r‡   rˆ   Úkey_hash)r
  r‚   r&   rƒ   r„   r[   r…   r†   r‡   rˆ   r  ©r¸   r  ©r
  r1   r2   rP   v  s    ôõÿz&promote_department.<locals>.<listcomp>r   r   c                 S  sR   g | ]%}|  d ¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d¡dœ‘qS )	r‚   r&   rƒ   r„   r[   r…   r†   rˆ   )r‚   r&   rƒ   r„   r[   r…   r†   rˆ   r  r  r1   r1   r2   rP     s    öøÿ)	rÝ   rà   rá   râ   rã   r®   rî   Údeleterä   )ræ   r‚   r
  Ústg_rowsÚsnap_payloadÚcur_payloadr1   r  r2   Úpromote_department`  s0   úø
òôr  Úposted_at_rawc                    s   t |ƒp
t ¡  d¡}t|ƒ}t|ƒ}|  t¡ d¡ 	dˆ ¡ 	dd¡ 
¡ jp'g }‡ fdd„|D ƒ‰tˆ ¡ ƒ}	|  t¡ d¡ 	dˆ ¡ d	¡ 
¡ jpJg }
i }|
D ]}tˆ | d
¡| d¡ƒ}||vre|||< qOt| ¡ ƒ}|	| }|	|@ }||	 }g ‰g ‰d(dd„}|D ]E}ˆ| pŠi  d¡}|| p“i  d¡}ˆ| pœi  d¡p¡d}|| p§i  d¡p¬d}|p°d|p³dkr»ˆ |¡ qƒ||ƒ||ƒkrÈˆ |¡ qƒ|rù‡fdd„|D ƒ}tdt|ƒdƒD ]}|  t¡ d|i¡ d|||d … ¡ dd¡ 
¡  qÜ|  t¡ ddi¡ 	dˆ ¡ 	dd¡ 
¡  ‡‡fdd„|D ƒ}|rG‡fdd„|D ƒ}tdt|ƒdƒD ]}|  t¡ d||dœ¡ d|||d … ¡ 
¡  q+ˆru‡fd d„ˆD ƒ}tdt|ƒdƒD ]}|  t¡ d!di¡ d|||d … ¡ 
¡  q[|D ]…}|| }ˆ | d
¡| d¡| d"¡| d#¡| d¡| d¡| d$¡pg ||dddd%œ}t| d
¡ƒr¹|  t¡ |¡ 
¡  qw|  t¡ d¡ 	dˆ ¡ 	d
| d
¡¡}| d¡du rÚ| dd¡n| 	d| d¡¡}| d&¡ 
¡ jpíg }|sû|  t¡ |¡ 
¡  qwˆD ]9}|| }ˆ | d
¡| d¡| d"¡| d#¡| d¡| d¡| d$¡p%g ||dddd%œ}|  t¡ |¡ 
¡  qÿˆD ]9}|| }ˆ | d
¡| d¡| d"¡| d#¡| d¡| d¡| d$¡pag ||dddd%œ}|  t¡ |¡ 
¡  q;t d'ˆ t|ƒt|ƒt| ¡ |	 ƒtˆƒtˆƒ¡ dS ))ur   
    - ì§ì „ current ìŠ¤ëƒ…ìƒ· ê¸°ì¤€ â†’ kepco_history ì—…ë°ì´íŠ¸
    - ê·œì¹™ì€ ê¸°ì¡´ ì„¤ëª…ê³¼ ë™ì¼
    rj   zPid, department, name, position, task, start, end, actual_end, posted_at, pdf_urlr‚   ÚcurrentTc                   s&   i | ]}t ˆ | d ¡| d¡ƒ|“qS )r&   r[   )r^   r¸   r  )rZ   r1   r2   Ú
<dictcomp>¯  s    ÿz!apply_history.<locals>.<dictcomp>z5department,name,position,task,gender,start,end,careerr  r&   r[   r{   r'   r(   r,   c                 S  rU   rV   rW   r|   r1   r1   r2   Ú
_norm_taskÆ  rY   z!apply_history.<locals>._norm_taskr…   r†   r4   c                   ó"   g | ]}ˆ   |¡rˆ | d  ‘qS ©rß   r  ©rN   Úk©Úprev_by_keyr1   r2   rP   Õ  r  z!apply_history.<locals>.<listcomp>r   iè  Ú
actual_endrß   NFc                   s    g | ]}|ˆ vr|ˆvr|‘qS r1   r1   r#  )Úextend_keysÚtask_change_keysr1   r2   rP   Ý  s     c                   r!  r"  r  r#  r%  r1   r2   rP   ß  r  )r  r_   rú   c                   r!  r"  r  r#  r%  r1   r2   rP   è  r  Ú
old_careerrƒ   r„   rˆ   )r‚   r&   r†   rƒ   r„   r[   r…   rˆ   r_   rú   r  Úextendedr*  rD   zW[history] dept=%s applied: close=%d, keep_same=%d, insert=%d, extend=%d, task_change=%dr‰   )rh   r   r  rm   rp   rt   rÝ   ÚTABLE_HISTORYrà   rá   rã   r®   ÚsetÚkeysÚTABLE_CUR_VIEWrâ   r^   r¸   r   Úrangerä   r   Úin_Úis_r3   rî   rº   rö   ) ræ   rZ   r  rú   rJ   Ú
posted_isoÚprev_isoÚprev_korÚ	prev_rowsÚ	prev_keysÚcur_rowsÚ
new_by_keyrÅ   r$  Únew_keysÚto_close_keysÚ	keep_keysÚinsert_keysr   Úprev_endÚnew_endÚ	prev_taskÚnew_taskÚids_to_closeÚiÚ	keep_sameÚids_to_keepÚids_oldÚprø   ÚqÚexistr1   )rZ   r(  r&  r)  r2   Úapply_history£  sê   
ÿþþ
ÿ
ÿþþ€

€$
ýó&.€
ù	
ù	(þrJ  ÚsessionÚdryú$Tuple[bool, Optional[str], int, int]c                   s¨  t  d|¡ t| ||d}|r| d¡r| d¡s!t  d|¡ dS |d }|d }| d¡p/d}	| d	¡}
t|
ƒ}t  d
||||	|
¡ |› d|› }t|t|ƒ}t| ||d}|s}t  d|¡ |sw||||	|
t	› d|› dœ}|swt
|t|ƒ d|ddfS t|ƒ}t|ƒ}t|ƒ}|s±||||	|
t	› d|› dœ}|s¡t
|t|ƒ |s±t||d ƒ}t|ddd |sÉt
|tˆ ||||	|
|t	› d|› ||dœ
ƒ t||d}d}|s|rd}tdt|ƒ|ƒD ]#}|||| … }‡ fdd„|D ƒ}| t¡ |¡ ¡  |t|ƒ7 }qád}|sEt||ˆ ƒ}|dkrEzt|||
p |p dt	› d|› |ƒ W n tyD } zt  d||¡ W Y d}~nd}~ww t  d||||¡ d|||fS )u<   
    ë°˜í™˜: (ok, department, inserted_org_rows, pages)
    z[%s] ---- start ----)r    r´   r·   z[%s] first item missing -> skip)FNr   r   r¶   r4   rµ   z9[%s] first: disclosureNo=%s, organ=%s, title=%s, idate=%sr+   z[%s] pdf.json fetch failed.z?disclosureNo=)rß   r‚   rÊ   r¶   r_   rú   Tr   rú   rð   )rñ   rò   )
r
  rŸ   r·   rÊ   r¶   r_   Úposted_at_isorú   r×   rJ   )ru   iô  c                   s`   g | ],}ˆ |  d ¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d¡|  d¡p+g d	œ
‘qS )
r‚   r&   rƒ   r„   r[   r…   r†   r‡   rˆ   )
r
  r‚   r&   rƒ   r„   r[   r…   r†   r‡   rˆ   r  )rN   rG  r  r1   r2   rP     s    ö
öz#process_company.<locals>.<listcomp>z[%s] apply_history failed: %sNz"[%s] pages=%d, stg=%d, promoted=%d)rº   rö   rÉ   r¸   rå   rh   rê   ÚTABLE_IDrÕ   rÎ   rï   rÛ   rT   rä   r  rù   Ú	TABLE_RAWr˜   r0  rÝ   Ú	TABLE_STGrî   rã   r  rJ  rÁ   Ú	exception)rK  rŸ   ræ   r
  r    rL  rÈ   rÊ   r·   r¶   rµ   r3  Ú	record_idÚalreadyrÖ   Úid_rowrL   Úmergedr×   ÚmsgÚpeopleÚins_stgÚchunkrC  Úpartrø   Úpromotedré   r1   r  r2   Úprocess_company,  s¢   

ÿúúö
õ
*€ÿr]  Úapba_idsúOptional[List[str]]c                 C  s  | pt }tƒ }tƒ }ddddg dœ}|D ]j}zAt||||||d\}	}
}}|	rN|d  d7  < |d  |7  < |d  |7  < |
rM|d  ||
||d	œ¡ n|d
  d7  < W n! tyx } zt d||¡ |d
  d7  < W Y d }~nd }~ww t 	d¡ q|S )Nr   )Ú	processedÚskippedÚinserted_org_rowsr×   Ú	companies)r    rL  r`  rD   rb  r×   rc  )r¢   r‚   Úinsertedr×   ra  z[%s] error: %sg333333ã?)
ÚAPBA_IDSrž   rÜ   r]  r   rÁ   rº   rR  ÚtimeÚsleep)r^  r    rL  r
  ÚidsrK  ræ   ÚsummaryÚapbaÚokrZ   Úinsr×   ré   r1   r1   r2   Úrun_once   s.   €€€þrm  c               
     s  t  ¡ } | jddd | jddd | jdtddd |  ¡ }|jr't d	¡ tƒ }t	|d
ƒ}zCt
}|j ¡ rKdd„ |j d¡D ƒ‰ ‡ fdd„t
D ƒ}t||j|j|d}t d|¡ t||d| dd¡| dd¡d t|ƒ W d S  tyŽ } zt d¡ t||dt|ƒd ‚ d }~ww )Nz--debugÚ
store_true)Úactionz	--dry-runz--onlyr4   u)   ì‰¼í‘œë¡œ apbaId ì œí•œ: ì˜ˆ) C0236,C0247)ÚtypeÚdefaultÚhelpÚDEBUGr%   c                 S  s   h | ]
}|  ¡ r|  ¡ ’qS r1   ry   ©rN   r{   r1   r1   r2   Ú	<setcomp>É  rx   zmain.<locals>.<setcomp>ú,c                   s   g | ]}|ˆ v r|‘qS r1   r1   rt  ©Úallowr1   r2   rP   Ê  rQ   zmain.<locals>.<listcomp>)r    rL  r
  zdone: %sÚpassedr×   r   rb  )r¯   r×   rè   zgroup_n8n failedÚfailed)r¯   r  )ÚargparseÚArgumentParserÚadd_argumentr,   Ú
parse_argsr    rº   ÚsetLevelrÜ   r  re  Úonlyr-   r‹   rm  Údry_runrö   r  r¸   ÚprintrÁ   rR  )ÚparserÚargsræ   r
  Útargetsri  ré   r1   rw  r2   Úmain¹  s<   




û
€ýr†  Ú__main__)r&   r'   r(   r)   )r0   r'   r(   r,   )r0   r'   r(   r)   )rC   r,   r0   r,   r(   r'   )rJ   r,   r(   r'   )rL   rM   r(   r,   )rZ   r,   r&   r'   r[   r'   r(   r\   )r_   r,   r(   r'   )ri   r,   r(   r,   r]   )rJ   r,   ru   r'   r(   rv   )r(   r™   )F)r0   r™   rŸ   r,   r    r)   r(   r¡   )r0   r™   rÊ   r,   r    r)   r(   r'   )rÖ   r,   r(   rM   )rÝ   r,   rÞ   r,   r(   r)   )rÝ   r,   rë   rì   r(   rí   )rð   T)rÀ   r,   rñ   r,   rò   r)   r(   r)   )r·   r,   rú   r,   r(   r,   )r  r,   r(   r,   )r   r   N)
r
  r,   r¯   r,   r×   rd   rè   rd   r  r,   )rè   rv   r(   r)   )r‚   r,   r
  r,   r(   rd   )
rZ   r,   r  r,   rú   r,   rJ   r,   r(   rí   )FF)rK  r™   rŸ   r,   r
  r,   r    r)   rL  r)   r(   rM  )NFFN)
r^  r_  r    r)   rL  r)   r
  r'   r(   rì   )WÚ
__future__r   ÚosÚsysr9   r¾   rf  r{  Úloggingrü   Útypingr   r   r   r   r   r   r	   r  rš   Úpypdfr
   ÚpathÚabspathrR   ÚdirnameÚ__file__ÚROOT_DIRr   Úapp.services.supabase_servicer   re  r½   r¹   rÎ   rO  rP  rQ  Ú
TABLE_SNAPr/  Ú	TABLE_ORGr,  Úgetenvr    r"   Úupperr#   Ú	getLoggerrº   ÚStreamHandlerÚhandlerÚsetFormatterÚ	FormatterÚ
addHandlerr  Úcompiler.   r3   r<   rB   rH   rK   rT   rX   r^   rh   rp   rt   r˜   rž   rÉ   rÕ   rÛ   rÜ   rê   rï   rù   r  r  r  r  r  rJ  r]  rm  r†  Ú__name__r1   r1   r1   r2   Ú<module>   s€   @ 
















A'


	





C 
t
#
ÿ