o
    hT                     @   sV  d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
ZmZ dZe dZe dZe dZe d	Zd
Zg dZdejfddZd(dejdedededdf
ddZdejdedeeef deeef dB fddZdd Zdddededeeeef  fd d!Zd)d"eee  dedefd#d$Zdddededefd%d&Zg d'Z dS )*    N)OptionalDictAnyList)upsert_kepco_id)_download_pdf_extract_org_rowszhttps://alio.go.krz/item/itemOrganList.doz/item/itemOrganListSusi.jsonz/item/itemReportListSusi.jsonz/download/pdf.jsonz*GovBot/1.0 (+https://work.jjickjjicks.com))C0247C0042C0043C0066C0082C0220C0259C0248C0306C0305C0236returnc                  C   s"   t  } | jtdddd | S )Nz*/*zhttps://alio.go.kr/zko-KR,ko;q=0.9,en;q=0.8)z
User-AgentAcceptRefererzAccept-Language)requestsSessionheadersupdateUA)s r   0/var/www/html/govbot/app/crawlers/group_kepco.py_sess   s   r   20305   r   apba_idreport_rootpage_noc                 C   sL   |||d}| j t|dd}td|j d|j dt|j  |  d S )N)apbaIdreportFormRootNopageNo   )paramstimeoutz[ALIO][PRIME][GET]  -> z, len=)get	PRIME_URLprinturlstatus_codelentextraise_for_status)r   r"   r#   r$   r)   rr   r   r   _prime_session$   s   $r5   r/   payloadc              
   C   s   | j ||ddidd}td| dt|   td| d|j  |  z| }W |S  tyL } ztd| d	|  W Y d }~d S d }~ww )
NzContent-Typezapplication/jsonr(   )jsonr   r*   z[ALIO][POST] z payload_keys=r+   z[ALIO][POST][JSONERR] z err: )postr.   listkeysr0   r3   r7   	Exception)r   r/   r6   r4   dataer   r   r   
_post_json*   s   
r>   c                 C   s   t | tr| r| d S | S )Nr   )
isinstancer9   )vr   r   r   _first_or_none6   s   rA         ?throttlerD   c                   s  t  }zut|  t| g g g  dd}t|t|}|r&|ddkr@td   W z|  W dS  t	y?   Y dS w |dpFi dpP|dpPg }|dpWi d	pa|d	pag }d
 i} fdd|pn|D }|rt
|d|d< t
|d|d< n||dpi dp|dpi  |pi d}	|pi d}
td  d|	 d|
  d |	dddddd}t|t|}|r|ddkrW z|  W dS  t	y   Y dS w |dp|}|dpg }|sW z|  W dS  t	y   Y dS w |d }t|dpd }|dr/t|dp%ddddd nd}|dp8d}td  d | d!| d"| d#	 |saW z|  W dS  t	y`   Y dS w  ||||d$W z|  W S  t	yy   Y S w z|  W w  t	y   Y w w )%u   
    1) HTML visit (prime)
    2) ORG_API: apbaType, apbaNa 등
    3) LIST_API: 보고서 목록(임원현황). 최상단 1건만 추출
    r    )apbaTypejidtDptmarear%   r&   statuserrorz[ALIO][ORG][MISS] apbaId=Nr<   	organListapbaListr%   c                    s"   g | ]}t |d  kr|qS )r%   )strr,   ).0xr"   r   r   
<listcomp>U   s   " z(fetch_latest_for_org.<locals>.<listcomp>apbaNarE   	organInfoz[ALIO][ORG] apbaId=z -> apbaType=z	, apbaNa=r!    title)r'   r%   rE   r&   search_wordsearch_flagbid_type	enfc_isttresultr   disclosureNoidate.-
      임원현황z[ALIO][LIST][TOP] apbaId=z -> disclosureNo=z, idate=z	, title='')r%   rZ   r[   rT   rR   )r   r5   timesleepr>   ORG_APIr,   r.   closer;   rA   r   LIST_APIrL   stripreplace)r"   rD   r   org_payloadorg
organ_list	apba_list
organ_infocand	apba_typeapba_nalist_payloadlstr<   resultstopdisclosure_nor[   rT   r   rO   r   fetch_latest_for_org9   s   

8""(
4"	ru   apba_idsc              
   C   s   | du rt } d}| D ]c}t||d}|std| d q
|dp#i dp(d p,|}|d	}|d
}|dp=d}t d| }	| d| }
t|
|||d||	d td|
 d| d|  |d7 }t| q
|S )u   
    최신 임원현황 1건씩을 kepco_id 에 저장 (멱등).
    kepco_id 스키마(NOT NULL 포함)에 맞춰 항상 값 채워넣음.
    Nr   rC   z[KEPCO] z: no latest disclosurerR   rQ   rS   rZ   r[   rT   r_   ?disclosureNo=r]   )compound_id
departmentrt   rT   tag	posted_atpdf_urlz[KEPCO] UPSERT ok: id=z dept=z disc=r!   )	DEFAULT_APBA_IDSru   r.   r,   rf   PDF_URLr   ra   rb   )rv   rD   	new_countr"   info
organ_namert   r{   rT   r|   rx   r   r   r   
run_ingest   s6    


r   c             
   C   s  t | |d}|std|  d dS |dpi dpd p!| }|d}|d	}t d
| }td| d| d|  td|  zt|dd}W n tyi } ztd|  W Y d}~dS d}~ww zt|}	W n ty } ztd|  W Y d}~dS d}~ww tdt|	  td t	|	dD ]Q\}
}td|
 d td|dd  td|dd  td|dd  td|d d  td!|d"d  td#|d$d  t  qt|	S )%u   
    해당 기관(apba_id)의 최신 '임원현황' PDF를 내려받아 파싱한 뒤,
    '직위/성명/직책/성별/임기시작일/임기종료일'만 콘솔에 예쁘게 출력 (DB 저장 안 함).
    rC   z
[PREVIEW] z: latest disclosure not foundr   rR   rQ   rS   rZ   r[   rw   u	   
기관: u	     공시:u     등록일:zPDF: g      >@)r*   z[PREVIEW][PDF][ERR] Nz[PREVIEW][PARSE][ERR] u   파싱 행 수: zP--------------------------------------------------------------------------------r!   []u   직위        : positionu   성명        : nameu   직책        : tasku   성별        : genderu   임기시작일  : startu   임기종료일  : end)
ru   r.   r,   rf   r~   download_pdfr;   extract_org_rowsr1   	enumerate)r"   rD   r   deptdiscr[   r/   	pdf_bytesr=   rowsir4   r   r   r   preview_latest_officers   sH    

r   )ru   r   r   r}   )r    r!   )NrB   )!ra   typingr   r   r   r   r   app.services.supabase_servicer   app.crawlers.kepco_org_parserr   r   r   r   BASEr-   rc   re   r~   r   r}   r   r   rL   intr5   r>   rA   floatru   r   r   __all__r   r   r   r   <module>   s(    



"
0( S*,