
    h                        d dl mZ d dlZd dlmZmZmZmZ ddZdd
Z	ddZ
ddZddZd dZd!dZ ej        d          Zd"dZd#d$dZdS )%    )annotationsN)AnyDictListOptionalsOptional[str]returnstrc                    | sdS |                      dd          } t          j        dd|           } t          j        dd|           } |                                 S )N z[ \t]+ z
[ ]*\n[ ]*
)replaceresubstrip)r   s    //var/www/html/bot/scripts/kepco_history_fill.py
_normalizer      sS    RR			$A
y#q!!A
}dA&&A7799    patc                    t          j        | |          }|r'|                    d                                          nd S )N   r   searchgroupr   )r   r   ms      r   _pickr      s9    
	#qA!",1771::,r   merged_textc                    t          j        d|           }|r'|                    d                                          nd S )Nu-   임원\s*현황\s*\n([^\n]+)\n임원\s*현황r   r   )r    r   s     r   _extract_department_from_headerr"      s:    
	BKPPA!",1771::,r   	List[str]c                @    d t          j        d|           D             S )Nc                <    g | ]}|                     d           |S )   직위)
startswith.0secs     r   
<listcomp>z,_split_position_sections.<locals>.<listcomp>   s*    ```Cs~~V^G_G_`C```r      \n(?=직위\s))r   split)r    s    r   _split_position_sectionsr.      s#    ``28$5{CC````r   section_textc                   t          j        d|           }|r;t          j        dd|                    d                                                    S t          j        d|           }|r;t          j        dd|                    d                                                    S t          j        d|           }|r;t          j        dd|                    d                                                    S d S )NG   임기\s*\(시작일\)\s*([^\n(]+?)\s*\(종료일\)\s*([^\n]+?)(?:\n|$)\s+r   r   u<   임기\s*[:：]?\s*([^\n~\-–—]+?)\s*[~\-–—]\s*[^\n]+u1   임기\s*(?:시작(?:일)?)?\s*[:：]?\s*([^\n]+))r   r   r   r   r   )r/   r   s     r   _parse_start_in_sectionr3      s    
	\^jkkA8QWWQZZ%5%5%7%788
8
	QS_``A8QWWQZZ%5%5%7%788
8
	FUUA8QWWQZZ%5%5%7%788
84r   namepos_hintc                d   t          |           }t          j        dt          j        |           d          }t	          |pd          }|D ]b}|                    |          st          d|          pd}|r!t	          |          r|t	          |          vrMt          |          }|r|c S cd S )Nu   성명\s*[:：]?\s*z(\s|$)r      직위\s*([^\n]+?)\s*성명)r.   r   compileescaper   r   r   r3   )	r    r4   r5   sectionsname_patpos_hint_normr*   sec_posstarts	            r   _find_start_by_name_and_posr?   "   s    '44HzH4HHHIIHx~2..M  s## 	6<<B 	Z00 	]*U\J]J]5]5]',, 	LLL	4r   boolc                    | sdS t          j        dd|                                           }|dv pt          j        d|          d uS )NTr2   r   >      –   —-u   (미정|무기한))r   r   r   r   )r   vs     r   _is_blank_daterF   1   sO    TT
vr1##%%A##Vry1F'J'JRV'VVr   u   ^\s*\(?공석\)?\s*$c                    | sdS t          |                                           }t                              |          rdS |dv rdS dS )NFT>      결원rD   )r   r   VACANCY_PATmatch)r4   r   s     r   _is_vacant_namerK   7   sM    D		A(DDOTT5r   department_hintList[Dict[str, Any]]c                V   |pt          |           }|                     d          }|dk    r
| |d         n| }d t          j        d|          D             }g }|D ]}t	          |          }|                    d          sd|v rt          j        d|          sAt          d	|          }	t          d
|          }
t          d|          }t          d|          }t          j        d|          }|r'|                    d          	                                nd}|r'|                    d          	                                nd}t          |          rdnt          j        dd|pd          }t          |          rdnt          j        dd|pd          }t          d|          }g }|rd t          j        d|          D             }|	pd                    dd          dv r|
pd                    dd          dv r|
s|                    ||
||	||||d           |S )u   
    PDF에서 병합된 텍스트(merged_text)를 입력받아, 사람 리스트를 추출한다.
    반환: [{department, name, task, position, gender, start, end, career(list[str])}, ...]
    u   직위 r   Nc                <    g | ]}t          j        d |          |S )u	   ^직위\s)r   rJ   r(   s     r   r+   z parse_people.<locals>.<listcomp>G   s*    ```BH\[^D_D_````r   r,   u   직위 변경 전u   변경사유u   (임기|직책|성별)\sr7   u4   성명\s*([^\n]+?)(?=\s*(?:직책|성별|임기|\n))u-   직책\s*([^\n]+?)(?=\s*(?:성별|임기|\n))u   성별\s*([남여])r1   r      r2   r   r   u   주요경력\s*([\s\S]*?)(?=\n\s*(?:선임절차|선임절차규정|당연직여부|직위\s|기준일|제출일|기관 공시 담당자|$))c                ^    g | ]*}|                                 |                                 +S  )r   )r)   lns     r   r+   z parse_people.<locals>.<listcomp>a   s-    XXXRRXXZZXbhhjjXXXr   z\n+>   	   변경전r&   >      성명	   변경후)
departmentr4   taskpositiongenderr>   endcareer)r"   findr   r-   r   r'   r   r   r   r   rF   r   r   append)r    rL   rW   	start_idxbodyr:   outrawr*   rY   r4   rX   rZ   r   	start_rawend_rawr>   r[   career_blockr\   s                       r   parse_peoplerf   ?   s   
 !P$CK$P$PJ  ++I&/1nn;yzz""+D``rx(94@@```H "C ) )oo>>-.. 	>S3H3HQSQZ[vx{Q|Q|3H7==PRUVVI3OO/55I`beff*+5AGGAJJ$$&&&	*+5AGGAJJ$$&&&&y11[rvfc9?XZ7[7[&w//[rvfc7?XZ7[7[ \
 
  	YXX28FL+I+IXXXFN##C,,0FFFJBR((,BBB 	

$ 	
 	
 		 		 		 		 Jr   )r   r	   r
   r   )r   r   r   r   r
   r	   )r    r   r
   r	   )r    r   r
   r#   )r/   r   r
   r	   )r    r   r4   r   r5   r	   r
   r	   )r   r	   r
   r@   )r4   r	   r
   r@   )N)r    r   rL   r	   r
   rM   )
__future__r   r   typingr   r   r   r   r   r   r"   r.   r3   r?   rF   r8   rI   rK   rf   rR   r   r   <module>ri      s#   " " " " " " 				 , , , , , , , , , , , ,   - - - -- - - -a a a a      W W W W
 bj011   5 5 5 5 5 5 5r   